From 42732fa7d15878aedf226a24ecc52c0c8322c858 Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Mon, 16 Oct 2023 09:12:51 -0700 Subject: [PATCH 1/2] Update Crucible and Propolis (#4282) Propolis changes: PHD: refactor & add support Propolis server "environments" (#547) Begin making Accessor interface more robust Update Crucible and Omicron deps for Hakari fixes Add cloud-init volume generation to standalone Use specified toolchain version for all GHA checks Use params to configure rust-toolchain in GHA Update and lock GHA dependencies Crucible changes: Use regions_dataset path for apply_smf (#1000) Don't unwrap when we can't create a dataset (#992) Fix tests and update log messages. (#995) Better backpressure (#990) Update Rust crate proptest to 1.3.1 (#977) Read only downstairs can skip Live Repair (#984) Update Rust crate expectorate to 1.1.0 (#975) Add trait for `ExtentInner` (#982) report backpressure in upstairs_info dtrace probe (#987) Support multiple downstairs operations in GtoS (#985) --------- Co-authored-by: Alan Hanson --- Cargo.lock | 142 +++++++++----------------------------- Cargo.toml | 14 ++-- package-manifest.toml | 12 ++-- workspace-hack/Cargo.toml | 20 ++---- 4 files changed, 50 insertions(+), 138 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1f2ee1511b..3265ed19de 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -67,17 +67,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "ahash" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" -dependencies = [ - "getrandom 0.2.10", - "once_cell", - "version_check", -] - [[package]] name = "ahash" version = "0.8.3" @@ -506,7 +495,7 @@ dependencies = [ [[package]] name = "bhyve_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" +source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" dependencies = [ "bhyve_api_sys", "libc", @@ -516,7 +505,7 @@ dependencies = [ [[package]] name = "bhyve_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" +source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" dependencies = [ "libc", "strum", @@ -1246,7 +1235,7 @@ dependencies = [ [[package]] name = "cpuid_profile_config" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" +source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" dependencies = [ "propolis", "serde", @@ -1454,7 +1443,7 @@ dependencies = [ [[package]] name = "crucible" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=20273bcca1fd5834ebc3e67dfa7020f0e99ad681#20273bcca1fd5834ebc3e67dfa7020f0e99ad681" +source = "git+https://github.com/oxidecomputer/crucible?rev=657d985247b41e38aac2e271c7ce8bc9ea81f4b6#657d985247b41e38aac2e271c7ce8bc9ea81f4b6" dependencies = [ "aes-gcm-siv", "anyhow", @@ -1466,6 +1455,7 @@ dependencies = [ "crucible-client-types", "crucible-common", "crucible-protocol", + "crucible-workspace-hack", "dropshot", "futures", "futures-core", @@ -1493,45 +1483,45 @@ dependencies = [ "usdt", "uuid", "version_check", - "workspace-hack", ] [[package]] name = "crucible-agent-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=20273bcca1fd5834ebc3e67dfa7020f0e99ad681#20273bcca1fd5834ebc3e67dfa7020f0e99ad681" +source = "git+https://github.com/oxidecomputer/crucible?rev=657d985247b41e38aac2e271c7ce8bc9ea81f4b6#657d985247b41e38aac2e271c7ce8bc9ea81f4b6" dependencies = [ "anyhow", "chrono", + "crucible-workspace-hack", "percent-encoding", "progenitor", "reqwest", "schemars", "serde", "serde_json", - "workspace-hack", ] [[package]] name = "crucible-client-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=20273bcca1fd5834ebc3e67dfa7020f0e99ad681#20273bcca1fd5834ebc3e67dfa7020f0e99ad681" +source = "git+https://github.com/oxidecomputer/crucible?rev=657d985247b41e38aac2e271c7ce8bc9ea81f4b6#657d985247b41e38aac2e271c7ce8bc9ea81f4b6" dependencies = [ "base64 0.21.4", + "crucible-workspace-hack", "schemars", "serde", "serde_json", "uuid", - "workspace-hack", ] [[package]] name = "crucible-common" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=20273bcca1fd5834ebc3e67dfa7020f0e99ad681#20273bcca1fd5834ebc3e67dfa7020f0e99ad681" +source = "git+https://github.com/oxidecomputer/crucible?rev=657d985247b41e38aac2e271c7ce8bc9ea81f4b6#657d985247b41e38aac2e271c7ce8bc9ea81f4b6" dependencies = [ "anyhow", "atty", + "crucible-workspace-hack", "nix 0.26.2 (registry+https://github.com/rust-lang/crates.io-index)", "rusqlite", "rustls-pemfile", @@ -1550,16 +1540,16 @@ dependencies = [ "twox-hash", "uuid", "vergen", - "workspace-hack", ] [[package]] name = "crucible-pantry-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=20273bcca1fd5834ebc3e67dfa7020f0e99ad681#20273bcca1fd5834ebc3e67dfa7020f0e99ad681" +source = "git+https://github.com/oxidecomputer/crucible?rev=657d985247b41e38aac2e271c7ce8bc9ea81f4b6#657d985247b41e38aac2e271c7ce8bc9ea81f4b6" dependencies = [ "anyhow", "chrono", + "crucible-workspace-hack", "percent-encoding", "progenitor", "reqwest", @@ -1567,38 +1557,43 @@ dependencies = [ "serde", "serde_json", "uuid", - "workspace-hack", ] [[package]] name = "crucible-protocol" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=20273bcca1fd5834ebc3e67dfa7020f0e99ad681#20273bcca1fd5834ebc3e67dfa7020f0e99ad681" +source = "git+https://github.com/oxidecomputer/crucible?rev=657d985247b41e38aac2e271c7ce8bc9ea81f4b6#657d985247b41e38aac2e271c7ce8bc9ea81f4b6" dependencies = [ "anyhow", "bincode", "bytes", "crucible-common", + "crucible-workspace-hack", "num_enum 0.7.0", "schemars", "serde", "tokio-util", "uuid", - "workspace-hack", ] [[package]] name = "crucible-smf" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=20273bcca1fd5834ebc3e67dfa7020f0e99ad681#20273bcca1fd5834ebc3e67dfa7020f0e99ad681" +source = "git+https://github.com/oxidecomputer/crucible?rev=657d985247b41e38aac2e271c7ce8bc9ea81f4b6#657d985247b41e38aac2e271c7ce8bc9ea81f4b6" dependencies = [ + "crucible-workspace-hack", "libc", "num-derive", "num-traits", "thiserror", - "workspace-hack", ] +[[package]] +name = "crucible-workspace-hack" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fbd293370c6cb9c334123675263de33fc9e53bbdfc8bdd5e329237cf0205fdc7" + [[package]] name = "crunchy" version = "0.2.2" @@ -2034,7 +2029,7 @@ checksum = "7e1a8646b2c125eeb9a84ef0faa6d2d102ea0d5da60b824ade2743263117b848" [[package]] name = "dladm" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" +source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" dependencies = [ "libc", "strum", @@ -2994,9 +2989,6 @@ name = "hashbrown" version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888" -dependencies = [ - "ahash 0.7.6", -] [[package]] name = "hashbrown" @@ -3004,7 +2996,7 @@ version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "43a3c133739dddd0d2990f9a4bdf8eb4b21ef50e4851ca85ab661199821d510e" dependencies = [ - "ahash 0.8.3", + "ahash", ] [[package]] @@ -3013,7 +3005,7 @@ version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" dependencies = [ - "ahash 0.8.3", + "ahash", "allocator-api2", ] @@ -5421,7 +5413,6 @@ dependencies = [ "futures", "futures-channel", "futures-core", - "futures-executor", "futures-io", "futures-sink", "futures-task", @@ -5429,13 +5420,11 @@ dependencies = [ "gateway-messages", "generic-array", "getrandom 0.2.10", - "hashbrown 0.12.3", "hashbrown 0.13.2", "hashbrown 0.14.0", "hex", "hyper", "hyper-rustls", - "indexmap 1.9.3", "indexmap 2.0.0", "inout", "ipnetwork", @@ -5453,9 +5442,7 @@ dependencies = [ "num-traits", "once_cell", "openapiv3", - "parking_lot 0.12.1", "petgraph", - "phf_shared 0.11.2", "postgres-types", "ppv-lite86", "predicates 3.0.4", @@ -5489,7 +5476,6 @@ dependencies = [ "toml_datetime", "toml_edit 0.19.15", "tracing", - "tracing-core", "trust-dns-proto", "unicode-bidi", "unicode-normalization", @@ -6628,7 +6614,7 @@ dependencies = [ [[package]] name = "propolis" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" +source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" dependencies = [ "anyhow", "bhyve_api", @@ -6661,7 +6647,7 @@ dependencies = [ [[package]] name = "propolis-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" +source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" dependencies = [ "async-trait", "base64 0.21.4", @@ -6685,7 +6671,7 @@ dependencies = [ [[package]] name = "propolis-server" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" +source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" dependencies = [ "anyhow", "async-trait", @@ -6737,7 +6723,7 @@ dependencies = [ [[package]] name = "propolis-server-config" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" +source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" dependencies = [ "cpuid_profile_config", "serde", @@ -6749,7 +6735,7 @@ dependencies = [ [[package]] name = "propolis_types" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" +source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" dependencies = [ "schemars", "serde", @@ -9267,7 +9253,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0955b8137a1df6f1a2e9a37d8a6656291ff0297c1a97c24e0d8425fe2312f79a" dependencies = [ "once_cell", - "valuable", ] [[package]] @@ -9756,12 +9741,6 @@ dependencies = [ "serde", ] -[[package]] -name = "valuable" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" - [[package]] name = "vcpkg" version = "0.2.15" @@ -9796,7 +9775,7 @@ checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" [[package]] name = "viona_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" +source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" dependencies = [ "libc", "viona_api_sys", @@ -9805,7 +9784,7 @@ dependencies = [ [[package]] name = "viona_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=901b710b6e5bd05a94a323693c2b971e7e7b240e#901b710b6e5bd05a94a323693c2b971e7e7b240e" +source = "git+https://github.com/oxidecomputer/propolis?rev=334df299a56cd0d33e1227ed4ce4d2fe7478d541#334df299a56cd0d33e1227ed4ce4d2fe7478d541" dependencies = [ "libc", ] @@ -10384,61 +10363,6 @@ dependencies = [ "windows-sys 0.48.0", ] -[[package]] -name = "workspace-hack" -version = "0.1.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=20273bcca1fd5834ebc3e67dfa7020f0e99ad681#20273bcca1fd5834ebc3e67dfa7020f0e99ad681" -dependencies = [ - "bitflags 2.4.0", - "bytes", - "cc", - "chrono", - "console", - "crossbeam-utils", - "crypto-common", - "digest", - "either", - "futures-channel", - "futures-core", - "futures-executor", - "futures-sink", - "futures-util", - "getrandom 0.2.10", - "hashbrown 0.12.3", - "hex", - "hyper", - "indexmap 1.9.3", - "libc", - "log", - "mio", - "num-traits", - "once_cell", - "openapiv3", - "parking_lot 0.12.1", - "phf_shared 0.11.2", - "rand 0.8.5", - "rand_chacha 0.3.1", - "rand_core 0.6.4", - "reqwest", - "rustls", - "schemars", - "semver 1.0.18", - "serde", - "slog", - "syn 1.0.109", - "syn 2.0.32", - "time", - "time-macros", - "tokio", - "tokio-util", - "toml_datetime", - "toml_edit 0.19.15", - "tracing", - "tracing-core", - "usdt", - "uuid", -] - [[package]] name = "wyz" version = "0.5.1" diff --git a/Cargo.toml b/Cargo.toml index 6a671e6b49..72a7f6157e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -163,10 +163,10 @@ cookie = "0.16" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" crossterm = { version = "0.27.0", features = ["event-stream"] } -crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "20273bcca1fd5834ebc3e67dfa7020f0e99ad681" } -crucible-client-types = { git = "https://github.com/oxidecomputer/crucible", rev = "20273bcca1fd5834ebc3e67dfa7020f0e99ad681" } -crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "20273bcca1fd5834ebc3e67dfa7020f0e99ad681" } -crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "20273bcca1fd5834ebc3e67dfa7020f0e99ad681" } +crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "657d985247b41e38aac2e271c7ce8bc9ea81f4b6" } +crucible-client-types = { git = "https://github.com/oxidecomputer/crucible", rev = "657d985247b41e38aac2e271c7ce8bc9ea81f4b6" } +crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "657d985247b41e38aac2e271c7ce8bc9ea81f4b6" } +crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "657d985247b41e38aac2e271c7ce8bc9ea81f4b6" } curve25519-dalek = "4" datatest-stable = "0.1.3" display-error-chain = "0.1.1" @@ -281,9 +281,9 @@ pretty-hex = "0.3.0" proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } -bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "901b710b6e5bd05a94a323693c2b971e7e7b240e" } -propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "901b710b6e5bd05a94a323693c2b971e7e7b240e", features = [ "generated-migration" ] } -propolis-server = { git = "https://github.com/oxidecomputer/propolis", rev = "901b710b6e5bd05a94a323693c2b971e7e7b240e", default-features = false, features = ["mock-only"] } +bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "334df299a56cd0d33e1227ed4ce4d2fe7478d541" } +propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "334df299a56cd0d33e1227ed4ce4d2fe7478d541", features = [ "generated-migration" ] } +propolis-server = { git = "https://github.com/oxidecomputer/propolis", rev = "334df299a56cd0d33e1227ed4ce4d2fe7478d541", default-features = false, features = ["mock-only"] } proptest = "1.3.1" quote = "1.0" rand = "0.8.5" diff --git a/package-manifest.toml b/package-manifest.toml index 7cf235c24a..a88f8170d0 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -381,10 +381,10 @@ only_for_targets.image = "standard" # 3. Use source.type = "manual" instead of "prebuilt" source.type = "prebuilt" source.repo = "crucible" -source.commit = "20273bcca1fd5834ebc3e67dfa7020f0e99ad681" +source.commit = "657d985247b41e38aac2e271c7ce8bc9ea81f4b6" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible.sha256.txt -source.sha256 = "0671570dfed8bff8e64c42a41269d961426bdd07e72b9ca8c2e3f28e7ead3c1c" +source.sha256 = "010281ff5c3a0807c9e770d79264c954816a055aa482988d81e85ed98242e454" output.type = "zone" [package.crucible-pantry] @@ -392,10 +392,10 @@ service_name = "crucible_pantry" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "20273bcca1fd5834ebc3e67dfa7020f0e99ad681" +source.commit = "657d985247b41e38aac2e271c7ce8bc9ea81f4b6" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-pantry.sha256.txt -source.sha256 = "c35cc24945d047f8d77e438ee606e6a83be64f0f97356fdc3308be716dcf3718" +source.sha256 = "809936edff2957e761e49667d5477e34b7a862050b4e082a59fdc95096d3bdd5" output.type = "zone" # Refer to @@ -406,10 +406,10 @@ service_name = "propolis-server" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "propolis" -source.commit = "901b710b6e5bd05a94a323693c2b971e7e7b240e" +source.commit = "334df299a56cd0d33e1227ed4ce4d2fe7478d541" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/propolis/image//propolis-server.sha256.txt -source.sha256 = "0f681cdbe7312f66fd3c99fe033b379e49c59fa4ad04d307f68b12514307e976" +source.sha256 = "531e0654de94b6e805836c35aa88b8a1ac691184000a03976e2b7825061e904e" output.type = "zone" [package.maghemite] diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 1871fd1b4d..36c3fe3f5f 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -39,7 +39,6 @@ flate2 = { version = "1.0.27" } futures = { version = "0.3.28" } futures-channel = { version = "0.3.28", features = ["sink"] } futures-core = { version = "0.3.28" } -futures-executor = { version = "0.3.28" } futures-io = { version = "0.3.28", default-features = false, features = ["std"] } futures-sink = { version = "0.3.28" } futures-task = { version = "0.3.28", default-features = false, features = ["std"] } @@ -49,11 +48,9 @@ generic-array = { version = "0.14.7", default-features = false, features = ["mor getrandom = { version = "0.2.10", default-features = false, features = ["js", "rdrand", "std"] } hashbrown-582f2526e08bb6a0 = { package = "hashbrown", version = "0.14.0", features = ["raw"] } hashbrown-594e8ee84c453af0 = { package = "hashbrown", version = "0.13.2" } -hashbrown-5ef9efb8ec2df382 = { package = "hashbrown", version = "0.12.3", features = ["raw"] } hex = { version = "0.4.3", features = ["serde"] } hyper = { version = "0.14.27", features = ["full"] } -indexmap-dff4ba8e3ae991db = { package = "indexmap", version = "1.9.3", default-features = false, features = ["serde-1", "std"] } -indexmap-f595c2ba2a3f28df = { package = "indexmap", version = "2.0.0", features = ["serde"] } +indexmap = { version = "2.0.0", features = ["serde"] } inout = { version = "0.1.3", default-features = false, features = ["std"] } ipnetwork = { version = "0.20.0", features = ["schemars"] } itertools = { version = "0.10.5" } @@ -68,9 +65,7 @@ num-integer = { version = "0.1.45", features = ["i128"] } num-iter = { version = "0.1.43", default-features = false, features = ["i128"] } num-traits = { version = "0.2.16", features = ["i128", "libm"] } openapiv3 = { version = "1.0.3", default-features = false, features = ["skip_serializing_defaults"] } -parking_lot = { version = "0.12.1", features = ["send_guard"] } petgraph = { version = "0.6.4", features = ["serde-1"] } -phf_shared = { version = "0.11.2" } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } predicates = { version = "3.0.4" } @@ -91,7 +86,7 @@ slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "rele spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } -syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } +syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extra-traits", "fold", "full", "visit"] } syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.32", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } textwrap = { version = "0.16.0" } time = { version = "0.3.27", features = ["formatting", "local-offset", "macros", "parsing"] } @@ -100,7 +95,6 @@ tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serd tokio-stream = { version = "0.1.14", features = ["net"] } toml = { version = "0.7.8" } tracing = { version = "0.1.37", features = ["log"] } -tracing-core = { version = "0.1.31" } trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.13" } unicode-normalization = { version = "0.1.22" } @@ -137,7 +131,6 @@ flate2 = { version = "1.0.27" } futures = { version = "0.3.28" } futures-channel = { version = "0.3.28", features = ["sink"] } futures-core = { version = "0.3.28" } -futures-executor = { version = "0.3.28" } futures-io = { version = "0.3.28", default-features = false, features = ["std"] } futures-sink = { version = "0.3.28" } futures-task = { version = "0.3.28", default-features = false, features = ["std"] } @@ -147,11 +140,9 @@ generic-array = { version = "0.14.7", default-features = false, features = ["mor getrandom = { version = "0.2.10", default-features = false, features = ["js", "rdrand", "std"] } hashbrown-582f2526e08bb6a0 = { package = "hashbrown", version = "0.14.0", features = ["raw"] } hashbrown-594e8ee84c453af0 = { package = "hashbrown", version = "0.13.2" } -hashbrown-5ef9efb8ec2df382 = { package = "hashbrown", version = "0.12.3", features = ["raw"] } hex = { version = "0.4.3", features = ["serde"] } hyper = { version = "0.14.27", features = ["full"] } -indexmap-dff4ba8e3ae991db = { package = "indexmap", version = "1.9.3", default-features = false, features = ["serde-1", "std"] } -indexmap-f595c2ba2a3f28df = { package = "indexmap", version = "2.0.0", features = ["serde"] } +indexmap = { version = "2.0.0", features = ["serde"] } inout = { version = "0.1.3", default-features = false, features = ["std"] } ipnetwork = { version = "0.20.0", features = ["schemars"] } itertools = { version = "0.10.5" } @@ -166,9 +157,7 @@ num-integer = { version = "0.1.45", features = ["i128"] } num-iter = { version = "0.1.43", default-features = false, features = ["i128"] } num-traits = { version = "0.2.16", features = ["i128", "libm"] } openapiv3 = { version = "1.0.3", default-features = false, features = ["skip_serializing_defaults"] } -parking_lot = { version = "0.12.1", features = ["send_guard"] } petgraph = { version = "0.6.4", features = ["serde-1"] } -phf_shared = { version = "0.11.2" } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } predicates = { version = "3.0.4" } @@ -189,7 +178,7 @@ slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "rele spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } -syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } +syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extra-traits", "fold", "full", "visit"] } syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.32", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } textwrap = { version = "0.16.0" } time = { version = "0.3.27", features = ["formatting", "local-offset", "macros", "parsing"] } @@ -199,7 +188,6 @@ tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serd tokio-stream = { version = "0.1.14", features = ["net"] } toml = { version = "0.7.8" } tracing = { version = "0.1.37", features = ["log"] } -tracing-core = { version = "0.1.31" } trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.13" } unicode-normalization = { version = "0.1.22" } From 7d8878998de09d2cd1b461c15fc2de18b7717d9b Mon Sep 17 00:00:00 2001 From: bnaecker Date: Mon, 16 Oct 2023 10:17:19 -0700 Subject: [PATCH 2/2] Create zone bundles from ZFS snapshots (#4225) - Fixes #4010 - Previously, we copied log files directly out of their original locations, which meant we contended with several other components: `logadm` rotating the log file; the log archiver moving the to longer-term storage; and the program writing to the file itself. This commit changes the operation of the bundler, to first create a ZFS snapshot of the filesystem(s) containing the log files, clone them, and then copy files out of the clones. We destroy those clones / snapshots after completing, and when the sled-agent starts to help with crash-safety. --- illumos-utils/src/running_zone.rs | 78 +--- illumos-utils/src/zfs.rs | 118 +++++ sled-agent/src/bootstrap/pre_server.rs | 2 +- sled-agent/src/zone_bundle.rs | 621 ++++++++++++++++++++----- 4 files changed, 646 insertions(+), 173 deletions(-) diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs index 734f22bd30..805419cb5d 100644 --- a/illumos-utils/src/running_zone.rs +++ b/illumos-utils/src/running_zone.rs @@ -391,13 +391,16 @@ pub struct RunningZone { } impl RunningZone { + /// The path to the zone's root filesystem (i.e., `/`), within zonepath. + pub const ROOT_FS_PATH: &'static str = "root"; + pub fn name(&self) -> &str { &self.inner.name } - /// Returns the filesystem path to the zone's root + /// Returns the filesystem path to the zone's root in the GZ. pub fn root(&self) -> Utf8PathBuf { - self.inner.zonepath.join("root") + self.inner.zonepath.join(Self::ROOT_FS_PATH) } pub fn control_interface(&self) -> AddrObject { @@ -958,13 +961,11 @@ impl RunningZone { }; let binary = Utf8PathBuf::from(path); - // Fetch any log files for this SMF service. - let Some((log_file, rotated_log_files)) = - self.service_log_files(&service_name)? + let Some(log_file) = self.service_log_file(&service_name)? else { error!( self.inner.log, - "failed to find log files for existing service"; + "failed to find log file for existing service"; "service_name" => &service_name, ); continue; @@ -975,7 +976,6 @@ impl RunningZone { binary, pid, log_file, - rotated_log_files, }); } } @@ -992,72 +992,24 @@ impl RunningZone { .collect()) } - /// Return any SMF log files associated with the named service. + /// Return any SMF log file associated with the named service. /// - /// Given a named service, this returns a tuple of the latest or current log - /// file, and an array of any rotated log files. If the service does not - /// exist, or there are no log files, `None` is returned. - pub fn service_log_files( + /// Given a named service, this returns the path of the current log file. + /// This can be used to find rotated or archived log files, but keep in mind + /// this returns only the current, if it exists. + pub fn service_log_file( &self, name: &str, - ) -> Result)>, ServiceError> { + ) -> Result, ServiceError> { let output = self.run_cmd(&["svcs", "-L", name])?; let mut lines = output.lines(); let Some(current) = lines.next() else { return Ok(None); }; - // We need to prepend the zonepath root to get the path in the GZ. We - // can do this with `join()`, but that will _replace_ the path if the - // second one is absolute. So trim any prefixed `/` from each path. - let root = self.root(); - let current_log_file = - root.join(current.trim().trim_start_matches('/')); - - // The rotated log files should have the same prefix as the current, but - // with an index appended. We'll search the parent directory for - // matching names, skipping the current file. - // - // See https://illumos.org/man/8/logadm for details on the naming - // conventions around these files. - let dir = current_log_file.parent().unwrap(); - let mut rotated_files: Vec = Vec::new(); - for entry in dir.read_dir_utf8()? { - let entry = entry?; - let path = entry.path(); - - // Camino's Utf8Path only considers whole path components to match, - // so convert both paths into a &str and use that object's - // starts_with. See the `camino_starts_with_behaviour` test. - let path_ref: &str = path.as_ref(); - let current_log_file_ref: &str = current_log_file.as_ref(); - if path != current_log_file - && path_ref.starts_with(current_log_file_ref) - { - rotated_files.push(path.clone().into()); - } - } - - Ok(Some((current_log_file, rotated_files))) + return Ok(Some(Utf8PathBuf::from(current.trim()))); } } -#[test] -fn camino_starts_with_behaviour() { - let logfile = - Utf8PathBuf::from("/zonepath/var/svc/log/oxide-nexus:default.log"); - let rotated_logfile = - Utf8PathBuf::from("/zonepath/var/svc/log/oxide-nexus:default.log.0"); - - let logfile_as_string: &str = logfile.as_ref(); - let rotated_logfile_as_string: &str = rotated_logfile.as_ref(); - - assert!(logfile != rotated_logfile); - assert!(logfile_as_string != rotated_logfile_as_string); - - assert!(!rotated_logfile.starts_with(&logfile)); - assert!(rotated_logfile_as_string.starts_with(&logfile_as_string)); -} - impl Drop for RunningZone { fn drop(&mut self) { if let Some(_) = self.id.take() { @@ -1088,8 +1040,6 @@ pub struct ServiceProcess { pub pid: u32, /// The path for the current log file. pub log_file: Utf8PathBuf, - /// The paths for any rotated log files. - pub rotated_log_files: Vec, } /// Errors returned from [`InstalledZone::install`]. diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index 9118a9a3cd..a6af997619 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -108,6 +108,26 @@ pub struct GetValueError { err: GetValueErrorRaw, } +#[derive(Debug, thiserror::Error)] +#[error("Failed to list snapshots: {0}")] +pub struct ListSnapshotsError(#[from] crate::ExecutionError); + +#[derive(Debug, thiserror::Error)] +#[error("Failed to create snapshot '{snap_name}' from filesystem '{filesystem}': {err}")] +pub struct CreateSnapshotError { + filesystem: String, + snap_name: String, + err: crate::ExecutionError, +} + +#[derive(Debug, thiserror::Error)] +#[error("Failed to delete snapshot '{filesystem}@{snap_name}': {err}")] +pub struct DestroySnapshotError { + filesystem: String, + snap_name: String, + err: crate::ExecutionError, +} + /// Wraps commands for interacting with ZFS. pub struct Zfs {} @@ -184,6 +204,20 @@ impl Zfs { Ok(filesystems) } + /// Return the name of a dataset for a ZFS object. + /// + /// The object can either be a dataset name, or a path, in which case it + /// will be resolved to the _mounted_ ZFS dataset containing that path. + pub fn get_dataset_name(object: &str) -> Result { + let mut command = std::process::Command::new(ZFS); + let cmd = command.args(&["get", "-Hpo", "value", "name", object]); + execute(cmd) + .map(|output| { + String::from_utf8_lossy(&output.stdout).trim().to_string() + }) + .map_err(|err| ListDatasetsError { name: object.to_string(), err }) + } + /// Destroys a dataset. pub fn destroy_dataset(name: &str) -> Result<(), DestroyDatasetError> { let mut command = std::process::Command::new(PFEXEC); @@ -379,6 +413,7 @@ impl Zfs { } } + /// Set the value of an Oxide-managed ZFS property. pub fn set_oxide_value( filesystem_name: &str, name: &str, @@ -404,6 +439,7 @@ impl Zfs { Ok(()) } + /// Get the value of an Oxide-managed ZFS property. pub fn get_oxide_value( filesystem_name: &str, name: &str, @@ -434,6 +470,88 @@ impl Zfs { } Ok(value.to_string()) } + + /// List all extant snapshots. + pub fn list_snapshots() -> Result, ListSnapshotsError> { + let mut command = std::process::Command::new(ZFS); + let cmd = command.args(&["list", "-H", "-o", "name", "-t", "snapshot"]); + execute(cmd) + .map(|output| { + let stdout = String::from_utf8_lossy(&output.stdout); + stdout + .trim() + .lines() + .map(|line| { + let (filesystem, snap_name) = + line.split_once('@').unwrap(); + Snapshot { + filesystem: filesystem.to_string(), + snap_name: snap_name.to_string(), + } + }) + .collect() + }) + .map_err(ListSnapshotsError::from) + } + + /// Create a snapshot of a filesystem. + /// + /// A list of properties, as name-value tuples, may be passed to this + /// method, for creating properties directly on the snapshots. + pub fn create_snapshot<'a>( + filesystem: &'a str, + snap_name: &'a str, + properties: &'a [(&'a str, &'a str)], + ) -> Result<(), CreateSnapshotError> { + let mut command = std::process::Command::new(ZFS); + let mut cmd = command.arg("snapshot"); + for (name, value) in properties.iter() { + cmd = cmd.arg("-o").arg(&format!("{name}={value}")); + } + cmd.arg(&format!("{filesystem}@{snap_name}")); + execute(cmd).map(|_| ()).map_err(|err| CreateSnapshotError { + filesystem: filesystem.to_string(), + snap_name: snap_name.to_string(), + err, + }) + } + + /// Destroy a named snapshot of a filesystem. + pub fn destroy_snapshot( + filesystem: &str, + snap_name: &str, + ) -> Result<(), DestroySnapshotError> { + let mut command = std::process::Command::new(ZFS); + let path = format!("{filesystem}@{snap_name}"); + let cmd = command.args(&["destroy", &path]); + execute(cmd).map(|_| ()).map_err(|err| DestroySnapshotError { + filesystem: filesystem.to_string(), + snap_name: snap_name.to_string(), + err, + }) + } +} + +/// A read-only snapshot of a ZFS filesystem. +#[derive(Clone, Debug)] +pub struct Snapshot { + pub filesystem: String, + pub snap_name: String, +} + +impl Snapshot { + /// Return the full path to the snapshot directory within the filesystem. + pub fn full_path(&self) -> Result { + let mountpoint = Zfs::get_value(&self.filesystem, "mountpoint")?; + Ok(Utf8PathBuf::from(mountpoint) + .join(format!(".zfs/snapshot/{}", self.snap_name))) + } +} + +impl fmt::Display for Snapshot { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}@{}", self.filesystem, self.snap_name) + } } /// Returns all datasets managed by Omicron diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index 71325fef3d..0c19c30865 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -368,7 +368,7 @@ fn ensure_zfs_key_directory_exists(log: &Logger) -> Result<(), StartError> { } fn ensure_zfs_ramdisk_dataset() -> Result<(), StartError> { - let zoned = true; + let zoned = false; let do_format = true; let encryption_details = None; let quota = None; diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs index 4c2d6a4113..55661371c3 100644 --- a/sled-agent/src/zone_bundle.rs +++ b/sled-agent/src/zone_bundle.rs @@ -17,6 +17,17 @@ use chrono::Utc; use flate2::bufread::GzDecoder; use illumos_utils::running_zone::is_oxide_smf_log_file; use illumos_utils::running_zone::RunningZone; +use illumos_utils::running_zone::ServiceProcess; +use illumos_utils::zfs::CreateSnapshotError; +use illumos_utils::zfs::DestroyDatasetError; +use illumos_utils::zfs::DestroySnapshotError; +use illumos_utils::zfs::EnsureFilesystemError; +use illumos_utils::zfs::GetValueError; +use illumos_utils::zfs::ListDatasetsError; +use illumos_utils::zfs::ListSnapshotsError; +use illumos_utils::zfs::SetValueError; +use illumos_utils::zfs::Snapshot; +use illumos_utils::zfs::Zfs; use illumos_utils::zfs::ZFS; use illumos_utils::zone::AdmError; use schemars::JsonSchema; @@ -141,6 +152,68 @@ impl ZoneBundleMetadata { } } +// The name of the snapshot created from the zone root filesystem. +const ZONE_ROOT_SNAPSHOT_NAME: &'static str = "zone-root"; + +// The prefix for all the snapshots for each filesystem containing archived +// logs. Each debug data, such as `oxp_/crypt/debug`, generates a snapshot +// named `zone-archives-`. +const ARCHIVE_SNAPSHOT_PREFIX: &'static str = "zone-archives-"; + +// An extra ZFS user property attached to all zone bundle snapshots. +// +// This is used to ensure that we are not accidentally deleting ZFS objects that +// a user has created, but which happen to be named the same thing. +const ZONE_BUNDLE_ZFS_PROPERTY_NAME: &'static str = "oxide:for-zone-bundle"; +const ZONE_BUNDLE_ZFS_PROPERTY_VALUE: &'static str = "true"; + +// Initialize the ZFS resources we need for zone bundling. +// +// This deletes any snapshots matching the names we expect to create ourselves +// during bundling. +#[cfg(not(test))] +fn initialize_zfs_resources(log: &Logger) -> Result<(), BundleError> { + let zb_snapshots = + Zfs::list_snapshots().unwrap().into_iter().filter(|snap| { + // Check for snapshots named how we expect to create them. + if snap.snap_name != ZONE_ROOT_SNAPSHOT_NAME + || !snap.snap_name.starts_with(ARCHIVE_SNAPSHOT_PREFIX) + { + return false; + } + + // Additionally check for the zone-bundle-specific property. + // + // If we find a dataset that matches our names, but which _does not_ + // have such a property, we'll panic rather than possibly deleting + // user data. + let name = snap.to_string(); + let value = + Zfs::get_oxide_value(&name, ZONE_BUNDLE_ZFS_PROPERTY_NAME) + .unwrap_or_else(|_| { + panic!( + "Found a ZFS snapshot with a name reserved for \ + zone bundling, but which does not have the \ + zone-bundle-specific property. Bailing out, \ + rather than risking deletion of user data. \ + snap_name = {}, property = {}", + &name, ZONE_BUNDLE_ZFS_PROPERTY_NAME + ) + }); + assert_eq!(value, ZONE_BUNDLE_ZFS_PROPERTY_VALUE); + true + }); + for snapshot in zb_snapshots { + Zfs::destroy_snapshot(&snapshot.filesystem, &snapshot.snap_name)?; + debug!( + log, + "destroyed pre-existing zone bundle snapshot"; + "snapshot" => %snapshot, + ); + } + Ok(()) +} + /// A type managing zone bundle creation and automatic cleanup. #[derive(Clone)] pub struct ZoneBundler { @@ -256,6 +329,11 @@ impl ZoneBundler { resources: StorageResources, cleanup_context: CleanupContext, ) -> Self { + // This is compiled out in tests because there's no way to set our + // expectations on the mockall object it uses internally. Not great. + #[cfg(not(test))] + initialize_zfs_resources(&log) + .expect("Failed to initialize existing ZFS resources"); let notify_cleanup = Arc::new(Notify::new()); let inner = Arc::new(Mutex::new(Inner { resources, @@ -358,7 +436,6 @@ impl ZoneBundler { .all_u2_mountpoints(sled_hardware::disk::U2_DEBUG_DATASET) .await .into_iter() - .map(|p| p.join(zone.name())) .collect(); let context = ZoneBundleContext { cause, storage_dirs, extra_log_dirs }; info!( @@ -446,7 +523,7 @@ struct ZoneBundleContext { storage_dirs: Vec, // The reason or cause for creating a zone bundle. cause: ZoneBundleCause, - // Extra directories searched for logfiles for the name zone. + // Extra directories searched for logfiles for the named zone. // // Logs are periodically archived out of their original location, and onto // one or more U.2 drives. This field is used to specify that archive @@ -572,6 +649,30 @@ pub enum BundleError { #[error("Cleanup failed")] Cleanup(#[source] anyhow::Error), + + #[error("Failed to create ZFS snapshot")] + CreateSnapshot(#[from] CreateSnapshotError), + + #[error("Failed to destroy ZFS snapshot")] + DestroySnapshot(#[from] DestroySnapshotError), + + #[error("Failed to list ZFS snapshots")] + ListSnapshot(#[from] ListSnapshotsError), + + #[error("Failed to ensure ZFS filesystem")] + EnsureFilesystem(#[from] EnsureFilesystemError), + + #[error("Failed to destroy ZFS dataset")] + DestroyDataset(#[from] DestroyDatasetError), + + #[error("Failed to list ZFS datasets")] + ListDatasets(#[from] ListDatasetsError), + + #[error("Failed to set Oxide-specific ZFS property")] + SetProperty(#[from] SetValueError), + + #[error("Failed to get ZFS property value")] + GetProperty(#[from] GetValueError), } // Helper function to write an array of bytes into the tar archive, with @@ -597,6 +698,256 @@ fn insert_data( }) } +// Create a read-only snapshot from an existing filesystem. +fn create_snapshot( + log: &Logger, + filesystem: &str, + snap_name: &str, +) -> Result { + Zfs::create_snapshot( + filesystem, + snap_name, + &[(ZONE_BUNDLE_ZFS_PROPERTY_NAME, ZONE_BUNDLE_ZFS_PROPERTY_VALUE)], + )?; + debug!( + log, + "created snapshot"; + "filesystem" => filesystem, + "snap_name" => snap_name, + ); + Ok(Snapshot { + filesystem: filesystem.to_string(), + snap_name: snap_name.to_string(), + }) +} + +// Create snapshots for the filesystems we need to copy out all log files. +// +// A key feature of the zone-bundle process is that we pull all the log files +// for a zone. This is tricky. The logs are both being written to by the +// programs we're interested in, and also potentially being rotated by `logadm`, +// and / or archived out to the U.2s through the code in `crate::dump_setup`. +// +// We need to capture all these logs, while avoiding inconsistent state (e.g., a +// missing log message that existed when the bundle was created) and also +// interrupting the rotation and archival processes. We do this by taking ZFS +// snapshots of the relevant datasets when we want to create the bundle. +// +// When we receive a bundling request, we take a snapshot of a few datasets: +// +// - The zone filesystem itself, `oxz_/crypt/zone/`. +// +// - All of the U.2 debug datasets, like `oxp_/crypt/debug`, which we know +// contain logs for the given zone. This is done by looking at all the service +// processes in the zone, and mapping the locations of archived logs, such as +// `/pool/ext//crypt/debug/` to the zpool name. +// +// This provides us with a consistent view of the log files at the time the +// bundle was requested. Note that this ordering, taking the root FS snapshot +// first followed by the archive datasets, ensures that we don't _miss_ log +// messages that existed when the bundle was requested. It's possible that we +// double-count them however: the archiver could run concurrently, and result in +// a log file existing on the root snapshot when we create it, and also on the +// achive snapshot by the time we get around to creating that. +// +// At this point, we operate entirely on those snapshots. We search for +// "current" log files in the root snapshot, and archived log files in the +// archive snapshots. +fn create_zfs_snapshots( + log: &Logger, + zone: &RunningZone, + extra_log_dirs: &[Utf8PathBuf], +) -> Result, BundleError> { + // Snapshot the root filesystem. + let dataset = Zfs::get_dataset_name(zone.root().as_str())?; + let root_snapshot = + create_snapshot(log, &dataset, ZONE_ROOT_SNAPSHOT_NAME)?; + let mut snapshots = vec![root_snapshot]; + + // Look at all the provided extra log directories, and take a snapshot for + // any that have a directory with the zone name. These may not have any log + // file in them yet, but we'll snapshot now and then filter more judiciously + // when we actually find the files we want to bundle. + let mut maybe_err = None; + for dir in extra_log_dirs.iter() { + let zone_dir = dir.join(zone.name()); + match std::fs::metadata(&zone_dir) { + Ok(d) => { + if d.is_dir() { + let dataset = match Zfs::get_dataset_name(zone_dir.as_str()) + { + Ok(ds) => Utf8PathBuf::from(ds), + Err(e) => { + error!( + log, + "failed to list datasets, will \ + unwind any previously created snapshots"; + "error" => ?e, + ); + assert!(maybe_err + .replace(BundleError::from(e)) + .is_none()); + break; + } + }; + + // These datasets are named like `/...`. Since + // we're snapshotting zero or more of them, we disambiguate + // with the pool name. + let pool_name = dataset + .components() + .next() + .expect("Zone archive datasets must be non-empty"); + let snap_name = + format!("{}{}", ARCHIVE_SNAPSHOT_PREFIX, pool_name); + match create_snapshot(log, dataset.as_str(), &snap_name) { + Ok(snapshot) => snapshots.push(snapshot), + Err(e) => { + error!( + log, + "failed to create snapshot, will \ + unwind any previously created"; + "error" => ?e, + ); + assert!(maybe_err.replace(e).is_none()); + break; + } + } + } + } + Err(e) if e.kind() == std::io::ErrorKind::NotFound => { + trace!( + log, + "skipping non-existent zone-bundle directory"; + "dir" => %zone_dir, + ); + } + Err(e) => { + error!( + log, + "failed to get metadata for potential zone directory"; + "zone_dir" => %zone_dir, + "error" => ?e, + ); + } + } + } + if let Some(err) = maybe_err { + cleanup_zfs_snapshots(log, &snapshots); + return Err(err); + }; + Ok(snapshots) +} + +// Destroy any created ZFS snapshots. +fn cleanup_zfs_snapshots(log: &Logger, snapshots: &[Snapshot]) { + for snapshot in snapshots.iter() { + match Zfs::destroy_snapshot(&snapshot.filesystem, &snapshot.snap_name) { + Ok(_) => debug!( + log, + "destroyed zone bundle ZFS snapshot"; + "snapshot" => %snapshot, + ), + Err(e) => error!( + log, + "failed to destroy zone bundle ZFS snapshot"; + "snapshot" => %snapshot, + "error" => ?e, + ), + } + } +} + +// List all log files (current, rotated, and archived) that should be part of +// the zone bundle for a single service. +async fn find_service_log_files( + log: &Logger, + zone_name: &str, + svc: &ServiceProcess, + extra_log_dirs: &[Utf8PathBuf], + snapshots: &[Snapshot], +) -> Result, BundleError> { + // The current and any rotated, but not archived, log files live in the zone + // root filesystem. Extract any which match. + // + // There are a few path tricks to keep in mind here. We've created a + // snapshot from the zone's filesystem, which is usually something like + // `/crypt/zone/`. That snapshot is placed in a hidden + // directory within the base dataset, something like + // `oxp_/crypt/zone/.zfs/snapshot/`. + // + // The log files themselves are things like `/var/svc/log/...`, but in the + // actual ZFS dataset comprising the root FS for the zone, there are + // additional directories, most notably `/root`. So the _cloned_ + // log file will live at + // `//crypt/zone/.zfs/snapshot//root/var/svc/log/...`. + let mut current_log_file = snapshots[0].full_path()?; + current_log_file.push(RunningZone::ROOT_FS_PATH); + current_log_file.push(svc.log_file.as_str().trim_start_matches('/')); + let log_dir = + current_log_file.parent().expect("Current log file must have a parent"); + let mut log_files = vec![current_log_file.clone()]; + for entry in log_dir.read_dir_utf8().map_err(|err| { + BundleError::ReadDirectory { directory: log_dir.into(), err } + })? { + let entry = entry.map_err(|err| BundleError::ReadDirectory { + directory: log_dir.into(), + err, + })?; + let path = entry.path(); + + // Camino's Utf8Path only considers whole path components to match, + // so convert both paths into a &str and use that object's + // starts_with. See the `camino_starts_with_behaviour` test. + let path_ref: &str = path.as_ref(); + let current_log_file_ref: &str = current_log_file.as_ref(); + if path != current_log_file + && path_ref.starts_with(current_log_file_ref) + { + log_files.push(path.clone().into()); + } + } + + // The _archived_ log files are slightly trickier. They can technically live + // in many different datasets, because the archive process may need to start + // archiving to one location, but move to another if a quota is hit. We'll + // iterate over all the extra log directories and try to find any log files + // in those filesystem snapshots. + let snapped_extra_log_dirs = snapshots + .iter() + .skip(1) + .flat_map(|snapshot| { + extra_log_dirs.iter().map(|d| { + // Join the snapshot path with both the log directory and the + // zone name, to arrive at something like: + // /path/to/dataset/.zfs/snapshot//path/to/extra/ + snapshot.full_path().map(|p| p.join(d).join(zone_name)) + }) + }) + .collect::, _>>()?; + debug!( + log, + "looking for extra log files in filesystem snapshots"; + "extra_dirs" => ?&snapped_extra_log_dirs, + ); + log_files.extend( + find_archived_log_files( + log, + zone_name, + &svc.service_name, + svc.log_file.file_name().unwrap(), + snapped_extra_log_dirs.iter(), + ) + .await, + ); + debug!( + log, + "found log files"; + "log_files" => ?&log_files, + ); + Ok(log_files) +} + // Create a service bundle for the provided zone. // // This runs a series of debugging commands in the zone, to collect data about @@ -702,14 +1053,8 @@ async fn create( } } - // Debugging commands run on the specific processes this zone defines. - const ZONE_PROCESS_COMMANDS: [&str; 3] = [ - "pfiles", "pstack", - "pargs", - // TODO-completeness: We may want `gcore`, since that encompasses - // the above commands and much more. It seems like overkill now, - // however. - ]; + // Enumerate the list of Oxide-specific services inside the zone that we + // want to include in the bundling process. let procs = match zone .service_processes() .context("failed to enumerate zone service processes") @@ -733,6 +1078,48 @@ async fn create( return Err(BundleError::from(e)); } }; + + // Create ZFS snapshots of filesystems containing log files. + // + // We need to capture log files from two kinds of locations: + // + // - The zone root filesystem, where the current and rotated (but not + // archived) log files live. + // - Zero or more filesystems on the U.2s used for archiving older log + // files. + // + // Both of these are dynamic. The current log file is likely being written + // by the service itself, and `logadm` may also be rotating files. At the + // same time, the log-archival process in `dump_setup.rs` may be copying + // these out to the U.2s, after which it deletes those on the zone + // filesystem itself. + // + // To avoid various kinds of corruption, such as a bad tarball or missing + // log messages, we'll create ZFS snapshots of each of these relevant + // filesystems, and insert those (now-static) files into the zone-bundle + // tarballs. + let snapshots = + match create_zfs_snapshots(log, zone, &context.extra_log_dirs) { + Ok(snapshots) => snapshots, + Err(e) => { + error!( + log, + "failed to create ZFS snapshots"; + "zone_name" => zone.name(), + "error" => ?e, + ); + return Err(e); + } + }; + + // Debugging commands run on the specific processes this zone defines. + const ZONE_PROCESS_COMMANDS: [&str; 3] = [ + "pfiles", "pstack", + "pargs", + // TODO-completeness: We may want `gcore`, since that encompasses + // the above commands and much more. It seems like overkill now, + // however. + ]; for svc in procs.into_iter() { let pid_s = svc.pid.to_string(); for cmd in ZONE_PROCESS_COMMANDS { @@ -765,72 +1152,61 @@ async fn create( } } - // We may need to extract log files that have been archived out of the - // zone filesystem itself. See `crate::dump_setup` for the logic which - // does this. - let archived_log_files = find_archived_log_files( + // Collect and insert all log files. + // + // This takes files from the snapshot of either the zone root + // filesystem, or the filesystem containing the archived log files. + let all_log_files = match find_service_log_files( log, zone.name(), - &svc.service_name, + &svc, &context.extra_log_dirs, + &snapshots, ) - .await; - - // Copy any log files, current and rotated, into the tarball as - // well. - // - // Safety: This pathbuf was retrieved by locating an existing file - // on the filesystem, so we're sure it has a name and the unwrap is - // safe. - debug!( - log, - "appending current log file to zone bundle"; - "zone" => zone.name(), - "log_file" => %svc.log_file, - ); - if let Err(e) = builder.append_path_with_name( - &svc.log_file, - svc.log_file.file_name().unwrap(), - ) { - error!( - log, - "failed to append current log file to zone bundle"; - "zone" => zone.name(), - "log_file" => %svc.log_file, - "error" => ?e, - ); - return Err(BundleError::AddBundleData { - tarball_path: svc.log_file.file_name().unwrap().into(), - err: e, - }); - } - for f in svc.rotated_log_files.iter().chain(archived_log_files.iter()) { - debug!( - log, - "appending rotated log file to zone bundle"; - "zone" => zone.name(), - "log_file" => %f, - ); - if let Err(e) = - builder.append_path_with_name(f, f.file_name().unwrap()) - { + .await + { + Ok(f) => f, + Err(e) => { error!( log, - "failed to append rotated log file to zone bundle"; + "failed to find service log files"; "zone" => zone.name(), - "log_file" => %f, "error" => ?e, ); - return Err(BundleError::AddBundleData { - tarball_path: f.file_name().unwrap().into(), - err: e, - }); + cleanup_zfs_snapshots(&log, &snapshots); + return Err(e); + } + }; + for log_file in all_log_files.into_iter() { + match builder + .append_path_with_name(&log_file, log_file.file_name().unwrap()) + { + Ok(_) => { + debug!( + log, + "appended log file to zone bundle"; + "zone" => zone.name(), + "log_file" => %log_file, + ); + } + Err(e) => { + error!( + log, + "failed to append log file to zone bundle"; + "zone" => zone.name(), + "log_file" => %svc.log_file, + "error" => ?e, + ); + } } } } // Finish writing out the tarball itself. - builder.into_inner().context("Failed to build bundle")?; + if let Err(e) = builder.into_inner().context("Failed to build bundle") { + cleanup_zfs_snapshots(&log, &snapshots); + return Err(BundleError::from(e)); + } // Copy the bundle to the other locations. We really want the bundles to // be duplicates, not an additional, new bundle. @@ -840,14 +1216,21 @@ async fn create( // the final locations should that last copy fail for any of them. // // See: https://github.com/oxidecomputer/omicron/issues/3876. + let mut copy_err = None; for other_dir in zone_bundle_dirs.iter().skip(1) { let to = other_dir.join(&filename); debug!(log, "copying bundle"; "from" => %full_path, "to" => %to); - tokio::fs::copy(&full_path, &to).await.map_err(|err| { + if let Err(e) = tokio::fs::copy(&full_path, &to).await.map_err(|err| { BundleError::CopyArchive { from: full_path.to_owned(), to, err } - })?; + }) { + copy_err = Some(e); + break; + } + } + cleanup_zfs_snapshots(&log, &snapshots); + if let Some(err) = copy_err { + return Err(err); } - info!(log, "finished zone bundle"; "metadata" => ?zone_metadata); Ok(zone_metadata) } @@ -857,11 +1240,12 @@ async fn create( // // Note that errors are logged, rather than failing the whole function, so that // one failed listing does not prevent collecting any other log files. -async fn find_archived_log_files( +async fn find_archived_log_files<'a, T: Iterator>( log: &Logger, zone_name: &str, svc_name: &str, - dirs: &[Utf8PathBuf], + log_file_prefix: &str, + dirs: T, ) -> Vec { // The `dirs` should be things like // `/pool/ext//crypt/debug/`, but it's really up to @@ -870,7 +1254,7 @@ async fn find_archived_log_files( // Within that, we'll just look for things that appear to be Oxide-managed // SMF service log files. let mut files = Vec::new(); - for dir in dirs.iter() { + for dir in dirs { if dir.exists() { let mut rd = match tokio::fs::read_dir(&dir).await { Ok(rd) => rd, @@ -900,8 +1284,9 @@ async fn find_archived_log_files( }; let fname = path.file_name().unwrap(); let is_oxide = is_oxide_smf_log_file(fname); - let contains = fname.contains(svc_name); - if is_oxide && contains { + let matches_log_file = + fname.starts_with(log_file_prefix); + if is_oxide && matches_log_file { debug!( log, "found archived log file"; @@ -911,12 +1296,14 @@ async fn find_archived_log_files( ); files.push(path); } else { - debug!( + trace!( log, "skipping non-matching log file"; + "zone_name" => zone_name, + "service_name" => svc_name, "filename" => fname, "is_oxide_smf_log_file" => is_oxide, - "contains_svc_name" => contains, + "matches_log_file" => matches_log_file, ); } } @@ -2299,43 +2686,61 @@ mod illumos_tests { let log = test_logger(); let tmpdir = tempfile::tempdir().expect("Failed to make tempdir"); - let mut should_match = [ - "oxide-foo:default.log", - "oxide-foo:default.log.1000", - "system-illumos-foo:default.log", - "system-illumos-foo:default.log.100", - ]; - let should_not_match = [ - "oxide-foo:default", - "not-oxide-foo:default.log.1000", - "system-illumos-foo", - "not-system-illumos-foo:default.log.100", - ]; - for name in should_match.iter().chain(should_not_match.iter()) { - let path = tmpdir.path().join(name); - tokio::fs::File::create(path) - .await - .expect("failed to create dummy file"); + for prefix in ["oxide", "system-illumos"] { + let mut should_match = [ + format!("{prefix}-foo:default.log"), + format!("{prefix}-foo:default.log.1000"), + ]; + let should_not_match = [ + format!("{prefix}-foo:default"), + format!("not-{prefix}-foo:default.log.1000"), + ]; + for name in should_match.iter().chain(should_not_match.iter()) { + let path = tmpdir.path().join(name); + tokio::fs::File::create(path) + .await + .expect("failed to create dummy file"); + } + + let path = Utf8PathBuf::try_from( + tmpdir.path().as_os_str().to_str().unwrap(), + ) + .unwrap(); + let mut files = find_archived_log_files( + &log, + "zone-name", // unused here, for logging only + "svc:/oxide/foo:default", + &format!("{prefix}-foo:default.log"), + std::iter::once(&path), + ) + .await; + + // Sort everything to compare correctly. + should_match.sort(); + files.sort(); + assert_eq!(files.len(), should_match.len()); + assert!(files + .iter() + .zip(should_match.iter()) + .all(|(file, name)| { file.file_name().unwrap() == *name })); } + } - let path = - Utf8PathBuf::try_from(tmpdir.path().as_os_str().to_str().unwrap()) - .unwrap(); - let mut files = find_archived_log_files( - &log, - "zone-name", // unused here, for logging only - "foo", - &[path], - ) - .await; - - // Sort everything to compare correctly. - should_match.sort(); - files.sort(); - assert_eq!(files.len(), should_match.len()); - assert!(files - .iter() - .zip(should_match.iter()) - .all(|(file, name)| { file.file_name().unwrap() == *name })); + #[test] + fn camino_starts_with_behaviour() { + let logfile = + Utf8PathBuf::from("/zonepath/var/svc/log/oxide-nexus:default.log"); + let rotated_logfile = Utf8PathBuf::from( + "/zonepath/var/svc/log/oxide-nexus:default.log.0", + ); + + let logfile_as_string: &str = logfile.as_ref(); + let rotated_logfile_as_string: &str = rotated_logfile.as_ref(); + + assert!(logfile != rotated_logfile); + assert!(logfile_as_string != rotated_logfile_as_string); + + assert!(!rotated_logfile.starts_with(&logfile)); + assert!(rotated_logfile_as_string.starts_with(&logfile_as_string)); } }