diff --git a/Cargo.lock b/Cargo.lock
index e620a341b14..7222889a47a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3243,6 +3243,7 @@ dependencies = [
  "anyhow",
  "async-trait",
  "bhyve_api",
+ "byteorder",
  "camino",
  "cfg-if 1.0.0",
  "futures",
diff --git a/Cargo.toml b/Cargo.toml
index 8fd9f872831..842f0c385b0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -137,6 +137,7 @@ bcs = "0.1.5"
 bincode = "1.3.3"
 bootstrap-agent-client = { path = "bootstrap-agent-client" }
 buf-list = { version = "1.0.3", features = ["tokio1"] }
+byteorder = "1.4.3"
 bytes = "1.4.0"
 bytesize = "1.2.0"
 camino = "1.1"
diff --git a/illumos-utils/Cargo.toml b/illumos-utils/Cargo.toml
index 04bdf8cad09..3c0c2e7fc98 100644
--- a/illumos-utils/Cargo.toml
+++ b/illumos-utils/Cargo.toml
@@ -9,6 +9,7 @@ license = "MPL-2.0"
 anyhow.workspace = true
 async-trait.workspace = true
 bhyve_api.workspace = true
+byteorder.workspace = true
 camino.workspace = true
 cfg-if.workspace = true
 futures.workspace = true
diff --git a/illumos-utils/src/dumpadm.rs b/illumos-utils/src/dumpadm.rs
new file mode 100644
index 00000000000..e6c83b223c9
--- /dev/null
+++ b/illumos-utils/src/dumpadm.rs
@@ -0,0 +1,177 @@
+use byteorder::{LittleEndian, ReadBytesExt};
+use camino::Utf8PathBuf;
+use std::ffi::OsString;
+use std::fs::File;
+use std::io::{Seek, SeekFrom};
+use std::os::unix::ffi::OsStringExt;
+use std::process::Command;
+
+#[derive(thiserror::Error, Debug)]
+pub enum DumpHdrError {
+    #[error("I/O error while attempting to open raw disk: {0}")]
+    OpenRaw(std::io::Error),
+
+    #[error("I/O error while seeking to dumphdr offset: {0}")]
+    Seek(std::io::Error),
+
+    #[error("I/O error while reading magic bytes: {0}")]
+    ReadMagic(std::io::Error),
+
+    #[error("I/O error while reading version bytes: {0}")]
+    ReadVersion(std::io::Error),
+
+    #[error("I/O error while reading flag bytes: {0}")]
+    ReadFlags(std::io::Error),
+
+    #[error("Invalid magic number {0} (expected 0xdefec8ed)")]
+    InvalidMagic(u32),
+
+    #[error("Invalid dumphdr version {0} (expected 10)")]
+    InvalidVersion(u32),
+}
+
+pub fn dump_flag_is_valid(
+    dump_slice: &Utf8PathBuf,
+) -> Result<bool, DumpHdrError> {
+    // values from /usr/src/uts/common/sys/dumphdr.h:
+    const DUMP_OFFSET: u64 = 65536; // pad at start/end of dev
+
+    const DUMP_MAGIC: u32 = 0xdefec8ed; // dump magic number
+    const DUMP_VERSION: u32 = 10; // version of this dumphdr
+
+    const DF_VALID: u32 = 0x00000001; // Dump is valid (savecore clears)
+
+    let mut f = File::open(dump_slice).map_err(DumpHdrError::OpenRaw)?;
+    f.seek(SeekFrom::Start(DUMP_OFFSET)).map_err(DumpHdrError::Seek)?;
+
+    // read the first few fields of dumphdr.
+    // typedef struct dumphdr {
+    //     uint32_t dump_magic;
+    //     uint32_t dump_version;
+    //     uint32_t dump_flags;
+    //     /* [...] */
+    // }
+
+    let magic =
+        f.read_u32::<LittleEndian>().map_err(DumpHdrError::ReadMagic)?;
+    if magic != DUMP_MAGIC {
+        return Err(DumpHdrError::InvalidMagic(magic));
+    }
+
+    let version =
+        f.read_u32::<LittleEndian>().map_err(DumpHdrError::ReadVersion)?;
+    if version != DUMP_VERSION {
+        return Err(DumpHdrError::InvalidVersion(version));
+    }
+
+    let flags =
+        f.read_u32::<LittleEndian>().map_err(DumpHdrError::ReadFlags)?;
+    Ok((flags & DF_VALID) != 0)
+}
+
+const DUMPADM: &str = "/usr/sbin/dumpadm";
+const SAVECORE: &str = "/usr/bin/savecore";
+
+#[derive(thiserror::Error, Debug)]
+pub enum DumpAdmError {
+    #[error("Error obtaining or modifying dump configuration. dump_slice: {dump_slice}, savecore_dir: {savecore_dir:?}")]
+    Execution { dump_slice: Utf8PathBuf, savecore_dir: Option<Utf8PathBuf> },
+
+    #[error("Invalid invocation of dumpadm: {0:?} {1:?}")]
+    InvalidCommand(Vec<String>, std::ffi::OsString),
+
+    #[error("dumpadm process was terminated by a signal.")]
+    TerminatedBySignal,
+
+    #[error("dumpadm invocation exited with unexpected return code {0}")]
+    UnexpectedExitCode(i32),
+
+    #[error(
+        "Failed to create placeholder savecore directory at /tmp/crash: {0}"
+    )]
+    Mkdir(std::io::Error),
+
+    #[error("savecore failed: {0:?}")]
+    SavecoreFailure(std::ffi::OsString),
+
+    #[error("Failed to execute dumpadm process: {0}")]
+    ExecDumpadm(std::io::Error),
+
+    #[error("Failed to execute savecore process: {0}")]
+    ExecSavecore(std::io::Error),
+}
+
+pub fn dumpadm(
+    dump_slice: &Utf8PathBuf,
+    savecore_dir: Option<&Utf8PathBuf>,
+) -> Result<Option<OsString>, DumpAdmError> {
+    let mut cmd = Command::new(DUMPADM);
+    cmd.env_clear();
+
+    // Include memory from the current process if there is one for the panic
+    // context, in addition to kernel memory:
+    cmd.arg("-c").arg("curproc");
+
+    // Use the given block device path for dump storage:
+    cmd.arg("-d").arg(dump_slice);
+
+    // Compress crash dumps:
+    cmd.arg("-z").arg("on");
+
+    if let Some(savecore_dir) = savecore_dir {
+        // Run savecore(8) to place the existing contents of dump_slice (if
+        // any) into savecore_dir, and clear the presence flag.
+        cmd.arg("-s").arg(savecore_dir);
+    } else {
+        // Do not run savecore(8) automatically...
+        cmd.arg("-n");
+
+        // ...but do create and use a tmpfs path (rather than the default
+        // location under /var/crash, which is in the ramdisk pool), because
+        // dumpadm refuses to do what we ask otherwise.
+        let tmp_crash = "/tmp/crash";
+        std::fs::create_dir_all(tmp_crash).map_err(DumpAdmError::Mkdir)?;
+
+        cmd.arg("-s").arg(tmp_crash);
+    }
+
+    let out = cmd.output().map_err(DumpAdmError::ExecDumpadm)?;
+
+    match out.status.code() {
+        Some(0) => {
+            if savecore_dir.is_some() {
+                if let Ok(true) = dump_flag_is_valid(dump_slice) {
+                    return savecore();
+                }
+            }
+            Ok(None)
+        }
+        Some(1) => Err(DumpAdmError::Execution {
+            dump_slice: dump_slice.clone(),
+            savecore_dir: savecore_dir.cloned(),
+        }),
+        Some(2) => {
+            // unwrap: every arg we've provided in this function is UTF-8
+            let mut args =
+                vec![cmd.get_program().to_str().unwrap().to_string()];
+            cmd.get_args()
+                .for_each(|arg| args.push(arg.to_str().unwrap().to_string()));
+            let stderr = OsString::from_vec(out.stderr);
+            Err(DumpAdmError::InvalidCommand(args, stderr))
+        }
+        Some(n) => Err(DumpAdmError::UnexpectedExitCode(n)),
+        None => Err(DumpAdmError::TerminatedBySignal),
+    }
+}
+
+pub fn savecore() -> Result<Option<OsString>, DumpAdmError> {
+    let mut cmd = Command::new(SAVECORE);
+    cmd.env_clear();
+    cmd.arg("-v");
+    let out = cmd.output().map_err(DumpAdmError::ExecSavecore)?;
+    if out.status.success() {
+        Ok(Some(OsString::from_vec(out.stdout)))
+    } else {
+        Err(DumpAdmError::SavecoreFailure(OsString::from_vec(out.stderr)))
+    }
+}
diff --git a/illumos-utils/src/lib.rs b/illumos-utils/src/lib.rs
index 59881c0e1d0..754412e9030 100644
--- a/illumos-utils/src/lib.rs
+++ b/illumos-utils/src/lib.rs
@@ -10,6 +10,7 @@ pub mod addrobj;
 pub mod destructor;
 pub mod dkio;
 pub mod dladm;
+pub mod dumpadm;
 pub mod fstyp;
 pub mod libc;
 pub mod link;
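(Reviewer note: the dumphdr parsing above can be exercised without real hardware. A minimal sketch, assuming a scratch file stands in for the raw dump slice; the helper name is hypothetical and not part of this change:)

    use byteorder::{LittleEndian, WriteBytesExt};
    use std::io::{Seek, SeekFrom};

    // Hypothetical test helper: fabricate the first three dumphdr fields at
    // the 64 KiB offset, exactly where dump_flag_is_valid expects them.
    fn write_fake_dumphdr(
        path: &camino::Utf8Path,
        valid: bool,
    ) -> std::io::Result<()> {
        let mut f = std::fs::File::create(path)?;
        f.seek(SeekFrom::Start(65536))?; // DUMP_OFFSET
        f.write_u32::<LittleEndian>(0xdefec8ed)?; // dump_magic
        f.write_u32::<LittleEndian>(10)?; // dump_version
        f.write_u32::<LittleEndian>(if valid { 1 } else { 0 })?; // dump_flags
        Ok(())
    }

Calling dump_flag_is_valid on such a file should return Ok(true) when DF_VALID was written, and Ok(false) once the flag is cleared, which is what savecore does after a successful save.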
diff --git a/sled-agent/src/storage/dump_setup.rs b/sled-agent/src/storage/dump_setup.rs
new file mode 100644
index 00000000000..06575168c27
--- /dev/null
+++ b/sled-agent/src/storage/dump_setup.rs
@@ -0,0 +1,225 @@
+use crate::storage_manager::DiskWrapper;
+use camino::Utf8PathBuf;
+use illumos_utils::zpool::ZpoolHealth;
+use omicron_common::disk::DiskIdentity;
+use sled_hardware::DiskVariant;
+use slog::Logger;
+use std::collections::HashMap;
+use std::sync::Arc;
+use tokio::sync::MutexGuard;
+
+#[derive(Default)]
+pub struct DumpSetup {
+    savecore_lock: Arc<std::sync::Mutex<()>>,
+}
+
+impl DumpSetup {
+    pub(crate) async fn poll_dumpdev_setup(
+        &self,
+        disks: &mut MutexGuard<'_, HashMap<DiskIdentity, DiskWrapper>>,
+        log: Logger,
+    ) {
+        let mut dump_slices = Vec::new();
+        let mut pilot_saved_crash_dirs = Vec::new();
+        let mut u2_dump_dirs = Vec::new();
+        for (_id, disk_wrapper) in disks.iter() {
+            match disk_wrapper {
+                DiskWrapper::Real { disk, .. } => match disk.variant() {
+                    DiskVariant::M2 => {
+                        match disk.dump_device_devfs_path(false) {
+                            Ok(path) => dump_slices.push(path),
+                            Err(err) => {
+                                warn!(log, "Error getting dump device devfs path: {err:?}");
+                            }
+                        }
+                        pilot_saved_crash_dirs.push(
+                            disk.zpool_name().dataset_mountpoint(
+                                sled_hardware::disk::CRASH_DATASET,
+                            ),
+                        );
+                    }
+                    DiskVariant::U2 => {
+                        let name = disk.zpool_name();
+                        if let Ok(info) = illumos_utils::zpool::Zpool::get_info(
+                            &name.to_string(),
+                        ) {
+                            if info.health() == ZpoolHealth::Online {
+                                u2_dump_dirs.push(name.dataset_mountpoint(
+                                    sled_hardware::disk::DUMP_DATASET,
+                                ));
+                            } else {
+                                warn!(log, "Zpool {name:?} not online, won't attempt to savecore dumps there");
+                            }
+                        }
+                    }
+                },
+                DiskWrapper::Synthetic { .. } => {}
+            }
+        }
+
+        // TODO: remove when pilot isn't doing this any more
+        let u2_dump_dirs_clone = u2_dump_dirs.clone();
+        let log_clone = log.clone();
+        tokio::spawn(async move {
+            if let Err(err) = Self::move_pilot_savecores(
+                &log_clone,
+                pilot_saved_crash_dirs,
+                u2_dump_dirs_clone,
+            )
+            .await
+            {
+                error!(log_clone, "Could not move dump saved to M.2 by pilot to U.2 dump zvol: {err:?}");
+            }
+        });
+
+        let savecore_lock = self.savecore_lock.clone();
+        tokio::task::spawn_blocking(move || {
+            // TODO: a more reasonable way of deduplicating the effort.
+            let _guard = savecore_lock.lock();
+            Self::run_dumpadm_and_savecore(log, dump_slices, u2_dump_dirs);
+        });
+    }
+
+    fn run_dumpadm_and_savecore(
+        log: Logger,
+        dump_slices: Vec<Utf8PathBuf>,
+        u2_dump_dirs: Vec<Utf8PathBuf>,
+    ) {
+        for dump_slice in dump_slices {
+            // NOTE: because of the need to have dumpadm change the global
+            // state of which slice the system is using for dumps in order
+            // for savecore to behave the way we want (i.e. clear the flag
+            // after succeeding), we could hypothetically miss a dump if
+            // the kernel crashes again while savecore is still running.
+            if u2_dump_dirs.is_empty() {
+                // Don't risk overwriting an existing dump if there's
+                // already one there until we can attempt to savecore(8)
+                // it away and clear the flag to make room.
+                match illumos_utils::dumpadm::dump_flag_is_valid(&dump_slice) {
+                    Ok(false) => {
+                        // Have dumpadm write the config for crash dumps to be
+                        // on this slice, at least, until a U.2 comes along.
+                        match illumos_utils::dumpadm::dumpadm(&dump_slice, None)
+                        {
+                            Ok(_) => {
+                                info!(log, "Using dump device {dump_slice:?} with no savecore destination (no U.2 debug zvol yet)");
+                            }
+                            Err(err) => {
+                                warn!(log, "Could not configure {dump_slice:?} as dump device: {err:?}");
+                            }
+                        }
+                    }
+                    Ok(true) => {
+                        warn!(log, "Not configuring {dump_slice:?} as it appears to contain a dump we cannot yet send to a U.2 debug zvol");
+                    }
+                    Err(err) => {
+                        info!(
+                            log,
+                            "Dump slice {dump_slice:?} appears to be unused: {err:?}",
+                        );
+                    }
+                }
+            } else {
+                // Try each U.2 until we succeed once
+                for mountpoint in &u2_dump_dirs {
+                    // Let's try to see if it appears to have a dump already
+                    match illumos_utils::dumpadm::dump_flag_is_valid(
+                        &dump_slice,
+                    ) {
+                        Ok(true) => {
+                            info!(log, "Dump slice {dump_slice:?} appears to have a valid header; will attempt to savecore to {mountpoint:?}");
+                        }
+                        Ok(false) => {
+                            info!(log, "Dump slice {dump_slice:?} appears to have already been saved");
+                        }
+                        Err(err) => {
+                            info!(log, "Dump slice {dump_slice:?} appears to be unused: {err:?}");
+                        }
+                    }
+                    // Have dumpadm write the config for crash dumps to be
+                    // on this slice, and invoke savecore(8) to save any
+                    // dump that's already present there.
+                    match illumos_utils::dumpadm::dumpadm(
+                        &dump_slice,
+                        Some(mountpoint),
+                    ) {
+                        Err(err) => {
+                            warn!(log, "Could not configure {dump_slice:?} as dump device with {mountpoint:?} as savecore destination: {err:?}");
+                        }
+                        Ok(saved) => {
+                            if let Some(stdout) = saved {
+                                info!(
+                                    log,
+                                    "Saved dump from {dump_slice:?} to {mountpoint:?}: {stdout:?}"
+                                );
+                            } else {
+                                info!(
+                                    log,
+                                    "Set {dump_slice:?} as system dump slice",
+                                );
+                            }
+                            // If there was one present, we successfully
+                            // compressed it onto a U.2's pool, no need to
+                            // try others.
+                            break;
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    // pilot currently will savecore to the crash zvol on the internal M.2;
+    // move it to the U.2 to be consistent with where the others go
+    async fn move_pilot_savecores(
+        log: &Logger,
+        pilot_saved_crash_dirs: Vec<Utf8PathBuf>,
+        u2_dump_dirs: Vec<Utf8PathBuf>,
+    ) -> std::io::Result<()> {
+        let vmdump = std::ffi::OsStr::new("vmdump");
+        for crash_dir in &pilot_saved_crash_dirs {
+            if let Ok(dir) = crash_dir.read_dir() {
+                for entry in dir.flatten() {
+                    if let Some(name) = entry.path().file_stem() {
+                        if name == vmdump {
+                            for dump_dir in &u2_dump_dirs {
+                                let mut dest_n = 0;
+                                while dump_dir
+                                    .join_os(vmdump)
+                                    .with_extension(format!("{dest_n}"))
+                                    .exists()
+                                {
+                                    dest_n += 1;
+                                }
+                                let dest = dump_dir
+                                    .join_os(vmdump)
+                                    .with_extension(format!("{dest_n}"));
+
+                                let mut dest_f =
+                                    tokio::fs::File::create(&dest).await?;
+                                let mut src_f =
+                                    tokio::fs::File::open(&entry.path())
+                                        .await?;
+                                tokio::io::copy(&mut src_f, &mut dest_f)
+                                    .await?;
+                                dest_f.sync_all().await?;
+                                drop(src_f);
+                                drop(dest_f);
+
+                                if let Err(err) =
+                                    tokio::fs::remove_file(entry.path()).await
+                                {
+                                    warn!(log, "Could not remove copy of dump from M.2 after copying it to U.2: {err:?}");
+                                } else {
+                                    info!(log, "Relocated dump saved by pilot at {entry:?} to {dest:?}");
+                                    break;
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+        }
+        Ok(())
+    }
+}
diff --git a/sled-agent/src/storage/mod.rs b/sled-agent/src/storage/mod.rs
index 8444ecace40..74bd59a1511 100644
--- a/sled-agent/src/storage/mod.rs
+++ b/sled-agent/src/storage/mod.rs
@@ -5,3 +5,4 @@
 //! Management of local storage
 
 pub(crate) mod dataset;
+pub(crate) mod dump_setup;
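(Reviewer note: the destination naming in move_pilot_savecores is worth calling out: relocated dumps land as vmdump.0, vmdump.1, and so on in the first U.2 debug dataset that accepts them, probing for the first unused suffix. A standalone sketch of that scheme, with a hypothetical function name:)

    use std::path::{Path, PathBuf};

    // Pick the first vmdump.<N> that doesn't already exist in `dump_dir`,
    // mirroring the join_os/with_extension loop in move_pilot_savecores.
    fn next_vmdump_dest(dump_dir: &Path) -> PathBuf {
        let mut n = 0;
        loop {
            let candidate = dump_dir.join(format!("vmdump.{n}"));
            if !candidate.exists() {
                return candidate;
            }
            n += 1;
        }
    }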
diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs
index 0d2ddca3114..94cb8f4d2ce 100644
--- a/sled-agent/src/storage_manager.rs
+++ b/sled-agent/src/storage_manager.rs
@@ -6,6 +6,7 @@
 
 use crate::nexus::NexusClientWithResolver;
 use crate::storage::dataset::DatasetName;
+use crate::storage::dump_setup::DumpSetup;
 use camino::Utf8PathBuf;
 use futures::stream::FuturesOrdered;
 use futures::FutureExt;
@@ -31,6 +32,7 @@ use tokio::sync::{mpsc, oneshot, Mutex};
 use tokio::task::JoinHandle;
 use uuid::Uuid;
 
+use illumos_utils::dumpadm::DumpHdrError;
 #[cfg(test)]
 use illumos_utils::{zfs::MockZfs as Zfs, zpool::MockZpool as Zpool};
 #[cfg(not(test))]
@@ -125,6 +127,9 @@ pub enum Error {
 
     #[error("Underlay not yet initialized")]
     UnderlayNotInitialized,
+
+    #[error("Encountered error checking dump device flags: {0}")]
+    DumpHdr(#[from] DumpHdrError),
 }
 
 /// A ZFS storage pool.
@@ -165,7 +170,7 @@ struct UnderlayRequest {
 }
 
 #[derive(PartialEq, Eq, Clone)]
-enum DiskWrapper {
+pub(crate) enum DiskWrapper {
     Real { disk: Disk, devfs_path: Utf8PathBuf },
     Synthetic { zpool_name: ZpoolName },
 }
@@ -338,6 +343,9 @@ struct StorageWorker {
     // A mechanism for requesting disk encryption keys from the
     // [`key_manager::KeyManager`]
     key_requester: StorageKeyRequester,
+
+    // Invokes dumpadm(8) and savecore(8) when new disks are encountered
+    dump_setup: Arc<DumpSetup>,
 }
 
 #[derive(Clone, Debug)]
@@ -621,6 +629,8 @@
         >,
         disk: DiskWrapper,
     ) -> Result<(), Error> {
+        let log = self.log.clone();
+
         disks.insert(disk.identity(), disk.clone());
         self.physical_disk_notify(NotifyDiskRequest::Add {
             identity: disk.identity(),
@@ -630,6 +640,8 @@
         self.upsert_zpool(&resources, disk.identity(), disk.zpool_name())
             .await?;
 
+        self.dump_setup.poll_dumpdev_setup(disks, log).await;
+
         Ok(())
     }
 
@@ -965,6 +977,7 @@
             rx,
             underlay: Arc::new(Mutex::new(None)),
             key_requester,
+            dump_setup: Arc::new(DumpSetup::default()),
         };
 
         worker.do_work(resources).await
diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs
index 7d58330e9ff..089c23be58c 100644
--- a/sled-hardware/src/disk.rs
+++ b/sled-hardware/src/disk.rs
@@ -245,14 +245,17 @@
 pub const DEBUG_DATASET_QUOTA: usize = 100 * (1 << 30);
 
 // U.2 datasets live under the encrypted dataset and inherit encryption
 pub const ZONE_DATASET: &'static str = "crypt/zone";
+pub const DUMP_DATASET: &'static str = "crypt/debug";
 // This is the root dataset for all U.2 drives. Encryption is inherited.
 pub const CRYPT_DATASET: &'static str = "crypt";
 
-const U2_EXPECTED_DATASET_COUNT: usize = 1;
+const U2_EXPECTED_DATASET_COUNT: usize = 2;
 static U2_EXPECTED_DATASETS: [ExpectedDataset; U2_EXPECTED_DATASET_COUNT] = [
     // Stores filesystems for zones
     ExpectedDataset::new(ZONE_DATASET).wipe(),
+    // For storing full kernel RAM dumps
+    ExpectedDataset::new(DUMP_DATASET),
 ];
 
 const M2_EXPECTED_DATASET_COUNT: usize = 5;
@@ -565,6 +568,17 @@
         )
     }
 
+    pub fn dump_device_devfs_path(
+        &self,
+        raw: bool,
+    ) -> Result<Utf8PathBuf, DiskError> {
+        self.paths.partition_device_path(
+            &self.partitions,
+            Partition::DumpDevice,
+            raw,
+        )
+    }
+
     pub fn slot(&self) -> i64 {
         self.slot
     }
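(Reviewer note: for convenience, the effective command lines of the dumpadm wrapper, reconstructed from the arguments it assembles; the device and directory paths are placeholders. With a savecore destination it is roughly `dumpadm -c curproc -d <slice> -z on -s <debug-dataset-mountpoint>`, followed by `savecore -v` when the slice's DF_VALID flag is set; without one it is `dumpadm -c curproc -d <slice> -z on -n -s /tmp/crash`. A hypothetical caller, with made-up paths:)

    use camino::Utf8PathBuf;
    use illumos_utils::dumpadm::{dumpadm, DumpAdmError};

    // Configure the dump slice and savecore any pre-existing dump into a
    // U.2 debug dataset. Both paths below are illustrative only.
    fn adopt_dump_slice() -> Result<(), DumpAdmError> {
        let slice = Utf8PathBuf::from("/dev/dsk/c5t1d0s1");
        let debug_dir = Utf8PathBuf::from("/pool/ext/example/crypt/debug");
        if let Some(stdout) = dumpadm(&slice, Some(&debug_dir))? {
            // savecore(8) ran and captured a dump that was already present.
            println!("savecore: {stdout:?}");
        }
        Ok(())
    }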