From 1d3eb63c79500813899ea3e5be712f5965bc680a Mon Sep 17 00:00:00 2001
From: lif <>
Date: Mon, 17 Jul 2023 09:07:34 +0000
Subject: [PATCH] Put process core dumps onto the U.2 debug zvol

(Part of #2478)

This configures coreadm to put all core dumps onto the M.2 'crash'
dataset, and creates a thread that rotates them all onto a U.2 'debug'
dataset every 5 minutes.

This also refactors the dumpadm/savecore code to be less redundant and
more flexible.
---
 Cargo.lock                           |   1 +
 illumos-utils/src/coreadm.rs         |  62 +++++
 illumos-utils/src/dumpadm.rs         |   4 +-
 illumos-utils/src/lib.rs             |   1 +
 sled-agent/Cargo.toml                |   1 +
 sled-agent/src/storage/dump_setup.rs | 353 ++++++++++++++++++++++-----
 sled-agent/src/storage_manager.rs    |   6 +-
 7 files changed, 360 insertions(+), 68 deletions(-)
 create mode 100644 illumos-utils/src/coreadm.rs

diff --git a/Cargo.lock b/Cargo.lock
index f43561b3c16..969630ed8c5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4877,6 +4877,7 @@ dependencies = [
  "crucible-agent-client",
  "crucible-client-types",
  "ddm-admin-client",
+ "derive_more",
  "dns-server",
  "dns-service-client 0.1.0",
  "dpd-client",
diff --git a/illumos-utils/src/coreadm.rs b/illumos-utils/src/coreadm.rs
new file mode 100644
index 00000000000..543dbca239c
--- /dev/null
+++ b/illumos-utils/src/coreadm.rs
@@ -0,0 +1,62 @@
+use camino::Utf8PathBuf;
+use std::ffi::OsString;
+use std::os::unix::ffi::OsStringExt;
+use std::process::Command;
+
+#[derive(thiserror::Error, Debug)]
+pub enum CoreAdmError {
+    #[error("Error obtaining or modifying coreadm configuration. core_dir: {core_dir:?}")]
+    Execution { core_dir: Utf8PathBuf },
+
+    #[error("Invalid invocation of coreadm: {0:?} {1:?}")]
+    InvalidCommand(Vec<String>, OsString),
+
+    #[error("coreadm process was terminated by a signal.")]
+    TerminatedBySignal,
+
+    #[error("coreadm invocation exited with unexpected return code {0}")]
+    UnexpectedExitCode(i32),
+
+    #[error("Failed to execute dumpadm process: {0}")]
+    Exec(std::io::Error),
+}
+
+const COREADM: &str = "/usr/bin/coreadm";
+
+pub fn coreadm(core_dir: &Utf8PathBuf) -> Result<(), CoreAdmError> {
+    let mut cmd = Command::new(COREADM);
+    cmd.env_clear();
+
+    // disable per-process core patterns
+    cmd.arg("-d").arg("process");
+    cmd.arg("-d").arg("proc-setid");
+
+    // use the global core pattern
+    cmd.arg("-e").arg("global");
+    cmd.arg("-e").arg("global-setid");
+
+    // set the global pattern to place all cores into core_dir,
+    // with filenames of "core.[zone-name].[exe-filename].[pid].[time]"
+    cmd.arg("-g").arg(core_dir.join("core.%z.%f.%p.%t"));
+
+    // also collect DWARF data from the exe and its library deps
+    cmd.arg("-G").arg("default+debug");
+
+    let out = cmd.output().map_err(CoreAdmError::Exec)?;
+
+    match out.status.code() {
+        Some(0) => Ok(()),
+        Some(1) => Err(CoreAdmError::Execution { core_dir: core_dir.clone() }),
+        Some(2) => {
+            // unwrap: every arg we've provided in this function is UTF-8
+            let mut args =
+                vec![cmd.get_program().to_str().unwrap().to_string()];
+            cmd.get_args()
+                .for_each(|arg| args.push(arg.to_str().unwrap().to_string()));
+            let stderr = OsString::from_vec(out.stderr);
+            Err(CoreAdmError::InvalidCommand(args, stderr))
+        }
+        Some(n) => Err(CoreAdmError::UnexpectedExitCode(n)),
+        None => Err(CoreAdmError::TerminatedBySignal),
+    }
+}
diff --git a/illumos-utils/src/dumpadm.rs b/illumos-utils/src/dumpadm.rs
index 36a66764541..934f4908e1a 100644
--- a/illumos-utils/src/dumpadm.rs
+++ b/illumos-utils/src/dumpadm.rs
@@ -84,7 +84,7 @@ pub enum DumpAdmError {
     Execution { dump_slice: Utf8PathBuf, savecore_dir: Option<Utf8PathBuf> },
}, #[error("Invalid invocation of dumpadm: {0:?} {1:?}")] - InvalidCommand(Vec, std::ffi::OsString), + InvalidCommand(Vec, OsString), #[error("dumpadm process was terminated by a signal.")] TerminatedBySignal, @@ -98,7 +98,7 @@ pub enum DumpAdmError { Mkdir(std::io::Error), #[error("savecore failed: {0:?}")] - SavecoreFailure(std::ffi::OsString), + SavecoreFailure(OsString), #[error("Failed to execute dumpadm process: {0}")] ExecDumpadm(std::io::Error), diff --git a/illumos-utils/src/lib.rs b/illumos-utils/src/lib.rs index 754412e9030..4f5f8d5450b 100644 --- a/illumos-utils/src/lib.rs +++ b/illumos-utils/src/lib.rs @@ -7,6 +7,7 @@ use cfg_if::cfg_if; pub mod addrobj; +pub mod coreadm; pub mod destructor; pub mod dkio; pub mod dladm; diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 81e6b863881..12e33862b2e 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -21,6 +21,7 @@ clap.workspace = true crucible-client-types.workspace = true crucible-agent-client.workspace = true ddm-admin-client.workspace = true +derive_more.workspace = true dns-server.workspace = true dns-service-client.workspace = true dpd-client.workspace = true diff --git a/sled-agent/src/storage/dump_setup.rs b/sled-agent/src/storage/dump_setup.rs index 94d0a5b5f53..5260ae63cb5 100644 --- a/sled-agent/src/storage/dump_setup.rs +++ b/sled-agent/src/storage/dump_setup.rs @@ -1,37 +1,89 @@ use crate::storage_manager::DiskWrapper; use camino::Utf8PathBuf; +use derive_more::{AsRef, Deref}; +use illumos_utils::dumpadm::DumpAdmError; use illumos_utils::zpool::ZpoolHealth; use omicron_common::disk::DiskIdentity; use sled_hardware::DiskVariant; use slog::Logger; -use std::collections::HashMap; -use std::sync::Arc; +use std::collections::{HashMap, HashSet}; +use std::ffi::OsString; +use std::sync::{Arc, Weak}; use tokio::sync::MutexGuard; #[derive(Default)] pub struct DumpSetup { - // prevent us from running savecore concurrently. - savecore_lock: Arc>, + worker: Arc>, } +impl DumpSetup { + pub fn new(log: Logger) -> Self { + let worker: Arc> = Default::default(); + let worker_weak = Arc::downgrade(&worker); + std::thread::spawn(move || Self::poll_file_rotation(worker_weak, log)); + Self { worker } + } +} + +// we sure are passing a lot of Utf8PathBufs around, let's be careful about it +#[derive(AsRef, Clone, Debug, Deref, Eq, Hash, Ord, PartialEq, PartialOrd)] +struct DumpSlicePath(Utf8PathBuf); +#[derive(AsRef, Clone, Debug, Deref, Eq, Hash, Ord, PartialEq, PartialOrd)] +struct DumpDirPath(Utf8PathBuf); +#[derive(AsRef, Clone, Debug, Deref, Eq, Hash, Ord, PartialEq, PartialOrd)] +struct CorePath(Utf8PathBuf); + +#[derive(Default)] +struct DumpSetupWorker { + chosen_dump_slice: Option, + chosen_dump_dir: Option, + chosen_core_dir: Option, + + known_dump_slices: Vec, + known_dump_dirs: Vec, + known_core_dirs: Vec, + + savecored_slices: HashSet, +} + +const ROTATION_DURATION: std::time::Duration = + std::time::Duration::from_secs(300); + impl DumpSetup { pub(crate) async fn update_dumpdev_setup( &self, disks: &mut MutexGuard<'_, HashMap>, log: Logger, ) { - let mut dump_slices = Vec::new(); + let mut m2_dump_slices = Vec::new(); let mut u2_dump_dirs = Vec::new(); + let mut m2_core_dirs = Vec::new(); for (_id, disk_wrapper) in disks.iter() { match disk_wrapper { DiskWrapper::Real { disk, .. 
                    DiskVariant::M2 => {
                        match disk.dump_device_devfs_path(false) {
-                            Ok(path) => dump_slices.push(path),
+                            Ok(path) => {
+                                m2_dump_slices.push(DumpSlicePath(path))
+                            }
                            Err(err) => {
                                warn!(log, "Error getting dump device devfs path: {err:?}");
                            }
                        }
+                        let name = disk.zpool_name();
+                        if let Ok(info) = illumos_utils::zpool::Zpool::get_info(
+                            &name.to_string(),
+                        ) {
+                            if info.health() == ZpoolHealth::Online {
+                                m2_core_dirs.push(CorePath(
+                                    name.dataset_mountpoint(
+                                        sled_hardware::disk::CRASH_DATASET,
+                                    ),
+                                ));
+                            } else {
+                                warn!(log, "Zpool {name:?} not online, won't attempt to save process core dumps there");
+                            }
+                        }
                    }
                    DiskVariant::U2 => {
                        let name = disk.zpool_name();
@@ -39,11 +91,13 @@ impl DumpSetup {
                            &name.to_string(),
                        ) {
                            if info.health() == ZpoolHealth::Online {
-                                u2_dump_dirs.push(name.dataset_mountpoint(
-                                    sled_hardware::disk::DUMP_DATASET,
+                                u2_dump_dirs.push(DumpDirPath(
+                                    name.dataset_mountpoint(
+                                        sled_hardware::disk::DUMP_DATASET,
+                                    ),
                                ));
                            } else {
-                                warn!(log, "Zpool {name:?} not online, won't attempt to savecore dumps there");
+                                warn!(log, "Zpool {name:?} not online, won't attempt to save kernel core dumps there");
                            }
                        }
                    }
@@ -52,65 +106,104 @@ impl DumpSetup {
            }
        }
 
-        dump_slices.sort();
-        u2_dump_dirs.sort();
-
-        let savecore_lock = self.savecore_lock.clone();
-        tokio::task::spawn_blocking(move || {
-            // TODO: a more reasonable way of deduplicating the effort.
-            let _guard = savecore_lock.lock();
-            Self::run_dumpadm_and_savecore(log, dump_slices, u2_dump_dirs);
+        let savecore_lock = self.worker.clone();
+        tokio::task::spawn_blocking(move || match savecore_lock.lock() {
+            Ok(mut guard) => {
+                guard.update_disk_loadout(
+                    log,
+                    m2_dump_slices,
+                    u2_dump_dirs,
+                    m2_core_dirs,
+                );
+            }
+            Err(err) => {
+                error!(log, "DumpSetup mutex poisoned: {err:?}");
+            }
        });
    }
 
-    fn run_dumpadm_and_savecore(
+    fn poll_file_rotation(
+        worker: Weak<std::sync::Mutex<DumpSetupWorker>>,
        log: Logger,
-        dump_slices: Vec<Utf8PathBuf>,
-        u2_dump_dirs: Vec<Utf8PathBuf>,
    ) {
-        for dump_slice in dump_slices {
-            // NOTE: because of the need to have dumpadm change the global
-            // state of which slice the system is using for dumps in order
-            // for savecore to behave the way we want (i.e. clear the flag
-            // after succeeding), we could hypothetically miss a dump if
-            // the kernel crashes again while savecore is still running.
-            if u2_dump_dirs.is_empty() {
-                // Don't risk overwriting an existing dump if there's
-                // already one there until we can attempt to savecore(8)
-                // it away and clear the flag to make room.
-                match illumos_utils::dumpadm::dump_flag_is_valid(&dump_slice) {
-                    Ok(false) => {
-                        // Have dumpadm write the config for crash dumps to be
-                        // on this slice, at least, until a U.2 comes along.
-                        match illumos_utils::dumpadm::dumpadm(&dump_slice, None)
-                        {
-                            Ok(_) => {
-                                info!(log, "Using dump device {dump_slice:?} with no savecore destination (no U.2 debug zvol yet)");
-                            }
-                            Err(err) => {
-                                warn!(log, "Could not configure {dump_slice:?} as dump device: {err:?}");
-                            }
+        loop {
+            std::thread::sleep(ROTATION_DURATION);
+            if let Some(mutex) = worker.upgrade() {
+                match mutex.lock() {
+                    Ok(guard) => {
+                        if let Err(err) = guard.rotate_files(&log) {
+                            error!(log, "DumpSetup mutex poisoned in poll thread: {err:?}");
                        }
                    }
-                    Ok(true) => {
-                        warn!(log, "Not configuring {dump_slice:?} as it appears to contain a dump we cannot yet send to a U.2 debug zvol");
-                    }
                    Err(err) => {
-                        debug!(
+                        error!(
                            log,
-                            "Dump slice {dump_slice:?} appears to be unused : {err:?}",
+                            "DumpSetup mutex poisoned in poll thread: {err:?}"
                        );
+                        break;
                    }
                }
            } else {
-                // Try each U.2 until we succeed once
-                for mountpoint in &u2_dump_dirs {
-                    // Let's try to see if it appears to have a dump already
+                break;
+            }
+        }
+    }
+}
+
+impl DumpSetupWorker {
+    fn update_disk_loadout(
+        &mut self,
+        log: Logger,
+        dump_slices: Vec<DumpSlicePath>,
+        dump_dirs: Vec<DumpDirPath>,
+        core_dirs: Vec<CorePath>,
+    ) {
+        self.known_dump_slices = dump_slices;
+        self.known_dump_dirs = dump_dirs;
+        self.known_core_dirs = core_dirs;
+
+        self.known_dump_slices.sort();
+        self.known_dump_dirs.sort();
+        self.known_core_dirs.sort();
+
+        if let Some(x) = &self.chosen_dump_dir {
+            if !self.known_dump_dirs.contains(x) {
+                warn!(log, "Previously-chosen dump dir {x:?} no longer exists in our view of reality");
+                self.chosen_dump_dir = None;
+            }
+        }
+        if let Some(x) = &self.chosen_dump_slice {
+            if !self.known_dump_slices.contains(x) {
+                warn!(log, "Previously-chosen dump slice {x:?} no longer exists in our view of reality");
+                self.chosen_dump_slice = None;
+            }
+        }
+        if let Some(x) = &self.chosen_core_dir {
+            if !self.known_core_dirs.contains(x) {
+                warn!(log, "Previously-chosen core dir {x:?} no longer exists in our view of reality");
+                self.chosen_core_dir = None;
+            }
+        }
+
+        if self.chosen_dump_dir.is_none() {
+            // TODO: decide based on factors more meaningful than disk ID sort order
+            self.chosen_dump_dir = self.known_dump_dirs.first().cloned();
+        }
+
+        if self.chosen_core_dir.is_none() {
+            // TODO: decide based on factors more meaningful than disk ID sort order
+            self.chosen_core_dir = self.known_core_dirs.first().cloned();
+        }
+
+        if self.chosen_dump_slice.is_none() {
+            if self.chosen_dump_dir.is_some() {
+                for dump_slice in self.known_dump_slices.clone() {
+                    // Let's try to see if it appears to have a kernel dump already
                    match illumos_utils::dumpadm::dump_flag_is_valid(
                        &dump_slice,
                    ) {
                        Ok(true) => {
-                            debug!(log, "Dump slice {dump_slice:?} appears to have a valid header; will attempt to savecore to {mountpoint:?}");
+                            debug!(log, "Dump slice {dump_slice:?} appears to have a valid header; will attempt to savecore");
                        }
                        Ok(false) => {
                            info!(log, "Dump slice {dump_slice:?} appears to have already been saved");
@@ -119,21 +212,77 @@ impl DumpSetup {
                            debug!(log, "Dump slice {dump_slice:?} appears to be unused: {err:?}");
                        }
                    }
-                    // Have dumpadm write the config for crash dumps to be
-                    // on this slice, and invoke savecore(8) to save any
-                    // dump that's already present there.
-                    match illumos_utils::dumpadm::dumpadm(
-                        &dump_slice,
-                        Some(mountpoint),
-                    ) {
+                    if let Ok(_) = self.dumpadm_and_savecore(&dump_slice) {
+                        self.chosen_dump_slice = Some(dump_slice);
+                        break;
+                    }
+                }
+            } else {
+                // Don't risk overwriting an existing kernel dump if there's
+                // already one there until we can attempt to savecore(8)
+                // it away and clear the flag to make room.
+                for dump_slice in &self.known_dump_slices {
+                    match illumos_utils::dumpadm::dump_flag_is_valid(dump_slice)
+                    {
+                        Ok(false) => {
+                            // Have dumpadm write the config for crash dumps to be
+                            // on this slice, at least, until a U.2 comes along.
+                            match illumos_utils::dumpadm::dumpadm(
+                                dump_slice, None,
+                            ) {
+                                Ok(_) => {
+                                    info!(log, "Using dump device {dump_slice:?} with no savecore destination (no U.2 debug zvol yet)");
+                                    self.chosen_dump_slice =
+                                        Some(dump_slice.clone());
+                                    break;
+                                }
+                                Err(err) => {
+                                    warn!(log, "Could not configure {dump_slice:?} as dump device: {err:?}");
+                                }
+                            }
+                        }
+                        Ok(true) => {
+                            warn!(log, "Not configuring {dump_slice:?} as it appears to contain a dump we cannot yet send to a U.2 debug zvol");
+                        }
                        Err(err) => {
-                            warn!(log, "Could not configure {dump_slice:?} as dump device with {mountpoint:?} as savecore destination: {err:?}");
+                            debug!(
+                                log,
+                                "Dump slice {dump_slice:?} appears to be unused : {err:?}",
+                            );
                        }
+                    }
+                }
+            }
+        }
+
+        if let Some(core_dir) = self.chosen_core_dir.clone() {
+            // tell the system to write *userspace process* cores here.
+            match illumos_utils::coreadm::coreadm(&core_dir) {
+                Ok(()) => {
+                    info!(
+                        log,
+                        "Set process core dump directory to {core_dir:?}"
+                    );
+                }
+                Err(err) => {
+                    error!(log, "Couldn't configure process core dump directory to {core_dir:?}: {err:?}");
+                }
+            }
+        }
+
+        if let Some(dump_dir) = self.chosen_dump_dir.clone() {
+            let mut changed_slice = false;
+            for dump_slice in self.known_dump_slices.clone() {
+                if !self.savecored_slices.contains(&dump_slice) {
+                    changed_slice = true;
+                    // temporarily changes the system's dump slice so savecore(8)
+                    // can update the header in the slice when it finishes...
+                    match self.dumpadm_and_savecore(&dump_slice) {
                        Ok(saved) => {
-                            if let Some(stdout) = saved {
+                            if let Some(stdout) = &saved {
                                info!(
                                    log,
-                                    "Saved dump from {dump_slice:?} to {mountpoint:?}: {stdout:?}"
+                                    "Saved dump from {dump_slice:?} to {dump_dir:?}: {stdout:?}"
                                );
                            } else {
                                info!(
@@ -141,14 +290,88 @@ impl DumpSetup {
                                    "Set {dump_slice:?} as system dump slice",
                                );
                            }
-                            // If there was one present, we successfully
-                            // compressed it onto a U.2's pool, no need to
-                            // try others.
-                            break;
+                        }
+                        Err(err) => {
+                            warn!(log, "Could not configure {dump_slice:?} as dump device with {dump_dir:?} as savecore destination: {err:?}");
                        }
                    }
                }
            }
+
+            // ...so then we restore the chosen dump slice for the system to use
+            // in the event of a kernel crash
+            if changed_slice {
+                if let Some(dump_slice) = &self.chosen_dump_slice {
+                    if let Err(err) =
+                        illumos_utils::dumpadm::dumpadm(dump_slice, None)
+                    {
+                        error!(log, "Could not restore dump slice to {dump_slice:?}: {err:?}");
+                    }
+                }
+            }
+        }
+    }
+
+    fn rotate_files(&self, log: &Logger) -> Result<(), std::io::Error> {
+        if let Some(dump_dir) = &self.chosen_dump_dir {
+            for core_dir in &self.known_core_dirs {
+                if let Ok(dir) = core_dir.read_dir() {
+                    for entry in dir.flatten() {
+                        if let Some(path) = entry.file_name().to_str() {
+                            let dest = dump_dir.join(path);
+
+                            let mut dest_f = std::fs::File::create(&dest)?;
+                            let mut src_f = std::fs::File::open(&entry.path())?;
+                            std::io::copy(&mut src_f, &mut dest_f)?;
+                            dest_f.sync_all()?;
+                            drop(src_f);
+                            drop(dest_f);
+
+                            if let Err(err) = std::fs::remove_file(entry.path())
+                            {
+                                warn!(log, "Could not remove {entry:?} after copying it to {dest:?}: {err:?}");
+                            } else {
+                                info!(
+                                    log,
+                                    "Relocated core {entry:?} to {dest:?}"
+                                );
+                                break;
+                            }
+                        } else {
+                            error!(log, "Non-UTF8 path found while rotating core dumps: {entry:?}");
+                        }
+                    }
+                }
+            }
+        }
+        Ok(())
+    }
+
+    // Have dumpadm write the config for crash dumps to be
+    // on this slice, and then invoke savecore(8) to save any
+    // dump that's already present there.
+    //
+    // NOTE: because of the need to have dumpadm change the global
+    // state of which slice the system is using for dumps in order
+    // for savecore to behave the way we want (i.e. clear the flag
+    // after succeeding), we could hypothetically miss a dump if
+    // the kernel crashes again while savecore is still running.
+    fn dumpadm_and_savecore(
+        &mut self,
+        dump_slice: &DumpSlicePath,
+    ) -> Result<Option<OsString>, DumpAdmError> {
+        // TODO: untangle savecore from illumos_utils::dumpadm
+        assert!(self.chosen_dump_dir.is_some());
+
+        let savecore_dir = self.chosen_dump_dir.clone().unwrap().0;
+
+        match illumos_utils::dumpadm::dumpadm(&dump_slice, Some(&savecore_dir))
+        {
+            Ok(saved) => {
+                self.savecored_slices.insert(dump_slice.clone());
+                Ok(saved)
+            }
+            Err(err) => Err(err),
        }
    }
 }
diff --git a/sled-agent/src/storage_manager.rs b/sled-agent/src/storage_manager.rs
index b186b1c95f6..a4eb8edac93 100644
--- a/sled-agent/src/storage_manager.rs
+++ b/sled-agent/src/storage_manager.rs
@@ -675,6 +675,9 @@ impl StorageWorker {
            self.physical_disk_notify(NotifyDiskRequest::Remove(key.clone()))
                .await;
        }
+
+        self.dump_setup.update_dumpdev_setup(disks, self.log.clone()).await;
+
        Ok(())
    }
 
@@ -971,13 +974,14 @@ impl StorageManager {
            resources: resources.clone(),
            tx,
            task: tokio::task::spawn(async move {
+                let dump_setup = Arc::new(DumpSetup::new(log.clone()));
                let mut worker = StorageWorker {
                    log,
                    nexus_notifications: FuturesOrdered::new(),
                    rx,
                    underlay: Arc::new(Mutex::new(None)),
                    key_requester,
-                    dump_setup: Arc::new(DumpSetup::default()),
+                    dump_setup,
                };
                worker.do_work(resources).await