Skip to content

Commit

Permalink
Put process core dumps onto the U.2 debug zvol
Browse files Browse the repository at this point in the history
(Part of oxidecomputer#2478)

This configures coreadm to put all core dumps onto the M.2 'crash'
dataset, and creates a thread that rotates them all onto a U.2 'debug'
dataset every 5 minutes.

This also refactors the dumpadm/savecore code to be less redundant and
more flexible, and adds an amount of arbitrary logic for e.g. picking
the U.2 onto which to save cores.
  • Loading branch information
lif committed Jul 19, 2023
1 parent ebd3db2 commit 6db61f3
Show file tree
Hide file tree
Showing 7 changed files with 459 additions and 78 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

62 changes: 62 additions & 0 deletions illumos-utils/src/coreadm.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
use camino::Utf8PathBuf;
use std::ffi::OsString;
use std::os::unix::ffi::OsStringExt;
use std::process::Command;

#[derive(thiserror::Error, Debug)]
pub enum CoreAdmError {
#[error("Error obtaining or modifying coreadm configuration. core_dir: {core_dir:?}")]
Execution { core_dir: Utf8PathBuf },

#[error("Invalid invocation of coreadm: {0:?} {1:?}")]
InvalidCommand(Vec<String>, OsString),

#[error("coreadm process was terminated by a signal.")]
TerminatedBySignal,

#[error("coreadm invocation exited with unexpected return code {0}")]
UnexpectedExitCode(i32),

#[error("Failed to execute dumpadm process: {0}")]
Exec(std::io::Error),
}

/// Absolute path of the `coreadm` executable that this module runs.
const COREADM: &str = "/usr/bin/coreadm";

/// Runs `coreadm` to route all process core dumps into `core_dir`.
///
/// Per-process core patterns (including setid) are disabled and the global
/// pattern (including setid) is enabled, so that every core lands in
/// `core_dir` under the name `core.[zone-name].[exe-filename].[pid].[time]`.
/// Core content is set to `default+debug`.
///
/// # Errors
///
/// * [`CoreAdmError::Exec`] if the process cannot be run.
/// * [`CoreAdmError::Execution`] on exit status 1.
/// * [`CoreAdmError::InvalidCommand`] on exit status 2, with the command line
///   and captured stderr.
/// * [`CoreAdmError::UnexpectedExitCode`] on any other non-zero status.
/// * [`CoreAdmError::TerminatedBySignal`] if there is no exit status.
pub fn coreadm(core_dir: &Utf8PathBuf) -> Result<(), CoreAdmError> {
    // Global core file pattern: everything goes under core_dir, named
    // "core.[zone-name].[exe-filename].[pid].[time]".
    let global_pattern = core_dir.join("core.%z.%f.%p.%t");

    let mut command = Command::new(COREADM);
    command
        .env_clear()
        // Turn off per-process core patterns...
        .args(["-d", "process"])
        .args(["-d", "proc-setid"])
        // ...and turn on the global one instead.
        .args(["-e", "global"])
        .args(["-e", "global-setid"])
        .arg("-g")
        .arg(global_pattern)
        // Include DWARF data from the executable and its library deps.
        .args(["-G", "default+debug"]);

    let output = command.output().map_err(CoreAdmError::Exec)?;

    match output.status.code() {
        None => Err(CoreAdmError::TerminatedBySignal),
        Some(0) => Ok(()),
        Some(1) => Err(CoreAdmError::Execution { core_dir: core_dir.clone() }),
        Some(2) => {
            // unwrap: the program path and every argument supplied above are
            // UTF-8, so the conversion from OsStr cannot fail.
            let invocation: Vec<String> =
                std::iter::once(command.get_program())
                    .chain(command.get_args())
                    .map(|piece| piece.to_str().unwrap().to_string())
                    .collect();
            let captured_stderr = OsString::from_vec(output.stderr);
            Err(CoreAdmError::InvalidCommand(invocation, captured_stderr))
        }
        Some(other) => Err(CoreAdmError::UnexpectedExitCode(other)),
    }
}
4 changes: 2 additions & 2 deletions illumos-utils/src/dumpadm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ pub enum DumpAdmError {
Execution { dump_slice: Utf8PathBuf, savecore_dir: Option<Utf8PathBuf> },

#[error("Invalid invocation of dumpadm: {0:?} {1:?}")]
InvalidCommand(Vec<String>, std::ffi::OsString),
InvalidCommand(Vec<String>, OsString),

#[error("dumpadm process was terminated by a signal.")]
TerminatedBySignal,
Expand All @@ -98,7 +98,7 @@ pub enum DumpAdmError {
Mkdir(std::io::Error),

#[error("savecore failed: {0:?}")]
SavecoreFailure(std::ffi::OsString),
SavecoreFailure(OsString),

#[error("Failed to execute dumpadm process: {0}")]
ExecDumpadm(std::io::Error),
Expand Down
1 change: 1 addition & 0 deletions illumos-utils/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
use cfg_if::cfg_if;

pub mod addrobj;
pub mod coreadm;
pub mod destructor;
pub mod dkio;
pub mod dladm;
Expand Down
1 change: 1 addition & 0 deletions sled-agent/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ clap.workspace = true
crucible-client-types.workspace = true
crucible-agent-client.workspace = true
ddm-admin-client.workspace = true
derive_more.workspace = true
dns-server.workspace = true
dns-service-client.workspace = true
dpd-client.workspace = true
Expand Down
Loading

0 comments on commit 6db61f3

Please sign in to comment.