Skip to content

Commit

Permalink
WIP log rotation into U.2 dataset from all zones
Browse files Browse the repository at this point in the history
  • Loading branch information
lif committed Jul 19, 2023
1 parent fba5019 commit d129bd5
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 22 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ gateway-client = { path = "gateway-client" }
gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway-service", default-features = false, features = ["std"], rev = "146a687f7413bfe580869bb6017f3bfe8b4710b1" }
gateway-sp-comms = { git = "https://github.com/oxidecomputer/management-gateway-service", rev = "146a687f7413bfe580869bb6017f3bfe8b4710b1" }
gateway-test-utils = { path = "gateway-test-utils" }
glob = "0.3.1"
headers = "0.3.8"
heck = "0.4"
hex = "0.4.3"
Expand Down
1 change: 1 addition & 0 deletions sled-agent/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ dpd-client.workspace = true
dropshot.workspace = true
flate2.workspace = true
futures.workspace = true
glob.workspace = true
http.workspace = true
hyper-staticfile.workspace = true
gateway-client.workspace = true
Expand Down
137 changes: 115 additions & 22 deletions sled-agent/src/storage/dump_setup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,18 @@ use crate::storage_manager::DiskWrapper;
use camino::Utf8PathBuf;
use derive_more::{AsRef, Deref};
use illumos_utils::dumpadm::DumpAdmError;
use illumos_utils::zone::{AdmError, Zones};
use illumos_utils::zpool::ZpoolHealth;
use omicron_common::disk::DiskIdentity;
use sled_hardware::DiskVariant;
use slog::Logger;
use std::collections::{HashMap, HashSet};
use std::ffi::OsString;
use std::fs::DirEntry;
use std::path::{Path, PathBuf};
use std::sync::{Arc, Weak};
use tokio::sync::MutexGuard;
use zone::Zone;

pub struct DumpSetup {
worker: Arc<std::sync::Mutex<DumpSetupWorker>>,
Expand Down Expand Up @@ -140,7 +144,7 @@ impl DumpSetup {
match mutex.lock() {
Ok(mut guard) => {
guard.reevaluate_choices();
if let Err(err) = guard.rotate_files(&log) {
if let Err(err) = guard.rotate_files() {
error!(
log,
"Failed to rotate debug/dump files: {err:?}"
Expand Down Expand Up @@ -411,43 +415,114 @@ impl DumpSetupWorker {
}
}

fn rotate_files(&self, log: &Logger) -> Result<(), std::io::Error> {
fn rotate_files(&self) -> std::io::Result<()> {
if let Some(debug_dir) = &self.chosen_debug_dir {
if self.known_core_dirs.is_empty() {
info!(log, "No core dump locations yet known.");
info!(self.log, "No core dump locations yet known.");
}
for core_dir in &self.known_core_dirs {
if let Ok(dir) = core_dir.read_dir() {
for entry in dir.flatten() {
if let Some(path) = entry.file_name().to_str() {
let dest = debug_dir.join(path);

let mut dest_f = std::fs::File::create(&dest)?;
let mut src_f = std::fs::File::open(&entry.path())?;
Self::copy_sync_and_remove(&entry.path(), &dest)?;

std::io::copy(&mut src_f, &mut dest_f)?;
dest_f.sync_all()?;

drop(src_f);
drop(dest_f);

if let Err(err) = std::fs::remove_file(entry.path())
{
warn!(log, "Could not remove {entry:?} after copying it to {dest:?}: {err:?}");
} else {
info!(
log,
"Relocated core {entry:?} to {dest:?}"
);
}
info!(self.log, "Relocated {entry:?} to {dest:?}");
} else {
error!(log, "Non-UTF8 path found while rotating core dumps: {entry:?}");
error!(self.log, "Non-UTF8 path found while rotating core dumps: {entry:?}");
}
}
}
}
} else {
info!(log, "No rotation destination for crash dumps yet chosen.");
info!(
self.log,
"No rotation destination for crash dumps yet chosen."
);
}

if let Err(err) = self.rotate_logs() {
error!(
self.log,
"Failure while trying to rotate logs to debug dataset: {err:?}"
);
}

Ok(())
}

/// Copies `source` to `dest`, syncs the copy to disk, and only then deletes
/// `source`.
///
/// `source` is opened *before* `dest` is created, so a missing or unreadable
/// source fails without creating an empty `dest` or truncating a file that
/// already exists at that path.
///
/// # Errors
///
/// Returns the first I/O error encountered. `source` is removed only after
/// both the copy and the `sync_all` on the destination have succeeded.
fn copy_sync_and_remove(
    source: impl AsRef<Path>,
    dest: impl AsRef<Path>,
) -> std::io::Result<()> {
    let source = source.as_ref();
    let dest = dest.as_ref();

    // Scope the handles so both files are closed before the source is
    // unlinked.
    {
        let mut src_f = std::fs::File::open(source)?;
        let mut dest_f = std::fs::File::create(dest)?;

        std::io::copy(&mut src_f, &mut dest_f)?;

        dest_f.sync_all()?;
    }

    std::fs::remove_file(source)
}

/// Rotates already-rotated service logs from the global zone and from every
/// zone reported by `Zones::get()` into the chosen debug directory.
///
/// The global zone's logs are read from `/var/svc/log`; each other zone's
/// logs are read from `root/var/svc/log` beneath that zone's path.
///
/// # Errors
///
/// Fails with `NoDebugDirYet` when no debug directory has been chosen, with
/// `Tokio` when the temporary runtime cannot be built, and otherwise with
/// whatever the zone lookup or the first failing per-zone rotation returns.
fn rotate_logs(&self) -> Result<(), RotateLogsError> {
    let Some(debug_dir) = self.chosen_debug_dir.as_ref() else {
        return Err(RotateLogsError::NoDebugDirYet);
    };

    // The zone crate's 'deprecated' functions collide when its 'sync' and
    // 'async' features are both enabled, so the async API is driven from a
    // throwaway runtime instead.
    let runtime = match tokio::runtime::Runtime::new() {
        Ok(runtime) => runtime,
        Err(err) => return Err(RotateLogsError::Tokio(err)),
    };
    let oxz_zones = runtime.block_on(Zones::get())?;

    let global_logdir = PathBuf::from("/var/svc/log");
    Self::rotate_logs_inner(debug_dir, global_logdir, "global")?;

    // Stop at the first zone whose rotation fails.
    oxz_zones.into_iter().try_for_each(|zone| {
        Self::rotate_logs_inner(
            debug_dir,
            zone.path().join("root/var/svc/log"),
            zone.name(),
        )
    })
}

/// Moves every file matching `*.log.*` in `logdir` into the `zone_name`
/// subdirectory of `debug_dir`.
///
/// Each file keeps its name up to the final extension, which is replaced by
/// the lowest non-negative integer not already taken in the destination.
/// Glob entries that yield an error are skipped.
///
/// # Errors
///
/// Fails with `Utf8` if the glob pattern path is not valid UTF-8, with `Glob`
/// if the pattern is rejected, and with `IoError` if creating the destination
/// directory or moving a file fails.
fn rotate_logs_inner(
    debug_dir: &DebugDirPath,
    logdir: PathBuf,
    zone_name: &str,
) -> Result<(), RotateLogsError> {
    // Only logs that have already been rotated are wanted, e.g. foo.log.3
    let pattern_path = logdir.join("*.log.*");
    let Some(pattern) = pattern_path.to_str() else {
        return Err(RotateLogsError::Utf8(zone_name.to_string()));
    };

    let dest_dir = debug_dir.join(zone_name).into_std_path_buf();

    for rotated in glob::glob(pattern)?.flatten() {
        std::fs::create_dir_all(&dest_dir)?;
        let template = dest_dir.join(rotated.file_name().unwrap());

        // logadm restarts at .log.0 each time we take its rotated files
        // away, so the destination needs its own numbering: probe upward
        // from 0 until an unused suffix is found.
        let mut suffix = 0;
        let dest = loop {
            let candidate = template.with_extension(format!("{suffix}"));
            if !candidate.exists() {
                break candidate;
            }
            suffix += 1;
        };

        Self::copy_sync_and_remove(rotated, dest)?;
    }
    Ok(())
}
Expand Down Expand Up @@ -480,3 +555,21 @@ impl DumpSetupWorker {
}
}
}

/// Reasons that rotating logs into the debug dataset can fail.
#[derive(thiserror::Error, Debug)]
enum RotateLogsError {
/// The Tokio runtime used to fetch zone information could not be created.
#[error("Couldn't make an async runtime to get zone info: {0}")]
Tokio(std::io::Error),
/// A filesystem operation failed, e.g. creating the destination directory
/// or copying/removing a log file.
#[error("I/O error: {0}")]
IoError(#[from] std::io::Error),
/// Looking up the list of zones failed.
#[error("Error calling zoneadm: {0}")]
Zoneadm(#[from] AdmError),
/// The glob pattern path built from a zone's log directory was not valid
/// UTF-8; carries the name of the zone concerned.
#[error("Non-UTF8 zone path for zone {0}")]
Utf8(String),
/// The glob pattern used to find rotated logs was rejected by `glob`.
#[error("Glob pattern invalid: {0}")]
Glob(#[from] glob::PatternError),
/// No debug directory has been chosen yet, so there is nowhere to put logs.
#[error(
"No debug dir into which we should rotate logs has yet been chosen"
)]
NoDebugDirYet,
}

0 comments on commit d129bd5

Please sign in to comment.