Skip to content

Commit

Permalink
Create zone bundles from ZFS snapshots (#4225)
Browse files Browse the repository at this point in the history
- Fixes #4010
- Previously, we copied log files directly out of their original
locations, which meant we contended with several other components:
`logadm` rotating the log file; the log archiver moving them to
longer-term storage; and the program writing to the file itself. This
commit changes the operation of the bundler to first create a ZFS
snapshot of the filesystem(s) containing the log files, clone them, and
then copy files out of the clones. We destroy those clones / snapshots
after completing, and also when the sled-agent starts, to help with
crash-safety.
  • Loading branch information
bnaecker authored Oct 16, 2023
1 parent 42732fa commit 7d88789
Show file tree
Hide file tree
Showing 4 changed files with 646 additions and 173 deletions.
78 changes: 14 additions & 64 deletions illumos-utils/src/running_zone.rs
Original file line number Diff line number Diff line change
Expand Up @@ -391,13 +391,16 @@ pub struct RunningZone {
}

impl RunningZone {
/// The path to the zone's root filesystem (i.e., `/`), within zonepath.
pub const ROOT_FS_PATH: &'static str = "root";

/// Returns the name stored for this zone, borrowed from the zone's inner
/// state.
pub fn name(&self) -> &str {
&self.inner.name
}

/// Returns the filesystem path to the zone's root
/// Returns the filesystem path to the zone's root in the GZ.
pub fn root(&self) -> Utf8PathBuf {
self.inner.zonepath.join("root")
self.inner.zonepath.join(Self::ROOT_FS_PATH)
}

pub fn control_interface(&self) -> AddrObject {
Expand Down Expand Up @@ -958,13 +961,11 @@ impl RunningZone {
};
let binary = Utf8PathBuf::from(path);

// Fetch any log files for this SMF service.
let Some((log_file, rotated_log_files)) =
self.service_log_files(&service_name)?
let Some(log_file) = self.service_log_file(&service_name)?
else {
error!(
self.inner.log,
"failed to find log files for existing service";
"failed to find log file for existing service";
"service_name" => &service_name,
);
continue;
Expand All @@ -975,7 +976,6 @@ impl RunningZone {
binary,
pid,
log_file,
rotated_log_files,
});
}
}
Expand All @@ -992,72 +992,24 @@ impl RunningZone {
.collect())
}

/// Return any SMF log files associated with the named service.
/// Return any SMF log file associated with the named service.
///
/// Given a named service, this returns a tuple of the latest or current log
/// file, and an array of any rotated log files. If the service does not
/// exist, or there are no log files, `None` is returned.
pub fn service_log_files(
/// Given a named service, this returns the path of the current log file.
/// This can be used to find rotated or archived log files, but keep in mind
/// this returns only the current, if it exists.
pub fn service_log_file(
&self,
name: &str,
) -> Result<Option<(Utf8PathBuf, Vec<Utf8PathBuf>)>, ServiceError> {
) -> Result<Option<Utf8PathBuf>, ServiceError> {
let output = self.run_cmd(&["svcs", "-L", name])?;
let mut lines = output.lines();
let Some(current) = lines.next() else {
return Ok(None);
};
// We need to prepend the zonepath root to get the path in the GZ. We
// can do this with `join()`, but that will _replace_ the path if the
// second one is absolute. So trim any prefixed `/` from each path.
let root = self.root();
let current_log_file =
root.join(current.trim().trim_start_matches('/'));

// The rotated log files should have the same prefix as the current, but
// with an index appended. We'll search the parent directory for
// matching names, skipping the current file.
//
// See https://illumos.org/man/8/logadm for details on the naming
// conventions around these files.
let dir = current_log_file.parent().unwrap();
let mut rotated_files: Vec<Utf8PathBuf> = Vec::new();
for entry in dir.read_dir_utf8()? {
let entry = entry?;
let path = entry.path();

// Camino's Utf8Path only considers whole path components to match,
// so convert both paths into a &str and use that object's
// starts_with. See the `camino_starts_with_behaviour` test.
let path_ref: &str = path.as_ref();
let current_log_file_ref: &str = current_log_file.as_ref();
if path != current_log_file
&& path_ref.starts_with(current_log_file_ref)
{
rotated_files.push(path.clone().into());
}
}

Ok(Some((current_log_file, rotated_files)))
return Ok(Some(Utf8PathBuf::from(current.trim())));
}
}

#[test]
fn camino_starts_with_behaviour() {
    // Demonstrates that `Utf8PathBuf::starts_with` compares whole path
    // components, whereas `str::starts_with` compares raw string prefixes.
    let current =
        Utf8PathBuf::from("/zonepath/var/svc/log/oxide-nexus:default.log");
    let rotated =
        Utf8PathBuf::from("/zonepath/var/svc/log/oxide-nexus:default.log.0");

    let current_str: &str = current.as_ref();
    let rotated_str: &str = rotated.as_ref();

    // The two paths differ, both as paths and as plain strings.
    assert_ne!(current, rotated);
    assert_ne!(current_str, rotated_str);

    // Path-wise, the rotated file is _not_ prefixed by the current file,
    // because the final components are different...
    assert!(!rotated.starts_with(&current));
    // ...but string-wise it is, since only a `.0` suffix was appended.
    assert!(rotated_str.starts_with(current_str));
}

impl Drop for RunningZone {
fn drop(&mut self) {
if let Some(_) = self.id.take() {
Expand Down Expand Up @@ -1088,8 +1040,6 @@ pub struct ServiceProcess {
pub pid: u32,
/// The path for the current log file.
pub log_file: Utf8PathBuf,
/// The paths for any rotated log files.
pub rotated_log_files: Vec<Utf8PathBuf>,
}

/// Errors returned from [`InstalledZone::install`].
Expand Down
118 changes: 118 additions & 0 deletions illumos-utils/src/zfs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,26 @@ pub struct GetValueError {
err: GetValueErrorRaw,
}

/// Error returned when listing ZFS snapshots fails.
///
/// This is a thin wrapper around the underlying [`crate::ExecutionError`];
/// the `#[from]` attribute lets an `ExecutionError` be converted into this
/// type directly.
#[derive(Debug, thiserror::Error)]
#[error("Failed to list snapshots: {0}")]
pub struct ListSnapshotsError(#[from] crate::ExecutionError);

/// Error returned when creating a ZFS snapshot fails.
#[derive(Debug, thiserror::Error)]
#[error("Failed to create snapshot '{snap_name}' from filesystem '{filesystem}': {err}")]
pub struct CreateSnapshotError {
// Name of the filesystem the snapshot was to be taken of.
filesystem: String,
// Name of the snapshot that was to be created.
snap_name: String,
// The underlying command-execution failure.
err: crate::ExecutionError,
}

/// Error returned when destroying a ZFS snapshot fails.
///
/// The snapshot is reported in the error message as
/// `<filesystem>@<snap_name>`.
#[derive(Debug, thiserror::Error)]
#[error("Failed to delete snapshot '{filesystem}@{snap_name}': {err}")]
pub struct DestroySnapshotError {
// Name of the filesystem the snapshot belongs to.
filesystem: String,
// Name of the snapshot that was to be destroyed.
snap_name: String,
// The underlying command-execution failure.
err: crate::ExecutionError,
}

/// Wraps commands for interacting with ZFS.
///
/// This type has no fields; it serves as a namespace for the ZFS helper
/// functions implemented on it.
pub struct Zfs {}

Expand Down Expand Up @@ -184,6 +204,20 @@ impl Zfs {
Ok(filesystems)
}

/// Look up the dataset name for a ZFS object.
///
/// `object` may be given either as a dataset name or as a path. In the
/// latter case it is resolved to the _mounted_ ZFS dataset containing that
/// path.
///
/// The name is read from the output of `zfs get -Hpo value name <object>`,
/// with surrounding whitespace trimmed.
///
/// # Errors
///
/// Returns a [`ListDatasetsError`] naming `object` if the command fails.
pub fn get_dataset_name(object: &str) -> Result<String, ListDatasetsError> {
    let mut command = std::process::Command::new(ZFS);
    let cmd = command.args(["get", "-Hpo", "value", "name", object]);
    match execute(cmd) {
        Ok(output) => {
            let stdout = String::from_utf8_lossy(&output.stdout);
            Ok(stdout.trim().to_string())
        }
        Err(err) => Err(ListDatasetsError { name: object.to_string(), err }),
    }
}

/// Destroys a dataset.
pub fn destroy_dataset(name: &str) -> Result<(), DestroyDatasetError> {
let mut command = std::process::Command::new(PFEXEC);
Expand Down Expand Up @@ -379,6 +413,7 @@ impl Zfs {
}
}

/// Set the value of an Oxide-managed ZFS property.
pub fn set_oxide_value(
filesystem_name: &str,
name: &str,
Expand All @@ -404,6 +439,7 @@ impl Zfs {
Ok(())
}

/// Get the value of an Oxide-managed ZFS property.
pub fn get_oxide_value(
filesystem_name: &str,
name: &str,
Expand Down Expand Up @@ -434,6 +470,88 @@ impl Zfs {
}
Ok(value.to_string())
}

/// List all extant snapshots.
///
/// Runs `zfs list -H -o name -t snapshot` and parses every output line of
/// the form `<filesystem>@<snap_name>` into a [`Snapshot`].
///
/// Any line that does not contain an `@` separator is skipped. The error
/// type can only carry a command-execution failure, and panicking on
/// unexpected output from an external command would take down the caller,
/// so malformed lines are ignored instead.
///
/// # Errors
///
/// Returns a [`ListSnapshotsError`] if the `zfs` command fails.
pub fn list_snapshots() -> Result<Vec<Snapshot>, ListSnapshotsError> {
    let mut command = std::process::Command::new(ZFS);
    let cmd = command.args(&["list", "-H", "-o", "name", "-t", "snapshot"]);
    // `ListSnapshotsError` is `#[from] ExecutionError`, so `?` converts.
    let output = execute(cmd)?;
    let stdout = String::from_utf8_lossy(&output.stdout);
    let snapshots = stdout
        .trim()
        .lines()
        // Split on the first `@`; lines without one are not snapshot names.
        .filter_map(|line| line.split_once('@'))
        .map(|(filesystem, snap_name)| Snapshot {
            filesystem: filesystem.to_string(),
            snap_name: snap_name.to_string(),
        })
        .collect();
    Ok(snapshots)
}

/// Take a snapshot named `snap_name` of `filesystem`.
///
/// Each `(name, value)` pair in `properties` is forwarded to `zfs snapshot`
/// as `-o name=value`, so the properties are set directly on the new
/// snapshot. The snapshot itself is addressed as `<filesystem>@<snap_name>`.
///
/// # Errors
///
/// Returns a [`CreateSnapshotError`] recording the filesystem and snapshot
/// names if the command fails.
pub fn create_snapshot<'a>(
    filesystem: &'a str,
    snap_name: &'a str,
    properties: &'a [(&'a str, &'a str)],
) -> Result<(), CreateSnapshotError> {
    let mut command = std::process::Command::new(ZFS);
    command.arg("snapshot");
    for (name, value) in properties {
        command.arg("-o").arg(format!("{name}={value}"));
    }
    command.arg(format!("{filesystem}@{snap_name}"));
    match execute(&mut command) {
        Ok(_) => Ok(()),
        Err(err) => Err(CreateSnapshotError {
            filesystem: filesystem.to_string(),
            snap_name: snap_name.to_string(),
            err,
        }),
    }
}

/// Destroy the snapshot `snap_name` of `filesystem`.
///
/// The snapshot is addressed as `<filesystem>@<snap_name>` and removed with
/// `zfs destroy`.
///
/// # Errors
///
/// Returns a [`DestroySnapshotError`] recording the filesystem and snapshot
/// names if the command fails.
pub fn destroy_snapshot(
    filesystem: &str,
    snap_name: &str,
) -> Result<(), DestroySnapshotError> {
    let target = format!("{filesystem}@{snap_name}");
    let mut command = std::process::Command::new(ZFS);
    command.arg("destroy").arg(&target);
    if let Err(err) = execute(&mut command) {
        return Err(DestroySnapshotError {
            filesystem: filesystem.to_string(),
            snap_name: snap_name.to_string(),
            err,
        });
    }
    Ok(())
}
}

/// A read-only snapshot of a ZFS filesystem.
///
/// A snapshot is identified by the pair of its filesystem and its own name,
/// and is displayed as `<filesystem>@<snap_name>`.
#[derive(Clone, Debug)]
pub struct Snapshot {
/// Name of the filesystem the snapshot was taken of (the part before `@`).
pub filesystem: String,
/// Name of the snapshot itself (the part after `@`).
pub snap_name: String,
}

impl Snapshot {
    /// Return the full path to the snapshot directory within the filesystem.
    ///
    /// The path is built by looking up the `mountpoint` property of the
    /// snapshot's filesystem and appending `.zfs/snapshot/<snap_name>`.
    ///
    /// # Errors
    ///
    /// Returns a [`GetValueError`] if the `mountpoint` lookup fails.
    pub fn full_path(&self) -> Result<Utf8PathBuf, GetValueError> {
        let relative = format!(".zfs/snapshot/{}", self.snap_name);
        let mut path =
            Utf8PathBuf::from(Zfs::get_value(&self.filesystem, "mountpoint")?);
        path.push(relative);
        Ok(path)
    }
}

impl fmt::Display for Snapshot {
    /// Formats the snapshot as `<filesystem>@<snap_name>`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Self { filesystem, snap_name } = self;
        write!(f, "{filesystem}@{snap_name}")
    }
}

/// Returns all datasets managed by Omicron
Expand Down
2 changes: 1 addition & 1 deletion sled-agent/src/bootstrap/pre_server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -368,7 +368,7 @@ fn ensure_zfs_key_directory_exists(log: &Logger) -> Result<(), StartError> {
}

fn ensure_zfs_ramdisk_dataset() -> Result<(), StartError> {
let zoned = true;
let zoned = false;
let do_format = true;
let encryption_details = None;
let quota = None;
Expand Down
Loading

0 comments on commit 7d88789

Please sign in to comment.