Skip to content

Commit

Permalink
feat(internal): Replace cp use with system call
Browse files Browse the repository at this point in the history
Previously the flags we provided to cp were compiled into the library
based on platform, this caused issues when using a different binary to
the one provided by the system as flags were sometimes not available.

This commit adds a dependency on reflink-copy to enable the same copy on
write behavior for postgresql directories but without using the cp
binary directly.
  • Loading branch information
johnchildren committed Nov 1, 2023
1 parent cffa64e commit a3e29c1
Show file tree
Hide file tree
Showing 5 changed files with 91 additions and 64 deletions.
1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ tokio = { version = "1.8", features = ["parking_lot", "rt", "sync", "io-util", "
tracing = "0.1"
which = "4.0"
once_cell = "1"
reflink-copy = "0.1"

[dev-dependencies]
test-log = { version = "0.2", default-features = false, features = ["trace"] }
Expand Down
52 changes: 22 additions & 30 deletions src/asynchronous.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ use std::process::Stdio;
use std::sync::Arc;

use tempfile::TempDir;
use tokio::fs::create_dir_all;
use tokio::io::Lines;
use tokio::process::{ChildStderr, ChildStdout};

use tokio::sync::oneshot::Sender;
use tokio::sync::{Semaphore, SemaphorePermit};
use tokio::{
Expand All @@ -16,7 +16,7 @@ use tokio::{
use tracing::{debug, instrument};

use crate::errors::{ProcessCapture, TmpPostgrustError, TmpPostgrustResult};
use crate::search::find_postgresql_command;
use crate::search::{all_dir_entries, build_copy_dst_path, find_postgresql_command};
use crate::POSTGRES_UID_GID;

/// Limit the total processes that can be running at any one time.
Expand Down Expand Up @@ -52,7 +52,7 @@ async fn exec_process(

#[instrument]
pub(crate) fn start_postgres_subprocess(
data_directory: &'_ Path,
data_directory: &Path,
port: u32,
) -> TmpPostgrustResult<Child> {
let postgres_path =
Expand All @@ -72,7 +72,7 @@ pub(crate) fn start_postgres_subprocess(
}

#[instrument]
pub(crate) async fn exec_init_db(data_directory: &'_ Path) -> TmpPostgrustResult<()> {
pub(crate) async fn exec_init_db(data_directory: &Path) -> TmpPostgrustResult<()> {
let initdb_path = find_postgresql_command("bin", "initdb").expect("failed to find initdb");

debug!("Initializing database in: {:?}", data_directory);
Expand All @@ -85,37 +85,29 @@ pub(crate) async fn exec_init_db(data_directory: &'_ Path) -> TmpPostgrustResult
}

#[instrument]
pub(crate) async fn exec_copy_dir(src_dir: &'_ Path, dst_dir: &'_ Path) -> TmpPostgrustResult<()> {
for read_dir in src_dir
.read_dir()
.map_err(TmpPostgrustError::CopyCachedInitDBFailedFileNotFound)?
{
let mut cmd = Command::new("cp");
cmd.arg("-R");

#[cfg(target_os = "macos")]
cmd.arg("-c");
#[cfg(target_os = "linux")]
cmd.arg("--reflink=auto");

cmd.arg(
read_dir
.map_err(TmpPostgrustError::CopyCachedInitDBFailedFileNotFound)?
.path(),
)
.arg(dst_dir);

exec_process(&mut cmd, TmpPostgrustError::CopyCachedInitDBFailed).await?;
pub(crate) async fn exec_copy_dir(src_dir: &Path, dst_dir: &Path) -> TmpPostgrustResult<()> {
let (dirs, others) = all_dir_entries(src_dir)?;

for entry in dirs {
create_dir_all(build_copy_dst_path(&entry, src_dir, dst_dir)?)
.await
.map_err(TmpPostgrustError::CopyCachedInitDBFailedFileNotFound)?;
}

for entry in others {
reflink_copy::reflink_or_copy(&entry, build_copy_dst_path(&entry, src_dir, dst_dir)?)
.map_err(TmpPostgrustError::CopyCachedInitDBFailedFileNotFound)?;
}

Ok(())
}

#[instrument]
pub(crate) async fn exec_create_db(
socket: &'_ Path,
socket: &Path,
port: u32,
owner: &'_ str,
dbname: &'_ str,
owner: &str,
dbname: &str,
) -> TmpPostgrustResult<()> {
let mut command = Command::new("createdb");
command
Expand All @@ -135,9 +127,9 @@ pub(crate) async fn exec_create_db(

#[instrument]
pub(crate) async fn exec_create_user(
socket: &'_ Path,
socket: &Path,
port: u32,
username: &'_ str,
username: &str,
) -> TmpPostgrustResult<()> {
let mut command = Command::new("createuser");
command
Expand Down
9 changes: 6 additions & 3 deletions src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,15 @@ pub enum TmpPostgrustError {
/// Error when `initdb` fails to execute.
#[error("initdb failed")]
InitDBFailed(ProcessCapture),
/// Error when `cp` fails for the initialized database.
#[error("copying cached database failed")]
CopyCachedInitDBFailed(ProcessCapture),
/// Error when a file to be copied is not found.
#[error("copying cached database failed, file not found")]
CopyCachedInitDBFailedFileNotFound(#[source] std::io::Error),
/// Error when the file type cannot be read when copying the cached db.
#[error("copying cached database failed, could not read file type")]
CopyCachedInitDBFailedCouldNotReadFileType(#[source] std::io::Error),
/// Error when the source directory filepath cannot be stripped.
#[error("copying cached database failed, could not strip path prefix")]
CopyCachedInitDBFailedCouldNotStripPathPrefix(#[source] std::path::StripPrefixError),
/// Error when a copy process cannot be joined.
#[cfg(feature = "tokio-process")]
#[error("copying cached database failed, failed to join cp process")]
Expand Down
43 changes: 42 additions & 1 deletion src/search.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
use std::path::PathBuf;
use std::path::{Path, PathBuf};

use glob::glob;
use which::which;

use crate::errors::{TmpPostgrustError, TmpPostgrustResult};

/// Addtional file system locations to search for binaries
/// if `initdb` and `postgres` are not in the $PATH.
const SEARCH_PATHS: [&str; 5] = [
Expand Down Expand Up @@ -31,3 +33,42 @@ pub(crate) fn find_postgresql_command(dir: &str, name: &str) -> Result<PathBuf,
}
Err(())
}

/// Return a tuple of directory paths and other paths in a sub path by recursing through
/// and reading all directories.
pub(crate) fn all_dir_entries(src_dir: &Path) -> TmpPostgrustResult<(Vec<PathBuf>, Vec<PathBuf>)> {
let mut dirs = Vec::new();
let mut others = Vec::new();
for read_dir in src_dir
.read_dir()
.map_err(TmpPostgrustError::CopyCachedInitDBFailedFileNotFound)?
{
let entry = read_dir.map_err(TmpPostgrustError::CopyCachedInitDBFailedFileNotFound)?;
let entry_file_type = entry
.file_type()
.map_err(TmpPostgrustError::CopyCachedInitDBFailedCouldNotReadFileType)?;

if entry_file_type.is_dir() {
let (sub_dirs, sub_others) = all_dir_entries(&entry.path())?;
dirs.push(entry.path());
dirs.extend(sub_dirs);
others.extend(sub_others);
} else {
others.push(entry.path());
}
}
Ok((dirs, others))
}

pub(crate) fn build_copy_dst_path(
target_path: &Path,
src_dir: &Path,
dst_dir: &Path,
) -> TmpPostgrustResult<PathBuf> {
let entry_sub_path = target_path
.strip_prefix(src_dir)
.map_err(TmpPostgrustError::CopyCachedInitDBFailedCouldNotStripPathPrefix)?;
let dst_path = dst_dir.join(entry_sub_path);

Ok(dst_path)
}
50 changes: 20 additions & 30 deletions src/synchronous.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use std::convert::TryInto;
use std::fs::create_dir_all;
use std::io::BufReader;
use std::io::Lines;
use std::os::unix::process::CommandExt;
Expand All @@ -17,6 +18,8 @@ use tempfile::TempDir;
use tracing::{debug, instrument};

use crate::errors::{ProcessCapture, TmpPostgrustError, TmpPostgrustResult};
use crate::search::all_dir_entries;
use crate::search::build_copy_dst_path;
use crate::search::find_postgresql_command;
use crate::POSTGRES_UID_GID;

Expand Down Expand Up @@ -49,7 +52,7 @@ fn exec_process(

#[instrument]
pub(crate) fn start_postgres_subprocess(
data_directory: &'_ Path,
data_directory: &Path,
port: u32,
) -> TmpPostgrustResult<Child> {
let postgres_path =
Expand All @@ -69,7 +72,7 @@ pub(crate) fn start_postgres_subprocess(
}

#[instrument]
pub(crate) fn exec_init_db(data_directory: &'_ Path) -> TmpPostgrustResult<()> {
pub(crate) fn exec_init_db(data_directory: &Path) -> TmpPostgrustResult<()> {
let initdb_path = find_postgresql_command("bin", "initdb").expect("failed to find initdb");

debug!("Initializing database in: {:?}", data_directory);
Expand All @@ -83,28 +86,19 @@ pub(crate) fn exec_init_db(data_directory: &'_ Path) -> TmpPostgrustResult<()> {
}

#[instrument]
pub(crate) fn exec_copy_dir(src_dir: &'_ Path, dst_dir: &'_ Path) -> TmpPostgrustResult<()> {
for read_dir in src_dir
.read_dir()
.map_err(TmpPostgrustError::CopyCachedInitDBFailedFileNotFound)?
{
let mut cmd = Command::new("cp");
cmd.arg("-R");

#[cfg(target_os = "macos")]
cmd.arg("-c");
#[cfg(target_os = "linux")]
cmd.arg("--reflink=auto");

cmd.arg(
read_dir
.map_err(TmpPostgrustError::CopyCachedInitDBFailedFileNotFound)?
.path(),
)
.arg(dst_dir);
pub(crate) fn exec_copy_dir(src_dir: &Path, dst_dir: &Path) -> TmpPostgrustResult<()> {
let (dirs, others) = all_dir_entries(src_dir)?;

for entry in dirs {
create_dir_all(build_copy_dst_path(&entry, src_dir, dst_dir)?)
.map_err(TmpPostgrustError::CopyCachedInitDBFailedFileNotFound)?;
}

exec_process(&mut cmd, TmpPostgrustError::CopyCachedInitDBFailed)?;
for entry in others {
reflink_copy::reflink_or_copy(&entry, build_copy_dst_path(&entry, src_dir, dst_dir)?)
.map_err(TmpPostgrustError::CopyCachedInitDBFailedFileNotFound)?;
}

Ok(())
}

Expand All @@ -124,10 +118,10 @@ pub(crate) fn chown_to_non_root(dir: &Path) -> TmpPostgrustResult<()> {

#[instrument]
pub(crate) fn exec_create_db(
socket: &'_ Path,
socket: &Path,
port: u32,
owner: &'_ str,
dbname: &'_ str,
owner: &str,
dbname: &str,
) -> TmpPostgrustResult<()> {
let mut command = Command::new("createdb");
command
Expand All @@ -146,11 +140,7 @@ pub(crate) fn exec_create_db(
}

#[instrument]
pub(crate) fn exec_create_user(
socket: &'_ Path,
port: u32,
username: &'_ str,
) -> TmpPostgrustResult<()> {
pub(crate) fn exec_create_user(socket: &Path, port: u32, username: &str) -> TmpPostgrustResult<()> {
let mut command = Command::new("createuser");
command
.arg("-h")
Expand Down

0 comments on commit a3e29c1

Please sign in to comment.