Merge pull request #8745 from Turbo87/dump-db-temp
dump_db: Use `tempfile` for temporary file/folder creation
Turbo87 authored May 28, 2024
2 parents 946085a + d053f32 commit 09527e2
Showing 2 changed files with 54 additions and 79 deletions.
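
The change replaces hand-rolled paths under `std::env::temp_dir()` plus manual `Drop` impls with the `tempfile` crate, which ties deletion of a file or directory to the lifetime of a handle. A minimal sketch of the two primitives the commit leans on, `tempfile::tempdir()` and `tempfile::NamedTempFile` (a standalone example, not code from this repository):

use std::io::Write;

fn main() -> std::io::Result<()> {
    // `tempdir()` creates a uniquely named directory under the system
    // temp dir; it is removed recursively when `dir` is dropped.
    let dir = tempfile::tempdir()?;
    let export_dir = dir.path().join("2024-05-28-000000");
    std::fs::create_dir_all(&export_dir)?;

    // `NamedTempFile` is the single-file equivalent: it has a real path
    // on disk while the handle is alive and is deleted on drop.
    let mut file = tempfile::NamedTempFile::new()?;
    writeln!(file, "payload")?;
    println!("tarball would live at {:?}", file.path());

    Ok(())
} // `dir` and `file` are cleaned up here, including on earlier `?` error paths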
11 changes: 0 additions & 11 deletions src/tests/dump_db.rs
@@ -10,19 +10,12 @@ use once_cell::sync::Lazy;
 use regex::Regex;
 use secrecy::ExposeSecret;
 use std::io::Read;
-use std::sync::Mutex;
 use tar::Archive;
 
-/// Mutex to ensure that only one test is dumping the database at a time, since
-/// the dump directory is shared between all invocations of the background job.
-static DUMP_DIR_MUTEX: Lazy<Mutex<()>> = Lazy::new(|| Mutex::new(()));
-
 static PATH_DATE_RE: Lazy<Regex> = Lazy::new(|| Regex::new(r"^\d{4}-\d{2}-\d{2}-\d{6}").unwrap());
 
 #[tokio::test(flavor = "multi_thread")]
 async fn test_dump_db_job() {
-    let _guard = DUMP_DIR_MUTEX.lock();
-
     let (app, _, _, token) = TestApp::full().with_token();
 
     app.db(|conn| {
@@ -85,8 +78,6 @@ fn tar_paths<R: Read>(archive: &mut Archive<R>) -> Vec<String> {
 
 #[test]
 fn dump_db_and_reimport_dump() {
-    let _guard = DUMP_DIR_MUTEX.lock();
-
     crates_io::util::tracing::init_for_test();
 
     let db_one = TestDatabase::new();
@@ -109,8 +100,6 @@ fn dump_db_and_reimport_dump() {
 
 #[test]
 fn test_sql_scripts() {
-    let _guard = DUMP_DIR_MUTEX.lock();
-
     crates_io::util::tracing::init_for_test();
 
     let db = TestDatabase::new();
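
With each dump now writing into its own `TempDir`, the tests no longer share `std::env::temp_dir().join("dump-db")`, so the `DUMP_DIR_MUTEX` serialization above can be dropped. A toy illustration of why the guard becomes unnecessary (a hypothetical test, not from the diff):

#[test]
fn parallel_dumps_do_not_collide() {
    // Every call yields a fresh, uniquely named directory, so two tests
    // running concurrently can never write into the same dump folder.
    let first = tempfile::tempdir().unwrap();
    let second = tempfile::tempdir().unwrap();
    assert_ne!(first.path(), second.path());
}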
122 changes: 54 additions & 68 deletions src/worker/jobs/dump_db.rs
@@ -38,13 +38,13 @@ impl BackgroundJob for DumpDb {
             directory.populate(&database_url)?;
 
             info!(path = ?directory.export_dir, "Creating tarball");
-            DumpTarball::create(&directory.export_dir)
+            create_tarball(&directory.export_dir)
         })
         .await?;
 
         info!("Uploading tarball");
         env.storage
-            .upload_db_dump(target_name, &tarball.tarball_path)
+            .upload_db_dump(target_name, tarball.path())
             .await?;
         info!("Database dump tarball uploaded");
 
@@ -71,15 +71,23 @@
 /// make sure it gets deleted again even in the case of an error.
 #[derive(Debug)]
 pub struct DumpDirectory {
+    /// The temporary directory that contains the export directory. This is
+    /// allowing `dead_code` since we're only relying on the `Drop`
+    /// implementation to clean up the directory.
+    #[allow(dead_code)]
+    tempdir: tempfile::TempDir,
+
     pub timestamp: chrono::DateTime<chrono::Utc>,
     pub export_dir: PathBuf,
 }
 
 impl DumpDirectory {
     pub fn create() -> anyhow::Result<Self> {
+        let tempdir = tempfile::tempdir()?;
+
         let timestamp = chrono::Utc::now();
         let timestamp_str = timestamp.format("%Y-%m-%d-%H%M%S").to_string();
-        let export_dir = std::env::temp_dir().join("dump-db").join(timestamp_str);
+        let export_dir = tempdir.path().join(timestamp_str);
 
         debug!(?export_dir, "Creating database dump folder…");
         fs::create_dir_all(&export_dir).with_context(|| {
@@ -90,6 +98,7 @@ impl DumpDirectory {
         })?;
 
         Ok(Self {
+            tempdir,
             timestamp,
             export_dir,
         })
@@ -179,12 +188,6 @@
     }
 }
 
-impl Drop for DumpDirectory {
-    fn drop(&mut self) {
-        std::fs::remove_dir_all(&self.export_dir).unwrap();
-    }
-}
-
 pub fn run_psql(script: &Path, database_url: &str) -> anyhow::Result<()> {
     debug!(?script, "Running psql script…");
     let psql_script =
@@ -213,73 +216,56 @@ pub fn run_psql(script: &Path, database_url: &str) -> anyhow::Result<()> {
     Ok(())
 }
 
-/// Manage the tarball of the database dump.
-///
-/// Create the tarball, upload it to S3, and make sure it gets deleted.
-struct DumpTarball {
-    tarball_path: PathBuf,
-}
+fn create_tarball(export_dir: &Path) -> anyhow::Result<tempfile::NamedTempFile> {
+    debug!("Creating tarball file");
+    let tempfile = tempfile::NamedTempFile::new()?;
+    let encoder = flate2::write::GzEncoder::new(tempfile.as_file(), flate2::Compression::default());
 
-impl DumpTarball {
-    fn create(export_dir: &Path) -> anyhow::Result<Self> {
-        let tarball_path = export_dir.with_extension("tar.gz");
+    let mut archive = tar::Builder::new(encoder);
 
-        debug!(path = ?tarball_path, "Creating tarball file");
-        let tarfile = File::create(&tarball_path)?;
+    let tar_top_dir = PathBuf::from(export_dir.file_name().unwrap());
+    debug!(path = ?tar_top_dir, "Appending directory to tarball");
+    archive.append_dir(&tar_top_dir, export_dir)?;
 
-        let result = Self { tarball_path };
-        let encoder = flate2::write::GzEncoder::new(tarfile, flate2::Compression::default());
+    // Append readme, metadata, schemas.
+    let mut paths = Vec::new();
+    for entry in fs::read_dir(export_dir)? {
+        let entry = entry?;
+        let file_type = entry.file_type()?;
+        if file_type.is_file() {
+            paths.push((entry.path(), entry.file_name()));
+        }
+    }
+    // Sort paths to make the tarball deterministic.
+    paths.sort();
+    for (path, file_name) in paths {
+        let name_in_tar = tar_top_dir.join(file_name);
+        debug!(name = ?name_in_tar, "Appending file to tarball");
+        archive.append_path_with_name(path, name_in_tar)?;
+    }
 
-        let mut archive = tar::Builder::new(encoder);
+    // Append topologically sorted tables to make it possible to pipeline
+    // importing with gz extraction.
 
-        let tar_top_dir = PathBuf::from(export_dir.file_name().unwrap());
-        debug!(path = ?tar_top_dir, "Appending directory to tarball");
-        archive.append_dir(&tar_top_dir, export_dir)?;
+    debug!("Sorting database tables");
+    let visibility_config = VisibilityConfig::get();
+    let sorted_tables = visibility_config.topological_sort();
 
-        // Append readme, metadata, schemas.
-        let mut paths = Vec::new();
-        for entry in fs::read_dir(export_dir)? {
-            let entry = entry?;
-            let file_type = entry.file_type()?;
-            if file_type.is_file() {
-                paths.push((entry.path(), entry.file_name()));
-            }
-        }
-        // Sort paths to make the tarball deterministic.
-        paths.sort();
-        for (path, file_name) in paths {
-            let name_in_tar = tar_top_dir.join(file_name);
-            debug!(name = ?name_in_tar, "Appending file to tarball");
-            archive.append_path_with_name(path, name_in_tar)?;
-        }
+    let path = tar_top_dir.join("data");
+    debug!(?path, "Appending directory to tarball");
+    archive.append_dir(path, export_dir.join("data"))?;
+    for table in sorted_tables {
+        let csv_path = export_dir.join("data").join(table).with_extension("csv");
+        if csv_path.exists() {
+            let name_in_tar = tar_top_dir.join("data").join(table).with_extension("csv");
+            debug!(name = ?name_in_tar, "Appending file to tarball");
+            archive.append_path_with_name(csv_path, name_in_tar)?;
+        }
+    }
 
-        // Append topologically sorted tables to make it possible to pipeline
-        // importing with gz extraction.
-
-        debug!("Sorting database tables");
-        let visibility_config = VisibilityConfig::get();
-        let sorted_tables = visibility_config.topological_sort();
-
-        let path = tar_top_dir.join("data");
-        debug!(?path, "Appending directory to tarball");
-        archive.append_dir(path, export_dir.join("data"))?;
-        for table in sorted_tables {
-            let csv_path = export_dir.join("data").join(table).with_extension("csv");
-            if csv_path.exists() {
-                let name_in_tar = tar_top_dir.join("data").join(table).with_extension("csv");
-                debug!(name = ?name_in_tar, "Appending file to tarball");
-                archive.append_path_with_name(csv_path, name_in_tar)?;
-            }
-        }
-
-        Ok(result)
-    }
-}
-
-impl Drop for DumpTarball {
-    fn drop(&mut self) {
-        std::fs::remove_file(&self.tarball_path).unwrap();
-    }
-}
+    drop(archive);
+
+    Ok(tempfile)
+}
 
 mod configuration;
@@ -307,8 +293,8 @@ mod tests {
         fs::write(p.join("data").join("crate_owners.csv"), "").unwrap();
         fs::write(p.join("data").join("users.csv"), "").unwrap();
 
-        let tarball = DumpTarball::create(&p).unwrap();
-        let gz = GzDecoder::new(File::open(&*tarball.tarball_path).unwrap());
+        let tarball = create_tarball(&p).unwrap();
+        let gz = GzDecoder::new(File::open(tarball.path()).unwrap());
         let mut tar = Archive::new(gz);
 
         let entries = tar.entries().unwrap();
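
Returning the `NamedTempFile` from `create_tarball` moves cleanup into the caller's scope: the upload borrows `tarball.path()`, and the file is unlinked when the handle drops, on success and error paths alike, which is what makes the old `Drop for DumpTarball` redundant. A sketch of that caller-side pattern, where `upload` is a hypothetical stand-in for `env.storage.upload_db_dump`:

use std::path::Path;

// Hypothetical stand-in for `env.storage.upload_db_dump(...)`.
fn upload(path: &Path) -> std::io::Result<()> {
    println!("uploading {}", path.display());
    Ok(())
}

fn dump_and_upload() -> std::io::Result<()> {
    let tarball = tempfile::NamedTempFile::new()?; // as returned by `create_tarball`
    upload(tarball.path())?;
    Ok(())
} // `tarball` drops here (also on the `?` error path), deleting the file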
