From 652372bfb93411f6bfc33fe32d13710af0b89bad Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Fri, 24 May 2024 12:49:36 +0200 Subject: [PATCH 1/3] workers/jobs/dump_db: Replace `DumpTarball` struct with `tempfile::NamedTempFile` --- src/worker/jobs/dump_db.rs | 105 ++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 61 deletions(-) diff --git a/src/worker/jobs/dump_db.rs b/src/worker/jobs/dump_db.rs index 6edb301801..6ccf545868 100644 --- a/src/worker/jobs/dump_db.rs +++ b/src/worker/jobs/dump_db.rs @@ -38,13 +38,13 @@ impl BackgroundJob for DumpDb { directory.populate(&database_url)?; info!(path = ?directory.export_dir, "Creating tarball"); - DumpTarball::create(&directory.export_dir) + create_tarball(&directory.export_dir) }) .await?; info!("Uploading tarball"); env.storage - .upload_db_dump(target_name, &tarball.tarball_path) + .upload_db_dump(target_name, tarball.path()) .await?; info!("Database dump tarball uploaded"); @@ -213,73 +213,56 @@ pub fn run_psql(script: &Path, database_url: &str) -> anyhow::Result<()> { Ok(()) } -/// Manage the tarball of the database dump. -/// -/// Create the tarball, upload it to S3, and make sure it gets deleted. -struct DumpTarball { - tarball_path: PathBuf, -} +fn create_tarball(export_dir: &Path) -> anyhow::Result { + debug!("Creating tarball file"); + let tempfile = tempfile::NamedTempFile::new()?; + let encoder = flate2::write::GzEncoder::new(tempfile.as_file(), flate2::Compression::default()); -impl DumpTarball { - fn create(export_dir: &Path) -> anyhow::Result { - let tarball_path = export_dir.with_extension("tar.gz"); + let mut archive = tar::Builder::new(encoder); - debug!(path = ?tarball_path, "Creating tarball file"); - let tarfile = File::create(&tarball_path)?; + let tar_top_dir = PathBuf::from(export_dir.file_name().unwrap()); + debug!(path = ?tar_top_dir, "Appending directory to tarball"); + archive.append_dir(&tar_top_dir, export_dir)?; - let result = Self { tarball_path }; - let encoder = flate2::write::GzEncoder::new(tarfile, flate2::Compression::default()); + // Append readme, metadata, schemas. + let mut paths = Vec::new(); + for entry in fs::read_dir(export_dir)? { + let entry = entry?; + let file_type = entry.file_type()?; + if file_type.is_file() { + paths.push((entry.path(), entry.file_name())); + } + } + // Sort paths to make the tarball deterministic. + paths.sort(); + for (path, file_name) in paths { + let name_in_tar = tar_top_dir.join(file_name); + debug!(name = ?name_in_tar, "Appending file to tarball"); + archive.append_path_with_name(path, name_in_tar)?; + } - let mut archive = tar::Builder::new(encoder); + // Append topologically sorted tables to make it possible to pipeline + // importing with gz extraction. - let tar_top_dir = PathBuf::from(export_dir.file_name().unwrap()); - debug!(path = ?tar_top_dir, "Appending directory to tarball"); - archive.append_dir(&tar_top_dir, export_dir)?; + debug!("Sorting database tables"); + let visibility_config = VisibilityConfig::get(); + let sorted_tables = visibility_config.topological_sort(); - // Append readme, metadata, schemas. - let mut paths = Vec::new(); - for entry in fs::read_dir(export_dir)? { - let entry = entry?; - let file_type = entry.file_type()?; - if file_type.is_file() { - paths.push((entry.path(), entry.file_name())); - } - } - // Sort paths to make the tarball deterministic. - paths.sort(); - for (path, file_name) in paths { - let name_in_tar = tar_top_dir.join(file_name); + let path = tar_top_dir.join("data"); + debug!(?path, "Appending directory to tarball"); + archive.append_dir(path, export_dir.join("data"))?; + for table in sorted_tables { + let csv_path = export_dir.join("data").join(table).with_extension("csv"); + if csv_path.exists() { + let name_in_tar = tar_top_dir.join("data").join(table).with_extension("csv"); debug!(name = ?name_in_tar, "Appending file to tarball"); - archive.append_path_with_name(path, name_in_tar)?; + archive.append_path_with_name(csv_path, name_in_tar)?; } - - // Append topologically sorted tables to make it possible to pipeline - // importing with gz extraction. - - debug!("Sorting database tables"); - let visibility_config = VisibilityConfig::get(); - let sorted_tables = visibility_config.topological_sort(); - - let path = tar_top_dir.join("data"); - debug!(?path, "Appending directory to tarball"); - archive.append_dir(path, export_dir.join("data"))?; - for table in sorted_tables { - let csv_path = export_dir.join("data").join(table).with_extension("csv"); - if csv_path.exists() { - let name_in_tar = tar_top_dir.join("data").join(table).with_extension("csv"); - debug!(name = ?name_in_tar, "Appending file to tarball"); - archive.append_path_with_name(csv_path, name_in_tar)?; - } - } - - Ok(result) } -} -impl Drop for DumpTarball { - fn drop(&mut self) { - std::fs::remove_file(&self.tarball_path).unwrap(); - } + drop(archive); + + Ok(tempfile) } mod configuration; @@ -307,8 +290,8 @@ mod tests { fs::write(p.join("data").join("crate_owners.csv"), "").unwrap(); fs::write(p.join("data").join("users.csv"), "").unwrap(); - let tarball = DumpTarball::create(&p).unwrap(); - let gz = GzDecoder::new(File::open(&*tarball.tarball_path).unwrap()); + let tarball = create_tarball(&p).unwrap(); + let gz = GzDecoder::new(File::open(tarball.path()).unwrap()); let mut tar = Archive::new(gz); let entries = tar.entries().unwrap(); From 7560b9ecf4e4bebb7d35a3de4eb42bfd0dda9626 Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Fri, 24 May 2024 15:03:29 +0200 Subject: [PATCH 2/3] workers/jobs/dump_db: Use `tempfile::tempdir()` in `DumpDirectory` struct --- src/worker/jobs/dump_db.rs | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/worker/jobs/dump_db.rs b/src/worker/jobs/dump_db.rs index 6ccf545868..72e3c8e9d0 100644 --- a/src/worker/jobs/dump_db.rs +++ b/src/worker/jobs/dump_db.rs @@ -71,15 +71,23 @@ impl BackgroundJob for DumpDb { /// make sure it gets deleted again even in the case of an error. #[derive(Debug)] pub struct DumpDirectory { + /// The temporary directory that contains the export directory. This is + /// allowing `dead_code` since we're only relying on the `Drop` + /// implementation to clean up the directory. + #[allow(dead_code)] + tempdir: tempfile::TempDir, + pub timestamp: chrono::DateTime, pub export_dir: PathBuf, } impl DumpDirectory { pub fn create() -> anyhow::Result { + let tempdir = tempfile::tempdir()?; + let timestamp = chrono::Utc::now(); let timestamp_str = timestamp.format("%Y-%m-%d-%H%M%S").to_string(); - let export_dir = std::env::temp_dir().join("dump-db").join(timestamp_str); + let export_dir = tempdir.path().join(timestamp_str); debug!(?export_dir, "Creating database dump folder…"); fs::create_dir_all(&export_dir).with_context(|| { @@ -90,6 +98,7 @@ impl DumpDirectory { })?; Ok(Self { + tempdir, timestamp, export_dir, }) @@ -179,12 +188,6 @@ impl DumpDirectory { } } -impl Drop for DumpDirectory { - fn drop(&mut self) { - std::fs::remove_dir_all(&self.export_dir).unwrap(); - } -} - pub fn run_psql(script: &Path, database_url: &str) -> anyhow::Result<()> { debug!(?script, "Running psql script…"); let psql_script = From d053f32f8053262ea8fa5cd42b60880c6e1212bf Mon Sep 17 00:00:00 2001 From: Tobias Bieniek Date: Tue, 28 May 2024 11:29:23 +0200 Subject: [PATCH 3/3] tests/dump_db: Remove obsolete dump dir mutex Now that we are using random temporary directories we no longer need this mutex. --- src/tests/dump_db.rs | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/tests/dump_db.rs b/src/tests/dump_db.rs index 4c0d38a92c..d9582d9a8e 100644 --- a/src/tests/dump_db.rs +++ b/src/tests/dump_db.rs @@ -10,19 +10,12 @@ use once_cell::sync::Lazy; use regex::Regex; use secrecy::ExposeSecret; use std::io::Read; -use std::sync::Mutex; use tar::Archive; -/// Mutex to ensure that only one test is dumping the database at a time, since -/// the dump directory is shared between all invocations of the background job. -static DUMP_DIR_MUTEX: Lazy> = Lazy::new(|| Mutex::new(())); - static PATH_DATE_RE: Lazy = Lazy::new(|| Regex::new(r"^\d{4}-\d{2}-\d{2}-\d{6}").unwrap()); #[tokio::test(flavor = "multi_thread")] async fn test_dump_db_job() { - let _guard = DUMP_DIR_MUTEX.lock(); - let (app, _, _, token) = TestApp::full().with_token(); app.db(|conn| { @@ -85,8 +78,6 @@ fn tar_paths(archive: &mut Archive) -> Vec { #[test] fn dump_db_and_reimport_dump() { - let _guard = DUMP_DIR_MUTEX.lock(); - crates_io::util::tracing::init_for_test(); let db_one = TestDatabase::new(); @@ -109,8 +100,6 @@ fn dump_db_and_reimport_dump() { #[test] fn test_sql_scripts() { - let _guard = DUMP_DIR_MUTEX.lock(); - crates_io::util::tracing::init_for_test(); let db = TestDatabase::new();