From 8c5ea2942bf5e47483d5f9fe0b9e9c401e384f34 Mon Sep 17 00:00:00 2001 From: tanneberger Date: Tue, 10 Dec 2024 20:33:59 +0100 Subject: [PATCH] downstream sha1dir --- Cargo.lock | 58 +---------- Cargo.toml | 3 +- src/package/lock.rs | 2 +- src/package/management.rs | 2 +- src/util/mod.rs | 1 + src/util/sha1dir.rs | 200 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 209 insertions(+), 57 deletions(-) create mode 100644 src/util/sha1dir.rs diff --git a/Cargo.lock b/Cargo.lock index e9cd000..c135aa7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -174,9 +174,9 @@ dependencies = [ [[package]] name = "cpufeatures" -version = "0.2.14" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "608697df725056feaccfa42cffdaeeec3fccc4ffc38358ecd19b243e716a78e0" +checksum = "16b80225097f2e5ae4e7179dd2266824648f3e2f49d9134d584b76389d31c4c3" dependencies = [ "libc", ] @@ -360,12 +360,6 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" -[[package]] -name = "hermit-abi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" - [[package]] name = "home" version = "0.5.9" @@ -483,6 +477,7 @@ dependencies = [ "git2", "lazy_static", "log", + "parking_lot", "print_logger", "rayon", "regex", @@ -490,7 +485,7 @@ dependencies = [ "serde", "serde_derive", "serde_json", - "sha1dir", + "sha1", "tempfile", "toml", "url", @@ -545,16 +540,6 @@ dependencies = [ "minimal-lexical", ] -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = "once_cell" version = "1.20.2" @@ -826,20 +811,6 @@ dependencies = [ "digest", ] -[[package]] -name = "sha1dir" -version = "1.0.10" -source = "git+https://github.com/tanneberger/sha1dir#0ca1e593297836663bab09a7d69156fd580f8e2a" -dependencies = [ - "clap", - "js-sys", - "num_cpus", - "parking_lot", - "rayon", - "sha1", - "wasm-logger", -] - [[package]] name = "shlex" version = "1.3.0" @@ -1075,27 +1046,6 @@ version = "0.2.93" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c62a0a307cb4a311d3a07867860911ca130c3494e8c2719593806c08bc5d0484" -[[package]] -name = "wasm-logger" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "074649a66bb306c8f2068c9016395fa65d8e08d2affcbf95acf3c24c3ab19718" -dependencies = [ - "log", - "wasm-bindgen", - "web-sys", -] - -[[package]] -name = "web-sys" -version = "0.3.70" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26fdeaafd9bd129f65e7c031593c24d62186301e0c72c8978fa1678be7d532c0" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - [[package]] name = "which" version = "6.0.3" diff --git a/Cargo.toml b/Cargo.toml index 05497a3..c9e2748 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,5 +44,6 @@ url = { version = "2.5", features = ["serde"] } anyhow = "1.0" versions = { version = "6.3.2", features = ["serde"]} log = "0.4" -sha1dir = { version = "1.0", git = "https://github.com/tanneberger/sha1dir" } colored = "2.1.0" +parking_lot = "0.12" +sha1 = "0.10" diff --git a/src/package/lock.rs b/src/package/lock.rs index 9bf4b05..2c3bff1 100644 --- a/src/package/lock.rs +++ b/src/package/lock.rs @@ -1,6 +1,6 @@ +use crate::util::sha1dir; use colored::Colorize; use serde::{Deserialize, Deserializer, Serialize, Serializer}; -use sha1dir; use versions::Versioning; use log::error; diff --git a/src/package/management.rs b/src/package/management.rs index 61b9c2e..3e04b8f 100644 --- a/src/package/management.rs +++ b/src/package/management.rs @@ -2,8 +2,8 @@ use colored::Colorize; use log::error; use versions::{Requirement, Versioning}; +use crate::util::sha1dir; use crate::{GitCloneAndCheckoutCap, GitUrl}; -use sha1dir; use std::collections::HashMap; use std::fs; use std::fs::File; diff --git a/src/util/mod.rs b/src/util/mod.rs index ac4782f..5b41aa0 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -1,6 +1,7 @@ pub mod analyzer; mod command_line; pub mod errors; +pub mod sha1dir; pub use command_line::*; use std::path::{Path, PathBuf}; diff --git a/src/util/sha1dir.rs b/src/util/sha1dir.rs new file mode 100644 index 0000000..9c7560d --- /dev/null +++ b/src/util/sha1dir.rs @@ -0,0 +1,200 @@ +// Full credit goes to David Tolnay https://github.com/dtolnay/sha1dir +// I submitted a pull request to make the function available as a library +// function, but this pull request (#19) was rejected, hence this file. + +#![allow( + clippy::cast_possible_truncation, + clippy::let_underscore_untyped, + clippy::needless_collect, + clippy::needless_pass_by_value, + clippy::uninlined_format_args, + clippy::unnecessary_wraps, + clippy::unseparated_literal_suffix +)] + +use parking_lot::Mutex; +use rayon::{Scope, ThreadPoolBuilder}; +use sha1::{Digest, Sha1}; +use std::error::Error; +use std::fmt::{self, Display}; +use std::fs::{self, File, Metadata}; +use std::io::{self, Read, Write}; +use std::path::{Path, PathBuf}; +use std::process; +use std::sync::Once; + +type Result = std::result::Result>; + +pub fn die, E: Display>(path: P, error: E) -> ! { + static DIE: Once = Once::new(); + + DIE.call_once(|| { + let path = path.as_ref().display(); + let _ = writeln!(io::stderr(), "{}: {}", path, error,); + process::exit(1); + }); + + unreachable!() +} + +pub fn configure_thread_pool(threads: usize) { + let result = ThreadPoolBuilder::new().num_threads(threads).build_global(); + + // This is the only time the thread pool is initialized. + result.unwrap(); +} + +pub fn canonicalize>(path: P) -> PathBuf { + match fs::canonicalize(&path) { + Ok(canonical) => canonical, + Err(error) => die(path, error), + } +} + +pub struct Checksum { + bytes: Mutex<[u8; 20]>, +} + +impl Checksum { + fn new() -> Self { + Checksum { + bytes: Mutex::new([0u8; 20]), + } + } +} + +impl Display for Checksum { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + for i in self.bytes.lock().as_ref() { + write!(f, "{:02x}", i)?; + } + Ok(()) + } +} + +impl Checksum { + fn put(&self, rhs: Sha1) { + for (lhs, rhs) in self.bytes.lock().iter_mut().zip(rhs.finalize()) { + *lhs ^= rhs; + } + } +} + +fn get_file_as_byte_vec(filename: &Path) -> Vec { + let mut f = File::open(&filename).expect("no file found"); + let metadata = fs::metadata(&filename).expect("unable to read metadata"); + let mut buffer = vec![0; metadata.len() as usize]; + f.read(&mut buffer).expect("buffer overflow"); + + buffer +} + +pub fn checksum_current_dir(label: &Path, ignore_unknown_filetypes: bool) -> Checksum { + let checksum = Checksum::new(); + rayon::scope(|scope| { + if let Err(error) = (|| -> Result<()> { + for child in Path::new(".").read_dir()? { + let child = child?; + scope.spawn({ + let checksum = &checksum; + move |scope| { + entry( + scope, + label, + checksum, + Path::new(&child.file_name()), + ignore_unknown_filetypes, + ); + } + }); + } + Ok(()) + })() { + die(label, error); + } + }); + checksum +} + +fn entry<'scope>( + scope: &Scope<'scope>, + base: &'scope Path, + checksum: &'scope Checksum, + path: &Path, + ignore_unknown_filetypes: bool, +) { + let metadata = match path.symlink_metadata() { + Ok(metadata) => metadata, + Err(error) => die(base.join(path), error), + }; + + let file_type = metadata.file_type(); + let result = if file_type.is_file() { + file(checksum, path, metadata) + } else if file_type.is_symlink() { + symlink(checksum, path) + } else if file_type.is_dir() { + dir(scope, base, checksum, path, ignore_unknown_filetypes) + } else if ignore_unknown_filetypes { + Ok(()) + } else { + die(base.join(path), "Unsupported file type"); + }; + + if let Err(error) = result { + die(base.join(path), error); + } +} + +fn file(checksum: &Checksum, path: &Path, metadata: Metadata) -> Result<()> { + let mut sha = begin(path, b'f'); + + // Enforced by memmap: "memory map must have a non-zero length" + if metadata.len() > 0 { + sha.update(get_file_as_byte_vec(path)); + } + + checksum.put(sha); + + Ok(()) +} + +fn symlink(checksum: &Checksum, path: &Path) -> Result<()> { + let mut sha = begin(path, b'l'); + sha.update(path.read_link()?.as_os_str().as_encoded_bytes()); + checksum.put(sha); + + Ok(()) +} + +fn dir<'scope>( + scope: &Scope<'scope>, + base: &'scope Path, + checksum: &'scope Checksum, + path: &Path, + ignore_unknown_filetypes: bool, +) -> Result<()> { + let sha = begin(path, b'd'); + checksum.put(sha); + + for child in path.read_dir()? { + let child = child?.path(); + scope.spawn(move |scope| entry(scope, base, checksum, &child, ignore_unknown_filetypes)); + } + + Ok(()) +} + +fn begin(path: &Path, kind: u8) -> Sha1 { + let mut sha = Sha1::new(); + let path_bytes = path.as_os_str().as_encoded_bytes(); + sha.update([kind]); + sha.update((path_bytes.len() as u32).to_le_bytes()); + sha.update(path_bytes); + sha +} + +#[test] +fn test_cli() { + ::command().debug_assert(); +}