From de56245d2a6d82d757c097097c5fe766ef1a3769 Mon Sep 17 00:00:00 2001 From: Amos Wenger Date: Fri, 26 Jan 2024 13:15:48 +0100 Subject: [PATCH 1/3] chore: Refactor StoredEntry a little bit --- crates/jean/src/main.rs | 17 +++++---- crates/rc-zip/src/format/archive.rs | 37 ++++++++++++------- crates/rc-zip/src/format/directory_header.rs | 10 +++-- crates/rc-zip/src/reader/sync/entry_reader.rs | 24 ++++-------- crates/rc-zip/src/tests.rs | 4 +- 5 files changed, 49 insertions(+), 43 deletions(-) diff --git a/crates/jean/src/main.rs b/crates/jean/src/main.rs index b543299..8ac74c7 100644 --- a/crates/jean/src/main.rs +++ b/crates/jean/src/main.rs @@ -96,8 +96,8 @@ fn do_main(cli: Cli) -> Result<(), Box> { rc_zip::EntryContents::File => { methods.insert(entry.method()); num_files += 1; - compressed_size += entry.compressed_size; - uncompressed_size += entry.uncompressed_size; + compressed_size += entry.inner.compressed_size; + uncompressed_size += entry.inner.uncompressed_size; } } } @@ -135,7 +135,7 @@ fn do_main(cli: Cli) -> Result<(), Box> { } else { Cow::from(entry.name().truncate_path(55)) }, - size = format_size(entry.uncompressed_size, BINARY), + size = format_size(entry.inner.uncompressed_size, BINARY), ); if verbose { print!( @@ -169,7 +169,7 @@ fn do_main(cli: Cli) -> Result<(), Box> { let mut uncompressed_size: u64 = 0; for entry in reader.entries() { if let EntryContents::File = entry.contents() { - uncompressed_size += entry.uncompressed_size; + uncompressed_size += entry.inner.uncompressed_size; } } @@ -250,10 +250,13 @@ fn do_main(cli: Cli) -> Result<(), Box> { let mut entry_writer = File::create(path)?; let entry_reader = entry.reader(); let before_entry_bytes = done_bytes; - let mut progress_reader = - ProgressRead::new(entry_reader, entry.uncompressed_size, |prog| { + let mut progress_reader = ProgressRead::new( + entry_reader, + entry.inner.uncompressed_size, + |prog| { pbar.set_position(before_entry_bytes + prog.done); - }); + }, + ); let copied_bytes = std::io::copy(&mut progress_reader, &mut entry_writer)?; done_bytes = before_entry_bytes + copied_bytes; diff --git a/crates/rc-zip/src/format/archive.rs b/crates/rc-zip/src/format/archive.rs index 1e37ce1..5c9240c 100644 --- a/crates/rc-zip/src/format/archive.rs +++ b/crates/rc-zip/src/format/archive.rs @@ -87,12 +87,6 @@ pub struct StoredEntry { /// This contains the entry's name, timestamps, comment, compression method. pub entry: Entry, - /// CRC-32 hash as found in the central directory. - /// - /// Note that this may be zero, and the actual CRC32 might be in the local header, or (more - /// commonly) in the data descriptor instead. - pub crc32: u32, - /// Offset of the local file header in the zip file /// /// ```text @@ -108,14 +102,6 @@ pub struct StoredEntry { /// ``` pub header_offset: u64, - /// Size in bytes, after compression - pub compressed_size: u64, - - /// Size in bytes, before compression - /// - /// This will be zero for directories. - pub uncompressed_size: u64, - /// External attributes (zip) pub external_attrs: u32, @@ -151,6 +137,25 @@ pub struct StoredEntry { /// but they are also made available here raw. pub extra_fields: Vec, + pub inner: StoredEntryInner, +} + +#[derive(Clone, Copy, Debug)] +pub struct StoredEntryInner { + /// CRC-32 hash as found in the central directory. + /// + /// Note that this may be zero, and the actual CRC32 might be in the local header, or (more + /// commonly) in the data descriptor instead. + pub crc32: u32, + + /// Size in bytes, after compression + pub compressed_size: u64, + + /// Size in bytes, before compression + /// + /// This will be zero for directories. + pub uncompressed_size: u64, + /// True if this entry was read from a zip64 archive pub is_zip64: bool, } @@ -172,6 +177,7 @@ impl StoredEntry { } /// The compression method used for this entry + #[inline(always)] pub fn method(&self) -> Method { self.entry.method } @@ -184,6 +190,7 @@ impl StoredEntry { /// epoch, if something went really wrong. /// /// If you're reading this after the year 2038, or after the year 2108, godspeed. + #[inline(always)] pub fn modified(&self) -> DateTime { self.entry.modified } @@ -191,6 +198,7 @@ impl StoredEntry { /// This entry's "created" timestamp, if available. /// /// See [StoredEntry::modified()] for caveats. + #[inline(always)] pub fn created(&self) -> Option<&DateTime> { self.entry.created.as_ref() } @@ -198,6 +206,7 @@ impl StoredEntry { /// This entry's "last accessed" timestamp, if available. /// /// See [StoredEntry::modified()] for caveats. + #[inline(always)] pub fn accessed(&self) -> Option<&DateTime> { self.entry.accessed.as_ref() } diff --git a/crates/rc-zip/src/format/directory_header.rs b/crates/rc-zip/src/format/directory_header.rs index 05e9a30..cabdf5c 100644 --- a/crates/rc-zip/src/format/directory_header.rs +++ b/crates/rc-zip/src/format/directory_header.rs @@ -228,9 +228,12 @@ impl DirectoryHeader { reader_version: self.reader_version, flags: self.flags, - crc32: self.crc32, - compressed_size, - uncompressed_size, + inner: StoredEntryInner { + crc32: self.crc32, + compressed_size, + uncompressed_size, + is_zip64, + }, header_offset, uid, @@ -240,7 +243,6 @@ impl DirectoryHeader { extra_fields, external_attrs: self.external_attrs, - is_zip64, }) } } diff --git a/crates/rc-zip/src/reader/sync/entry_reader.rs b/crates/rc-zip/src/reader/sync/entry_reader.rs index a433de4..81280d8 100644 --- a/crates/rc-zip/src/reader/sync/entry_reader.rs +++ b/crates/rc-zip/src/reader/sync/entry_reader.rs @@ -48,13 +48,8 @@ where rd: EOFNormalizer, eof: bool, state: State, - // entry info required for extraction - // copy them over to reduce the amount of necessary lifetimes/references - compressed_size: u64, - uncompressed_size: u64, + inner: StoredEntryInner, method: Method, - is_zip64: bool, - crc32: u32, } impl io::Read for EntryReader @@ -76,7 +71,7 @@ where trace!("local file header: {:#?}", header); transition!(self.state => (S::ReadLocalHeader { buffer }) { - let limited_reader = LimitedReader::new(buffer, self.compressed_size); + let limited_reader = LimitedReader::new(buffer, self.inner.compressed_size); let decoder: Box> = match self.method { Method::Store => Box::new(StoreDecoder::new(limited_reader)), Method::Deflate => Box::new(deflate::Decoder::new(limited_reader)), @@ -158,7 +153,7 @@ where buffer.available_space() ); - match DataDescriptorRecord::parse(buffer.data(), self.is_zip64) { + match DataDescriptorRecord::parse(buffer.data(), self.inner.is_zip64) { Ok((_remaining, descriptor)) => { trace!("data descriptor = {:#?}", descriptor); transition!(self.state => (S::ReadDataDescriptor { metrics, header, .. }) { @@ -195,16 +190,16 @@ where ref header, ref descriptor, } => { - let expected_crc32 = if self.crc32 != 0 { - self.crc32 + let expected_crc32 = if self.inner.crc32 != 0 { + self.inner.crc32 } else if let Some(descriptor) = descriptor.as_ref() { descriptor.crc32 } else { header.crc32 }; - let expected_size = if self.uncompressed_size != 0 { - self.uncompressed_size + let expected_size = if self.inner.uncompressed_size != 0 { + self.inner.uncompressed_size } else if let Some(descriptor) = descriptor.as_ref() { descriptor.uncompressed_size } else { @@ -252,11 +247,8 @@ where state: State::ReadLocalHeader { buffer: circular::Buffer::with_capacity(Self::DEFAULT_BUFFER_SIZE), }, - compressed_size: entry.compressed_size, - uncompressed_size: entry.uncompressed_size, method: entry.method(), - is_zip64: entry.is_zip64, - crc32: entry.crc32, + inner: entry.inner, } } } diff --git a/crates/rc-zip/src/tests.rs b/crates/rc-zip/src/tests.rs index 7976a79..2684521 100644 --- a/crates/rc-zip/src/tests.rs +++ b/crates/rc-zip/src/tests.rs @@ -337,7 +337,7 @@ fn test_fsm() { let sync_archive = bs.read_zip().unwrap(); for (se, e) in sync_archive.entries().zip(archive.entries()) { assert_eq!(se.name(), e.name()); - assert_eq!(se.compressed_size, e.compressed_size); - assert_eq!(se.uncompressed_size, e.uncompressed_size); + assert_eq!(se.inner.compressed_size, e.inner.compressed_size); + assert_eq!(se.inner.uncompressed_size, e.inner.uncompressed_size); } } From fa619bdcfa76761d9c654ad32c3f9e07d06eae2b Mon Sep 17 00:00:00 2001 From: Amos Wenger Date: Fri, 26 Jan 2024 13:22:10 +0100 Subject: [PATCH 2/3] Group use std directives --- crates/jean/src/main.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/crates/jean/src/main.rs b/crates/jean/src/main.rs index 8ac74c7..7ac507c 100644 --- a/crates/jean/src/main.rs +++ b/crates/jean/src/main.rs @@ -1,12 +1,15 @@ use clap::{Parser, Subcommand}; use humansize::{format_size, BINARY}; use rc_zip::{prelude::*, EntryContents}; -use std::path::PathBuf; -use std::time::Duration; -use std::{borrow::Cow, fmt}; + use std::{ + borrow::Cow, + collections::HashSet, + fmt, fs::File, io::{self, Read}, + path::PathBuf, + time::Duration, }; struct Optional(Option); @@ -73,7 +76,6 @@ fn do_main(cli: Cli) -> Result<(), Box> { println!("Comment:\n{}", comment); } - use std::collections::HashSet; let mut creator_versions = HashSet::::new(); let mut reader_versions = HashSet::::new(); let mut methods = HashSet::::new(); From 0f59509dca96de9818de98134bd335b8ce155be3 Mon Sep 17 00:00:00 2001 From: Amos Wenger Date: Fri, 26 Jan 2024 13:32:03 +0100 Subject: [PATCH 3/3] Check docs in CI, deny warnings (Also fix some intra-doc links) --- .github/workflows/test.yml | 4 +++ crates/rc-zip/src/format/archive.rs | 47 ++++++++++++++++++++++++++--- crates/rc-zip/src/lib.rs | 4 +-- 3 files changed, 49 insertions(+), 6 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d992603..60f6170 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -28,6 +28,10 @@ jobs: uses: taiki-e/install-action@v2 with: tool: just,nextest,cargo-llvm-cov + - name: Run cargo doc, deny warnings + run: | + export RUSTDOCFLAGS="-D warnings" + cargo doc --all-features --no-deps - name: Run cargo clippy run: | cargo clippy --all-targets --all-features diff --git a/crates/rc-zip/src/format/archive.rs b/crates/rc-zip/src/format/archive.rs index 5c9240c..d3b7618 100644 --- a/crates/rc-zip/src/format/archive.rs +++ b/crates/rc-zip/src/format/archive.rs @@ -4,7 +4,7 @@ use crate::format::*; /// along with a list of [entries][StoredEntry]. /// /// It is obtained via an [ArchiveReader](crate::reader::ArchiveReader), or via a higher-level API -/// like the [ReadZip](crate::reader::ReadZip) trait. +/// like the [ReadZip](crate::reader::sync::ReadZip) trait. pub struct Archive { pub(crate) size: u64, pub(crate) encoding: Encoding, @@ -161,14 +161,53 @@ pub struct StoredEntryInner { } impl StoredEntry { - /// Returns the entry's name + /// Returns the entry's name. See also + /// [sanitized_name()](StoredEntry::sanitized_name), which returns a + /// sanitized version of the name. /// - /// This should be a relative path, separated by `/`. However, there are zip files in the wild - /// with all sorts of evil variants, so, be conservative in what you accept. + /// This should be a relative path, separated by `/`. However, there are zip + /// files in the wild with all sorts of evil variants, so, be conservative + /// in what you accept. pub fn name(&self) -> &str { self.entry.name.as_ref() } + /// Returns a sanitized version of the entry's name, if it + /// seems safe. In particular, if this method feels like the + /// entry name is trying to do a zip slip (cf. + /// ), it'll return + /// None. + /// + /// Other than that, it will strip any leading slashes on non-Windows OSes. + pub fn sanitized_name(&self) -> Option<&str> { + let name = self.name(); + + // refuse entries with traversed/absolute path to mitigate zip slip + if name.contains("..") { + return None; + } + + #[cfg(windows)] + { + if name.contains(":\\") || name.starts_with("\\") { + return None; + } + Some(name) + } + + #[cfg(not(windows))] + { + // strip absolute prefix on entries pointing to root path + let mut entry_chars = name.chars(); + let mut name = name; + while name.starts_with('/') { + entry_chars.next(); + name = entry_chars.as_str() + } + Some(name) + } + } + /// The entry's comment, if any. /// /// When reading a zip file, an empty comment results in None. diff --git a/crates/rc-zip/src/lib.rs b/crates/rc-zip/src/lib.rs index d0cdcf2..7ca49bf 100644 --- a/crates/rc-zip/src/lib.rs +++ b/crates/rc-zip/src/lib.rs @@ -4,12 +4,12 @@ //! //! ### Reading //! -//! [ArchiveReader](ArchiveReader) is your first stop. It +//! [ArchiveReader](reader::ArchiveReader) is your first stop. It //! ensures we are dealing with a valid zip archive, and reads the central //! directory. It does not perform I/O itself, but rather, it is a state machine //! that asks for reads at specific offsets. //! -//! An [Archive](Archive) contains a full list of [entries](types::StoredEntry), +//! An [Archive] contains a full list of [entries](StoredEntry), //! which you can then extract. //! //! ### Writing