From d713ee8f81642b62d2bacf2875e4db9cce2426f8 Mon Sep 17 00:00:00 2001
From: Amos Wenger
Date: Mon, 5 Feb 2024 19:22:13 +0100
Subject: [PATCH] Start unifying local / central file headers

---
 rc-zip-sync/examples/jean.rs | 20 +-
 rc-zip-sync/src/lib.rs | 2 +-
 rc-zip-sync/src/read_zip.rs | 20 +-
 rc-zip-sync/src/streaming_entry_reader.rs | 6 +-
 rc-zip/src/corpus/mod.rs | 18 +-
 rc-zip/src/fsm/archive.rs | 12 +-
 rc-zip/src/fsm/entry/mod.rs | 19 +-
 rc-zip/src/parse/archive.rs | 217 ++++++++----------
 ...er.rs => central_directory_file_header.rs} | 168 ++++----------
 rc-zip/src/parse/extra_field.rs | 71 ++++--
 .../src/parse/{local.rs => local_headers.rs} | 4 +-
 rc-zip/src/parse/mod.rs | 8 +-
 rc-zip/src/parse/raw.rs | 8 -
 rc-zip/src/parse/version.rs | 135 +++++------
 14 files changed, 303 insertions(+), 405 deletions(-)
 rename rc-zip/src/parse/{directory_header.rs => central_directory_file_header.rs} (51%)
 rename rc-zip/src/parse/{local.rs => local_headers.rs} (98%)

diff --git a/rc-zip-sync/examples/jean.rs b/rc-zip-sync/examples/jean.rs
index be5458a..cff7351 100644
--- a/rc-zip-sync/examples/jean.rs
+++ b/rc-zip-sync/examples/jean.rs
@@ -1,7 +1,7 @@
 use cfg_if::cfg_if;
 use clap::{Parser, Subcommand};
 use humansize::{format_size, BINARY};
-use rc_zip::parse::{Archive, EntryContents, Method, Version};
+use rc_zip::parse::{Archive, EntryKind, Method, Version};
 use rc_zip_sync::{ReadZip, ReadZipEntriesStreaming};

 use std::{
@@ -102,14 +102,14 @@ fn do_main(cli: Cli) -> Result<(), Box> {
             for entry in archive.entries() {
                 creator_versions.insert(entry.creator_version);
                 reader_versions.insert(entry.reader_version);
-                match entry.contents() {
-                    EntryContents::Symlink => {
+                match entry.kind() {
+                    EntryKind::Symlink => {
                         num_symlinks += 1;
                     }
-                    EntryContents::Directory => {
+                    EntryKind::Directory => {
                         num_dirs += 1;
                     }
-                    EntryContents::File => {
+                    EntryKind::File => {
                         methods.insert(entry.method());
                         num_files += 1;
                         compressed_size += entry.inner.compressed_size;
@@ -166,7 +166,7 @@ fn do_main(cli: Cli) -> Result<(), Box> {
                     gid = Optional(entry.gid),
                 );

-                if let EntryContents::Symlink = entry.contents() {
+                if let EntryKind::Symlink = entry.kind() {
                     let mut target = String::new();
                     entry.reader().read_to_string(&mut target).unwrap();
                     print!("\t{target}", target = target);
@@ -193,7 +193,7 @@ fn do_main(cli: Cli) -> Result<(), Box> {
             let mut num_symlinks = 0;
             let mut uncompressed_size: u64 = 0;
             for entry in reader.entries() {
-                if let EntryContents::File = entry.contents() {
+                if let EntryKind::File = entry.kind() {
                     uncompressed_size += entry.inner.uncompressed_size;
                 }
             }
@@ -220,7 +220,7 @@ fn do_main(cli: Cli) -> Result<(), Box> {
                 pbar.set_message(entry_name.to_string());
                 match entry.contents() {
-                    EntryContents::Symlink => {
+                    EntryKind::Symlink => {
                         num_symlinks += 1;
                         cfg_if!
{ @@ -256,7 +256,7 @@ fn do_main(cli: Cli) -> Result<(), Box> { } } } - EntryContents::Directory => { + EntryKind::Directory => { num_dirs += 1; let path = dir.join(entry_name); std::fs::create_dir_all( @@ -264,7 +264,7 @@ fn do_main(cli: Cli) -> Result<(), Box> { .expect("all full entry paths should have parent paths"), )?; } - EntryContents::File => { + EntryKind::File => { num_files += 1; let path = dir.join(entry_name); std::fs::create_dir_all( diff --git a/rc-zip-sync/src/lib.rs b/rc-zip-sync/src/lib.rs index 1bb0ef1..469f489 100644 --- a/rc-zip-sync/src/lib.rs +++ b/rc-zip-sync/src/lib.rs @@ -14,5 +14,5 @@ mod streaming_entry_reader; // re-exports pub use rc_zip; pub use read_zip::{ - HasCursor, ReadZip, ReadZipEntriesStreaming, ReadZipWithSize, SyncArchive, SyncStoredEntry, + HasCursor, ReadZip, ReadZipEntriesStreaming, ReadZipWithSize, SyncArchive, SyncEntry, }; diff --git a/rc-zip-sync/src/read_zip.rs b/rc-zip-sync/src/read_zip.rs index 845fe9b..16ade0b 100644 --- a/rc-zip-sync/src/read_zip.rs +++ b/rc-zip-sync/src/read_zip.rs @@ -2,9 +2,7 @@ use rc_zip::chrono::{DateTime, TimeZone, Utc}; use rc_zip::{ error::{Error, FormatError}, fsm::{ArchiveFsm, FsmResult}, - parse::{ - Archive, ExtraField, ExtraFieldSettings, LocalFileHeaderRecord, NtfsAttr, StoredEntry, - }, + parse::{Archive, ExtraField, ExtraFieldSettings, LocalFileHeader, NtfsAttr, StoredEntry}, }; use tracing::trace; use winnow::{ @@ -123,8 +121,8 @@ where F: HasCursor, { /// Iterate over all files in this zip, read from the central directory. - pub fn entries(&self) -> impl Iterator> { - self.archive.entries().map(move |entry| SyncStoredEntry { + pub fn entries(&self) -> impl Iterator> { + self.archive.entries().map(move |entry| SyncEntry { file: self.file, entry, }) @@ -132,11 +130,11 @@ where /// Attempts to look up an entry by name. This is usually a bad idea, /// as names aren't necessarily normalized in zip archives. - pub fn by_name>(&self, name: N) -> Option> { + pub fn by_name>(&self, name: N) -> Option> { self.archive .entries() .find(|&x| x.name() == name.as_ref()) - .map(|entry| SyncStoredEntry { + .map(|entry| SyncEntry { file: self.file, entry, }) @@ -144,12 +142,12 @@ where } /// A zip entry, read synchronously from a file or other I/O resource. 
-pub struct SyncStoredEntry<'a, F> { +pub struct SyncEntry<'a, F> { file: &'a F, entry: &'a StoredEntry, } -impl Deref for SyncStoredEntry<'_, F> { +impl Deref for SyncEntry<'_, F> { type Target = StoredEntry; fn deref(&self) -> &Self::Target { @@ -157,7 +155,7 @@ impl Deref for SyncStoredEntry<'_, F> { } } -impl<'a, F> SyncStoredEntry<'a, F> +impl<'a, F> SyncEntry<'a, F> where F: HasCursor, { @@ -259,7 +257,7 @@ where buf.fill(n); let mut input = Partial::new(buf.data()); - match LocalFileHeaderRecord::parser.parse_next(&mut input) { + match LocalFileHeader::parser.parse_next(&mut input) { Ok(header) => { let consumed = input.as_bytes().offset_from(&buf.data()); trace!(?header, %consumed, "Got local file header record!"); diff --git a/rc-zip-sync/src/streaming_entry_reader.rs b/rc-zip-sync/src/streaming_entry_reader.rs index 9705987..62c8f3a 100644 --- a/rc-zip-sync/src/streaming_entry_reader.rs +++ b/rc-zip-sync/src/streaming_entry_reader.rs @@ -1,7 +1,7 @@ use oval::Buffer; use rc_zip::{ fsm::{EntryFsm, FsmResult}, - parse::LocalFileHeaderRecord, + parse::LocalFileHeader, }; use std::{ io::{self, Write}, @@ -10,7 +10,7 @@ use std::{ use tracing::trace; pub struct StreamingEntryReader { - header: LocalFileHeaderRecord, + header: LocalFileHeader, rd: R, state: State, } @@ -34,7 +34,7 @@ impl StreamingEntryReader where R: io::Read, { - pub(crate) fn new(remain: Buffer, header: LocalFileHeaderRecord, rd: R) -> Self { + pub(crate) fn new(remain: Buffer, header: LocalFileHeader, rd: R) -> Self { Self { rd, header, diff --git a/rc-zip/src/corpus/mod.rs b/rc-zip/src/corpus/mod.rs index 2cc8cc9..cab0c15 100644 --- a/rc-zip/src/corpus/mod.rs +++ b/rc-zip/src/corpus/mod.rs @@ -9,7 +9,7 @@ use chrono::{DateTime, FixedOffset, TimeZone, Timelike, Utc}; use crate::{ encoding::Encoding, error::Error, - parse::{Archive, EntryContents, StoredEntry}, + parse::{Archive, Entry, EntryKind}, }; pub struct Case { @@ -246,14 +246,12 @@ pub fn check_case(test: &Case, archive: Result<&Archive, &Error>) { // then each implementation should check individual files } -pub fn check_file_against(file: &CaseFile, entry: &StoredEntry, actual_bytes: &[u8]) { +pub fn check_file_against(file: &CaseFile, entry: &Entry, actual_bytes: &[u8]) { if let Some(expected) = file.modified { assert_eq!( - expected, - entry.modified(), + expected, entry.modified, "entry {} should have modified = {:?}", - entry.name(), - expected + entry.name, expected ) } @@ -262,10 +260,10 @@ pub fn check_file_against(file: &CaseFile, entry: &StoredEntry, actual_bytes: &[ } // I have honestly yet to see a zip file _entry_ with a comment. 
-    assert!(entry.comment().is_none());
+    assert!(entry.comment.is_empty());

-    match entry.contents() {
-        EntryContents::File => {
+    match entry.kind() {
+        EntryKind::File => {
             match &file.content {
                 FileContent::Unchecked => {
                     // ah well
@@ -283,7 +281,7 @@ pub fn check_file_against(file: &CaseFile, entry: &StoredEntry, actual_bytes: &[
                 }
             }
         }
-        EntryContents::Symlink | EntryContents::Directory => {
+        EntryKind::Symlink | EntryKind::Directory => {
             assert!(matches!(file.content, FileContent::Unchecked));
         }
     }
diff --git a/rc-zip/src/fsm/archive.rs b/rc-zip/src/fsm/archive.rs
index 6641d59..080994c 100644
--- a/rc-zip/src/fsm/archive.rs
+++ b/rc-zip/src/fsm/archive.rs
@@ -3,8 +3,8 @@ use crate::{
     encoding::Encoding,
     error::{Error, FormatError},
     parse::{
-        Archive, DirectoryHeader, EndOfCentralDirectory, EndOfCentralDirectory64Locator,
-        EndOfCentralDirectory64Record, EndOfCentralDirectoryRecord, Located, StoredEntry,
+        Archive, CentralDirectoryFileHeader, EndOfCentralDirectory, EndOfCentralDirectory64Locator,
+        EndOfCentralDirectory64Record, EndOfCentralDirectoryRecord, Entry, Located,
     },
 };
@@ -66,7 +66,7 @@ enum State {
     /// Reading all headers from the central directory
     ReadCentralDirectory {
         eocd: EndOfCentralDirectory,
-        directory_headers: Vec<DirectoryHeader>,
+        directory_headers: Vec<CentralDirectoryFileHeader>,
     },

     #[default]
@@ -256,7 +256,7 @@ impl ArchiveFsm {
                     "initial offset & len"
                 );
                 'read_headers: while !input.is_empty() {
-                    match DirectoryHeader::parser.parse_next(&mut input) {
+                    match CentralDirectoryFileHeader::parser.parse_next(&mut input) {
                         Ok(dh) => {
                             trace!(
                                 input_empty_now = input.is_empty(),
@@ -336,9 +336,9 @@ impl ArchiveFsm {
                 let is_zip64 = eocd.dir64.is_some();
                 let global_offset = eocd.global_offset as u64;

-                let entries: Result<Vec<StoredEntry>, Error> = directory_headers
+                let entries: Result<Vec<Entry>, Error> = directory_headers
                     .iter()
-                    .map(|x| x.as_stored_entry(is_zip64, encoding, global_offset))
+                    .map(|x| x.as_entry(encoding, global_offset))
                     .collect();
                 let entries = entries?;

diff --git a/rc-zip/src/fsm/entry/mod.rs b/rc-zip/src/fsm/entry/mod.rs
index 39c2769..90f1ca3 100644
--- a/rc-zip/src/fsm/entry/mod.rs
+++ b/rc-zip/src/fsm/entry/mod.rs
@@ -27,7 +27,7 @@ mod zstd_dec;

 use crate::{
     error::{Error, FormatError, UnsupportedError},
-    parse::{DataDescriptorRecord, LocalFileHeaderRecord, Method, StoredEntryInner},
+    parse::{DataDescriptorRecord, Entry, LocalFileHeader, Method},
 };

 use super::FsmResult;
@@ -43,7 +43,7 @@ enum State {

     ReadData {
         /// The local file header for this entry
-        header: LocalFileHeaderRecord,
+        header: LocalFileHeader,

         /// Entry compressed size
         compressed_size: u64,
@@ -63,7 +63,7 @@ enum State {

     ReadDataDescriptor {
         /// The local file header for this entry
-        header: LocalFileHeaderRecord,
+        header: LocalFileHeader,

         /// Size we've decompressed + crc32 hash we've computed
         metrics: EntryReadMetrics,
@@ -71,7 +71,7 @@ enum State {

     Validate {
         /// The local file header for this entry
-        header: LocalFileHeaderRecord,
+        header: LocalFileHeader,

         /// Size we've decompressed + crc32 hash we've computed
         metrics: EntryReadMetrics,
@@ -87,14 +87,14 @@ enum State {
 /// A state machine that can parse a zip entry
 pub struct EntryFsm {
     state: State,
-    entry: Option<StoredEntryInner>,
+    entry: Option<Entry>,
     buffer: Buffer,
     eof: bool,
 }

 impl EntryFsm {
     /// Create a new state machine for decompressing a zip entry
-    pub fn new(entry: Option<StoredEntryInner>) -> Self {
+    pub fn new(entry: Option<Entry>) -> Self {
         Self {
             state: State::ReadLocalHeader,
             entry,
@@ -119,6 +119,11 @@ impl EntryFsm {
         }
     }

+    /// Like `process`, but only processes the header:
+    pub fn
process_header_only(&mut self) -> Option<&LocalFileHeader> { + todo!() + } + /// Process the input and write the output to the given buffer /// /// This function will return `FsmResult::Continue` if it needs more input @@ -148,7 +153,7 @@ impl EntryFsm { match &mut self.state { S::ReadLocalHeader => { let mut input = Partial::new(self.buffer.data()); - match LocalFileHeaderRecord::parser.parse_next(&mut input) { + match LocalFileHeader::parser.parse_next(&mut input) { Ok(header) => { let consumed = input.as_bytes().offset_from(&self.buffer.data()); tracing::trace!(local_file_header = ?header, consumed, "parsed local file header"); diff --git a/rc-zip/src/parse/archive.rs b/rc-zip/src/parse/archive.rs index 4b464eb..a6ee74e 100644 --- a/rc-zip/src/parse/archive.rs +++ b/rc-zip/src/parse/archive.rs @@ -1,11 +1,14 @@ -use chrono::{DateTime, Utc}; +use chrono::{offset::Utc, DateTime, TimeZone}; use num_enum::{FromPrimitive, IntoPrimitive}; +use winnow::{binary::le_u16, PResult, Partial}; use crate::{ encoding::Encoding, - parse::{ExtraField, Mode, Version}, + parse::{Mode, Version}, }; +use super::{zero_datetime, ExtraField, NtfsAttr}; + /// An Archive contains general information about a zip files, along with a list /// of [entries][StoredEntry]. /// @@ -17,7 +20,7 @@ use crate::{ pub struct Archive { pub(crate) size: u64, pub(crate) encoding: Encoding, - pub(crate) entries: Vec, + pub(crate) entries: Vec, pub(crate) comment: Option, } @@ -28,14 +31,14 @@ impl Archive { } /// Iterate over all files in this zip, read from the central directory. - pub fn entries(&self) -> impl Iterator { + pub fn entries(&self) -> impl Iterator { self.entries.iter() } /// Attempts to look up an entry by name. This is usually a bad idea, /// as names aren't necessarily normalized in zip archives. - pub fn by_name>(&self, name: N) -> Option<&StoredEntry> { - self.entries.iter().find(|&x| x.name() == name.as_ref()) + pub fn by_name>(&self, name: N) -> Option<&Entry> { + self.entries.iter().find(|&x| x.name == name.as_ref()) } /// Returns the detected character encoding for text fields @@ -59,42 +62,40 @@ impl Archive { #[derive(Clone)] pub struct Entry { /// Name of the file - /// Must be a relative path, not start with a drive letter (e.g. C:), - /// and must use forward slashes instead of back slashes + /// + /// This should be a relative path, separated by `/`. However, there are zip + /// files in the wild with all sorts of evil variants, so, be conservative + /// in what you accept. + /// + /// See also [Self::sanitized_name], which returns a sanitized version of + /// the name, working around zip slip vulnerabilities. pub name: String, - /// Compression method - /// - /// See [Method][] for more details. + /// Compression method: Store, Deflate, Bzip2, etc. pub method: Method, /// Comment is any arbitrary user-defined string shorter than 64KiB - pub comment: Option, + pub comment: String, - /// Modified timestamp - pub modified: chrono::DateTime, - - /// Created timestamp - pub created: Option>, + /// This entry's "last modified" timestamp - with caveats + /// + /// Due to the history of the ZIP file format, this may be inaccurate. It may be offset + /// by a few hours, if there is no extended timestamp information. It may have a resolution + /// as low as two seconds, if only MSDOS timestamps are present. It may default to the Unix + /// epoch, if something went really wrong. + /// + /// If you're reading this after the year 2038, or after the year 2108, godspeed. 
+    pub modified: DateTime<Utc>,

-    /// Accessed timestamp
-    pub accessed: Option<DateTime<Utc>>,
-}
+    /// This entry's "created" timestamp, if available.
+    ///
+    /// See [Self::modified] for caveats.
+    pub created: Option<DateTime<Utc>>,

-/// An entry as stored into an Archive. Contains additional metadata and offset information.
-///
-/// Whereas [Entry][] is archive-independent, [StoredEntry][] contains information that is tied to
-/// a specific archive.
-///
-/// When reading archives, one deals with a list of [StoredEntry][], whereas when writing one, one
-/// typically only specifies an [Entry][] and provides the entry's contents: fields like the CRC32
-/// hash, uncompressed size, and compressed size are derived automatically from the input.
-#[derive(Clone)]
-pub struct StoredEntry {
-    /// Archive-independent information
+    /// This entry's "last accessed" timestamp, if available.
     ///
-    /// This contains the entry's name, timestamps, comment, compression method.
-    pub entry: Entry,
+    /// See [Self::modified] for caveats.
+    pub accessed: Option<DateTime<Utc>>,

     /// Offset of the local file header in the zip file
     ///
     /// ```
     pub header_offset: u64,

-    /// External attributes (zip)
-    pub external_attrs: u32,
-
-    /// Version of zip supported by the tool that crated this archive.
-    pub creator_version: Version,

     /// Version of zip needed to extract this archive.
     pub reader_version: Version,

@@ -139,24 +134,6 @@ pub struct StoredEntry {
     /// Only present if a Unix extra field or New Unix extra field was found.
     pub gid: Option<u32>,

-    /// File mode
-    pub mode: Mode,
-
-    /// Any extra fields recognized while parsing the file.
-    ///
-    /// Most of these should be normalized and accessible as other fields,
-    /// but they are also made available here raw.
-    pub extra_fields: Vec<ExtraField>,
-
-    /// These fields are cheap to clone and needed for entry readers,
-    /// hence them being in a separate struct
-    pub inner: StoredEntryInner,
-}
-
-/// Fields required to read an entry properly, typically cloned into owned entry
-/// readers.
-#[derive(Clone, Copy, Debug)]
-pub struct StoredEntryInner {
     /// CRC-32 hash as found in the central directory.
     ///
     /// Note that this may be zero, and the actual CRC32 might be in the local header, or (more
     /// probably) in the data descriptor instead.
     pub crc32: u32,

     /// Size in bytes, after compression
     pub compressed_size: u64,

     /// Size in bytes, before compression
     ///
     /// This will be zero for directories.
     pub uncompressed_size: u64,

-    /// True if this entry was read from a zip64 archive
-    pub is_zip64: bool,
+    /// File mode.
+    pub mode: Mode,
 }

-impl StoredEntry {
-    /// Returns the entry's name. See also
-    /// [sanitized_name()](StoredEntry::sanitized_name), which returns a
-    /// sanitized version of the name.
-    ///
-    /// This should be a relative path, separated by `/`. However, there are zip
-    /// files in the wild with all sorts of evil variants, so, be conservative
-    /// in what you accept.
-    pub fn name(&self) -> &str {
-        self.entry.name.as_ref()
-    }
-
+impl Entry {
     /// Returns a sanitized version of the entry's name, if it
     /// seems safe. In particular, if this method feels like the
     /// entry name is trying to do a zip slip (cf.
     /// ), it returns None.
     ///
     /// Other than that, it will strip any leading slashes on non-Windows OSes.
     pub fn sanitized_name(&self) -> Option<&str> {
-        let name = self.name();
+        let name = self.name.as_str();

         // refuse entries with traversed/absolute path to mitigate zip slip
         if name.contains("..") {
@@ -223,52 +189,56 @@ impl StoredEntry {
         }
     }

-    /// The entry's comment, if any.
-    ///
-    /// When reading a zip file, an empty comment results in None.
- pub fn comment(&self) -> Option<&str> { - self.entry.comment.as_ref().map(|x| x.as_ref()) - } - - /// The compression method used for this entry - #[inline(always)] - pub fn method(&self) -> Method { - self.entry.method - } - - /// This entry's "last modified" timestamp - with caveats - /// - /// Due to the history of the ZIP file format, this may be inaccurate. It may be offset - /// by a few hours, if there is no extended timestamp information. It may have a resolution - /// as low as two seconds, if only MSDOS timestamps are present. It may default to the Unix - /// epoch, if something went really wrong. - /// - /// If you're reading this after the year 2038, or after the year 2108, godspeed. - #[inline(always)] - pub fn modified(&self) -> DateTime { - self.entry.modified - } - - /// This entry's "created" timestamp, if available. - /// - /// See [StoredEntry::modified()] for caveats. - #[inline(always)] - pub fn created(&self) -> Option<&DateTime> { - self.entry.created.as_ref() - } - - /// This entry's "last accessed" timestamp, if available. - /// - /// See [StoredEntry::modified()] for caveats. - #[inline(always)] - pub fn accessed(&self) -> Option<&DateTime> { - self.entry.accessed.as_ref() + /// Apply the extra field to the entry, updating its metadata. + pub(crate) fn set_extra_field(&mut self, ef: &ExtraField) { + match &ef { + ExtraField::Zip64(z64) => { + self.uncompressed_size = z64.uncompressed_size; + self.compressed_size = z64.compressed_size; + self.header_offset = z64.header_offset; + } + ExtraField::Timestamp(ts) => { + self.modified = Utc + .timestamp_opt(ts.mtime as i64, 0) + .single() + .unwrap_or_else(zero_datetime); + } + ExtraField::Ntfs(nf) => { + for attr in &nf.attrs { + // note: other attributes are unsupported + if let NtfsAttr::Attr1(attr) = attr { + self.modified = attr.mtime.to_datetime().unwrap_or_else(zero_datetime); + self.created = attr.ctime.to_datetime(); + self.accessed = attr.atime.to_datetime(); + } + } + } + ExtraField::Unix(uf) => { + self.modified = Utc + .timestamp_opt(uf.mtime as i64, 0) + .single() + .unwrap_or_else(zero_datetime); + + if self.uid.is_none() { + self.uid = Some(uf.uid as u32); + } + + if self.gid.is_none() { + self.gid = Some(uf.gid as u32); + } + } + ExtraField::NewUnix(uf) => { + self.uid = Some(uf.uid as u32); + self.gid = Some(uf.uid as u32); + } + _ => {} + }; } } -/// The contents of an entry: a directory, a file, or a symbolic link. +/// The entry's file type: a directory, a file, or a symbolic link. #[derive(Debug)] -pub enum EntryContents { +pub enum EntryKind { /// The entry is a directory Directory, @@ -279,15 +249,15 @@ pub enum EntryContents { Symlink, } -impl StoredEntry { - /// Determine [EntryContents] of this entry based on its mode. - pub fn contents(&self) -> EntryContents { +impl Entry { + /// Determine the kind of this entry based on its mode. 
+ pub fn kind(&self) -> EntryKind { if self.mode.has(Mode::SYMLINK) { - EntryContents::Symlink + EntryKind::Symlink } else if self.mode.has(Mode::DIR) { - EntryContents::Directory + EntryKind::Directory } else { - EntryContents::File + EntryKind::File } } } @@ -342,3 +312,10 @@ pub enum Method { #[num_enum(catch_all)] Unrecognized(u16), } + +impl Method { + /// Parse a method from a byte slice + pub fn parser(i: &mut Partial<&[u8]>) -> PResult { + le_u16(i).map(From::from) + } +} diff --git a/rc-zip/src/parse/directory_header.rs b/rc-zip/src/parse/central_directory_file_header.rs similarity index 51% rename from rc-zip/src/parse/directory_header.rs rename to rc-zip/src/parse/central_directory_file_header.rs index db38717..4a03735 100644 --- a/rc-zip/src/parse/directory_header.rs +++ b/rc-zip/src/parse/central_directory_file_header.rs @@ -13,13 +13,14 @@ use crate::{ error::{Error, FormatError}, parse::{ zero_datetime, Entry, ExtraField, ExtraFieldSettings, HostSystem, Mode, MsdosMode, - MsdosTimestamp, NtfsAttr, StoredEntry, StoredEntryInner, UnixMode, Version, ZipBytes, - ZipString, + MsdosTimestamp, NtfsAttr, UnixMode, Version, ZipBytes, ZipString, }, }; +use super::{EntryCdFields, Method}; + /// 4.3.12 Central directory structure: File header -pub struct DirectoryHeader { +pub struct CentralDirectoryFileHeader { /// version made by pub creator_version: Version, @@ -30,7 +31,7 @@ pub struct DirectoryHeader { pub flags: u16, /// compression method - pub method: u16, + pub method: Method, /// last mod file datetime pub modified: MsdosTimestamp, @@ -57,16 +58,16 @@ pub struct DirectoryHeader { pub header_offset: u32, /// name - pub name: ZipString, // FIXME: should this be Cow? + pub name: ZipString, /// extra - pub extra: ZipBytes, // FIXME: should this be Cow<[u8]>? + pub extra: ZipBytes, /// comment pub comment: ZipString, } -impl DirectoryHeader { +impl CentralDirectoryFileHeader { const SIGNATURE: &'static str = "PK\x01\x02"; /// Parser for the central directory file header @@ -75,7 +76,7 @@ impl DirectoryHeader { let creator_version = Version::parser.parse_next(i)?; let reader_version = Version::parser.parse_next(i)?; let flags = le_u16.parse_next(i)?; - let method = le_u16.parse_next(i)?; + let method = Method::parser.parse_next(i)?; let modified = MsdosTimestamp::parser.parse_next(i)?; let crc32 = le_u32.parse_next(i)?; let compressed_size = le_u32.parse_next(i)?; @@ -112,7 +113,7 @@ impl DirectoryHeader { } } -impl DirectoryHeader { +impl CentralDirectoryFileHeader { /// Returns true if the name or comment is not valid UTF-8 pub fn is_non_utf8(&self) -> bool { let (valid1, require1) = detect_utf8(&self.name.0[..]); @@ -136,83 +137,48 @@ impl DirectoryHeader { /// Converts the directory header into a stored entry: this involves /// parsing the extra fields and converting the timestamps. 
- pub fn as_stored_entry( - &self, - is_zip64: bool, - encoding: Encoding, - global_offset: u64, - ) -> Result { - let mut comment: Option = None; - if let Some(comment_field) = self.comment.clone().into_option() { - comment = Some(encoding.decode(&comment_field.0)?); - } - - let name = encoding.decode(&self.name.0)?; - - let mut compressed_size = self.compressed_size as u64; - let mut uncompressed_size = self.uncompressed_size as u64; - let mut header_offset = self.header_offset as u64 + global_offset; - - let mut modified: Option> = None; - let mut created: Option> = None; - let mut accessed: Option> = None; - - let mut uid: Option = None; - let mut gid: Option = None; + pub fn as_entry(&self, encoding: Encoding, global_offset: u64) -> Result { + let mut entry = Entry { + name: encoding.decode(&self.name.0)?, + method: self.method, + comment: encoding.decode(&self.comment.0)?, + modified: self.modified.to_datetime().unwrap_or_else(zero_datetime), + created: None, + accessed: None, + header_offset: self.header_offset as u64 + global_offset, + reader_version: self.reader_version, + flags: self.flags, + uid: None, + gid: None, + crc32: self.crc32, + compressed_size: self.compressed_size as _, + uncompressed_size: self.uncompressed_size as _, + mode: Mode(0), + }; - let mut extra_fields: Vec = Vec::new(); + entry.mode = match self.creator_version.host_system { + HostSystem::Unix | HostSystem::Osx => UnixMode(self.external_attrs >> 16).into(), + HostSystem::WindowsNtfs | HostSystem::Vfat | HostSystem::MsDos => { + MsdosMode(self.external_attrs).into() + } + _ => Mode(0), + }; + if entry.name.ends_with('/') { + // believe it or not, this is straight from the APPNOTE + entry.mode |= Mode::DIR + }; let settings = ExtraFieldSettings { - needs_compressed_size: self.compressed_size == !0u32, - needs_uncompressed_size: self.uncompressed_size == !0u32, - needs_header_offset: self.header_offset == !0u32, + uncompressed_size_u32: self.uncompressed_size, + compressed_size_u32: self.compressed_size, + header_offset_u32: self.header_offset, }; let mut slice = Partial::new(&self.extra.0[..]); while !slice.is_empty() { match ExtraField::mk_parser(settings).parse_next(&mut slice) { Ok(ef) => { - match &ef { - ExtraField::Zip64(z64) => { - if let Some(n) = z64.uncompressed_size { - uncompressed_size = n; - } - if let Some(n) = z64.compressed_size { - compressed_size = n; - } - if let Some(n) = z64.header_offset { - header_offset = n; - } - } - ExtraField::Timestamp(ts) => { - modified = Utc.timestamp_opt(ts.mtime as i64, 0).single(); - } - ExtraField::Ntfs(nf) => { - for attr in &nf.attrs { - // note: other attributes are unsupported - if let NtfsAttr::Attr1(attr) = attr { - modified = attr.mtime.to_datetime(); - created = attr.ctime.to_datetime(); - accessed = attr.atime.to_datetime(); - } - } - } - ExtraField::Unix(uf) => { - modified = Utc.timestamp_opt(uf.mtime as i64, 0).single(); - if uid.is_none() { - uid = Some(uf.uid as u32); - } - if gid.is_none() { - gid = Some(uf.gid as u32); - } - } - ExtraField::NewUnix(uf) => { - uid = Some(uf.uid as u32); - gid = Some(uf.uid as u32); - } - _ => {} - }; - extra_fields.push(ef); + entry.set_extra_field(&ef); } Err(e) => { trace!("extra field error: {:#?}", e); @@ -221,52 +187,6 @@ impl DirectoryHeader { } } - let modified = match modified { - Some(m) => Some(m), - None => self.modified.to_datetime(), - }; - - let mut mode: Mode = match self.creator_version.host_system() { - HostSystem::Unix | HostSystem::Osx => UnixMode(self.external_attrs >> 16).into(), - 
HostSystem::WindowsNtfs | HostSystem::Vfat | HostSystem::MsDos => { - MsdosMode(self.external_attrs).into() - } - _ => Mode(0), - }; - if name.ends_with('/') { - // believe it or not, this is straight from the APPNOTE - mode |= Mode::DIR - }; - - Ok(StoredEntry { - entry: Entry { - name, - method: self.method.into(), - comment, - modified: modified.unwrap_or_else(zero_datetime), - created, - accessed, - }, - - creator_version: self.creator_version, - reader_version: self.reader_version, - flags: self.flags, - - inner: StoredEntryInner { - crc32: self.crc32, - compressed_size, - uncompressed_size, - is_zip64, - }, - header_offset, - - uid, - gid, - mode, - - extra_fields, - - external_attrs: self.external_attrs, - }) + Ok(entry) } } diff --git a/rc-zip/src/parse/extra_field.rs b/rc-zip/src/parse/extra_field.rs index fd7434b..6513b4e 100644 --- a/rc-zip/src/parse/extra_field.rs +++ b/rc-zip/src/parse/extra_field.rs @@ -1,7 +1,7 @@ use tracing::trace; use winnow::{ binary::{le_u16, le_u32, le_u64, le_u8, length_take}, - combinator::{cond, opt, preceded, repeat_till}, + combinator::{opt, preceded, repeat_till}, error::{ErrMode, ErrorKind, ParserError, StrContext}, seq, token::{tag, take}, @@ -36,14 +36,20 @@ impl<'a> ExtraFieldRecord<'a> { /// Central directory record field is set to 0xFFFF or 0xFFFFFFFF. #[derive(Debug, Clone, Copy)] pub struct ExtraFieldSettings { - /// Whether the "zip64 extra field" uncompressed size field is needed/present - pub needs_uncompressed_size: bool, - - /// Whether the "zip64 extra field" compressed size field is needed/present - pub needs_compressed_size: bool, - - /// Whether the "zip64 extra field" header offset field is needed/present - pub needs_header_offset: bool, + /// The uncompressed size field read from a local or central directory record + /// If this is 0xFFFF_FFFF, then the zip64 extra field uncompressed size + /// field will be present. + pub uncompressed_size_u32: u32, + + /// The compressed size field read from a local or central directory record + /// If this is 0xFFFF_FFFF, then the zip64 extra field compressed size + /// field will be present. + pub compressed_size_u32: u32, + + /// The header offset field read from a central directory record (or zero + /// for local directory records). If this is 0xFFFF_FFFF, then the zip64 + /// extra field header offset field will be present. + pub header_offset_u32: u32, } /// Information stored in the central directory header `extra` field @@ -90,7 +96,7 @@ impl ExtraField { .context(StrContext::Label("timestamp")) .parse_next(payload)?, ExtraNtfsField::TAG => { - opt(ExtraNtfsField::parse.map(EF::Ntfs)).parse_next(payload)? + opt(ExtraNtfsField::parser.map(EF::Ntfs)).parse_next(payload)? } ExtraUnixField::TAG | ExtraUnixField::TAG_INFOZIP => { opt(ExtraUnixField::parser.map(EF::Unix)).parse_next(payload)? @@ -111,13 +117,16 @@ impl ExtraField { #[derive(Clone, Default)] pub struct ExtraZip64Field { /// 64-bit uncompressed size - pub uncompressed_size: Option, + pub uncompressed_size: u64, /// 64-bit compressed size - pub compressed_size: Option, + pub compressed_size: u64, /// 64-bit header offset - pub header_offset: Option, + pub header_offset: u64, + + /// 32-bit disk start number + pub disk_start: u32, } impl ExtraZip64Field { @@ -127,13 +136,29 @@ impl ExtraZip64Field { settings: ExtraFieldSettings, ) -> impl FnMut(&mut Partial<&'_ [u8]>) -> PResult { move |i| { - // N.B: we ignore "disk start number" - seq! 
{Self { - uncompressed_size: cond(settings.needs_uncompressed_size, le_u64), - compressed_size: cond(settings.needs_compressed_size, le_u64), - header_offset: cond(settings.needs_header_offset, le_u64), - }} - .parse_next(i) + let uncompressed_size = if settings.uncompressed_size_u32 == 0xFFFF_FFFF { + le_u64.parse_next(i)? + } else { + settings.uncompressed_size_u32 as u64 + }; + let compressed_size = if settings.compressed_size_u32 == 0xFFFF_FFFF { + le_u64.parse_next(i)? + } else { + settings.compressed_size_u32 as u64 + }; + let header_offset = if settings.header_offset_u32 == 0xFFFF_FFFF { + le_u64.parse_next(i)? + } else { + settings.header_offset_u32 as u64 + }; + let disk_start = le_u32.parse_next(i)?; + + Ok(Self { + uncompressed_size, + compressed_size, + header_offset, + disk_start, + }) } } } @@ -254,7 +279,7 @@ pub struct ExtraNtfsField { impl ExtraNtfsField { const TAG: u16 = 0x000a; - fn parse(i: &mut Partial<&'_ [u8]>) -> PResult { + fn parser(i: &mut Partial<&'_ [u8]>) -> PResult { let _ = take(4_usize).parse_next(i)?; // reserved (unused) seq! {Self { // from the winnow docs: @@ -262,7 +287,7 @@ impl ExtraNtfsField { // data or the end of the stream, causing them to always report // Incomplete. // using repeat_till with eof combinator to work around this: - attrs: repeat_till(0.., NtfsAttr::parse, winnow::combinator::eof).map(|x| x.0), + attrs: repeat_till(0.., NtfsAttr::parser, winnow::combinator::eof).map(|x| x.0), }} .parse_next(i) } @@ -282,7 +307,7 @@ pub enum NtfsAttr { } impl NtfsAttr { - fn parse(i: &mut Partial<&'_ [u8]>) -> PResult { + fn parser(i: &mut Partial<&'_ [u8]>) -> PResult { let tag = le_u16.parse_next(i)?; trace!("parsing NTFS attribute, tag {:04x}", tag); let payload = length_take(le_u16).parse_next(i)?; diff --git a/rc-zip/src/parse/local.rs b/rc-zip/src/parse/local_headers.rs similarity index 98% rename from rc-zip/src/parse/local.rs rename to rc-zip/src/parse/local_headers.rs index fc73ef6..6b835b0 100644 --- a/rc-zip/src/parse/local.rs +++ b/rc-zip/src/parse/local_headers.rs @@ -14,7 +14,7 @@ use winnow::{ #[derive(Debug)] /// 4.3.7 Local file header -pub struct LocalFileHeaderRecord { +pub struct LocalFileHeader { /// version needed to extract pub reader_version: Version, @@ -56,7 +56,7 @@ pub enum MethodSpecific { Lzma(LzmaProperties), } -impl LocalFileHeaderRecord { +impl LocalFileHeader { /// The signature for a local file header pub const SIGNATURE: &'static str = "PK\x03\x04"; diff --git a/rc-zip/src/parse/mod.rs b/rc-zip/src/parse/mod.rs index 962c24e..fc41699 100644 --- a/rc-zip/src/parse/mod.rs +++ b/rc-zip/src/parse/mod.rs @@ -22,14 +22,14 @@ pub use version::*; mod date_time; pub use date_time::*; -mod directory_header; -pub use directory_header::*; +mod central_directory_file_header; +pub use central_directory_file_header::*; mod eocd; pub use eocd::*; -mod local; -pub use local::*; +mod local_headers; +pub use local_headers::*; mod raw; pub use raw::*; diff --git a/rc-zip/src/parse/raw.rs b/rc-zip/src/parse/raw.rs index fb978ab..9a86943 100644 --- a/rc-zip/src/parse/raw.rs +++ b/rc-zip/src/parse/raw.rs @@ -32,14 +32,6 @@ impl ZipString { let count = count.to_usize(); move |i| (take(count).map(|slice: &[u8]| Self(slice.into()))).parse_next(i) } - - pub(crate) fn into_option(self) -> Option { - if !self.0.is_empty() { - Some(self) - } else { - None - } - } } /// A raw u8 slice, with no specific structure. 
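Note on the ExtraFieldSettings change above: instead of three needs_* booleans, the settings now carry the raw 32-bit values read from the local or central directory record, and a 64-bit value is only read from the zip64 extended information field when the corresponding 32-bit field is saturated. A minimal standalone sketch of that rule (illustrative only, not rc-zip code; the read_u64 closure is hypothetical):

    // If the 32-bit field holds 0xFFFF_FFFF, the real value lives in the
    // zip64 extra field and must be read as a little-endian u64; otherwise
    // the 32-bit value is already the answer.
    fn effective_u64(field_u32: u32, read_u64: impl FnOnce() -> u64) -> u64 {
        if field_u32 == 0xFFFF_FFFF {
            read_u64()
        } else {
            field_u32 as u64
        }
    }
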
diff --git a/rc-zip/src/parse/version.rs b/rc-zip/src/parse/version.rs
index 1b9ac8f..2348bdf 100644
--- a/rc-zip/src/parse/version.rs
+++ b/rc-zip/src/parse/version.rs
@@ -1,5 +1,6 @@
+use num_enum::{FromPrimitive, IntoPrimitive};
 use std::fmt;
-use winnow::{binary::le_u16, PResult, Parser, Partial};
+use winnow::{binary::le_u8, seq, PResult, Parser, Partial};

 /// A zip version (either created by, or required when reading an archive).
 ///
@@ -8,7 +9,14 @@ use winnow::{binary::le_u16, PResult, Parser, Partial};
 ///
 /// For more information, see the [.ZIP Application Note](https://support.pkware.com/display/PKZIP/APPNOTE), section 4.4.2.
 #[derive(Clone, Copy, PartialEq, Eq, Hash)]
-pub struct Version(pub u16);
+pub struct Version {
+    /// The host system on which the archive was created
+    pub host_system: HostSystem,
+
+    /// Integer version, e.g. 45 for Zip version 4.5
+    /// See APPNOTE, section 4.4.2.1
+    pub version: u8,
+}

 impl fmt::Debug for Version {
     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
@@ -25,109 +33,84 @@ impl fmt::Debug for Version {
 impl Version {
     /// Parse a version from a byte slice
     pub fn parser(i: &mut Partial<&'_ [u8]>) -> PResult<Self> {
-        le_u16.map(Self).parse_next(i)
-    }
-
-    /// Identifies the host system on which the zip attributes are compatible.
-    pub fn host_system(&self) -> HostSystem {
-        match self.host() {
-            0 => HostSystem::MsDos,
-            1 => HostSystem::Amiga,
-            2 => HostSystem::OpenVms,
-            3 => HostSystem::Unix,
-            4 => HostSystem::VmCms,
-            5 => HostSystem::AtariSt,
-            6 => HostSystem::Os2Hpfs,
-            7 => HostSystem::Macintosh,
-            8 => HostSystem::ZSystem,
-            9 => HostSystem::CpM,
-            10 => HostSystem::WindowsNtfs,
-            11 => HostSystem::Mvs,
-            12 => HostSystem::Vse,
-            13 => HostSystem::AcornRisc,
-            14 => HostSystem::Vfat,
-            15 => HostSystem::AlternateMvs,
-            16 => HostSystem::BeOs,
-            17 => HostSystem::Tandem,
-            18 => HostSystem::Os400,
-            19 => HostSystem::Osx,
-            n => HostSystem::Unknown(n),
-        }
-    }
-
-    /// Integer host system
-    pub fn host(&self) -> u8 {
-        (self.0 >> 8) as u8
-    }
-
-    /// Integer version, e.g. 45 for Zip version 4.5
-    pub fn version(&self) -> u8 {
-        (self.0 & 0xff) as u8
-    }
-
-    /// ZIP specification major version
-    ///
-    /// See APPNOTE, section 4.4.2.1
-    pub fn major(&self) -> u32 {
-        self.version() as u32 / 10
-    }
-
-    /// ZIP specification minor version
-    ///
-    /// See APPNOTE, section 4.4.2.1
-    pub fn minor(&self) -> u32 {
-        self.version() as u32 % 10
+        seq! {Self {
+            version: le_u8,
+            host_system: le_u8.map(HostSystem::from),
+        }}
+        .parse_next(i)
     }
 }

 /// System on which an archive was created, as encoded into a version u16.
 ///
 /// See APPNOTE, section 4.4.2.2
-#[derive(Debug)]
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, IntoPrimitive, FromPrimitive)]
+#[repr(u8)]
 pub enum HostSystem {
     /// MS-DOS and OS/2 (FAT / VFAT / FAT32 file systems)
-    MsDos,
+    MsDos = 0,
+
     /// Amiga
-    Amiga,
+    Amiga = 1,
+
     /// OpenVMS
-    OpenVms,
+    OpenVms = 2,
+
     /// UNIX
-    Unix,
+    Unix = 3,
+
     /// VM/CMS
-    VmCms,
+    VmCms = 4,
+
     /// Atari ST
-    AtariSt,
+    AtariSt = 5,
+
     /// OS/2 H.P.F.S
-    Os2Hpfs,
+    Os2Hpfs = 6,
+
     /// Macintosh (see `Osx`)
-    Macintosh,
+    Macintosh = 7,
+
     /// Z-System
-    ZSystem,
+    ZSystem = 8,
+
     /// CP/M
-    CpM,
+    CpM = 9,
+
     /// Windows NTFS
-    WindowsNtfs,
+    WindowsNtfs = 10,
+
     /// MVS (OS/390 - Z/OS)
-    Mvs,
+    Mvs = 11,
+
     /// VSE
-    Vse,
+    Vse = 12,
+
     /// Acorn Risc
-    AcornRisc,
+    AcornRisc = 13,
+
     /// VFAT
-    Vfat,
+    Vfat = 14,
+
     /// alternate MVS
-    AlternateMvs,
+    AlternateMvs = 15,
+
     /// BeOS
-    BeOs,
+    BeOs = 16,
+
     /// Tandem
-    Tandem,
+    Tandem = 17,
+
     /// OS/400
-    Os400,
+    Os400 = 18,
+
     /// OS X (Darwin)
-    Osx,
+    Osx = 19,
+
     /// Unknown host system
     ///
     /// Values 20 through 255 are currently unused, as of
-    /// APPNOTE.TXT 6.3.6 (April 26, 2019)
+    /// APPNOTE.TXT 6.3.10
+    #[num_enum(catch_all)]
     Unknown(u8),
 }
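
For reference, a minimal sketch of how the unified read-side API reads for a consumer after this patch, based on the jean.rs example above (the summarize helper is hypothetical, not part of the crate):

    use rc_zip::parse::{Archive, EntryKind};

    fn summarize(archive: &Archive) {
        for entry in archive.entries() {
            // `kind()` replaces the old `contents()` accessor, and metadata
            // that used to sit behind accessors (name, comment, sizes) is now
            // exposed as plain public fields on `Entry`.
            if let EntryKind::File = entry.kind() {
                println!("{}: {} bytes", entry.name, entry.uncompressed_size);
            }
        }
    }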