Skip to content

Commit

Permalink
Start unifying local / central file headers
Browse files Browse the repository at this point in the history
  • Loading branch information
fasterthanlime committed Feb 5, 2024
1 parent ac6319c commit d713ee8
Show file tree
Hide file tree
Showing 14 changed files with 303 additions and 405 deletions.
20 changes: 10 additions & 10 deletions rc-zip-sync/examples/jean.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use cfg_if::cfg_if;
use clap::{Parser, Subcommand};
use humansize::{format_size, BINARY};
use rc_zip::parse::{Archive, EntryContents, Method, Version};
use rc_zip::parse::{Archive, EntryKind, Method, Version};
use rc_zip_sync::{ReadZip, ReadZipEntriesStreaming};

use std::{
Expand Down Expand Up @@ -102,14 +102,14 @@ fn do_main(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {
for entry in archive.entries() {
creator_versions.insert(entry.creator_version);
reader_versions.insert(entry.reader_version);
match entry.contents() {
EntryContents::Symlink => {
match entry.kind() {
EntryKind::Symlink => {
num_symlinks += 1;
}
EntryContents::Directory => {
EntryKind::Directory => {
num_dirs += 1;
}
EntryContents::File => {
EntryKind::File => {
methods.insert(entry.method());
num_files += 1;
compressed_size += entry.inner.compressed_size;
Expand Down Expand Up @@ -166,7 +166,7 @@ fn do_main(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {
gid = Optional(entry.gid),
);

if let EntryContents::Symlink = entry.contents() {
if let EntryKind::Symlink = entry.contents() {
let mut target = String::new();
entry.reader().read_to_string(&mut target).unwrap();
print!("\t{target}", target = target);
Expand All @@ -193,7 +193,7 @@ fn do_main(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {
let mut num_symlinks = 0;
let mut uncompressed_size: u64 = 0;
for entry in reader.entries() {
if let EntryContents::File = entry.contents() {
if let EntryKind::File = entry.contents() {
uncompressed_size += entry.inner.uncompressed_size;
}
}
Expand All @@ -220,7 +220,7 @@ fn do_main(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {

pbar.set_message(entry_name.to_string());
match entry.contents() {
EntryContents::Symlink => {
EntryKind::Symlink => {
num_symlinks += 1;

cfg_if! {
Expand Down Expand Up @@ -256,15 +256,15 @@ fn do_main(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {
}
}
}
EntryContents::Directory => {
EntryKind::Directory => {
num_dirs += 1;
let path = dir.join(entry_name);
std::fs::create_dir_all(
path.parent()
.expect("all full entry paths should have parent paths"),
)?;
}
EntryContents::File => {
EntryKind::File => {
num_files += 1;
let path = dir.join(entry_name);
std::fs::create_dir_all(
Expand Down
2 changes: 1 addition & 1 deletion rc-zip-sync/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,5 +14,5 @@ mod streaming_entry_reader;
// re-exports
pub use rc_zip;
pub use read_zip::{
HasCursor, ReadZip, ReadZipEntriesStreaming, ReadZipWithSize, SyncArchive, SyncStoredEntry,
HasCursor, ReadZip, ReadZipEntriesStreaming, ReadZipWithSize, SyncArchive, SyncEntry,
};
20 changes: 9 additions & 11 deletions rc-zip-sync/src/read_zip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,7 @@ use rc_zip::chrono::{DateTime, TimeZone, Utc};
use rc_zip::{
error::{Error, FormatError},
fsm::{ArchiveFsm, FsmResult},
parse::{
Archive, ExtraField, ExtraFieldSettings, LocalFileHeaderRecord, NtfsAttr, StoredEntry,
},
parse::{Archive, ExtraField, ExtraFieldSettings, LocalFileHeader, NtfsAttr, StoredEntry},
};
use tracing::trace;
use winnow::{
Expand Down Expand Up @@ -123,41 +121,41 @@ where
F: HasCursor,
{
/// Iterate over all files in this zip, read from the central directory.
pub fn entries(&self) -> impl Iterator<Item = SyncStoredEntry<'_, F>> {
self.archive.entries().map(move |entry| SyncStoredEntry {
pub fn entries(&self) -> impl Iterator<Item = SyncEntry<'_, F>> {
self.archive.entries().map(move |entry| SyncEntry {
file: self.file,
entry,
})
}

/// Attempts to look up an entry by name. This is usually a bad idea,
/// as names aren't necessarily normalized in zip archives.
pub fn by_name<N: AsRef<str>>(&self, name: N) -> Option<SyncStoredEntry<'_, F>> {
pub fn by_name<N: AsRef<str>>(&self, name: N) -> Option<SyncEntry<'_, F>> {
self.archive
.entries()
.find(|&x| x.name() == name.as_ref())
.map(|entry| SyncStoredEntry {
.map(|entry| SyncEntry {
file: self.file,
entry,
})
}
}

/// A zip entry, read synchronously from a file or other I/O resource.
pub struct SyncStoredEntry<'a, F> {
pub struct SyncEntry<'a, F> {
file: &'a F,
entry: &'a StoredEntry,
}

impl<F> Deref for SyncStoredEntry<'_, F> {
impl<F> Deref for SyncEntry<'_, F> {
type Target = StoredEntry;

fn deref(&self) -> &Self::Target {
self.entry
}
}

impl<'a, F> SyncStoredEntry<'a, F>
impl<'a, F> SyncEntry<'a, F>
where
F: HasCursor,
{
Expand Down Expand Up @@ -259,7 +257,7 @@ where
buf.fill(n);

let mut input = Partial::new(buf.data());
match LocalFileHeaderRecord::parser.parse_next(&mut input) {
match LocalFileHeader::parser.parse_next(&mut input) {
Ok(header) => {
let consumed = input.as_bytes().offset_from(&buf.data());
trace!(?header, %consumed, "Got local file header record!");
Expand Down
6 changes: 3 additions & 3 deletions rc-zip-sync/src/streaming_entry_reader.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use oval::Buffer;
use rc_zip::{
fsm::{EntryFsm, FsmResult},
parse::LocalFileHeaderRecord,
parse::LocalFileHeader,
};
use std::{
io::{self, Write},
Expand All @@ -10,7 +10,7 @@ use std::{
use tracing::trace;

pub struct StreamingEntryReader<R> {
header: LocalFileHeaderRecord,
header: LocalFileHeader,
rd: R,
state: State,
}
Expand All @@ -34,7 +34,7 @@ impl<R> StreamingEntryReader<R>
where
R: io::Read,
{
pub(crate) fn new(remain: Buffer, header: LocalFileHeaderRecord, rd: R) -> Self {
pub(crate) fn new(remain: Buffer, header: LocalFileHeader, rd: R) -> Self {
Self {
rd,
header,
Expand Down
18 changes: 8 additions & 10 deletions rc-zip/src/corpus/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use chrono::{DateTime, FixedOffset, TimeZone, Timelike, Utc};
use crate::{
encoding::Encoding,
error::Error,
parse::{Archive, EntryContents, StoredEntry},
parse::{Archive, Entry, EntryKind},
};

pub struct Case {
Expand Down Expand Up @@ -246,14 +246,12 @@ pub fn check_case(test: &Case, archive: Result<&Archive, &Error>) {
// then each implementation should check individual files
}

pub fn check_file_against(file: &CaseFile, entry: &StoredEntry, actual_bytes: &[u8]) {
pub fn check_file_against(file: &CaseFile, entry: &Entry, actual_bytes: &[u8]) {
if let Some(expected) = file.modified {
assert_eq!(
expected,
entry.modified(),
expected, entry.modified,
"entry {} should have modified = {:?}",
entry.name(),
expected
entry.name, expected
)
}

Expand All @@ -262,10 +260,10 @@ pub fn check_file_against(file: &CaseFile, entry: &StoredEntry, actual_bytes: &[
}

// I have honestly yet to see a zip file _entry_ with a comment.
assert!(entry.comment().is_none());
assert!(entry.comment.is_empty());

match entry.contents() {
EntryContents::File => {
match entry.kind() {
EntryKind::File => {
match &file.content {
FileContent::Unchecked => {
// ah well
Expand All @@ -283,7 +281,7 @@ pub fn check_file_against(file: &CaseFile, entry: &StoredEntry, actual_bytes: &[
}
}
}
EntryContents::Symlink | EntryContents::Directory => {
EntryKind::Symlink | EntryKind::Directory => {
assert!(matches!(file.content, FileContent::Unchecked));
}
}
Expand Down
12 changes: 6 additions & 6 deletions rc-zip/src/fsm/archive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ use crate::{
encoding::Encoding,
error::{Error, FormatError},
parse::{
Archive, DirectoryHeader, EndOfCentralDirectory, EndOfCentralDirectory64Locator,
EndOfCentralDirectory64Record, EndOfCentralDirectoryRecord, Located, StoredEntry,
Archive, CentralDirectoryFileHeader, EndOfCentralDirectory, EndOfCentralDirectory64Locator,
EndOfCentralDirectory64Record, EndOfCentralDirectoryRecord, Entry, Located,
},
};

Expand Down Expand Up @@ -66,7 +66,7 @@ enum State {
/// Reading all headers from the central directory
ReadCentralDirectory {
eocd: EndOfCentralDirectory,
directory_headers: Vec<DirectoryHeader>,
directory_headers: Vec<CentralDirectoryFileHeader>,
},

#[default]
Expand Down Expand Up @@ -256,7 +256,7 @@ impl ArchiveFsm {
"initial offset & len"
);
'read_headers: while !input.is_empty() {
match DirectoryHeader::parser.parse_next(&mut input) {
match CentralDirectoryFileHeader::parser.parse_next(&mut input) {
Ok(dh) => {
trace!(
input_empty_now = input.is_empty(),
Expand Down Expand Up @@ -336,9 +336,9 @@ impl ArchiveFsm {

let is_zip64 = eocd.dir64.is_some();
let global_offset = eocd.global_offset as u64;
let entries: Result<Vec<StoredEntry>, Error> = directory_headers
let entries: Result<Vec<Entry>, Error> = directory_headers
.iter()
.map(|x| x.as_stored_entry(is_zip64, encoding, global_offset))
.map(|x| x.as_entry(is_zip64, encoding, global_offset))
.collect();
let entries = entries?;

Expand Down
19 changes: 12 additions & 7 deletions rc-zip/src/fsm/entry/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ mod zstd_dec;

use crate::{
error::{Error, FormatError, UnsupportedError},
parse::{DataDescriptorRecord, LocalFileHeaderRecord, Method, StoredEntryInner},
parse::{DataDescriptorRecord, Entry, LocalFileHeader, Method},
};

use super::FsmResult;
Expand All @@ -43,7 +43,7 @@ enum State {

ReadData {
/// The local file header for this entry
header: LocalFileHeaderRecord,
header: LocalFileHeader,

/// Entry compressed size
compressed_size: u64,
Expand All @@ -63,15 +63,15 @@ enum State {

ReadDataDescriptor {
/// The local file header for this entry
header: LocalFileHeaderRecord,
header: LocalFileHeader,

/// Size we've decompressed + crc32 hash we've computed
metrics: EntryReadMetrics,
},

Validate {
/// The local file header for this entry
header: LocalFileHeaderRecord,
header: LocalFileHeader,

/// Size we've decompressed + crc32 hash we've computed
metrics: EntryReadMetrics,
Expand All @@ -87,14 +87,14 @@ enum State {
/// A state machine that can parse a zip entry
pub struct EntryFsm {
state: State,
entry: Option<StoredEntryInner>,
entry: Option<Entry>,
buffer: Buffer,
eof: bool,
}

impl EntryFsm {
/// Create a new state machine for decompressing a zip entry
pub fn new(entry: Option<StoredEntryInner>) -> Self {
pub fn new(entry: Option<Entry>) -> Self {
Self {
state: State::ReadLocalHeader,
entry,
Expand All @@ -119,6 +119,11 @@ impl EntryFsm {
}
}

/// Like `process`, but only processes the local file header.
///
/// Not implemented yet: the body is a `todo!()` stub, so calling this
/// method currently panics instead of returning a header.
pub fn process_header_only(&mut self) -> Option<&LocalFileHeader> {
todo!()
}

/// Process the input and write the output to the given buffer
///
/// This function will return `FsmResult::Continue` if it needs more input
Expand Down Expand Up @@ -148,7 +153,7 @@ impl EntryFsm {
match &mut self.state {
S::ReadLocalHeader => {
let mut input = Partial::new(self.buffer.data());
match LocalFileHeaderRecord::parser.parse_next(&mut input) {
match LocalFileHeader::parser.parse_next(&mut input) {
Ok(header) => {
let consumed = input.as_bytes().offset_from(&self.buffer.data());
tracing::trace!(local_file_header = ?header, consumed, "parsed local file header");
Expand Down
Loading

0 comments on commit d713ee8

Please sign in to comment.