Skip to content

Commit

Permalink
Clarify state machine terminology
Browse files Browse the repository at this point in the history
  • Loading branch information
fasterthanlime committed Feb 2, 2024
1 parent 93d3518 commit 2f22013
Show file tree
Hide file tree
Showing 11 changed files with 58 additions and 37 deletions.
8 changes: 4 additions & 4 deletions rc-zip-sync/src/read_zip.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use rc_zip::{
reader::{ArchiveReader, ArchiveReaderResult},
fsm::{ArchiveFsm, FsmResult},
Archive, Error, StoredEntry,
};

Expand Down Expand Up @@ -41,7 +41,7 @@ where

fn read_zip_with_size(&self, size: u64) -> Result<SyncArchive<'_, F>, Error> {
tracing::trace!(%size, "read_zip_with_size");
let mut ar = ArchiveReader::new(size);
let mut ar = ArchiveFsm::new(size);
loop {
if let Some(offset) = ar.wants_read() {
tracing::trace!(%offset, "read_zip_with_size: wants_read, space len = {}", ar.space().len());
Expand All @@ -58,14 +58,14 @@ where
}

match ar.process()? {
ArchiveReaderResult::Done(archive) => {
FsmResult::Done(archive) => {
tracing::trace!("read_zip_with_size: done");
return Ok(SyncArchive {
file: self,
archive,
});
}
ArchiveReaderResult::Continue => {
FsmResult::Continue => {
tracing::trace!("read_zip_with_size: continue");
}
}
Expand Down
8 changes: 4 additions & 4 deletions rc-zip-sync/tests/integration_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -346,12 +346,12 @@ fn real_world_files() {

#[test_log::test]
fn state_machine() {
use rc_zip::reader::{ArchiveReader, ArchiveReaderResult};
use rc_zip::fsm::{ArchiveFsm, FsmResult};

let cases = test_cases();
let case = cases.iter().find(|x| x.name() == "zip64.zip").unwrap();
let bs = case.bytes();
let mut zar = ArchiveReader::new(bs.len() as u64);
let mut zar = ArchiveFsm::new(bs.len() as u64);

let archive = 'read_zip: loop {
if let Some(offset) = zar.wants_read() {
Expand Down Expand Up @@ -381,8 +381,8 @@ fn state_machine() {

match zar.process() {
Ok(res) => match res {
ArchiveReaderResult::Continue => {}
ArchiveReaderResult::Done(archive) => break 'read_zip archive,
FsmResult::Continue => {}
FsmResult::Done(archive) => break 'read_zip archive,
},
Err(err) => {
println!("zar processing error: {:#?}", err);
Expand Down
8 changes: 4 additions & 4 deletions rc-zip-tokio/src/read_zip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use positioned_io::{RandomAccessFile, ReadAt};
use tokio::io::{AsyncRead, AsyncReadExt, ReadBuf};

use rc_zip::{
reader::{ArchiveReader, ArchiveReaderResult},
fsm::{ArchiveFsm, FsmResult},
Archive, Error, StoredEntry,
};

Expand Down Expand Up @@ -50,7 +50,7 @@ where
type File = F;

async fn read_zip_with_size_async(&self, size: u64) -> Result<AsyncArchive<'_, F>, Error> {
let mut ar = ArchiveReader::new(size);
let mut ar = ArchiveFsm::new(size);
loop {
if let Some(offset) = ar.wants_read() {
match self.cursor_at(offset).read(ar.space()).await {
Expand All @@ -65,13 +65,13 @@ where
}

match ar.process()? {
ArchiveReaderResult::Done(archive) => {
FsmResult::Done(archive) => {
return Ok(AsyncArchive {
file: self,
archive,
})
}
ArchiveReaderResult::Continue => {}
FsmResult::Continue => {}
}
}
}
Expand Down
10 changes: 10 additions & 0 deletions rc-zip/src/encoding.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
//! zip entry paths may be encoded in a variety of character encodings.
//!
//! Historically, CP-437 was used, but many modern zip files use UTF-8 with an
//! optional UTF-8 flag.
//!
//! Others use the system's local character encoding, and we have no choice but
//! to make an educated guess thanks to the chardet-ng crate.
use std::fmt;

/// Encodings supported by this crate
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum Encoding {
/// UTF-8
Utf8,

/// [Codepage 437](https://en.wikipedia.org/wiki/Code_page_437), also known as
/// OEM-US, PC-8, or DOS Latin US.
///
/// This is the fallback if UTF-8 is not specified and no other encoding
/// is auto-detected. It was the original encoding of the zip format.
Cp437,

/// [Shift JIS](https://en.wikipedia.org/wiki/Shift_JIS), also known as SJIS.
///
/// Still in use by some Japanese users as of 2019.
Expand Down
2 changes: 2 additions & 0 deletions rc-zip/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
//! All error types used in this crate
use crate::Method;

use super::encoding;
Expand Down
7 changes: 7 additions & 0 deletions rc-zip/src/format/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
//! Contains winnow parsers for most elements that make up a ZIP file, like
//! the end-of-central-directory record, local file headers, and central
//! directory headers.
//!
//! Everything in here is based on the appnote, which you can find in the
//! source repository.
pub use crate::encoding::Encoding;

mod archive;
Expand Down
6 changes: 3 additions & 3 deletions rc-zip/src/reader/archive.rs → rc-zip/src/fsm/archive.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@ use winnow::{
Parser, Partial,
};

/// ArchiveReader parses a valid zip archive into an [Archive][]. In particular, this struct finds
/// [ArchiveFsm] parses a valid zip archive into an [Archive]. In particular, this struct finds
/// an end of central directory record, parses the entire central directory, detects text encoding,
/// and normalizes metadata.
pub struct ArchiveReader {
pub struct ArchiveFsm {
// Size of the entire zip file
size: u64,
state: State,
Expand Down Expand Up @@ -71,7 +71,7 @@ impl State {
}
}

impl ArchiveReader {
impl ArchiveFsm {
/// This should be > 65KiB, because the section at the end of the
/// file that we check for end of central directory record is 65KiB.
const DEFAULT_BUFFER_SIZE: usize = 256 * 1024;
Expand Down
12 changes: 12 additions & 0 deletions rc-zip/src/fsm/entry.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
/// Internal state held by [EntryFsm].
#[derive(Default)]
enum State {
/// Done!
Done,

/// The default state: this is what `State::default()` returns, and thus
/// what `std::mem::take` leaves behind when a state is moved out.
// NOTE(review): presumably only a placeholder while moving between states — confirm.
#[default]
Transition,
}

/// State machine type that, for now, only wraps a private `State`.
// NOTE(review): presumably meant to drive reading a single zip entry; no
// constructor or methods are defined in this file yet — confirm intended API.
pub struct EntryFsm {
state: State,
}
12 changes: 11 additions & 1 deletion rc-zip/src/reader/mod.rs → rc-zip/src/fsm/mod.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
//! Parsers are just part of the puzzle when it comes to zip files: finding the
//! central directory is non-trivial and involves seeking around the input:
//! [ArchiveFsm] provides a state machine to handle this.
//!
//! Similarly, reading an entry involves reading the local header, then the
//! data (while calculating the CRC32), then the data descriptor, and then
//! checking whether the uncompressed size and CRC32 match the values in the
//! central directory.
macro_rules! transition {
($state: expr => ($pattern: pat) $body: expr) => {
$state = if let $pattern = std::mem::take(&mut $state) {
Expand All @@ -9,9 +18,10 @@ macro_rules! transition {
}

mod archive;
pub use archive::ArchiveReader;
pub use archive::ArchiveFsm;

mod entry;
pub use entry::EntryFsm;

/// Indicates whether or not the state machine has completed its work
pub enum FsmResult<T> {
Expand Down
21 changes: 1 addition & 20 deletions rc-zip/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,22 +1,3 @@
//! # rc-zip
//!
//! rc-zip is a zip archive library with a focus on compatibility and correctness.
//!
//! ### Reading
//!
//! [ArchiveReader](reader::ArchiveReader) is your first stop. It
//! ensures we are dealing with a valid zip archive, and reads the central
//! directory. It does not perform I/O itself, but rather, it is a state machine
//! that asks for reads at specific offsets.
//!
//! An [Archive] contains a full list of [entries](StoredEntry),
//! which you can then extract.
//!
//! ### Writing
//!
//! Writing archives is not implemented yet.
//!
mod encoding;

mod error;
Expand All @@ -25,4 +6,4 @@ pub use error::*;
mod format;
pub use format::*;

pub mod reader;
pub mod fsm;
1 change: 0 additions & 1 deletion rc-zip/src/reader/entry.rs

This file was deleted.

0 comments on commit 2f22013

Please sign in to comment.