From 35342b23e66e87aa825a8545eb25ddb90f79d1c7 Mon Sep 17 00:00:00 2001 From: Amos Wenger Date: Tue, 30 Jan 2024 16:45:01 +0100 Subject: [PATCH] Now to use all those parsers --- crates/rc-zip/src/encoding.rs | 4 +- crates/rc-zip/src/format/date_time.rs | 4 +- crates/rc-zip/src/format/directory_header.rs | 7 +- crates/rc-zip/src/format/eocd.rs | 8 +- crates/rc-zip/src/format/extra_field.rs | 26 +++-- crates/rc-zip/src/format/local.rs | 109 +++++++++--------- crates/rc-zip/src/format/version.rs | 3 +- crates/rc-zip/src/reader/archive_reader.rs | 2 +- .../src/reader/sync/entry_reader/mod.rs | 6 +- 9 files changed, 86 insertions(+), 83 deletions(-) diff --git a/crates/rc-zip/src/encoding.rs b/crates/rc-zip/src/encoding.rs index 1684d86..823221d 100644 --- a/crates/rc-zip/src/encoding.rs +++ b/crates/rc-zip/src/encoding.rs @@ -53,7 +53,7 @@ impl From for DecodingError { } impl Encoding { - pub(crate) fn decode(&self, i: &mut Partial<&'_ [u8]>) -> Result { + pub(crate) fn decode(&self, i: &[u8]) -> Result { match self { Encoding::Utf8 => { let s = std::str::from_utf8(i)?; @@ -69,7 +69,7 @@ impl Encoding { fn decode_as( &self, - i: &mut Partial<&'_ [u8]>, + i: &[u8], encoding: &'static encoding_rs::Encoding, ) -> Result { let mut decoder = encoding.new_decoder(); diff --git a/crates/rc-zip/src/format/date_time.rs b/crates/rc-zip/src/format/date_time.rs index 8f1e7c9..4af476c 100644 --- a/crates/rc-zip/src/format/date_time.rs +++ b/crates/rc-zip/src/format/date_time.rs @@ -28,10 +28,10 @@ impl fmt::Debug for MsdosTimestamp { impl MsdosTimestamp { pub fn parser(i: &mut Partial<&'_ [u8]>) -> PResult { - seq!(Self { + seq! {Self { time: le_u16, date: le_u16, - }) + }} .parse_next(i) } diff --git a/crates/rc-zip/src/format/directory_header.rs b/crates/rc-zip/src/format/directory_header.rs index e6eb123..ff73cf6 100644 --- a/crates/rc-zip/src/format/directory_header.rs +++ b/crates/rc-zip/src/format/directory_header.rs @@ -140,10 +140,10 @@ impl DirectoryHeader { needs_header_offset: self.header_offset == !0u32, }; - let mut slice = &self.extra.0[..]; + let mut slice = Partial::new(&self.extra.0[..]); while !slice.is_empty() { - match ExtraField::parse(slice, &settings) { - Ok((remaining, ef)) => { + match ExtraField::mk_parser(settings).parse_next(&mut slice) { + Ok(ef) => { match &ef { ExtraField::Zip64(z64) => { if let Some(n) = z64.uncompressed_size { @@ -185,7 +185,6 @@ impl DirectoryHeader { _ => {} }; extra_fields.push(ef); - slice = remaining; } Err(e) => { trace!("extra field error: {:#?}", e); diff --git a/crates/rc-zip/src/format/eocd.rs b/crates/rc-zip/src/format/eocd.rs index ba9b951..4dc4596 100644 --- a/crates/rc-zip/src/format/eocd.rs +++ b/crates/rc-zip/src/format/eocd.rs @@ -76,11 +76,11 @@ impl EndOfCentralDirectory64Locator { pub fn parser(i: &mut Partial<&'_ [u8]>) -> PResult { _ = tag(Self::SIGNATURE).parse_next(i)?; - seq!(Self { + seq! {Self { dir_disk_number: le_u32, directory_offset: le_u64, total_disks: le_u32, - }) + }} .parse_next(i) } } @@ -114,7 +114,7 @@ impl EndOfCentralDirectory64Record { pub fn parser(i: &mut Partial<&'_ [u8]>) -> PResult { _ = tag(Self::SIGNATURE).parse_next(i)?; - seq!(Self { + seq! {Self { record_size: le_u64, creator_version: le_u16, reader_version: le_u16, @@ -124,7 +124,7 @@ impl EndOfCentralDirectory64Record { directory_records: le_u64, directory_size: le_u64, directory_offset: le_u64, - }) + }} .parse_next(i) } } diff --git a/crates/rc-zip/src/format/extra_field.rs b/crates/rc-zip/src/format/extra_field.rs index 4ebb0f6..0b4db75 100644 --- a/crates/rc-zip/src/format/extra_field.rs +++ b/crates/rc-zip/src/format/extra_field.rs @@ -15,10 +15,10 @@ pub(crate) struct ExtraFieldRecord<'a> { impl<'a> ExtraFieldRecord<'a> { pub(crate) fn parser(i: &mut Partial<&'_ [u8]>) -> PResult { - seq!(Self { + seq! {Self { tag: le_u16, payload: length_take(le_u16), - }) + }} .parse_next(i) } } @@ -70,17 +70,19 @@ impl ExtraField { let variant = match rec.tag { ExtraZip64Field::TAG => { - opt(ExtraZip64Field::mk_parser(settings).map(EF::Zip64)).parse_next(payload) + opt(ExtraZip64Field::mk_parser(settings).map(EF::Zip64)).parse_next(payload)? } ExtraTimestampField::TAG => { - opt(ExtraTimestampField::parser.map(EF::Timestamp)).parse_next(payload) + opt(ExtraTimestampField::parser.map(EF::Timestamp)).parse_next(payload)? + } + ExtraNtfsField::TAG => { + opt(ExtraNtfsField::parse.map(EF::Ntfs)).parse_next(payload)? } - ExtraNtfsField::TAG => opt(ExtraNtfsField::parse.map(EF::Ntfs)).parse_next(payload), ExtraUnixField::TAG | ExtraUnixField::TAG_INFOZIP => { - opt(ExtraUnixField::parser.map(EF::Unix)).parse_next(payload) + opt(ExtraUnixField::parser.map(EF::Unix)).parse_next(payload)? } ExtraNewUnixField::TAG => { - opt(ExtraNewUnixField::parser.map(EF::NewUnix)).parse_next(payload) + opt(ExtraNewUnixField::parser.map(EF::NewUnix)).parse_next(payload)? } _ => None, } @@ -107,11 +109,11 @@ impl ExtraZip64Field { ) -> impl FnMut(&mut Partial<&'_ [u8]>) -> PResult { move |i| { // N.B: we ignore "disk start number" - seq!(Self { + seq! {Self { uncompressed_size: cond(settings.needs_uncompressed_size, le_u64), compressed_size: cond(settings.needs_compressed_size, le_u64), header_offset: cond(settings.needs_header_offset, le_u64), - }) + }} .parse_next(i) } } @@ -131,7 +133,7 @@ impl ExtraTimestampField { preceded( // 1 byte of flags, if bit 0 is set, modification time is present le_u8.verify(|x| x & 0b1 != 0), - seq!(Self { mtime: le_u32 }), + seq! {Self { mtime: le_u32 }}, ) .parse_next(i) } @@ -268,11 +270,11 @@ pub struct NtfsAttr1 { impl NtfsAttr1 { fn parser(i: &mut Partial<&'_ [u8]>) -> PResult { - seq!(Self { + seq! {Self { mtime: NtfsTimestamp::parser, atime: NtfsTimestamp::parser, ctime: NtfsTimestamp::parser, - }) + }} .parse_next(i) } } diff --git a/crates/rc-zip/src/format/local.rs b/crates/rc-zip/src/format/local.rs index 17f086f..4fc42ed 100644 --- a/crates/rc-zip/src/format/local.rs +++ b/crates/rc-zip/src/format/local.rs @@ -1,9 +1,10 @@ -use crate::{fields, format::*}; +use crate::format::*; use winnow::{ - bytes::streaming::tag, + binary::{le_u16, le_u32, le_u64}, combinator::opt, - number::streaming::{le_u16, le_u32, le_u64}, - sequence::preceded, + seq, + token::tag, + PResult, Parser, Partial, }; #[derive(Debug)] @@ -32,34 +33,34 @@ pub struct LocalFileHeaderRecord { impl LocalFileHeaderRecord { pub const SIGNATURE: &'static str = "PK\x03\x04"; - pub fn parse(i: &mut Partial<&'_ [u8]>) -> PResult { - preceded( - tag(Self::SIGNATURE), - fields!({ - reader_version: Version::parse, - flags: le_u16, - method: le_u16, - modified: MsdosTimestamp::parse, - crc32: le_u32, - compressed_size: le_u32, - uncompressed_size: le_u32, - name_len: le_u16, - extra_len: le_u16, - } chain fields!({ - name: ZipString::parser(name_len), - extra: ZipBytes::parser(extra_len), - } map Self { - reader_version, - flags, - method, - modified, - crc32, - compressed_size, - uncompressed_size, - name, - extra, - })), - )(i) + pub fn parser(i: &mut Partial<&'_ [u8]>) -> PResult { + let _ = tag(Self::SIGNATURE).parse_next(i)?; + + let reader_version = Version::parser.parse_next(i)?; + let flags = le_u16.parse_next(i)?; + let method = le_u16.parse_next(i)?; + let modified = MsdosTimestamp::parser.parse_next(i)?; + let crc32 = le_u32.parse_next(i)?; + let compressed_size = le_u32.parse_next(i)?; + let uncompressed_size = le_u32.parse_next(i)?; + + let name_len = le_u16.parse_next(i)?; + let extra_len = le_u16.parse_next(i)?; + + let name = ZipString::parser(name_len).parse_next(i)?; + let extra = ZipBytes::parser(extra_len).parse_next(i)?; + + Ok(Self { + reader_version, + flags, + method, + modified, + crc32, + compressed_size, + uncompressed_size, + name, + extra, + }) } pub fn has_data_descriptor(&self) -> bool { @@ -83,29 +84,31 @@ pub struct DataDescriptorRecord { impl DataDescriptorRecord { const SIGNATURE: &'static str = "PK\x07\x08"; - pub fn parse(i: &mut Partial<&'_ [u8]>, is_zip64: bool) -> PResult { + pub fn parser(i: &mut Partial<&'_ [u8]>, is_zip64: bool) -> PResult { + // From appnote.txt: + // + // 4.3.9.3 Although not originally assigned a signature, the value + // 0x08074b50 has commonly been adopted as a signature value for the + // data descriptor record. Implementers SHOULD be aware that ZIP files + // MAY be encountered with or without this signature marking data + // descriptors and SHOULD account for either case when reading ZIP files + // to ensure compatibility. + let _ = opt(tag(Self::SIGNATURE)).parse_next(i)?; + if is_zip64 { - preceded( - opt(tag(Self::SIGNATURE)), - fields!(Self { - crc32: le_u32, - compressed_size: le_u64, - uncompressed_size: le_u64, - }), - )(i) + seq! {Self { + crc32: le_u32, + compressed_size: le_u64, + uncompressed_size: le_u64, + }} + .parse_next(i) } else { - preceded( - opt(tag(Self::SIGNATURE)), - fields!({ - crc32: le_u32, - compressed_size: le_u32, - uncompressed_size: le_u32, - } map Self { - crc32, - compressed_size: compressed_size as u64, - uncompressed_size: uncompressed_size as u64, - }), - )(i) + seq! {Self { + crc32: le_u32, + compressed_size: le_u32.map(|x| x as u64), + uncompressed_size: le_u32.map(|x| x as u64), + }} + .parse_next(i) } } } diff --git a/crates/rc-zip/src/format/version.rs b/crates/rc-zip/src/format/version.rs index 7009cfb..1b9ac8f 100644 --- a/crates/rc-zip/src/format/version.rs +++ b/crates/rc-zip/src/format/version.rs @@ -1,6 +1,5 @@ -use crate::format::*; use std::fmt; -use winnow::{combinator::map, number::streaming::le_u16, PResult, Partial}; +use winnow::{binary::le_u16, PResult, Parser, Partial}; /// A zip version (either created by, or required when reading an archive). /// diff --git a/crates/rc-zip/src/reader/archive_reader.rs b/crates/rc-zip/src/reader/archive_reader.rs index de8fbe0..171222a 100644 --- a/crates/rc-zip/src/reader/archive_reader.rs +++ b/crates/rc-zip/src/reader/archive_reader.rs @@ -2,7 +2,7 @@ use crate::{encoding::Encoding, error::*, format::*, reader::buffer::*, transiti use std::io::Read; use tracing::trace; -use winnow::Offset; +use winnow::stream::Offset; /// ArchiveReader parses a valid zip archive into an [Archive][]. In particular, this struct finds /// an end of central directory record, parses the entire central directory, detects text encoding, diff --git a/crates/rc-zip/src/reader/sync/entry_reader/mod.rs b/crates/rc-zip/src/reader/sync/entry_reader/mod.rs index b43a6c5..cb5da72 100644 --- a/crates/rc-zip/src/reader/sync/entry_reader/mod.rs +++ b/crates/rc-zip/src/reader/sync/entry_reader/mod.rs @@ -23,7 +23,7 @@ use cfg_if::cfg_if; use oval::Buffer; use std::io; use tracing::trace; -use winnow::Offset; +use winnow::stream::Offset; struct EntryReadMetrics { uncompressed_size: u64, @@ -77,7 +77,7 @@ where let read_bytes = self.rd.read(buffer.space())?; buffer.fill(read_bytes); - match LocalFileHeaderRecord::parse(buffer.data()) { + match LocalFileHeaderRecord::parser(buffer.data()) { Ok((remaining, header)) => { let consumed = buffer.data().offset(remaining); buffer.consume(consumed); @@ -168,7 +168,7 @@ where buffer.available_space() ); - match DataDescriptorRecord::parse(buffer.data(), self.inner.is_zip64) { + match DataDescriptorRecord::parser(buffer.data(), self.inner.is_zip64) { Ok((_remaining, descriptor)) => { trace!("data descriptor = {:#?}", descriptor); transition!(self.state => (S::ReadDataDescriptor { metrics, header, .. }) {