From e11dfa5666b865b7bc43bea2f40ec35b1703a159 Mon Sep 17 00:00:00 2001 From: Amos Wenger Date: Fri, 26 Jan 2024 15:43:45 +0100 Subject: [PATCH 1/4] Add extra output to jean to figure out why rc-zip doesn't detect this file as zip64 --- crates/jean/src/main.rs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crates/jean/src/main.rs b/crates/jean/src/main.rs index 7ac507c..c199ca8 100644 --- a/crates/jean/src/main.rs +++ b/crates/jean/src/main.rs @@ -75,6 +75,8 @@ fn do_main(cli: Cli) -> Result<(), Box> { if let Some(comment) = archive.comment() { println!("Comment:\n{}", comment); } + let has_zip64 = archive.entries().any(|entry| entry.inner.is_zip64); + println!("{}", if has_zip64 { "Zip64" } else { "Zip32" }); let mut creator_versions = HashSet::::new(); let mut reader_versions = HashSet::::new(); @@ -156,6 +158,11 @@ fn do_main(cli: Cli) -> Result<(), Box> { if let Some(comment) = entry.comment() { print!("\t{comment}", comment = comment); } + if entry.inner.is_zip64 { + print!("\tZip64"); + } else { + print!("\tZip32"); + } } println!(); } From 6c09f5982bc8d35b7ef09c331ef718f117732aef Mon Sep 17 00:00:00 2001 From: Amos Wenger Date: Fri, 26 Jan 2024 15:58:47 +0100 Subject: [PATCH 2/4] Upgrade appnote --- crates/rc-zip/testdata/appnote.txt | 89 +++++++++++++++++++++++++++--- 1 file changed, 80 insertions(+), 9 deletions(-) diff --git a/crates/rc-zip/testdata/appnote.txt b/crates/rc-zip/testdata/appnote.txt index f9e00c8..9ad4153 100644 --- a/crates/rc-zip/testdata/appnote.txt +++ b/crates/rc-zip/testdata/appnote.txt @@ -1,8 +1,8 @@ File: APPNOTE.TXT - .ZIP File Format Specification -Version: 6.3.6 -Status: FINAL - replaces version 6.3.5 -Revised: April 26, 2019 -Copyright (c) 1989 - 2014, 2018, 2019 PKWARE Inc., All Rights Reserved. +Version: 6.3.10 +Status: FINAL - replaces version 6.3.9 +Revised: Nov 01, 2022 +Copyright (c) 1989 - 2014, 2018, 2019, 2020, 2022 PKWARE Inc., All Rights Reserved. 
1.0 Introduction --------------- @@ -208,6 +208,32 @@ Copyright (c) 1989 - 2014, 2018, 2019 PKWARE Inc., All Rights Reserved. 6.3.6 -Corrected typographical error 04/26/2019 (4.4.1.3) + 6.3.7 -Added Zstandard compression method ID + (4.4.5) + + -Corrected several reported typos + + -Marked intended use for general purpose bit 14 + + -Added Data Stream Alignment Extra Data info + (4.6.11) + + 6.3.8 -Resolved Zstandard compression method ID conflict + (4.4.5) + + -Added additional compression method ID values in use + + 6.3.9 -Corrected a typo in Data Stream Alignment description + (4.6.11) + + 6.3.10 -Added several z/OS attribute values for APPENDIX B + + -Added several additional 3rd party Extra Field mappings + (thanks to Armijn Hemel @tjaldur.nl for forwarding info + on several of the Header ID's) + + + 3.0 Notations ------------- @@ -314,7 +340,7 @@ Copyright (c) 1989 - 2014, 2018, 2019 PKWARE Inc., All Rights Reserved. records defined in this specification MAY be used as needed to support storage requirements for individual ZIP files. - 4.3.2 Each file placed into a ZIP file MUST be preceeded by a "local + 4.3.2 Each file placed into a ZIP file MUST be preceded by a "local file header" record for that file. Each "local file header" MUST be accompanied by a corresponding "central directory header" record within the central directory section of the ZIP file. @@ -821,7 +847,7 @@ Copyright (c) 1989 - 2014, 2018, 2019 PKWARE Inc., All Rights Reserved. PKWARE Proprietary Technology into Your Product" for more information. - Bit 14: Reserved by PKWARE. + Bit 14: Reserved by PKWARE for alternate streams. Bit 15: Reserved by PKWARE. @@ -846,7 +872,11 @@ Copyright (c) 1989 - 2014, 2018, 2019 PKWARE Inc., All Rights Reserved. 
16 - IBM z/OS CMPSC Compression 17 - Reserved by PKWARE 18 - File is compressed using IBM TERSE (new) - 19 - IBM LZ77 z Architecture (PFS) + 19 - IBM LZ77 z Architecture + 20 - deprecated (use method 93 for zstd) + 93 - Zstandard (zstd) Compression + 94 - MP3 Compression + 95 - XZ Compression 96 - JPEG variant 97 - WavPack compressed data 98 - PPMd version I, Rev 1 @@ -1535,17 +1565,21 @@ Value Size Description 4.6.1 Third party mappings commonly used are: 0x07c8 Macintosh + 0x1986 Pixar USD header ID 0x2605 ZipIt Macintosh 0x2705 ZipIt Macintosh 1.3.5+ 0x2805 ZipIt Macintosh 1.3.5+ 0x334d Info-ZIP Macintosh + 0x4154 Tandem 0x4341 Acorn/SparkFS 0x4453 Windows NT security descriptor (binary ACL) 0x4704 VM/CMS 0x470f MVS + 0x4854 THEOS (old?) 0x4b46 FWKCS MD5 (see below) 0x4c41 OS/2 access control list (text ACL) - 0x4d49 Info-ZIP OpenVMS + 0x4d49 Info-ZIP OpenVMS + 0x4d63 Macintosh Smartzip (??) 0x4f4c Xceed original location extra field 0x5356 AOS/VS (ACL) 0x5455 extended timestamp @@ -1553,10 +1587,17 @@ Value Size Description 0x5855 Info-ZIP UNIX (original, also OS/2, NT, etc) 0x6375 Info-ZIP Unicode Comment Extra Field 0x6542 BeOS/BeBox + 0x6854 THEOS 0x7075 Info-ZIP Unicode Path Extra Field + 0x7441 AtheOS/Syllable 0x756e ASi UNIX 0x7855 Info-ZIP UNIX (new) + 0x7875 Info-ZIP UNIX (newer UID/GID) + 0xa11e Data Stream Alignment (Apache Commons-Compress) 0xa220 Microsoft Open Packaging Growth Hint + 0xcafe Java JAR file Extra Field Header ID + 0xd935 Android ZIP Alignment Extra Field + 0xe57a Korean ZIP code page info 0xfd4a SMS/QDOS 0x9901 AE-x encryption structure (see APPENDIX E) 0x9902 unknown @@ -1591,6 +1632,7 @@ Value Size Description known type. Third-party Mappings: + Not all third-party extra field mappings are documented here. 
4.6.4 -ZipIt Macintosh Extra Field (long) (0x2605): @@ -1785,6 +1827,32 @@ Value Size Description PadVal Short Initial padding value Padding variable filled with NULL characters + 4.6.11 -Data Stream Alignment (Apache Commons-Compress) (0xa11e): + + (per Zbynek Vyskovsky) Defines alignment of data stream of this + entry within the zip archive. Additionally, indicates whether the + compression method should be kept when re-compressing the zip file. + + The purpose of this extra field is to align specific resources to + word or page boundaries so they can be easily mapped into memory. + + Value Size Description + ----- ---- ----------- + 0xa11e Short tag for this extra block type + TSize Short total data size for this block (2+padding) + alignment Short required alignment and indicator + 0x00 Variable padding + + The alignment field (lower 15 bits) defines the minimal alignment + required by the data stream. Bit 15 of alignment field indicates + whether the compression method of this entry can be changed when + recompressing the zip file. The value 0 means the compression method + should not be changed. The value 1 indicates the compression method + may be changed. The padding field contains padding to ensure the correct + alignment. It can be changed at any time when the offset or required + alignment changes. (see https://issues.apache.org/jira/browse/COMPRESS-391) + + 4.7 Manifest Files ------------------ @@ -3329,7 +3397,7 @@ from 9-13). Flag values are defined below. 
8.5.1 Capacities for split archives are as follows: Maximum number of segments = 4,294,967,295 - 1 - Maximum .ZIP segment size = 4,294,967,295 bytes (refer to section 8.5.6) + Maximum .ZIP segment size = 4,294,967,295 bytes Minimum segment size = 64K Maximum PKSFX segment size = 2,147,483,647 bytes @@ -3597,6 +3665,9 @@ B.2 Field Code Description 009D NONVSAM DSORG 2 bytes 009E Program Virtual Object Info 3 bytes 009F Encapsulated file Info 9 bytes + 00A2 Cluster Log 4 bytes Binary + 00A3 Cluster LSID Length 4 bytes Binary + 00A4 Cluster LSID 26 bytes EBCDIC 400C Unix File Creation Time 4 bytes 400D Unix File Access Time 4 bytes 400E Unix File Modification time 4 bytes From e0ca548e7da4d09bf1090db7d00063eab5b6c46d Mon Sep 17 00:00:00 2001 From: Amos Wenger Date: Fri, 26 Jan 2024 15:59:00 +0100 Subject: [PATCH 3/4] Add tracing-subscriber to jean, add more tracing around eocdr parsing --- Cargo.lock | 74 ++++++++++++++++++++++ crates/jean/Cargo.toml | 1 + crates/jean/src/main.rs | 2 + crates/rc-zip/src/format/eocd.rs | 3 + crates/rc-zip/src/reader/archive_reader.rs | 6 ++ 5 files changed, 86 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index e588e37..cc74cdc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -335,6 +335,7 @@ dependencies = [ "indicatif", "positioned-io", "rc-zip", + "tracing-subscriber", ] [[package]] @@ -416,6 +417,16 @@ dependencies = [ "minimal-lexical", ] +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + [[package]] name = "num-traits" version = "0.2.17" @@ -449,6 +460,12 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + [[package]] name = "phf" version = "0.11.2" @@ -610,12 +627,27 @@ dependencies = [ "serde", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "siphasher" version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" +[[package]] +name = "smallvec" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" + [[package]] name = "strsim" version = "0.10.0" @@ -653,6 +685,16 @@ dependencies = [ "syn", ] +[[package]] +name = "thread_local" +version = "1.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152" +dependencies = [ + "cfg-if", + "once_cell", +] + [[package]] name = "tracing" version = "0.1.40" @@ -682,6 +724,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "nu-ansi-term", + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", + 
"tracing-log", ] [[package]] @@ -702,6 +770,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + [[package]] name = "version_check" version = "0.9.4" diff --git a/crates/jean/Cargo.toml b/crates/jean/Cargo.toml index 2c6738d..e56f0a7 100644 --- a/crates/jean/Cargo.toml +++ b/crates/jean/Cargo.toml @@ -13,3 +13,4 @@ clap = { version = "4.4.18", features = ["derive"] } humansize = "2.1.3" positioned-io.workspace = true indicatif = "0.17.7" +tracing-subscriber = "0.3.18" diff --git a/crates/jean/src/main.rs b/crates/jean/src/main.rs index c199ca8..750099e 100644 --- a/crates/jean/src/main.rs +++ b/crates/jean/src/main.rs @@ -66,6 +66,8 @@ enum Commands { } fn main() { + tracing_subscriber::fmt::init(); + let cli = Cli::parse(); do_main(cli).unwrap(); } diff --git a/crates/rc-zip/src/format/eocd.rs b/crates/rc-zip/src/format/eocd.rs index f9f2a8a..48c187e 100644 --- a/crates/rc-zip/src/format/eocd.rs +++ b/crates/rc-zip/src/format/eocd.rs @@ -9,6 +9,7 @@ use nom::{ use tracing::trace; /// 4.3.16 End of central directory record: +#[derive(Debug)] pub struct EndOfCentralDirectoryRecord { /// number of this disk pub disk_nbr: u16, @@ -107,6 +108,7 @@ impl EndOfCentralDirectory64Locator { } /// 4.3.14 Zip64 end of central directory record +#[derive(Debug)] pub struct EndOfCentralDirectory64Record { /// size of zip64 end of central directory record pub record_size: u64, @@ -150,6 +152,7 @@ impl EndOfCentralDirectory64Record { } } +#[derive(Debug)] pub struct Located<T> { pub offset: u64, pub inner: T, diff --git a/crates/rc-zip/src/reader/archive_reader.rs b/crates/rc-zip/src/reader/archive_reader.rs index 5750377..b5c49de 100644 --- 
a/crates/rc-zip/src/reader/archive_reader.rs +++ b/crates/rc-zip/src/reader/archive_reader.rs @@ -173,11 +173,17 @@ impl ArchiveReader { } { None => Err(FormatError::DirectoryEndSignatureNotFound.into()), Some(mut eocdr) => { + trace!(?eocdr, "ReadEocd | found end of central directory record"); buffer.reset(); eocdr.offset += self.size - haystack_size; if eocdr.offset < EndOfCentralDirectory64Locator::LENGTH as u64 { // no room for an EOCD64 locator, definitely not a zip64 file + trace!( + offset = eocdr.offset, + eocd64locator_length = EndOfCentralDirectory64Locator::LENGTH, + "no room for an EOCD64 locator, definitely not a zip64 file" + ); transition!(self.state => (S::ReadEocd { mut buffer, .. }) { buffer.reset(); S::ReadCentralDirectory { From 2ee90e1ab907b9a0399f56aa0f077d1fa343faf3 Mon Sep 17 00:00:00 2001 From: Amos Wenger Date: Fri, 26 Jan 2024 16:24:21 +0100 Subject: [PATCH 4/4] fix: Read 64-bit extra fields compressed/uncompressed sizes properly Closes #31 --- crates/rc-zip/src/format/directory_header.rs | 5 +++-- crates/rc-zip/src/format/eocd.rs | 1 + crates/rc-zip/src/format/extra_field.rs | 1 + crates/rc-zip/src/reader/archive_reader.rs | 13 ++++++++++++- 4 files changed, 17 insertions(+), 3 deletions(-) diff --git a/crates/rc-zip/src/format/directory_header.rs b/crates/rc-zip/src/format/directory_header.rs index cabdf5c..85d2da0 100644 --- a/crates/rc-zip/src/format/directory_header.rs +++ b/crates/rc-zip/src/format/directory_header.rs @@ -138,10 +138,11 @@ impl DirectoryHeader { let mut extra_fields: Vec = Vec::new(); let settings = ExtraFieldSettings { - needs_compressed_size: self.uncompressed_size == !0u32, - needs_uncompressed_size: self.compressed_size == !0u32, + needs_compressed_size: self.compressed_size == !0u32, + needs_uncompressed_size: self.uncompressed_size == !0u32, needs_header_offset: self.header_offset == !0u32, }; + trace!("extra field settings: {:#?}", settings); let mut slice = &self.extra.0[..]; while !slice.is_empty() { diff 
--git a/crates/rc-zip/src/format/eocd.rs b/crates/rc-zip/src/format/eocd.rs index 48c187e..cef06bf 100644 --- a/crates/rc-zip/src/format/eocd.rs +++ b/crates/rc-zip/src/format/eocd.rs @@ -82,6 +82,7 @@ impl EndOfCentralDirectoryRecord { } /// 4.3.15 Zip64 end of central directory locator +#[derive(Debug)] pub struct EndOfCentralDirectory64Locator { /// number of the disk with the start of the zip64 end of central directory pub dir_disk_number: u32, diff --git a/crates/rc-zip/src/format/extra_field.rs b/crates/rc-zip/src/format/extra_field.rs index 6a4071f..f86d953 100644 --- a/crates/rc-zip/src/format/extra_field.rs +++ b/crates/rc-zip/src/format/extra_field.rs @@ -29,6 +29,7 @@ impl<'a> ExtraFieldRecord<'a> { // is created. The order of the fields in the zip64 extended information record // is fixed, but the fields MUST only appear if the corresponding Local or // Central directory record field is set to 0xFFFF or 0xFFFFFFFF. +#[derive(Debug)] pub(crate) struct ExtraFieldSettings { pub(crate) needs_uncompressed_size: bool, pub(crate) needs_compressed_size: bool, diff --git a/crates/rc-zip/src/reader/archive_reader.rs b/crates/rc-zip/src/reader/archive_reader.rs index b5c49de..f42a6b9 100644 --- a/crates/rc-zip/src/reader/archive_reader.rs +++ b/crates/rc-zip/src/reader/archive_reader.rs @@ -173,7 +173,11 @@ impl ArchiveReader { } { None => Err(FormatError::DirectoryEndSignatureNotFound.into()), Some(mut eocdr) => { - trace!(?eocdr, "ReadEocd | found end of central directory record"); + trace!( + ?eocdr, + size = self.size, + "ReadEocd | found end of central directory record" + ); buffer.reset(); eocdr.offset += self.size - haystack_size; @@ -194,6 +198,7 @@ impl ArchiveReader { }); Ok(R::Continue) } else { + trace!("ReadEocd | transition to ReadEocd64Locator"); transition!(self.state => (S::ReadEocd { mut buffer, .. 
}) { buffer.reset(); S::ReadEocd64Locator { buffer, eocdr } @@ -211,6 +216,8 @@ impl ArchiveReader { } Err(nom::Err::Error(_)) | Err(nom::Err::Failure(_)) => { // we don't have a zip64 end of central directory locator - that's ok! + trace!("ReadEocd64Locator | no zip64 end of central directory locator"); + trace!("ReadEocd64Locator | data we got: {:02x?}", buffer.data()); transition!(self.state => (S::ReadEocd64Locator { mut buffer, eocdr }) { buffer.reset(); S::ReadCentralDirectory { @@ -222,6 +229,10 @@ impl ArchiveReader { Ok(R::Continue) } Ok((_, locator)) => { + trace!( + ?locator, + "ReadEocd64Locator | found zip64 end of central directory locator" + ); transition!(self.state => (S::ReadEocd64Locator { mut buffer, eocdr }) { buffer.reset(); S::ReadEocd64 {