From 93a7f69c694d35c5aea69ee2f51dbda8d6986e4b Mon Sep 17 00:00:00 2001 From: 0xCCF4 <0xccf4-git-commit@lenzentech.de> Date: Fri, 21 Jun 2024 09:26:56 +0200 Subject: [PATCH 01/14] Added the method fn read_zipfile_from_seekablestream(reader: &mut S) that reads the next zipfile entry from a stream by potential parsing the data descriptor This is an alternative method to read a zip file. If possible, use the ZipArchive functions as some information will be missing when reading this manner. This method extends the existing read_zipfile_from_stream method when the stream is seekable. This method is superior to ZipArchive in the special case that the underlying stream implementation must buffer all seen/read data to provide seek back support and the memory consumption must be kept small. This could be the case when reading a ZipFile B.zip nested within a zip file A.zip, that is stored on disk. Since A is seekable when stored as file, A.zip can be read using ZipArchive. Be B a zip file stored in A, then we can read B's content using a decompressing reader. The problem is that the decompressing reader will not be seekable (due to decompression). When one would like to still read contents of B.zip without extracting A to disk, the file B.zip must be buffered in RAM. When using ZipArchive to read B.zip from RAM, the whole B.zip file must be buffered to RAM because the central directory of B.zip is located at the end of the file B.zip. This method will read B.zip from the start of the file and returning the first file entry found in B.zip. After the execution of this function and the ZipFile return value is dropped, the reader will be positioned at the end of the file entry. Since this function will never seek back to before the initial position of the stream when the function was called, the underlying stream implementation may discard, after dropping ZipFile, all buffered data before the current position of the stream. Summarizing: In given scenario, this method must not buffer the whole B.zip file to RAM, but only the first file entry. --- src/read.rs | 279 +++++++++++++++++++-- src/spec.rs | 1 + src/types.rs | 80 ++++-- tests/data/data_descriptor_two_entries.zip | Bin 0 -> 392 bytes tests/deflate64.rs | 102 ++++++++ 5 files changed, 415 insertions(+), 47 deletions(-) create mode 100644 tests/data/data_descriptor_two_entries.zip diff --git a/src/read.rs b/src/read.rs index 9479b6a43..7b0184d6a 100644 --- a/src/read.rs +++ b/src/read.rs @@ -8,10 +8,10 @@ use crate::crc32::Crc32Reader; use crate::extra_fields::{ExtendedTimestamp, ExtraField}; use crate::read::zip_archive::{Shared, SharedBuilder}; use crate::result::{ZipError, ZipResult}; -use crate::spec::{self, FixedSizeBlock, Zip32CentralDirectoryEnd}; +use crate::spec::{self, FixedSizeBlock, Magic, Zip32CentralDirectoryEnd}; use crate::types::{ - AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipFileData, - ZipLocalEntryBlock, + AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipDataDescriptor, + ZipFileData, ZipLocalEntryBlock, ZipLocalEntryBlockAndFields, }; use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator}; use indexmap::IndexMap; @@ -1584,23 +1584,7 @@ impl<'a> Drop for ZipFile<'a> { } } -/// Read ZipFile structures from a non-seekable reader. -/// -/// This is an alternative method to read a zip file. If possible, use the ZipArchive functions -/// as some information will be missing when reading this manner. -/// -/// Reads a file header from the start of the stream. Will return `Ok(Some(..))` if a file is -/// present at the start of the stream. Returns `Ok(None)` if the start of the central directory -/// is encountered. No more files should be read after this. -/// -/// The Drop implementation of ZipFile ensures that the reader will be correctly positioned after -/// the structure is done. -/// -/// Missing fields are: -/// * `comment`: set to an empty string -/// * `data_start`: set to 0 -/// * `external_attributes`: `unix_mode()`: will return None -pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult>> { +fn read_local_fileblock(reader: &mut R) -> ZipResult> { // We can't use the typical ::parse() method, as we follow separate code paths depending on the // "magic" value (since the magic value will be from the central directory header if we've // finished iterating over all the actual files). @@ -1619,9 +1603,29 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult( + block: ZipLocalEntryBlockAndFields, + reader: &'a mut R, + data_descriptor: Option, +) -> ZipResult>> { + let mut result = ZipFileData::from_local_block(block, data_descriptor)?; + + match crate::read::parse_extra_field(&mut result) { Ok(..) | Err(ZipError::Io(..)) => {} Err(e) => return Err(e), } @@ -1630,7 +1634,7 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult(reader: &'a mut R) -> ZipResult(reader: &'a mut R) -> ZipResult>> { + let block = read_local_fileblock(reader)?; + let block = match block { + Some(block) => block, + None => return Ok(None), + }; + + // we can't look for a data descriptor since we can't seek back + // TODO: provide a method that buffers the read data and allows seeking back + read_zipfile_from_fileblock(block, reader, None) +} + +/// Read ZipFile structures from a seekable reader. +/// +/// This is an alternative method to read a zip file. If possible, use the ZipArchive functions +/// as some information will be missing when reading this manner. +/// +/// Reads a file header from the start of the stream. Will return `Ok(Some(..))` if a file is +/// present at the start of the stream. Returns `Ok(None)` if the start of the central directory +/// is encountered. No more files should be read after this. +/// +/// The Drop implementation of ZipFile ensures that the reader will be correctly positioned after +/// the structure is done. +/// +/// This method will not find a ZipFile entry if the zip file uses central directory encryption. +/// In that case you should use ZipArchive functions. +/// +/// This method is superior to ZipArchive in the special case that the underlying stream implementation must +/// buffer all seen/read data to provide seek back support and the memory consumption must be kept small. +/// This could be the case when reading a ZipFile B.zip nested within a zip file A.zip, that is stored on disk. Since A is seekable when stored +/// as file, A.zip can be read using ZipArchive. Be B a zip file stored in A, then we can read B's content +/// using a decompressing reader. The problem is that the decompressing reader will not be seekable (due to decompression). +/// When one would like to still read contents of B.zip without extracting A to disk, the file B.zip must be buffered in RAM. +/// When using ZipArchive to read B.zip from RAM, the whole B.zip file must be buffered to RAM because the central directory of B.zip is located +/// at the end of the file B.zip. +/// This method will read B.zip from the start of the file and returning the first file entry found in B.zip. +/// After the execution of this function and the ZipFile return value is dropped, the reader will be positioned at the end of the file entry. +/// Since this function will never seek back to before the initial position of the stream when the function was called, +/// the underlying stream implementation may discard, after dropping ZipFile, all buffered data before the current position of the stream. +/// Summarizing: In given scenario, this method must not buffer the whole B.zip file to RAM, but only the first file entry. +/// +/// Missing fields are: +/// * `comment`: set to an empty string +/// * `data_start`: set to 0 +/// * `external_attributes`: `unix_mode()`: will return None +pub fn read_zipfile_from_seekablestream( + reader: &mut S, +) -> ZipResult> { + let local_entry_block = read_local_fileblock(reader)?; + let local_entry_block = match local_entry_block { + Some(local_entry_block) => local_entry_block, + None => return Ok(None), + }; + + let using_data_descriptor = local_entry_block.block.flags & (1 << 3) == 1 << 3; + let mut shift_register: u32 = 0; + let mut read_buffer = [0u8; 1]; + if using_data_descriptor { + // we must find the data descriptor to get the compressed size + + let mut read_count_total: usize = 0; + loop { + let read_count = reader.read(&mut read_buffer)?; + if read_count == 0 { + return Ok(None); // EOF + } + + read_count_total += read_count; + + if read_count_total > u32::MAX as usize { + return Ok(None); // file too large + } + + shift_register = shift_register >> 8 | (read_buffer[0] as u32) << 24; + + if read_count_total < 4 { + continue; + } + + if spec::Magic::from_le_bytes(shift_register.to_le_bytes()) + == spec::Magic::DATA_DESCRIPTOR_SIGNATURE + { + // found a potential data descriptor + reader.seek(SeekFrom::Current(-4))?; // go back to the start of the data descriptor + read_count_total -= 4; + + let mut data_descriptor_block = [0u8; mem::size_of::()]; + reader.read_exact(&mut data_descriptor_block)?; + let data_descriptor_block: Box<[u8]> = data_descriptor_block.into(); + // seek back to data end + reader.seek(SeekFrom::Current( + -(mem::size_of::() as i64), + ))?; + + let data_descriptor = ZipDataDescriptor::interpret(&data_descriptor_block)?; + + // check if the data descriptor is indeed valid for the read data + if data_descriptor.compressed_size == read_count_total as u32 { + // todo also check crc + // valid data descriptor + + // seek back to data start + reader.seek(SeekFrom::Current(-(read_count_total as i64)))?; + + return read_zipfile_from_fileblock( + local_entry_block, + reader, + Some(data_descriptor), + ); + } else { + // data descriptor is invalid + + reader.seek(SeekFrom::Current(4))?; // skip the magic number + read_count_total += 4; + + // continue data descriptor searching + } + } + } + } else { + // dont using a data descriptor + read_zipfile_from_fileblock(local_entry_block, reader, None) + } +} + +/// Advance the stream to the next zip file magic number (local file header, central directory header, data descriptor) +/// and return the magic number and the number of bytes read since the call to this function. +/// +/// This function is useful in combination with [read_zipfile_from_seekablestream] to read multiple zip files from a single stream. +/// This function will ever seek back 4 bytes and not before the initial position of the stream when the function was called. +/// +/// The stream will be positioned at the start of the magic number. +/// +/// Returns the found magic number and the number of bytes read since the call to this function. +fn advance_stream_to_next_magic( + reader: &mut S, +) -> ZipResult> { + let mut shift_register: u32 = 0; + let mut read_buffer = [0u8; 1]; + + let mut read_count_total: usize = 0; + loop { + let read_count = reader.read(&mut read_buffer)?; + + if read_count == 0 { + return Ok(None); // EOF + } + + read_count_total += read_count; + + shift_register = shift_register >> 8 | (read_buffer[0] as u32) << 24; + let magic = spec::Magic::from_le_bytes(shift_register.to_le_bytes()); + + if read_count_total < 4 { + continue; + } + + if magic == spec::Magic::LOCAL_FILE_HEADER_SIGNATURE + || magic == spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE + || magic == spec::Magic::DATA_DESCRIPTOR_SIGNATURE + { + // found a potential magic number + reader.seek(SeekFrom::Current(-4))?; // go back to the start of the magic number + return Ok(Some((magic, read_count_total - 4))); + } + } +} + +/// Like advance_stream_to_next_magic but advances until given Magic is encountered +fn advance_stream_to_next_magic_start( + reader: &mut S, + target_magic: Magic, +) -> ZipResult> { + let mut bytes_read_total = 0; + + loop { + match advance_stream_to_next_magic(reader)? { + Some((magic, bytes_read)) => { + bytes_read_total += bytes_read; + if magic == target_magic { + return Ok(Some(bytes_read_total)); + } else { + reader.seek(SeekFrom::Current(1))?; + bytes_read_total += 1; + } + } + None => return Ok(None), + } + } +} + +/// Advance the stream to the next zip file start (local file header start) +/// and return the magic number and the number of bytes read since the call to this function. +/// +/// This function is useful in combination with [read_zipfile_from_seekablestream] to read multiple zip files from a single stream. +/// This function will ever seek back 4 bytes and not before the initial position of the stream when the function was called. +/// +/// The stream will be positioned at the start of the zip file start (local file header start). +/// +/// Returns the number of bytes read since the call to this function. +/// +/// Will return None if the end of the stream is reached. +pub fn advance_stream_to_next_zipfile_start( + reader: &mut S, +) -> ZipResult> { + advance_stream_to_next_magic_start(reader, spec::Magic::LOCAL_FILE_HEADER_SIGNATURE) +} + #[cfg(test)] mod test { use crate::ZipArchive; diff --git a/src/spec.rs b/src/spec.rs index f923d835c..b29b2c494 100644 --- a/src/spec.rs +++ b/src/spec.rs @@ -53,6 +53,7 @@ impl Magic { pub const CENTRAL_DIRECTORY_END_SIGNATURE: Self = Self::literal(0x06054b50); pub const ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE: Self = Self::literal(0x06064b50); pub const ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE: Self = Self::literal(0x07064b50); + pub const DATA_DESCRIPTOR_SIGNATURE: Self = Self::literal(0x08074b50); } /// Similar to [`Magic`], but used for extra field tags as per section 4.5.3 of APPNOTE.TXT. diff --git a/src/types.rs b/src/types.rs index f1921cb47..c554fd083 100644 --- a/src/types.rs +++ b/src/types.rs @@ -478,6 +478,13 @@ pub struct ZipFileData { pub extra_fields: Vec, } +// contains the block and the following fields as read from the file +pub(crate) struct ZipLocalEntryBlockAndFields { + pub(crate) block: ZipLocalEntryBlock, + pub(crate) file_name_raw: Vec, + pub(crate) extra_field: Vec, +} + impl ZipFileData { /// Get the starting offset of the data of the compressed file pub fn data_start(&self) -> u64 { @@ -659,9 +666,9 @@ impl ZipFileData { local_block } - pub(crate) fn from_local_block( - block: ZipLocalEntryBlock, - reader: &mut R, + pub(crate) fn from_local_block( + block: ZipLocalEntryBlockAndFields, + data_descriptor: Option, ) -> ZipResult { let ZipLocalEntryBlock { // magic, @@ -670,13 +677,11 @@ impl ZipFileData { compression_method, last_mod_time, last_mod_date, - crc32, - compressed_size, - uncompressed_size, - file_name_length, - extra_field_length, + mut crc32, + mut compressed_size, + mut uncompressed_size, .. - } = block; + } = block.block; let encrypted: bool = flags & 1 == 1; if encrypted { @@ -689,25 +694,27 @@ impl ZipFileData { /* flags & (1 << 3) != 0 */ let using_data_descriptor: bool = flags & (1 << 3) == 1 << 3; if using_data_descriptor { - return Err(ZipError::UnsupportedArchive( - "The file length is not available in the local header", - )); + match data_descriptor { + None => { + return Err(ZipError::UnsupportedArchive( + "The file uses a data descriptor, but the provided input stream is not seekable or the data descriptor is missing.", + )); + } + Some(data_descriptor) => { + uncompressed_size = data_descriptor.uncompressed_size; + compressed_size = data_descriptor.compressed_size; + crc32 = data_descriptor.crc32; + } + } } /* flags & (1 << 1) != 0 */ let is_utf8: bool = flags & (1 << 11) != 0; let compression_method = crate::CompressionMethod::parse_from_u16(compression_method); - let file_name_length: usize = file_name_length.into(); - let extra_field_length: usize = extra_field_length.into(); - - let mut file_name_raw = vec![0u8; file_name_length]; - reader.read_exact(&mut file_name_raw)?; - let mut extra_field = vec![0u8; extra_field_length]; - reader.read_exact(&mut extra_field)?; let file_name: Box = match is_utf8 { - true => String::from_utf8_lossy(&file_name_raw).into(), - false => file_name_raw.clone().from_cp437().into(), + true => String::from_utf8_lossy(&block.file_name_raw).into(), + false => block.file_name_raw.clone().from_cp437().into(), }; let system: u8 = (version_made_by >> 8).try_into().unwrap(); @@ -725,8 +732,8 @@ impl ZipFileData { compressed_size: compressed_size.into(), uncompressed_size: uncompressed_size.into(), file_name, - file_name_raw: file_name_raw.into(), - extra_field: Some(Arc::new(extra_field)), + file_name_raw: block.file_name_raw.into(), + extra_field: Some(Arc::new(block.extra_field)), central_extra_field: None, file_comment: String::with_capacity(0).into_boxed_str(), // file comment is only available in the central directory // header_start and data start are not available, but also don't matter, since seeking is @@ -985,6 +992,33 @@ impl FixedSizeBlock for ZipLocalEntryBlock { ]; } +#[derive(Copy, Clone, Debug)] +#[repr(packed)] +pub(crate) struct ZipDataDescriptor { + magic: spec::Magic, + pub crc32: u32, + pub compressed_size: u32, + pub uncompressed_size: u32, +} + +impl FixedSizeBlock for crate::types::ZipDataDescriptor { + const MAGIC: spec::Magic = spec::Magic::DATA_DESCRIPTOR_SIGNATURE; + + #[inline(always)] + fn magic(self) -> spec::Magic { + self.magic + } + + const WRONG_MAGIC_ERROR: ZipError = ZipError::InvalidArchive("Invalid data descriptor"); + + to_and_from_le![ + (magic, spec::Magic), + (crc32, u32), + (compressed_size, u32), + (uncompressed_size, u32), + ]; +} + #[derive(Copy, Clone, Debug)] pub(crate) struct Zip64ExtraFieldBlock { magic: spec::ExtraFieldMagic, diff --git a/tests/data/data_descriptor_two_entries.zip b/tests/data/data_descriptor_two_entries.zip new file mode 100644 index 0000000000000000000000000000000000000000..f487cccb9dd07ef67a7371ae549dce8a2a35eb45 GIT binary patch literal 392 zcmWIWW@Zs#-~d9_dD9~ppnwNRb22C}WTfWgi12Kq3vxJ+Ev$Xgz#Q46wl@=U813-oZcr!AIFe7|{Y&FOiFtDT%#3I^H i$i{-)jcg^Vv8g~~iS|W+H!H}|3`{_{4oDvXaTox;c2n*E literal 0 HcmV?d00001 diff --git a/tests/deflate64.rs b/tests/deflate64.rs index b0cd95a95..e48dcf0d6 100644 --- a/tests/deflate64.rs +++ b/tests/deflate64.rs @@ -19,3 +19,105 @@ fn decompress_deflate64() { .expect("couldn't read encrypted and compressed file"); assert_eq!(include_bytes!("data/folder/binary.wmv"), &content[..]); } + +#[test] +fn decompress_deflate64_stream_no_data_descriptor() { + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("data/deflate64.zip")); + let mut stream = io::Cursor::new(v); + + let mut entry = zip::read::read_zipfile_from_seekablestream(&mut stream) + .expect("couldn't open test zip file") + .expect("did not find file entry in zip file"); + assert_eq!("binary.wmv", entry.name()); + + let mut content = Vec::new(); + entry + .read_to_end(&mut content) + .expect("couldn't read encrypted and compressed file"); + assert_eq!(include_bytes!("data/folder/binary.wmv"), &content[..]); +} + +#[test] +fn decompress_deflate64_stream_with_data_descriptor_sanity_check() { + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("data/data_descriptor.zip")); + let mut archive = ZipArchive::new(io::Cursor::new(v)).expect("couldn't open test zip file"); + + let mut file = archive + .by_name("hello.txt") + .expect("couldn't find file in archive"); + assert_eq!("hello.txt", file.name()); + + let mut content = Vec::new(); + file.read_to_end(&mut content) + .expect("couldn't read encrypted and compressed file"); + assert_eq!(b"Hello World\n", &content[..]); +} + +#[test] +fn decompress_deflate64_stream_with_data_descriptor() { + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("data/data_descriptor.zip")); + let mut stream = io::Cursor::new(v); + + let mut entry = zip::read::read_zipfile_from_seekablestream(&mut stream) + .expect("couldn't open test zip file") + .expect("did not find file entry in zip file"); + assert_eq!("hello.txt", entry.name()); + + let mut content = Vec::new(); + entry + .read_to_end(&mut content) + .expect("couldn't read encrypted and compressed file"); + assert_eq!(b"Hello World\n", &content[..]); +} + +#[test] +fn decompress_deflate64_stream_with_data_descriptor_continue() { + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("data/data_descriptor_two_entries.zip")); + let mut stream = io::Cursor::new(v); + + // First entry + + let mut entry = zip::read::read_zipfile_from_seekablestream(&mut stream) + .expect("couldn't open test zip file") + .expect("did not find file entry in zip file"); + assert_eq!("hello.txt", entry.name()); + + let mut content = Vec::new(); + entry + .read_to_end(&mut content) + .expect("couldn't read encrypted and compressed file"); + assert_eq!(b"Hello World\n", &content[..]); + + drop(entry); + + // Second entry + + zip::read::advance_stream_to_next_zipfile_start(&mut stream) + .expect("couldn't advance to next entry in zip file"); + + let mut entry = zip::read::read_zipfile_from_seekablestream(&mut stream) + .expect("couldn't open test zip file") + .expect("did not find file entry in zip file"); + assert_eq!("world.txt", entry.name()); + + let mut content = Vec::new(); + entry + .read_to_end(&mut content) + .expect("couldn't read encrypted and compressed file"); + assert_eq!(b"STUFF\n", &content[..]); + + drop(entry); + + // No more entries + + let entry = zip::read::advance_stream_to_next_zipfile_start(&mut stream) + .expect("couldn't advance to next entry in zip file"); + match entry { + None => (), + _ => assert!(false, "expected no more entries"), + }; +} From 7f0d07b539864e538f459d883f5b0d601adfae85 Mon Sep 17 00:00:00 2001 From: 0xCCF4 <0xccf4-git-commit@lenzentech.de> Date: Fri, 21 Jun 2024 09:45:23 +0200 Subject: [PATCH 02/14] Merged master to feature branch --- src/read.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/read.rs b/src/read.rs index 06a35e02a..59c89c1cc 100644 --- a/src/read.rs +++ b/src/read.rs @@ -1815,7 +1815,7 @@ pub fn read_zipfile_from_seekablestream( /// Returns the found magic number and the number of bytes read since the call to this function. fn advance_stream_to_next_magic( reader: &mut S, -) -> ZipResult> { +) -> ZipResult> { let mut shift_register: u32 = 0; let mut read_buffer = [0u8; 1]; @@ -1850,7 +1850,7 @@ fn advance_stream_to_next_magic( /// Like advance_stream_to_next_magic but advances until given Magic is encountered fn advance_stream_to_next_magic_start( reader: &mut S, - target_magic: Magic, + target_magic: spec::Magic, ) -> ZipResult> { let mut bytes_read_total = 0; From 59f1327e72390813a422e1c7486818da536e9ad7 Mon Sep 17 00:00:00 2001 From: 0xCCF4 <0xccf4-git-commit@lenzentech.de> Date: Fri, 21 Jun 2024 15:55:49 +0200 Subject: [PATCH 03/14] Moved streamed zip read tests to custom test file --- src/read.rs | 15 +++---- tests/deflate64.rs | 102 ------------------------------------------ tests/zip_stream.rs | 105 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 112 insertions(+), 110 deletions(-) create mode 100644 tests/zip_stream.rs diff --git a/src/read.rs b/src/read.rs index 59c89c1cc..99773320b 100644 --- a/src/read.rs +++ b/src/read.rs @@ -773,7 +773,8 @@ impl ZipArchive { Err(e) => invalid_errors.push(e), Ok(o) => { if o.files.len() == footer.number_of_files as usize - || footer.number_of_files == ZIP64_ENTRY_THR as u16 { + || footer.number_of_files == ZIP64_ENTRY_THR as u16 + { ok_results.push((footer.clone(), o)) } else { invalid_errors.push(InvalidArchive("wrong number of files")) @@ -1777,7 +1778,6 @@ pub fn read_zipfile_from_seekablestream( // check if the data descriptor is indeed valid for the read data if data_descriptor.compressed_size == read_count_total as u32 { - // todo also check crc // valid data descriptor // seek back to data start @@ -1791,8 +1791,8 @@ pub fn read_zipfile_from_seekablestream( } else { // data descriptor is invalid - reader.seek(SeekFrom::Current(4))?; // skip the magic number - read_count_total += 4; + reader.seek(SeekFrom::Current(1))?; // skip the magic number + read_count_total += 1; // continue data descriptor searching } @@ -1808,7 +1808,7 @@ pub fn read_zipfile_from_seekablestream( /// and return the magic number and the number of bytes read since the call to this function. /// /// This function is useful in combination with [read_zipfile_from_seekablestream] to read multiple zip files from a single stream. -/// This function will ever seek back 4 bytes and not before the initial position of the stream when the function was called. +/// This function will never seek back more than 4 bytes and not before the initial position of the stream when the function was called. /// /// The stream will be positioned at the start of the magic number. /// @@ -1870,11 +1870,10 @@ fn advance_stream_to_next_magic_start( } } -/// Advance the stream to the next zip file start (local file header start) -/// and return the magic number and the number of bytes read since the call to this function. +/// Advance the stream to the next zip file start (local file header start) returning the number of bytes read since the call to this function. /// /// This function is useful in combination with [read_zipfile_from_seekablestream] to read multiple zip files from a single stream. -/// This function will ever seek back 4 bytes and not before the initial position of the stream when the function was called. +/// This function will never seek back more than 4 bytes and not before the initial position of the stream when the function was called. /// /// The stream will be positioned at the start of the zip file start (local file header start). /// diff --git a/tests/deflate64.rs b/tests/deflate64.rs index e48dcf0d6..b0cd95a95 100644 --- a/tests/deflate64.rs +++ b/tests/deflate64.rs @@ -19,105 +19,3 @@ fn decompress_deflate64() { .expect("couldn't read encrypted and compressed file"); assert_eq!(include_bytes!("data/folder/binary.wmv"), &content[..]); } - -#[test] -fn decompress_deflate64_stream_no_data_descriptor() { - let mut v = Vec::new(); - v.extend_from_slice(include_bytes!("data/deflate64.zip")); - let mut stream = io::Cursor::new(v); - - let mut entry = zip::read::read_zipfile_from_seekablestream(&mut stream) - .expect("couldn't open test zip file") - .expect("did not find file entry in zip file"); - assert_eq!("binary.wmv", entry.name()); - - let mut content = Vec::new(); - entry - .read_to_end(&mut content) - .expect("couldn't read encrypted and compressed file"); - assert_eq!(include_bytes!("data/folder/binary.wmv"), &content[..]); -} - -#[test] -fn decompress_deflate64_stream_with_data_descriptor_sanity_check() { - let mut v = Vec::new(); - v.extend_from_slice(include_bytes!("data/data_descriptor.zip")); - let mut archive = ZipArchive::new(io::Cursor::new(v)).expect("couldn't open test zip file"); - - let mut file = archive - .by_name("hello.txt") - .expect("couldn't find file in archive"); - assert_eq!("hello.txt", file.name()); - - let mut content = Vec::new(); - file.read_to_end(&mut content) - .expect("couldn't read encrypted and compressed file"); - assert_eq!(b"Hello World\n", &content[..]); -} - -#[test] -fn decompress_deflate64_stream_with_data_descriptor() { - let mut v = Vec::new(); - v.extend_from_slice(include_bytes!("data/data_descriptor.zip")); - let mut stream = io::Cursor::new(v); - - let mut entry = zip::read::read_zipfile_from_seekablestream(&mut stream) - .expect("couldn't open test zip file") - .expect("did not find file entry in zip file"); - assert_eq!("hello.txt", entry.name()); - - let mut content = Vec::new(); - entry - .read_to_end(&mut content) - .expect("couldn't read encrypted and compressed file"); - assert_eq!(b"Hello World\n", &content[..]); -} - -#[test] -fn decompress_deflate64_stream_with_data_descriptor_continue() { - let mut v = Vec::new(); - v.extend_from_slice(include_bytes!("data/data_descriptor_two_entries.zip")); - let mut stream = io::Cursor::new(v); - - // First entry - - let mut entry = zip::read::read_zipfile_from_seekablestream(&mut stream) - .expect("couldn't open test zip file") - .expect("did not find file entry in zip file"); - assert_eq!("hello.txt", entry.name()); - - let mut content = Vec::new(); - entry - .read_to_end(&mut content) - .expect("couldn't read encrypted and compressed file"); - assert_eq!(b"Hello World\n", &content[..]); - - drop(entry); - - // Second entry - - zip::read::advance_stream_to_next_zipfile_start(&mut stream) - .expect("couldn't advance to next entry in zip file"); - - let mut entry = zip::read::read_zipfile_from_seekablestream(&mut stream) - .expect("couldn't open test zip file") - .expect("did not find file entry in zip file"); - assert_eq!("world.txt", entry.name()); - - let mut content = Vec::new(); - entry - .read_to_end(&mut content) - .expect("couldn't read encrypted and compressed file"); - assert_eq!(b"STUFF\n", &content[..]); - - drop(entry); - - // No more entries - - let entry = zip::read::advance_stream_to_next_zipfile_start(&mut stream) - .expect("couldn't advance to next entry in zip file"); - match entry { - None => (), - _ => assert!(false, "expected no more entries"), - }; -} diff --git a/tests/zip_stream.rs b/tests/zip_stream.rs new file mode 100644 index 000000000..8549916f9 --- /dev/null +++ b/tests/zip_stream.rs @@ -0,0 +1,105 @@ +use std::io; +use std::io::Read; +use zip::ZipArchive; + +#[test] +fn decompress_stream_no_data_descriptor() { + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("data/deflate64.zip")); + let mut stream = io::Cursor::new(v); + + let mut entry = zip::read::read_zipfile_from_seekablestream(&mut stream) + .expect("couldn't open test zip file") + .expect("did not find file entry in zip file"); + assert_eq!("binary.wmv", entry.name()); + + let mut content = Vec::new(); + entry + .read_to_end(&mut content) + .expect("couldn't read encrypted and compressed file"); + assert_eq!(include_bytes!("data/folder/binary.wmv"), &content[..]); +} + +#[test] +fn decompress_stream_with_data_descriptor_sanity_check() { + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("data/data_descriptor.zip")); + let mut archive = ZipArchive::new(io::Cursor::new(v)).expect("couldn't open test zip file"); + + let mut file = archive + .by_name("hello.txt") + .expect("couldn't find file in archive"); + assert_eq!("hello.txt", file.name()); + + let mut content = Vec::new(); + file.read_to_end(&mut content) + .expect("couldn't read encrypted and compressed file"); + assert_eq!(b"Hello World\n", &content[..]); +} + +#[test] +fn decompress_stream_with_data_descriptor() { + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("data/data_descriptor.zip")); + let mut stream = io::Cursor::new(v); + + let mut entry = zip::read::read_zipfile_from_seekablestream(&mut stream) + .expect("couldn't open test zip file") + .expect("did not find file entry in zip file"); + assert_eq!("hello.txt", entry.name()); + + let mut content = Vec::new(); + entry + .read_to_end(&mut content) + .expect("couldn't read encrypted and compressed file"); + assert_eq!(b"Hello World\n", &content[..]); +} + +#[test] +fn decompress_stream_with_data_descriptor_continue() { + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("data/data_descriptor_two_entries.zip")); + let mut stream = io::Cursor::new(v); + + // First entry + + let mut entry = zip::read::read_zipfile_from_seekablestream(&mut stream) + .expect("couldn't open test zip file") + .expect("did not find file entry in zip file"); + assert_eq!("hello.txt", entry.name()); + + let mut content = Vec::new(); + entry + .read_to_end(&mut content) + .expect("couldn't read encrypted and compressed file"); + assert_eq!(b"Hello World\n", &content[..]); + + drop(entry); + + // Second entry + + zip::read::advance_stream_to_next_zipfile_start(&mut stream) + .expect("couldn't advance to next entry in zip file"); + + let mut entry = zip::read::read_zipfile_from_seekablestream(&mut stream) + .expect("couldn't open test zip file") + .expect("did not find file entry in zip file"); + assert_eq!("world.txt", entry.name()); + + let mut content = Vec::new(); + entry + .read_to_end(&mut content) + .expect("couldn't read encrypted and compressed file"); + assert_eq!(b"STUFF\n", &content[..]); + + drop(entry); + + // No more entries + + let entry = zip::read::advance_stream_to_next_zipfile_start(&mut stream) + .expect("couldn't advance to next entry in zip file"); + match entry { + None => (), + _ => assert!(false, "expected no more entries"), + }; +} From 3bef659fed873d322222c0ceb2161943012a7b3f Mon Sep 17 00:00:00 2001 From: 0xCCF4 <0xccf4-git-commit@lenzentech.de> Date: Sat, 22 Jun 2024 16:18:46 +0200 Subject: [PATCH 04/14] Added security risk documentation and untrusted value struct to encapsulate potential unsafe data --- src/read.rs | 39 ++++++++++++++++++++++----------------- src/result.rs | 32 ++++++++++++++++++++++++++++++++ tests/zip_stream.rs | 12 ++++++++---- 3 files changed, 62 insertions(+), 21 deletions(-) diff --git a/src/read.rs b/src/read.rs index 99773320b..60c5a0b71 100644 --- a/src/read.rs +++ b/src/read.rs @@ -7,12 +7,9 @@ use crate::cp437::FromCp437; use crate::crc32::Crc32Reader; use crate::extra_fields::{ExtendedTimestamp, ExtraField}; use crate::read::zip_archive::{Shared, SharedBuilder}; -use crate::result::{ZipError, ZipResult}; +use crate::result::{UntrustedValue, ZipError, ZipResult}; use crate::spec::{self, FixedSizeBlock, Zip32CentralDirectoryEnd, ZIP64_ENTRY_THR}; -use crate::types::{ - AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipDataDescriptor, - ZipFileData, ZipLocalEntryBlock, ZipLocalEntryBlockAndFields, -}; +use crate::types::{AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipDataDescriptor, ZipFileData, ZipLocalEntryBlock, ZipLocalEntryBlockAndFields}; use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator}; use indexmap::IndexMap; use std::borrow::Cow; @@ -1674,13 +1671,13 @@ fn read_zipfile_from_fileblock<'a, R: Read>( /// the structure is done. /// /// This method will not find a ZipFile entry if the zip file uses data descriptors. -/// In that case you could use [read_zipfile_from_seekable_stream] +/// In that case you could use read_zipfile_from_seekable_stream /// /// Missing fields are: /// * `comment`: set to an empty string /// * `data_start`: set to 0 /// * `external_attributes`: `unix_mode()`: will return None -pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult>> { +pub fn read_zipfile_from_stream(reader: &mut R) -> ZipResult> { let block = read_local_fileblock(reader)?; let block = match block { Some(block) => block, @@ -1688,7 +1685,6 @@ pub fn read_zipfile_from_stream<'a, R: Read>(reader: &'a mut R) -> ZipResult(reader: &'a mut R) -> ZipResult(reader: &'a mut R) -> ZipResult(reader: &'a mut R) -> ZipResult( +pub fn read_zipfile_from_limitedseekable_stream( reader: &mut S, -) -> ZipResult> { +) -> ZipResult>> { let local_entry_block = read_local_fileblock(reader)?; let local_entry_block = match local_entry_block { Some(local_entry_block) => local_entry_block, - None => return Ok(None), + None => return Ok(None.into()), }; let using_data_descriptor = local_entry_block.block.flags & (1 << 3) == 1 << 3; @@ -1744,13 +1749,13 @@ pub fn read_zipfile_from_seekablestream( loop { let read_count = reader.read(&mut read_buffer)?; if read_count == 0 { - return Ok(None); // EOF + return Ok(None.into()); // EOF } read_count_total += read_count; if read_count_total > u32::MAX as usize { - return Ok(None); // file too large + return Ok(None.into()); // file too large } shift_register = shift_register >> 8 | (read_buffer[0] as u32) << 24; @@ -1787,7 +1792,7 @@ pub fn read_zipfile_from_seekablestream( local_entry_block, reader, Some(data_descriptor), - ); + ).map(|result| result.into()); } else { // data descriptor is invalid @@ -1800,14 +1805,14 @@ pub fn read_zipfile_from_seekablestream( } } else { // dont using a data descriptor - read_zipfile_from_fileblock(local_entry_block, reader, None) + read_zipfile_from_fileblock(local_entry_block, reader, None).map(|result| result.into()) } } /// Advance the stream to the next zip file magic number (local file header, central directory header, data descriptor) /// and return the magic number and the number of bytes read since the call to this function. /// -/// This function is useful in combination with [read_zipfile_from_seekablestream] to read multiple zip files from a single stream. +/// This function is useful in combination with read_zipfile_from_limitedseekable_stream to read multiple zip files from a single stream. /// This function will never seek back more than 4 bytes and not before the initial position of the stream when the function was called. /// /// The stream will be positioned at the start of the magic number. @@ -1872,7 +1877,7 @@ fn advance_stream_to_next_magic_start( /// Advance the stream to the next zip file start (local file header start) returning the number of bytes read since the call to this function. /// -/// This function is useful in combination with [read_zipfile_from_seekablestream] to read multiple zip files from a single stream. +/// This function is useful in combination with read_zipfile_from_limitedseekable_stream to read multiple zip files from a single stream. /// This function will never seek back more than 4 bytes and not before the initial position of the stream when the function was called. /// /// The stream will be positioned at the start of the zip file start (local file header start). diff --git a/src/result.rs b/src/result.rs index ec8fbb13f..7e13d05ee 100644 --- a/src/result.rs +++ b/src/result.rs @@ -96,3 +96,35 @@ impl fmt::Display for DateTimeRangeError { } impl Error for DateTimeRangeError {} + +/// Represents a potentially untrusted untrustworthy value. +/// +/// An attacker might be able to control (part) of the returned value. +/// Take special care processing this data. +/// +/// See the method documentation of the function returning this value +pub struct UntrustedValue { + value: T, +} + +impl UntrustedValue { + /// Be sure that you carefully handle the returned value since + /// it may be controllable by a malicious actor. + /// + /// See the method documentation of the function returning this value + pub fn use_untrusted_value(self) -> T { + self.value + } + + /// Wraps the provided value as UntrustedValue + pub fn wrap(value: T) -> Self { + UntrustedValue {value} + } +} + +impl From for UntrustedValue { + fn from(value: T) -> Self { + UntrustedValue::wrap(value) + } +} + diff --git a/tests/zip_stream.rs b/tests/zip_stream.rs index 8549916f9..7b6534d43 100644 --- a/tests/zip_stream.rs +++ b/tests/zip_stream.rs @@ -8,8 +8,9 @@ fn decompress_stream_no_data_descriptor() { v.extend_from_slice(include_bytes!("data/deflate64.zip")); let mut stream = io::Cursor::new(v); - let mut entry = zip::read::read_zipfile_from_seekablestream(&mut stream) + let mut entry = zip::read::read_zipfile_from_limitedseekable_stream(&mut stream) .expect("couldn't open test zip file") + .use_untrusted_value() .expect("did not find file entry in zip file"); assert_eq!("binary.wmv", entry.name()); @@ -43,8 +44,9 @@ fn decompress_stream_with_data_descriptor() { v.extend_from_slice(include_bytes!("data/data_descriptor.zip")); let mut stream = io::Cursor::new(v); - let mut entry = zip::read::read_zipfile_from_seekablestream(&mut stream) + let mut entry = zip::read::read_zipfile_from_limitedseekable_stream(&mut stream) .expect("couldn't open test zip file") + .use_untrusted_value() .expect("did not find file entry in zip file"); assert_eq!("hello.txt", entry.name()); @@ -63,8 +65,9 @@ fn decompress_stream_with_data_descriptor_continue() { // First entry - let mut entry = zip::read::read_zipfile_from_seekablestream(&mut stream) + let mut entry = zip::read::read_zipfile_from_limitedseekable_stream(&mut stream) .expect("couldn't open test zip file") + .use_untrusted_value() .expect("did not find file entry in zip file"); assert_eq!("hello.txt", entry.name()); @@ -81,8 +84,9 @@ fn decompress_stream_with_data_descriptor_continue() { zip::read::advance_stream_to_next_zipfile_start(&mut stream) .expect("couldn't advance to next entry in zip file"); - let mut entry = zip::read::read_zipfile_from_seekablestream(&mut stream) + let mut entry = zip::read::read_zipfile_from_limitedseekable_stream(&mut stream) .expect("couldn't open test zip file") + .use_untrusted_value() .expect("did not find file entry in zip file"); assert_eq!("world.txt", entry.name()); From f6b5da958028c5cb90d6de7b5b56a378de52fc95 Mon Sep 17 00:00:00 2001 From: 0xCCF4 <0xccf4-git-commit@lenzentech.de> Date: Sun, 23 Jun 2024 13:29:53 +0200 Subject: [PATCH 05/14] Library does not require Take anymore but instead accepts a template T: Read in internal data structures Added UntrustedValue and MaybeUntrusted data types --- examples/stdin_info.rs | 10 ++- src/read.rs | 172 +++++++++++++++++++++++++---------------- src/read/stream.rs | 4 +- src/result.rs | 65 ++++++++++++++++ src/types.rs | 9 ++- 5 files changed, 190 insertions(+), 70 deletions(-) diff --git a/examples/stdin_info.rs b/examples/stdin_info.rs index a609916a0..d3cf8b0ad 100644 --- a/examples/stdin_info.rs +++ b/examples/stdin_info.rs @@ -10,7 +10,15 @@ fn real_main() -> i32 { let mut buf = [0u8; 16]; loop { - match zip::read::read_zipfile_from_stream(&mut stdin_handle) { + let file = match zip::read::read_zipfile_from_stream(&mut stdin_handle) { + Err(e) => { + println!("Error encountered while reading zip: {e:?}"); + return 1; + } + Ok(value) => value, + }; + + match file.unwrap_or_error("data descriptors not supported while reading stdin") { Ok(Some(mut file)) => { println!( "{}: {} bytes ({} bytes packed)", diff --git a/src/read.rs b/src/read.rs index 60c5a0b71..294dd15eb 100644 --- a/src/read.rs +++ b/src/read.rs @@ -7,7 +7,7 @@ use crate::cp437::FromCp437; use crate::crc32::Crc32Reader; use crate::extra_fields::{ExtendedTimestamp, ExtraField}; use crate::read::zip_archive::{Shared, SharedBuilder}; -use crate::result::{UntrustedValue, ZipError, ZipResult}; +use crate::result::{MaybeUntrusted, ZipError, ZipResult}; use crate::spec::{self, FixedSizeBlock, Zip32CentralDirectoryEnd, ZIP64_ENTRY_THR}; use crate::types::{AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipDataDescriptor, ZipFileData, ZipLocalEntryBlock, ZipLocalEntryBlockAndFields}; use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator}; @@ -16,9 +16,10 @@ use std::borrow::Cow; use std::ffi::OsString; use std::fs::create_dir_all; use std::io::{self, copy, prelude::*, sink, SeekFrom}; +use std::marker::PhantomData; use std::mem; use std::mem::size_of; -use std::ops::Deref; +use std::ops::{Deref}; use std::path::{Path, PathBuf}; use std::rc::Rc; use std::sync::{Arc, OnceLock}; @@ -127,20 +128,20 @@ use crate::unstable::{path_to_string, LittleEndianReadExt}; pub use zip_archive::ZipArchive; #[allow(clippy::large_enum_variant)] -pub(crate) enum CryptoReader<'a> { - Plaintext(io::Take<&'a mut dyn Read>), - ZipCrypto(ZipCryptoReaderValid>), +pub(crate) enum CryptoReader<'a, T: Read + 'a> { + Plaintext(T, PhantomData<&'a T>), // todo check if phantom data can be removed + ZipCrypto(ZipCryptoReaderValid), #[cfg(feature = "aes-crypto")] Aes { - reader: AesReaderValid>, + reader: AesReaderValid, vendor_version: AesVendorVersion, }, } -impl<'a> Read for CryptoReader<'a> { +impl<'a, T: Read + 'a> Read for CryptoReader<'a, T> { fn read(&mut self, buf: &mut [u8]) -> io::Result { match self { - CryptoReader::Plaintext(r) => r.read(buf), + CryptoReader::Plaintext(r, _) => r.read(buf), CryptoReader::ZipCrypto(r) => r.read(buf), #[cfg(feature = "aes-crypto")] CryptoReader::Aes { reader: r, .. } => r.read(buf), @@ -148,11 +149,11 @@ impl<'a> Read for CryptoReader<'a> { } } -impl<'a> CryptoReader<'a> { +impl<'a, T: Read + 'a> CryptoReader<'a, T> { /// Consumes this decoder, returning the underlying reader. - pub fn into_inner(self) -> io::Take<&'a mut dyn Read> { + pub fn into_inner(self) -> T { match self { - CryptoReader::Plaintext(r) => r, + CryptoReader::Plaintext(r, _) => r, CryptoReader::ZipCrypto(r) => r.into_inner(), #[cfg(feature = "aes-crypto")] CryptoReader::Aes { reader: r, .. } => r.into_inner(), @@ -174,23 +175,23 @@ impl<'a> CryptoReader<'a> { } } -pub(crate) enum ZipFileReader<'a> { +pub(crate) enum ZipFileReader<'a, T: Read + 'a> { NoReader, - Raw(io::Take<&'a mut dyn Read>), - Stored(Crc32Reader>), + Raw(T), + Stored(Crc32Reader>), #[cfg(feature = "_deflate-any")] - Deflated(Crc32Reader>>), + Deflated(Crc32Reader>>), #[cfg(feature = "deflate64")] - Deflate64(Crc32Reader>>>), + Deflate64(Crc32Reader>>>), #[cfg(feature = "bzip2")] - Bzip2(Crc32Reader>>), + Bzip2(Crc32Reader>>), #[cfg(feature = "zstd")] - Zstd(Crc32Reader>>>), + Zstd(Crc32Reader>>>), #[cfg(feature = "lzma")] - Lzma(Crc32Reader>>>), + Lzma(Crc32Reader>>>), } -impl<'a> Read for ZipFileReader<'a> { +impl<'a, T: Read + 'a> Read for ZipFileReader<'a, T> { fn read(&mut self, buf: &mut [u8]) -> io::Result { match self { ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), @@ -210,7 +211,7 @@ impl<'a> Read for ZipFileReader<'a> { } } -impl<'a> ZipFileReader<'a> { +impl<'a, T: Read + 'a> ZipFileReader<'a, T> { /// Consumes this decoder, returning the underlying reader. pub fn drain(self) { let mut inner = match self { @@ -242,22 +243,22 @@ impl<'a> ZipFileReader<'a> { /// A struct for reading a zip file pub struct ZipFile<'a> { pub(crate) data: Cow<'a, ZipFileData>, - pub(crate) crypto_reader: Option>, - pub(crate) reader: ZipFileReader<'a>, + pub(crate) crypto_reader: Option>>, + pub(crate) reader: ZipFileReader<'a, Box>, } -pub(crate) fn find_content<'a>( +pub(crate) fn find_content<'a, T: Read + Seek>( data: &ZipFileData, - reader: &'a mut (impl Read + Seek), -) -> ZipResult> { + mut reader: T, +) -> ZipResult> { // TODO: use .get_or_try_init() once stabilized to provide a closure returning a Result! let data_start = match data.data_start.get() { Some(data_start) => *data_start, - None => find_data_start(data, reader)?, + None => find_data_start(data, &mut reader)?, }; reader.seek(io::SeekFrom::Start(data_start))?; - Ok((reader as &mut dyn Read).take(data.compressed_size)) + Ok(reader.take(data.compressed_size)) } fn find_data_start( @@ -290,16 +291,16 @@ fn find_data_start( } #[allow(clippy::too_many_arguments)] -pub(crate) fn make_crypto_reader<'a>( +pub(crate) fn make_crypto_reader<'a, T: Read + 'a>( compression_method: CompressionMethod, crc32: u32, mut last_modified_time: Option, using_data_descriptor: bool, - reader: io::Take<&'a mut dyn Read>, + reader: T, password: Option<&[u8]>, aes_info: Option<(AesMode, AesVendorVersion, CompressionMethod)>, #[cfg(feature = "aes-crypto")] compressed_size: u64, -) -> ZipResult> { +) -> ZipResult> { #[allow(deprecated)] { if let CompressionMethod::Unsupported(_) = compression_method { @@ -331,16 +332,16 @@ pub(crate) fn make_crypto_reader<'a>( CryptoReader::ZipCrypto(ZipCryptoReader::new(reader, password).validate(validator)?) } (None, Some(_)) => return Err(InvalidPassword), - (None, None) => CryptoReader::Plaintext(reader), + (None, None) => CryptoReader::Plaintext(reader, PhantomData), }; Ok(reader) } -pub(crate) fn make_reader( +pub(crate) fn make_reader( compression_method: CompressionMethod, crc32: u32, - reader: CryptoReader, -) -> ZipResult { + reader: CryptoReader, +) -> ZipResult> { let ae2_encrypted = reader.is_ae2_encrypted(); match compression_method { @@ -1070,7 +1071,7 @@ impl ZipArchive { .ok_or(ZipError::FileNotFound)?; Ok(ZipFile { crypto_reader: None, - reader: ZipFileReader::Raw(find_content(data, reader)?), + reader: ZipFileReader::Raw(Box::new(find_content(data, reader)?)), data: Cow::Borrowed(data), }) } @@ -1098,7 +1099,7 @@ impl ZipArchive { data.crc32, data.last_modified_time, data.using_data_descriptor, - limit_reader, + Box::new(limit_reader) as Box, password, data.aes_mode, #[cfg(feature = "aes-crypto")] @@ -1396,7 +1397,7 @@ pub(crate) fn parse_single_extra_field( /// Methods for retrieving information on zip files impl<'a> ZipFile<'a> { - fn get_reader(&mut self) -> ZipResult<&mut ZipFileReader<'a>> { + fn get_reader(&mut self) -> ZipResult<&mut ZipFileReader<'a, Box>> { if let ZipFileReader::NoReader = self.reader { let data = &self.data; let crypto_reader = self.crypto_reader.take().expect("Invalid reader state"); @@ -1623,19 +1624,69 @@ fn read_local_fileblock(reader: &mut R) -> ZipResult { + reader: T, + +} + +impl ReadTillDataDescriptor { + pub fn new(reader: T) -> Self { + Self { + reader + } + } +} + +impl Read for ReadTillDataDescriptor { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + todo!() + // self.reader.read(buf) + // todo + } +} + +/// Read ZipFile structures from a reader. +/// +/// This method operates in three different modes: +/// 1. data_descriptor=None and allow_unlimited_read=false : Parse the local file header and return zip file. Works if data descriptors are not used. +/// 2. data_descriptor=Some and allow_unlimited_read=false : Parse the local file header, augment the zip file with the size given in the data descriptor and return zip file. Works if data descriptors are used. +/// 3. data_descriptor=None and allow_unlimited_read=true : Parse the local file header, if data descriptors not used, fallback to case 1, else provide a ZipFile that +/// searches for the data descriptor in the file. It just starts reading the compressed stream with no size limit set. When detecting a data descriptor the +/// stream is closed. This is useful when the size of the compressed stream is unknown but brings some security risks. Therefore, returning an UntrustedValue. +/// 4. data_descriptor=Some and allow_unlimited_read=true : Fallback to case 2 +/// +/// Case 3: **Security-disclaimer**: The **output** should **not** be regarded as **secure/trustworthy**. +/// When an attacker has control over a file which is compressed into a zip: The file might be crafted such that +/// when reading with ZipArchive vs this function a parsing mismatch can occur. An attacker that has control over a file +/// can manipulate such file such that this function will regard parts of the file as new zip file entry. The attacker might +/// therefore inject custom zip file entries with attacker controlled content/name/metadata. fn read_zipfile_from_fileblock<'a, R: Read>( block: ZipLocalEntryBlockAndFields, reader: &'a mut R, data_descriptor: Option, -) -> ZipResult>> { - let mut result = ZipFileData::from_local_block(block, data_descriptor)?; + mut allow_unlimited_read: bool, +) -> ZipResult>>> { + + if data_descriptor.is_some() && allow_unlimited_read { + allow_unlimited_read = false; // fallback, size is given + } + + let size_unknown = data_descriptor.is_none() && block.block.flags & (1 << 3) == 1 << 3; // no data descriptor provided but should exist + let mut result = ZipFileData::from_local_block(block, data_descriptor, allow_unlimited_read)?; + + if size_unknown && !allow_unlimited_read { + return Err(ZipError::UnsupportedArchive("Archive is using data descriptors, but unlimited reading not enabled")); + } match crate::read::parse_extra_field(&mut result) { Ok(..) | Err(ZipError::Io(..)) => {} Err(e) => return Err(e), } - let limit_reader = (reader as &'a mut dyn Read).take(result.compressed_size); + let limit_reader: Box = match size_unknown { + false => Box::new((reader as &'a mut dyn Read).take(result.compressed_size)), + true => Box::new(ReadTillDataDescriptor::new(reader)) + }; let result_crc32 = result.crc32; let result_compression_method = result.compression_method; @@ -1648,14 +1699,14 @@ fn read_zipfile_from_fileblock<'a, R: Read>( None, None, #[cfg(feature = "aes-crypto")] - result.compressed_size, + result.compressed_size, // does not matter since aes_info is set to None )?; - Ok(Some(ZipFile { + Ok(MaybeUntrusted::wrap(Some(ZipFile { data: Cow::Owned(result), crypto_reader: None, reader: crate::read::make_reader(result_compression_method, result_crc32, crypto_reader)?, - })) + }), size_unknown)) } /// Read ZipFile structures from a non-seekable reader. @@ -1677,15 +1728,15 @@ fn read_zipfile_from_fileblock<'a, R: Read>( /// * `comment`: set to an empty string /// * `data_start`: set to 0 /// * `external_attributes`: `unix_mode()`: will return None -pub fn read_zipfile_from_stream(reader: &mut R) -> ZipResult> { +pub fn read_zipfile_from_stream(reader: &mut R) -> ZipResult>> { let block = read_local_fileblock(reader)?; let block = match block { Some(block) => block, - None => return Ok(None), + None => return Ok(MaybeUntrusted::wrap_ok(None)), }; // we can't look for a data descriptor since we can't seek back - read_zipfile_from_fileblock(block, reader, None) + read_zipfile_from_fileblock(block, reader, None, true) } /// Read ZipFile structures from a seekable reader. @@ -1706,21 +1757,7 @@ pub fn read_zipfile_from_stream(reader: &mut R) -> ZipResult(reader: &mut R) -> ZipResult( reader: &mut S, -) -> ZipResult>> { +) -> ZipResult>> { + // todo: remove + let local_entry_block = read_local_fileblock(reader)?; let local_entry_block = match local_entry_block { Some(local_entry_block) => local_entry_block, @@ -1792,6 +1831,7 @@ pub fn read_zipfile_from_limitedseekable_stream( local_entry_block, reader, Some(data_descriptor), + false, ).map(|result| result.into()); } else { // data descriptor is invalid @@ -1805,7 +1845,7 @@ pub fn read_zipfile_from_limitedseekable_stream( } } else { // dont using a data descriptor - read_zipfile_from_fileblock(local_entry_block, reader, None).map(|result| result.into()) + read_zipfile_from_fileblock(local_entry_block, reader, None, false) } } @@ -1821,6 +1861,8 @@ pub fn read_zipfile_from_limitedseekable_stream( fn advance_stream_to_next_magic( reader: &mut S, ) -> ZipResult> { + // todo remove + let mut shift_register: u32 = 0; let mut read_buffer = [0u8; 1]; @@ -1946,7 +1988,7 @@ mod test { v.extend_from_slice(include_bytes!("../tests/data/mimetype.zip")); let mut reader = Cursor::new(v); loop { - if read_zipfile_from_stream(&mut reader).unwrap().is_none() { + if read_zipfile_from_stream(&mut reader).unwrap().unwrap_or_error("data descriptors used").unwrap().is_none() { break; } } diff --git a/src/read/stream.rs b/src/read/stream.rs index 7bc91c9cc..f8de0f8d7 100644 --- a/src/read/stream.rs +++ b/src/read/stream.rs @@ -40,7 +40,9 @@ impl ZipStreamReader { /// Iterate over the stream and extract all file and their /// metadata. pub fn visit(mut self, visitor: &mut V) -> ZipResult<()> { - while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? { + // todo allow data descriptors by passing the MaybeUntrusted to the user + // todo check if we are allowed to change the API signature + while let Some(mut file) = read_zipfile_from_stream(&mut self.0)?.unwrap_or_error("zip content size not given in header")? { visitor.visit_file(&mut file)?; } diff --git a/src/result.rs b/src/result.rs index 7e13d05ee..856d2dc23 100644 --- a/src/result.rs +++ b/src/result.rs @@ -128,3 +128,68 @@ impl From for UntrustedValue { } } +/// Represents a value that might be untrusted. See UntrustedValue for more information. +pub enum MaybeUntrusted { + /// Trusted value variant + Ok(T), + /// Untrusted value variant + Untrusted(UntrustedValue), +} + +impl MaybeUntrusted { + /// Be sure that you carefully handle the returned value since + /// it may be controllable by a malicious actor (when it is a MaybeUntrusted::Untrusted). + /// + /// See the method documentation of the function returning this value + pub fn use_untrusted_value(self) -> T { + match self { + MaybeUntrusted::Ok(value) => value, + MaybeUntrusted::Untrusted(value) => value.use_untrusted_value(), + } + } + + /// Unwraps the value if values is not untrusted, if untrusted returns the provided error message + pub fn unwrap_or_error(self, error: &'static str) -> Result { + match self { + MaybeUntrusted::Ok(value) => Ok(value), + MaybeUntrusted::Untrusted(_) => Err(ZipError::InvalidArchive(error)), + } + } + + /// Returns true if the value is untrusted + pub fn is_untrusted(&self) -> bool { + match self { + MaybeUntrusted::Ok(_) => false, + MaybeUntrusted::Untrusted(_) => true, + } + } + + /// Returns true if the value is not untrusted + pub fn is_ok(&self) -> bool { + !self.is_untrusted() + } + + /// Wraps the provided values as Untrusted + pub fn wrap_untrusted(value: T) -> Self { + MaybeUntrusted::Untrusted(value.into()) + } + + /// Wraps the provided values as Ok + pub fn wrap_ok(value: T) -> Self { + MaybeUntrusted::Ok(value) + } + + /// Wraps the provided value as maybe untrusted, according to given boolean + pub fn wrap(value: T, untrusted: bool) -> Self { + match untrusted { + true => Self::wrap_untrusted(value), + false => Self::wrap_ok(value), + } + } +} + +impl From> for MaybeUntrusted { + fn from(value: UntrustedValue) -> Self { + MaybeUntrusted::Untrusted(value) + } +} diff --git a/src/types.rs b/src/types.rs index c554fd083..ff6b0e0b5 100644 --- a/src/types.rs +++ b/src/types.rs @@ -669,6 +669,7 @@ impl ZipFileData { pub(crate) fn from_local_block( block: ZipLocalEntryBlockAndFields, data_descriptor: Option, + allow_empty_data_descriptor: bool, ) -> ZipResult { let ZipLocalEntryBlock { // magic, @@ -696,9 +697,11 @@ impl ZipFileData { if using_data_descriptor { match data_descriptor { None => { - return Err(ZipError::UnsupportedArchive( - "The file uses a data descriptor, but the provided input stream is not seekable or the data descriptor is missing.", - )); + if !allow_empty_data_descriptor { + return Err(ZipError::UnsupportedArchive( + "The file uses a data descriptor, but the provided input stream is not seekable or the data descriptor is missing.", + )); + } } Some(data_descriptor) => { uncompressed_size = data_descriptor.uncompressed_size; From 47f718b3e13d920a37f064fd1ad6fa8887048e5a Mon Sep 17 00:00:00 2001 From: 0xCCF4 <0xccf4-git-commit@lenzentech.de> Date: Sun, 23 Jun 2024 13:50:32 +0200 Subject: [PATCH 06/14] Completed merge master -> feature branch --- src/read.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/read.rs b/src/read.rs index 6708da8bb..b2e8fb814 100644 --- a/src/read.rs +++ b/src/read.rs @@ -1812,7 +1812,7 @@ pub fn read_zipfile_from_limitedseekable_stream( let local_entry_block = read_local_fileblock(reader)?; let local_entry_block = match local_entry_block { Some(local_entry_block) => local_entry_block, - None => return Ok(None.into()), + None => return Ok(MaybeUntrusted::wrap_ok(None)), }; let using_data_descriptor = local_entry_block.block.flags & (1 << 3) == 1 << 3; @@ -1825,13 +1825,13 @@ pub fn read_zipfile_from_limitedseekable_stream( loop { let read_count = reader.read(&mut read_buffer)?; if read_count == 0 { - return Ok(None.into()); // EOF + return Ok(MaybeUntrusted::wrap_ok(None)); // EOF } read_count_total += read_count; if read_count_total > u32::MAX as usize { - return Ok(None.into()); // file too large + return Ok(MaybeUntrusted::wrap_ok(None)); // file too large } shift_register = shift_register >> 8 | (read_buffer[0] as u32) << 24; From bf7a03089db6c9fa2333371414a1ca5a0f9c8d22 Mon Sep 17 00:00:00 2001 From: 0xCCF4 <0xccf4-git-commit@lenzentech.de> Date: Mon, 24 Jun 2024 13:42:02 +0200 Subject: [PATCH 07/14] CRC32 checksum is now late propagated Before: The CRC32 checksum had to be supplied before the stream is read. Though checked when EOF occurred. Now: The CRC32 checksum can be supplied before starting to read or after finishing reading from the stream. --- src/aes.rs | 2 + src/crc32.rs | 133 ++++++++++++++-- src/read.rs | 365 +++++++++++++++++++++----------------------- src/read/lzma.rs | 4 + src/zipcrypto.rs | 2 + tests/zip_stream.rs | 16 +- 6 files changed, 305 insertions(+), 217 deletions(-) diff --git a/src/aes.rs b/src/aes.rs index 6c9eb371f..546ff83e7 100644 --- a/src/aes.rs +++ b/src/aes.rs @@ -219,6 +219,8 @@ impl AesReaderValid { pub fn into_inner(self) -> R { self.reader } + + pub fn get_ref(&self) -> &R { &self.reader } } pub struct AesWriter { diff --git a/src/crc32.rs b/src/crc32.rs index 7152c0853..8d01d110d 100644 --- a/src/crc32.rs +++ b/src/crc32.rs @@ -1,34 +1,38 @@ //! Helper module to compute a CRC32 checksum use std::io; +use std::io::BufReader; use std::io::prelude::*; +use bzip2::read::BzDecoder; use crc32fast::Hasher; +use deflate64::Deflate64Decoder; +use flate2::read::DeflateDecoder; +use crate::read::CryptoReader; +use crate::read::lzma::LzmaDecoder; /// Reader that validates the CRC32 when it reaches the EOF. -pub struct Crc32Reader { +pub struct Crc32Reader { inner: R, hasher: Hasher, - check: u32, /// Signals if `inner` stores aes encrypted data. /// AE-2 encrypted data doesn't use crc and sets the value to 0. ae2_encrypted: bool, } -impl Crc32Reader { +impl Crc32Reader { /// Get a new Crc32Reader which checks the inner reader against checksum. /// The check is disabled if `ae2_encrypted == true`. - pub(crate) fn new(inner: R, checksum: u32, ae2_encrypted: bool) -> Crc32Reader { + pub(crate) fn new(inner: R, ae2_encrypted: bool) -> Crc32Reader { Crc32Reader { inner, hasher: Hasher::new(), - check: checksum, ae2_encrypted, } } - fn check_matches(&self) -> bool { - self.check == self.hasher.clone().finalize() + fn check_matches(&self) -> std::io::Result { + Ok(self.inner.get_crc32()? == self.hasher.clone().finalize()) } pub fn into_inner(self) -> R { @@ -36,12 +40,10 @@ impl Crc32Reader { } } -impl Read for Crc32Reader { +impl Read for Crc32Reader { fn read(&mut self, buf: &mut [u8]) -> io::Result { - let invalid_check = !buf.is_empty() && !self.check_matches() && !self.ae2_encrypted; - let count = match self.inner.read(buf) { - Ok(0) if invalid_check => { + Ok(0) if !buf.is_empty() && !self.check_matches()? && !self.ae2_encrypted => { return Err(io::Error::new(io::ErrorKind::Other, "Invalid checksum")) } Ok(n) => n, @@ -52,6 +54,107 @@ impl Read for Crc32Reader { } } +/// A reader trait that provides a method to get the expected crc of the data read. +/// In the normal case, the expected crc is known before the zip entry is read. +/// In streaming mode with data descriptors, the crc will be available after the data is read. +/// Still in both cases the crc is available after the data is read and can be checked. +pub trait ReadAndSupplyExpectedCRC32 : Read { + fn get_crc32(&self) -> std::io::Result; +} + +pub struct InitiallyKnownCRC32 { + reader: R, + crc: u32, +} + +impl InitiallyKnownCRC32 { + pub fn new(reader: R, crc: u32) -> InitiallyKnownCRC32 { + InitiallyKnownCRC32 { reader, crc } + } + + #[allow(dead_code)] + pub fn into_inner(self) -> R { + self.reader + } + + #[allow(dead_code)] + pub fn get_ref(&self) -> &R { + &self.reader + } +} + +impl Read for InitiallyKnownCRC32 { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.reader.read(buf) + } +} + +impl ReadAndSupplyExpectedCRC32 for InitiallyKnownCRC32 { + fn get_crc32(&self) -> std::io::Result { + Ok(self.crc) + } +} + +impl<'a, T: ReadAndSupplyExpectedCRC32 + 'a> ReadAndSupplyExpectedCRC32 for CryptoReader<'a, T> { + fn get_crc32(&self) -> io::Result { + self.get_ref().get_crc32() + } +} + +impl ReadAndSupplyExpectedCRC32 for DeflateDecoder { + fn get_crc32(&self) -> io::Result { + self.get_ref().get_crc32() + } +} + +impl ReadAndSupplyExpectedCRC32 for Deflate64Decoder> { + fn get_crc32(&self) -> io::Result { + self.get_ref().get_ref().get_crc32() + } +} + +impl ReadAndSupplyExpectedCRC32 for BzDecoder { + fn get_crc32(&self) -> io::Result { + self.get_ref().get_crc32() + } +} + +impl<'a, T: ReadAndSupplyExpectedCRC32 + BufRead> ReadAndSupplyExpectedCRC32 for zstd::Decoder<'a, T> { + fn get_crc32(&self) -> io::Result { + self.get_ref().get_crc32() + } +} + +impl<'a, T: ReadAndSupplyExpectedCRC32> ReadAndSupplyExpectedCRC32 for zstd::Decoder<'a, BufReader> { + fn get_crc32(&self) -> io::Result { + self.get_ref().get_ref().get_crc32() + } +} + +impl ReadAndSupplyExpectedCRC32 for LzmaDecoder { + fn get_crc32(&self) -> io::Result { + self.get_ref().get_crc32() + } +} + +impl<'a> ReadAndSupplyExpectedCRC32 for Box<(dyn ReadAndSupplyExpectedCRC32 + 'a)> { + fn get_crc32(&self) -> io::Result { + self.as_ref().get_crc32() + } +} + +impl<'a, T: ReadAndSupplyExpectedCRC32 + 'a> ReadAndSupplyExpectedCRC32 for Box { + fn get_crc32(&self) -> io::Result { + self.as_ref().get_crc32() + } +} + +impl ReadAndSupplyExpectedCRC32 for std::io::Take<&mut T> { + fn get_crc32(&self) -> io::Result { + self.get_ref().get_crc32() + } +} + #[cfg(test)] mod test { use super::*; @@ -61,10 +164,10 @@ mod test { let data: &[u8] = b""; let mut buf = [0; 1]; - let mut reader = Crc32Reader::new(data, 0, false); + let mut reader = Crc32Reader::new(InitiallyKnownCRC32::new(data, 0), false); assert_eq!(reader.read(&mut buf).unwrap(), 0); - let mut reader = Crc32Reader::new(data, 1, false); + let mut reader = Crc32Reader::new(InitiallyKnownCRC32::new(data, 1), false); assert!(reader .read(&mut buf) .unwrap_err() @@ -77,7 +180,7 @@ mod test { let data: &[u8] = b"1234"; let mut buf = [0; 1]; - let mut reader = Crc32Reader::new(data, 0x9be3e0a3, false); + let mut reader = Crc32Reader::new(InitiallyKnownCRC32::new(data, 0x9be3e0a3), false); assert_eq!(reader.read(&mut buf).unwrap(), 1); assert_eq!(reader.read(&mut buf).unwrap(), 1); assert_eq!(reader.read(&mut buf).unwrap(), 1); @@ -92,7 +195,7 @@ mod test { let data: &[u8] = b"1234"; let mut buf = [0; 5]; - let mut reader = Crc32Reader::new(data, 0x9be3e0a3, false); + let mut reader = Crc32Reader::new(InitiallyKnownCRC32::new(data, 0x9be3e0a3), false); assert_eq!(reader.read(&mut buf[..0]).unwrap(), 0); assert_eq!(reader.read(&mut buf).unwrap(), 4); } diff --git a/src/read.rs b/src/read.rs index b2e8fb814..426e01d52 100644 --- a/src/read.rs +++ b/src/read.rs @@ -4,10 +4,10 @@ use crate::aes::{AesReader, AesReaderValid}; use crate::compression::CompressionMethod; use crate::cp437::FromCp437; -use crate::crc32::Crc32Reader; +use crate::crc32::{Crc32Reader, InitiallyKnownCRC32, ReadAndSupplyExpectedCRC32}; use crate::extra_fields::{ExtendedTimestamp, ExtraField}; use crate::read::zip_archive::{Shared, SharedBuilder}; -use crate::result::{MaybeUntrusted, ZipError, ZipResult}; +use crate::result::{MaybeUntrusted, UntrustedValue, ZipError, ZipResult}; use crate::spec::{self, FixedSizeBlock, Zip32CentralDirectoryEnd, ZIP64_ENTRY_THR}; use crate::types::{AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipDataDescriptor, ZipFileData, ZipLocalEntryBlock, ZipLocalEntryBlockAndFields}; use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator}; @@ -15,7 +15,7 @@ use indexmap::IndexMap; use std::borrow::Cow; use std::ffi::OsString; use std::fs::create_dir_all; -use std::io::{self, copy, prelude::*, sink, SeekFrom}; +use std::io::{self, copy, prelude::*, sink, SeekFrom, ErrorKind}; use std::marker::PhantomData; use std::mem; use std::mem::size_of; @@ -159,6 +159,15 @@ impl<'a, T: Read + 'a> CryptoReader<'a, T> { } } + pub fn get_ref(&self) -> &T { + match self { + CryptoReader::Plaintext(r, _) => &r, + CryptoReader::ZipCrypto(r) => &r.get_ref(), + #[cfg(feature = "aes-crypto")] + CryptoReader::Aes { reader: r, .. } => &r.get_ref(), + } + } + /// Returns `true` if the data is encrypted using AE2. pub const fn is_ae2_encrypted(&self) -> bool { #[cfg(feature = "aes-crypto")] @@ -174,7 +183,7 @@ impl<'a, T: Read + 'a> CryptoReader<'a, T> { } } -pub(crate) enum ZipFileReader<'a, T: Read + 'a> { +pub(crate) enum ZipFileReader<'a, T: ReadAndSupplyExpectedCRC32 + 'a> { NoReader, Raw(T), Stored(Crc32Reader>), @@ -190,7 +199,7 @@ pub(crate) enum ZipFileReader<'a, T: Read + 'a> { Lzma(Crc32Reader>>>), } -impl<'a, T: Read + 'a> Read for ZipFileReader<'a, T> { +impl<'a, T: ReadAndSupplyExpectedCRC32 + 'a> Read for ZipFileReader<'a, T> { fn read(&mut self, buf: &mut [u8]) -> io::Result { match self { ZipFileReader::NoReader => panic!("ZipFileReader was in an invalid state"), @@ -210,7 +219,7 @@ impl<'a, T: Read + 'a> Read for ZipFileReader<'a, T> { } } -impl<'a, T: Read + 'a> ZipFileReader<'a, T> { +impl<'a, T: ReadAndSupplyExpectedCRC32 + 'a> ZipFileReader<'a, T> { /// Consumes this decoder, returning the underlying reader. pub fn drain(self) { let mut inner = match self { @@ -242,14 +251,14 @@ impl<'a, T: Read + 'a> ZipFileReader<'a, T> { /// A struct for reading a zip file pub struct ZipFile<'a> { pub(crate) data: Cow<'a, ZipFileData>, - pub(crate) crypto_reader: Option>>, - pub(crate) reader: ZipFileReader<'a, Box>, + pub(crate) crypto_reader: Option>>, + pub(crate) reader: ZipFileReader<'a, Box>, } pub(crate) fn find_content<'a, T: Read + Seek>( data: &ZipFileData, mut reader: T, -) -> ZipResult> { +) -> ZipResult>> { // TODO: use .get_or_try_init() once stabilized to provide a closure returning a Result! let data_start = match data.data_start.get() { Some(data_start) => *data_start, @@ -257,7 +266,7 @@ pub(crate) fn find_content<'a, T: Read + Seek>( }; reader.seek(io::SeekFrom::Start(data_start))?; - Ok(reader.take(data.compressed_size)) + Ok(InitiallyKnownCRC32::new(reader.take(data.compressed_size), data.crc32)) } fn find_data_start( @@ -336,9 +345,8 @@ pub(crate) fn make_crypto_reader<'a, T: Read + 'a>( Ok(reader) } -pub(crate) fn make_reader( +pub(crate) fn make_reader( compression_method: CompressionMethod, - crc32: u32, reader: CryptoReader, ) -> ZipResult> { let ae2_encrypted = reader.is_ae2_encrypted(); @@ -346,7 +354,6 @@ pub(crate) fn make_reader( match compression_method { CompressionMethod::Stored => Ok(ZipFileReader::Stored(Crc32Reader::new( reader, - crc32, ae2_encrypted, ))), #[cfg(feature = "_deflate-any")] @@ -354,7 +361,6 @@ pub(crate) fn make_reader( let deflate_reader = DeflateDecoder::new(reader); Ok(ZipFileReader::Deflated(Crc32Reader::new( deflate_reader, - crc32, ae2_encrypted, ))) } @@ -363,7 +369,6 @@ pub(crate) fn make_reader( let deflate64_reader = Deflate64Decoder::new(reader); Ok(ZipFileReader::Deflate64(Crc32Reader::new( deflate64_reader, - crc32, ae2_encrypted, ))) } @@ -372,16 +377,14 @@ pub(crate) fn make_reader( let bzip2_reader = BzDecoder::new(reader); Ok(ZipFileReader::Bzip2(Crc32Reader::new( bzip2_reader, - crc32, ae2_encrypted, ))) } #[cfg(feature = "zstd")] CompressionMethod::Zstd => { - let zstd_reader = ZstdDecoder::new(reader).unwrap(); + let zstd_reader = ZstdDecoder::new(reader)?; Ok(ZipFileReader::Zstd(Crc32Reader::new( zstd_reader, - crc32, ae2_encrypted, ))) } @@ -390,7 +393,6 @@ pub(crate) fn make_reader( let reader = LzmaDecoder::new(reader); Ok(ZipFileReader::Lzma(Crc32Reader::new( Box::new(reader), - crc32, ae2_encrypted, ))) } @@ -1136,7 +1138,7 @@ impl ZipArchive { data.crc32, data.last_modified_time, data.using_data_descriptor, - Box::new(limit_reader) as Box, + Box::new(InitiallyKnownCRC32::new(limit_reader, data.crc32)) as Box, password, data.aes_mode, #[cfg(feature = "aes-crypto")] @@ -1434,11 +1436,11 @@ pub(crate) fn parse_single_extra_field( /// Methods for retrieving information on zip files impl<'a> ZipFile<'a> { - fn get_reader(&mut self) -> ZipResult<&mut ZipFileReader<'a, Box>> { + fn get_reader(&mut self) -> ZipResult<&mut ZipFileReader<'a, Box>> { if let ZipFileReader::NoReader = self.reader { let data = &self.data; let crypto_reader = self.crypto_reader.take().expect("Invalid reader state"); - self.reader = make_reader(data.compression_method, data.crc32, crypto_reader)?; + self.reader = make_reader(data.compression_method, crypto_reader)?; } Ok(&mut self.reader) } @@ -1661,24 +1663,143 @@ fn read_local_fileblock(reader: &mut R) -> ZipResult { + reader: &'a mut T, + buffer: Vec, + index: usize, +} + +impl<'a, T: Read> ReadBufferThenForward<'a, T> { + /// creates a new instance of this, first reads from the buffer, then proxy read to underlying stream + pub fn new(reader: &'a mut T, buffer: Vec) -> Self { + ReadBufferThenForward { + reader, + buffer, + index: 0 + } + } +} + +impl<'a, T: Read> Read for ReadBufferThenForward<'a, T> { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let buffer_start = std::cmp::min(self.index, buf.len()); + let data_left_in_buffer = buf.len() - buffer_start; + let buffer_end = std::cmp::min(self.buffer.len(), buffer_start + data_left_in_buffer); + + let next_buffer_data = &self.buffer[buffer_start..buffer_end]; + let count_copy_from_buffer = std::cmp::min(next_buffer_data.len(), buf.len()); + + buf[0..count_copy_from_buffer].copy_from_slice(&next_buffer_data[0..count_copy_from_buffer]); + + self.index += count_copy_from_buffer; + if self.index > buf.len() && self.index > 0 { + // empty buffer + self.index = 0; + self.buffer.clear(); + self.buffer.shrink_to_fit(); + } + + let remaining_bytes = buf.len() - count_copy_from_buffer; + let result = if remaining_bytes > 0 { + self.reader.read(&mut buf[count_copy_from_buffer..])? + } else { + 0 + }; + + Ok(count_copy_from_buffer + result) + } +} + struct ReadTillDataDescriptor { reader: T, + data_descriptor_found: Option, + look_ahead_buffer: Vec, + number_read_total_actual: usize, // without look ahead buffer +} + +impl ReadTillDataDescriptor { + pub fn new(mut reader: T) -> ZipResult { + let mut look_ahead_buffer= [0u8; std::mem::size_of::()]; + + // fill the look ahead buffer + reader.read_exact(&mut look_ahead_buffer)?; + let look_ahead_buffer = Vec::from(look_ahead_buffer); + + Ok(Self { + reader, + look_ahead_buffer, + data_descriptor_found: None, + number_read_total_actual: 0, + }) + } } impl ReadTillDataDescriptor { - pub fn new(reader: T) -> Self { - Self { - reader + fn has_found_data_descriptor(&self) -> Option { + if spec::Magic::from_first_le_bytes(&self.look_ahead_buffer) == spec::Magic::DATA_DESCRIPTOR_SIGNATURE { + // potentially found a data descriptor + // check if size matches + let data_descriptor = match ZipDataDescriptor::interpret(&self.look_ahead_buffer) { + Ok(data_descriptor) => data_descriptor, + Err(_) => return None, + }; + let data_descriptor_size = data_descriptor.compressed_size; + + if data_descriptor_size == self.number_read_total_actual as u32 { + return Some(data_descriptor); + } + } + None + } + + fn read_a_byte(&mut self) -> io::Result> { + let mut byte_buffer = [0u8; 1]; + let read_count = self.reader.read(&mut byte_buffer)?; + + if read_count > 0 { + let value = self.look_ahead_buffer.remove(0); + self.look_ahead_buffer.push(byte_buffer[0]); + self.number_read_total_actual += 1; + Ok(Some(value)) + } else { + Ok(None) } } + + pub fn get_data_descriptor(&self) -> Option { + self.data_descriptor_found + } + + #[allow(dead_code)] + pub fn into_inner(self) -> T { + self.reader + } } impl Read for ReadTillDataDescriptor { fn read(&mut self, buf: &mut [u8]) -> io::Result { - todo!() - // self.reader.read(buf) - // todo + for index in 0..buf.len() { + let data_descriptor = self.has_found_data_descriptor(); + + if let Some(_) = data_descriptor { + self.data_descriptor_found = data_descriptor; + return Ok(index) + } + + match self.read_a_byte()? { + None => return Ok(index), + Some(value) => buf[index] = value, + } + } + Ok(buf.len()) + } +} + +impl ReadAndSupplyExpectedCRC32 for ReadTillDataDescriptor { + fn get_crc32(&self) -> io::Result { + Ok(self.get_data_descriptor().ok_or(std::io::Error::new(ErrorKind::Other, "EOF reached, corrupt zip file"))?.crc32) } } @@ -1718,14 +1839,15 @@ fn read_zipfile_from_fileblock<'a, R: Read>( match crate::read::parse_extra_field(&mut result) { Ok(..) | Err(ZipError::Io(..)) => {} Err(e) => return Err(e), - } - - let limit_reader: Box = match size_unknown { - false => Box::new((reader as &'a mut dyn Read).take(result.compressed_size)), - true => Box::new(ReadTillDataDescriptor::new(reader)) }; let result_crc32 = result.crc32; + + let limit_reader: Box = match size_unknown { + false => Box::new(InitiallyKnownCRC32::new((reader as &'a mut dyn Read).take(result.compressed_size), result_crc32)), + true => Box::new(ReadTillDataDescriptor::new(reader)?) + }; + let result_compression_method = result.compression_method; let crypto_reader = crate::read::make_crypto_reader( result_compression_method, @@ -1742,7 +1864,7 @@ fn read_zipfile_from_fileblock<'a, R: Read>( Ok(MaybeUntrusted::wrap(Some(ZipFile { data: Cow::Owned(result), crypto_reader: None, - reader: crate::read::make_reader(result_compression_method, result_crc32, crypto_reader)?, + reader: crate::read::make_reader(result_compression_method, crypto_reader)?, }), size_unknown)) } @@ -1776,130 +1898,23 @@ pub fn read_zipfile_from_stream(reader: &mut R) -> ZipResult( - reader: &mut S, -) -> ZipResult>> { - // todo: remove - - let local_entry_block = read_local_fileblock(reader)?; - let local_entry_block = match local_entry_block { - Some(local_entry_block) => local_entry_block, - None => return Ok(MaybeUntrusted::wrap_ok(None)), - }; - - let using_data_descriptor = local_entry_block.block.flags & (1 << 3) == 1 << 3; - let mut shift_register: u32 = 0; - let mut read_buffer = [0u8; 1]; - if using_data_descriptor { - // we must find the data descriptor to get the compressed size - - let mut read_count_total: usize = 0; - loop { - let read_count = reader.read(&mut read_buffer)?; - if read_count == 0 { - return Ok(MaybeUntrusted::wrap_ok(None)); // EOF - } - - read_count_total += read_count; - - if read_count_total > u32::MAX as usize { - return Ok(MaybeUntrusted::wrap_ok(None)); // file too large - } - - shift_register = shift_register >> 8 | (read_buffer[0] as u32) << 24; - - if read_count_total < 4 { - continue; - } - - if spec::Magic::from_le_bytes(shift_register.to_le_bytes()) - == spec::Magic::DATA_DESCRIPTOR_SIGNATURE - { - // found a potential data descriptor - reader.seek(SeekFrom::Current(-4))?; // go back to the start of the data descriptor - read_count_total -= 4; - - let mut data_descriptor_block = [0u8; mem::size_of::()]; - reader.read_exact(&mut data_descriptor_block)?; - let data_descriptor_block: Box<[u8]> = data_descriptor_block.into(); - // seek back to data end - reader.seek(SeekFrom::Current( - -(mem::size_of::() as i64), - ))?; - - let data_descriptor = ZipDataDescriptor::interpret(&data_descriptor_block)?; - - // check if the data descriptor is indeed valid for the read data - if data_descriptor.compressed_size == read_count_total as u32 { - // valid data descriptor - - // seek back to data start - reader.seek(SeekFrom::Current(-(read_count_total as i64)))?; - - return read_zipfile_from_fileblock( - local_entry_block, - reader, - Some(data_descriptor), - false, - ).map(|result| result.into()); - } else { - // data descriptor is invalid - - reader.seek(SeekFrom::Current(1))?; // skip the magic number - read_count_total += 1; - - // continue data descriptor searching - } - } - } - } else { - // dont using a data descriptor - read_zipfile_from_fileblock(local_entry_block, reader, None, false) - } -} - -/// Advance the stream to the next zip file magic number (local file header, central directory header, data descriptor) -/// and return the magic number and the number of bytes read since the call to this function. +/// The stream will be positioned at the start of the zip file start +4 bytes (local file header start). +/// The returned stream will be positioned at the zip file start and proxy all reads to the given stream /// -/// This function is useful in combination with read_zipfile_from_limitedseekable_stream to read multiple zip files from a single stream. -/// This function will never seek back more than 4 bytes and not before the initial position of the stream when the function was called. +/// Returns a wrapped stream /// -/// The stream will be positioned at the start of the magic number. +/// Will return None if the end of the stream is reached. /// -/// Returns the found magic number and the number of bytes read since the call to this function. -fn advance_stream_to_next_magic( - reader: &mut S, -) -> ZipResult> { - // todo remove - +/// If parts of the file are attacker controlled (when using in combination with read_zipfile_from_stream) then +/// it is not guaranteed that the stream will be positioned at the next zip local entry header but instead +/// within file content, possibly attacker controlled. Take care handling the value. See also read_zipfile_from_stream for more information. +pub fn advance_stream_to_next_zipfile_start( + reader: &mut T, +) -> ZipResult>>> { let mut shift_register: u32 = 0; let mut read_buffer = [0u8; 1]; @@ -1921,55 +1936,15 @@ fn advance_stream_to_next_magic( } if magic == spec::Magic::LOCAL_FILE_HEADER_SIGNATURE - || magic == spec::Magic::CENTRAL_DIRECTORY_HEADER_SIGNATURE - || magic == spec::Magic::DATA_DESCRIPTOR_SIGNATURE { // found a potential magic number - reader.seek(SeekFrom::Current(-4))?; // go back to the start of the magic number - return Ok(Some((magic, read_count_total - 4))); - } - } -} -/// Like advance_stream_to_next_magic but advances until given Magic is encountered -fn advance_stream_to_next_magic_start( - reader: &mut S, - target_magic: spec::Magic, -) -> ZipResult> { - let mut bytes_read_total = 0; - - loop { - match advance_stream_to_next_magic(reader)? { - Some((magic, bytes_read)) => { - bytes_read_total += bytes_read; - if magic == target_magic { - return Ok(Some(bytes_read_total)); - } else { - reader.seek(SeekFrom::Current(1))?; - bytes_read_total += 1; - } - } - None => return Ok(None), + let buffer = magic.to_le_bytes(); + return Ok(Some(ReadBufferThenForward::new(reader, buffer.into()).into())) } } } -/// Advance the stream to the next zip file start (local file header start) returning the number of bytes read since the call to this function. -/// -/// This function is useful in combination with read_zipfile_from_limitedseekable_stream to read multiple zip files from a single stream. -/// This function will never seek back more than 4 bytes and not before the initial position of the stream when the function was called. -/// -/// The stream will be positioned at the start of the zip file start (local file header start). -/// -/// Returns the number of bytes read since the call to this function. -/// -/// Will return None if the end of the stream is reached. -pub fn advance_stream_to_next_zipfile_start( - reader: &mut S, -) -> ZipResult> { - advance_stream_to_next_magic_start(reader, spec::Magic::LOCAL_FILE_HEADER_SIGNATURE) -} - #[cfg(test)] mod test { use crate::result::ZipResult; diff --git a/src/read/lzma.rs b/src/read/lzma.rs index 04b6edd8e..15af12626 100644 --- a/src/read/lzma.rs +++ b/src/read/lzma.rs @@ -28,6 +28,10 @@ impl LzmaDecoder { copy(&mut self.compressed_reader, &mut self.stream)?; self.stream.finish().map_err(Error::from) } + + pub fn get_ref(&self) -> &R { + &self.compressed_reader + } } impl Read for LzmaDecoder { diff --git a/src/zipcrypto.rs b/src/zipcrypto.rs index bd81b5c43..1a1433187 100644 --- a/src/zipcrypto.rs +++ b/src/zipcrypto.rs @@ -207,6 +207,8 @@ impl ZipCryptoReaderValid { pub fn into_inner(self) -> R { self.reader.file } + + pub fn get_ref(&self) -> &R { &self.reader.file } } static CRCTABLE: [u32; 256] = [ diff --git a/tests/zip_stream.rs b/tests/zip_stream.rs index 7b6534d43..d06c3e09d 100644 --- a/tests/zip_stream.rs +++ b/tests/zip_stream.rs @@ -8,7 +8,7 @@ fn decompress_stream_no_data_descriptor() { v.extend_from_slice(include_bytes!("data/deflate64.zip")); let mut stream = io::Cursor::new(v); - let mut entry = zip::read::read_zipfile_from_limitedseekable_stream(&mut stream) + let mut entry = zip::read::read_zipfile_from_stream(&mut stream) .expect("couldn't open test zip file") .use_untrusted_value() .expect("did not find file entry in zip file"); @@ -42,9 +42,9 @@ fn decompress_stream_with_data_descriptor_sanity_check() { fn decompress_stream_with_data_descriptor() { let mut v = Vec::new(); v.extend_from_slice(include_bytes!("data/data_descriptor.zip")); - let mut stream = io::Cursor::new(v); + let mut stream = Box::new(io::Cursor::new(v)) as Box; - let mut entry = zip::read::read_zipfile_from_limitedseekable_stream(&mut stream) + let mut entry = zip::read::read_zipfile_from_stream(&mut stream) .expect("couldn't open test zip file") .use_untrusted_value() .expect("did not find file entry in zip file"); @@ -65,7 +65,7 @@ fn decompress_stream_with_data_descriptor_continue() { // First entry - let mut entry = zip::read::read_zipfile_from_limitedseekable_stream(&mut stream) + let mut entry = zip::read::read_zipfile_from_stream(&mut stream) .expect("couldn't open test zip file") .use_untrusted_value() .expect("did not find file entry in zip file"); @@ -81,10 +81,12 @@ fn decompress_stream_with_data_descriptor_continue() { // Second entry - zip::read::advance_stream_to_next_zipfile_start(&mut stream) - .expect("couldn't advance to next entry in zip file"); + let mut stream = zip::read::advance_stream_to_next_zipfile_start(&mut stream) + .expect("couldn't advance to next entry in zip file") + .expect("no more entries") + .use_untrusted_value(); - let mut entry = zip::read::read_zipfile_from_limitedseekable_stream(&mut stream) + let mut entry = zip::read::read_zipfile_from_stream(&mut stream) .expect("couldn't open test zip file") .use_untrusted_value() .expect("did not find file entry in zip file"); From 3265477834ca5080b62da13f9606cf84cb04c05f Mon Sep 17 00:00:00 2001 From: 0xCCF4 <0xccf4-git-commit@lenzentech.de> Date: Mon, 24 Jun 2024 14:15:41 +0200 Subject: [PATCH 08/14] ZipStream API supports archives with data descriptor --- src/read/stream.rs | 101 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 80 insertions(+), 21 deletions(-) diff --git a/src/read/stream.rs b/src/read/stream.rs index f8de0f8d7..30cf4baa6 100644 --- a/src/read/stream.rs +++ b/src/read/stream.rs @@ -1,6 +1,7 @@ use std::fs; use std::io::{self, Read}; use std::path::{Path, PathBuf}; +use crate::result::{MaybeUntrusted}; use super::{ central_header_to_zip_file_inner, read_zipfile_from_stream, ZipCentralEntryBlock, ZipError, @@ -40,14 +41,21 @@ impl ZipStreamReader { /// Iterate over the stream and extract all file and their /// metadata. pub fn visit(mut self, visitor: &mut V) -> ZipResult<()> { - // todo allow data descriptors by passing the MaybeUntrusted to the user - // todo check if we are allowed to change the API signature - while let Some(mut file) = read_zipfile_from_stream(&mut self.0)?.unwrap_or_error("zip content size not given in header")? { - visitor.visit_file(&mut file)?; + let mut untrusted = false; + + loop { + let file = read_zipfile_from_stream(&mut self.0)?; + untrusted = untrusted | file.is_untrusted(); + + let file = file.use_untrusted_value(); + match file { + Some(mut file) => visitor.visit_file(MaybeUntrusted::wrap(&mut file, untrusted))?, + None => break + } } while let Ok(metadata) = self.parse_central_directory() { - visitor.visit_additional_metadata(&metadata)?; + visitor.visit_additional_metadata(MaybeUntrusted::wrap(&metadata, untrusted))?; } Ok(()) @@ -59,14 +67,38 @@ impl ZipStreamReader { /// Extraction is not atomic; If an error is encountered, some of the files /// may be left on disk. pub fn extract>(self, directory: P) -> ZipResult<()> { - struct Extractor<'a>(&'a Path); + self.extract_unsafe(directory, false) + } + + /// Extract a Zip archive into a directory, overwriting files if they + /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`]. + /// + /// Extraction is not atomic; If an error is encountered, some of the files + /// may be left on disk. + /// + /// **This is considered a security risk. Use at your own discretion.** + pub fn extract_untrusted>(self, directory: P) -> ZipResult<()> { + self.extract_unsafe(directory, true) + } + + fn extract_unsafe>(self, directory: P, allow_untrusted: bool) -> ZipResult<()> { + struct Extractor<'a> { path: &'a Path, allow_untrusted: bool } impl ZipStreamVisitor for Extractor<'_> { - fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { + fn visit_file(&mut self, file: MaybeUntrusted<&mut ZipFile<'_>>) -> ZipResult<()> { + let file = match file { + MaybeUntrusted::Ok(file) => file, + MaybeUntrusted::Untrusted(value) => if self.allow_untrusted { + value.use_untrusted_value() + } else { + return Err(ZipError::UnsupportedArchive("extracting a zip file stream-fashion with data descriptors is considered a security risk")); + } + }; + let filepath = file .enclosed_name() .ok_or(ZipError::InvalidArchive("Invalid file path"))?; - let outpath = self.0.join(filepath); + let outpath = self.path.join(filepath); if file.is_dir() { fs::create_dir_all(&outpath)?; @@ -84,15 +116,24 @@ impl ZipStreamReader { #[allow(unused)] fn visit_additional_metadata( &mut self, - metadata: &ZipStreamFileMetadata, + metadata: MaybeUntrusted<&ZipStreamFileMetadata>, ) -> ZipResult<()> { #[cfg(unix)] { + let metadata = match metadata { + MaybeUntrusted::Ok(file) => file, + MaybeUntrusted::Untrusted(value) => if self.allow_untrusted { + value.use_untrusted_value() + } else { + return Err(ZipError::UnsupportedArchive("extracting a zip file stream-fashion with data descriptors is considered a security risk")); + } + }; + let filepath = metadata .enclosed_name() .ok_or(ZipError::InvalidArchive("Invalid file path"))?; - let outpath = self.0.join(filepath); + let outpath = self.path.join(filepath); use std::os::unix::fs::PermissionsExt; if let Some(mode) = metadata.unix_mode() { @@ -104,7 +145,7 @@ impl ZipStreamReader { } } - self.visit(&mut Extractor(directory.as_ref())) + self.visit(&mut Extractor {path: directory.as_ref(), allow_untrusted}) } } @@ -115,12 +156,26 @@ pub trait ZipStreamVisitor { /// - `comment`: set to an empty string /// - `data_start`: set to 0 /// - `external_attributes`: `unix_mode()`: will return None - fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()>; + /// + /// In the case of parsing a zip file which contains data descriptors and an attacker can + /// control files that were compressed to the zip file the returned data is not trusted and + /// partly attacker controllable. Everything returned by this function should be regarded untrusted + /// (file name, content, file count, ...). Take care handling the output. + /// + /// To be on the safe side use `.unwrap_or_error(...)` on the `MaybeUntrusted` parameter. + fn visit_file(&mut self, file: MaybeUntrusted<&mut ZipFile<'_>>) -> ZipResult<()>; /// This function is guranteed to be called after all `visit_file`s. /// /// * `metadata` - Provides missing metadata in `visit_file`. - fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()>; + /// + /// In the case of parsing a zip file which contains data descriptors and an attacker can + // control files that were compressed to the zip file the returned data is not trusted and + // partly attacker controllable. Everything returned by this function should be regarded untrusted + // (file name, content, file count, ...). Take care handling the output. + // + // To be on the safe side use `.unwrap_or_error(...)` on the `MaybeUntrusted` parameter. + fn visit_additional_metadata(&mut self, metadata: MaybeUntrusted<&ZipStreamFileMetadata>) -> ZipResult<()>; } /// Additional metadata for the file. @@ -210,13 +265,13 @@ mod test { struct DummyVisitor; impl ZipStreamVisitor for DummyVisitor { - fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> { + fn visit_file(&mut self, _file: MaybeUntrusted<&mut ZipFile<'_>>) -> ZipResult<()> { Ok(()) } fn visit_additional_metadata( &mut self, - _metadata: &ZipStreamFileMetadata, + _metadata: MaybeUntrusted<&ZipStreamFileMetadata>, ) -> ZipResult<()> { Ok(()) } @@ -225,14 +280,14 @@ mod test { #[derive(Default, Debug, Eq, PartialEq)] struct CounterVisitor(u64, u64); impl ZipStreamVisitor for CounterVisitor { - fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> { + fn visit_file(&mut self, _file: MaybeUntrusted<&mut ZipFile<'_>>) -> ZipResult<()> { self.0 += 1; Ok(()) } fn visit_additional_metadata( &mut self, - _metadata: &ZipStreamFileMetadata, + _metadata: MaybeUntrusted<&ZipStreamFileMetadata>, ) -> ZipResult<()> { self.1 += 1; Ok(()) @@ -268,7 +323,8 @@ mod test { filenames: BTreeSet>, } impl ZipStreamVisitor for V { - fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { + fn visit_file(&mut self, file: MaybeUntrusted<&mut ZipFile<'_>>) -> ZipResult<()> { + let file = file.use_untrusted_value(); if file.is_file() { self.filenames.insert(file.name().into()); } @@ -277,8 +333,9 @@ mod test { } fn visit_additional_metadata( &mut self, - metadata: &ZipStreamFileMetadata, + metadata: MaybeUntrusted<&ZipStreamFileMetadata>, ) -> ZipResult<()> { + let metadata = metadata.use_untrusted_value(); if metadata.is_file() { assert!( self.filenames.contains(metadata.name()), @@ -305,7 +362,8 @@ mod test { filenames: BTreeSet>, } impl ZipStreamVisitor for V { - fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { + fn visit_file(&mut self, file: MaybeUntrusted<&mut ZipFile<'_>>) -> ZipResult<()> { + let file = file.use_untrusted_value(); let full_name = file.enclosed_name().unwrap(); let file_name = full_name.file_name().unwrap().to_str().unwrap(); assert!( @@ -321,8 +379,9 @@ mod test { } fn visit_additional_metadata( &mut self, - metadata: &ZipStreamFileMetadata, + metadata: MaybeUntrusted<&ZipStreamFileMetadata>, ) -> ZipResult<()> { + let metadata = metadata.use_untrusted_value(); if metadata.is_file() { assert!( self.filenames.contains(metadata.name()), From 1d3afa92305b2c95c80b82d1c02c2d01804c89b0 Mon Sep 17 00:00:00 2001 From: 0xCCF4 <0xccf4-git-commit@lenzentech.de> Date: Mon, 24 Jun 2024 14:16:38 +0200 Subject: [PATCH 09/14] Run cargo fmt --all --- src/aes.rs | 4 ++- src/crc32.rs | 18 ++++++---- src/read.rs | 85 ++++++++++++++++++++++++++++++++-------------- src/read/stream.rs | 39 ++++++++++++++------- src/result.rs | 2 +- src/zipcrypto.rs | 4 ++- 6 files changed, 103 insertions(+), 49 deletions(-) diff --git a/src/aes.rs b/src/aes.rs index 546ff83e7..c74bd1978 100644 --- a/src/aes.rs +++ b/src/aes.rs @@ -220,7 +220,9 @@ impl AesReaderValid { self.reader } - pub fn get_ref(&self) -> &R { &self.reader } + pub fn get_ref(&self) -> &R { + &self.reader + } } pub struct AesWriter { diff --git a/src/crc32.rs b/src/crc32.rs index 8d01d110d..80d42067d 100644 --- a/src/crc32.rs +++ b/src/crc32.rs @@ -1,15 +1,15 @@ //! Helper module to compute a CRC32 checksum +use bzip2::read::BzDecoder; use std::io; -use std::io::BufReader; use std::io::prelude::*; -use bzip2::read::BzDecoder; +use std::io::BufReader; +use crate::read::lzma::LzmaDecoder; +use crate::read::CryptoReader; use crc32fast::Hasher; use deflate64::Deflate64Decoder; use flate2::read::DeflateDecoder; -use crate::read::CryptoReader; -use crate::read::lzma::LzmaDecoder; /// Reader that validates the CRC32 when it reaches the EOF. pub struct Crc32Reader { @@ -58,7 +58,7 @@ impl Read for Crc32Reader { /// In the normal case, the expected crc is known before the zip entry is read. /// In streaming mode with data descriptors, the crc will be available after the data is read. /// Still in both cases the crc is available after the data is read and can be checked. -pub trait ReadAndSupplyExpectedCRC32 : Read { +pub trait ReadAndSupplyExpectedCRC32: Read { fn get_crc32(&self) -> std::io::Result; } @@ -119,13 +119,17 @@ impl ReadAndSupplyExpectedCRC32 for BzDecoder } } -impl<'a, T: ReadAndSupplyExpectedCRC32 + BufRead> ReadAndSupplyExpectedCRC32 for zstd::Decoder<'a, T> { +impl<'a, T: ReadAndSupplyExpectedCRC32 + BufRead> ReadAndSupplyExpectedCRC32 + for zstd::Decoder<'a, T> +{ fn get_crc32(&self) -> io::Result { self.get_ref().get_crc32() } } -impl<'a, T: ReadAndSupplyExpectedCRC32> ReadAndSupplyExpectedCRC32 for zstd::Decoder<'a, BufReader> { +impl<'a, T: ReadAndSupplyExpectedCRC32> ReadAndSupplyExpectedCRC32 + for zstd::Decoder<'a, BufReader> +{ fn get_crc32(&self) -> io::Result { self.get_ref().get_ref().get_crc32() } diff --git a/src/read.rs b/src/read.rs index 426e01d52..0b3082b63 100644 --- a/src/read.rs +++ b/src/read.rs @@ -9,13 +9,16 @@ use crate::extra_fields::{ExtendedTimestamp, ExtraField}; use crate::read::zip_archive::{Shared, SharedBuilder}; use crate::result::{MaybeUntrusted, UntrustedValue, ZipError, ZipResult}; use crate::spec::{self, FixedSizeBlock, Zip32CentralDirectoryEnd, ZIP64_ENTRY_THR}; -use crate::types::{AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipDataDescriptor, ZipFileData, ZipLocalEntryBlock, ZipLocalEntryBlockAndFields}; +use crate::types::{ + AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipDataDescriptor, + ZipFileData, ZipLocalEntryBlock, ZipLocalEntryBlockAndFields, +}; use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator}; use indexmap::IndexMap; use std::borrow::Cow; use std::ffi::OsString; use std::fs::create_dir_all; -use std::io::{self, copy, prelude::*, sink, SeekFrom, ErrorKind}; +use std::io::{self, copy, prelude::*, sink, ErrorKind, SeekFrom}; use std::marker::PhantomData; use std::mem; use std::mem::size_of; @@ -266,7 +269,10 @@ pub(crate) fn find_content<'a, T: Read + Seek>( }; reader.seek(io::SeekFrom::Start(data_start))?; - Ok(InitiallyKnownCRC32::new(reader.take(data.compressed_size), data.crc32)) + Ok(InitiallyKnownCRC32::new( + reader.take(data.compressed_size), + data.crc32, + )) } fn find_data_start( @@ -1138,7 +1144,8 @@ impl ZipArchive { data.crc32, data.last_modified_time, data.using_data_descriptor, - Box::new(InitiallyKnownCRC32::new(limit_reader, data.crc32)) as Box, + Box::new(InitiallyKnownCRC32::new(limit_reader, data.crc32)) + as Box, password, data.aes_mode, #[cfg(feature = "aes-crypto")] @@ -1436,7 +1443,9 @@ pub(crate) fn parse_single_extra_field( /// Methods for retrieving information on zip files impl<'a> ZipFile<'a> { - fn get_reader(&mut self) -> ZipResult<&mut ZipFileReader<'a, Box>> { + fn get_reader( + &mut self, + ) -> ZipResult<&mut ZipFileReader<'a, Box>> { if let ZipFileReader::NoReader = self.reader { let data = &self.data; let crypto_reader = self.crypto_reader.take().expect("Invalid reader state"); @@ -1676,7 +1685,7 @@ impl<'a, T: Read> ReadBufferThenForward<'a, T> { ReadBufferThenForward { reader, buffer, - index: 0 + index: 0, } } } @@ -1690,7 +1699,8 @@ impl<'a, T: Read> Read for ReadBufferThenForward<'a, T> { let next_buffer_data = &self.buffer[buffer_start..buffer_end]; let count_copy_from_buffer = std::cmp::min(next_buffer_data.len(), buf.len()); - buf[0..count_copy_from_buffer].copy_from_slice(&next_buffer_data[0..count_copy_from_buffer]); + buf[0..count_copy_from_buffer] + .copy_from_slice(&next_buffer_data[0..count_copy_from_buffer]); self.index += count_copy_from_buffer; if self.index > buf.len() && self.index > 0 { @@ -1701,7 +1711,7 @@ impl<'a, T: Read> Read for ReadBufferThenForward<'a, T> { } let remaining_bytes = buf.len() - count_copy_from_buffer; - let result = if remaining_bytes > 0 { + let result = if remaining_bytes > 0 { self.reader.read(&mut buf[count_copy_from_buffer..])? } else { 0 @@ -1720,7 +1730,7 @@ struct ReadTillDataDescriptor { impl ReadTillDataDescriptor { pub fn new(mut reader: T) -> ZipResult { - let mut look_ahead_buffer= [0u8; std::mem::size_of::()]; + let mut look_ahead_buffer = [0u8; std::mem::size_of::()]; // fill the look ahead buffer reader.read_exact(&mut look_ahead_buffer)?; @@ -1738,7 +1748,9 @@ impl ReadTillDataDescriptor { impl ReadTillDataDescriptor { fn has_found_data_descriptor(&self) -> Option { - if spec::Magic::from_first_le_bytes(&self.look_ahead_buffer) == spec::Magic::DATA_DESCRIPTOR_SIGNATURE { + if spec::Magic::from_first_le_bytes(&self.look_ahead_buffer) + == spec::Magic::DATA_DESCRIPTOR_SIGNATURE + { // potentially found a data descriptor // check if size matches let data_descriptor = match ZipDataDescriptor::interpret(&self.look_ahead_buffer) { @@ -1785,7 +1797,7 @@ impl Read for ReadTillDataDescriptor { if let Some(_) = data_descriptor { self.data_descriptor_found = data_descriptor; - return Ok(index) + return Ok(index); } match self.read_a_byte()? { @@ -1799,7 +1811,13 @@ impl Read for ReadTillDataDescriptor { impl ReadAndSupplyExpectedCRC32 for ReadTillDataDescriptor { fn get_crc32(&self) -> io::Result { - Ok(self.get_data_descriptor().ok_or(std::io::Error::new(ErrorKind::Other, "EOF reached, corrupt zip file"))?.crc32) + Ok(self + .get_data_descriptor() + .ok_or(std::io::Error::new( + ErrorKind::Other, + "EOF reached, corrupt zip file", + ))? + .crc32) } } @@ -1824,7 +1842,6 @@ fn read_zipfile_from_fileblock<'a, R: Read>( data_descriptor: Option, mut allow_unlimited_read: bool, ) -> ZipResult>>> { - if data_descriptor.is_some() && allow_unlimited_read { allow_unlimited_read = false; // fallback, size is given } @@ -1833,7 +1850,9 @@ fn read_zipfile_from_fileblock<'a, R: Read>( let mut result = ZipFileData::from_local_block(block, data_descriptor, allow_unlimited_read)?; if size_unknown && !allow_unlimited_read { - return Err(ZipError::UnsupportedArchive("Archive is using data descriptors, but unlimited reading not enabled")); + return Err(ZipError::UnsupportedArchive( + "Archive is using data descriptors, but unlimited reading not enabled", + )); } match crate::read::parse_extra_field(&mut result) { @@ -1844,8 +1863,11 @@ fn read_zipfile_from_fileblock<'a, R: Read>( let result_crc32 = result.crc32; let limit_reader: Box = match size_unknown { - false => Box::new(InitiallyKnownCRC32::new((reader as &'a mut dyn Read).take(result.compressed_size), result_crc32)), - true => Box::new(ReadTillDataDescriptor::new(reader)?) + false => Box::new(InitiallyKnownCRC32::new( + (reader as &'a mut dyn Read).take(result.compressed_size), + result_crc32, + )), + true => Box::new(ReadTillDataDescriptor::new(reader)?), }; let result_compression_method = result.compression_method; @@ -1861,11 +1883,14 @@ fn read_zipfile_from_fileblock<'a, R: Read>( result.compressed_size, // does not matter since aes_info is set to None )?; - Ok(MaybeUntrusted::wrap(Some(ZipFile { - data: Cow::Owned(result), - crypto_reader: None, - reader: crate::read::make_reader(result_compression_method, crypto_reader)?, - }), size_unknown)) + Ok(MaybeUntrusted::wrap( + Some(ZipFile { + data: Cow::Owned(result), + crypto_reader: None, + reader: crate::read::make_reader(result_compression_method, crypto_reader)?, + }), + size_unknown, + )) } /// Read ZipFile structures from a non-seekable reader. @@ -1887,7 +1912,9 @@ fn read_zipfile_from_fileblock<'a, R: Read>( /// * `comment`: set to an empty string /// * `data_start`: set to 0 /// * `external_attributes`: `unix_mode()`: will return None -pub fn read_zipfile_from_stream(reader: &mut R) -> ZipResult>> { +pub fn read_zipfile_from_stream( + reader: &mut R, +) -> ZipResult>> { let block = read_local_fileblock(reader)?; let block = match block { Some(block) => block, @@ -1935,12 +1962,13 @@ pub fn advance_stream_to_next_zipfile_start( continue; } - if magic == spec::Magic::LOCAL_FILE_HEADER_SIGNATURE - { + if magic == spec::Magic::LOCAL_FILE_HEADER_SIGNATURE { // found a potential magic number let buffer = magic.to_le_bytes(); - return Ok(Some(ReadBufferThenForward::new(reader, buffer.into()).into())) + return Ok(Some( + ReadBufferThenForward::new(reader, buffer.into()).into(), + )); } } } @@ -2003,7 +2031,12 @@ mod test { v.extend_from_slice(include_bytes!("../tests/data/mimetype.zip")); let mut reader = Cursor::new(v); loop { - if read_zipfile_from_stream(&mut reader).unwrap().unwrap_or_error("data descriptors used").unwrap().is_none() { + if read_zipfile_from_stream(&mut reader) + .unwrap() + .unwrap_or_error("data descriptors used") + .unwrap() + .is_none() + { break; } } diff --git a/src/read/stream.rs b/src/read/stream.rs index 30cf4baa6..192a76125 100644 --- a/src/read/stream.rs +++ b/src/read/stream.rs @@ -1,7 +1,7 @@ +use crate::result::MaybeUntrusted; use std::fs; use std::io::{self, Read}; use std::path::{Path, PathBuf}; -use crate::result::{MaybeUntrusted}; use super::{ central_header_to_zip_file_inner, read_zipfile_from_stream, ZipCentralEntryBlock, ZipError, @@ -50,7 +50,7 @@ impl ZipStreamReader { let file = file.use_untrusted_value(); match file { Some(mut file) => visitor.visit_file(MaybeUntrusted::wrap(&mut file, untrusted))?, - None => break + None => break, } } @@ -82,15 +82,20 @@ impl ZipStreamReader { } fn extract_unsafe>(self, directory: P, allow_untrusted: bool) -> ZipResult<()> { - struct Extractor<'a> { path: &'a Path, allow_untrusted: bool } + struct Extractor<'a> { + path: &'a Path, + allow_untrusted: bool, + } impl ZipStreamVisitor for Extractor<'_> { fn visit_file(&mut self, file: MaybeUntrusted<&mut ZipFile<'_>>) -> ZipResult<()> { let file = match file { MaybeUntrusted::Ok(file) => file, - MaybeUntrusted::Untrusted(value) => if self.allow_untrusted { - value.use_untrusted_value() - } else { - return Err(ZipError::UnsupportedArchive("extracting a zip file stream-fashion with data descriptors is considered a security risk")); + MaybeUntrusted::Untrusted(value) => { + if self.allow_untrusted { + value.use_untrusted_value() + } else { + return Err(ZipError::UnsupportedArchive("extracting a zip file stream-fashion with data descriptors is considered a security risk")); + } } }; @@ -122,10 +127,12 @@ impl ZipStreamReader { { let metadata = match metadata { MaybeUntrusted::Ok(file) => file, - MaybeUntrusted::Untrusted(value) => if self.allow_untrusted { - value.use_untrusted_value() - } else { - return Err(ZipError::UnsupportedArchive("extracting a zip file stream-fashion with data descriptors is considered a security risk")); + MaybeUntrusted::Untrusted(value) => { + if self.allow_untrusted { + value.use_untrusted_value() + } else { + return Err(ZipError::UnsupportedArchive("extracting a zip file stream-fashion with data descriptors is considered a security risk")); + } } }; @@ -145,7 +152,10 @@ impl ZipStreamReader { } } - self.visit(&mut Extractor {path: directory.as_ref(), allow_untrusted}) + self.visit(&mut Extractor { + path: directory.as_ref(), + allow_untrusted, + }) } } @@ -175,7 +185,10 @@ pub trait ZipStreamVisitor { // (file name, content, file count, ...). Take care handling the output. // // To be on the safe side use `.unwrap_or_error(...)` on the `MaybeUntrusted` parameter. - fn visit_additional_metadata(&mut self, metadata: MaybeUntrusted<&ZipStreamFileMetadata>) -> ZipResult<()>; + fn visit_additional_metadata( + &mut self, + metadata: MaybeUntrusted<&ZipStreamFileMetadata>, + ) -> ZipResult<()>; } /// Additional metadata for the file. diff --git a/src/result.rs b/src/result.rs index 856d2dc23..939c68256 100644 --- a/src/result.rs +++ b/src/result.rs @@ -118,7 +118,7 @@ impl UntrustedValue { /// Wraps the provided value as UntrustedValue pub fn wrap(value: T) -> Self { - UntrustedValue {value} + UntrustedValue { value } } } diff --git a/src/zipcrypto.rs b/src/zipcrypto.rs index 1a1433187..02add103a 100644 --- a/src/zipcrypto.rs +++ b/src/zipcrypto.rs @@ -208,7 +208,9 @@ impl ZipCryptoReaderValid { self.reader.file } - pub fn get_ref(&self) -> &R { &self.reader.file } + pub fn get_ref(&self) -> &R { + &self.reader.file + } } static CRCTABLE: [u32; 256] = [ From ad3dbc0ab914c4fd6d2c650997d85a3e79b43062 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Sat, 6 Jul 2024 12:52:22 -0700 Subject: [PATCH 10/14] chore: Feature-gate ReadAndSupplyExpectedCRC32 implementations Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> --- src/crc32.rs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/crc32.rs b/src/crc32.rs index 80d42067d..60f956678 100644 --- a/src/crc32.rs +++ b/src/crc32.rs @@ -101,24 +101,28 @@ impl<'a, T: ReadAndSupplyExpectedCRC32 + 'a> ReadAndSupplyExpectedCRC32 for Cryp } } +#[cfg(feature = "_deflate-any")] impl ReadAndSupplyExpectedCRC32 for DeflateDecoder { fn get_crc32(&self) -> io::Result { self.get_ref().get_crc32() } } +#[cfg(feature = "deflate64")] impl ReadAndSupplyExpectedCRC32 for Deflate64Decoder> { fn get_crc32(&self) -> io::Result { self.get_ref().get_ref().get_crc32() } } +#[cfg(feature = "bzip2")] impl ReadAndSupplyExpectedCRC32 for BzDecoder { fn get_crc32(&self) -> io::Result { self.get_ref().get_crc32() } } +#[cfg(feature = "zstd")] impl<'a, T: ReadAndSupplyExpectedCRC32 + BufRead> ReadAndSupplyExpectedCRC32 for zstd::Decoder<'a, T> { @@ -127,6 +131,7 @@ impl<'a, T: ReadAndSupplyExpectedCRC32 + BufRead> ReadAndSupplyExpectedCRC32 } } +#[cfg(feature = "zstd")] impl<'a, T: ReadAndSupplyExpectedCRC32> ReadAndSupplyExpectedCRC32 for zstd::Decoder<'a, BufReader> { @@ -135,6 +140,7 @@ impl<'a, T: ReadAndSupplyExpectedCRC32> ReadAndSupplyExpectedCRC32 } } +#[cfg(feature = "lzma")] impl ReadAndSupplyExpectedCRC32 for LzmaDecoder { fn get_crc32(&self) -> io::Result { self.get_ref().get_crc32() From 6cc20f5ce55daf264b0c6f84456e7ae6c4e38771 Mon Sep 17 00:00:00 2001 From: 0xCCF4 <0xccf4-git-commit@lenzentech.de> Date: Mon, 8 Jul 2024 11:59:08 +0200 Subject: [PATCH 11/14] refactor: applied simple review suggestions and cargo fmt & clippy --- src/read.rs | 57 ++++++++++++++++++++++++---------------------- src/read/stream.rs | 2 +- 2 files changed, 31 insertions(+), 28 deletions(-) diff --git a/src/read.rs b/src/read.rs index 0b3082b63..480b4241c 100644 --- a/src/read.rs +++ b/src/read.rs @@ -164,10 +164,10 @@ impl<'a, T: Read + 'a> CryptoReader<'a, T> { pub fn get_ref(&self) -> &T { match self { - CryptoReader::Plaintext(r, _) => &r, - CryptoReader::ZipCrypto(r) => &r.get_ref(), + CryptoReader::Plaintext(r, _) => r, + CryptoReader::ZipCrypto(r) => r.get_ref(), #[cfg(feature = "aes-crypto")] - CryptoReader::Aes { reader: r, .. } => &r.get_ref(), + CryptoReader::Aes { reader: r, .. } => r.get_ref(), } } @@ -258,7 +258,7 @@ pub struct ZipFile<'a> { pub(crate) reader: ZipFileReader<'a, Box>, } -pub(crate) fn find_content<'a, T: Read + Seek>( +pub(crate) fn find_content( data: &ZipFileData, mut reader: T, ) -> ZipResult>> { @@ -1724,7 +1724,7 @@ impl<'a, T: Read> Read for ReadBufferThenForward<'a, T> { struct ReadTillDataDescriptor { reader: T, data_descriptor_found: Option, - look_ahead_buffer: Vec, + look_ahead_buffer: Vec, // TODO: change this to VecDeque number_read_total_actual: usize, // without look ahead buffer } @@ -1748,21 +1748,20 @@ impl ReadTillDataDescriptor { impl ReadTillDataDescriptor { fn has_found_data_descriptor(&self) -> Option { - if spec::Magic::from_first_le_bytes(&self.look_ahead_buffer) - == spec::Magic::DATA_DESCRIPTOR_SIGNATURE - { - // potentially found a data descriptor - // check if size matches - let data_descriptor = match ZipDataDescriptor::interpret(&self.look_ahead_buffer) { - Ok(data_descriptor) => data_descriptor, - Err(_) => return None, - }; - let data_descriptor_size = data_descriptor.compressed_size; + let Ok(data_descriptor) = ZipDataDescriptor::interpret(&self.look_ahead_buffer) else { + return None; + }; - if data_descriptor_size == self.number_read_total_actual as u32 { - return Some(data_descriptor); - } + // potentially found a data descriptor + // check if size matches + + let data_descriptor_size = data_descriptor.compressed_size; + + if data_descriptor_size == self.number_read_total_actual as u32 { + // TODO: check CRC32 here as well + return Some(data_descriptor); } + None } @@ -1772,6 +1771,10 @@ impl ReadTillDataDescriptor { if read_count > 0 { let value = self.look_ahead_buffer.remove(0); + /* .ok_or(io::Error::new( + io::ErrorKind::Other, + "Look ahead buffer is empty. This should not have happened!?", + ))?; */ self.look_ahead_buffer.push(byte_buffer[0]); self.number_read_total_actual += 1; Ok(Some(value)) @@ -1792,17 +1795,17 @@ impl ReadTillDataDescriptor { impl Read for ReadTillDataDescriptor { fn read(&mut self, buf: &mut [u8]) -> io::Result { - for index in 0..buf.len() { + for (index, buf_value) in buf.iter_mut().enumerate() { let data_descriptor = self.has_found_data_descriptor(); - if let Some(_) = data_descriptor { + if data_descriptor.is_some() { self.data_descriptor_found = data_descriptor; return Ok(index); } match self.read_a_byte()? { None => return Ok(index), - Some(value) => buf[index] = value, + Some(value) => *buf_value = value, } } Ok(buf.len()) @@ -1825,13 +1828,12 @@ impl ReadAndSupplyExpectedCRC32 for ReadTillDataDescriptor { /// /// This method operates in three different modes: /// 1. data_descriptor=None and allow_unlimited_read=false : Parse the local file header and return zip file. Works if data descriptors are not used. -/// 2. data_descriptor=Some and allow_unlimited_read=false : Parse the local file header, augment the zip file with the size given in the data descriptor and return zip file. Works if data descriptors are used. -/// 3. data_descriptor=None and allow_unlimited_read=true : Parse the local file header, if data descriptors not used, fallback to case 1, else provide a ZipFile that +/// 2. data_descriptor=None and allow_unlimited_read=true : Parse the local file header, if data descriptors not used, fallback to case 1, else provide a ZipFile that /// searches for the data descriptor in the file. It just starts reading the compressed stream with no size limit set. When detecting a data descriptor the /// stream is closed. This is useful when the size of the compressed stream is unknown but brings some security risks. Therefore, returning an UntrustedValue. -/// 4. data_descriptor=Some and allow_unlimited_read=true : Fallback to case 2 +/// 3. data_descriptor=Some : Parse the local file header, augment the zip file with the size given in the data descriptor and return zip file. Works if data descriptors are used. /// -/// Case 3: **Security-disclaimer**: The **output** should **not** be regarded as **secure/trustworthy**. +/// Case 2: **Security-disclaimer**: The **output** should **not** be regarded as **secure/trustworthy**. /// When an attacker has control over a file which is compressed into a zip: The file might be crafted such that /// when reading with ZipArchive vs this function a parsing mismatch can occur. An attacker that has control over a file /// can manipulate such file such that this function will regard parts of the file as new zip file entry. The attacker might @@ -1842,12 +1844,11 @@ fn read_zipfile_from_fileblock<'a, R: Read>( data_descriptor: Option, mut allow_unlimited_read: bool, ) -> ZipResult>>> { - if data_descriptor.is_some() && allow_unlimited_read { + if data_descriptor.is_some() { allow_unlimited_read = false; // fallback, size is given } let size_unknown = data_descriptor.is_none() && block.block.flags & (1 << 3) == 1 << 3; // no data descriptor provided but should exist - let mut result = ZipFileData::from_local_block(block, data_descriptor, allow_unlimited_read)?; if size_unknown && !allow_unlimited_read { return Err(ZipError::UnsupportedArchive( @@ -1855,6 +1856,8 @@ fn read_zipfile_from_fileblock<'a, R: Read>( )); } + let mut result = ZipFileData::from_local_block(block, data_descriptor, allow_unlimited_read)?; + match crate::read::parse_extra_field(&mut result) { Ok(..) | Err(ZipError::Io(..)) => {} Err(e) => return Err(e), diff --git a/src/read/stream.rs b/src/read/stream.rs index 192a76125..d2649a33f 100644 --- a/src/read/stream.rs +++ b/src/read/stream.rs @@ -45,7 +45,7 @@ impl ZipStreamReader { loop { let file = read_zipfile_from_stream(&mut self.0)?; - untrusted = untrusted | file.is_untrusted(); + untrusted |= file.is_untrusted(); let file = file.use_untrusted_value(); match file { From f0cb9f2bb4962a274a4da283c01b43f29ffab740 Mon Sep 17 00:00:00 2001 From: 0xCCF4 <0xccf4-git-commit@lenzentech.de> Date: Mon, 15 Jul 2024 12:42:52 +0200 Subject: [PATCH 12/14] fix: completed merge of xz decoder into feature branch --- src/crc32.rs | 8 ++++++++ src/read.rs | 3 +-- src/read/xz.rs | 4 ++++ 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/src/crc32.rs b/src/crc32.rs index 60f956678..b7390d409 100644 --- a/src/crc32.rs +++ b/src/crc32.rs @@ -10,6 +10,7 @@ use crate::read::CryptoReader; use crc32fast::Hasher; use deflate64::Deflate64Decoder; use flate2::read::DeflateDecoder; +use crate::read::xz::XzDecoder; /// Reader that validates the CRC32 when it reaches the EOF. pub struct Crc32Reader { @@ -147,6 +148,13 @@ impl ReadAndSupplyExpectedCRC32 for LzmaDecoder ReadAndSupplyExpectedCRC32 for XzDecoder { + fn get_crc32(&self) -> io::Result { + self.as_ref().get_crc32() + } +} + impl<'a> ReadAndSupplyExpectedCRC32 for Box<(dyn ReadAndSupplyExpectedCRC32 + 'a)> { fn get_crc32(&self) -> io::Result { self.as_ref().get_crc32() diff --git a/src/read.rs b/src/read.rs index 44bd0e309..9102a83d8 100644 --- a/src/read.rs +++ b/src/read.rs @@ -206,7 +206,7 @@ pub(crate) enum ZipFileReader<'a, T: ReadAndSupplyExpectedCRC32 + 'a> { #[cfg(feature = "lzma")] Lzma(Crc32Reader>>>), #[cfg(feature = "xz")] - Xz(Crc32Reader>>), + Xz(Crc32Reader>>), } impl<'a, T: ReadAndSupplyExpectedCRC32 + 'a> Read for ZipFileReader<'a, T> { @@ -418,7 +418,6 @@ pub(crate) fn make_reader( let reader = XzDecoder::new(reader); Ok(ZipFileReader::Xz(Crc32Reader::new( reader, - crc32, ae2_encrypted, ))) } diff --git a/src/read/xz.rs b/src/read/xz.rs index 50ee38d7b..2338ba4e5 100644 --- a/src/read/xz.rs +++ b/src/read/xz.rs @@ -26,6 +26,10 @@ impl XzDecoder { flags: [0, 0], } } + + pub(crate) fn as_ref(&self) -> &R { + self.compressed_reader.get_ref() + } } struct CountReader<'a, R: BufRead> { From 549d1da287e500b48d31a7b2f2edc9c4b508b67f Mon Sep 17 00:00:00 2001 From: 0xCCF4 <0xccf4-git-commit@lenzentech.de> Date: Mon, 15 Jul 2024 13:07:13 +0200 Subject: [PATCH 13/14] perf: use vecdequeue for look ahead buffer run cargo fmt & clippy --- benches/read_metadata.rs | 5 +---- src/crc32.rs | 2 +- src/read.rs | 25 ++++++++++++------------- src/read/xz.rs | 2 +- tests/zip_stream.rs | 2 +- 5 files changed, 16 insertions(+), 20 deletions(-) diff --git a/benches/read_metadata.rs b/benches/read_metadata.rs index 73f2b26ed..6c89594e9 100644 --- a/benches/read_metadata.rs +++ b/benches/read_metadata.rs @@ -108,10 +108,7 @@ fn parse_stream_archive(bench: &mut Bencher) { bench.iter(|| { let mut f = fs::File::open(&out).unwrap(); - while zip::read::read_zipfile_from_stream(&mut f) - .unwrap() - .is_some() - {} + while zip::read::read_zipfile_from_stream(&mut f).unwrap().is_ok() {} }); bench.bytes = bytes.len() as u64; } diff --git a/src/crc32.rs b/src/crc32.rs index b7390d409..5e9eb5485 100644 --- a/src/crc32.rs +++ b/src/crc32.rs @@ -6,11 +6,11 @@ use std::io::prelude::*; use std::io::BufReader; use crate::read::lzma::LzmaDecoder; +use crate::read::xz::XzDecoder; use crate::read::CryptoReader; use crc32fast::Hasher; use deflate64::Deflate64Decoder; use flate2::read::DeflateDecoder; -use crate::read::xz::XzDecoder; /// Reader that validates the CRC32 when it reaches the EOF. pub struct Crc32Reader { diff --git a/src/read.rs b/src/read.rs index 9102a83d8..419660be7 100644 --- a/src/read.rs +++ b/src/read.rs @@ -16,6 +16,7 @@ use crate::types::{ use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator}; use indexmap::IndexMap; use std::borrow::Cow; +use std::collections::VecDeque; use std::ffi::OsString; use std::fs::create_dir_all; use std::io::{self, copy, prelude::*, sink, ErrorKind, SeekFrom}; @@ -416,10 +417,7 @@ pub(crate) fn make_reader( #[cfg(feature = "xz")] CompressionMethod::Xz => { let reader = XzDecoder::new(reader); - Ok(ZipFileReader::Xz(Crc32Reader::new( - reader, - ae2_encrypted, - ))) + Ok(ZipFileReader::Xz(Crc32Reader::new(reader, ae2_encrypted))) } _ => Err(UnsupportedArchive("Compression method not supported")), } @@ -1743,8 +1741,8 @@ impl<'a, T: Read> Read for ReadBufferThenForward<'a, T> { struct ReadTillDataDescriptor { reader: T, data_descriptor_found: Option, - look_ahead_buffer: Vec, // TODO: change this to VecDeque - number_read_total_actual: usize, // without look ahead buffer + look_ahead_buffer: VecDeque, + number_read_total_actual: usize, // read count, without look ahead buffer } impl ReadTillDataDescriptor { @@ -1754,7 +1752,7 @@ impl ReadTillDataDescriptor { // fill the look ahead buffer reader.read_exact(&mut look_ahead_buffer)?; - let look_ahead_buffer = Vec::from(look_ahead_buffer); + let look_ahead_buffer = VecDeque::from(look_ahead_buffer); Ok(Self { reader, @@ -1766,8 +1764,10 @@ impl ReadTillDataDescriptor { } impl ReadTillDataDescriptor { - fn has_found_data_descriptor(&self) -> Option { - let Ok(data_descriptor) = ZipDataDescriptor::interpret(&self.look_ahead_buffer) else { + fn has_found_data_descriptor(&mut self) -> Option { + let Ok(data_descriptor) = + ZipDataDescriptor::interpret(self.look_ahead_buffer.make_contiguous()) + else { return None; }; @@ -1789,12 +1789,11 @@ impl ReadTillDataDescriptor { let read_count = self.reader.read(&mut byte_buffer)?; if read_count > 0 { - let value = self.look_ahead_buffer.remove(0); - /* .ok_or(io::Error::new( + let value = self.look_ahead_buffer.pop_front().ok_or(io::Error::new( io::ErrorKind::Other, "Look ahead buffer is empty. This should not have happened!?", - ))?; */ - self.look_ahead_buffer.push(byte_buffer[0]); + ))?; + self.look_ahead_buffer.push_back(byte_buffer[0]); self.number_read_total_actual += 1; Ok(Some(value)) } else { diff --git a/src/read/xz.rs b/src/read/xz.rs index 2338ba4e5..3ae4520b8 100644 --- a/src/read/xz.rs +++ b/src/read/xz.rs @@ -26,7 +26,7 @@ impl XzDecoder { flags: [0, 0], } } - + pub(crate) fn as_ref(&self) -> &R { self.compressed_reader.get_ref() } diff --git a/tests/zip_stream.rs b/tests/zip_stream.rs index d06c3e09d..ec59089ef 100644 --- a/tests/zip_stream.rs +++ b/tests/zip_stream.rs @@ -106,6 +106,6 @@ fn decompress_stream_with_data_descriptor_continue() { .expect("couldn't advance to next entry in zip file"); match entry { None => (), - _ => assert!(false, "expected no more entries"), + _ => panic!("expected no more entries"), }; } From 60221366cb60feca0c5b3ca0b5f83c390008fb90 Mon Sep 17 00:00:00 2001 From: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> Date: Fri, 19 Jul 2024 16:27:18 -0700 Subject: [PATCH 14/14] fix: FixedSizeBlock must extend Pod Signed-off-by: Chris Hennick <4961925+Pr0methean@users.noreply.github.com> --- src/types.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/types.rs b/src/types.rs index 37dbdc613..34becca0e 100644 --- a/src/types.rs +++ b/src/types.rs @@ -1010,6 +1010,8 @@ pub(crate) struct ZipDataDescriptor { pub uncompressed_size: u32, } +impl Pod for crate::types::ZipDataDescriptor {} + impl FixedSizeBlock for crate::types::ZipDataDescriptor { const MAGIC: spec::Magic = spec::Magic::DATA_DESCRIPTOR_SIGNATURE;