diff --git a/benches/read_metadata.rs b/benches/read_metadata.rs index 73f2b26ed..6c89594e9 100644 --- a/benches/read_metadata.rs +++ b/benches/read_metadata.rs @@ -108,10 +108,7 @@ fn parse_stream_archive(bench: &mut Bencher) { bench.iter(|| { let mut f = fs::File::open(&out).unwrap(); - while zip::read::read_zipfile_from_stream(&mut f) - .unwrap() - .is_some() - {} + while zip::read::read_zipfile_from_stream(&mut f).unwrap().is_ok() {} }); bench.bytes = bytes.len() as u64; } diff --git a/examples/stdin_info.rs b/examples/stdin_info.rs index a609916a0..d3cf8b0ad 100644 --- a/examples/stdin_info.rs +++ b/examples/stdin_info.rs @@ -10,7 +10,15 @@ fn real_main() -> i32 { let mut buf = [0u8; 16]; loop { - match zip::read::read_zipfile_from_stream(&mut stdin_handle) { + let file = match zip::read::read_zipfile_from_stream(&mut stdin_handle) { + Err(e) => { + println!("Error encountered while reading zip: {e:?}"); + return 1; + } + Ok(value) => value, + }; + + match file.unwrap_or_error("data descriptors not supported while reading stdin") { Ok(Some(mut file)) => { println!( "{}: {} bytes ({} bytes packed)", diff --git a/src/aes.rs b/src/aes.rs index 6c9eb371f..c74bd1978 100644 --- a/src/aes.rs +++ b/src/aes.rs @@ -219,6 +219,10 @@ impl AesReaderValid { pub fn into_inner(self) -> R { self.reader } + + pub fn get_ref(&self) -> &R { + &self.reader + } } pub struct AesWriter { diff --git a/src/crc32.rs b/src/crc32.rs index 4b2beb62e..f04783575 100644 --- a/src/crc32.rs +++ b/src/crc32.rs @@ -1,34 +1,41 @@ //! Helper module to compute a CRC32 checksum +use bzip2::read::BzDecoder; use std::io; use std::io::prelude::*; +use std::io::BufReader; +use crate::read::lzma::LzmaDecoder; +use crate::read::xz::XzDecoder; +use crate::read::CryptoReader; use crc32fast::Hasher; +use deflate64::Deflate64Decoder; +use flate2::read::DeflateDecoder; /// Reader that validates the CRC32 when it reaches the EOF. -pub struct Crc32Reader { +pub struct Crc32Reader { inner: R, hasher: Hasher, - check: u32, /// Signals if `inner` stores aes encrypted data. /// AE-2 encrypted data doesn't use crc and sets the value to 0. enabled: bool, } -impl Crc32Reader { +impl Crc32Reader { /// Get a new Crc32Reader which checks the inner reader against checksum. /// The check is disabled if `ae2_encrypted == true`. - pub(crate) fn new(inner: R, checksum: u32, ae2_encrypted: bool) -> Crc32Reader { + pub(crate) fn new(inner: R, ae2_encrypted: bool) -> Crc32Reader { Crc32Reader { inner, hasher: Hasher::new(), + ae2_encrypted, check: checksum, enabled: !ae2_encrypted, } } - fn check_matches(&self) -> bool { - self.check == self.hasher.clone().finalize() + fn check_matches(&self) -> std::io::Result { + Ok(self.inner.get_crc32()? == self.hasher.clone().finalize()) } pub fn into_inner(self) -> R { @@ -41,13 +48,11 @@ fn invalid_checksum() -> io::Error { io::Error::new(io::ErrorKind::InvalidData, "Invalid checksum") } -impl Read for Crc32Reader { +impl Read for Crc32Reader { fn read(&mut self, buf: &mut [u8]) -> io::Result { - let count = self.inner.read(buf)?; - - if self.enabled { - if count == 0 && !buf.is_empty() && !self.check_matches() { - return Err(invalid_checksum()); + let count = match self.inner.read(buf) { + Ok(0) if !buf.is_empty() && !self.check_matches()? && !self.ae2_encrypted => { + return Err(io::Error::new(io::ErrorKind::Other, "Invalid checksum")) } self.hasher.update(&buf[..count]); } @@ -83,6 +88,124 @@ impl Read for Crc32Reader { } } +/// A reader trait that provides a method to get the expected crc of the data read. +/// In the normal case, the expected crc is known before the zip entry is read. +/// In streaming mode with data descriptors, the crc will be available after the data is read. +/// Still in both cases the crc is available after the data is read and can be checked. +pub trait ReadAndSupplyExpectedCRC32: Read { + fn get_crc32(&self) -> std::io::Result; +} + +pub struct InitiallyKnownCRC32 { + reader: R, + crc: u32, +} + +impl InitiallyKnownCRC32 { + pub fn new(reader: R, crc: u32) -> InitiallyKnownCRC32 { + InitiallyKnownCRC32 { reader, crc } + } + + #[allow(dead_code)] + pub fn into_inner(self) -> R { + self.reader + } + + #[allow(dead_code)] + pub fn get_ref(&self) -> &R { + &self.reader + } +} + +impl Read for InitiallyKnownCRC32 { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.reader.read(buf) + } +} + +impl ReadAndSupplyExpectedCRC32 for InitiallyKnownCRC32 { + fn get_crc32(&self) -> std::io::Result { + Ok(self.crc) + } +} + +impl<'a, T: ReadAndSupplyExpectedCRC32 + 'a> ReadAndSupplyExpectedCRC32 for CryptoReader<'a, T> { + fn get_crc32(&self) -> io::Result { + self.get_ref().get_crc32() + } +} + +#[cfg(feature = "_deflate-any")] +impl ReadAndSupplyExpectedCRC32 for DeflateDecoder { + fn get_crc32(&self) -> io::Result { + self.get_ref().get_crc32() + } +} + +#[cfg(feature = "deflate64")] +impl ReadAndSupplyExpectedCRC32 for Deflate64Decoder> { + fn get_crc32(&self) -> io::Result { + self.get_ref().get_ref().get_crc32() + } +} + +#[cfg(feature = "bzip2")] +impl ReadAndSupplyExpectedCRC32 for BzDecoder { + fn get_crc32(&self) -> io::Result { + self.get_ref().get_crc32() + } +} + +#[cfg(feature = "zstd")] +impl<'a, T: ReadAndSupplyExpectedCRC32 + BufRead> ReadAndSupplyExpectedCRC32 + for zstd::Decoder<'a, T> +{ + fn get_crc32(&self) -> io::Result { + self.get_ref().get_crc32() + } +} + +#[cfg(feature = "zstd")] +impl<'a, T: ReadAndSupplyExpectedCRC32> ReadAndSupplyExpectedCRC32 + for zstd::Decoder<'a, BufReader> +{ + fn get_crc32(&self) -> io::Result { + self.get_ref().get_ref().get_crc32() + } +} + +#[cfg(feature = "lzma")] +impl ReadAndSupplyExpectedCRC32 for LzmaDecoder { + fn get_crc32(&self) -> io::Result { + self.get_ref().get_crc32() + } +} + +#[cfg(feature = "xz")] +impl ReadAndSupplyExpectedCRC32 for XzDecoder { + fn get_crc32(&self) -> io::Result { + self.as_ref().get_crc32() + } +} + +impl<'a> ReadAndSupplyExpectedCRC32 for Box<(dyn ReadAndSupplyExpectedCRC32 + 'a)> { + fn get_crc32(&self) -> io::Result { + self.as_ref().get_crc32() + } +} + +impl<'a, T: ReadAndSupplyExpectedCRC32 + 'a> ReadAndSupplyExpectedCRC32 for Box { + fn get_crc32(&self) -> io::Result { + self.as_ref().get_crc32() + } +} + +impl ReadAndSupplyExpectedCRC32 for std::io::Take<&mut T> { + fn get_crc32(&self) -> io::Result { + self.get_ref().get_crc32() + } +} + #[cfg(test)] mod test { use super::*; @@ -92,10 +215,10 @@ mod test { let data: &[u8] = b""; let mut buf = [0; 1]; - let mut reader = Crc32Reader::new(data, 0, false); + let mut reader = Crc32Reader::new(InitiallyKnownCRC32::new(data, 0), false); assert_eq!(reader.read(&mut buf).unwrap(), 0); - let mut reader = Crc32Reader::new(data, 1, false); + let mut reader = Crc32Reader::new(InitiallyKnownCRC32::new(data, 1), false); assert!(reader .read(&mut buf) .unwrap_err() @@ -108,7 +231,7 @@ mod test { let data: &[u8] = b"1234"; let mut buf = [0; 1]; - let mut reader = Crc32Reader::new(data, 0x9be3e0a3, false); + let mut reader = Crc32Reader::new(InitiallyKnownCRC32::new(data, 0x9be3e0a3), false); assert_eq!(reader.read(&mut buf).unwrap(), 1); assert_eq!(reader.read(&mut buf).unwrap(), 1); assert_eq!(reader.read(&mut buf).unwrap(), 1); @@ -123,7 +246,7 @@ mod test { let data: &[u8] = b"1234"; let mut buf = [0; 5]; - let mut reader = Crc32Reader::new(data, 0x9be3e0a3, false); + let mut reader = Crc32Reader::new(InitiallyKnownCRC32::new(data, 0x9be3e0a3), false); assert_eq!(reader.read(&mut buf[..0]).unwrap(), 0); assert_eq!(reader.read(&mut buf).unwrap(), 4); } diff --git a/src/read.rs b/src/read.rs index 354d80702..fa7658d74 100644 --- a/src/read.rs +++ b/src/read.rs @@ -4,24 +4,26 @@ use crate::aes::{AesReader, AesReaderValid}; use crate::compression::{CompressionMethod, Decompressor}; use crate::cp437::FromCp437; -use crate::crc32::Crc32Reader; +use crate::crc32::{Crc32Reader, InitiallyKnownCRC32, ReadAndSupplyExpectedCRC32}; use crate::extra_fields::{ExtendedTimestamp, ExtraField}; use crate::read::zip_archive::{Shared, SharedBuilder}; -use crate::result::{ZipError, ZipResult}; +use crate::result::{MaybeUntrusted, UntrustedValue, ZipError, ZipResult}; use crate::spec::{ self, FixedSizeBlock, Pod, Zip32CentralDirectoryEnd, Zip64CDELocatorBlock, Zip64CentralDirectoryEnd, ZIP64_ENTRY_THR, }; use crate::types::{ - AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipFileData, - ZipLocalEntryBlock, + AesMode, AesVendorVersion, DateTime, System, ZipCentralEntryBlock, ZipDataDescriptor, + ZipFileData, ZipLocalEntryBlock, ZipLocalEntryBlockAndFields, }; use crate::zipcrypto::{ZipCryptoReader, ZipCryptoReaderValid, ZipCryptoValidator}; use indexmap::IndexMap; use std::borrow::Cow; +use std::collections::VecDeque; use std::ffi::OsString; use std::fs::create_dir_all; -use std::io::{self, copy, prelude::*, sink, SeekFrom}; +use std::io::{self, copy, prelude::*, sink, ErrorKind, SeekFrom}; +use std::marker::PhantomData; use std::mem; use std::mem::size_of; use std::ops::Deref; @@ -122,20 +124,20 @@ use crate::unstable::{path_to_string, LittleEndianReadExt}; pub use zip_archive::ZipArchive; #[allow(clippy::large_enum_variant)] -pub(crate) enum CryptoReader<'a> { - Plaintext(io::Take<&'a mut dyn Read>), - ZipCrypto(ZipCryptoReaderValid>), +pub(crate) enum CryptoReader<'a, T: Read + 'a> { + Plaintext(T, PhantomData<&'a T>), // todo check if phantom data can be removed + ZipCrypto(ZipCryptoReaderValid), #[cfg(feature = "aes-crypto")] Aes { - reader: AesReaderValid>, + reader: AesReaderValid, vendor_version: AesVendorVersion, }, } -impl Read for CryptoReader<'_> { +impl Read for CryptoReader<'_, T> { fn read(&mut self, buf: &mut [u8]) -> io::Result { match self { - CryptoReader::Plaintext(r) => r.read(buf), + CryptoReader::Plaintext(r, _) => r.read(buf), CryptoReader::ZipCrypto(r) => r.read(buf), #[cfg(feature = "aes-crypto")] CryptoReader::Aes { reader: r, .. } => r.read(buf), @@ -161,17 +163,26 @@ impl Read for CryptoReader<'_> { } } -impl<'a> CryptoReader<'a> { +impl<'a, T: Read + 'a> CryptoReader<'a, T> { /// Consumes this decoder, returning the underlying reader. - pub fn into_inner(self) -> io::Take<&'a mut dyn Read> { + pub fn into_inner(self) -> T { match self { - CryptoReader::Plaintext(r) => r, + CryptoReader::Plaintext(r, _) => r, CryptoReader::ZipCrypto(r) => r.into_inner(), #[cfg(feature = "aes-crypto")] CryptoReader::Aes { reader: r, .. } => r.into_inner(), } } + pub fn get_ref(&self) -> &T { + match self { + CryptoReader::Plaintext(r, _) => r, + CryptoReader::ZipCrypto(r) => r.get_ref(), + #[cfg(feature = "aes-crypto")] + CryptoReader::Aes { reader: r, .. } => r.get_ref(), + } + } + /// Returns `true` if the data is encrypted using AE2. pub const fn is_ae2_encrypted(&self) -> bool { #[cfg(feature = "aes-crypto")] @@ -201,7 +212,7 @@ pub(crate) enum ZipFileReader<'a> { Compressed(Box>>>>), } -impl Read for ZipFileReader<'_> { +impl Read for ZipFileReader<'_, T> { fn read(&mut self, buf: &mut [u8]) -> io::Result { match self { ZipFileReader::NoReader => invalid_state(), @@ -250,7 +261,8 @@ impl<'a> ZipFileReader<'a> { /// A struct for reading a zip file pub struct ZipFile<'a> { pub(crate) data: Cow<'a, ZipFileData>, - pub(crate) reader: ZipFileReader<'a>, + pub(crate) crypto_reader: Option>>, + pub(crate) reader: ZipFileReader<'a, Box>, } /// A struct for reading and seeking a zip file @@ -317,18 +329,21 @@ impl Read for SeekableTake<'_, R> { } } -pub(crate) fn find_content<'a>( +pub(crate) fn find_content( data: &ZipFileData, - reader: &'a mut (impl Read + Seek), -) -> ZipResult> { + mut reader: T, +) -> ZipResult>> { // TODO: use .get_or_try_init() once stabilized to provide a closure returning a Result! let data_start = match data.data_start.get() { Some(data_start) => *data_start, - None => find_data_start(data, reader)?, + None => find_data_start(data, &mut reader)?, }; - reader.seek(SeekFrom::Start(data_start))?; - Ok((reader as &mut dyn Read).take(data.compressed_size)) + reader.seek(io::SeekFrom::Start(data_start))?; + Ok(InitiallyKnownCRC32::new( + reader.take(data.compressed_size), + data.crc32, + )) } fn find_content_seek<'a, R: Read + Seek>( @@ -411,18 +426,16 @@ pub(crate) fn make_crypto_reader<'a>( CryptoReader::ZipCrypto(ZipCryptoReader::new(reader, password).validate(validator)?) } (None, Some(_)) => return Err(InvalidPassword), - (None, None) => CryptoReader::Plaintext(reader), + (None, None) => CryptoReader::Plaintext(reader, PhantomData), }; Ok(reader) } -pub(crate) fn make_reader( +pub(crate) fn make_reader( compression_method: CompressionMethod, - crc32: u32, - reader: CryptoReader, -) -> ZipResult { + reader: CryptoReader, +) -> ZipResult> { let ae2_encrypted = reader.is_ae2_encrypted(); - Ok(ZipFileReader::Compressed(Box::new(Crc32Reader::new( Decompressor::new(io::BufReader::new(reader), compression_method)?, crc32, @@ -1201,7 +1214,6 @@ impl ZipArchive { let limit_reader = find_content(data, &mut self.reader)?; let crypto_reader = make_crypto_reader(data, limit_reader, password, data.aes_mode)?; - Ok(ZipFile { data: Cow::Borrowed(data), reader: make_reader(data.compression_method, data.crc32, crypto_reader)?, @@ -1497,6 +1509,17 @@ pub trait HasZipMetadata { /// Methods for retrieving information on zip files impl<'a> ZipFile<'a> { + fn get_reader( + &mut self, + ) -> ZipResult<&mut ZipFileReader<'a, Box>> { + if let ZipFileReader::NoReader = self.reader { + let data = &self.data; + let crypto_reader = self.crypto_reader.take().expect("Invalid reader state"); + self.reader = make_reader(data.compression_method, crypto_reader)?; + } + Ok(&mut self.reader) + } + pub(crate) fn take_raw_reader(&mut self) -> io::Result> { mem::replace(&mut self.reader, ZipFileReader::NoReader).into_inner() } @@ -1718,23 +1741,7 @@ impl Drop for ZipFile<'_> { } } -/// Read ZipFile structures from a non-seekable reader. -/// -/// This is an alternative method to read a zip file. If possible, use the ZipArchive functions -/// as some information will be missing when reading this manner. -/// -/// Reads a file header from the start of the stream. Will return `Ok(Some(..))` if a file is -/// present at the start of the stream. Returns `Ok(None)` if the start of the central directory -/// is encountered. No more files should be read after this. -/// -/// The Drop implementation of ZipFile ensures that the reader will be correctly positioned after -/// the structure is done. -/// -/// Missing fields are: -/// * `comment`: set to an empty string -/// * `data_start`: set to 0 -/// * `external_attributes`: `unix_mode()`: will return None -pub fn read_zipfile_from_stream(reader: &mut R) -> ZipResult>> { +fn read_local_fileblock(reader: &mut R) -> ZipResult> { // We can't use the typical ::parse() method, as we follow separate code paths depending on the // "magic" value (since the magic value will be from the central directory header if we've // finished iterating over all the actual files). @@ -1751,16 +1758,223 @@ pub fn read_zipfile_from_stream(reader: &mut R) -> ZipResult {} - Err(e) => return Err(e), + let mut file_name_raw = vec![0u8; file_name_length]; + reader.read_exact(&mut file_name_raw)?; + let mut extra_field = vec![0u8; extra_field_length]; + reader.read_exact(&mut extra_field)?; + + Ok(Some(ZipLocalEntryBlockAndFields { + block, + file_name_raw, + extra_field, + })) +} + +/// First read the data given in the buffer, then continue reading from the reader. +pub struct ReadBufferThenForward<'a, T: Read + 'a> { + reader: &'a mut T, + buffer: Vec, + index: usize, +} + +impl<'a, T: Read> ReadBufferThenForward<'a, T> { + /// creates a new instance of this, first reads from the buffer, then proxy read to underlying stream + pub fn new(reader: &'a mut T, buffer: Vec) -> Self { + ReadBufferThenForward { + reader, + buffer, + index: 0, + } + } +} + +impl<'a, T: Read> Read for ReadBufferThenForward<'a, T> { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let buffer_start = std::cmp::min(self.index, buf.len()); + let data_left_in_buffer = buf.len() - buffer_start; + let buffer_end = std::cmp::min(self.buffer.len(), buffer_start + data_left_in_buffer); + + let next_buffer_data = &self.buffer[buffer_start..buffer_end]; + let count_copy_from_buffer = std::cmp::min(next_buffer_data.len(), buf.len()); + + buf[0..count_copy_from_buffer] + .copy_from_slice(&next_buffer_data[0..count_copy_from_buffer]); + + self.index += count_copy_from_buffer; + if self.index > buf.len() && self.index > 0 { + // empty buffer + self.index = 0; + self.buffer.clear(); + self.buffer.shrink_to_fit(); + } + + let remaining_bytes = buf.len() - count_copy_from_buffer; + let result = if remaining_bytes > 0 { + self.reader.read(&mut buf[count_copy_from_buffer..])? + } else { + 0 + }; + + Ok(count_copy_from_buffer + result) + } +} + +struct ReadTillDataDescriptor { + reader: T, + data_descriptor_found: Option, + look_ahead_buffer: VecDeque, + number_read_total_actual: usize, // read count, without look ahead buffer +} + +impl ReadTillDataDescriptor { + pub fn new(mut reader: T) -> ZipResult { + let mut look_ahead_buffer = [0u8; std::mem::size_of::()]; + + // fill the look ahead buffer + reader.read_exact(&mut look_ahead_buffer)?; + + let look_ahead_buffer = VecDeque::from(look_ahead_buffer); + + Ok(Self { + reader, + look_ahead_buffer, + data_descriptor_found: None, + number_read_total_actual: 0, + }) } +} + +impl ReadTillDataDescriptor { + fn has_found_data_descriptor(&mut self) -> Option { + let Ok(data_descriptor) = + ZipDataDescriptor::interpret(self.look_ahead_buffer.make_contiguous()) + else { + return None; + }; + + // potentially found a data descriptor + // check if size matches + + let data_descriptor_size = data_descriptor.compressed_size; + + if data_descriptor_size == self.number_read_total_actual as u32 { + // TODO: check CRC32 here as well + return Some(data_descriptor); + } + + None + } + + fn read_a_byte(&mut self) -> io::Result> { + let mut byte_buffer = [0u8; 1]; + let read_count = self.reader.read(&mut byte_buffer)?; + + if read_count > 0 { + let value = self.look_ahead_buffer.pop_front().ok_or(io::Error::new( + io::ErrorKind::Other, + "Look ahead buffer is empty. This should not have happened!?", + ))?; + self.look_ahead_buffer.push_back(byte_buffer[0]); + self.number_read_total_actual += 1; + Ok(Some(value)) + } else { + Ok(None) + } + } + + pub fn get_data_descriptor(&self) -> Option { + self.data_descriptor_found + } + + #[allow(dead_code)] + pub fn into_inner(self) -> T { + self.reader + } +} + +impl Read for ReadTillDataDescriptor { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + for (index, buf_value) in buf.iter_mut().enumerate() { + let data_descriptor = self.has_found_data_descriptor(); + + if data_descriptor.is_some() { + self.data_descriptor_found = data_descriptor; + return Ok(index); + } - let limit_reader = (reader as &mut dyn Read).take(result.compressed_size); + match self.read_a_byte()? { + None => return Ok(index), + Some(value) => *buf_value = value, + } + } + Ok(buf.len()) + } +} + +impl ReadAndSupplyExpectedCRC32 for ReadTillDataDescriptor { + fn get_crc32(&self) -> io::Result { + Ok(self + .get_data_descriptor() + .ok_or(std::io::Error::new( + ErrorKind::Other, + "EOF reached, corrupt zip file", + ))? + .crc32) + } +} + +/// Read ZipFile structures from a reader. +/// +/// This method operates in three different modes: +/// 1. data_descriptor=None and allow_unlimited_read=false : Parse the local file header and return zip file. Works if data descriptors are not used. +/// 2. data_descriptor=None and allow_unlimited_read=true : Parse the local file header, if data descriptors not used, fallback to case 1, else provide a ZipFile that +/// searches for the data descriptor in the file. It just starts reading the compressed stream with no size limit set. When detecting a data descriptor the +/// stream is closed. This is useful when the size of the compressed stream is unknown but brings some security risks. Therefore, returning an UntrustedValue. +/// 3. data_descriptor=Some : Parse the local file header, augment the zip file with the size given in the data descriptor and return zip file. Works if data descriptors are used. +/// +/// Case 2: **Security-disclaimer**: The **output** should **not** be regarded as **secure/trustworthy**. +/// When an attacker has control over a file which is compressed into a zip: The file might be crafted such that +/// when reading with ZipArchive vs this function a parsing mismatch can occur. An attacker that has control over a file +/// can manipulate such file such that this function will regard parts of the file as new zip file entry. The attacker might +/// therefore inject custom zip file entries with attacker controlled content/name/metadata. +fn read_zipfile_from_fileblock<'a, R: Read>( + block: ZipLocalEntryBlockAndFields, + reader: &'a mut R, + data_descriptor: Option, + mut allow_unlimited_read: bool, +) -> ZipResult>>> { + if data_descriptor.is_some() { + allow_unlimited_read = false; // fallback, size is given + } + + let size_unknown = data_descriptor.is_none() && block.block.flags & (1 << 3) == 1 << 3; // no data descriptor provided but should exist + + if size_unknown && !allow_unlimited_read { + return Err(ZipError::UnsupportedArchive( + "Archive is using data descriptors, but unlimited reading not enabled", + )); + } + + let mut result = ZipFileData::from_local_block(block, data_descriptor, allow_unlimited_read)?; + + match crate::read::parse_extra_field(&mut result) { + Ok(..) | Err(ZipError::Io(..)) => {} + Err(e) => return Err(e), + }; let result_crc32 = result.crc32; + + let limit_reader: Box = match size_unknown { + false => Box::new(InitiallyKnownCRC32::new( + (reader as &'a mut dyn Read).take(result.compressed_size), + result_crc32, + )), + true => Box::new(ReadTillDataDescriptor::new(reader)?), + }; + let result_compression_method = result.compression_method; let crypto_reader = make_crypto_reader(&result, limit_reader, None, None)?; @@ -1828,7 +2042,12 @@ mod test { v.extend_from_slice(include_bytes!("../tests/data/mimetype.zip")); let mut reader = Cursor::new(v); loop { - if read_zipfile_from_stream(&mut reader).unwrap().is_none() { + if read_zipfile_from_stream(&mut reader) + .unwrap() + .unwrap_or_error("data descriptors used") + .unwrap() + .is_none() + { break; } } diff --git a/src/read/lzma.rs b/src/read/lzma.rs index e47209143..315f85aa8 100644 --- a/src/read/lzma.rs +++ b/src/read/lzma.rs @@ -25,6 +25,10 @@ impl LzmaDecoder { pub fn into_inner(self) -> R { self.compressed_reader } + + pub fn get_ref(&self) -> &R { + &self.compressed_reader + } } impl Read for LzmaDecoder { diff --git a/src/read/stream.rs b/src/read/stream.rs index cc602d1dc..de9074bc5 100644 --- a/src/read/stream.rs +++ b/src/read/stream.rs @@ -1,3 +1,4 @@ +use crate::result::MaybeUntrusted; use std::fs; use std::io::{self, Read}; use std::path::{Path, PathBuf}; @@ -40,12 +41,21 @@ impl ZipStreamReader { /// Iterate over the stream and extract all file and their /// metadata. pub fn visit(mut self, visitor: &mut V) -> ZipResult<()> { - while let Some(mut file) = read_zipfile_from_stream(&mut self.0)? { - visitor.visit_file(&mut file)?; + let mut untrusted = false; + + loop { + let file = read_zipfile_from_stream(&mut self.0)?; + untrusted |= file.is_untrusted(); + + let file = file.use_untrusted_value(); + match file { + Some(mut file) => visitor.visit_file(MaybeUntrusted::wrap(&mut file, untrusted))?, + None => break, + } } while let Ok(metadata) = self.parse_central_directory() { - visitor.visit_additional_metadata(&metadata)?; + visitor.visit_additional_metadata(MaybeUntrusted::wrap(&metadata, untrusted))?; } Ok(()) @@ -57,14 +67,43 @@ impl ZipStreamReader { /// Extraction is not atomic; If an error is encountered, some of the files /// may be left on disk. pub fn extract>(self, directory: P) -> ZipResult<()> { - struct Extractor<'a>(&'a Path); + self.extract_unsafe(directory, false) + } + + /// Extract a Zip archive into a directory, overwriting files if they + /// already exist. Paths are sanitized with [`ZipFile::enclosed_name`]. + /// + /// Extraction is not atomic; If an error is encountered, some of the files + /// may be left on disk. + /// + /// **This is considered a security risk. Use at your own discretion.** + pub fn extract_untrusted>(self, directory: P) -> ZipResult<()> { + self.extract_unsafe(directory, true) + } + + fn extract_unsafe>(self, directory: P, allow_untrusted: bool) -> ZipResult<()> { + struct Extractor<'a> { + path: &'a Path, + allow_untrusted: bool, + } impl ZipStreamVisitor for Extractor<'_> { - fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { + fn visit_file(&mut self, file: MaybeUntrusted<&mut ZipFile<'_>>) -> ZipResult<()> { + let file = match file { + MaybeUntrusted::Ok(file) => file, + MaybeUntrusted::Untrusted(value) => { + if self.allow_untrusted { + value.use_untrusted_value() + } else { + return Err(ZipError::UnsupportedArchive("extracting a zip file stream-fashion with data descriptors is considered a security risk")); + } + } + }; + let filepath = file .enclosed_name() .ok_or(ZipError::InvalidArchive("Invalid file path"))?; - let outpath = self.0.join(filepath); + let outpath = self.path.join(filepath); if file.is_dir() { fs::create_dir_all(&outpath)?; @@ -82,15 +121,26 @@ impl ZipStreamReader { #[allow(unused)] fn visit_additional_metadata( &mut self, - metadata: &ZipStreamFileMetadata, + metadata: MaybeUntrusted<&ZipStreamFileMetadata>, ) -> ZipResult<()> { #[cfg(unix)] { + let metadata = match metadata { + MaybeUntrusted::Ok(file) => file, + MaybeUntrusted::Untrusted(value) => { + if self.allow_untrusted { + value.use_untrusted_value() + } else { + return Err(ZipError::UnsupportedArchive("extracting a zip file stream-fashion with data descriptors is considered a security risk")); + } + } + }; + let filepath = metadata .enclosed_name() .ok_or(ZipError::InvalidArchive("Invalid file path"))?; - let outpath = self.0.join(filepath); + let outpath = self.path.join(filepath); use std::os::unix::fs::PermissionsExt; if let Some(mode) = metadata.unix_mode() { @@ -102,7 +152,10 @@ impl ZipStreamReader { } } - self.visit(&mut Extractor(directory.as_ref())) + self.visit(&mut Extractor { + path: directory.as_ref(), + allow_untrusted, + }) } } @@ -113,12 +166,29 @@ pub trait ZipStreamVisitor { /// - `comment`: set to an empty string /// - `data_start`: set to 0 /// - `external_attributes`: `unix_mode()`: will return None - fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()>; + /// + /// In the case of parsing a zip file which contains data descriptors and an attacker can + /// control files that were compressed to the zip file the returned data is not trusted and + /// partly attacker controllable. Everything returned by this function should be regarded untrusted + /// (file name, content, file count, ...). Take care handling the output. + /// + /// To be on the safe side use `.unwrap_or_error(...)` on the `MaybeUntrusted` parameter. + fn visit_file(&mut self, file: MaybeUntrusted<&mut ZipFile<'_>>) -> ZipResult<()>; /// This function is guranteed to be called after all `visit_file`s. /// /// * `metadata` - Provides missing metadata in `visit_file`. - fn visit_additional_metadata(&mut self, metadata: &ZipStreamFileMetadata) -> ZipResult<()>; + /// + /// In the case of parsing a zip file which contains data descriptors and an attacker can + // control files that were compressed to the zip file the returned data is not trusted and + // partly attacker controllable. Everything returned by this function should be regarded untrusted + // (file name, content, file count, ...). Take care handling the output. + // + // To be on the safe side use `.unwrap_or_error(...)` on the `MaybeUntrusted` parameter. + fn visit_additional_metadata( + &mut self, + metadata: MaybeUntrusted<&ZipStreamFileMetadata>, + ) -> ZipResult<()>; } /// Additional metadata for the file. @@ -208,13 +278,13 @@ mod test { struct DummyVisitor; impl ZipStreamVisitor for DummyVisitor { - fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> { + fn visit_file(&mut self, _file: MaybeUntrusted<&mut ZipFile<'_>>) -> ZipResult<()> { Ok(()) } fn visit_additional_metadata( &mut self, - _metadata: &ZipStreamFileMetadata, + _metadata: MaybeUntrusted<&ZipStreamFileMetadata>, ) -> ZipResult<()> { Ok(()) } @@ -224,14 +294,14 @@ mod test { #[derive(Default, Debug, Eq, PartialEq)] struct CounterVisitor(u64, u64); impl ZipStreamVisitor for CounterVisitor { - fn visit_file(&mut self, _file: &mut ZipFile<'_>) -> ZipResult<()> { + fn visit_file(&mut self, _file: MaybeUntrusted<&mut ZipFile<'_>>) -> ZipResult<()> { self.0 += 1; Ok(()) } fn visit_additional_metadata( &mut self, - _metadata: &ZipStreamFileMetadata, + _metadata: MaybeUntrusted<&ZipStreamFileMetadata>, ) -> ZipResult<()> { self.1 += 1; Ok(()) @@ -267,7 +337,8 @@ mod test { filenames: BTreeSet>, } impl ZipStreamVisitor for V { - fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { + fn visit_file(&mut self, file: MaybeUntrusted<&mut ZipFile<'_>>) -> ZipResult<()> { + let file = file.use_untrusted_value(); if file.is_file() { self.filenames.insert(file.name().into()); } @@ -276,8 +347,9 @@ mod test { } fn visit_additional_metadata( &mut self, - metadata: &ZipStreamFileMetadata, + metadata: MaybeUntrusted<&ZipStreamFileMetadata>, ) -> ZipResult<()> { + let metadata = metadata.use_untrusted_value(); if metadata.is_file() { assert!( self.filenames.contains(metadata.name()), @@ -304,7 +376,8 @@ mod test { filenames: BTreeSet>, } impl ZipStreamVisitor for V { - fn visit_file(&mut self, file: &mut ZipFile<'_>) -> ZipResult<()> { + fn visit_file(&mut self, file: MaybeUntrusted<&mut ZipFile<'_>>) -> ZipResult<()> { + let file = file.use_untrusted_value(); let full_name = file.enclosed_name().unwrap(); let file_name = full_name.file_name().unwrap().to_str().unwrap(); assert!( @@ -320,8 +393,9 @@ mod test { } fn visit_additional_metadata( &mut self, - metadata: &ZipStreamFileMetadata, + metadata: MaybeUntrusted<&ZipStreamFileMetadata>, ) -> ZipResult<()> { + let metadata = metadata.use_untrusted_value(); if metadata.is_file() { assert!( self.filenames.contains(metadata.name()), diff --git a/src/read/xz.rs b/src/read/xz.rs index f9ed41af2..7ed9fdae0 100644 --- a/src/read/xz.rs +++ b/src/read/xz.rs @@ -26,6 +26,10 @@ impl XzDecoder { flags: [0, 0], } } + + pub(crate) fn as_ref(&self) -> &R { + self.compressed_reader.get_ref() + } } struct CountReader<'a, R: BufRead> { diff --git a/src/result.rs b/src/result.rs index ec8fbb13f..939c68256 100644 --- a/src/result.rs +++ b/src/result.rs @@ -96,3 +96,100 @@ impl fmt::Display for DateTimeRangeError { } impl Error for DateTimeRangeError {} + +/// Represents a potentially untrusted untrustworthy value. +/// +/// An attacker might be able to control (part) of the returned value. +/// Take special care processing this data. +/// +/// See the method documentation of the function returning this value +pub struct UntrustedValue { + value: T, +} + +impl UntrustedValue { + /// Be sure that you carefully handle the returned value since + /// it may be controllable by a malicious actor. + /// + /// See the method documentation of the function returning this value + pub fn use_untrusted_value(self) -> T { + self.value + } + + /// Wraps the provided value as UntrustedValue + pub fn wrap(value: T) -> Self { + UntrustedValue { value } + } +} + +impl From for UntrustedValue { + fn from(value: T) -> Self { + UntrustedValue::wrap(value) + } +} + +/// Represents a value that might be untrusted. See UntrustedValue for more information. +pub enum MaybeUntrusted { + /// Trusted value variant + Ok(T), + /// Untrusted value variant + Untrusted(UntrustedValue), +} + +impl MaybeUntrusted { + /// Be sure that you carefully handle the returned value since + /// it may be controllable by a malicious actor (when it is a MaybeUntrusted::Untrusted). + /// + /// See the method documentation of the function returning this value + pub fn use_untrusted_value(self) -> T { + match self { + MaybeUntrusted::Ok(value) => value, + MaybeUntrusted::Untrusted(value) => value.use_untrusted_value(), + } + } + + /// Unwraps the value if values is not untrusted, if untrusted returns the provided error message + pub fn unwrap_or_error(self, error: &'static str) -> Result { + match self { + MaybeUntrusted::Ok(value) => Ok(value), + MaybeUntrusted::Untrusted(_) => Err(ZipError::InvalidArchive(error)), + } + } + + /// Returns true if the value is untrusted + pub fn is_untrusted(&self) -> bool { + match self { + MaybeUntrusted::Ok(_) => false, + MaybeUntrusted::Untrusted(_) => true, + } + } + + /// Returns true if the value is not untrusted + pub fn is_ok(&self) -> bool { + !self.is_untrusted() + } + + /// Wraps the provided values as Untrusted + pub fn wrap_untrusted(value: T) -> Self { + MaybeUntrusted::Untrusted(value.into()) + } + + /// Wraps the provided values as Ok + pub fn wrap_ok(value: T) -> Self { + MaybeUntrusted::Ok(value) + } + + /// Wraps the provided value as maybe untrusted, according to given boolean + pub fn wrap(value: T, untrusted: bool) -> Self { + match untrusted { + true => Self::wrap_untrusted(value), + false => Self::wrap_ok(value), + } + } +} + +impl From> for MaybeUntrusted { + fn from(value: UntrustedValue) -> Self { + MaybeUntrusted::Untrusted(value) + } +} diff --git a/src/spec.rs b/src/spec.rs index 1dfb4e5e7..752505ee4 100755 --- a/src/spec.rs +++ b/src/spec.rs @@ -48,6 +48,7 @@ impl Magic { pub const CENTRAL_DIRECTORY_END_SIGNATURE: Self = Self::literal(0x06054b50); pub const ZIP64_CENTRAL_DIRECTORY_END_SIGNATURE: Self = Self::literal(0x06064b50); pub const ZIP64_CENTRAL_DIRECTORY_END_LOCATOR_SIGNATURE: Self = Self::literal(0x07064b50); + pub const DATA_DESCRIPTOR_SIGNATURE: Self = Self::literal(0x08074b50); } /// Similar to [`Magic`], but used for extra field tags as per section 4.5.3 of APPNOTE.TXT. diff --git a/src/types.rs b/src/types.rs index de22f6055..d4b2641e5 100644 --- a/src/types.rs +++ b/src/types.rs @@ -478,6 +478,13 @@ pub struct ZipFileData { pub extra_fields: Vec, } +// contains the block and the following fields as read from the file +pub(crate) struct ZipLocalEntryBlockAndFields { + pub(crate) block: ZipLocalEntryBlock, + pub(crate) file_name_raw: Vec, + pub(crate) extra_field: Vec, +} + impl ZipFileData { /// Get the starting offset of the data of the compressed file pub fn data_start(&self) -> u64 { @@ -661,9 +668,10 @@ impl ZipFileData { local_block } - pub(crate) fn from_local_block( - block: ZipLocalEntryBlock, - reader: &mut R, + pub(crate) fn from_local_block( + block: ZipLocalEntryBlockAndFields, + data_descriptor: Option, + allow_empty_data_descriptor: bool, ) -> ZipResult { let ZipLocalEntryBlock { // magic, @@ -672,13 +680,11 @@ impl ZipFileData { compression_method, last_mod_time, last_mod_date, - crc32, - compressed_size, - uncompressed_size, - file_name_length, - extra_field_length, + mut crc32, + mut compressed_size, + mut uncompressed_size, .. - } = block; + } = block.block; let encrypted: bool = flags & 1 == 1; if encrypted { @@ -691,25 +697,29 @@ impl ZipFileData { /* flags & (1 << 3) != 0 */ let using_data_descriptor: bool = flags & (1 << 3) == 1 << 3; if using_data_descriptor { - return Err(ZipError::UnsupportedArchive( - "The file length is not available in the local header", - )); + match data_descriptor { + None => { + if !allow_empty_data_descriptor { + return Err(ZipError::UnsupportedArchive( + "The file uses a data descriptor, but the provided input stream is not seekable or the data descriptor is missing.", + )); + } + } + Some(data_descriptor) => { + uncompressed_size = data_descriptor.uncompressed_size; + compressed_size = data_descriptor.compressed_size; + crc32 = data_descriptor.crc32; + } + } } /* flags & (1 << 1) != 0 */ let is_utf8: bool = flags & (1 << 11) != 0; let compression_method = crate::CompressionMethod::parse_from_u16(compression_method); - let file_name_length: usize = file_name_length.into(); - let extra_field_length: usize = extra_field_length.into(); - - let mut file_name_raw = vec![0u8; file_name_length]; - reader.read_exact(&mut file_name_raw)?; - let mut extra_field = vec![0u8; extra_field_length]; - reader.read_exact(&mut extra_field)?; let file_name: Box = match is_utf8 { - true => String::from_utf8_lossy(&file_name_raw).into(), - false => file_name_raw.clone().from_cp437().into(), + true => String::from_utf8_lossy(&block.file_name_raw).into(), + false => block.file_name_raw.clone().from_cp437().into(), }; let system: u8 = (version_made_by >> 8).try_into().unwrap(); @@ -727,8 +737,8 @@ impl ZipFileData { compressed_size: compressed_size.into(), uncompressed_size: uncompressed_size.into(), file_name, - file_name_raw: file_name_raw.into(), - extra_field: Some(Arc::new(extra_field)), + file_name_raw: block.file_name_raw.into(), + extra_field: Some(Arc::new(block.extra_field)), central_extra_field: None, file_comment: String::with_capacity(0).into_boxed_str(), // file comment is only available in the central directory // header_start and data start are not available, but also don't matter, since seeking is @@ -952,6 +962,35 @@ impl FixedSizeBlock for ZipLocalEntryBlock { ]; } +#[derive(Copy, Clone, Debug)] +#[repr(packed)] +pub(crate) struct ZipDataDescriptor { + magic: spec::Magic, + pub crc32: u32, + pub compressed_size: u32, + pub uncompressed_size: u32, +} + +impl Pod for crate::types::ZipDataDescriptor {} + +impl FixedSizeBlock for crate::types::ZipDataDescriptor { + const MAGIC: spec::Magic = spec::Magic::DATA_DESCRIPTOR_SIGNATURE; + + #[inline(always)] + fn magic(self) -> spec::Magic { + self.magic + } + + const WRONG_MAGIC_ERROR: ZipError = ZipError::InvalidArchive("Invalid data descriptor"); + + to_and_from_le![ + (magic, spec::Magic), + (crc32, u32), + (compressed_size, u32), + (uncompressed_size, u32), + ]; +} + #[derive(Copy, Clone, Debug)] pub(crate) struct Zip64ExtraFieldBlock { magic: spec::ExtraFieldMagic, diff --git a/src/zipcrypto.rs b/src/zipcrypto.rs index 7ad017c8c..711186952 100644 --- a/src/zipcrypto.rs +++ b/src/zipcrypto.rs @@ -207,6 +207,10 @@ impl ZipCryptoReaderValid { pub fn into_inner(self) -> R { self.reader.file } + + pub fn get_ref(&self) -> &R { + &self.reader.file + } } static CRCTABLE: [u32; 256] = [ diff --git a/tests/data/data_descriptor_two_entries.zip b/tests/data/data_descriptor_two_entries.zip new file mode 100644 index 000000000..f487cccb9 Binary files /dev/null and b/tests/data/data_descriptor_two_entries.zip differ diff --git a/tests/zip_stream.rs b/tests/zip_stream.rs new file mode 100644 index 000000000..ec59089ef --- /dev/null +++ b/tests/zip_stream.rs @@ -0,0 +1,111 @@ +use std::io; +use std::io::Read; +use zip::ZipArchive; + +#[test] +fn decompress_stream_no_data_descriptor() { + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("data/deflate64.zip")); + let mut stream = io::Cursor::new(v); + + let mut entry = zip::read::read_zipfile_from_stream(&mut stream) + .expect("couldn't open test zip file") + .use_untrusted_value() + .expect("did not find file entry in zip file"); + assert_eq!("binary.wmv", entry.name()); + + let mut content = Vec::new(); + entry + .read_to_end(&mut content) + .expect("couldn't read encrypted and compressed file"); + assert_eq!(include_bytes!("data/folder/binary.wmv"), &content[..]); +} + +#[test] +fn decompress_stream_with_data_descriptor_sanity_check() { + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("data/data_descriptor.zip")); + let mut archive = ZipArchive::new(io::Cursor::new(v)).expect("couldn't open test zip file"); + + let mut file = archive + .by_name("hello.txt") + .expect("couldn't find file in archive"); + assert_eq!("hello.txt", file.name()); + + let mut content = Vec::new(); + file.read_to_end(&mut content) + .expect("couldn't read encrypted and compressed file"); + assert_eq!(b"Hello World\n", &content[..]); +} + +#[test] +fn decompress_stream_with_data_descriptor() { + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("data/data_descriptor.zip")); + let mut stream = Box::new(io::Cursor::new(v)) as Box; + + let mut entry = zip::read::read_zipfile_from_stream(&mut stream) + .expect("couldn't open test zip file") + .use_untrusted_value() + .expect("did not find file entry in zip file"); + assert_eq!("hello.txt", entry.name()); + + let mut content = Vec::new(); + entry + .read_to_end(&mut content) + .expect("couldn't read encrypted and compressed file"); + assert_eq!(b"Hello World\n", &content[..]); +} + +#[test] +fn decompress_stream_with_data_descriptor_continue() { + let mut v = Vec::new(); + v.extend_from_slice(include_bytes!("data/data_descriptor_two_entries.zip")); + let mut stream = io::Cursor::new(v); + + // First entry + + let mut entry = zip::read::read_zipfile_from_stream(&mut stream) + .expect("couldn't open test zip file") + .use_untrusted_value() + .expect("did not find file entry in zip file"); + assert_eq!("hello.txt", entry.name()); + + let mut content = Vec::new(); + entry + .read_to_end(&mut content) + .expect("couldn't read encrypted and compressed file"); + assert_eq!(b"Hello World\n", &content[..]); + + drop(entry); + + // Second entry + + let mut stream = zip::read::advance_stream_to_next_zipfile_start(&mut stream) + .expect("couldn't advance to next entry in zip file") + .expect("no more entries") + .use_untrusted_value(); + + let mut entry = zip::read::read_zipfile_from_stream(&mut stream) + .expect("couldn't open test zip file") + .use_untrusted_value() + .expect("did not find file entry in zip file"); + assert_eq!("world.txt", entry.name()); + + let mut content = Vec::new(); + entry + .read_to_end(&mut content) + .expect("couldn't read encrypted and compressed file"); + assert_eq!(b"STUFF\n", &content[..]); + + drop(entry); + + // No more entries + + let entry = zip::read::advance_stream_to_next_zipfile_start(&mut stream) + .expect("couldn't advance to next entry in zip file"); + match entry { + None => (), + _ => panic!("expected no more entries"), + }; +}