From 1473dfce2f5540621fdf0b4f9b2faedd10037468 Mon Sep 17 00:00:00 2001 From: Amos Wenger Date: Tue, 19 Mar 2024 16:46:29 +0100 Subject: [PATCH] feat: Run one-byte-read tests in CI in release (#77) This helps find state-machine bugs. Closes #76. --- Justfile | 1 + rc-zip-sync/tests/integration_tests.rs | 42 ++++++++++++++++++-- rc-zip-tokio/tests/integration_tests.rs | 53 ++++++++++++++++++++++--- rc-zip/src/fsm/entry/deflate_dec.rs | 2 + rc-zip/src/fsm/entry/mod.rs | 19 +++++++-- 5 files changed, 104 insertions(+), 13 deletions(-) diff --git a/Justfile b/Justfile index 352903d..953a57b 100644 --- a/Justfile +++ b/Justfile @@ -23,4 +23,5 @@ ci-test: source <(cargo llvm-cov show-env --export-prefix) cargo llvm-cov clean --workspace cargo nextest run --all-features --profile ci + ONE_BYTE_READ=1 cargo nextest run --all-features --release --profile ci cargo llvm-cov report --lcov --output-path coverage.lcov diff --git a/rc-zip-sync/tests/integration_tests.rs b/rc-zip-sync/tests/integration_tests.rs index 3df5500..a7844fc 100644 --- a/rc-zip-sync/tests/integration_tests.rs +++ b/rc-zip-sync/tests/integration_tests.rs @@ -3,9 +3,12 @@ use rc_zip::{ error::Error, parse::Archive, }; -use rc_zip_sync::{ArchiveHandle, HasCursor, ReadZip, ReadZipStreaming}; +use rc_zip_sync::{ArchiveHandle, HasCursor, ReadZip, ReadZipStreaming, ReadZipWithSize}; -use std::{fs::File, io::Read}; +use std::{ + fs::File, + io::{self, Read}, +}; fn check_case(test: &Case, archive: Result, Error>) { corpus::check_case(test, archive.as_ref().map(|ar| -> &Archive { ar })); @@ -49,8 +52,15 @@ fn real_world_files() { let guarded_path = case.absolute_path(); let file = File::open(&guarded_path.path).unwrap(); - let archive = file.read_zip().map_err(Error::from); - check_case(&case, archive); + if let Ok("1") = std::env::var("ONE_BYTE_READ").as_deref() { + let size = file.metadata().unwrap().len(); + let file = OneByteReadWrapper(file); + let archive = file.read_zip_with_size(size).map_err(Error::from); + check_case(&case, archive); + } else { + let archive = file.read_zip().map_err(Error::from); + check_case(&case, archive); + }; drop(guarded_path) } } @@ -78,3 +88,27 @@ fn streaming() { drop(guarded_path) } } + +// This helps find bugs in state machines! + +struct OneByteReadWrapper(R); + +impl io::Read for OneByteReadWrapper +where + R: io::Read, +{ + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.0.read(&mut buf[..1]) + } +} + +impl HasCursor for OneByteReadWrapper +where + R: HasCursor, +{ + type Cursor<'a> = OneByteReadWrapper> where R: 'a; + + fn cursor_at(&self, offset: u64) -> Self::Cursor<'_> { + OneByteReadWrapper(self.0.cursor_at(offset)) + } +} diff --git a/rc-zip-tokio/tests/integration_tests.rs b/rc-zip-tokio/tests/integration_tests.rs index 35e8c90..fe8071d 100644 --- a/rc-zip-tokio/tests/integration_tests.rs +++ b/rc-zip-tokio/tests/integration_tests.rs @@ -1,13 +1,13 @@ -use positioned_io::RandomAccessFile; +use positioned_io::{RandomAccessFile, Size}; use rc_zip::{ corpus::{self, zips_dir, Case, Files}, error::Error, parse::Archive, }; -use rc_zip_tokio::{ArchiveHandle, HasCursor, ReadZip, ReadZipStreaming}; -use tokio::io::AsyncReadExt; +use rc_zip_tokio::{ArchiveHandle, HasCursor, ReadZip, ReadZipStreaming, ReadZipWithSize}; +use tokio::io::{AsyncRead, AsyncReadExt, ReadBuf}; -use std::sync::Arc; +use std::{pin::Pin, sync::Arc, task}; async fn check_case(test: &Case, archive: Result, Error>) { corpus::check_case(test, archive.as_ref().map(|ar| -> &Archive { ar })); @@ -49,8 +49,15 @@ async fn real_world_files() { let guarded_path = case.absolute_path(); let file = Arc::new(RandomAccessFile::open(&guarded_path.path).unwrap()); - let archive = file.read_zip().await; - check_case(&case, archive).await; + if let Ok("1") = std::env::var("ONE_BYTE_READ").as_deref() { + let size = file.size().unwrap().expect("file to have a size"); + let file = OneByteReadWrapper(file); + let archive = file.read_zip_with_size(size).await; + check_case(&case, archive).await; + } else { + let archive = file.read_zip().await; + check_case(&case, archive).await; + } drop(guarded_path) } } @@ -79,3 +86,37 @@ async fn streaming() { drop(guarded_path) } } + +// This helps find bugs in state machines! + +struct OneByteReadWrapper(R); + +impl AsyncRead for OneByteReadWrapper +where + R: AsyncRead, +{ + fn poll_read( + self: Pin<&mut Self>, + cx: &mut task::Context<'_>, + buf: &mut ReadBuf<'_>, + ) -> task::Poll> { + let mut inner_buf = buf.take(1); + futures::ready!( + unsafe { self.map_unchecked_mut(|s| &mut s.0) }.poll_read(cx, &mut inner_buf) + )?; + let n = inner_buf.filled().len(); + buf.set_filled(n); + Ok(()).into() + } +} + +impl HasCursor for OneByteReadWrapper +where + R: HasCursor, +{ + type Cursor<'a> = OneByteReadWrapper<::Cursor<'a>> where R: 'a; + + fn cursor_at(&self, offset: u64) -> Self::Cursor<'_> { + OneByteReadWrapper(self.0.cursor_at(offset)) + } +} diff --git a/rc-zip/src/fsm/entry/deflate_dec.rs b/rc-zip/src/fsm/entry/deflate_dec.rs index db405b6..033e0a9 100644 --- a/rc-zip/src/fsm/entry/deflate_dec.rs +++ b/rc-zip/src/fsm/entry/deflate_dec.rs @@ -87,6 +87,7 @@ impl Decompressor for DeflateDec { self.out_pos, flags, ); + trace!(%bytes_read, %bytes_written, ?status, "decompress returned"); outcome.bytes_read += bytes_read; self.remain_in_internal_buffer += bytes_written; @@ -115,6 +116,7 @@ impl Decompressor for DeflateDec { }, } + trace!("calling copy_to_out"); self.copy_to_out(out, &mut outcome); Ok(outcome) } diff --git a/rc-zip/src/fsm/entry/mod.rs b/rc-zip/src/fsm/entry/mod.rs index 9fc8a03..176e1c9 100644 --- a/rc-zip/src/fsm/entry/mod.rs +++ b/rc-zip/src/fsm/entry/mod.rs @@ -225,15 +225,21 @@ impl EntryFsm { .. } => { let in_buf = self.buffer.data(); + let entry = self.entry.as_ref().unwrap(); - // don't feed the decompressor bytes beyond the entry's compressed size + // do we have more input to feed to the decompressor? + // if so, don't give it an empty read + if in_buf.is_empty() && *compressed_bytes < entry.compressed_size { + return Ok(FsmResult::Continue((self, Default::default()))); + } - let entry = self.entry.as_ref().unwrap(); + // don't feed the decompressor bytes beyond the entry's compressed size let in_buf_max_len = cmp::min( in_buf.len(), entry.compressed_size as usize - *compressed_bytes as usize, ); let in_buf = &in_buf[..in_buf_max_len]; + let bytes_fed_this_turn = in_buf.len(); let fed_bytes_after_this = *compressed_bytes + in_buf.len() as u64; let has_more_input = if fed_bytes_after_this == entry.compressed_size as _ { @@ -282,7 +288,14 @@ impl EntryFsm { }); return self.process(out); } else if outcome.bytes_written == 0 && outcome.bytes_read == 0 { - panic!("decompressor didn't read anything and didn't write anything?") + if bytes_fed_this_turn == 0 { + return Err(Error::IO(std::io::Error::new( + std::io::ErrorKind::UnexpectedEof, + "decompressor made no progress: this is probably an rc-zip bug", + ))); + } else { + // ok fine, continue + } } // write the decompressed data to the hasher