Skip to content

Commit

Permalink
fix: lzma_dec: count all input in outcome.bytes_read
Browse files Browse the repository at this point in the history
  • Loading branch information
fasterthanlime committed Mar 19, 2024
1 parent e1beda2 commit 6ca042f
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 96 deletions.
63 changes: 35 additions & 28 deletions rc-zip-sync/src/entry_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,38 +30,45 @@ where
R: io::Read,
{
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
let mut fsm = match self.fsm.take() {
Some(fsm) => fsm,
None => return Ok(0),
};
loop {
let mut fsm = match self.fsm.take() {
Some(fsm) => fsm,
None => return Ok(0),
};

if fsm.wants_read() {
trace!("fsm wants read");
let n = self.rd.read(fsm.space())?;
trace!("giving fsm {} bytes", n);
fsm.fill(n);
} else {
trace!("fsm does not want read");
}
#[allow(clippy::needless_late_init)] // don't tell me what to do
let filled_bytes;
if fsm.wants_read() {
tracing::trace!(space_avail = fsm.space().len(), "fsm wants read");
let n = self.rd.read(fsm.space())?;
fsm.fill(n);
filled_bytes = n;
} else {
trace!("fsm does not want read");
filled_bytes = 0;
}

match fsm.process(buf)? {
FsmResult::Continue((fsm, outcome)) => {
self.fsm = Some(fsm);
match fsm.process(buf)? {
FsmResult::Continue((fsm, outcome)) => {
self.fsm = Some(fsm);

if outcome.bytes_written > 0 {
Ok(outcome.bytes_written)
} else if outcome.bytes_read == 0 {
// that's EOF, baby!
Ok(0)
} else {
// loop, it happens
self.read(buf)
if outcome.bytes_written > 0 {
tracing::trace!("wrote {} bytes", outcome.bytes_written);
return Ok(outcome.bytes_written);
} else if filled_bytes > 0 || outcome.bytes_read > 0 {
// progress was made, keep reading
continue;
} else {
return Err(io::Error::new(
io::ErrorKind::Other,
"entry reader: no progress",
));
}
}
FsmResult::Done(_) => {
// neat!
return Ok(0);
}
}
FsmResult::Done(_) => {
// neat!
trace!("fsm done");
Ok(0)
}
}
}
Expand Down
12 changes: 8 additions & 4 deletions rc-zip-tokio/src/entry_reader.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{pin::Pin, task};
use std::{io, pin::Pin, task};

use pin_project_lite::pin_project;
use rc_zip::{
Expand Down Expand Up @@ -77,11 +77,15 @@ where
if outcome.bytes_written > 0 {
tracing::trace!("wrote {} bytes", outcome.bytes_written);
buf.advance(outcome.bytes_written);
} else if filled_bytes > 0 {
// keep reading
} else if filled_bytes > 0 || outcome.bytes_read > 0 {
// progress was made, keep reading
continue;
} else {
// that's EOF, baby!
return Err(io::Error::new(
io::ErrorKind::Other,
"entry reader: no progress",
))
.into();
}
}
FsmResult::Done(_) => {
Expand Down
129 changes: 76 additions & 53 deletions rc-zip/src/fsm/entry/lzma_dec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,75 +40,98 @@ impl LzmaDec {
impl Decompressor for LzmaDec {
fn decompress(
&mut self,
in_buf: &[u8],
mut in_buf: &[u8],
out: &mut [u8],
has_more_input: HasMoreInput,
) -> Result<DecompressOutcome, Error> {
tracing::trace!(
in_buf_len = in_buf.len(),
out_len = out.len(),
remain_in_internal_buffer = self.internal_buf_mut().len(),
"decompress",
);

let mut outcome: DecompressOutcome = Default::default();

self.copy_to_out(out, &mut outcome);
if outcome.bytes_written > 0 {
trace!(
"still draining internal buffer, just copied {} bytes",
outcome.bytes_written
loop {
tracing::trace!(
in_buf_len = in_buf.len(),
out_len = out.len(),
remain_in_internal_buffer = self.internal_buf_mut().len(),
?outcome,
"decompress",
);
return Ok(outcome);
}

match &mut self.state {
State::Writing(stream) => {
let n = stream.write(in_buf).map_err(dec_err)?;
self.copy_to_out(out, &mut outcome);
if outcome.bytes_written > 0 {
trace!(
"wrote {} bytes to decompressor (of {} available)",
n,
in_buf.len()
"still draining internal buffer, just copied {} bytes",
outcome.bytes_written
);
outcome.bytes_read = n;

// if we haven't written all the input, and we haven't gotten
// any output, then we need to keep going
if n != 0 && n < in_buf.len() && self.internal_buf_mut().is_empty() {
// note: the n != 0 here is because apparently there can be a 10-byte
// trailer after LZMA compressed data? and the decoder will _refuse_
// to let us write them, so when we have just these 10 bytes left,
// it's good to just let the decoder finish up.
trace!("didn't write all output AND no output yet, so keep going");
return self.decompress(&in_buf[n..], out, has_more_input);
}
return Ok(outcome);
}

match has_more_input {
HasMoreInput::Yes => {
// keep going
trace!("more input to come");
match &mut self.state {
State::Writing(stream) => {
let n = stream.write(in_buf).map_err(dec_err)?;
trace!(
"wrote {} bytes to decompressor (of {} available)",
n,
in_buf.len()
);
outcome.bytes_read += n;
in_buf = &in_buf[n..];

// if we wrote some (but not all) of the input, and we haven't
// gotten any output, then we need to loop
if n != 0 && n < in_buf.len() && self.internal_buf_mut().is_empty() {
// note: the n != 0 here is because apparently there can be a 10-byte
// trailer after LZMA compressed data? and the decoder will _refuse_
// to let us write them, so when we have just these 10 bytes left,
// it's good to just let the decoder finish up.
trace!("didn't write all output AND no output yet, so keep going");
// FIXME: that's wrong! bytes_read is reset when we recurse.
// use a loop instead.
continue;
}
HasMoreInput::No => {
trace!("no more input to come");
match std::mem::take(&mut self.state) {
State::Writing(stream) => {
trace!("finishing...");
self.state = State::Draining(stream.finish().map_err(dec_err)?);

match has_more_input {
HasMoreInput::Yes => {
// keep going
trace!("more input to come");
}
HasMoreInput::No => {
trace!("no more input to come");

// this happens when we hit the 10-byte trailer mentioned above
// in this case, we just pretend we wrote everything
match in_buf.len() {
0 => {
// trailer is not present, that's okay
}
10 => {
trace!("eating LZMA trailer");
outcome.bytes_read += 10;
}
_ => {
return Err(Error::Decompression { method: Method::Lzma, msg: format!("expected LZMA trailer or no LZMA trailer, but not a {}-byte trailer", in_buf.len()) });
}
}

match std::mem::take(&mut self.state) {
State::Writing(stream) => {
trace!("finishing...");
self.state = State::Draining(stream.finish().map_err(dec_err)?);
continue;
}
_ => unreachable!(),
}
_ => unreachable!(),
}
}
}
State::Draining(_) => {
// keep going
trace!("draining");
}
State::Transition => unreachable!(),
}
State::Draining(_) => {
// keep going
}
State::Transition => unreachable!(),
}

self.copy_to_out(out, &mut outcome);
trace!("decompressor gave us {} bytes", outcome.bytes_written);
Ok(outcome)
self.copy_to_out(out, &mut outcome);
trace!("decompressor gave us {} bytes", outcome.bytes_written);
return Ok(outcome);
}
}
}

Expand Down
9 changes: 6 additions & 3 deletions rc-zip/src/fsm/entry/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,14 +252,15 @@ impl EntryFsm {
);

let outcome = decompressor.decompress(in_buf, out, has_more_input)?;
self.buffer.consume(outcome.bytes_read);
*compressed_bytes += outcome.bytes_read as u64;
trace!(
?outcome,
compressed_bytes = *compressed_bytes,
uncompressed_bytes = *uncompressed_bytes,
entry_compressed_size = %entry.compressed_size,
?outcome,
"decompressed"
);
self.buffer.consume(outcome.bytes_read);
*compressed_bytes += outcome.bytes_read as u64;

if outcome.bytes_written == 0 && *compressed_bytes == entry.compressed_size {
trace!("eof and no bytes written, we're done");
Expand All @@ -280,6 +281,8 @@ impl EntryFsm {
}
});
return self.process(out);
} else if outcome.bytes_written == 0 && outcome.bytes_read == 0 {
panic!("decompressor didn't read anything and didn't write anything?")
}

// write the decompressed data to the hasher
Expand Down
8 changes: 0 additions & 8 deletions rc-zip/src/parse/extra_field.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use std::borrow::Cow;

use ownable::{IntoOwned, ToOwned};
use tracing::trace;
use winnow::{
binary::{le_u16, le_u32, le_u64, le_u8, length_take},
combinator::{opt, preceded, repeat_till},
Expand Down Expand Up @@ -88,11 +87,6 @@ impl<'a> ExtraField<'a> {
move |i| {
use ExtraField as EF;
let rec = ExtraFieldRecord::parser.parse_next(i)?;
trace!(
"parsing extra field record, tag {:04x}, len {}",
rec.tag,
rec.payload.len()
);
let payload = &mut Partial::new(rec.payload);

let variant = match rec.tag {
Expand Down Expand Up @@ -322,7 +316,6 @@ pub enum NtfsAttr {
impl NtfsAttr {
fn parser(i: &mut Partial<&'_ [u8]>) -> PResult<Self> {
let tag = le_u16.parse_next(i)?;
trace!("parsing NTFS attribute, tag {:04x}", tag);
let payload = length_take(le_u16).parse_next(i)?;

match tag {
Expand All @@ -349,7 +342,6 @@ pub struct NtfsAttr1 {

impl NtfsAttr1 {
fn parser(i: &mut Partial<&'_ [u8]>) -> PResult<Self> {
trace!("parsing NTFS attr 1, input len is {}", i.len());
seq! {Self {
mtime: NtfsTimestamp::parser,
atime: NtfsTimestamp::parser,
Expand Down

0 comments on commit 6ca042f

Please sign in to comment.