Skip to content

Commit

Permalink
fix: Implement BufRead or RawEntryReader (#58)
Browse files Browse the repository at this point in the history
This removes a layer of buffering when dealing with zstd comperssed files.
  • Loading branch information
fasterthanlime authored Jan 31, 2024
1 parent eabdc4d commit e211875
Show file tree
Hide file tree
Showing 12 changed files with 79 additions and 87 deletions.
61 changes: 24 additions & 37 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ zstd = ["dep:zstd"]
clap = { version = "4.4.18", features = ["derive"] }
humansize = "2.1.3"
indicatif = "0.17.7"
tracing-subscriber = "0.3.18"
tracing-test = "0.2.4"
test-log = { version = "0.2.14", default-features = false, features = ["tracing-subscriber", "trace"] }
tracing-subscriber = { version = "0.3.18", features = ["env-filter"] }

[profile.release]
debug = 1
3 changes: 1 addition & 2 deletions src/reader/archive_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,8 +73,7 @@ impl ArchiveReaderState {
impl ArchiveReader {
/// This should be > 65KiB, because the section at the end of the
/// file that we check for end of central directory record is 65KiB.
/// 128 is the next power of two.
const DEFAULT_BUFFER_SIZE: usize = 128 * 1024;
const DEFAULT_BUFFER_SIZE: usize = 256 * 1024;

/// Create a new archive reader with a specified file size.
///
Expand Down
18 changes: 15 additions & 3 deletions src/reader/sync/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,12 @@ where

/// Only allows reading a fixed number of bytes from a [oval::Buffer],
/// allowing to move the inner reader out afterwards.
pub struct LimitedReader {
pub struct RawEntryReader {
remaining: u64,
inner: Buffer,
}

impl LimitedReader {
impl RawEntryReader {
pub fn new(inner: Buffer, remaining: u64) -> Self {
Self { inner, remaining }
}
Expand All @@ -73,7 +73,19 @@ impl LimitedReader {
}
}

impl io::Read for LimitedReader {
impl io::BufRead for RawEntryReader {
fn fill_buf(&mut self) -> io::Result<&[u8]> {
let max_avail = cmp::min(self.remaining, self.inner.available_data() as u64);
Ok(&self.inner.data()[..max_avail as usize])
}

fn consume(&mut self, amt: usize) {
self.remaining -= amt as u64;
Buffer::consume(&mut self.inner, amt);
}
}

impl io::Read for RawEntryReader {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
let len = cmp::min(buf.len() as u64, self.remaining) as usize;
tracing::trace!(%len, buf_len = buf.len(), remaining = self.remaining, available_data = self.inner.available_data(), available_space = self.inner.available_space(), "computing len");
Expand Down
4 changes: 2 additions & 2 deletions src/reader/sync/entry_reader/bzip2_dec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::io::Read;

use bzip2::read::BzDecoder;

use crate::reader::sync::{Decoder, LimitedReader};
use crate::reader::sync::{Decoder, RawEntryReader};

impl<R> Decoder<R> for BzDecoder<R>
where
Expand All @@ -17,6 +17,6 @@ where
}
}

pub(crate) fn mk_decoder(r: LimitedReader) -> impl Decoder<LimitedReader> {
pub(crate) fn mk_decoder(r: RawEntryReader) -> impl Decoder<RawEntryReader> {
BzDecoder::new(r)
}
4 changes: 2 additions & 2 deletions src/reader/sync/entry_reader/deflate64_dec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::io::{BufReader, Read};

use deflate64::Deflate64Decoder;

use crate::reader::sync::{Decoder, LimitedReader};
use crate::reader::sync::{Decoder, RawEntryReader};

impl<R> Decoder<R> for Deflate64Decoder<BufReader<R>>
where
Expand All @@ -17,6 +17,6 @@ where
}
}

pub(crate) fn mk_decoder(r: LimitedReader) -> impl Decoder<LimitedReader> {
pub(crate) fn mk_decoder(r: RawEntryReader) -> impl Decoder<RawEntryReader> {
Deflate64Decoder::new(r)
}
4 changes: 2 additions & 2 deletions src/reader/sync/entry_reader/deflate_dec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::io::Read;

use flate2::read::DeflateDecoder;

use crate::reader::sync::{Decoder, LimitedReader};
use crate::reader::sync::{Decoder, RawEntryReader};

impl<R> Decoder<R> for DeflateDecoder<R>
where
Expand All @@ -17,6 +17,6 @@ where
}
}

pub(crate) fn mk_decoder(r: LimitedReader) -> impl Decoder<LimitedReader> {
pub(crate) fn mk_decoder(r: RawEntryReader) -> impl Decoder<RawEntryReader> {
DeflateDecoder::new(r)
}
6 changes: 3 additions & 3 deletions src/reader/sync/entry_reader/lzma_dec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use lzma_rs::decompress::Stream;
use std::io::{Read, Write};

use crate::{
reader::sync::{Decoder, LimitedReader},
reader::sync::{Decoder, RawEntryReader},
Error, UnsupportedError,
};

Expand Down Expand Up @@ -102,9 +102,9 @@ where
}

pub(crate) fn mk_decoder(
mut r: LimitedReader,
mut r: RawEntryReader,
uncompressed_size: u64,
) -> std::io::Result<impl Decoder<LimitedReader>> {
) -> std::io::Result<impl Decoder<RawEntryReader>> {
use byteorder::{LittleEndian, ReadBytesExt};

// see `appnote.txt` section 5.8
Expand Down
28 changes: 14 additions & 14 deletions src/reader/sync/entry_reader/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use crate::{
error::*,
format::*,
reader::sync::decoder::{Decoder, EOFNormalizer, LimitedReader, StoreDecoder},
reader::sync::decoder::{Decoder, EOFNormalizer, RawEntryReader, StoreDecoder},
transition,
};

Expand Down Expand Up @@ -45,7 +45,7 @@ enum State {
hasher: crc32fast::Hasher,
uncompressed_size: u64,
header: LocalFileHeaderRecord,
decoder: Box<dyn Decoder<LimitedReader>>,
decoder: Box<dyn Decoder<RawEntryReader>>,
},
ReadDataDescriptor {
metrics: EntryReadMetrics,
Expand Down Expand Up @@ -93,8 +93,8 @@ where
transition!(self.state => (S::ReadLocalHeader { buffer }) {
// allow unnecessary mut for some feature combinations
#[allow(unused_mut)]
let mut limited_reader = LimitedReader::new(buffer, self.inner.compressed_size);
let decoder: Box<dyn Decoder<LimitedReader>> = self.get_decoder(limited_reader)?;
let mut limited_reader = RawEntryReader::new(buffer, self.inner.compressed_size);
let decoder: Box<dyn Decoder<RawEntryReader>> = self.get_decoder(limited_reader)?;

S::ReadData {
hasher: crc32fast::Hasher::new(),
Expand Down Expand Up @@ -253,7 +253,7 @@ impl<R> EntryReader<R>
where
R: io::Read,
{
const DEFAULT_BUFFER_SIZE: usize = 8 * 1024;
const DEFAULT_BUFFER_SIZE: usize = 256 * 1024;

pub fn new<F>(entry: &StoredEntry, get_reader: F) -> Self
where
Expand All @@ -272,14 +272,14 @@ where

fn get_decoder(
&self,
#[allow(unused_mut)] mut limited_reader: LimitedReader,
) -> Result<Box<dyn Decoder<LimitedReader>>, Error> {
let decoder: Box<dyn Decoder<LimitedReader>> = match self.method {
Method::Store => Box::new(StoreDecoder::new(limited_reader)),
#[allow(unused_mut)] mut raw_r: RawEntryReader,
) -> Result<Box<dyn Decoder<RawEntryReader>>, Error> {
let decoder: Box<dyn Decoder<RawEntryReader>> = match self.method {
Method::Store => Box::new(StoreDecoder::new(raw_r)),
Method::Deflate => {
cfg_if! {
if #[cfg(feature = "deflate")] {
Box::new(deflate_dec::mk_decoder(limited_reader))
Box::new(deflate_dec::mk_decoder(raw_r))
} else {
return Err(Error::method_not_enabled(self.method));
}
Expand All @@ -288,7 +288,7 @@ where
Method::Deflate64 => {
cfg_if! {
if #[cfg(feature = "deflate64")] {
Box::new(deflate64_dec::mk_decoder(limited_reader))
Box::new(deflate64_dec::mk_decoder(raw_r))
} else {
return Err(Error::method_not_enabled(self.method));
}
Expand All @@ -297,7 +297,7 @@ where
Method::Lzma => {
cfg_if! {
if #[cfg(feature = "lzma")] {
Box::new(lzma_dec::mk_decoder(limited_reader,self.inner.uncompressed_size)?)
Box::new(lzma_dec::mk_decoder(raw_r,self.inner.uncompressed_size)?)
} else {
return Err(Error::method_not_enabled(self.method));
}
Expand All @@ -306,7 +306,7 @@ where
Method::Bzip2 => {
cfg_if! {
if #[cfg(feature = "bzip2")] {
Box::new(bzip2_dec::mk_decoder(limited_reader))
Box::new(bzip2_dec::mk_decoder(raw_r))
} else {
return Err(Error::method_not_enabled(self.method));
}
Expand All @@ -315,7 +315,7 @@ where
Method::Zstd => {
cfg_if! {
if #[cfg(feature = "zstd")] {
Box::new(zstd_dec::mk_decoder(limited_reader)?)
Box::new(zstd_dec::mk_decoder(raw_r)?)
} else {
return Err(Error::method_not_enabled(self.method));
}
Expand Down
Loading

0 comments on commit e211875

Please sign in to comment.