Skip to content

Commit

Permalink
jean getting further
Browse files Browse the repository at this point in the history
  • Loading branch information
fasterthanlime committed Feb 5, 2024
1 parent 9a30c74 commit 4352ab1
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 94 deletions.
88 changes: 1 addition & 87 deletions rc-zip-sync/examples/jean.rs
Original file line number Diff line number Diff line change
Expand Up @@ -300,17 +300,11 @@ fn do_main(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {
let bps = (uncompressed_size as f64 / seconds) as u64;
println!("Overall extraction speed: {} / s", format_size(bps, BINARY));
}
Commands::UnzipStreaming { zipfile, dir } => {
Commands::UnzipStreaming { zipfile, .. } => {
let zipfile = File::open(zipfile)?;
let dir = PathBuf::from(dir.unwrap_or_else(|| ".".into()));

let mut entry = zipfile.read_first_zip_entry_streaming()?;

let mut num_dirs = 0;
let mut num_files = 0;
let mut num_symlinks = 0;
let mut done_bytes: u64 = 0;

use indicatif::{ProgressBar, ProgressStyle};
let pbar = ProgressBar::new(100);
pbar.set_style(
Expand All @@ -322,7 +316,6 @@ fn do_main(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {

pbar.enable_steady_tick(Duration::from_millis(125));

let start_time = std::time::SystemTime::now();
loop {
let entry_name = entry.name().unwrap();
let entry_name = match sanitize_entry_name(entry_name) {
Expand All @@ -344,87 +337,8 @@ fn do_main(cli: Cli) -> Result<(), Box<dyn std::error::Error>> {
break;
}
}

// match entry.contents() {
// EntryContents::Symlink => {
// num_symlinks += 1;

// cfg_if! {
// if #[cfg(windows)] {
// let path = dir.join(entry_name);
// std::fs::create_dir_all(
// path.parent()
// .expect("all full entry paths should have parent paths"),
// )?;
// let mut entry_writer = File::create(path)?;
// let mut entry_reader = entry.reader();
// std::io::copy(&mut entry_reader, &mut entry_writer)?;
// } else {
// let path = dir.join(entry_name);
// std::fs::create_dir_all(
// path.parent()
// .expect("all full entry paths should have parent paths"),
// )?;
// if let Ok(metadata) = std::fs::symlink_metadata(&path) {
// if metadata.is_file() {
// std::fs::remove_file(&path)?;
// }
// }

// let mut src = String::new();
// entry.reader().read_to_string(&mut src)?;

// // validate pointing path before creating a symbolic link
// if src.contains("..") {
// continue;
// }
// std::os::unix::fs::symlink(src, &path)?;
// }
// }
// }
// EntryContents::Directory => {
// num_dirs += 1;
// let path = dir.join(entry_name);
// std::fs::create_dir_all(
// path.parent()
// .expect("all full entry paths should have parent paths"),
// )?;
// }
// EntryContents::File => {
// num_files += 1;
// let path = dir.join(entry_name);
// std::fs::create_dir_all(
// path.parent()
// .expect("all full entry paths should have parent paths"),
// )?;
// let mut entry_writer = File::create(path)?;
// let entry_reader = entry.reader();
// let before_entry_bytes = done_bytes;
// let mut progress_reader = ProgressRead::new(
// entry_reader,
// entry.inner.uncompressed_size,
// |prog| {
// pbar.set_position(before_entry_bytes + prog.done);
// },
// );

// let copied_bytes = std::io::copy(&mut progress_reader, &mut entry_writer)?;
// done_bytes = before_entry_bytes + copied_bytes;
// }
// }
}
pbar.finish();
// let duration = start_time.elapsed()?;
// println!(
// "Extracted {} (in {} files, {} dirs, {} symlinks)",
// format_size(uncompressed_size, BINARY),
// num_files,
// num_dirs,
// num_symlinks
// );
// let seconds = (duration.as_millis() as f64) / 1000.0;
// let bps = (uncompressed_size as f64 / seconds) as u64;
// println!("Overall extraction speed: {} / s", format_size(bps, BINARY));
}
}

Expand Down
14 changes: 13 additions & 1 deletion rc-zip-sync/src/read_zip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -222,10 +222,22 @@ impl ReadZip for std::fs::File {
}
}

/// Streaming access to zip entries: no seeking, driven purely by local
/// headers.
///
/// THIS IS NOT RECOMMENDED. Reading a zip file correctly requires the
/// central directory, which is located at the end of the file.
///
/// Relying on local headers alone involves a lot of guesswork, so this is
/// only really useful when you have some level of control over your input.
pub trait ReadZipEntriesStreaming<R: Read> {
    /// Reads the first zip entry from the stream and returns it as a
    /// [`StreamingEntryReader`].
    ///
    /// As explained in the [`ReadZipEntriesStreaming`] documentation, using
    /// this is generally a bad idea: consider [`ReadZip`] or
    /// [`ReadZipWithSize`] instead.
    fn read_first_zip_entry_streaming(self) -> Result<StreamingEntryReader<R>, Error>;
}

Expand All @@ -239,13 +251,13 @@ where

let header = loop {
let n = self.read(buf.space())?;
tracing::trace!("read {} bytes into buf for first zip entry", n);
buf.fill(n);

let mut input = Partial::new(buf.data());
match LocalFileHeaderRecord::parser.parse_next(&mut input) {
Ok(header) => {
let consumed = input.as_bytes().offset_from(&buf.data());
buf.consume(consumed);
tracing::trace!(?header, %consumed, "Got local file header record!");
break header;
}
Expand Down
18 changes: 12 additions & 6 deletions rc-zip-sync/src/streaming_entry_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use std::{
io::{self, Write},
str::Utf8Error,
};
use tracing::trace;

pub struct StreamingEntryReader<R> {
header: LocalFileHeaderRecord,
Expand All @@ -15,6 +16,7 @@ pub struct StreamingEntryReader<R> {
}

#[derive(Default)]
#[allow(clippy::large_enum_variant)]
enum State {
Reading {
remain: Buffer,
Expand Down Expand Up @@ -55,18 +57,22 @@ where
mut fsm,
} => {
if fsm.wants_read() {
tracing::trace!("fsm wants read");
trace!("fsm wants read");
if remain.available_data() > 0 {
let n = remain.read(buf)?;
tracing::trace!("giving fsm {} bytes from remain", n);
trace!(
"remain has {} data bytes available",
remain.available_data(),
);
let n = remain.read(fsm.space())?;
trace!("giving fsm {} bytes from remain", n);
fsm.fill(n);
} else {
let n = self.rd.read(fsm.space())?;
tracing::trace!("giving fsm {} bytes from rd", n);
trace!("giving fsm {} bytes from rd", n);
fsm.fill(n);
}
} else {
tracing::trace!("fsm does not want read");
trace!("fsm does not want read");
}

match fsm.process(buf)? {
Expand All @@ -85,7 +91,7 @@ where
// what the fsm just gave back
if remain.available_data() > 0 {
fsm_remain.grow(fsm_remain.capacity() + remain.available_data());
fsm_remain.write_all(remain.data());
fsm_remain.write_all(remain.data())?;
drop(remain)
}

Expand Down

0 comments on commit 4352ab1

Please sign in to comment.