From 8039e2c7e9c60bc24291bb0a04ffe5c482c1e9a6 Mon Sep 17 00:00:00 2001
From: Amos Wenger <amos@bearcove.net>
Date: Fri, 26 Jan 2024 18:10:05 +0100
Subject: [PATCH 1/5] wip: Add lzma support

---
 .vscode/settings.json                         |  3 ++
 Cargo.lock                                    | 28 +++++++++++++++++++
 crates/jean/Cargo.toml                        |  5 ++++
 crates/rc-zip/Cargo.toml                      |  3 ++
 crates/rc-zip/src/reader/sync/decoder.rs      | 17 +++++++++++
 crates/rc-zip/src/reader/sync/entry_reader.rs | 28 ++++++++++++++++++-
 crates/rc-zip/src/tests.rs                    | 12 ++++++++
 7 files changed, 95 insertions(+), 1 deletion(-)
 create mode 100644 .vscode/settings.json
diff --git a/.vscode/settings.json b/.vscode/settings.json
new file mode 100644
index 0000000..4ac34f4
--- /dev/null
+++ b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+	"rust-analyzer.cargo.features": ["default", "lzma"]
+}
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
index 53136bc..4cca98b 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -367,6 +367,17 @@ version = "0.4.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
 
+[[package]]
+name = "lzma-sys"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27"
+dependencies = [
+ "cc",
+ "libc",
+ "pkg-config",
+]
+
 [[package]]
 name = "memchr"
 version = "2.7.1"
@@ -512,6 +523,12 @@ version = "0.2.13"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58"
 
+[[package]]
+name = "pkg-config"
+version = "0.3.29"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb"
+
 [[package]]
 name = "portable-atomic"
 version = "1.6.0"
@@ -581,6 +598,7 @@ checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
 name = "rc-zip"
 version = "2.0.1"
 dependencies = [
+ "byteorder",
  "chardetng",
  "chrono",
  "circular",
@@ -594,6 +612,7 @@ dependencies = [
  "pretty-hex",
  "thiserror",
  "tracing",
+ "xz2",
 ]
 
 [[package]]
@@ -958,3 +977,12 @@ checksum = "b7cf47b659b318dccbd69cc4797a39ae128f533dce7902a1096044d1967b9c16"
 dependencies = [
  "memchr",
 ]
+
+[[package]]
+name = "xz2"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2"
+dependencies = [
+ "lzma-sys",
+]
diff --git a/crates/jean/Cargo.toml b/crates/jean/Cargo.toml
index e56f0a7..77f8643 100644
--- a/crates/jean/Cargo.toml
+++ b/crates/jean/Cargo.toml
@@ -14,3 +14,8 @@ humansize = "2.1.3"
 positioned-io.workspace = true
 indicatif = "0.17.7"
 tracing-subscriber = "0.3.18"
+
+[features]
+default = ["lzma"]
+deflate = ["rc-zip/deflate"]
+lzma = ["rc-zip/lzma"]
diff --git a/crates/rc-zip/Cargo.toml b/crates/rc-zip/Cargo.toml
index 5229842..9c49809 100644
--- a/crates/rc-zip/Cargo.toml
+++ b/crates/rc-zip/Cargo.toml
@@ -25,9 +25,12 @@ thiserror = "1.0.56"
 chardetng = "0.1.17"
 flate2 = { version = "1.0.28", optional = true }
 num_enum = "0.7.2"
+xz2 = { version = "0.1.7", optional = true }
+byteorder = "1.5.0"
 
 [features]
 default = ["sync", "file", "deflate"]
 sync = []
 file = ["positioned-io"]
 deflate = ["flate2"]
+lzma = ["xz2"]
diff --git a/crates/rc-zip/src/reader/sync/decoder.rs b/crates/rc-zip/src/reader/sync/decoder.rs
index 058e503..479d129 100644
--- a/crates/rc-zip/src/reader/sync/decoder.rs
+++ b/crates/rc-zip/src/reader/sync/decoder.rs
@@ -1,6 +1,9 @@
 #[cfg(feature = "deflate")]
 use flate2::read::DeflateDecoder;
 
+#[cfg(feature = "lzma")]
+use xz2::read::XzDecoder;
+
 use std::{cmp, io};
 
 pub trait Decoder<R>: io::Read
@@ -29,6 +32,20 @@ where
     }
 }
 
+#[cfg(feature = "lzma")]
+impl<R> Decoder<R> for XzDecoder<R>
+where
+    R: io::Read,
+{
+    fn into_inner(self: Box<Self>) -> R {
+        XzDecoder::into_inner(*self)
+    }
+
+    fn get_mut(&mut self) -> &mut R {
+        XzDecoder::get_mut(self)
+    }
+}
+
 pub struct StoreDecoder<R>
 where
     R: io::Read,
diff --git a/crates/rc-zip/src/reader/sync/entry_reader.rs b/crates/rc-zip/src/reader/sync/entry_reader.rs
index f82e873..0ef3067 100644
--- a/crates/rc-zip/src/reader/sync/entry_reader.rs
+++ b/crates/rc-zip/src/reader/sync/entry_reader.rs
@@ -72,7 +72,10 @@ where
 
                         trace!("local file header: {:#?}", header);
                         transition!(self.state => (S::ReadLocalHeader { buffer }) {
-                            let limited_reader = LimitedReader::new(buffer, self.inner.compressed_size);
+                            // allow unnecessary mut for some feature combinations
+                            #[allow(unused_mut)]
+                            let mut limited_reader = LimitedReader::new(buffer, self.inner.compressed_size);
+
                             let decoder: Box<dyn Decoder<LimitedReader>> = match self.method {
                                 Method::Store => Box::new(StoreDecoder::new(limited_reader)),
                                 Method::Deflate => {
@@ -82,6 +85,29 @@ where
                                     #[cfg(not(feature = "deflate"))]
                                     { return Err(Error::Unsupported(UnsupportedError::CompressionMethodNotEnabled(Method::Deflate)).into()) }
                                 },
+                                Method::Lzma => {
+                                    #[cfg(feature = "lzma")]
+                                    {
+                                        // TODO: use a parser combinator library for this probably
+
+                                        // read LZMA properties header first.
+                                        use byteorder::{LittleEndian, ReadBytesExt};
+                                        let major: u8 = limited_reader.read_u8()?;
+                                        let minor: u8 = limited_reader.read_u8()?;
+
+                                        let size: u16 = limited_reader.read_u16::<LittleEndian>()?;
+                                        // this is an u16, worse case scenario is 65536 bytes
+                                        let mut data = [0u8; 1 << 16];
+                                        limited_reader.read_exact(&mut data[..size as usize])?;
+                                        let data = &data[..size as usize];
+                                        trace!(%major, %minor, %size, "LZMA properties header, data = {data:02x?}");
+
+                                        Box::new(xz2::read::XzDecoder::new_stream(limited_reader, xz2::stream::Stream::new_lzma_decoder(128 * 1024 * 1024)?))
+                                    }
+
+                                    #[cfg(not(feature = "lzma"))]
+                                    { return Err(Error::Unsupported(UnsupportedError::CompressionMethodNotEnabled(Method::Lzma)).into()) }
+                                }
                                 method => return Err(Error::Unsupported(UnsupportedError::UnsupportedCompressionMethod(method)).into()),
                             };
 
diff --git a/crates/rc-zip/src/tests.rs b/crates/rc-zip/src/tests.rs
index 2684521..e3bef67 100644
--- a/crates/rc-zip/src/tests.rs
+++ b/crates/rc-zip/src/tests.rs
@@ -266,6 +266,18 @@ fn test_cases() -> Vec<ZipTest> {
             }],
             ..Default::default()
         },
+        #[cfg(feature = "lzma")]
+        ZipTest {
+            source: ZipSource::File("found-me-lzma.zip"),
+            expected_encoding: Some(Encoding::Utf8),
+            files: vec![ZipTestFile {
+                name: "found-me.txt",
+                content: FileContent::Bytes("Oh no, you found me!\n".repeat(5000).into()),
+                modified: Some(date((2024, 1, 26), (17, 14, 36), 0, time_zone(0)).unwrap()),
+                ..Default::default()
+            }],
+            ..Default::default()
+        },
     ]
 }
 

From 5f9fb74bb4a27e30848d2f604d4db7e7dd1228a2 Mon Sep 17 00:00:00 2001
From: Amos Wenger <amos@bearcove.net>
Date: Fri, 26 Jan 2024 18:15:29 +0100
Subject: [PATCH 2/5] Add .envrc and Nix flake, switch lzma backend to rust-lzma

---
 .envrc                   |   1 +
 Cargo.lock               |  38 +++++++-------
 crates/rc-zip/Cargo.toml |   4 +-
 flake.lock               | 106 +++++++++++++++++++++++++++++++++++++++
 flake.nix                |  76 ++++++++++++++++++++++++++++
 5 files changed, 202 insertions(+), 23 deletions(-)
 create mode 100644 .envrc
 create mode 100644 flake.lock
 create mode 100644 flake.nix

diff --git a/.envrc b/.envrc
new file mode 100644
index 0000000..3550a30
--- /dev/null
+++ b/.envrc
@@ -0,0 +1 @@
+use flake
diff --git a/Cargo.lock b/Cargo.lock
index 4cca98b..f64bdac 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -367,17 +367,6 @@ version = "0.4.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
 
-[[package]]
-name = "lzma-sys"
-version = "0.1.20"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27"
-dependencies = [
- "cc",
- "libc",
- "pkg-config",
-]
-
 [[package]]
 name = "memchr"
 version = "2.7.1"
@@ -610,9 +599,19 @@ dependencies = [
  "oem_cp",
  "positioned-io",
  "pretty-hex",
+ "rust-lzma",
  "thiserror",
  "tracing",
- "xz2",
+]
+
+[[package]]
+name = "rust-lzma"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7d62915608f6cee1d7f2fc00f28b4f058ff79d6e4ec3c2fe0006b09b52437c84"
+dependencies = [
+ "pkg-config",
+ "vcpkg",
 ]
 
 [[package]]
@@ -818,6 +817,12 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
 
+[[package]]
+name = "vcpkg"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
+
 [[package]]
 name = "wasm-bindgen"
 version = "0.2.90"
@@ -977,12 +982,3 @@ checksum = "b7cf47b659b318dccbd69cc4797a39ae128f533dce7902a1096044d1967b9c16"
 dependencies = [
  "memchr",
 ]
-
-[[package]]
-name = "xz2"
-version = "0.1.7"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2"
-dependencies = [
- "lzma-sys",
-]
diff --git a/crates/rc-zip/Cargo.toml b/crates/rc-zip/Cargo.toml
index 9c49809..e2ba977 100644
--- a/crates/rc-zip/Cargo.toml
+++ b/crates/rc-zip/Cargo.toml
@@ -25,12 +25,12 @@ thiserror = "1.0.56"
 chardetng = "0.1.17"
 flate2 = { version = "1.0.28", optional = true }
 num_enum = "0.7.2"
-xz2 = { version = "0.1.7", optional = true }
 byteorder = "1.5.0"
+rust-lzma = { version = "0.6.0", optional = true }
 
 [features]
 default = ["sync", "file", "deflate"]
 sync = []
 file = ["positioned-io"]
 deflate = ["flate2"]
-lzma = ["xz2"]
+lzma = ["rust-lzma"]
diff --git a/flake.lock b/flake.lock
new file mode 100644
index 0000000..46b24b8
--- /dev/null
+++ b/flake.lock
@@ -0,0 +1,106 @@
+{
+  "nodes": {
+    "crane": {
+      "inputs": {
+        "nixpkgs": [
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1705974079,
+        "narHash": "sha256-HyC3C2esW57j6bG0MKwX4kQi25ltslRnr6z2uvpadJo=",
+        "owner": "ipetkov",
+        "repo": "crane",
+        "rev": "0b4e511fe6e346381e31d355e03de52aa43e8cb2",
+        "type": "github"
+      },
+      "original": {
+        "owner": "ipetkov",
+        "repo": "crane",
+        "type": "github"
+      }
+    },
+    "flake-utils": {
+      "inputs": {
+        "systems": "systems"
+      },
+      "locked": {
+        "lastModified": 1705309234,
+        "narHash": "sha256-uNRRNRKmJyCRC/8y1RqBkqWBLM034y4qN7EprSdmgyA=",
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "rev": "1ef2e671c3b0c19053962c07dbda38332dcebf26",
+        "type": "github"
+      },
+      "original": {
+        "owner": "numtide",
+        "repo": "flake-utils",
+        "type": "github"
+      }
+    },
+    "nixpkgs": {
+      "locked": {
+        "lastModified": 1705856552,
+        "narHash": "sha256-JXfnuEf5Yd6bhMs/uvM67/joxYKoysyE3M2k6T3eWbg=",
+        "owner": "NixOS",
+        "repo": "nixpkgs",
+        "rev": "612f97239e2cc474c13c9dafa0df378058c5ad8d",
+        "type": "github"
+      },
+      "original": {
+        "owner": "NixOS",
+        "ref": "nixos-unstable",
+        "repo": "nixpkgs",
+        "type": "github"
+      }
+    },
+    "root": {
+      "inputs": {
+        "crane": "crane",
+        "flake-utils": "flake-utils",
+        "nixpkgs": "nixpkgs",
+        "rust-overlay": "rust-overlay"
+      }
+    },
+    "rust-overlay": {
+      "inputs": {
+        "flake-utils": [
+          "flake-utils"
+        ],
+        "nixpkgs": [
+          "nixpkgs"
+        ]
+      },
+      "locked": {
+        "lastModified": 1706235145,
+        "narHash": "sha256-3jh5nahTlcsX6QFcMPqxtLn9p9CgT9RSce5GLqjcpi4=",
+        "owner": "oxalica",
+        "repo": "rust-overlay",
+        "rev": "3a57c4e29cb2beb777b2e6ae7309a680585b8b2f",
+        "type": "github"
+      },
+      "original": {
+        "owner": "oxalica",
+        "repo": "rust-overlay",
+        "type": "github"
+      }
+    },
+    "systems": {
+      "locked": {
+        "lastModified": 1681028828,
+        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
+        "owner": "nix-systems",
+        "repo": "default",
+        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
+        "type": "github"
+      },
+      "original": {
+        "owner": "nix-systems",
+        "repo": "default",
+        "type": "github"
+      }
+    }
+  },
+  "root": "root",
+  "version": 7
+}
diff --git a/flake.nix b/flake.nix
new file mode 100644
index 0000000..a3b1e57
--- /dev/null
+++ b/flake.nix
@@ -0,0 +1,76 @@
+{
+  inputs = {
+    flake-utils = { url = "github:numtide/flake-utils"; };
+    nixpkgs = { url = "github:NixOS/nixpkgs/nixos-unstable"; };
+    rust-overlay =
+      {
+        url = "github:oxalica/rust-overlay";
+        inputs = {
+          nixpkgs.follows = "nixpkgs";
+          flake-utils.follows = "flake-utils";
+        };
+      };
+    crane = {
+      url = "github:ipetkov/crane";
+      inputs = {
+        nixpkgs.follows = "nixpkgs";
+      };
+    };
+  };
+  outputs =
+    { self, nixpkgs, flake-utils, rust-overlay, crane }:
+    flake-utils.lib.eachDefaultSystem (system:
+    let
+      pkgs = import nixpkgs {
+        inherit system;
+        overlays = [ (import rust-overlay) ];
+      };
+      rustToolchain = pkgs.pkgsBuildHost.rust-bin.fromRustupToolchainFile ./rust-toolchain.toml;
+      craneLib = (crane.mkLib pkgs).overrideToolchain rustToolchain;
+      src = craneLib.cleanCargoSource (craneLib.path ./.);
+
+      buildInputs = with pkgs; [ pkgs.stdenv.cc.cc ];
+      nativeBuildInputs = with pkgs; [
+        rustToolchain
+        clang
+        mold
+        pkg-config
+        xz
+      ]
+      ++ lib.optionals pkgs.stdenv.isLinux [ autoPatchelfHook ]
+      ++ lib.optionals pkgs.stdenv.isDarwin
+        (with pkgs.darwin.apple_sdk.frameworks; [
+          CoreFoundation
+          CoreServices
+          SystemConfiguration
+          Security
+        ]);
+      commonArgs = {
+        pname = "fluke";
+        version = "latest";
+        strictDeps = true;
+        dontStrip = true;
+        # workaround for https://github.com/NixOS/nixpkgs/issues/166205
+        env = with pkgs; lib.optionalAttrs stdenv.cc.isClang {
+          NIX_LDFLAGS = "-l${stdenv.cc.libcxx.cxxabi.libName}";
+        };
+        inherit src buildInputs nativeBuildInputs;
+      };
+      cargoArtifacts = craneLib.buildDepsOnly commonArgs;
+      bin = craneLib.buildPackage (commonArgs // {
+        inherit cargoArtifacts;
+      });
+    in
+    with pkgs;
+    {
+      packages = {
+        inherit bin;
+        default = bin;
+      };
+      devShells.default = mkShell {
+        inputsFrom = [ bin ];
+        packages = with pkgs; [ just nixpkgs-fmt ];
+      };
+    }
+    );
+}

From 5fe9d15812ff6e778b1035c3371eac9be8f69a50 Mon Sep 17 00:00:00 2001
From: Amos Wenger <amos@bearcove.net>
Date: Fri, 26 Jan 2024 18:33:06 +0100
Subject: [PATCH 3/5] Switch lzma decoder to rust-lzma's LzmaReader (local path dependency)

---
 .direnv/flake-profile                         |  1 +
 .direnv/flake-profile-1-link                  |  1 +
 Cargo.lock                                    |  2 --
 crates/rc-zip/Cargo.toml                      |  4 +++-
 crates/rc-zip/src/reader/sync/decoder.rs      | 12 ++++++------
 crates/rc-zip/src/reader/sync/entry_reader.rs |  2 +-
 crates/rc-zip/testdata/test-zips/found-me.txt |  0
 flake.nix                                     |  2 +-
 rust-toolchain.toml                           |  3 +++
 9 files changed, 16 insertions(+), 11 deletions(-)
 create mode 120000 .direnv/flake-profile
 create mode 120000 .direnv/flake-profile-1-link
 create mode 100644 crates/rc-zip/testdata/test-zips/found-me.txt
 create mode 100644 rust-toolchain.toml

diff --git a/.direnv/flake-profile b/.direnv/flake-profile
new file mode 120000
index 0000000..0c05709
--- /dev/null
+++ b/.direnv/flake-profile
@@ -0,0 +1 @@
+flake-profile-1-link
\ No newline at end of file
diff --git a/.direnv/flake-profile-1-link b/.direnv/flake-profile-1-link
new file mode 120000
index 0000000..356f6b6
--- /dev/null
+++ b/.direnv/flake-profile-1-link
@@ -0,0 +1 @@
+/nix/store/bdw4nizg6fg6i13gyi43skk3clwg7fjs-nix-shell-env
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
index f64bdac..6f055ea 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -607,8 +607,6 @@ dependencies = [
 [[package]]
 name = "rust-lzma"
 version = "0.6.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7d62915608f6cee1d7f2fc00f28b4f058ff79d6e4ec3c2fe0006b09b52437c84"
 dependencies = [
  "pkg-config",
  "vcpkg",
diff --git a/crates/rc-zip/Cargo.toml b/crates/rc-zip/Cargo.toml
index e2ba977..5ec9c42 100644
--- a/crates/rc-zip/Cargo.toml
+++ b/crates/rc-zip/Cargo.toml
@@ -26,7 +26,9 @@ chardetng = "0.1.17"
 flate2 = { version = "1.0.28", optional = true }
 num_enum = "0.7.2"
 byteorder = "1.5.0"
-rust-lzma = { version = "0.6.0", optional = true }
+# rust-lzma = { version = "0.6.0", optional = true }
+# FIXME:
+rust-lzma = { path = "../../../rust-lzma", optional = true }
 
 [features]
 default = ["sync", "file", "deflate"]
diff --git a/crates/rc-zip/src/reader/sync/decoder.rs b/crates/rc-zip/src/reader/sync/decoder.rs
index 479d129..88a8de3 100644
--- a/crates/rc-zip/src/reader/sync/decoder.rs
+++ b/crates/rc-zip/src/reader/sync/decoder.rs
@@ -2,7 +2,7 @@
 use flate2::read::DeflateDecoder;
 
 #[cfg(feature = "lzma")]
-use xz2::read::XzDecoder;
+use lzma::reader::LzmaReader;
 
 use std::{cmp, io};
 
@@ -24,25 +24,25 @@ where
     R: io::Read,
 {
     fn into_inner(self: Box<Self>) -> R {
-        DeflateDecoder::into_inner(*self)
+        Self::into_inner(*self)
     }
 
     fn get_mut(&mut self) -> &mut R {
-        DeflateDecoder::get_mut(self)
+        Self::get_mut(self)
     }
 }
 
 #[cfg(feature = "lzma")]
-impl<R> Decoder<R> for XzDecoder<R>
+impl<R> Decoder<R> for LzmaReader<R>
 where
     R: io::Read,
 {
     fn into_inner(self: Box<Self>) -> R {
-        XzDecoder::into_inner(*self)
+        Self::into_inner(*self)
     }
 
     fn get_mut(&mut self) -> &mut R {
-        XzDecoder::get_mut(self)
+        Self::get_mut(self)
     }
 }
 
diff --git a/crates/rc-zip/src/reader/sync/entry_reader.rs b/crates/rc-zip/src/reader/sync/entry_reader.rs
index 0ef3067..b1f2130 100644
--- a/crates/rc-zip/src/reader/sync/entry_reader.rs
+++ b/crates/rc-zip/src/reader/sync/entry_reader.rs
@@ -102,7 +102,7 @@ where
                                         let data = &data[..size as usize];
                                         trace!(%major, %minor, %size, "LZMA properties header, data = {data:02x?}");
 
-                                        Box::new(xz2::read::XzDecoder::new_stream(limited_reader, xz2::stream::Stream::new_lzma_decoder(128 * 1024 * 1024)?))
+                                        Box::new(lzma::reader::LzmaReader::new_decompressor(limited_reader).map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?)
                                     }
 
                                     #[cfg(not(feature = "lzma"))]
diff --git a/crates/rc-zip/testdata/test-zips/found-me.txt b/crates/rc-zip/testdata/test-zips/found-me.txt
new file mode 100644
index 0000000..e69de29
diff --git a/flake.nix b/flake.nix
index a3b1e57..be8ef3e 100644
--- a/flake.nix
+++ b/flake.nix
@@ -46,7 +46,7 @@
           Security
         ]);
       commonArgs = {
-        pname = "fluke";
+        pname = "rc_zip";
         version = "latest";
         strictDeps = true;
         dontStrip = true;
diff --git a/rust-toolchain.toml b/rust-toolchain.toml
new file mode 100644
index 0000000..d24f9dc
--- /dev/null
+++ b/rust-toolchain.toml
@@ -0,0 +1,3 @@
+[toolchain]
+channel = "stable"
+components = ["llvm-tools", "clippy", "rust-src"]

From cb3355b041a4fdc9c51a2686b541a7198fd4593a Mon Sep 17 00:00:00 2001
From: Amos Wenger <amos@bearcove.net>
Date: Fri, 26 Jan 2024 19:22:30 +0100
Subject: [PATCH 4/5] Struggle with lzma support

---
 Cargo.lock                                    |   37 +-
 crates/rc-zip/Cargo.toml                      |    7 +-
 crates/rc-zip/src/error.rs                    |    4 +
 crates/rc-zip/src/reader/sync/decoder.rs      |    5 +-
 crates/rc-zip/src/reader/sync/entry_reader.rs |  156 ++-
 crates/rc-zip/testdata/lzma-specification.txt | 1176 +++++++++++++++++
 crates/rc-zip/testdata/test-zips/.gitignore   |    1 +
 crates/rc-zip/testdata/test-zips/found-me.txt |    0
 8 files changed, 1326 insertions(+), 60 deletions(-)
 create mode 100644 crates/rc-zip/testdata/lzma-specification.txt
 create mode 100644 crates/rc-zip/testdata/test-zips/.gitignore
 delete mode 100644 crates/rc-zip/testdata/test-zips/found-me.txt

diff --git a/Cargo.lock b/Cargo.lock
index 6f055ea..558fa3d 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -367,6 +367,17 @@ version = "0.4.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
 
+[[package]]
+name = "lzma-sys"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27"
+dependencies = [
+ "cc",
+ "libc",
+ "pkg-config",
+]
+
 [[package]]
 name = "memchr"
 version = "2.7.1"
@@ -588,6 +599,7 @@ name = "rc-zip"
 version = "2.0.1"
 dependencies = [
  "byteorder",
+ "cfg-if",
  "chardetng",
  "chrono",
  "circular",
@@ -599,17 +611,9 @@ dependencies = [
  "oem_cp",
  "positioned-io",
  "pretty-hex",
- "rust-lzma",
  "thiserror",
  "tracing",
-]
-
-[[package]]
-name = "rust-lzma"
-version = "0.6.0"
-dependencies = [
- "pkg-config",
- "vcpkg",
+ "xz2",
 ]
 
 [[package]]
@@ -815,12 +819,6 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
 
-[[package]]
-name = "vcpkg"
-version = "0.2.15"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
-
 [[package]]
 name = "wasm-bindgen"
 version = "0.2.90"
@@ -980,3 +978,12 @@ checksum = "b7cf47b659b318dccbd69cc4797a39ae128f533dce7902a1096044d1967b9c16"
 dependencies = [
  "memchr",
 ]
+
+[[package]]
+name = "xz2"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2"
+dependencies = [
+ "lzma-sys",
+]
diff --git a/crates/rc-zip/Cargo.toml b/crates/rc-zip/Cargo.toml
index 5ec9c42..c053200 100644
--- a/crates/rc-zip/Cargo.toml
+++ b/crates/rc-zip/Cargo.toml
@@ -26,13 +26,12 @@ chardetng = "0.1.17"
 flate2 = { version = "1.0.28", optional = true }
 num_enum = "0.7.2"
 byteorder = "1.5.0"
-# rust-lzma = { version = "0.6.0", optional = true }
-# FIXME:
-rust-lzma = { path = "../../../rust-lzma", optional = true }
+xz2 = { version = "0.1.7", optional = true }
+cfg-if = "1.0.0"
 
 [features]
 default = ["sync", "file", "deflate"]
 sync = []
 file = ["positioned-io"]
 deflate = ["flate2"]
-lzma = ["rust-lzma"]
+lzma = ["xz2"]
diff --git a/crates/rc-zip/src/error.rs b/crates/rc-zip/src/error.rs
index a8894f9..28f2510 100644
--- a/crates/rc-zip/src/error.rs
+++ b/crates/rc-zip/src/error.rs
@@ -30,6 +30,10 @@ pub enum UnsupportedError {
     UnsupportedCompressionMethod(crate::format::Method),
     #[error("compression method supported, but not enabled in this build: {0:?}")]
     CompressionMethodNotEnabled(crate::format::Method),
+    #[error("only LZMA2.0 is supported, found LZMA{major}.{minor}")]
+    LzmaVersionUnsupported { minor: u8, major: u8 },
+    #[error("LZMA properties header too short: expected {expected} bytes, got {actual} bytes")]
+    LzmaPropertiesHeaderTooShort { expected: u16, actual: u16 },
 }
 
 /// Specific zip format errors, mostly due to invalid zip archives but that could also stem from
diff --git a/crates/rc-zip/src/reader/sync/decoder.rs b/crates/rc-zip/src/reader/sync/decoder.rs
index 88a8de3..ab81303 100644
--- a/crates/rc-zip/src/reader/sync/decoder.rs
+++ b/crates/rc-zip/src/reader/sync/decoder.rs
@@ -1,9 +1,6 @@
 #[cfg(feature = "deflate")]
 use flate2::read::DeflateDecoder;
 
-#[cfg(feature = "lzma")]
-use lzma::reader::LzmaReader;
-
 use std::{cmp, io};
 
 pub trait Decoder<R>: io::Read
@@ -33,7 +30,7 @@ where
 }
 
 #[cfg(feature = "lzma")]
-impl<R> Decoder<R> for LzmaReader<R>
+impl<R> Decoder<R> for xz2::read::XzDecoder<R>
 where
     R: io::Read,
 {
diff --git a/crates/rc-zip/src/reader/sync/entry_reader.rs b/crates/rc-zip/src/reader/sync/entry_reader.rs
index b1f2130..b78ce6b 100644
--- a/crates/rc-zip/src/reader/sync/entry_reader.rs
+++ b/crates/rc-zip/src/reader/sync/entry_reader.rs
@@ -7,12 +7,14 @@ use crate::{
     transition,
 };
 
-#[cfg(feature = "deflate")]
-use flate2::read::DeflateDecoder;
+use cfg_if::cfg_if;
 use nom::Offset;
 use std::io;
 use tracing::trace;
 
+#[cfg(feature = "deflate")]
+use flate2::read::DeflateDecoder;
+
 struct EntryReadMetrics {
     uncompressed_size: u64,
     crc32: u32,
@@ -75,41 +77,7 @@ where
                             // allow unnecessary mut for some feature combinations
                             #[allow(unused_mut)]
                             let mut limited_reader = LimitedReader::new(buffer, self.inner.compressed_size);
-
-                            let decoder: Box<dyn Decoder<LimitedReader>> = match self.method {
-                                Method::Store => Box::new(StoreDecoder::new(limited_reader)),
-                                Method::Deflate => {
-                                    #[cfg(feature = "deflate")]
-                                    { Box::new(DeflateDecoder::new(limited_reader)) }
-
-                                    #[cfg(not(feature = "deflate"))]
-                                    { return Err(Error::Unsupported(UnsupportedError::CompressionMethodNotEnabled(Method::Deflate)).into()) }
-                                },
-                                Method::Lzma => {
-                                    #[cfg(feature = "lzma")]
-                                    {
-                                        // TODO: use a parser combinator library for this probably
-
-                                        // read LZMA properties header first.
-                                        use byteorder::{LittleEndian, ReadBytesExt};
-                                        let major: u8 = limited_reader.read_u8()?;
-                                        let minor: u8 = limited_reader.read_u8()?;
-
-                                        let size: u16 = limited_reader.read_u16::<LittleEndian>()?;
-                                        // this is an u16, worse case scenario is 65536 bytes
-                                        let mut data = [0u8; 1 << 16];
-                                        limited_reader.read_exact(&mut data[..size as usize])?;
-                                        let data = &data[..size as usize];
-                                        trace!(%major, %minor, %size, "LZMA properties header, data = {data:02x?}");
-
-                                        Box::new(lzma::reader::LzmaReader::new_decompressor(limited_reader).map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?)
-                                    }
-
-                                    #[cfg(not(feature = "lzma"))]
-                                    { return Err(Error::Unsupported(UnsupportedError::CompressionMethodNotEnabled(Method::Lzma)).into()) }
-                                }
-                                method => return Err(Error::Unsupported(UnsupportedError::UnsupportedCompressionMethod(method)).into()),
-                            };
+                            let decoder: Box<dyn Decoder<LimitedReader>> = self.get_decoder(limited_reader)?;
 
                             S::ReadData {
                                 hasher: crc32fast::Hasher::new(),
@@ -284,4 +252,118 @@ where
             inner: entry.inner,
         }
     }
+
+    /// Builds the decoder for this entry's compression method around `limited_reader`;
+    /// methods that are unknown or compiled out yield an "unsupported" error instead.
+    fn get_decoder(
+        &self,
+        #[allow(unused_mut)] mut limited_reader: LimitedReader,
+    ) -> std::io::Result<Box<dyn Decoder<LimitedReader>>> {
+        let decoder: Box<dyn Decoder<LimitedReader>> = match self.method {
+            Method::Store => Box::new(StoreDecoder::new(limited_reader)),
+            Method::Deflate => {
+                cfg_if! {
+                    if #[cfg(feature = "deflate")] {
+                        Box::new(DeflateDecoder::new(limited_reader))
+                    } else {
+                        return Err(
+                            Error::Unsupported(UnsupportedError::CompressionMethodNotEnabled(
+                                Method::Deflate,
+                            ))
+                            .into(),
+                        );
+                    }
+                }
+            }
+            Method::Lzma => {
+                cfg_if! {
+                    if #[cfg(feature = "lzma")] {
+                        // TODO: use a parser combinator library for this probably?
+                        // read LZMA properties header first.
+                        use byteorder::{LittleEndian, ReadBytesExt};
+                        let major: u8 = limited_reader.read_u8()?;
+                        let minor: u8 = limited_reader.read_u8()?;
+                        if (major, minor) != (2, 0) {
+                            return Err(
+                                Error::Unsupported(UnsupportedError::LzmaVersionUnsupported {
+                                    minor,
+                                    major,
+                                })
+                                .into(),
+                            );
+                        }
+
+                        let props_size: u16 = limited_reader.read_u16::<LittleEndian>()?;
+
+                        const LZMA_2_0_PROPS_SIZE: u16 = 5;
+                        if props_size != LZMA_2_0_PROPS_SIZE {
+                            return Err(Error::Unsupported(
+                                UnsupportedError::LzmaPropertiesHeaderTooShort {
+                                    expected: 5,
+                                    actual: props_size,
+                                },
+                            )
+                            .into());
+                        }
+                        let bits_byte: u8 = limited_reader.read_u8()?;
+
+                        #[derive(Debug, Clone, Copy)]
+                        struct LzmaProperties {
+                            literal_context_bits: u8,
+                            literal_pos_state_bits: u8,
+                            pos_state_bits: u8,
+                        }
+
+                        // from `lzma-specification.txt`
+                        fn decode_properties(mut d: u8) -> LzmaProperties {
+                            let lc = d % 9;
+                            d /= 9;
+                            let pb = d / 5;
+                            let lp = d % 5;
+
+                            LzmaProperties {
+                                literal_context_bits: lc,
+                                literal_pos_state_bits: lp,
+                                pos_state_bits: pb,
+                            }
+                        }
+
+                        let props = decode_properties(bits_byte);
+                        // Per `lzma-specification.txt`, dictionary sizes below LZMA_DIC_MIN are raised to it.
+                        const LZMA_DIC_MIN: u32 = 1 << 12;
+                        let dict_size: u32 =
+                            std::cmp::max(LZMA_DIC_MIN, limited_reader.read_u32::<LittleEndian>()?);
+
+                        let mut opts = xz2::stream::LzmaOptions::new_preset(0)?;
+                        opts.dict_size(dict_size);
+                        opts.position_bits(props.pos_state_bits as _);
+                        opts.literal_position_bits(props.literal_pos_state_bits as _);
+                        opts.literal_context_bits(props.literal_context_bits as _);
+
+                        let mut filters = xz2::stream::Filters::new();
+                        filters.lzma2(&opts);
+                        // NOTE(review): this is an *encoder* stream handed to a decoder; presumably a decoder stream is needed - confirm.
+                        let stream = xz2::stream::Stream::new_stream_encoder(&filters, xz2::stream::Check::None)?;
+
+                        Box::new(xz2::read::XzDecoder::new_stream(limited_reader, stream))
+                    } else {
+                        return Err(
+                            Error::Unsupported(UnsupportedError::CompressionMethodNotEnabled(
+                                Method::Lzma,
+                            ))
+                            .into(),
+                        );
+                    }
+                }
+            }
+            method => {
+                return Err(
+                    Error::Unsupported(UnsupportedError::UnsupportedCompressionMethod(method))
+                        .into(),
+                )
+            }
+        };
+
+        Ok(decoder)
+    }
 }
diff --git a/crates/rc-zip/testdata/lzma-specification.txt b/crates/rc-zip/testdata/lzma-specification.txt
new file mode 100644
index 0000000..ac0cce7
--- /dev/null
+++ b/crates/rc-zip/testdata/lzma-specification.txt
@@ -0,0 +1,1176 @@
+LZMA specification (DRAFT version)
+----------------------------------
+
+Author: Igor Pavlov
+Date: 2015-06-14
+
+This specification defines the format of LZMA compressed data and lzma file format.
+
+Notation 
+--------
+
+We use the syntax of C++ programming language.
+We use the following types in C++ code:
+  unsigned - unsigned integer, at least 16 bits in size
+  int      - signed integer, at least 16 bits in size
+  UInt64   - 64-bit unsigned integer
+  UInt32   - 32-bit unsigned integer
+  UInt16   - 16-bit unsigned integer
+  Byte     - 8-bit unsigned integer
+  bool     - boolean type with two possible values: false, true
+
+
+lzma file format
+================
+
+The lzma file contains the raw LZMA stream and the header with related properties.
+
+The files in that format use ".lzma" extension.
+
+The lzma file format layout:
+
+Offset Size Description
+
+  0     1   LZMA model properties (lc, lp, pb) in encoded form
+  1     4   Dictionary size (32-bit unsigned integer, little-endian)
+  5     8   Uncompressed size (64-bit unsigned integer, little-endian)
+ 13         Compressed data (LZMA stream)
+
+LZMA properties:
+
+    name  Range          Description
+
+      lc  [0, 8]         the number of "literal context" bits
+      lp  [0, 4]         the number of "literal pos" bits
+      pb  [0, 4]         the number of "pos" bits
+dictSize  [0, 2^32 - 1]  the dictionary size 
+
+The following code encodes LZMA properties:
+
+void EncodeProperties(Byte *properties)
+{
+  properties[0] = (Byte)((pb * 5 + lp) * 9 + lc);
+  Set_UInt32_LittleEndian(properties + 1, dictSize);
+}
+
+If the value of dictionary size in properties is smaller than (1 << 12),
+the LZMA decoder must set the dictionary size variable to (1 << 12).
+
+#define LZMA_DIC_MIN (1 << 12)
+
+  unsigned lc, pb, lp;
+  UInt32 dictSize;
+  UInt32 dictSizeInProperties;
+
+  void DecodeProperties(const Byte *properties)
+  {
+    unsigned d = properties[0];
+    if (d >= (9 * 5 * 5))
+      throw "Incorrect LZMA properties";
+    lc = d % 9;
+    d /= 9;
+    pb = d / 5;
+    lp = d % 5;
+    dictSizeInProperties = 0;
+    for (int i = 0; i < 4; i++)
+      dictSizeInProperties |= (UInt32)properties[i + 1] << (8 * i);
+    dictSize = dictSizeInProperties;
+    if (dictSize < LZMA_DIC_MIN)
+      dictSize = LZMA_DIC_MIN;
+  }
+
+If "Uncompressed size" field contains ones in all 64 bits, it means that
+uncompressed size is unknown and there is the "end marker" in stream,
+that indicates the end of decoding point.
+In opposite case, if the value from "Uncompressed size" field is not
+equal to ((2^64) - 1), the LZMA stream decoding must be finished after
+specified number of bytes (Uncompressed size) is decoded. And if there 
+is the "end marker", the LZMA decoder must read that marker also.
+
+
+The new scheme to encode LZMA properties
+----------------------------------------
+
+If LZMA compression is used for some other format, it's recommended to
+use a new improved scheme to encode LZMA properties. That new scheme was
+used in xz format that uses the LZMA2 compression algorithm.
+The LZMA2 is a new compression algorithm that is based on the LZMA algorithm.
+
+The dictionary size in LZMA2 is encoded with just one byte and LZMA2 supports
+only reduced set of dictionary sizes:
+  (2 << 11), (3 << 11),
+  (2 << 12), (3 << 12),
+  ...
+  (2 << 30), (3 << 30),
+  (2 << 31) - 1
+
+The dictionary size can be extracted from encoded value with the following code:
+
+  dictSize = (p == 40) ? 0xFFFFFFFF : (((UInt32)2 | ((p) & 1)) << ((p) / 2 + 11));
+
+Also there is additional limitation (lc + lp <= 4) in LZMA2 for values of 
+"lc" and "lp" properties:
+
+  if (lc + lp > 4)
+    throw "Unsupported properties: (lc + lp) > 4";
+
+There are some advantages for LZMA decoder with such (lc + lp) value
+limitation. It reduces the maximum size of tables allocated by decoder.
+And it reduces the complexity of initialization procedure, that can be 
+important to keep high speed of decoding of big number of small LZMA streams.
+
+It's recommended to use that limitation (lc + lp <= 4) for any new format
+that uses LZMA compression. Note that the combinations of "lc" and "lp" 
+parameters, where (lc + lp > 4), can provide significant improvement in 
+compression ratio only in some rare cases.
+
+The LZMA properties can be encoded into two bytes in new scheme:
+
+Offset Size Description
+
+  0     1   The dictionary size encoded with LZMA2 scheme
+  1     1   LZMA model properties (lc, lp, pb) in encoded form
+
+
+The RAM usage 
+=============
+
+The RAM usage for LZMA decoder is determined by the following parts:
+
+1) The Sliding Window (from 4 KiB to 4 GiB).
+2) The probability model counter arrays (arrays of 16-bit variables).
+3) Some additional state variables (about 10 variables of 32-bit integers).
+
+
+The RAM usage for Sliding Window
+--------------------------------
+
+There are two main scenarios of decoding:
+
+1) The decoding of full stream to one RAM buffer.
+
+  If we decode full LZMA stream to one output buffer in RAM, the decoder 
+  can use that output buffer as sliding window. So the decoder doesn't 
+  need additional buffer allocated for sliding window.
+
+2) The decoding to some external storage.
+
+  If we decode LZMA stream to external storage, the decoder must allocate
+  the buffer for sliding window. The size of that buffer must be equal 
+  or larger than the value of dictionary size from properties of LZMA stream.
+
+In this specification we describe the code for decoding to some external
+storage. The optimized version of code for decoding of full stream to one
+output RAM buffer can require some minor changes in code.
+
+
+The RAM usage for the probability model counters
+------------------------------------------------
+
+The size of the probability model counter arrays is calculated with the 
+following formula:
+
+size_of_prob_arrays = 1846 + 768 * (1 << (lp + lc))
+
+Each probability model counter is 11-bit unsigned integer.
+If we use 16-bit integer variables (2-byte integers) for these probability 
+model counters, the RAM usage required by probability model counter arrays 
+can be estimated with the following formula:
+
+  RAM = 4 KiB + 1.5 KiB * (1 << (lp + lc))
+
+For example, for default LZMA parameters (lp = 0 and lc = 3), the RAM usage is
+
+  RAM_lc3_lp0 = 4 KiB + 1.5 KiB * 8 = 16 KiB
+
+The maximum RAM state usage is required for decoding the stream with lp = 4 
+and lc = 8:
+
+  RAM_lc8_lp4 = 4 KiB + 1.5 KiB * 4096 = 6148 KiB
+
+If the decoder uses LZMA2's limited property condition 
+(lc + lp <= 4), the RAM usage will be not larger than
+
+  RAM_lc_lp_4 = 4 KiB + 1.5 KiB * 16 = 28 KiB
+
+
+The RAM usage for encoder
+-------------------------
+
+There are many variants for LZMA encoding code.
+These variants have different values for memory consumption.
+Note that memory consumption for LZMA Encoder can not be 
+smaller than memory consumption of LZMA Decoder for same stream.
+
+The RAM usage required by modern effective implementation of 
+LZMA Encoder can be estimated with the following formula:
+
+  Encoder_RAM_Usage = 4 MiB + 11 * dictionarySize.
+
+But there are some modes of the encoder that require less memory.
+
+
+LZMA Decoding
+=============
+
+The LZMA compression algorithm uses LZ-based compression with Sliding Window
+and Range Encoding as entropy coding method.
+
+
+Sliding Window
+--------------
+
+LZMA uses Sliding Window compression similar to LZ77 algorithm.
+
+LZMA stream must be decoded to the sequence that consists
+of MATCHES and LITERALS:
+  
+  - a LITERAL is a 8-bit character (one byte).
+    The decoder just puts that LITERAL to the uncompressed stream.
+  
+  - a MATCH is a pair of two numbers (DISTANCE-LENGTH pair).
+    The decoder takes one byte exactly "DISTANCE" characters behind
+    current position in the uncompressed stream and puts it to 
+    uncompressed stream. The decoder must repeat it "LENGTH" times.
+
+The "DISTANCE" can not be larger than dictionary size.
+And the "DISTANCE" can not be larger than the number of bytes in
+the uncompressed stream that were decoded before that match.
+
+In this specification we use cyclic buffer to implement Sliding Window
+for LZMA decoder:
+
+class COutWindow
+{
+  Byte *Buf;
+  UInt32 Pos;
+  UInt32 Size;
+  bool IsFull;
+
+public:
+  unsigned TotalPos;
+  COutStream OutStream;
+
+  COutWindow(): Buf(NULL) {}
+  ~COutWindow() { delete []Buf; }
+ 
+  void Create(UInt32 dictSize)
+  {
+    Buf = new Byte[dictSize];
+    Pos = 0;
+    Size = dictSize;
+    IsFull = false;
+    TotalPos = 0;
+  }
+
+  void PutByte(Byte b)
+  {
+    TotalPos++;
+    Buf[Pos++] = b;
+    if (Pos == Size)
+    {
+      Pos = 0;
+      IsFull = true;
+    }
+    OutStream.WriteByte(b);
+  }
+
+  Byte GetByte(UInt32 dist) const
+  {
+    return Buf[dist <= Pos ? Pos - dist : Size - dist + Pos];
+  }
+
+  void CopyMatch(UInt32 dist, unsigned len)
+  {
+    for (; len > 0; len--)
+      PutByte(GetByte(dist));
+  }
+
+  bool CheckDistance(UInt32 dist) const
+  {
+    return dist <= Pos || IsFull;
+  }
+
+  bool IsEmpty() const
+  {
+    return Pos == 0 && !IsFull;
+  }
+};
+
+
+In another implementation it's possible to use one buffer that contains 
+Sliding Window and the whole data stream after uncompressing.
+
+
+Range Decoder
+-------------
+
+LZMA algorithm uses Range Encoding (1) as entropy coding method.
+
+LZMA stream contains just one very big number in big-endian encoding.
+LZMA decoder uses the Range Decoder to extract a sequence of binary
+symbols from that big number.
+
+The state of the Range Decoder:
+
+struct CRangeDecoder
+{
+  UInt32 Range; 
+  UInt32 Code;
+  InputStream *InStream;
+
+  bool Corrupted;
+}
+
+The notes about UInt32 type for the "Range" and "Code" variables:
+
+  It's possible to use 64-bit (unsigned or signed) integer type
+  for the "Range" and the "Code" variables instead of 32-bit unsigned,
+  but some additional code must be used to truncate the values to 
+  low 32-bits after some operations.
+
+  If the programming language does not support 32-bit unsigned integer type 
+  (like in case of JAVA language), it's possible to use 32-bit signed integer, 
+  but some code must be changed. For example, it's required to change the code
+  that uses comparison operations for UInt32 variables in this specification.
+
+The Range Decoder can be in some states that can be treated as 
+"Corruption" in LZMA stream. The Range Decoder uses the variable "Corrupted":
+
+  (Corrupted == false), if the Range Decoder has not detected any corruption.
+  (Corrupted == true), if the Range Decoder has detected some corruption.
+
+The reference LZMA Decoder ignores the value of the "Corrupted" variable.
+So it continues to decode the stream, even if the corruption can be detected
+in the Range Decoder. To provide the full compatibility with output of the 
+reference LZMA Decoder, other LZMA Decoder implementations must also
+ignore the value of the "Corrupted" variable.
+
+The LZMA Encoder is required to create only such LZMA streams, that will not 
+lead the Range Decoder to states, where the "Corrupted" variable is set to true.
+
+The Range Decoder reads first 5 bytes from input stream to initialize
+the state:
+
+bool CRangeDecoder::Init()
+{
+  Corrupted = false;
+  Range = 0xFFFFFFFF;
+  Code = 0;
+
+  Byte b = InStream->ReadByte();
+  
+  for (int i = 0; i < 4; i++)
+    Code = (Code << 8) | InStream->ReadByte();
+  
+  if (b != 0 || Code == Range)
+    Corrupted = true;
+  return b == 0;
+}
+
+The LZMA Encoder always writes ZERO in initial byte of compressed stream.
+That scheme allows to simplify the code of the Range Encoder in the 
+LZMA Encoder. If initial byte is not equal to ZERO, the LZMA Decoder must
+stop decoding and report error.
+
+After the last bit of data was decoded by Range Decoder, the value of the
+"Code" variable must be equal to 0. The LZMA Decoder must check it by 
+calling the IsFinishedOK() function:
+
+  bool IsFinishedOK() const { return Code == 0; }
+
+If there is corruption in data stream, there is big probability that
+the "Code" value will be not equal to 0 in the Finish() function. So that
+check in the IsFinishedOK() function provides very good feature for 
+corruption detection.
+
+The value of the "Range" variable before each bit decoding can not be smaller 
+than ((UInt32)1 << 24). The Normalize() function keeps the "Range" value in 
+described range.
+
+#define kTopValue ((UInt32)1 << 24)
+
+void CRangeDecoder::Normalize()
+{
+  if (Range < kTopValue)
+  {
+    Range <<= 8;
+    Code = (Code << 8) | InStream->ReadByte();
+  }
+}
+
+Notes: if the size of the "Code" variable is larger than 32 bits, it's
+required to keep only low 32 bits of the "Code" variable after the change
+in Normalize() function.
+
+If the LZMA Stream is not corrupted, the value of the "Code" variable is
+always smaller than value of the "Range" variable.
+But the Range Decoder ignores some types of corruptions, so the value of
+the "Code" variable can be equal or larger than value of the "Range" variable
+for some "Corrupted" archives.
+
+
+LZMA uses Range Encoding only with binary symbols of two types:
+  1) binary symbols with fixed and equal probabilities (direct bits)
+  2) binary symbols with predicted probabilities
+
+The DecodeDirectBits() function decodes the sequence of direct bits:
+
+UInt32 CRangeDecoder::DecodeDirectBits(unsigned numBits)
+{
+  UInt32 res = 0;
+  do
+  {
+    Range >>= 1;
+    Code -= Range;
+    UInt32 t = 0 - ((UInt32)Code >> 31);
+    Code += Range & t;
+    
+    if (Code == Range)
+      Corrupted = true;
+    
+    Normalize();
+    res <<= 1;
+    res += t + 1;
+  }
+  while (--numBits);
+  return res;
+}
+
+
+The Bit Decoding with Probability Model
+---------------------------------------
+
+The task of Bit Probability Model is to estimate probabilities of binary
+symbols. And then it provides the Range Decoder with that information.
+The better prediction provides better compression ratio.
+The Bit Probability Model uses statistical data of previous decoded
+symbols.
+
+That estimated probability is presented as 11-bit unsigned integer value
+that represents the probability of symbol "0".
+
+#define kNumBitModelTotalBits 11
+
+Mathematical probabilities can be presented with the following formulas:
+     probability(symbol_0) = prob / 2048.
+     probability(symbol_1) =  1 - Probability(symbol_0) =  
+                           =  1 - prob / 2048 =  
+                           =  (2048 - prob) / 2048
+where the "prob" variable contains 11-bit integer probability counter.
+
+It's recommended to use 16-bit unsigned integer type, to store these 11-bit
+probability values:
+
+typedef UInt16 CProb;
+
+Each probability value must be initialized with value ((1 << 11) / 2),
+that represents the state, where probabilities of symbols 0 and 1 
+are equal to 0.5:
+
+#define PROB_INIT_VAL ((1 << kNumBitModelTotalBits) / 2)
+
+The INIT_PROBS macro is used to initialize the array of CProb variables:
+
+#define INIT_PROBS(p) \
+ { for (unsigned i = 0; i < sizeof(p) / sizeof(p[0]); i++) p[i] = PROB_INIT_VAL; }
+
+
+The DecodeBit() function decodes one bit.
+The LZMA decoder provides the pointer to CProb variable that contains 
+information about estimated probability for symbol 0 and the Range Decoder 
+updates that CProb variable after decoding. The Range Decoder increases 
+estimated probability of the symbol that was decoded:
+
+#define kNumMoveBits 5
+
+unsigned CRangeDecoder::DecodeBit(CProb *prob)
+{
+  unsigned v = *prob;
+  UInt32 bound = (Range >> kNumBitModelTotalBits) * v;
+  unsigned symbol;
+  if (Code < bound)
+  {
+    v += ((1 << kNumBitModelTotalBits) - v) >> kNumMoveBits;
+    Range = bound;
+    symbol = 0;
+  }
+  else
+  {
+    v -= v >> kNumMoveBits;
+    Code -= bound;
+    Range -= bound;
+    symbol = 1;
+  }
+  *prob = (CProb)v;
+  Normalize();
+  return symbol;
+}
+
+
+The Binary Tree of bit model counters
+-------------------------------------
+
+LZMA uses a tree of Bit model variables to decode symbol that needs
+several bits for storing. There are two versions of such trees in LZMA:
+  1) the tree that decodes bits from high bit to low bit (the normal scheme).
+  2) the tree that decodes bits from low bit to high bit (the reverse scheme).
+
+Each binary tree structure supports different size of decoded symbol
+(the size of binary sequence that contains value of symbol).
+If that size of decoded symbol is "NumBits" bits, the tree structure
+uses the array of (1 << NumBits) counters of CProb type.
+But only ((1 << NumBits) - 1) items are used by encoder and decoder.
+The first item (the item with index equal to 0) in array is unused.
+That scheme with unused array's item allows to simplify the code.
+
+unsigned BitTreeReverseDecode(CProb *probs, unsigned numBits, CRangeDecoder *rc)
+{
+  unsigned m = 1;
+  unsigned symbol = 0;
+  for (unsigned i = 0; i < numBits; i++)
+  {
+    unsigned bit = rc->DecodeBit(&probs[m]);
+    m <<= 1;
+    m += bit;
+    symbol |= (bit << i);
+  }
+  return symbol;
+}
+
+template <unsigned NumBits>
+class CBitTreeDecoder
+{
+  CProb Probs[(unsigned)1 << NumBits];
+
+public:
+
+  void Init()
+  {
+    INIT_PROBS(Probs);
+  }
+
+  unsigned Decode(CRangeDecoder *rc)
+  {
+    unsigned m = 1;
+    for (unsigned i = 0; i < NumBits; i++)
+      m = (m << 1) + rc->DecodeBit(&Probs[m]);
+    return m - ((unsigned)1 << NumBits);
+  }
+
+  unsigned ReverseDecode(CRangeDecoder *rc)
+  {
+    return BitTreeReverseDecode(Probs, NumBits, rc);
+  }
+};
+
+
+LZ part of LZMA 
+---------------
+
+LZ part of LZMA describes details about the decoding of MATCHES and LITERALS.
+
+
+The Literal Decoding
+--------------------
+
+The LZMA Decoder uses (1 << (lc + lp)) tables with CProb values, where 
+each table contains 0x300 CProb values:
+
+  CProb *LitProbs;
+
+  void CreateLiterals()
+  {
+    LitProbs = new CProb[(UInt32)0x300 << (lc + lp)];
+  }
+  
+  void InitLiterals()
+  {
+    UInt32 num = (UInt32)0x300 << (lc + lp);
+    for (UInt32 i = 0; i < num; i++)
+      LitProbs[i] = PROB_INIT_VAL;
+  }
+
+To select the table for decoding it uses the context that consists of
+(lc) high bits from previous literal and (lp) low bits from value that
+represents current position in outputStream.
+
+If (State >= 7), the Literal Decoder also uses "matchByte" that represents
+the byte in OutputStream at the position that is DISTANCE bytes before
+current position, where the DISTANCE is the distance in DISTANCE-LENGTH pair
+of latest decoded match.
+
+The following code decodes one literal and puts it to Sliding Window buffer:
+
+  void DecodeLiteral(unsigned state, UInt32 rep0)
+  {
+    unsigned prevByte = 0;
+    if (!OutWindow.IsEmpty())
+      prevByte = OutWindow.GetByte(1);
+    
+    unsigned symbol = 1;
+    unsigned litState = ((OutWindow.TotalPos & ((1 << lp) - 1)) << lc) + (prevByte >> (8 - lc));
+    CProb *probs = &LitProbs[(UInt32)0x300 * litState];
+    
+    if (state >= 7)
+    {
+      unsigned matchByte = OutWindow.GetByte(rep0 + 1);
+      do
+      {
+        unsigned matchBit = (matchByte >> 7) & 1;
+        matchByte <<= 1;
+        unsigned bit = RangeDec.DecodeBit(&probs[((1 + matchBit) << 8) + symbol]);
+        symbol = (symbol << 1) | bit;
+        if (matchBit != bit)
+          break;
+      }
+      while (symbol < 0x100);
+    }
+    while (symbol < 0x100)
+      symbol = (symbol << 1) | RangeDec.DecodeBit(&probs[symbol]);
+    OutWindow.PutByte((Byte)(symbol - 0x100));
+  }
+
+
+The match length decoding
+-------------------------
+
+The match length decoder returns normalized (zero-based value) 
+length of match. That value can be converted to real length of the match 
+with the following code:
+
+#define kMatchMinLen 2
+
+    matchLen = len + kMatchMinLen;
+
+The match length decoder can return the values from 0 to 271.
+And the corresponded real match length values can be in the range 
+from 2 to 273.
+
+The following scheme is used for the match length encoding:
+
+  Binary encoding    Binary Tree structure    Zero-based match length 
+  sequence                                    (binary + decimal):
+
+  0 xxx              LowCoder[posState]       xxx
+  1 0 yyy            MidCoder[posState]       yyy + 8
+  1 1 zzzzzzzz       HighCoder                zzzzzzzz + 16
+
+LZMA uses bit model variable "Choice" to decode the first selection bit.
+
+If the first selection bit is equal to 0, the decoder uses binary tree 
+  LowCoder[posState] to decode 3-bit zero-based match length (xxx).
+
+If the first selection bit is equal to 1, the decoder uses bit model 
+  variable "Choice2" to decode the second selection bit.
+
+  If the second selection bit is equal to 0, the decoder uses binary tree 
+    MidCoder[posState] to decode 3-bit "yyy" value, and zero-based match
+    length is equal to (yyy + 8).
+
+  If the second selection bit is equal to 1, the decoder uses binary tree 
+    HighCoder to decode 8-bit "zzzzzzzz" value, and zero-based 
+    match length is equal to (zzzzzzzz + 16).
+
+LZMA uses "posState" value as context to select the binary tree 
+from LowCoder and MidCoder binary tree arrays:
+
+    unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1);
+
+The full code of the length decoder:
+
+class CLenDecoder
+{
+  CProb Choice;
+  CProb Choice2;
+  CBitTreeDecoder<3> LowCoder[1 << kNumPosBitsMax];
+  CBitTreeDecoder<3> MidCoder[1 << kNumPosBitsMax];
+  CBitTreeDecoder<8> HighCoder;
+
+public:
+
+  void Init()
+  {
+    Choice = PROB_INIT_VAL;
+    Choice2 = PROB_INIT_VAL;
+    HighCoder.Init();
+    for (unsigned i = 0; i < (1 << kNumPosBitsMax); i++)
+    {
+      LowCoder[i].Init();
+      MidCoder[i].Init();
+    }
+  }
+
+  unsigned Decode(CRangeDecoder *rc, unsigned posState)
+  {
+    if (rc->DecodeBit(&Choice) == 0)
+      return LowCoder[posState].Decode(rc);
+    if (rc->DecodeBit(&Choice2) == 0)
+      return 8 + MidCoder[posState].Decode(rc);
+    return 16 + HighCoder.Decode(rc);
+  }
+};
+
+The LZMA decoder uses two instances of CLenDecoder class.
+The first instance is for the matches of "Simple Match" type,
+and the second instance is for the matches of "Rep Match" type:
+
+  CLenDecoder LenDecoder;
+  CLenDecoder RepLenDecoder;
+
+
+The match distance decoding
+---------------------------
+
+LZMA supports dictionary sizes up to 4 GiB minus 1.
+The value of match distance (decoded by distance decoder) can be 
+from 1 to 2^32. But the distance value that is equal to 2^32 is used to
+indicate the "End of stream" marker. So real largest match distance 
+that is used for LZ-window match is (2^32 - 1).
+
+LZMA uses normalized match length (zero-based length) 
+to calculate the context state "lenState" to decode the distance value:
+
+#define kNumLenToPosStates 4
+
+    unsigned lenState = len;
+    if (lenState > kNumLenToPosStates - 1)
+      lenState = kNumLenToPosStates - 1;
+
+The distance decoder returns the "dist" value that is zero-based value 
+of match distance. The real match distance can be calculated with the
+following code:
+  
+  matchDistance = dist + 1; 
+
+The state of the distance decoder and the initialization code: 
+
+  #define kEndPosModelIndex 14
+  #define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
+  #define kNumAlignBits 4
+
+  CBitTreeDecoder<6> PosSlotDecoder[kNumLenToPosStates];
+  CProb PosDecoders[1 + kNumFullDistances - kEndPosModelIndex];
+  CBitTreeDecoder<kNumAlignBits> AlignDecoder;
+
+  void InitDist()
+  {
+    for (unsigned i = 0; i < kNumLenToPosStates; i++)
+      PosSlotDecoder[i].Init();
+    AlignDecoder.Init();
+    INIT_PROBS(PosDecoders);
+  }
+
+At first stage the distance decoder decodes 6-bit "posSlot" value with bit
+tree decoder from PosSlotDecoder array. It's possible to get 2^6=64 different 
+"posSlot" values.
+
+    unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec);
+
+The encoding scheme for distance value is shown in the following table:
+
+posSlot (decimal) /
+      zero-based distance (binary)
+ 0    0
+ 1    1
+ 2    10
+ 3    11
+
+ 4    10 x
+ 5    11 x
+ 6    10 xx
+ 7    11 xx
+ 8    10 xxx
+ 9    11 xxx
+10    10 xxxx
+11    11 xxxx
+12    10 xxxxx
+13    11 xxxxx
+
+14    10 yy zzzz
+15    11 yy zzzz
+16    10 yyy zzzz
+17    11 yyy zzzz
+...
+62    10 yyyyyyyyyyyyyyyyyyyyyyyyyy zzzz
+63    11 yyyyyyyyyyyyyyyyyyyyyyyyyy zzzz
+
+where 
+  "x ... x" means the sequence of binary symbols encoded with binary tree and 
+      "Reverse" scheme. It uses separated binary tree for each posSlot from 4 to 13.
+  "y" means direct bit encoded with range coder.
+  "zzzz" means the sequence of four binary symbols encoded with binary
+      tree with "Reverse" scheme, where one common binary tree "AlignDecoder"
+      is used for all posSlot values.
+
+If (posSlot < 4), the "dist" value is equal to posSlot value.
+
+If (posSlot >= 4), the decoder uses "posSlot" value to calculate the value of
+  the high bits of "dist" value and the number of the low bits.
+
+  If (4 <= posSlot < kEndPosModelIndex), the decoder uses bit tree decoders.
+    (one separated bit tree decoder per one posSlot value) and "Reverse" scheme.
+    In this implementation we use one CProb array "PosDecoders" that contains 
+    all CProb variables for all these bit decoders.
+  
+  if (posSlot >= kEndPosModelIndex), the middle bits are decoded as direct 
+    bits from RangeDecoder and the low 4 bits are decoded with a bit tree 
+    decoder "AlignDecoder" with "Reverse" scheme.
+
+The code to decode zero-based match distance:
+  
+  unsigned DecodeDistance(unsigned len)
+  {
+    unsigned lenState = len;
+    if (lenState > kNumLenToPosStates - 1)
+      lenState = kNumLenToPosStates - 1;
+    
+    unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec);
+    if (posSlot < 4)
+      return posSlot;
+    
+    unsigned numDirectBits = (unsigned)((posSlot >> 1) - 1);
+    UInt32 dist = ((2 | (posSlot & 1)) << numDirectBits);
+    if (posSlot < kEndPosModelIndex)
+      dist += BitTreeReverseDecode(PosDecoders + dist - posSlot, numDirectBits, &RangeDec);
+    else
+    {
+      dist += RangeDec.DecodeDirectBits(numDirectBits - kNumAlignBits) << kNumAlignBits;
+      dist += AlignDecoder.ReverseDecode(&RangeDec);
+    }
+    return dist;
+  }
+
+
+
+LZMA Decoding modes
+-------------------
+
+There are 2 types of LZMA streams:
+
+1) The stream with "End of stream" marker.
+2) The stream without "End of stream" marker.
+
+And the LZMA Decoder supports 3 modes of decoding:
+
+1) The unpack size is undefined. The LZMA decoder stops decoding after 
+   getting "End of stream" marker. 
+   The input variables for that case:
+    
+      markerIsMandatory = true
+      unpackSizeDefined = false
+      unpackSize contains any value
+
+2) The unpack size is defined and LZMA decoder supports both variants, 
+   where the stream can contain "End of stream" marker or the stream is
+   finished without "End of stream" marker. The LZMA decoder must detect 
+   any of these situations.
+   The input variables for that case:
+    
+      markerIsMandatory = false
+      unpackSizeDefined = true
+      unpackSize contains unpack size
+
+3) The unpack size is defined and the LZMA stream must contain 
+   "End of stream" marker
+   The input variables for that case:
+    
+      markerIsMandatory = true
+      unpackSizeDefined = true
+      unpackSize contains unpack size
+
+
+The main loop of decoder
+------------------------
+
+The main loop of LZMA decoder:
+
+Initialize the LZMA state.
+loop
+{
+  // beginning of loop
+  Check "end of stream" conditions.
+  Decode Type of MATCH / LITERAL.
+    If it's LITERAL, decode LITERAL value and put the LITERAL to Window.
+    If it's MATCH, decode the length of match and the match distance.
+        Check error conditions, check end of stream conditions and copy
+        the sequence of match bytes from sliding window to current position
+        in window.
+  Go to beginning of loop
+}
+
+The reference implementation of LZMA decoder uses "unpackSize" variable
+to keep the number of remaining bytes in output stream. So it reduces 
+"unpackSize" value after each decoded LITERAL or MATCH.
+
+The following code contains the "end of stream" condition check at the start
+of the loop:
+
+    if (unpackSizeDefined && unpackSize == 0 && !markerIsMandatory)
+      if (RangeDec.IsFinishedOK())
+        return LZMA_RES_FINISHED_WITHOUT_MARKER;
+
+LZMA uses three types of matches:
+
+1) "Simple Match" -     the match with distance value encoded with bit models.
+
+2) "Rep Match" -        the match that uses the distance from distance
+                        history table.
+
+3) "Short Rep Match" -  the match of single byte length, that uses the latest 
+                        distance from distance history table.
+
+The LZMA decoder keeps the history of latest 4 match distances that were used 
+by decoder. That set of 4 variables contains zero-based match distances and 
+these variables are initialized with zero values:
+
+  UInt32 rep0 = 0, rep1 = 0, rep2 = 0, rep3 = 0;
+
+The LZMA decoder uses binary model variables to select type of MATCH or LITERAL:
+
+#define kNumStates 12
+#define kNumPosBitsMax 4
+
+  CProb IsMatch[kNumStates << kNumPosBitsMax];
+  CProb IsRep[kNumStates];
+  CProb IsRepG0[kNumStates];
+  CProb IsRepG1[kNumStates];
+  CProb IsRepG2[kNumStates];
+  CProb IsRep0Long[kNumStates << kNumPosBitsMax];
+
+The decoder uses "state" variable value to select exact variable 
+from "IsRep", "IsRepG0", "IsRepG1" and "IsRepG2" arrays.
+The "state" variable can take values from 0 to 11.
+Initial value for "state" variable is zero:
+
+  unsigned state = 0;
+
+The "state" variable is updated after each LITERAL or MATCH with one of the
+following functions:
+
+unsigned UpdateState_Literal(unsigned state)
+{
+  if (state < 4) return 0;
+  else if (state < 10) return state - 3;
+  else return state - 6;
+}
+unsigned UpdateState_Match   (unsigned state) { return state < 7 ? 7 : 10; }
+unsigned UpdateState_Rep     (unsigned state) { return state < 7 ? 8 : 11; }
+unsigned UpdateState_ShortRep(unsigned state) { return state < 7 ? 9 : 11; }
+
+The decoder calculates "state2" variable value to select exact variable from 
+"IsMatch" and "IsRep0Long" arrays:
+
+unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1);
+unsigned state2 = (state << kNumPosBitsMax) + posState;
+
+The decoder uses the following code flow scheme to select exact 
+type of LITERAL or MATCH:
+
+IsMatch[state2] decode
+  0 - the Literal
+  1 - the Match
+    IsRep[state] decode
+      0 - Simple Match
+      1 - Rep Match
+        IsRepG0[state] decode
+          0 - the distance is rep0
+            IsRep0Long[state2] decode
+              0 - Short Rep Match
+              1 - Rep Match 0
+          1 - 
+            IsRepG1[state] decode
+              0 - Rep Match 1
+              1 - 
+                IsRepG2[state] decode
+                  0 - Rep Match 2
+                  1 - Rep Match 3
+
+
+LITERAL symbol
+--------------
+If the value "0" was decoded with IsMatch[state2] decoding, we have "LITERAL" type.
+
+At first the LZMA decoder must check that it doesn't exceed 
+specified uncompressed size:
+
+      if (unpackSizeDefined && unpackSize == 0)
+        return LZMA_RES_ERROR;
+
+Then it decodes literal value and puts it to sliding window:
+
+      DecodeLiteral(state, rep0);
+
+Then the decoder must update the "state" value and the "unpackSize" value:
+
+      state = UpdateState_Literal(state);
+      unpackSize--;
+
+Then the decoder must go to the beginning of the main loop to decode the next Match or Literal.
+
+
+Simple Match
+------------
+
+If the value "1" was decoded with IsMatch[state2] decoding and the value "0"
+was decoded with IsRep[state] decoding, we have the "Simple Match" type.
+
+The distance history table is updated with the following scheme:
+    
+      rep3 = rep2;
+      rep2 = rep1;
+      rep1 = rep0;
+
+The zero-based length is decoded with "LenDecoder":
+
+      len = LenDecoder.Decode(&RangeDec, posState);
+
+The state is updated with the UpdateState_Match function:
+
+      state = UpdateState_Match(state);
+
+and the new "rep0" value is decoded with DecodeDistance:
+
+      rep0 = DecodeDistance(len);
+
+That "rep0" will be used as zero-based distance for current match.
+
+If the value of "rep0" is equal to 0xFFFFFFFF, it means that we have 
+"End of stream" marker, so we can stop decoding and check finishing 
+condition in Range Decoder:
+
+      if (rep0 == 0xFFFFFFFF)
+        return RangeDec.IsFinishedOK() ?
+            LZMA_RES_FINISHED_WITH_MARKER :
+            LZMA_RES_ERROR;
+
+If uncompressed size is defined, LZMA decoder must check that it doesn't 
+exceed that specified uncompressed size:
+
+      if (unpackSizeDefined && unpackSize == 0)
+        return LZMA_RES_ERROR;
+
+Also the decoder must check that the "rep0" value is smaller than the dictionary size
+and is not larger than the number of already decoded bytes:
+
+      if (rep0 >= dictSize || !OutWindow.CheckDistance(rep0))
+        return LZMA_RES_ERROR;
+
+Then the decoder must copy match bytes as described in 
+"The match symbols copying" section.
+
+
+Rep Match
+---------
+
+If the LZMA decoder has decoded the value "1" with IsRep[state] variable,
+we have "Rep Match" type.
+
+At first the LZMA decoder must check that it doesn't exceed 
+specified uncompressed size:
+
+      if (unpackSizeDefined && unpackSize == 0)
+        return LZMA_RES_ERROR;
+
+Also the decoder must return error, if the LZ window is empty:
+
+      if (OutWindow.IsEmpty())
+        return LZMA_RES_ERROR;
+
+If the match type is "Rep Match", the decoder uses one of the 4 variables of
+distance history table to get the value of distance for current match.
+And there are 4 corresponding ways of decoding flow. 
+
+The decoder updates the distance history with the following scheme
+depending on the type of match:
+
+- "Rep Match 0" or "Short Rep Match":
+      ; LZMA doesn't update the distance history    
+
+- "Rep Match 1":
+      UInt32 dist = rep1;
+      rep1 = rep0;
+      rep0 = dist;
+
+- "Rep Match 2":
+      UInt32 dist = rep2;
+      rep2 = rep1;
+      rep1 = rep0;
+      rep0 = dist;
+
+- "Rep Match 3":
+      UInt32 dist = rep3;
+      rep3 = rep2;
+      rep2 = rep1;
+      rep1 = rep0;
+      rep0 = dist;
+
+Then the decoder decodes exact subtype of "Rep Match" using "IsRepG0", "IsRep0Long",
+"IsRepG1", "IsRepG2".
+
+If the subtype is "Short Rep Match", the decoder updates the state, puts
+one byte from the window to the current position in the window and goes to the next
+MATCH/LITERAL symbol (the beginning of the main loop):
+
+          state = UpdateState_ShortRep(state);
+          OutWindow.PutByte(OutWindow.GetByte(rep0 + 1));
+          unpackSize--;
+          continue;
+
+In other cases (Rep Match 0/1/2/3), it decodes the zero-based 
+length of match with "RepLenDecoder" decoder:
+
+      len = RepLenDecoder.Decode(&RangeDec, posState);
+
+Then it updates the state:
+
+      state = UpdateState_Rep(state);
+
+Then the decoder must copy match bytes as described in
+"The match symbols copying" section.
+
+
+The match symbols copying
+-------------------------
+
+If we have the match (Simple Match or Rep Match 0/1/2/3), the decoder must
+copy the sequence of bytes with calculated match distance and match length.
+If uncompressed size is defined, LZMA decoder must check that it doesn't 
+exceed that specified uncompressed size:
+
+    len += kMatchMinLen;
+    bool isError = false;
+    if (unpackSizeDefined && unpackSize < len)
+    {
+      len = (unsigned)unpackSize;
+      isError = true;
+    }
+    OutWindow.CopyMatch(rep0 + 1, len);
+    unpackSize -= len;
+    if (isError)
+      return LZMA_RES_ERROR;
+
+Then the decoder must go to the beginning of the main loop to decode the next MATCH or LITERAL.
+
+
+
+NOTES
+-----
+
+This specification doesn't describe the variant of decoder implementation 
+that supports partial decoding. Such partial decoding case can require some 
+changes in "end of stream" condition checks code. Also such code 
+can use additional status codes, returned by decoder.
+
+This specification uses C++ code with templates to simplify the description.
+The optimized version of LZMA decoder doesn't need templates.
+Such optimized version can use just two arrays of CProb variables:
+  1) The dynamic array of CProb variables allocated for the Literal Decoder.
+  2) The one common array that contains all other CProb variables.
+
+
+References:      
+
+1. G. N. N. Martin, Range encoding: an algorithm for removing redundancy 
+   from a digitized message, Video & Data Recording Conference, 
+   Southampton, UK, July 24-27, 1979.
diff --git a/crates/rc-zip/testdata/test-zips/.gitignore b/crates/rc-zip/testdata/test-zips/.gitignore
new file mode 100644
index 0000000..106111b
--- /dev/null
+++ b/crates/rc-zip/testdata/test-zips/.gitignore
@@ -0,0 +1 @@
+found-me.txt
\ No newline at end of file
diff --git a/crates/rc-zip/testdata/test-zips/found-me.txt b/crates/rc-zip/testdata/test-zips/found-me.txt
deleted file mode 100644
index e69de29..0000000

From 414e91a6276e4156e61433270bb5ff39364a9306 Mon Sep 17 00:00:00 2001
From: Amos Wenger <amos@bearcove.net>
Date: Fri, 26 Jan 2024 19:30:56 +0100
Subject: [PATCH 5/5] I have idea.

---
 crates/rc-zip/src/error.rs                    |  3 ++
 crates/rc-zip/src/reader/sync/entry_reader.rs | 39 ++++++++++++-------
 2 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/crates/rc-zip/src/error.rs b/crates/rc-zip/src/error.rs
index 28f2510..78f7f3e 100644
--- a/crates/rc-zip/src/error.rs
+++ b/crates/rc-zip/src/error.rs
@@ -97,6 +97,9 @@ pub enum FormatError {
     /// The CRC-32 checksum didn't match.
     #[error("checksum didn't match: expected {expected:x?}, got {actual:x?}")]
     WrongChecksum { expected: u32, actual: u32 },
+
+    #[error("lzma properties larger than max")]
+    LzmaPropertiesLargerThanMax,
 }
 
 impl From<Error> for std::io::Error {
diff --git a/crates/rc-zip/src/reader/sync/entry_reader.rs b/crates/rc-zip/src/reader/sync/entry_reader.rs
index b78ce6b..fc82b7c 100644
--- a/crates/rc-zip/src/reader/sync/entry_reader.rs
+++ b/crates/rc-zip/src/reader/sync/entry_reader.rs
@@ -314,36 +314,47 @@ where
                         }
 
                         // from `lzma-specification.txt`
-                        fn decode_properties(mut d: u8) -> LzmaProperties {
+                        fn decode_properties(mut d: u8) -> Result<LzmaProperties, FormatError> {
+                            if d >= (9 * 5 * 5) {
+                                return Err(FormatError::LzmaPropertiesLargerThanMax);
+                            }
+
                             let lc = d % 9;
                             d /= 9;
                             let pb = d / 5;
                             let lp = d % 5;
 
-                            LzmaProperties {
+                            Ok(LzmaProperties {
                                 literal_context_bits: lc,
                                 literal_pos_state_bits: lp,
                                 pos_state_bits: pb,
-                            }
+                            })
                         }
 
                         let props = decode_properties(bits_byte);
+                        trace!("LZMA properties: {:#?}", props);
+
                         const LZMA_DIC_MIN: u32 = 1 << 12;
+                        let dict_size_read = limited_reader.read_u32::<LittleEndian>()?;
+                        trace!("LZMA dictionary size (raw): {}", dict_size_read);
                         let dict_size: u32 =
-                            std::cmp::min(LZMA_DIC_MIN, limited_reader.read_u32::<LittleEndian>()?);
+                            std::cmp::max(LZMA_DIC_MIN, dict_size_read);
+                        trace!("LZMA dictionary size: {}", dict_size);
+
+                        // let mut opts = xz2::stream::LzmaOptions::new_preset(0)?;
+                        // opts.dict_size(dict_size);
+                        // opts.position_bits(props.pos_state_bits as _);
+                        // opts.literal_position_bits(props.literal_pos_state_bits as _);
+                        // opts.literal_context_bits(props.literal_context_bits as _);
 
-                        let mut opts = xz2::stream::LzmaOptions::new_preset(0)?;
-                        opts.dict_size(dict_size);
-                        opts.position_bits(props.pos_state_bits as _);
-                        opts.literal_position_bits(props.literal_pos_state_bits as _);
-                        opts.literal_context_bits(props.literal_context_bits as _);
+                        // let mut filters = xz2::stream::Filters::new();
+                        // filters.lzma2(&opts);
 
-                        let mut filters = xz2::stream::Filters::new();
-                        filters.lzma2(&opts);
-                        // let stream = xz2::stream::Stream::new_lzma_decoder(&filters)?;
+                        // uncompressed size is stored as a little-endian 64-bit integer
+                        let uncompressed_size: u64 = limited_reader.read_u64::<LittleEndian>()?;
+                        trace!("LZMA uncompressed size: {}", uncompressed_size);
 
-                        // let stream = xz2::stream::Stream::new_lzma_encoder(&opts)?;
-                        let stream = xz2::stream::Stream::new_stream_encoder(&filters, xz2::stream::Check::None)?;
+                        let stream = xz2::stream::Stream::new_lzma_decoder(128 * 1024 * 1024)?;
 
                         Box::new(xz2::read::XzDecoder::new_stream(limited_reader, stream))
                     } else {