From 97cba4dc2b33d919e45a478f19c9c0e053bbab07 Mon Sep 17 00:00:00 2001 From: SymmetricChaos <42520289+SymmetricChaos@users.noreply.github.com> Date: Thu, 21 Nov 2024 19:26:40 -0500 Subject: [PATCH] update for RLE with bytes --- codes/src/compression/run_length_bytes.rs | 78 +++++++++++++++++++---- codes/src/ids/code_descriptions.json | 8 ++- codes/src/ids/code_id.rs | 1 + src/code_panel/rle_byte_controls.rs | 75 ++++++++++++++++++++++ src/code_panel/rle_controls.rs | 13 ++-- 5 files changed, 157 insertions(+), 18 deletions(-) create mode 100644 src/code_panel/rle_byte_controls.rs diff --git a/codes/src/compression/run_length_bytes.rs b/codes/src/compression/run_length_bytes.rs index 44619dff..aa174ea8 100644 --- a/codes/src/compression/run_length_bytes.rs +++ b/codes/src/compression/run_length_bytes.rs @@ -2,7 +2,37 @@ use crate::{errors::CodeError, traits::Code}; use num::Integer; use utils::byte_formatting::ByteFormat; -fn bytes_to_rle(bytes: &[u8]) -> Vec { +// To be used in a more complex encoding scheme. 
+// u64 allows recording a single repetition that takes up 18 exabytes and thus should +// avoid ever overflowing +// pub fn u64_leb128(n: u64) -> Vec { +// if n == 0 { +// return vec![0]; +// } +// let mut n = n; +// let mut out = Vec::with_capacity(8); +// while n != 0 { +// let mut b = (n as u8) & 0x7f; +// n = n >> 7; +// if n != 0 { +// b |= 0x80; +// } +// out.push(b); +// } +// out +// } + +// pub fn leb128_to_u64>(v: T) -> u64 { +// let mut out = 0; +// let mut shift = 0; +// for byte in v.as_ref() { +// out |= ((byte & 0x7f) as u64) << shift; +// shift += 7; +// } +// out +// } + +fn bytes_to_rle_one_byte(bytes: &[u8]) -> Vec { let mut out = Vec::new(); let mut cur = bytes[0]; let mut ctr = 0_u8; @@ -25,7 +55,7 @@ fn bytes_to_rle(bytes: &[u8]) -> Vec { out } -fn rle_to_bytes(bytes: &[u8]) -> Vec { +fn rle_to_bytes_one_byte(bytes: &[u8]) -> Vec { if !bytes.len().is_even() { panic!("the rle must be an even number of bytes") } @@ -40,28 +70,54 @@ fn rle_to_bytes(bytes: &[u8]) -> Vec { out } -pub struct RunLengthEncoding { +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum RleMethod { + OneByte, + Leb128, +} + +pub struct RunLengthEncodingBytes { pub input_format: ByteFormat, pub output_format: ByteFormat, + pub method: RleMethod, } -impl Default for RunLengthEncoding { +impl Default for RunLengthEncodingBytes { fn default() -> Self { Self { input_format: ByteFormat::Hex, output_format: ByteFormat::Hex, + method: RleMethod::OneByte, } } } -impl Code for RunLengthEncoding { +impl RunLengthEncodingBytes { + + fn compress(&self, bytes: &[u8]) -> Vec { + match self.method { + RleMethod::OneByte => bytes_to_rle_one_byte(bytes), + RleMethod::Leb128 => todo!(), + } + } + + fn decompress(&self, bytes: &[u8]) -> Vec { + match self.method { + RleMethod::OneByte => rle_to_bytes_one_byte(bytes), + RleMethod::Leb128 => todo!(), + } + } + +} + +impl Code for RunLengthEncodingBytes { fn encode(&self, text: &str) -> Result { let bytes = self .input_format 
.text_to_bytes(text) .map_err(|_| CodeError::input("invalid input bytes"))?; - Ok(self.output_format.byte_slice_to_text(&bytes_to_rle(&bytes))) + Ok(self.output_format.byte_slice_to_text(&self.compress(&bytes))) } fn decode(&self, text: &str) -> Result { @@ -74,7 +130,7 @@ impl Code for RunLengthEncoding { return Err(CodeError::input("the rle must be an even number of bytes")); } - Ok(self.output_format.byte_slice_to_text(&rle_to_bytes(&bytes))) + Ok(self.output_format.byte_slice_to_text(&self.decompress(&bytes))) } } @@ -89,19 +145,19 @@ mod rle_tests { fn check_overflow() { let bytes = vec![0_u8; 300]; let rle = vec![0, 255, 0, 45]; - assert_eq!(rle, bytes_to_rle(&bytes)); - assert_eq!(bytes, rle_to_bytes(&rle)); + assert_eq!(rle, bytes_to_rle_one_byte(&bytes)); + assert_eq!(bytes, rle_to_bytes_one_byte(&rle)); } #[test] fn encode_test() { - let code = RunLengthEncoding::default(); + let code = RunLengthEncodingBytes::default(); assert_eq!(ENCODEDTEXT, code.encode(PLAINTEXT).unwrap()) } #[test] fn decode_test() { - let code = RunLengthEncoding::default(); + let code = RunLengthEncodingBytes::default(); assert_eq!(PLAINTEXT, code.decode(ENCODEDTEXT).unwrap()) } } diff --git a/codes/src/ids/code_descriptions.json b/codes/src/ids/code_descriptions.json index fcd90f92..caefefe0 100644 --- a/codes/src/ids/code_descriptions.json +++ b/codes/src/ids/code_descriptions.json @@ -326,7 +326,13 @@ "Traits": null }, "Run Length Encoding": { - "Description": "Run Length Encoding (RLE) compresses data contains long strings of identical information by replacing them with one instance of the symbol followed by a count of how many should occur (the length of the run). Efficient RLE requires both that the data to be compressed have sufficient repetition and that the encoding not waste too much space on short runs. 
The version of RLE presented here encodes text but for general compression the encoding works on bytes.", + "Description": "Run Length Encoding (RLE) compresses data that contains long strings of identical information by replacing them with one instance of the symbol followed by a count of how many should occur (the length of the run). Efficient RLE requires both that the data to be compressed have sufficient repetition and that the encoding not waste too much space on short runs. The version of RLE presented here encodes text as text (both UTF-8) but this usage is rare in practice. The Run Length Encoding Bytes page shows encoding that operates on arbitrary bytes.", + "Authors": null, + "Publication": null, + "Traits": null + }, + "Run Length Encoding Bytes": { + "Description": "Run Length Encoding (RLE) compresses data that contains long strings of identical information by replacing them with one instance of the symbol followed by a count of how many should occur (the length of the run). Efficient RLE requires both that the data to be compressed have sufficient repetition and that the encoding not waste too much space on short runs. The simple encoding scheme here converts a sequence of bytes into a sequence of pairs of bytes. In these pairs the first byte is the one to be repeated and the second byte is the number of times it is repeated. If a byte is repeated more than 255 times the additional repetitions are encoded as more pairs. If bytes are not repeated more than 255 times this is reasonably efficient. However, if there are very long runs it performs relatively poorly. 
The complex encoding works similarly but the count is a variable length integer (LEB-128) which allows runs of enormous length to be compressed.", "Authors": null, "Publication": null, "Traits": null diff --git a/codes/src/ids/code_id.rs b/codes/src/ids/code_id.rs index dbf04b53..f9fcf5f5 100644 --- a/codes/src/ids/code_id.rs +++ b/codes/src/ids/code_id.rs @@ -76,6 +76,7 @@ code_ids_and_names!( Romaji, "Romaji"; RomanNumeral, "Roman Numeral"; RunLengthEncoding, "Run Length Encoding"; + RunLengthEncodingBytes, "Run Length Encoding Bytes"; Skey, "S/KEY"; SpellingAlphabet, "Spelling Alphabet"; Tap, "Tap"; diff --git a/src/code_panel/rle_byte_controls.rs b/src/code_panel/rle_byte_controls.rs new file mode 100644 index 00000000..224439fb --- /dev/null +++ b/src/code_panel/rle_byte_controls.rs @@ -0,0 +1,75 @@ +use crate::ui_elements::UiElements; + +use super::CodeFrame; +use codes::compression::{run_length::RunLengthEncoding, run_length_bytes::RunLengthEncodingBytes}; + +pub struct RleFrame { + byte_code: RunLengthEncodingBytes, +} + +impl Default for RleFrame { + fn default() -> Self { + Self { + byte_code: Default::default(), + } + } +} + +impl CodeFrame for RleFrame { + fn ui(&mut self, ui: &mut egui::Ui) { + ui.hyperlink_to( + "see the code", + "https://github.com/SymmetricChaos/crypto-gui/blob/master/codes/src/compression/run_length_bytes.rs", + ); + + + // { + // let this = &mut *ui; + // let mut changed = false; + // egui::CollapsingHeader::new("Input Format") + // .default_open(true) + // .show(this, |ui| { + // ui.label( + // "Input can be text, hexadecimal, Base64, or binary. 
All interpreted as bytes.", + // ); + // ui.horizontal(|ui| { + // for variant in ByteFormat::iter() { + // if ui + // .selectable_value(&mut self.byte_code., variant, variant.to_string()) + // .clicked() + // { + // changed = true; + // } + // } + // }); + // }); + + // this.add_space(8.0); + + // egui::CollapsingHeader::new("Output Format") + // .default_open(true) + // .show(this, |ui| { + // ui.label( + // "Output can be text, hexadecimal, Base64, or binary. All interpreted as bytes.", + // ); + // ui.horizontal(|ui| { + // for variant in ByteFormat::iter() { + // if ui + // .selectable_value(output, variant, variant.to_string()) + // .clicked() + // { + // changed = true; + // } + // } + // }); + // }); + // changed + // }; + + ui.add_space(16.0); + } + + fn code(&self) -> &dyn codes::traits::Code { + &self.byte_code + } +} diff --git a/src/code_panel/rle_controls.rs b/src/code_panel/rle_controls.rs index d80b8f0d..aecb0485 100644 --- a/src/code_panel/rle_controls.rs +++ b/src/code_panel/rle_controls.rs @@ -1,14 +1,16 @@ +use crate::ui_elements::UiElements; + use super::CodeFrame; -use codes::compression::run_length::RunLengthEncoding; +use codes::compression::{run_length::RunLengthEncoding, run_length_bytes::RunLengthEncodingBytes}; pub struct RleFrame { - code: RunLengthEncoding, + text_code: RunLengthEncoding, } impl Default for RleFrame { fn default() -> Self { Self { - code: Default::default(), + text_code: Default::default(), } } } @@ -18,12 +20,11 @@ impl CodeFrame for RleFrame { ui.hyperlink_to( "see the code", "https://github.com/SymmetricChaos/crypto-gui/blob/master/codes/src/compression/run_length.rs", - ); - + ); ui.add_space(16.0); } fn code(&self) -> &dyn codes::traits::Code { - &self.code + &self.text_code } }