Skip to content

Commit

Permalink
update for RLE with bytes
Browse files Browse the repository at this point in the history
  • Loading branch information
SymmetricChaos committed Nov 22, 2024
1 parent 72227dd commit 97cba4d
Show file tree
Hide file tree
Showing 5 changed files with 157 additions and 18 deletions.
78 changes: 67 additions & 11 deletions codes/src/compression/run_length_bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,37 @@ use crate::{errors::CodeError, traits::Code};
use num::Integer;
use utils::byte_formatting::ByteFormat;

fn bytes_to_rle(bytes: &[u8]) -> Vec<u8> {
// To be used in a more complex encoding scheme.
// u64 allows recording a single repetition that takes up 18 exabytes and thus should
// avoid ever overflowing
// pub fn u64_leb128(n: u64) -> Vec<u8> {
// if n == 0 {
// return vec![0];
// }
// let mut n = n;
// let mut out = Vec::with_capacity(8);
// while n != 0 {
// let mut b = (n as u8) & 0x7f;
// n = n >> 7;
// if n != 0 {
// b |= 0x80;
// }
// out.push(b);
// }
// out
// }

// pub fn leb128_to_u64<T: AsRef<[u8]>>(v: T) -> u64 {
// let mut out = 0;
// let mut shift = 0;
// for byte in v.as_ref() {
// out |= ((byte & 0x7f) as u64) << shift;
// shift += 7;
// }
// out
// }

fn bytes_to_rle_one_byte(bytes: &[u8]) -> Vec<u8> {
let mut out = Vec::new();
let mut cur = bytes[0];
let mut ctr = 0_u8;
Expand All @@ -25,7 +55,7 @@ fn bytes_to_rle(bytes: &[u8]) -> Vec<u8> {
out
}

fn rle_to_bytes(bytes: &[u8]) -> Vec<u8> {
fn rle_to_bytes_one_byte(bytes: &[u8]) -> Vec<u8> {
if !bytes.len().is_even() {
panic!("the rle must be an even number of bytes")
}
Expand All @@ -40,28 +70,54 @@ fn rle_to_bytes(bytes: &[u8]) -> Vec<u8> {
out
}

pub struct RunLengthEncoding {
/// Selects how the length of each run is stored in the compressed output.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum RleMethod {
/// Each run is written as a (byte, count) pair with a single-byte count, so a run
/// longer than 255 is split across several pairs (300 zero bytes become `0, 255, 0, 45`).
OneByte,
/// Intended to store the count as a variable-length LEB128 integer. Not implemented
/// yet: `compress` and `decompress` hit `todo!()` when this variant is selected.
Leb128,
}

/// Run length encoding that operates on bytes rather than on text symbols.
pub struct RunLengthEncodingBytes {
/// Format `encode` uses to parse its input text into bytes.
pub input_format: ByteFormat,
/// Format used to render the resulting bytes back into text.
pub output_format: ByteFormat,
/// How run lengths are stored in the compressed form.
pub method: RleMethod,
}

impl Default for RunLengthEncoding {
impl Default for RunLengthEncodingBytes {
fn default() -> Self {
Self {
input_format: ByteFormat::Hex,
output_format: ByteFormat::Hex,
method: RleMethod::OneByte,
}
}
}

impl Code for RunLengthEncoding {
impl RunLengthEncodingBytes {

fn compress(&self, bytes: &[u8]) -> Vec<u8> {
match self.method {
RleMethod::OneByte => bytes_to_rle_one_byte(bytes),
RleMethod::Leb128 => todo!(),
}
}

fn decompress(&self, bytes: &[u8]) -> Vec<u8> {
match self.method {
RleMethod::OneByte => rle_to_bytes_one_byte(bytes),
RleMethod::Leb128 => todo!(),
}
}

}

impl Code for RunLengthEncodingBytes {
fn encode(&self, text: &str) -> Result<String, CodeError> {
let bytes = self
.input_format
.text_to_bytes(text)
.map_err(|_| CodeError::input("invalid input bytes"))?;

Ok(self.output_format.byte_slice_to_text(&bytes_to_rle(&bytes)))
Ok(self.output_format.byte_slice_to_text(&self.compress(&bytes)))
}

fn decode(&self, text: &str) -> Result<String, CodeError> {
Expand All @@ -74,7 +130,7 @@ impl Code for RunLengthEncoding {
return Err(CodeError::input("the rle must be an even number of bytes"));
}

Ok(self.output_format.byte_slice_to_text(&rle_to_bytes(&bytes)))
Ok(self.output_format.byte_slice_to_text(&self.decompress(&bytes)))
}
}

Expand All @@ -89,19 +145,19 @@ mod rle_tests {
fn check_overflow() {
let bytes = vec![0_u8; 300];
let rle = vec![0, 255, 0, 45];
assert_eq!(rle, bytes_to_rle(&bytes));
assert_eq!(bytes, rle_to_bytes(&rle));
assert_eq!(rle, bytes_to_rle_one_byte(&bytes));
assert_eq!(bytes, rle_to_bytes_one_byte(&rle));
}

#[test]
fn encode_test() {
let code = RunLengthEncoding::default();
let code = RunLengthEncodingBytes::default();
assert_eq!(ENCODEDTEXT, code.encode(PLAINTEXT).unwrap())
}

#[test]
fn decode_test() {
let code = RunLengthEncoding::default();
let code = RunLengthEncodingBytes::default();
assert_eq!(PLAINTEXT, code.decode(ENCODEDTEXT).unwrap())
}
}
8 changes: 7 additions & 1 deletion codes/src/ids/code_descriptions.json
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,13 @@
"Traits": null
},
"Run Length Encoding": {
"Description": "Run Length Encoding (RLE) compresses data that contains long strings of identical information by replacing them with one instance of the symbol followed by a count of how many should occur (the length of the run). Efficient RLE requires both that the data to be compressed have sufficient repetition and that the encoding not waste too much space on short runs. The version of RLE presented here encodes text but for general compression the encoding works on bytes.",
"Description": "Run Length Encoding (RLE) compresses data that contains long strings of identical information by replacing them with one instance of the symbol followed by a count of how many should occur (the length of the run). Efficient RLE requires both that the data to be compressed have sufficient repetition and that the encoding not waste too much space on short runs. The version of RLE presented here encodes text as text (both UTF-8), but this usage is rare in practice. The Run Length Encoding Bytes page shows an encoding that operates on arbitrary bytes.",
"Authors": null,
"Publication": null,
"Traits": null
},
"Run Length Encoding Bytes": {
"Description": "Run Length Encoding (RLE) compresses data that contains long strings of identical information by replacing them with one instance of the symbol followed by a count of how many should occur (the length of the run). Efficient RLE requires both that the data to be compressed have sufficient repetition and that the encoding not waste too much space on short runs. The simple encoding scheme here converts a sequence of bytes into a sequence of pairs of bytes. In these pairs the first byte is the one to be repeated and the second byte is the number of times it is to be repeated. If a byte is repeated more than 255 times, the additional repetitions are encoded as additional pairs. If bytes are not repeated more than 255 times this is reasonably efficient. However, if there are very long runs it performs relatively poorly. The complex encoding works similarly, but the count is a variable length integer (LEB-128), which allows runs of enormous length to be compressed.",
"Authors": null,
"Publication": null,
"Traits": null
Expand Down
1 change: 1 addition & 0 deletions codes/src/ids/code_id.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ code_ids_and_names!(
Romaji, "Romaji";
RomanNumeral, "Roman Numeral";
RunLengthEncoding, "Run Length Encoding";
RunLengthEncodingBytes, "Run Length Encoding Bytes";
Skey, "S/KEY";
SpellingAlphabet, "Spelling Alphabet";
Tap, "Tap";
Expand Down
75 changes: 75 additions & 0 deletions src/code_panel/rle_byte_controls.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
use crate::ui_elements::UiElements;

use super::CodeFrame;
use codes::compression::{run_length::RunLengthEncoding, run_length_bytes::RunLengthEncodingBytes};

/// UI frame for the byte-oriented run length encoding.
///
/// `Default` is derived rather than hand-written: the only field is built from
/// `RunLengthEncodingBytes::default()`, which is exactly what the derive produces.
#[derive(Default)]
pub struct RleFrame {
    /// The byte-oriented RLE code configured by this frame.
    byte_code: RunLengthEncodingBytes,
}

impl CodeFrame for RleFrame {
    /// Draws the controls for the byte-oriented run length encoding.
    ///
    /// At present this only renders a link to the implementation followed by some
    /// spacing; the input/output format selectors are still a commented-out draft.
    fn ui(&mut self, ui: &mut egui::Ui) {
        ui.hyperlink_to(
            "see the code",
            "https://github.com/SymmetricChaos/crypto-gui/blob/master/codes/src/compression/run_length_bytes.rs",
        );

        // TODO: enable the byte format selectors once they are wired to the
        // `input_format` / `output_format` fields of `byte_code`. Draft kept for reference:
        //
        // {
        //     let this = &mut *ui;
        //     let mut changed = false;
        //     egui::CollapsingHeader::new("Input Format")
        //         .default_open(true)
        //         .show(this, |ui| {
        //             ui.label(
        //                 "Input can be text, hexadecimal, Base64, or binary. All interpreted as bytes.",
        //             );
        //             ui.horizontal(|ui| {
        //                 for variant in ByteFormat::iter() {
        //                     if ui
        //                         .selectable_value(&mut self.byte_code.input_format, variant, variant.to_string())
        //                         .clicked()
        //                     {
        //                         changed = true;
        //                     }
        //                 }
        //             });
        //         });
        //
        //     this.add_space(8.0);
        //
        //     egui::CollapsingHeader::new("Output Format")
        //         .default_open(true)
        //         .show(this, |ui| {
        //             ui.label(
        //                 "Output can be text, hexadecimal, Base64, or binary. All interpreted as bytes.",
        //             );
        //             ui.horizontal(|ui| {
        //                 for variant in ByteFormat::iter() {
        //                     if ui
        //                         .selectable_value(&mut self.byte_code.output_format, variant, variant.to_string())
        //                         .clicked()
        //                     {
        //                         changed = true;
        //                     }
        //                 }
        //             });
        //         });
        //     changed
        // };

        ui.add_space(16.0);
    }

    /// Returns the byte-oriented RLE code owned by this frame as a `Code` trait object.
    fn code(&self) -> &dyn codes::traits::Code {
        // This file's `RleFrame` has a single field named `byte_code`; the earlier
        // `self.text_code` belongs to the text-based frame and does not exist here.
        &self.byte_code
    }
}
13 changes: 7 additions & 6 deletions src/code_panel/rle_controls.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
use crate::ui_elements::UiElements;

use super::CodeFrame;
use codes::compression::run_length::RunLengthEncoding;
use codes::compression::{run_length::RunLengthEncoding, run_length_bytes::RunLengthEncodingBytes};

pub struct RleFrame {
code: RunLengthEncoding,
text_code: RunLengthEncoding,
}

impl Default for RleFrame {
fn default() -> Self {
Self {
code: Default::default(),
text_code: Default::default(),
}
}
}
Expand All @@ -18,12 +20,11 @@ impl CodeFrame for RleFrame {
ui.hyperlink_to(
"see the code",
"https://github.com/SymmetricChaos/crypto-gui/blob/master/codes/src/compression/run_length.rs",
);

);
ui.add_space(16.0);
}

fn code(&self) -> &dyn codes::traits::Code {
&self.code
&self.text_code
}
}

0 comments on commit 97cba4d

Please sign in to comment.