From 757c13767657115b0438ac16892122535278dca3 Mon Sep 17 00:00:00 2001 From: Micah Date: Fri, 20 Sep 2024 12:18:47 -0700 Subject: [PATCH] Support setting compression type for rbx_binary and support zstd compression (#446) --- rbx_binary/CHANGELOG.md | 4 +++ rbx_binary/Cargo.toml | 1 + rbx_binary/src/chunk.rs | 45 +++++++++++++++++++----------- rbx_binary/src/lib.rs | 2 +- rbx_binary/src/serializer/mod.rs | 29 ++++++++++++++++++- rbx_binary/src/serializer/state.rs | 15 +++++----- 6 files changed, 71 insertions(+), 25 deletions(-) diff --git a/rbx_binary/CHANGELOG.md b/rbx_binary/CHANGELOG.md index df77bf08f..15cebfe0d 100644 --- a/rbx_binary/CHANGELOG.md +++ b/rbx_binary/CHANGELOG.md @@ -1,6 +1,10 @@ # rbx_binary Changelog ## Unreleased +* Added the ability to specify what type of compression to use for serializing. This takes the form of `Serializer::compression_type`. ([#446]) +* Added support for ZSTD compressed files ([#446]) + +[#446]: https://github.com/rojo-rbx/rbx-dom/pull/446 ## 0.7.7 (2024-08-22) * Updated rbx-dom dependencies diff --git a/rbx_binary/Cargo.toml b/rbx_binary/Cargo.toml index aeabeaec9..944c8e5e7 100644 --- a/rbx_binary/Cargo.toml +++ b/rbx_binary/Cargo.toml @@ -23,6 +23,7 @@ lz4 = "1.23.3" thiserror = "1.0.31" serde = { version = "1.0.137", features = ["derive"], optional = true } profiling = "1.0.6" +zstd = "0.13.2" [dev-dependencies] criterion = "0.3.5" diff --git a/rbx_binary/src/chunk.rs b/rbx_binary/src/chunk.rs index 43f0aa971..f8f70c97f 100644 --- a/rbx_binary/src/chunk.rs +++ b/rbx_binary/src/chunk.rs @@ -4,7 +4,12 @@ use std::{ str, }; -use crate::core::{RbxReadExt, RbxWriteExt}; +use crate::{ + core::{RbxReadExt, RbxWriteExt}, + serializer::CompressionType, +}; + +const ZSTD_MAGIC_NUMBER: &[u8] = &[0x28, 0xb5, 0x2f, 0xfd]; /// Represents one chunk from a binary model file. 
#[derive(Debug)] @@ -21,6 +26,7 @@ impl Chunk { log::trace!("{}", header); let data = if header.compressed_len == 0 { + log::trace!("No compression"); let mut data = Vec::with_capacity(header.len as usize); reader.take(header.len as u64).read_to_end(&mut data)?; data @@ -30,7 +36,13 @@ impl Chunk { .take(header.compressed_len as u64) .read_to_end(&mut compressed_data)?; - lz4::block::decompress(&compressed_data, Some(header.len as i32))? + if &compressed_data[0..4] == ZSTD_MAGIC_NUMBER { + log::trace!("ZSTD compression"); + zstd::bulk::decompress(&compressed_data, header.len as usize)? + } else { + log::trace!("LZ4 compression"); + lz4::block::decompress(&compressed_data, Some(header.len as i32))? + } }; assert_eq!(data.len(), header.len as usize); @@ -42,16 +54,6 @@ impl Chunk { } } -/// The compression format of a chunk in the binary model format. -#[derive(Debug, Clone, Copy)] -pub enum ChunkCompression { - /// The contents of the chunk should be LZ4 compressed. - Compressed, - - /// The contents of the chunk should be uncompressed. - Uncompressed, -} - /// Holds a chunk that is currently being written. /// /// This type intended to be written into via io::Write and then dumped into the @@ -60,14 +62,14 @@ pub enum ChunkCompression { #[must_use] pub struct ChunkBuilder { chunk_name: &'static [u8], - compression: ChunkCompression, + compression: CompressionType, buffer: Vec<u8>, } impl ChunkBuilder { /// Creates a new `ChunkBuilder` with the given name and compression /// setting. 
- pub fn new(chunk_name: &'static [u8], compression: ChunkCompression) -> Self { + pub fn new(chunk_name: &'static [u8], compression: CompressionType) -> Self { ChunkBuilder { chunk_name, compression, @@ -80,7 +82,7 @@ impl ChunkBuilder { writer.write_all(self.chunk_name)?; match self.compression { - ChunkCompression::Compressed => { + CompressionType::Lz4 => { let compressed = lz4::block::compress(&self.buffer, None, false)?; writer.write_le_u32(compressed.len() as u32)?; @@ -89,13 +91,24 @@ impl ChunkBuilder { writer.write_all(&compressed)?; } - ChunkCompression::Uncompressed => { + CompressionType::None => { writer.write_le_u32(0)?; writer.write_le_u32(self.buffer.len() as u32)?; writer.write_le_u32(0)?; writer.write_all(&self.buffer)?; } + CompressionType::Zstd => { + let compressed = zstd::bulk::compress(&self.buffer, 0)?; + + writer.write_le_u32(compressed.len() as u32)?; + writer.write_le_u32(self.buffer.len() as u32)?; + writer.write_le_u32(0)?; + + // ZSTD includes the magic number when compressing so we don't + // have to write it manually + writer.write_all(&compressed)?; + } } Ok(()) diff --git a/rbx_binary/src/lib.rs b/rbx_binary/src/lib.rs index fbb4ba854..ab86dd764 100644 --- a/rbx_binary/src/lib.rs +++ b/rbx_binary/src/lib.rs @@ -77,7 +77,7 @@ pub mod text_format { pub use crate::{ deserializer::{Deserializer, Error as DecodeError}, - serializer::{Error as EncodeError, Serializer}, + serializer::{CompressionType, Error as EncodeError, Serializer}, }; /// Deserialize a Roblox binary model or place from a stream. diff --git a/rbx_binary/src/serializer/mod.rs b/rbx_binary/src/serializer/mod.rs index 7a2ea0e36..02a1b56c5 100644 --- a/rbx_binary/src/serializer/mod.rs +++ b/rbx_binary/src/serializer/mod.rs @@ -34,14 +34,19 @@ pub use self::error::Error; /// A custom [`ReflectionDatabase`][ReflectionDatabase] can be specified via /// [`reflection_database`][reflection_database]. /// +/// By default, the Serializer uses LZ4 compression, mimicking Roblox. 
This can +/// be changed via [`compression_type`][compression_type]. +/// /// [ReflectionDatabase]: rbx_reflection::ReflectionDatabase /// [reflection_database]: Serializer#method.reflection_database +/// [compression_type]: Serializer#method.compression_type // // future settings: // * recursive: bool = true #[non_exhaustive] pub struct Serializer<'db> { database: &'db ReflectionDatabase<'db>, + compression: CompressionType, } impl<'db> Serializer<'db> { @@ -49,13 +54,23 @@ impl<'db> Serializer<'db> { pub fn new() -> Self { Serializer { database: rbx_reflection_database::get(), + compression: CompressionType::default(), } } /// Sets what reflection database for the serializer to use. #[inline] pub fn reflection_database(self, database: &'db ReflectionDatabase<'db>) -> Self { - Self { database } + Self { database, ..self } + } + + /// Sets what type of compression the serializer will use. + #[inline] + pub fn compression_type(self, compression: CompressionType) -> Self { + Self { + compression, + ..self + } } /// Serialize a Roblox binary model or place into the given stream using @@ -84,3 +99,15 @@ impl<'db> Default for Serializer<'db> { Self::new() } } + +/// Indicates the types of compression that files can be written with. +#[derive(Debug, PartialEq, Eq, Clone, Copy, Default)] +pub enum CompressionType { + /// LZ4 compression. This is what Roblox uses by default. + #[default] + Lz4, + /// No compression. + None, + /// ZSTD compression. 
+ Zstd, +} diff --git a/rbx_binary/src/serializer/state.rs b/rbx_binary/src/serializer/state.rs index 9b83713a7..acecadca8 100644 --- a/rbx_binary/src/serializer/state.rs +++ b/rbx_binary/src/serializer/state.rs @@ -22,7 +22,7 @@ use rbx_reflection::{ }; use crate::{ - chunk::{ChunkBuilder, ChunkCompression}, + chunk::ChunkBuilder, core::{ find_property_descriptors, RbxWriteExt, FILE_MAGIC_HEADER, FILE_SIGNATURE, FILE_VERSION, }, @@ -31,6 +31,7 @@ use crate::{ }; use super::error::InnerError; +use super::CompressionType; static FILE_FOOTER: &[u8] = b"</roblox>"; @@ -506,7 +507,7 @@ impl<'dom, 'db, W: Write> SerializerState<'dom, 'db, W> { .insert(*id, next_referent.try_into().unwrap()); } - log::trace!("Referents constructed: {:#?}", self.id_to_referent); + log::debug!("Collected {} referents", self.id_to_referent.len()); } pub fn write_header(&mut self) -> Result<(), InnerError> { @@ -542,7 +543,7 @@ impl<'dom, 'db, W: Write> SerializerState<'dom, 'db, W> { return Ok(()); } - let mut chunk = ChunkBuilder::new(b"SSTR", ChunkCompression::Compressed); + let mut chunk = ChunkBuilder::new(b"SSTR", self.serializer.compression); chunk.write_le_u32(0)?; // SSTR version number chunk.write_le_u32(self.shared_strings.len() as u32)?; @@ -571,7 +572,7 @@ impl<'dom, 'db, W: Write> SerializerState<'dom, 'db, W> { type_info.instances.len() ); - let mut chunk = ChunkBuilder::new(b"INST", ChunkCompression::Compressed); + let mut chunk = ChunkBuilder::new(b"INST", self.serializer.compression); chunk.write_le_u32(type_info.type_id)?; chunk.write_string(type_name)?; @@ -629,7 +630,7 @@ impl<'dom, 'db, W: Write> SerializerState<'dom, 'db, W> { prop_info.prop_type ); - let mut chunk = ChunkBuilder::new(b"PROP", ChunkCompression::Compressed); + let mut chunk = ChunkBuilder::new(b"PROP", self.serializer.compression); chunk.write_le_u32(type_info.type_id)?; chunk.write_string(&prop_info.serialized_name)?; @@ -1272,7 +1273,7 @@ impl<'dom, 'db, W: Write> SerializerState<'dom, 'db, W> { pub fn 
serialize_parents(&mut self) -> Result<(), InnerError> { log::trace!("Writing parent relationships"); - let mut chunk = ChunkBuilder::new(b"PRNT", ChunkCompression::Compressed); + let mut chunk = ChunkBuilder::new(b"PRNT", self.serializer.compression); chunk.write_u8(0)?; // PRNT version 0 chunk.write_le_u32(self.relevant_instances.len() as u32)?; @@ -1313,7 +1314,7 @@ impl<'dom, 'db, W: Write> SerializerState<'dom, 'db, W> { pub fn serialize_end(&mut self) -> Result<(), InnerError> { log::trace!("Writing file end"); - let mut end = ChunkBuilder::new(b"END\0", ChunkCompression::Uncompressed); + let mut end = ChunkBuilder::new(b"END\0", CompressionType::None); end.write_all(FILE_FOOTER)?; end.dump(&mut self.output)?;