From bf3f5336a34234c71b3b50989f2091e603dcf2bd Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Wed, 14 Feb 2024 20:41:27 -0500 Subject: [PATCH 1/4] Code movement, documentation, Signed-off-by: Moritz Hoffmann --- src/{ => impls}/codec.rs | 6 +++--- src/impls/columns.rs | 2 +- src/impls/deduplicate.rs | 41 ++++++++++++++++++++++++++++------------ src/impls/mirror.rs | 2 +- src/impls/mod.rs | 1 + src/impls/offsets.rs | 38 +++++++++++++++++++++++-------------- src/impls/option.rs | 2 +- src/impls/result.rs | 2 +- src/impls/slice.rs | 2 +- src/impls/slice_copy.rs | 2 +- src/lib.rs | 9 ++++----- 11 files changed, 67 insertions(+), 40 deletions(-) rename src/{ => impls}/codec.rs (98%) diff --git a/src/codec.rs b/src/impls/codec.rs similarity index 98% rename from src/codec.rs rename to src/impls/codec.rs index f8fb40d..870f77c 100644 --- a/src/codec.rs +++ b/src/impls/codec.rs @@ -12,7 +12,7 @@ pub use dictionary::DictionaryCodec; /// This method will sort `vec` and then consolidate runs of more than one entry with /// identical first elements by accumulating the second elements of the pairs. Should the final /// accumulation be zero, the element is discarded. -pub fn consolidate(vec: &mut Vec<(T, usize)>) { +fn consolidate(vec: &mut Vec<(T, usize)>) { consolidate_from(vec, 0); } @@ -21,13 +21,13 @@ pub fn consolidate(vec: &mut Vec<(T, usize)>) { /// This method will sort `vec[offset..]` and then consolidate runs of more than one entry with /// identical first elements by accumulating the second elements of the pairs. Should the final /// accumulation be zero, the element is discarded. -pub fn consolidate_from(vec: &mut Vec<(T, usize)>, offset: usize) { +fn consolidate_from(vec: &mut Vec<(T, usize)>, offset: usize) { let length = consolidate_slice(&mut vec[offset..]); vec.truncate(offset + length); } /// Sorts and consolidates a slice, returning the valid prefix length. 
-pub fn consolidate_slice(slice: &mut [(T, usize)]) -> usize { +fn consolidate_slice(slice: &mut [(T, usize)]) -> usize { // We could do an insertion-sort like initial scan which builds up sorted, consolidated runs. // In a world where there are not many results, we may never even need to call in to merge sort. slice.sort_by(|x, y| x.0.cmp(&y.0)); diff --git a/src/impls/columns.rs b/src/impls/columns.rs index 2cfbc18..e998966 100644 --- a/src/impls/columns.rs +++ b/src/impls/columns.rs @@ -32,7 +32,7 @@ use crate::{CopyOnto, CopyRegion, Index, Region}; /// vec![], /// ]; /// -/// let mut r = ColumnsRegion::, _>::default(); +/// let mut r = , _>>::default(); /// /// let mut indices = Vec::with_capacity(data.len()); /// diff --git a/src/impls/deduplicate.rs b/src/impls/deduplicate.rs index e5f18ed..dae356e 100644 --- a/src/impls/deduplicate.rs +++ b/src/impls/deduplicate.rs @@ -4,6 +4,17 @@ use crate::impls::offsets::OffsetContainer; use crate::{CopyOnto, Region}; /// A region to deduplicate consecutive equal items. +/// +/// # Examples +/// +/// The following example shows that two inserts can result in the same index. +/// ``` +/// use flatcontainer::impls::deduplicate::CollapseSequence; +/// use flatcontainer::{CopyOnto, StringRegion}; +/// let mut r = >::default(); +/// +/// assert_eq!("abc".copy_onto(&mut r), "abc".copy_onto(&mut r)); +/// ``` #[derive(Debug, Clone)] pub struct CollapseSequence { /// Inner region. @@ -77,7 +88,19 @@ where /// be dense, i.e., `(i, j)` is followed by `(j, k)`. /// /// Defers to region `R` for storing items, and uses offset container `O` to -/// rember indices. By default, `O` is `Vec`. +/// remember indices. By default, `O` is `Vec`. 
+/// +/// # Examples +/// +/// The following example shows that two inserts into a copy region have a collapsible index: +/// ``` +/// use flatcontainer::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs}; +/// use flatcontainer::{CopyOnto, CopyRegion, Region, StringRegion}; +/// let mut r = >>::default(); +/// +/// let index: usize = b"abc"[..].copy_onto(&mut r); +/// assert_eq!(b"abc", r.index(index)); +/// ``` #[derive(Debug, Clone)] pub struct ConsecutiveOffsetPairs> where @@ -169,7 +192,7 @@ impl, O: OffsetContainer, T: CopyOnto::default(); - fn copy(r: &mut R, item: impl CopyOnto) -> R::Index { - item.copy_onto(r) - } - - assert_eq!(copy(&mut r, "abc"), copy(&mut r, "abc")); + assert_eq!("abc".copy_onto(&mut r), "abc".copy_onto(&mut r)); println!("{r:?}"); } @@ -199,12 +220,8 @@ mod tests { let mut r = CollapseSequence::>::default(); - fn copy(r: &mut R, item: impl CopyOnto) -> R::Index { - item.copy_onto(r) - } - for _ in 0..1000 { - copy(&mut r, "abc"); + "abc".copy_onto(&mut r); } println!("{r:?}"); diff --git a/src/impls/mirror.rs b/src/impls/mirror.rs index a4f8520..38c05ce 100644 --- a/src/impls/mirror.rs +++ b/src/impls/mirror.rs @@ -20,7 +20,7 @@ use crate::{Containerized, CopyOnto, Index, Region, ReserveItems}; /// For [`MirrorRegion`]s, we can index with a copy type: /// ``` /// # use flatcontainer::{MirrorRegion, Region}; -/// let r = MirrorRegion::::default(); +/// let r = >::default(); /// let output: u8 = r.index(42); /// assert_eq!(output, 42); /// ``` diff --git a/src/impls/mod.rs b/src/impls/mod.rs index fdcf043..be5a6a1 100644 --- a/src/impls/mod.rs +++ b/src/impls/mod.rs @@ -1,5 +1,6 @@ //! Various region implementations. +pub mod codec; pub mod columns; pub mod deduplicate; pub mod mirror; diff --git a/src/impls/offsets.rs b/src/impls/offsets.rs index c0811f5..17e261d 100644 --- a/src/impls/offsets.rs +++ b/src/impls/offsets.rs @@ -1,11 +1,11 @@ //! Types to represent offsets. -/// TODO +/// A container to store offsets. 
pub trait OffsetContainer: Default + Extend { /// Accepts a newly pushed element. fn push(&mut self, item: T); - /// Lookup an index + /// Lookup an index. May panic for invalid indexes. fn index(&self, index: usize) -> T; /// Clear all contents. @@ -24,12 +24,20 @@ pub trait OffsetContainer: Default + Extend { fn reserve(&mut self, additional: usize); } +/// A container for offsets that can represent strides of offsets. +/// +/// Does not implement `OffsetContainer` because it cannot accept arbitrary pushes. #[derive(Debug, Default)] -enum OffsetStride { +pub enum OffsetStride { + /// No push has occurred. #[default] Empty, + /// Pushed a single 0. Zero, + /// `Striding(stride, count)`: `count` many steps of stride `stride` have been pushed. Striding(usize, usize), + /// `Saturated(stride, count, reps)`: `count` many steps of stride `stride`, followed by + /// `reps` repetitions of the last element have been pushed. Saturated(usize, usize, usize), } @@ -110,15 +118,15 @@ pub struct OffsetList { } impl OffsetList { - // TODO - // /// Allocate a new list with a specified capacity. - // pub fn with_capacity(cap: usize) -> Self { - // Self { - // zero_prefix: 0, - // smol: Vec::with_capacity(cap), - // chonk: Vec::new(), - // } - // } + /// Allocate a new list with a specified capacity. + pub fn with_capacity(cap: usize) -> Self { + Self { + zero_prefix: 0, + smol: Vec::with_capacity(cap), + chonk: Vec::new(), + } + } + /// Inserts the offset, as a `u32` if that is still on the table. pub fn push(&mut self, offset: usize) { if self.smol.is_empty() && self.chonk.is_empty() && offset == 0 { @@ -133,7 +141,8 @@ impl OffsetList { self.chonk.push(offset.try_into().unwrap()) } } - /// Like `std::ops::Index`, which we cannot implement as it must return a `&usize`. + + /// Like [`std::ops::Index`], which we cannot implement as it must return a `&usize`. 
pub fn index(&self, index: usize) -> usize { if index < self.zero_prefix { 0 @@ -167,7 +176,8 @@ impl OffsetList { } } -/// TODO +/// An offset container implementation that first tries to recognize strides, and then spills into +/// a regular offset list. #[derive(Default, Debug)] pub struct OffsetOptimized { strided: OffsetStride, diff --git a/src/impls/option.rs b/src/impls/option.rs index ae476c2..11e97e4 100644 --- a/src/impls/option.rs +++ b/src/impls/option.rs @@ -16,7 +16,7 @@ impl Containerized for Option { /// The region can hold options: /// ``` /// # use flatcontainer::{Containerized, CopyOnto, OptionRegion, Region}; -/// let mut r = OptionRegion::<::Region>::default(); +/// let mut r = ::Region>>::default(); /// /// let some_index = Some(123).copy_onto(&mut r); /// // Type annotations required for `None`: diff --git a/src/impls/result.rs b/src/impls/result.rs index 79bb2b9..b033d94 100644 --- a/src/impls/result.rs +++ b/src/impls/result.rs @@ -17,7 +17,7 @@ impl Containerized for Result { /// ``` /// use flatcontainer::{Containerized, CopyOnto, Region, ResultRegion}; /// let mut r = -/// ResultRegion::<<() as Containerized>::Region, ::Region>::default(); +/// ::Region, ::Region>>::default(); /// /// let ok_index = Result::<(), String>::Ok(()).copy_onto(&mut r); /// let err_index = Result::<(), String>::Err("Error".to_string()).copy_onto(&mut r); diff --git a/src/impls/slice.rs b/src/impls/slice.rs index 0545dc6..03dfd51 100644 --- a/src/impls/slice.rs +++ b/src/impls/slice.rs @@ -33,7 +33,7 @@ impl Containerized for [T; N] { /// We fill some data into a slice region and use the [`ReadSlice`] to extract it later. 
/// ``` /// use flatcontainer::{Containerized, CopyOnto, Region, SliceRegion}; -/// let mut r = SliceRegion::<::Region>::default(); +/// let mut r = ::Region>>::default(); /// /// let panagram_en = "The quick fox jumps over the lazy dog" /// .split(" ") diff --git a/src/impls/slice_copy.rs b/src/impls/slice_copy.rs index bde5234..d49678a 100644 --- a/src/impls/slice_copy.rs +++ b/src/impls/slice_copy.rs @@ -11,7 +11,7 @@ use crate::{CopyIter, CopyOnto, Region, ReserveItems}; /// /// ``` /// use flatcontainer::{CopyOnto, CopyRegion, Region}; -/// let mut r = CopyRegion::::default(); +/// let mut r = >::default(); /// /// let panagram_en = "The quick fox jumps over the lazy dog"; /// let panagram_de = "Zwölf Boxkämpfer jagen Viktor quer über den großen Sylter Deich"; diff --git a/src/lib.rs b/src/lib.rs index fe60cae..9f96d2d 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -34,7 +34,6 @@ use std::fmt::{Debug, Formatter}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -pub mod codec; pub mod impls; pub use impls::columns::ColumnsRegion; @@ -331,7 +330,7 @@ mod tests { #[test] fn test_slice_string_onto() { - let mut c = StringRegion::default(); + let mut c = ::default(); let index = "abc".to_string().copy_onto(&mut c); assert_eq!("abc", c.index(index)); let index = "def".copy_onto(&mut c); @@ -349,7 +348,7 @@ mod tests { #[test] fn test_vec() { - let mut c = SliceRegion::>::default(); + let mut c = >>::default(); let slice = &[1u8, 2, 3]; let idx = slice.copy_onto(&mut c); assert!(slice.iter().copied().eq(c.index(idx))); @@ -357,7 +356,7 @@ mod tests { #[test] fn test_vec_onto() { - let mut c: SliceRegion> = SliceRegion::default(); + let mut c = >>::default(); let slice = &[1u8, 2, 3][..]; let idx = slice.copy_onto(&mut c); assert!(slice.iter().copied().eq(c.index(idx))); @@ -504,7 +503,7 @@ mod tests { #[test] fn test_result() { let r: Result<_, u16> = Ok("abc"); - let mut c = ResultRegion::>::default(); + let mut c = >>::default(); let idx = copy(&mut c, 
r); assert_eq!(r, c.index(idx)); } From 8a8dcd6f0dc5a9f4ca22e7bba31114a81d37a3d0 Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Wed, 14 Feb 2024 20:41:39 -0500 Subject: [PATCH 2/4] StringRegion generic over inner region Signed-off-by: Moritz Hoffmann --- src/impls/string.rs | 93 +++++++++++++++++++++++++++++++++++---------- 1 file changed, 72 insertions(+), 21 deletions(-) diff --git a/src/impls/string.rs b/src/impls/string.rs index 693ae00..da632b2 100644 --- a/src/impls/string.rs +++ b/src/impls/string.rs @@ -8,12 +8,15 @@ use crate::{Containerized, CopyOnto, Region, ReserveItems}; /// A region to store strings and read `&str`. /// +/// Delegates to a region `R` to store `u8` slices. By default, it uses a [`CopyRegion`], but a +/// different region can be provided, as long as it absorbs and reads items as `&[u8]`. +/// /// # Examples /// /// We fill some data into a string region and use extract it later. /// ``` -/// use flatcontainer::{Containerized, CopyOnto, Region, StringRegion}; -/// let mut r = StringRegion::default(); +/// use flatcontainer::{Containerized, CopyOnto, CopyRegion, Region, StringRegion}; +/// let mut r = ::default(); /// /// let panagram_en = "The quick fox jumps over the lazy dog"; /// let panagram_de = "Zwölf Boxkämpfer jagen Viktor quer über den großen Sylter Deich"; @@ -26,20 +29,28 @@ use crate::{Containerized, CopyOnto, Region, ReserveItems}; /// ``` #[derive(Default, Debug, Clone)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub struct StringRegion { - inner: CopyRegion, +pub struct StringRegion> +where + for<'a> R: Region = &'a [u8]> + 'a, + for<'a> &'a [u8]: CopyOnto, +{ + inner: R, } -impl Region for StringRegion { +impl Region for StringRegion +where + for<'a> R: Region = &'a [u8]> + 'a, + for<'a> &'a [u8]: CopyOnto, +{ type ReadItem<'a> = &'a str where Self: 'a ; - type Index = as Region>::Index; + type Index = R::Index; fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self where Self: 'a, { Self 
{ - inner: CopyRegion::merge_regions(regions.map(|r| &r.inner)), + inner: R::merge_regions(regions.map(|r| &r.inner)), } } @@ -70,22 +81,34 @@ impl<'a> Containerized for &'a str { type Region = StringRegion; } -impl CopyOnto for String { +impl CopyOnto> for String +where + for<'a> R: Region = &'a [u8]> + 'a, + for<'a> &'a [u8]: CopyOnto, +{ #[inline] - fn copy_onto(self, target: &mut StringRegion) -> ::Index { + fn copy_onto(self, target: &mut StringRegion) -> as Region>::Index { self.as_str().copy_onto(target) } } -impl CopyOnto for &String { +impl CopyOnto> for &String +where + for<'a> R: Region = &'a [u8]> + 'a, + for<'a> &'a [u8]: CopyOnto, +{ #[inline] - fn copy_onto(self, target: &mut StringRegion) -> ::Index { + fn copy_onto(self, target: &mut StringRegion) -> as Region>::Index { self.as_str().copy_onto(target) } } -impl ReserveItems for &String { - fn reserve_items(target: &mut StringRegion, items: I) +impl ReserveItems> for &String +where + for<'a> R: Region = &'a [u8]> + 'a, + for<'a> &'a [u8]: CopyOnto + ReserveItems, +{ + fn reserve_items(target: &mut StringRegion, items: I) where I: Iterator + Clone, { @@ -93,22 +116,34 @@ impl ReserveItems for &String { } } -impl CopyOnto for &str { +impl CopyOnto> for &str +where + for<'a> R: Region = &'a [u8]> + 'a, + for<'a> &'a [u8]: CopyOnto, +{ #[inline] - fn copy_onto(self, target: &mut StringRegion) -> ::Index { + fn copy_onto(self, target: &mut StringRegion) -> as Region>::Index { self.as_bytes().copy_onto(&mut target.inner) } } -impl CopyOnto for &&str { +impl CopyOnto> for &&str +where + for<'a> R: Region = &'a [u8]> + 'a, + for<'a> &'a [u8]: CopyOnto, +{ #[inline] - fn copy_onto(self, target: &mut StringRegion) -> ::Index { + fn copy_onto(self, target: &mut StringRegion) -> as Region>::Index { self.as_bytes().copy_onto(&mut target.inner) } } -impl ReserveItems for &str { - fn reserve_items(target: &mut StringRegion, items: I) +impl ReserveItems> for &str +where + for<'a> R: Region = &'a [u8]> + 'a, + 
for<'a> &'a [u8]: CopyOnto + ReserveItems, +{ + fn reserve_items(target: &mut StringRegion, items: I) where I: Iterator + Clone, { @@ -116,11 +151,27 @@ impl ReserveItems for &str { } } -impl ReserveItems for &&str { - fn reserve_items(target: &mut StringRegion, items: I) +impl ReserveItems> for &&str +where + for<'a> R: Region = &'a [u8]> + 'a, + for<'a> &'a [u8]: CopyOnto + ReserveItems, +{ + fn reserve_items(target: &mut StringRegion, items: I) where I: Iterator + Clone, { ReserveItems::reserve_items(&mut target.inner, items.map(|s| s.as_bytes())) } } + +#[cfg(test)] +mod tests { + use crate::{CopyOnto, Region, StringRegion}; + + #[test] + fn test_inner() { + let mut r = ::default(); + let index = "abc".copy_onto(&mut r); + assert_eq!(r.index(index), "abc"); + } +} From b34e72dd9519ed28c8ee459961657be011cd24c1 Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Wed, 14 Feb 2024 20:52:38 -0500 Subject: [PATCH 3/4] Remove staging vector in codec Signed-off-by: Moritz Hoffmann --- src/impls/codec.rs | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/src/impls/codec.rs b/src/impls/codec.rs index 870f77c..b720f7a 100644 --- a/src/impls/codec.rs +++ b/src/impls/codec.rs @@ -75,7 +75,6 @@ fn consolidate_slice(slice: &mut [(T, usize)]) -> usize { pub struct CodecRegion> { inner: R, codec: C, - staging: Vec, } impl Region for CodecRegion @@ -98,7 +97,6 @@ where Self { inner: R::merge_regions(regions.map(|r| &r.inner)), codec, - staging: vec![], } } @@ -126,9 +124,7 @@ where for<'a> &'a [u8]: CopyOnto, { fn copy_onto(self, target: &mut CodecRegion) -> as Region>::Index { - target.staging.clear(); - target.codec.encode(self, &mut target.staging); - target.staging.as_slice().copy_onto(&mut target.inner) + target.codec.encode(self, &mut target.inner) } } @@ -137,7 +133,9 @@ pub trait Codec: Default + 'static { /// Decodes an input byte slice into a sequence of byte slices. 
fn decode<'a>(&'a self, bytes: &'a [u8]) -> &'a [u8]; /// Encodes a sequence of byte slices into an output byte slice. - fn encode(&mut self, bytes: &[u8], output: &mut Vec); + fn encode(&mut self, bytes: &[u8], output: &mut R) -> R::Index + where + for<'a> &'a [u8]: CopyOnto; /// Constructs a new instance of `Self` from accumulated statistics. /// These statistics should cover the data the output expects to see. fn new_from<'a, I: Iterator + Clone>(stats: I) -> Self; @@ -147,6 +145,7 @@ pub trait Codec: Default + 'static { mod dictionary { + use crate::{CopyOnto, Region}; use std::collections::BTreeMap; pub use super::{BytesMap, Codec, MisraGries}; @@ -174,23 +173,26 @@ mod dictionary { /// Encode a sequence of byte slices. /// /// Encoding also records statistics about the structure of the input. - fn encode(&mut self, bytes: &[u8], output: &mut Vec) { - let pre_len = output.len(); - + fn encode(&mut self, bytes: &[u8], output: &mut R) -> R::Index + where + for<'a> &'a [u8]: CopyOnto, + { self.total += bytes.len(); // If we have an index referencing `bytes`, use the index key. - if let Some(b) = self.encode.get(bytes) { - output.push(*b); + let index = if let Some(b) = self.encode.get(bytes) { + self.bytes += 1; + [*b].as_slice().copy_onto(output) } else { - output.extend_from_slice(bytes); - } + self.bytes += bytes.len(); + bytes.copy_onto(output) + }; // Stats stuff. self.stats.0.insert(bytes.to_owned()); let tag = bytes[0]; let tag_idx: usize = (tag % 4).into(); self.stats.1[tag_idx] |= 1 << (tag >> 2); - self.bytes += output.len() - pre_len; + index } /// Construct a new encoder from supplied statistics. @@ -301,8 +303,7 @@ mod misra_gries { /// Maintains a summary of "heavy hitters" in a presented collection of items. 
#[derive(Clone, Debug)] pub struct MisraGries { - /// TODO - pub inner: Vec<(T, usize)>, + inner: Vec<(T, usize)>, } impl Default for MisraGries { From 1d0730b1e8d0a979d435643995c29b36c79fa904 Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Wed, 14 Feb 2024 22:52:11 -0500 Subject: [PATCH 4/4] Heap size Signed-off-by: Moritz Hoffmann --- benches/bench.rs | 35 +++++++++++++++++++++++++++++++++-- src/impls/codec.rs | 12 ++++++++++++ src/impls/columns.rs | 7 +++++++ src/impls/deduplicate.rs | 9 +++++++++ src/impls/mirror.rs | 4 ++++ src/impls/offsets.rs | 17 +++++++++++++++++ src/impls/option.rs | 4 ++++ src/impls/result.rs | 5 +++++ src/impls/slice.rs | 5 +++++ src/impls/slice_copy.rs | 8 ++++++++ src/impls/string.rs | 4 ++++ src/impls/tuple.rs | 4 ++++ src/lib.rs | 16 ++++++++++++++++ 13 files changed, 128 insertions(+), 2 deletions(-) diff --git a/benches/bench.rs b/benches/bench.rs index 3440738..8cb1e79 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -8,8 +8,8 @@ use flatcontainer::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs use flatcontainer::impls::offsets::OffsetOptimized; use flatcontainer::impls::tuple::{TupleABCRegion, TupleABRegion}; use flatcontainer::{ - Containerized, CopyOnto, CopyRegion, FlatStack, MirrorRegion, Region, ReserveItems, - SliceRegion, StringRegion, + ColumnsRegion, Containerized, CopyOnto, CopyRegion, FlatStack, MirrorRegion, Region, + ReserveItems, SliceRegion, StringRegion, }; use test::Bencher; @@ -115,6 +115,25 @@ fn vec_u_vn_s_copy_region(bencher: &mut Bencher) { vec![vec![(0u64, vec![(); 1 << 40], "grawwwwrr!".to_string()); 32]; 32], ); } +#[bench] +fn vec_u_vn_s_copy_region_column(bencher: &mut Bencher) { + _bench_copy_region::< + SliceRegion< + ColumnsRegion< + TupleABCRegion< + MirrorRegion<_>, + CollapseSequence>, + CollapseSequence, + >, + _, + >, + >, + _, + >( + bencher, + vec![vec![(0u64, vec![(); 1 << 40], "grawwwwrr!".to_string()); 32]; 32], + ); +} #[bench] fn empty_clone(bencher: &mut 
Bencher) { @@ -258,6 +277,12 @@ where arena.copy(&record); } }); + let (mut siz, mut cap) = (0, 0); + arena.heap_size(|this_siz, this_cap| { + siz += this_siz; + cap += this_cap + }); + println!("{siz} {cap}"); } fn _bench_copy_region(bencher: &mut Bencher, record: T) @@ -273,6 +298,12 @@ where arena.copy(&record); } }); + let (mut siz, mut cap) = (0, 0); + arena.heap_size(|this_siz, this_cap| { + siz += this_siz; + cap += this_cap + }); + println!("{siz} {cap}"); } fn _bench_clone(bencher: &mut Bencher, record: T) { diff --git a/src/impls/codec.rs b/src/impls/codec.rs index b720f7a..ca37207 100644 --- a/src/impls/codec.rs +++ b/src/impls/codec.rs @@ -116,6 +116,11 @@ where fn clear(&mut self) { self.codec = Default::default(); } + + fn heap_size(&self, mut callback: F) { + self.inner.heap_size(&mut callback); + self.codec.heap_size(callback); + } } impl CopyOnto> for &[u8] @@ -141,6 +146,9 @@ pub trait Codec: Default + 'static { fn new_from<'a, I: Iterator + Clone>(stats: I) -> Self; /// Diagnostic information about the state of the codec. 
fn report(&self) {} + + /// Heap size, size - capacity + fn heap_size(&self, callback: F); } mod dictionary { @@ -252,6 +260,10 @@ mod dictionary { ) // } } + + fn heap_size(&self, _callback: F) { + // Lazy + } } } diff --git a/src/impls/columns.rs b/src/impls/columns.rs index e998966..39315a7 100644 --- a/src/impls/columns.rs +++ b/src/impls/columns.rs @@ -113,6 +113,13 @@ where } self.indices.clear(); } + + fn heap_size(&self, mut callback: F) { + for inner in &self.inner { + inner.heap_size(&mut callback); + } + self.indices.heap_size(callback); + } } impl Default for ColumnsRegion diff --git a/src/impls/deduplicate.rs b/src/impls/deduplicate.rs index dae356e..220e68c 100644 --- a/src/impls/deduplicate.rs +++ b/src/impls/deduplicate.rs @@ -65,6 +65,10 @@ where self.inner.clear(); self.last_index = None; } + + fn heap_size(&self, callback: F) { + self.inner.heap_size(callback); + } } impl> CopyOnto> for T @@ -170,6 +174,11 @@ impl, O: OffsetContainer> Region self.offsets.clear(); self.offsets.push(0); } + + fn heap_size(&self, mut callback: F) { + self.offsets.heap_size(&mut callback); + self.inner.heap_size(callback); + } } impl, O: OffsetContainer, T: CopyOnto> diff --git a/src/impls/mirror.rs b/src/impls/mirror.rs index 38c05ce..cb47fc3 100644 --- a/src/impls/mirror.rs +++ b/src/impls/mirror.rs @@ -69,6 +69,10 @@ impl> Region for MirrorRegion { fn clear(&mut self) { // No storage } + + fn heap_size(&self, _callback: F) { + // No storage + } } impl CopyOnto> for T { diff --git a/src/impls/offsets.rs b/src/impls/offsets.rs index 17e261d..6b1b5ab 100644 --- a/src/impls/offsets.rs +++ b/src/impls/offsets.rs @@ -22,6 +22,9 @@ pub trait OffsetContainer: Default + Extend { /// Reserve space for `additional` elements. fn reserve(&mut self, additional: usize); + + /// Heap size, size - capacity + fn heap_size(&self, callback: F); } /// A container for offsets that can represent strides of offsets. 
@@ -174,6 +177,11 @@ impl OffsetList { self.smol.clear(); self.chonk.clear(); } + + fn heap_size(&self, mut callback: F) { + self.smol.heap_size(&mut callback); + self.chonk.heap_size(callback); + } } /// An offset container implementation that first tries to recognize strides, and then spilles into @@ -218,6 +226,10 @@ impl OffsetContainer for OffsetOptimized { self.spilled.reserve(additional); } } + + fn heap_size(&self, callback: F) { + self.spilled.heap_size(callback); + } } impl Extend for OffsetOptimized { @@ -253,6 +265,11 @@ impl OffsetContainer for Vec { fn reserve(&mut self, additional: usize) { self.reserve(additional) } + + fn heap_size(&self, mut callback: F) { + let size_of_t = std::mem::size_of::(); + callback(self.len() * size_of_t, self.capacity() * size_of_t); + } } #[cfg(test)] diff --git a/src/impls/option.rs b/src/impls/option.rs index 11e97e4..7840f5b 100644 --- a/src/impls/option.rs +++ b/src/impls/option.rs @@ -63,6 +63,10 @@ impl Region for OptionRegion { fn clear(&mut self) { self.inner.clear(); } + + fn heap_size(&self, callback: F) { + self.inner.heap_size(callback); + } } impl CopyOnto> for Option diff --git a/src/impls/result.rs b/src/impls/result.rs index b033d94..beaa8a1 100644 --- a/src/impls/result.rs +++ b/src/impls/result.rs @@ -73,6 +73,11 @@ where self.oks.clear(); self.errs.clear(); } + + fn heap_size(&self, mut callback: F) { + self.oks.heap_size(&mut callback); + self.errs.heap_size(callback); + } } impl CopyOnto> for Result diff --git a/src/impls/slice.rs b/src/impls/slice.rs index 03dfd51..0a943ff 100644 --- a/src/impls/slice.rs +++ b/src/impls/slice.rs @@ -98,6 +98,11 @@ impl> Region for SliceRegion { self.slices.clear(); self.inner.clear(); } + + fn heap_size(&self, mut callback: F) { + self.slices.heap_size(&mut callback); + self.inner.heap_size(callback); + } } impl> Default for SliceRegion { diff --git a/src/impls/slice_copy.rs b/src/impls/slice_copy.rs index d49678a..6f4e9e9 100644 --- a/src/impls/slice_copy.rs +++ 
b/src/impls/slice_copy.rs @@ -58,6 +58,14 @@ impl Region for CopyRegion { fn clear(&mut self) { self.slices.clear(); } + + fn heap_size(&self, mut callback: F) { + let size_of_t = std::mem::size_of::(); + callback( + self.slices.len() * size_of_t, + self.slices.capacity() * size_of_t, + ); + } } impl Default for CopyRegion { diff --git a/src/impls/string.rs b/src/impls/string.rs index da632b2..d36d528 100644 --- a/src/impls/string.rs +++ b/src/impls/string.rs @@ -71,6 +71,10 @@ where fn clear(&mut self) { self.inner.clear(); } + + fn heap_size(&self, callback: F) { + self.inner.heap_size(callback); + } } impl Containerized for String { diff --git a/src/impls/tuple.rs b/src/impls/tuple.rs index c143a96..ede5c8c 100644 --- a/src/impls/tuple.rs +++ b/src/impls/tuple.rs @@ -58,6 +58,10 @@ macro_rules! tuple_flatcontainer { fn clear(&mut self) { $(self.[].clear();)* } + + fn heap_size(&self, mut callback: Fn) { + $(self.[].heap_size(&mut callback);)* + } } #[allow(non_camel_case_types)] diff --git a/src/lib.rs b/src/lib.rs index 9f96d2d..336a150 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -92,6 +92,9 @@ pub trait Region: Default { /// Remove all elements from this region, but retain allocations if possible. fn clear(&mut self); + + /// Heap size, size - capacity + fn heap_size(&self, callback: F); } /// A trait to let types express a default container type. 
@@ -238,6 +241,13 @@ impl FlatStack { pub fn iter(&self) -> Iter<'_, R> { self.into_iter() } + + /// Heap size, size - capacity + pub fn heap_size(&self, mut callback: F) { + self.region.heap_size(&mut callback); + use crate::impls::offsets::OffsetContainer; + self.indices.heap_size(callback); + } } impl, R: Region> Extend for FlatStack { @@ -437,6 +447,12 @@ mod tests { self.age_container.clear(); self.hobbies.clear(); } + + fn heap_size(&self, mut callback: F) { + self.name_container.heap_size(&mut callback); + self.age_container.heap_size(&mut callback); + self.hobbies.heap_size(callback); + } } impl<'a> CopyOnto for &'a Person {