From 57c115c7550a4c6a34fd9ed35bcd968adab855ad Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Mon, 26 Feb 2024 09:55:53 -0500 Subject: [PATCH 01/15] Introduce storage trait to abstract slices Signed-off-by: Moritz Hoffmann --- benches/bench.rs | 4 +- src/{impls/mod.rs => impls.rs} | 1 + src/impls/columns.rs | 24 ++- src/impls/deduplicate.rs | 87 +++++++- src/impls/offsets.rs | 153 ++++++++++++-- src/impls/slice.rs | 17 ++ src/impls/slice_copy.rs | 88 ++++---- src/impls/storage.rs | 362 +++++++++++++++++++++++++++++++++ src/lib.rs | 171 ++++++++++++---- tests/recursive.rs | 4 +- 10 files changed, 798 insertions(+), 113 deletions(-) rename src/{impls/mod.rs => impls.rs} (93%) create mode 100644 src/impls/storage.rs diff --git a/benches/bench.rs b/benches/bench.rs index 9903bab..2e7cf3d 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -5,7 +5,7 @@ extern crate test; use flatcontainer::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs}; -use flatcontainer::impls::offsets::OffsetOptimized; +use flatcontainer::impls::offsets::OffsetStride; use flatcontainer::impls::tuple::{TupleABCRegion, TupleABRegion}; use flatcontainer::{ ColumnsRegion, FlatStack, MirrorRegion, OwnedRegion, Push, Region, RegionPreference, @@ -87,7 +87,7 @@ fn string10_copy_region(bencher: &mut Bencher) { #[bench] fn string10_copy_region_collapse(bencher: &mut Bencher) { _bench_copy_region::< - SliceRegion>, OffsetOptimized>, + SliceRegion>, OffsetStride>, _, >(bencher, vec![format!("grawwwwrr!"); 1024]); } diff --git a/src/impls/mod.rs b/src/impls.rs similarity index 93% rename from src/impls/mod.rs rename to src/impls.rs index 54f9ad7..a265d3a 100644 --- a/src/impls/mod.rs +++ b/src/impls.rs @@ -10,6 +10,7 @@ pub mod option; pub mod result; pub mod slice; pub mod slice_copy; +pub mod storage; pub mod string; pub mod tuple; mod vec; diff --git a/src/impls/columns.rs b/src/impls/columns.rs index e7b97a4..1eb6225 100644 --- a/src/impls/columns.rs +++ b/src/impls/columns.rs @@ -1,6 +1,8 @@ //! A region to contain a variable number of columns. use std::fmt::Debug; +use std::iter::Zip; +use std::slice::Iter; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; @@ -90,7 +92,7 @@ where { type Owned = Vec; type ReadItem<'a> = ReadColumns<'a, R> where Self: 'a; - type Index = usize; + type Index = , OffsetOptimized> as Region>::Index; fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self where @@ -321,13 +323,11 @@ where } /// An iterator over the elements of a row. -pub struct ReadColumnsIter<'a, R: Region>( - Result, std::slice::Iter<'a, R::Owned>>, -); +pub struct ReadColumnsIter<'a, R: Region>(Result, Iter<'a, R::Owned>>); /// An iterator over the elements of a row. 
pub struct ReadColumnsIterInner<'a, R: Region> { - iter: std::iter::Zip, std::slice::Iter<'a, R>>, + iter: Zip, Iter<'a, R>>, } impl<'a, R> Iterator for ReadColumnsIter<'a, R> @@ -342,8 +342,17 @@ where Err(slice) => slice.next().map(IntoOwned::borrow_as), } } + + fn size_hint(&self) -> (usize, Option) { + match &self.0 { + Ok(inner) => inner.size_hint(), + Err(slice) => slice.size_hint(), + } + } } +impl<'a, R> ExactSizeIterator for ReadColumnsIter<'a, R> where R: Region {} + impl<'a, R> Iterator for ReadColumnsIterInner<'a, R> where R: Region, @@ -353,6 +362,10 @@ where fn next(&mut self) -> Option { self.iter.next().map(|(&i, r)| r.index(i)) } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } } impl Push> for ColumnsRegion @@ -467,6 +480,7 @@ impl Push> for ColumnsRegion where R: Region + Push, I: IntoIterator, + I::IntoIter: ExactSizeIterator, { #[inline] fn push(&mut self, item: CopyIter) -> as Region>::Index { diff --git a/src/impls/deduplicate.rs b/src/impls/deduplicate.rs index e53073c..5d57a49 100644 --- a/src/impls/deduplicate.rs +++ b/src/impls/deduplicate.rs @@ -4,6 +4,7 @@ use serde::{Deserialize, Serialize}; use crate::impls::offsets::{OffsetContainer, OffsetOptimized}; +use crate::impls::tuple::TupleABRegion; use crate::{Push, Region, ReserveItems}; /// A region to deduplicate consecutive equal items. @@ -125,8 +126,9 @@ where /// use flatcontainer::{Push, OwnedRegion, Region, StringRegion}; /// let mut r = >>::default(); /// -/// let index: usize = r.push(&b"abc"); -/// assert_eq!(b"abc", r.index(index)); +/// let index = r.push(&b"abc"); +/// assert_eq!(index.0, 0); +/// assert_eq!(b"abc", r.index(0.into())); /// ``` #[derive(Debug)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -182,7 +184,7 @@ where where Self: 'a; - type Index = usize; + type Index = Sequential; #[inline] fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self @@ -201,7 +203,7 @@ where #[inline] fn index(&self, index: Self::Index) -> Self::ReadItem<'_> { self.inner - .index((self.offsets.index(index), self.offsets.index(index + 1))) + .index((self.offsets.index(index.0), self.offsets.index(index.0 + 1))) } #[inline] @@ -247,7 +249,7 @@ where debug_assert_eq!(index.0, self.last_index); self.last_index = index.1; self.offsets.push(index.1); - self.offsets.len() - 2 + (self.offsets.len() - 2).into() } } @@ -264,6 +266,81 @@ where } } +/// TODO +#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Sequential(pub usize); + +impl From for Sequential { + fn from(value: usize) -> Self { + Self(value) + } +} + +/// TODO +#[derive(Default)] +pub struct CombineSequential(R); + +impl Push for CombineSequential> +where + A: Region, + B: Region, + TupleABRegion: Region + Push, + CombineSequential>: Region, +{ + fn push(&mut self, item: T) -> Self::Index { + self.0.push(item).0 + } +} + +impl Region for CombineSequential> +where + A: Region, + B: Region, +{ + type Owned = as Region>::Owned; + type ReadItem<'a> = as Region>::ReadItem<'a> + where + Self: 'a; + type Index = Sequential; + + fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self + where + Self: 'a, + { + Self( as Region>::merge_regions( + regions.map(|r| &r.0), + )) + } + + fn index(&self, index: Self::Index) -> Self::ReadItem<'_> { + self.0.index((index, index)) + } + + fn reserve_regions<'a, I>(&mut self, regions: I) + where + Self: 'a, + I: Iterator + Clone, + { + self.0.reserve_regions(regions.map(|r| &r.0)); + } + 
+ fn clear(&mut self) { + self.0.clear() + } + + fn heap_size(&self, callback: F) { + self.0.heap_size(callback) + } + + fn reborrow<'b, 'a: 'b>(item: Self::ReadItem<'a>) -> Self::ReadItem<'b> + where + Self: 'a, + { + as Region>::reborrow(item) + } +} + #[cfg(test)] mod tests { use crate::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs}; diff --git a/src/impls/offsets.rs b/src/impls/offsets.rs index 617721d..7269dc0 100644 --- a/src/impls/offsets.rs +++ b/src/impls/offsets.rs @@ -3,11 +3,30 @@ #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; +use crate::impls::deduplicate::Sequential; + /// A container to store offsets. -pub trait OffsetContainer: Default + Extend { +pub trait OffsetContainer: Default { + /// Allocate with space for `capacity` elements. + fn with_capacity(capacity: usize) -> Self; + + /// Allocate storage large enough to absorb `regions`'s contents. + fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self + where + Self: 'a, + { + Self::with_capacity(regions.map(Self::len).sum()) + } + /// Accepts a newly pushed element. fn push(&mut self, item: T); + /// Extend from iterator. Must be [`ExactSizeIterator`] to efficiently + /// pre-allocate. + fn extend>(&mut self, iter: I) + where + I::IntoIter: ExactSizeIterator; + /// Lookup an index. May panic for invalid indexes. fn index(&self, index: usize) -> T; @@ -138,20 +157,28 @@ impl OffsetStride { /// A list of unsigned integers that uses `u32` elements as long as they are small enough, and switches to `u64` once they are not. #[derive(Eq, PartialEq, Clone, Debug, Default)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub struct OffsetList { +pub struct OffsetList, L = Vec> +where + S: OffsetContainer, + L: OffsetContainer, +{ /// Offsets that fit within a `u32`. - pub smol: Vec, + pub smol: S, /// Offsets that either do not fit in a `u32`, or are inserted after some offset that did not fit. - pub chonk: Vec, + pub chonk: L, } -impl OffsetList { +impl OffsetList +where + S: OffsetContainer, + L: OffsetContainer, +{ /// Allocate a new list with a specified capacity. #[must_use] pub fn with_capacity(cap: usize) -> Self { Self { - smol: Vec::with_capacity(cap), - chonk: Vec::new(), + smol: S::with_capacity(cap), + chonk: L::default(), } } @@ -180,9 +207,10 @@ impl OffsetList { #[must_use] pub fn index(&self, index: usize) -> usize { if index < self.smol.len() { - self.smol[index].try_into().unwrap() + self.smol.index(index).try_into().unwrap() } else { - self.chonk[index - self.smol.len()].try_into().unwrap() + let index = index - self.smol.len(); + self.chonk.index(index).try_into().unwrap() } } /// The number of offsets in the list. @@ -194,7 +222,7 @@ impl OffsetList { /// Returns `true` if this list contains no elements. #[must_use] pub fn is_empty(&self) -> bool { - self.len() == 0 + self.smol.is_empty() && self.chonk.is_empty() } /// Reserve space for `additional` elements. @@ -218,12 +246,66 @@ impl OffsetList { /// a regular offset list. 
#[derive(Eq, PartialEq, Default, Debug, Clone)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub struct OffsetOptimized { +pub struct OffsetOptimized, L = Vec> +where + S: OffsetContainer, + L: OffsetContainer, +{ strided: OffsetStride, - spilled: OffsetList, + spilled: OffsetList, +} + +impl OffsetContainer for OffsetStride { + fn with_capacity(_capacity: usize) -> Self { + Self::default() + } + + fn push(&mut self, item: Sequential) { + let pushed = self.push(item.0); + debug_assert!(pushed); + } + + fn extend>(&mut self, iter: I) + where + I::IntoIter: ExactSizeIterator, + { + for item in iter { + let pushed = self.push(item.0); + debug_assert!(pushed); + } + } + + fn index(&self, index: usize) -> Sequential { + self.index(index).into() + } + + fn clear(&mut self) { + self.clear(); + } + + fn len(&self) -> usize { + self.len() + } + + fn reserve(&mut self, _additional: usize) { + // Nop + } + + fn heap_size(&self, _callback: F) { + // Nop + } } -impl OffsetContainer for OffsetOptimized { +impl OffsetContainer for OffsetOptimized +where + S: OffsetContainer, + L: OffsetContainer, +{ + fn with_capacity(_capacity: usize) -> Self { + // `self.strided` doesn't have any capacity, and we don't know the structure of the data. + Self::default() + } + fn push(&mut self, item: usize) { if self.spilled.is_empty() { let inserted = self.strided.push(item); @@ -235,6 +317,15 @@ impl OffsetContainer for OffsetOptimized { } } + fn extend>(&mut self, iter: I) + where + I::IntoIter: ExactSizeIterator, + { + for item in iter { + self.push(item); + } + } + fn index(&self, index: usize) -> usize { if index < self.strided.len() { self.strided.index(index) @@ -263,7 +354,11 @@ impl OffsetContainer for OffsetOptimized { } } -impl Extend for OffsetOptimized { +impl Extend for OffsetOptimized +where + S: OffsetContainer, + L: OffsetContainer, +{ fn extend>(&mut self, iter: T) { for item in iter { self.push(item); @@ -272,11 +367,22 @@ impl Extend for OffsetOptimized { } impl OffsetContainer for Vec { + fn with_capacity(capacity: usize) -> Self { + Vec::with_capacity(capacity) + } + #[inline] fn push(&mut self, item: T) { self.push(item); } + fn extend>(&mut self, iter: I) + where + I::IntoIter: ExactSizeIterator, + { + Extend::extend(self, iter); + } + #[inline] #[must_use] fn index(&self, index: usize) -> T { @@ -308,6 +414,7 @@ impl OffsetContainer for Vec { #[cfg(test)] mod tests { use crate::impls::deduplicate::ConsecutiveOffsetPairs; + use crate::impls::storage::Doubling; use crate::{Push, Region, SliceRegion, StringRegion}; use super::*; @@ -320,7 +427,7 @@ mod tests { let mut r = SliceRegion::< ConsecutiveOffsetPairs, - OffsetOptimized, + OffsetStride, >::default(); let idx = copy(&mut r, ["abc"]); assert_eq!("abc", r.index(idx).get(0)) @@ -328,7 +435,7 @@ mod tests { #[test] fn test_offset_optimized_clear() { - let mut oo = OffsetOptimized::default(); + let mut oo = ::default(); oo.push(0); assert_eq!(oo.len(), 1); oo.clear(); @@ -342,7 +449,7 @@ mod tests { #[test] fn test_offset_optimized_reserve() { - let mut oo = OffsetOptimized::default(); + let mut oo = ::default(); oo.push(9999999999); assert_eq!(oo.len(), 1); oo.reserve(1); @@ -350,7 +457,7 @@ mod tests { #[test] fn test_offset_optimized_heap_size() { - let mut oo = OffsetOptimized::default(); + let mut oo = ::default(); oo.push(9999999999); let mut cap = 0; oo.heap_size(|_, ca| { @@ -388,7 +495,7 @@ mod tests { #[test] fn test_chonk() { - let mut ol = OffsetList::default(); + let mut ol = ::default(); ol.push(usize::MAX); 
assert_eq!(usize::MAX, ol.index(0)); } @@ -399,4 +506,12 @@ mod tests { let os = OffsetStride::default(); let _ = os.index(0); } + + #[test] + fn test_offset_optimized_doubling() { + let mut oo = , Doubling<_>>>::default(); + oo.push(9999999999); + assert_eq!(oo.len(), 1); + oo.reserve(1); + } } diff --git a/src/impls/slice.rs b/src/impls/slice.rs index 51c9705..9fe04ad 100644 --- a/src/impls/slice.rs +++ b/src/impls/slice.rs @@ -375,6 +375,15 @@ impl<'a, C: Region, O: OffsetContainer> Iterator for ReadSliceIter<'a, } } +impl<'a, R, O> ExactSizeIterator for ReadSliceIter<'a, R, O> +where + R: Region, + O: OffsetContainer, + std::slice::Iter<'a, R::Owned>: ExactSizeIterator, + ReadSliceIterInner<'a, R, O>: ExactSizeIterator, +{ +} + impl<'a, C: Region, O: OffsetContainer> Iterator for ReadSliceIterInner<'a, C, O> { type Item = C::ReadItem<'a>; @@ -386,6 +395,14 @@ impl<'a, C: Region, O: OffsetContainer> Iterator for ReadSliceIterInne } } +impl<'a, R, O> ExactSizeIterator for ReadSliceIterInner<'a, R, O> +where + R: Region, + O: OffsetContainer, + Range: ExactSizeIterator, +{ +} + impl<'a, C, T, O> Push<&'a [T]> for SliceRegion where C: Region + Push<&'a T>, diff --git a/src/impls/slice_copy.rs b/src/impls/slice_copy.rs index 33aa134..7bc208e 100644 --- a/src/impls/slice_copy.rs +++ b/src/impls/slice_copy.rs @@ -1,8 +1,11 @@ //! A region that stores slices of copy types. +use std::marker::PhantomData; + #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; +use crate::impls::storage::SliceStorage; use crate::{CopyIter, Push, Region, ReserveItems}; /// A container for owned types. @@ -29,14 +32,16 @@ use crate::{CopyIter, Push, Region, ReserveItems}; /// ``` #[derive(Debug)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub struct OwnedRegion { - slices: Vec, +pub struct OwnedRegion> { + slices: S, + _marker: PhantomData, } -impl Clone for OwnedRegion { +impl Clone for OwnedRegion { fn clone(&self) -> Self { Self { slices: self.slices.clone(), + _marker: PhantomData, } } @@ -45,9 +50,10 @@ impl Clone for OwnedRegion { } } -impl Region for OwnedRegion +impl Region for OwnedRegion where [T]: ToOwned, + S: SliceStorage, { type Owned = <[T] as ToOwned>::Owned; type ReadItem<'a> = &'a [T] where Self: 'a; @@ -59,13 +65,14 @@ where Self: 'a, { Self { - slices: Vec::with_capacity(regions.map(|r| r.slices.len()).sum()), + slices: S::merge_regions(regions.map(|r| &r.slices)), + _marker: PhantomData, } } #[inline] fn index(&self, (start, end): Self::Index) -> Self::ReadItem<'_> { - &self.slices[start..end] + self.slices.index_slice(start, end) } #[inline] @@ -74,7 +81,7 @@ where Self: 'a, I: Iterator + Clone, { - self.slices.reserve(regions.map(|r| r.slices.len()).sum()); + self.slices.reserve_regions(regions.map(|r| &r.slices)); } #[inline] @@ -83,12 +90,8 @@ where } #[inline] - fn heap_size(&self, mut callback: F) { - let size_of_t = std::mem::size_of::(); - callback( - self.slices.len() * size_of_t, - self.slices.capacity() * size_of_t, - ); + fn heap_size(&self, callback: F) { + self.slices.heap_size(callback); } #[inline] @@ -100,18 +103,20 @@ where } } -impl Default for OwnedRegion { +impl> Default for OwnedRegion { #[inline] fn default() -> Self { Self { - slices: Vec::default(), + slices: S::default(), + _marker: PhantomData, } } } -impl Push<[T; N]> for OwnedRegion +impl Push<[T; N]> for OwnedRegion where [T]: ToOwned, + S: SliceStorage, { #[inline] fn push(&mut self, item: [T; N]) -> as Region>::Index { @@ -121,23 +126,23 @@ where } } -impl Push<&[T; N]> for 
OwnedRegion { +impl, const N: usize> Push<&[T; N]> for OwnedRegion { #[inline] fn push(&mut self, item: &[T; N]) -> as Region>::Index { - let start = self.slices.len(); - self.slices.extend_from_slice(item); - (start, self.slices.len()) + self.push(item.as_slice()) } } -impl Push<&&[T; N]> for OwnedRegion { +impl, const N: usize> Push<&&[T; N]> for OwnedRegion { #[inline] fn push(&mut self, item: &&[T; N]) -> as Region>::Index { self.push(*item) } } -impl<'b, T: Clone, const N: usize> ReserveItems<&'b [T; N]> for OwnedRegion { +impl<'b, T: Clone, S: SliceStorage, const N: usize> ReserveItems<&'b [T; N]> + for OwnedRegion +{ #[inline] fn reserve_items(&mut self, items: I) where @@ -147,28 +152,29 @@ impl<'b, T: Clone, const N: usize> ReserveItems<&'b [T; N]> for OwnedRegion { } } -impl Push<&[T]> for OwnedRegion { +impl> Push<&[T]> for OwnedRegion { #[inline] - fn push(&mut self, item: &[T]) -> as Region>::Index { + fn push(&mut self, item: &[T]) -> as Region>::Index { let start = self.slices.len(); self.slices.extend_from_slice(item); (start, self.slices.len()) } } -impl Push<&&[T]> for OwnedRegion +impl> Push<&&[T]> for OwnedRegion where for<'a> Self: Push<&'a [T]>, { #[inline] - fn push(&mut self, item: &&[T]) -> as Region>::Index { + fn push(&mut self, item: &&[T]) -> as Region>::Index { self.push(*item) } } -impl<'b, T> ReserveItems<&'b [T]> for OwnedRegion +impl<'b, T, S> ReserveItems<&'b [T]> for OwnedRegion where [T]: ToOwned, + S: SliceStorage, { #[inline] fn reserve_items(&mut self, items: I) @@ -179,28 +185,30 @@ where } } -impl Push> for OwnedRegion +impl Push> for OwnedRegion where [T]: ToOwned, + S: SliceStorage, { #[inline] - fn push(&mut self, mut item: Vec) -> as Region>::Index { + fn push(&mut self, mut item: Vec) -> as Region>::Index { let start = self.slices.len(); self.slices.append(&mut item); (start, self.slices.len()) } } -impl Push<&Vec> for OwnedRegion { +impl> Push<&Vec> for OwnedRegion { #[inline] - fn push(&mut self, item: &Vec) -> as Region>::Index { + fn push(&mut self, item: &Vec) -> as Region>::Index { self.push(item.as_slice()) } } -impl<'a, T> ReserveItems<&'a Vec> for OwnedRegion +impl<'a, T, S> ReserveItems<&'a Vec> for OwnedRegion where [T]: ToOwned, + S: SliceStorage, { #[inline] fn reserve_items(&mut self, items: I) @@ -211,16 +219,21 @@ where } } -impl> Push> for OwnedRegion { +impl, I: IntoIterator> Push> + for OwnedRegion +where + ::IntoIter: ExactSizeIterator, +{ #[inline] - fn push(&mut self, item: CopyIter) -> as Region>::Index { + fn push(&mut self, item: CopyIter) -> as Region>::Index { let start = self.slices.len(); self.slices.extend(item.0); (start, self.slices.len()) } } -impl> ReserveItems> for OwnedRegion +impl, J: IntoIterator> ReserveItems> + for OwnedRegion where [T]: ToOwned, { @@ -269,8 +282,9 @@ mod tests { #[test] fn test_copy_iter() { let mut r = >::default(); - r.reserve_items(std::iter::once(CopyIter(std::iter::repeat(1).take(4)))); - let index = r.push(CopyIter(std::iter::repeat(1).take(4))); + let iter = [1; 4].into_iter(); + r.reserve_items(std::iter::once(CopyIter(iter.clone()))); + let index = r.push(CopyIter(iter)); assert_eq!([1, 1, 1, 1], r.index(index)); } } diff --git a/src/impls/storage.rs b/src/impls/storage.rs new file mode 100644 index 0000000..0f01898 --- /dev/null +++ b/src/impls/storage.rs @@ -0,0 +1,362 @@ +//! Storage abstractions to represent slices of data. 
+ +#[cfg(feature = "serde")] +use serde::{Deserialize, Serialize}; + +/// Behavior to allocate storage +pub trait Storage: Default { + /// Allocate storage for at least `capacity` elements. + fn with_capacity(capacity: usize) -> Self; + + /// Allocate storage large enough to absorb `regions`'s contents. + fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self + where + Self: 'a, + { + Self::with_capacity(regions.map(Self::len).sum()) + } + + /// Reserve space for `additional` elements. + fn reserve(&mut self, additional: usize); + + /// Reserve space for `regions`. + fn reserve_regions<'a, I>(&mut self, regions: I) + where + Self: 'a, + I: Iterator + Clone, + { + self.reserve(regions.map(Self::len).sum()); + } + + /// Clear all contents, possibly retaining some allocations. + fn clear(&mut self); + + /// Observe the heap size information (size and capacity). + fn heap_size(&self, callback: F); + + /// Extend from iterator. Must be [`ExactSizeIterator`] to efficiently + /// pre-allocate. + fn extend>(&mut self, iter: I) + where + I::IntoIter: ExactSizeIterator; + + /// Append the contents of `data`. + fn append(&mut self, data: &mut Vec); + + /// Extend from the contents of `slice`. + fn extend_from_slice(&mut self, slice: &[T]) + where + T: Clone; + + /// Lookup the slice in range `start..end`. + fn index(&self, index: usize) -> &T; + + /// Returns the number of elements. + fn len(&self) -> usize; + + /// Returns `true` if empty, i.e., it doesn't contain any elements. + fn is_empty(&self) -> bool; +} + +/// Behavior for slice storage. +pub trait SliceStorage: Storage { + /// Lookup the slice in range `start..end`. + fn index_slice(&self, start: usize, end: usize) -> &[T]; +} + +impl Storage for Vec { + #[inline] + fn with_capacity(capacity: usize) -> Self { + Vec::with_capacity(capacity) + } + + #[inline] + fn reserve(&mut self, additional: usize) { + Vec::reserve(self, additional); + } + + #[inline] + fn clear(&mut self) { + self.clear(); + } + + #[inline] + fn heap_size(&self, mut callback: F) { + let size_of_t = std::mem::size_of::(); + callback(self.len() * size_of_t, self.capacity() * size_of_t); + } + + #[inline] + fn extend>(&mut self, iter: I) { + Extend::extend(self, iter); + } + + #[inline] + fn append(&mut self, data: &mut Vec) { + self.append(data); + } + + #[inline] + fn extend_from_slice(&mut self, slice: &[T]) + where + T: Clone, + { + self.extend_from_slice(slice); + } + + #[inline] + #[must_use] + fn index(&self, index: usize) -> &T { + &self[index] + } + + #[inline] + #[must_use] + fn len(&self) -> usize { + self.len() + } + + #[inline] + #[must_use] + fn is_empty(&self) -> bool { + self.is_empty() + } +} + +impl SliceStorage for Vec { + #[inline] + #[must_use] + fn index_slice(&self, start: usize, end: usize) -> &[T] { + &self[start..end] + } +} + +/// TODO +#[derive(Debug)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct Doubling { + inner: Vec>, + offsets: Vec, + len: usize, +} + +impl Default for Doubling { + fn default() -> Self { + Self { + inner: Vec::default(), + offsets: Vec::default(), + len: 0, + } + } +} + +impl Doubling { + #[inline] + #[must_use] + fn with_capacity(capacity: usize) -> Self { + Self { + inner: vec![Vec::with_capacity(capacity)], + offsets: Vec::default(), + len: 0, + } + } + + #[inline] + fn reserve(&mut self, additional: usize) { + let (remaining, last_len) = self + .inner + .last() + .map_or((0, 0), |last| (last.capacity() - last.len(), last.len())); + if remaining < additional { + let len = 2 * last_len; + 
let len = std::cmp::max(additional, len); + let len = len.next_power_of_two(); + self.offsets + .push(last_len + *self.offsets.last().unwrap_or(&0)); + self.inner.push(Vec::with_capacity(len)); + } + } + + #[inline] + fn clear(&mut self) { + self.inner.drain(..self.len().saturating_sub(1)); + if let Some(last) = self.inner.last_mut() { + last.clear(); + } + } + + #[inline] + fn heap_size(&self, mut callback: F) { + let size_of_usize = std::mem::size_of::(); + callback( + self.offsets.len() * size_of_usize, + self.offsets.capacity() * size_of_usize, + ); + let size_of_t = std::mem::size_of::(); + for inner in &self.inner { + callback(inner.len() * size_of_t, inner.capacity() * size_of_t); + } + } + + #[inline] + fn extend>(&mut self, iter: I) { + let iter = iter.into_iter(); + let (lo, hi) = iter.size_hint(); + self.reserve(hi.unwrap_or(lo)); + Extend::extend(self.inner.last_mut().unwrap(), iter); + } + + #[inline] + #[must_use] + fn index(&self, index: usize) -> &T { + let slice_index = self + .offsets + .iter() + .position(|&o| o > index) + .unwrap_or_else(|| self.offsets.len().saturating_sub(1)); + let index = index - self.offsets[slice_index]; + &self.inner[slice_index][index] + } + + #[inline] + fn len(&self) -> usize { + *self.offsets.last().unwrap_or(&0) + self.inner.last().map_or(0, Vec::len) + } + + #[inline] + fn is_empty(&self) -> bool { + self.offsets.last().unwrap_or(&0) > &0 || self.inner.last().map_or(false, Vec::is_empty) + } +} + +impl Storage for Doubling { + fn with_capacity(capacity: usize) -> Self { + Self::with_capacity(capacity) + } + + fn reserve(&mut self, additional: usize) { + self.reserve(additional); + } + + fn clear(&mut self) { + self.clear() + } + + fn heap_size(&self, callback: F) { + self.heap_size(callback); + } + + fn extend>(&mut self, iter: I) + where + I::IntoIter: ExactSizeIterator, + { + self.extend(iter); + } + + #[inline] + fn append(&mut self, data: &mut Vec) { + self.len += data.len(); + self.reserve(data.len()); + self.inner.last_mut().unwrap().append(data); + } + + #[inline] + fn extend_from_slice(&mut self, slice: &[T]) + where + T: Clone, + { + self.len += slice.len(); + self.reserve(slice.len()); + self.inner.last_mut().unwrap().extend_from_slice(slice); + } + + fn index(&self, index: usize) -> &T { + self.index(index) + } + + #[inline] + fn len(&self) -> usize { + self.len() + } + + #[inline] + fn is_empty(&self) -> bool { + self.is_empty() + } +} + +impl SliceStorage for Doubling { + #[inline] + fn index_slice(&self, start: usize, end: usize) -> &[T] { + let index = self + .offsets + .iter() + .position(|&o| o > start) + .unwrap_or_else(|| self.offsets.len().saturating_sub(1)); + let start = start - self.offsets[index]; + let end = end - self.offsets[index]; + &self.inner[index][start..end] + } +} + +mod offsetcontainer { + use crate::impls::offsets::OffsetContainer; + use crate::impls::storage::Doubling; + + impl OffsetContainer for Doubling { + fn with_capacity(capacity: usize) -> Self { + Self::with_capacity(capacity) + } + + fn push(&mut self, item: T) { + self.len += 1; + self.reserve(1); + self.inner.last_mut().unwrap().push(item); + } + + fn extend>(&mut self, iter: I) + where + I::IntoIter: ExactSizeIterator, + { + self.extend(iter); + } + + fn index(&self, index: usize) -> T { + *self.index(index) + } + + fn clear(&mut self) { + self.clear() + } + + fn len(&self) -> usize { + self.len() + } + + fn reserve(&mut self, additional: usize) { + self.reserve(additional); + } + + fn heap_size(&self, callback: F) { + 
self.heap_size(callback); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_doubling() { + let mut d: Doubling = Doubling::default(); + let mut start = 0; + + for i in 0..1000 { + d.extend_from_slice(&[i, i + 1, i + 3]); + let end = d.len(); + assert_eq!(&[i, i + 1, i + 3], d.index_slice(start, end)); + start = end; + } + } +} diff --git a/src/lib.rs b/src/lib.rs index cb15176..b041e30 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,12 +9,15 @@ use serde::{Deserialize, Serialize}; pub mod impls; +use crate::impls::offsets::OffsetContainer; pub use impls::columns::ColumnsRegion; +pub use impls::deduplicate::CombineSequential; pub use impls::mirror::MirrorRegion; pub use impls::option::OptionRegion; pub use impls::result::ResultRegion; pub use impls::slice::SliceRegion; pub use impls::slice_copy::OwnedRegion; +pub use impls::storage::Storage; pub use impls::string::StringRegion; /// An index into a region. Automatically implemented for relevant types. @@ -160,51 +163,47 @@ impl<'a, T: ToOwned + ?Sized> IntoOwned<'a> for &'a T { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr( feature = "serde", - serde( - bound = "R: Serialize + for<'a> Deserialize<'a>, R::Index: Serialize + for<'a> Deserialize<'a>" - ) + serde(bound = " + R: Serialize + for<'a> Deserialize<'a>, + R::Index: Serialize + for<'a> Deserialize<'a>, + S: Serialize + for<'a> Deserialize<'a>, + ") )] -pub struct FlatStack { +pub struct FlatStack = Vec<::Index>> { /// The indices, which we use to lookup items in the region. - indices: Vec, + indices: S, /// A region to index into. region: R, } -impl Default for FlatStack { +impl::Index>> Default for FlatStack { #[inline] fn default() -> Self { Self { - indices: Vec::default(), + indices: S::default(), region: R::default(), } } } -impl Debug for FlatStack +impl::Index>> Debug for FlatStack where for<'a> R::ReadItem<'a>: Debug, + for<'a> &'a S: IntoIterator, { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.debug_list().entries(self.iter()).finish() } } -impl FlatStack { - /// Default implementation based on the preference of type `T`. - #[inline] - #[must_use] - pub fn default_impl>() -> Self { - Self::default() - } - +impl::Index>> FlatStack { /// Returns a flat stack that can absorb `capacity` indices without reallocation. /// /// Prefer [`Self::merge_capacity`] over this function to also pre-size the regions. #[must_use] pub fn with_capacity(capacity: usize) -> Self { Self { - indices: Vec::with_capacity(capacity), + indices: S::with_capacity(capacity), region: R::default(), } } @@ -213,10 +212,10 @@ impl FlatStack { #[must_use] pub fn merge_capacity<'a, I: Iterator + Clone + 'a>(stacks: I) -> Self where - R: 'a, + Self: 'a, { Self { - indices: Vec::with_capacity(stacks.clone().map(|s| s.indices.len()).sum()), + indices: S::merge_regions(stacks.clone().map(|s| &s.indices)), region: R::merge_regions(stacks.map(|r| &r.region)), } } @@ -235,7 +234,7 @@ impl FlatStack { #[inline] #[must_use] pub fn get(&self, offset: usize) -> R::ReadItem<'_> { - self.region.index(self.indices[offset]) + self.region.index(self.indices.index(offset)) } /// Returns the number of indices in the stack. @@ -252,12 +251,6 @@ impl FlatStack { self.indices.is_empty() } - /// Returns the total number of indices the stack can hold without reallocation. - #[must_use] - pub fn capacity(&self) -> usize { - self.indices.capacity() - } - /// Reserves space to hold `additional` indices. 
#[inline] pub fn reserve(&mut self, additional: usize) { @@ -289,22 +282,47 @@ impl FlatStack { self.region.reserve_regions(regions); } + /// Heap size, size - capacity + #[inline] + pub fn heap_size(&self, mut callback: F) { + self.region.heap_size(&mut callback); + OffsetContainer::heap_size(&self.indices, callback); + } +} + +impl FlatStack +where + R: Region, + S: OffsetContainer<::Index>, +{ /// Iterate the items in this stack. #[inline] - pub fn iter(&self) -> Iter<'_, R> { + pub fn iter<'a>(&'a self) -> Iter<'a, R, <&'a S as IntoIterator>::IntoIter> + where + &'a S: IntoIterator, + { self.into_iter() } +} - /// Heap size, size - capacity +impl FlatStack { + /// Default implementation based on the preference of type `T`. #[inline] - pub fn heap_size(&self, mut callback: F) { - use crate::impls::offsets::OffsetContainer; - self.region.heap_size(&mut callback); - OffsetContainer::heap_size(&self.indices, callback); + #[must_use] + pub fn default_impl>() -> Self { + Self::default() + } + + /// Returns the total number of indices the stack can hold without reallocation. + #[must_use] + pub fn capacity(&self) -> usize { + self.indices.capacity() } } -impl> Extend for FlatStack { +impl, S: OffsetContainer<::Index>> Extend + for FlatStack +{ fn extend>(&mut self, iter: I) { let iter = iter.into_iter(); self.reserve(iter.size_hint().0); @@ -314,13 +332,16 @@ impl> Extend for FlatStack { } } -impl<'a, R: Region> IntoIterator for &'a FlatStack { +impl<'a, R: Region, S: OffsetContainer<::Index>> IntoIterator for &'a FlatStack +where + &'a S: IntoIterator::Index>, +{ type Item = R::ReadItem<'a>; - type IntoIter = Iter<'a, R>; + type IntoIter = Iter<'a, R, <&'a S as IntoIterator>::IntoIter>; fn into_iter(self) -> Self::IntoIter { Iter { - inner: self.indices.iter(), + inner: self.indices.into_iter(), region: &self.region, } } @@ -328,14 +349,22 @@ impl<'a, R: Region> IntoIterator for &'a FlatStack { /// An iterator over [`FlatStack`]. The iterator yields [`Region::ReadItem`] elements, which /// it obtains by looking up indices. -pub struct Iter<'a, R: Region> { +pub struct Iter<'a, R, S> +where + R: Region, + S: Iterator::Index>, +{ /// Iterator over indices. - inner: std::slice::Iter<'a, R::Index>, + inner: S, /// Region to map indices to read items. 
region: &'a R, } -impl<'a, R: Region> Iterator for Iter<'a, R> { +impl<'a, R, S> Iterator for Iter<'a, R, S> +where + R: Region, + S: Iterator::Index>, +{ type Item = R::ReadItem<'a>; fn next(&mut self) -> Option { @@ -347,9 +376,18 @@ impl<'a, R: Region> Iterator for Iter<'a, R> { } } -impl ExactSizeIterator for Iter<'_, R> {} +impl<'a, R, S> ExactSizeIterator for Iter<'a, R, S> +where + R: Region, + S: ExactSizeIterator::Index>, +{ +} -impl Clone for Iter<'_, R> { +impl<'a, R, S> Clone for Iter<'a, R, S> +where + R: Region, + S: Iterator::Index> + Clone, +{ fn clone(&self) -> Self { Self { inner: self.inner.clone(), @@ -391,8 +429,9 @@ pub struct CopyIter(pub I); #[cfg(test)] mod tests { - use crate::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs}; - use crate::impls::tuple::TupleARegion; + use crate::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs, Sequential}; + use crate::impls::offsets::OffsetStride; + use crate::impls::tuple::{TupleABRegion, TupleARegion}; use super::*; @@ -628,6 +667,52 @@ mod tests { owned_roundtrip::(&mut c, index); } + #[test] + fn test_my_understanding() { + let item = (vec![1, 2, 3], vec![1, 2, 3]); + let mut r = >, OwnedRegion>>::default(); + let _index: ((usize, usize), (usize, usize)) = r.push(&item); + + let mut r = >>, + ConsecutiveOffsetPairs>, + >>::default(); + let _index: (Sequential, Sequential) = r.push(&item); + + let mut r = >>, + ConsecutiveOffsetPairs>, + >, + >>::default(); + let _index: Sequential = r.push(&item); + + let mut fs = FlatStack::< + CombineSequential< + TupleABRegion< + ConsecutiveOffsetPairs>>, + CollapseSequence>>, + >, + >, + OffsetStride, + >::default(); + + for _ in 0..1000 { + fs.copy(&item); + let mut size = 0; + let mut capacity = 0; + let mut count = 0; + fs.heap_size(|siz, cap| { + size += siz; + capacity += cap; + count += 1; + }); + + println!("size {size}, capacity {capacity}, allocations {count}"); + } + assert_eq!(&item.1, fs.get(0).1); + } + /// Test that items and owned variants can be reborrowed to shorten their lifetimes. fn _test_reborrow(item: R::ReadItem<'_>, owned: &R::Owned) where diff --git a/tests/recursive.rs b/tests/recursive.rs index 2dba7a5..aceb271 100644 --- a/tests/recursive.rs +++ b/tests/recursive.rs @@ -1,7 +1,7 @@ //! Demonstration of how to encode recursive data structures. 
use flatcontainer::impls::deduplicate::ConsecutiveOffsetPairs; -use flatcontainer::{IntoOwned, Push, Region, StringRegion}; +use flatcontainer::{IntoOwned, Push, Region, Storage, StringRegion}; #[derive(Clone)] struct List(T, Option>>); @@ -108,7 +108,7 @@ where } fn heap_size(&self, mut callback: F) { - self.indexes.heap_size(&mut callback); + Storage::heap_size(&self.indexes, &mut callback); self.inner.heap_size(callback); } From 4c90cb5e8a90e9c8daba20933e8b8c82c668e27e Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Sun, 23 Jun 2024 20:49:37 -0400 Subject: [PATCH 02/15] Introduce PushStorage trait Signed-off-by: Moritz Hoffmann --- src/impls/slice_copy.rs | 44 +++++++++++----- src/impls/storage.rs | 112 ++++++++++++++++++++-------------------- 2 files changed, 88 insertions(+), 68 deletions(-) diff --git a/src/impls/slice_copy.rs b/src/impls/slice_copy.rs index 7bc208e..0a491ed 100644 --- a/src/impls/slice_copy.rs +++ b/src/impls/slice_copy.rs @@ -5,7 +5,7 @@ use std::marker::PhantomData; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -use crate::impls::storage::SliceStorage; +use crate::impls::storage::{PushStorage, SliceStorage}; use crate::{CopyIter, Push, Region, ReserveItems}; /// A container for owned types. @@ -116,24 +116,32 @@ impl> Default for OwnedRegion { impl Push<[T; N]> for OwnedRegion where [T]: ToOwned, - S: SliceStorage, + S: SliceStorage + for<'a> PushStorage>, { #[inline] fn push(&mut self, item: [T; N]) -> as Region>::Index { let start = self.slices.len(); - self.slices.extend(item); + self.slices.push_storage(CopyIter(item)); (start, self.slices.len()) } } -impl, const N: usize> Push<&[T; N]> for OwnedRegion { +impl Push<&[T; N]> for OwnedRegion +where + T: Clone, + S: SliceStorage + for<'a> PushStorage<&'a [T]>, +{ #[inline] fn push(&mut self, item: &[T; N]) -> as Region>::Index { self.push(item.as_slice()) } } -impl, const N: usize> Push<&&[T; N]> for OwnedRegion { +impl Push<&&[T; N]> for OwnedRegion +where + T: Clone, + S: SliceStorage + for<'a> PushStorage<&'a [T]>, +{ #[inline] fn push(&mut self, item: &&[T; N]) -> as Region>::Index { self.push(*item) @@ -152,11 +160,15 @@ impl<'b, T: Clone, S: SliceStorage, const N: usize> ReserveItems<&'b [T; N]> } } -impl> Push<&[T]> for OwnedRegion { +impl Push<&[T]> for OwnedRegion +where + T: Clone, + S: SliceStorage + for<'a> PushStorage<&'a [T]>, +{ #[inline] fn push(&mut self, item: &[T]) -> as Region>::Index { let start = self.slices.len(); - self.slices.extend_from_slice(item); + self.slices.push_storage(item); (start, self.slices.len()) } } @@ -188,17 +200,21 @@ where impl Push> for OwnedRegion where [T]: ToOwned, - S: SliceStorage, + S: SliceStorage + for<'a> PushStorage<&'a mut Vec>, { #[inline] fn push(&mut self, mut item: Vec) -> as Region>::Index { let start = self.slices.len(); - self.slices.append(&mut item); + self.slices.push_storage(&mut item); (start, self.slices.len()) } } -impl> Push<&Vec> for OwnedRegion { +impl Push<&Vec> for OwnedRegion +where + T: Clone, + S: SliceStorage + for<'a> PushStorage<&'a [T]>, +{ #[inline] fn push(&mut self, item: &Vec) -> as Region>::Index { self.push(item.as_slice()) @@ -219,15 +235,17 @@ where } } -impl, I: IntoIterator> Push> - for OwnedRegion +impl Push> for OwnedRegion where + I: IntoIterator, ::IntoIter: ExactSizeIterator, + T: Clone, + S: SliceStorage + PushStorage>, { #[inline] fn push(&mut self, item: CopyIter) -> as Region>::Index { let start = self.slices.len(); - self.slices.extend(item.0); + self.slices.push_storage(item); (start, 
self.slices.len()) } } diff --git a/src/impls/storage.rs b/src/impls/storage.rs index 0f01898..54a74e6 100644 --- a/src/impls/storage.rs +++ b/src/impls/storage.rs @@ -3,6 +3,8 @@ #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; +use crate::CopyIter; + /// Behavior to allocate storage pub trait Storage: Default { /// Allocate storage for at least `capacity` elements. @@ -34,20 +36,6 @@ pub trait Storage: Default { /// Observe the heap size information (size and capacity). fn heap_size(&self, callback: F); - /// Extend from iterator. Must be [`ExactSizeIterator`] to efficiently - /// pre-allocate. - fn extend>(&mut self, iter: I) - where - I::IntoIter: ExactSizeIterator; - - /// Append the contents of `data`. - fn append(&mut self, data: &mut Vec); - - /// Extend from the contents of `slice`. - fn extend_from_slice(&mut self, slice: &[T]) - where - T: Clone; - /// Lookup the slice in range `start..end`. fn index(&self, index: usize) -> &T; @@ -86,24 +74,6 @@ impl Storage for Vec { callback(self.len() * size_of_t, self.capacity() * size_of_t); } - #[inline] - fn extend>(&mut self, iter: I) { - Extend::extend(self, iter); - } - - #[inline] - fn append(&mut self, data: &mut Vec) { - self.append(data); - } - - #[inline] - fn extend_from_slice(&mut self, slice: &[T]) - where - T: Clone, - { - self.extend_from_slice(slice); - } - #[inline] #[must_use] fn index(&self, index: usize) -> &T { @@ -123,6 +93,33 @@ impl Storage for Vec { } } +/// Push an item into storage. +pub trait PushStorage { + /// Push an item into storage. + fn push_storage(&mut self, item: T); +} + +impl PushStorage<&mut Vec> for Vec { + #[inline] + fn push_storage(&mut self, item: &mut Vec) { + self.append(item); + } +} + +impl PushStorage<&[T]> for Vec { + #[inline] + fn push_storage(&mut self, item: &[T]) { + self.extend_from_slice(item); + } +} + +impl, T> PushStorage> for Vec { + #[inline] + fn push_storage(&mut self, item: CopyIter) { + self.extend(item.0); + } +} + impl SliceStorage for Vec { #[inline] #[must_use] @@ -246,42 +243,47 @@ impl Storage for Doubling { self.heap_size(callback); } - fn extend>(&mut self, iter: I) - where - I::IntoIter: ExactSizeIterator, - { - self.extend(iter); + fn index(&self, index: usize) -> &T { + self.index(index) } #[inline] - fn append(&mut self, data: &mut Vec) { - self.len += data.len(); - self.reserve(data.len()); - self.inner.last_mut().unwrap().append(data); + fn len(&self) -> usize { + self.len() } #[inline] - fn extend_from_slice(&mut self, slice: &[T]) - where - T: Clone, - { - self.len += slice.len(); - self.reserve(slice.len()); - self.inner.last_mut().unwrap().extend_from_slice(slice); + fn is_empty(&self) -> bool { + self.is_empty() } +} - fn index(&self, index: usize) -> &T { - self.index(index) +impl PushStorage> for Doubling +where + I: IntoIterator, + I::IntoIter: ExactSizeIterator, +{ + #[inline] + fn push_storage(&mut self, item: CopyIter) { + self.extend(item.0); } +} +impl PushStorage<&mut Vec> for Doubling { #[inline] - fn len(&self) -> usize { - self.len() + fn push_storage(&mut self, item: &mut Vec) { + self.len += item.len(); + self.reserve(item.len()); + self.inner.last_mut().unwrap().append(item); } +} +impl PushStorage<&[T]> for Doubling { #[inline] - fn is_empty(&self) -> bool { - self.is_empty() + fn push_storage(&mut self, item: &[T]) { + self.len += item.len(); + self.reserve(item.len()); + self.inner.last_mut().unwrap().extend_from_slice(item); } } @@ -353,7 +355,7 @@ mod tests { let mut start = 0; for i in 0..1000 { - 
d.extend_from_slice(&[i, i + 1, i + 3]); + d.push_storage([i, i + 1, i + 3].as_slice()); let end = d.len(); assert_eq!(&[i, i + 1, i + 3], d.index_slice(start, end)); start = end; From f5e907762c1b5ab55edafea900bd7674f9af9d5b Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Sun, 23 Jun 2024 21:18:09 -0400 Subject: [PATCH 03/15] Unify offsets and storage Signed-off-by: Moritz Hoffmann --- src/impls/offsets.rs | 166 +++++++++++++++---------------------------- src/impls/storage.rs | 62 +++++++--------- src/lib.rs | 2 +- tests/recursive.rs | 4 +- 4 files changed, 85 insertions(+), 149 deletions(-) diff --git a/src/impls/offsets.rs b/src/impls/offsets.rs index 7269dc0..8539325 100644 --- a/src/impls/offsets.rs +++ b/src/impls/offsets.rs @@ -4,19 +4,12 @@ use serde::{Deserialize, Serialize}; use crate::impls::deduplicate::Sequential; +use crate::impls::storage::AllocateStorage; /// A container to store offsets. -pub trait OffsetContainer: Default { - /// Allocate with space for `capacity` elements. - fn with_capacity(capacity: usize) -> Self; - - /// Allocate storage large enough to absorb `regions`'s contents. - fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self - where - Self: 'a, - { - Self::with_capacity(regions.map(Self::len).sum()) - } +pub trait OffsetContainer: AllocateStorage { + /// Lookup an index. May panic for invalid indexes. + fn index(&self, index: usize) -> T; /// Accepts a newly pushed element. fn push(&mut self, item: T); @@ -26,28 +19,6 @@ pub trait OffsetContainer: Default { fn extend>(&mut self, iter: I) where I::IntoIter: ExactSizeIterator; - - /// Lookup an index. May panic for invalid indexes. - fn index(&self, index: usize) -> T; - - /// Clear all contents. - fn clear(&mut self); - - /// Returns the number of elements. - fn len(&self) -> usize; - - /// Returns `true` if empty. - #[inline] - #[must_use] - fn is_empty(&self) -> bool { - self.len() == 0 - } - - /// Reserve space for `additional` elements. - fn reserve(&mut self, additional: usize); - - /// Heap size, size - capacity - fn heap_size(&self, callback: F); } /// A container for offsets that can represent strides of offsets. @@ -255,30 +226,11 @@ where spilled: OffsetList, } -impl OffsetContainer for OffsetStride { +impl AllocateStorage for OffsetStride { fn with_capacity(_capacity: usize) -> Self { Self::default() } - fn push(&mut self, item: Sequential) { - let pushed = self.push(item.0); - debug_assert!(pushed); - } - - fn extend>(&mut self, iter: I) - where - I::IntoIter: ExactSizeIterator, - { - for item in iter { - let pushed = self.push(item.0); - debug_assert!(pushed); - } - } - - fn index(&self, index: usize) -> Sequential { - self.index(index).into() - } - fn clear(&mut self) { self.clear(); } @@ -294,44 +246,41 @@ impl OffsetContainer for OffsetStride { fn heap_size(&self, _callback: F) { // Nop } + + fn is_empty(&self) -> bool { + self.is_empty() + } } -impl OffsetContainer for OffsetOptimized -where - S: OffsetContainer, - L: OffsetContainer, -{ - fn with_capacity(_capacity: usize) -> Self { - // `self.strided` doesn't have any capacity, and we don't know the structure of the data. 
- Self::default() +impl OffsetContainer for OffsetStride { + fn index(&self, index: usize) -> Sequential { + self.index(index).into() } - fn push(&mut self, item: usize) { - if self.spilled.is_empty() { - let inserted = self.strided.push(item); - if !inserted { - self.spilled.push(item); - } - } else { - self.spilled.push(item); - } + fn push(&mut self, item: Sequential) { + let pushed = self.push(item.0); + debug_assert!(pushed); } - fn extend>(&mut self, iter: I) + fn extend>(&mut self, iter: I) where I::IntoIter: ExactSizeIterator, { for item in iter { - self.push(item); + let pushed = self.push(item.0); + debug_assert!(pushed); } } +} - fn index(&self, index: usize) -> usize { - if index < self.strided.len() { - self.strided.index(index) - } else { - self.spilled.index(index - self.strided.len()) - } +impl AllocateStorage for OffsetOptimized +where + S: OffsetContainer, + L: OffsetContainer, +{ + fn with_capacity(_capacity: usize) -> Self { + // `self.strided` doesn't have any capacity, and we don't know the structure of the data. + Self::default() } fn clear(&mut self) { @@ -343,6 +292,10 @@ where self.strided.len() + self.spilled.len() } + fn is_empty(&self) -> bool { + self.strided.is_empty() && self.spilled.is_empty() + } + fn reserve(&mut self, additional: usize) { if !self.spilled.is_empty() { self.spilled.reserve(additional); @@ -354,12 +307,34 @@ where } } -impl Extend for OffsetOptimized +impl OffsetContainer for OffsetOptimized where S: OffsetContainer, L: OffsetContainer, { - fn extend>(&mut self, iter: T) { + fn index(&self, index: usize) -> usize { + if index < self.strided.len() { + self.strided.index(index) + } else { + self.spilled.index(index - self.strided.len()) + } + } + + fn push(&mut self, item: usize) { + if self.spilled.is_empty() { + let inserted = self.strided.push(item); + if !inserted { + self.spilled.push(item); + } + } else { + self.spilled.push(item); + } + } + + fn extend>(&mut self, iter: I) + where + I::IntoIter: ExactSizeIterator, + { for item in iter { self.push(item); } @@ -367,8 +342,8 @@ where } impl OffsetContainer for Vec { - fn with_capacity(capacity: usize) -> Self { - Vec::with_capacity(capacity) + fn index(&self, index: usize) -> T { + self[index] } #[inline] @@ -382,33 +357,6 @@ impl OffsetContainer for Vec { { Extend::extend(self, iter); } - - #[inline] - #[must_use] - fn index(&self, index: usize) -> T { - self[index] - } - - #[inline] - fn clear(&mut self) { - self.clear(); - } - - #[inline] - #[must_use] - fn len(&self) -> usize { - self.len() - } - - #[inline] - fn reserve(&mut self, additional: usize) { - self.reserve(additional); - } - - fn heap_size(&self, mut callback: F) { - let size_of_t = std::mem::size_of::(); - callback(self.len() * size_of_t, self.capacity() * size_of_t); - } } #[cfg(test)] diff --git a/src/impls/storage.rs b/src/impls/storage.rs index 54a74e6..8bcdc51 100644 --- a/src/impls/storage.rs +++ b/src/impls/storage.rs @@ -6,11 +6,13 @@ use serde::{Deserialize, Serialize}; use crate::CopyIter; /// Behavior to allocate storage -pub trait Storage: Default { +pub trait AllocateStorage: Default { /// Allocate storage for at least `capacity` elements. + #[must_use] fn with_capacity(capacity: usize) -> Self; /// Allocate storage large enough to absorb `regions`'s contents. + #[must_use] fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self where Self: 'a, @@ -22,6 +24,7 @@ pub trait Storage: Default { fn reserve(&mut self, additional: usize); /// Reserve space for `regions`. 
+ #[inline] fn reserve_regions<'a, I>(&mut self, regions: I) where Self: 'a, @@ -36,23 +39,30 @@ pub trait Storage: Default { /// Observe the heap size information (size and capacity). fn heap_size(&self, callback: F); - /// Lookup the slice in range `start..end`. - fn index(&self, index: usize) -> &T; - /// Returns the number of elements. + #[must_use] fn len(&self) -> usize; /// Returns `true` if empty, i.e., it doesn't contain any elements. + #[must_use] fn is_empty(&self) -> bool; } +/// Behavior for storage. +// TODO: This should really be `std::ops::Index`. +pub trait Storage: AllocateStorage { + /// Lookup the item at `index`. + fn index(&self, index: usize) -> &T; +} + /// Behavior for slice storage. -pub trait SliceStorage: Storage { +// TODO: This should really be `std::ops::Index>`. +pub trait SliceStorage: AllocateStorage { /// Lookup the slice in range `start..end`. fn index_slice(&self, start: usize, end: usize) -> &[T]; } -impl Storage for Vec { +impl AllocateStorage for Vec { #[inline] fn with_capacity(capacity: usize) -> Self { Vec::with_capacity(capacity) @@ -74,12 +84,6 @@ impl Storage for Vec { callback(self.len() * size_of_t, self.capacity() * size_of_t); } - #[inline] - #[must_use] - fn index(&self, index: usize) -> &T { - &self[index] - } - #[inline] #[must_use] fn len(&self) -> usize { @@ -93,6 +97,14 @@ impl Storage for Vec { } } +impl Storage for Vec { + #[inline] + #[must_use] + fn index(&self, index: usize) -> &T { + &self[index] + } +} + /// Push an item into storage. pub trait PushStorage { /// Push an item into storage. @@ -226,7 +238,7 @@ impl Doubling { } } -impl Storage for Doubling { +impl AllocateStorage for Doubling { fn with_capacity(capacity: usize) -> Self { Self::with_capacity(capacity) } @@ -243,10 +255,6 @@ impl Storage for Doubling { self.heap_size(callback); } - fn index(&self, index: usize) -> &T { - self.index(index) - } - #[inline] fn len(&self) -> usize { self.len() @@ -306,10 +314,6 @@ mod offsetcontainer { use crate::impls::storage::Doubling; impl OffsetContainer for Doubling { - fn with_capacity(capacity: usize) -> Self { - Self::with_capacity(capacity) - } - fn push(&mut self, item: T) { self.len += 1; self.reserve(1); @@ -326,22 +330,6 @@ mod offsetcontainer { fn index(&self, index: usize) -> T { *self.index(index) } - - fn clear(&mut self) { - self.clear() - } - - fn len(&self) -> usize { - self.len() - } - - fn reserve(&mut self, additional: usize) { - self.reserve(additional); - } - - fn heap_size(&self, callback: F) { - self.heap_size(callback); - } } } diff --git a/src/lib.rs b/src/lib.rs index b041e30..877f563 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -286,7 +286,7 @@ impl::Index>> FlatStack { #[inline] pub fn heap_size(&self, mut callback: F) { self.region.heap_size(&mut callback); - OffsetContainer::heap_size(&self.indices, callback); + self.indices.heap_size(callback); } } diff --git a/tests/recursive.rs b/tests/recursive.rs index aceb271..2dba7a5 100644 --- a/tests/recursive.rs +++ b/tests/recursive.rs @@ -1,7 +1,7 @@ //! Demonstration of how to encode recursive data structures. 
use flatcontainer::impls::deduplicate::ConsecutiveOffsetPairs; -use flatcontainer::{IntoOwned, Push, Region, Storage, StringRegion}; +use flatcontainer::{IntoOwned, Push, Region, StringRegion}; #[derive(Clone)] struct List(T, Option>>); @@ -108,7 +108,7 @@ where } fn heap_size(&self, mut callback: F) { - Storage::heap_size(&self.indexes, &mut callback); + self.indexes.heap_size(&mut callback); self.inner.heap_size(callback); } From dbfd9e04b339db67069d6b2841aef0f7312887c6 Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Sun, 23 Jun 2024 21:27:22 -0400 Subject: [PATCH 04/15] Use index traits --- src/impls/slice_copy.rs | 53 +++++++++++++++++++++++++++-------------- src/impls/storage.rs | 44 +++++++--------------------------- src/lib.rs | 1 - 3 files changed, 43 insertions(+), 55 deletions(-) diff --git a/src/impls/slice_copy.rs b/src/impls/slice_copy.rs index 0a491ed..9fcea9f 100644 --- a/src/impls/slice_copy.rs +++ b/src/impls/slice_copy.rs @@ -5,7 +5,7 @@ use std::marker::PhantomData; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -use crate::impls::storage::{PushStorage, SliceStorage}; +use crate::impls::storage::{AllocateStorage, PushStorage}; use crate::{CopyIter, Push, Region, ReserveItems}; /// A container for owned types. @@ -53,7 +53,7 @@ impl Clone for OwnedRegion { impl Region for OwnedRegion where [T]: ToOwned, - S: SliceStorage, + S: AllocateStorage + std::ops::Index, Output = [T]>, { type Owned = <[T] as ToOwned>::Owned; type ReadItem<'a> = &'a [T] where Self: 'a; @@ -72,7 +72,7 @@ where #[inline] fn index(&self, (start, end): Self::Index) -> Self::ReadItem<'_> { - self.slices.index_slice(start, end) + &self.slices[start..end] } #[inline] @@ -103,7 +103,7 @@ where } } -impl> Default for OwnedRegion { +impl> Default for OwnedRegion { #[inline] fn default() -> Self { Self { @@ -116,7 +116,9 @@ impl> Default for OwnedRegion { impl Push<[T; N]> for OwnedRegion where [T]: ToOwned, - S: SliceStorage + for<'a> PushStorage>, + S: AllocateStorage + + for<'a> PushStorage> + + std::ops::Index, Output = [T]>, { #[inline] fn push(&mut self, item: [T; N]) -> as Region>::Index { @@ -129,7 +131,9 @@ where impl Push<&[T; N]> for OwnedRegion where T: Clone, - S: SliceStorage + for<'a> PushStorage<&'a [T]>, + S: AllocateStorage + + for<'a> PushStorage<&'a [T]> + + std::ops::Index, Output = [T]>, { #[inline] fn push(&mut self, item: &[T; N]) -> as Region>::Index { @@ -140,7 +144,9 @@ where impl Push<&&[T; N]> for OwnedRegion where T: Clone, - S: SliceStorage + for<'a> PushStorage<&'a [T]>, + S: AllocateStorage + + for<'a> PushStorage<&'a [T]> + + std::ops::Index, Output = [T]>, { #[inline] fn push(&mut self, item: &&[T; N]) -> as Region>::Index { @@ -148,8 +154,10 @@ where } } -impl<'b, T: Clone, S: SliceStorage, const N: usize> ReserveItems<&'b [T; N]> - for OwnedRegion +impl<'b, T, S, const N: usize> ReserveItems<&'b [T; N]> for OwnedRegion +where + T: Clone, + S: AllocateStorage + std::ops::Index, Output = [T]>, { #[inline] fn reserve_items(&mut self, items: I) @@ -163,7 +171,9 @@ impl<'b, T: Clone, S: SliceStorage, const N: usize> ReserveItems<&'b [T; N]> impl Push<&[T]> for OwnedRegion where T: Clone, - S: SliceStorage + for<'a> PushStorage<&'a [T]>, + S: AllocateStorage + + for<'a> PushStorage<&'a [T]> + + std::ops::Index, Output = [T]>, { #[inline] fn push(&mut self, item: &[T]) -> as Region>::Index { @@ -173,7 +183,7 @@ where } } -impl> Push<&&[T]> for OwnedRegion +impl> Push<&&[T]> for OwnedRegion where for<'a> Self: Push<&'a [T]>, { @@ -186,7 +196,7 @@ where impl<'b, 
T, S> ReserveItems<&'b [T]> for OwnedRegion where [T]: ToOwned, - S: SliceStorage, + S: AllocateStorage + std::ops::Index, Output = [T]>, { #[inline] fn reserve_items(&mut self, items: I) @@ -200,7 +210,9 @@ where impl Push> for OwnedRegion where [T]: ToOwned, - S: SliceStorage + for<'a> PushStorage<&'a mut Vec>, + S: AllocateStorage + + for<'a> PushStorage<&'a mut Vec> + + std::ops::Index, Output = [T]>, { #[inline] fn push(&mut self, mut item: Vec) -> as Region>::Index { @@ -213,7 +225,9 @@ where impl Push<&Vec> for OwnedRegion where T: Clone, - S: SliceStorage + for<'a> PushStorage<&'a [T]>, + S: AllocateStorage + + for<'a> PushStorage<&'a [T]> + + std::ops::Index, Output = [T]>, { #[inline] fn push(&mut self, item: &Vec) -> as Region>::Index { @@ -224,7 +238,7 @@ where impl<'a, T, S> ReserveItems<&'a Vec> for OwnedRegion where [T]: ToOwned, - S: SliceStorage, + S: AllocateStorage + std::ops::Index, Output = [T]>, { #[inline] fn reserve_items(&mut self, items: I) @@ -240,7 +254,9 @@ where I: IntoIterator, ::IntoIter: ExactSizeIterator, T: Clone, - S: SliceStorage + PushStorage>, + S: AllocateStorage + + PushStorage> + + std::ops::Index, Output = [T]>, { #[inline] fn push(&mut self, item: CopyIter) -> as Region>::Index { @@ -250,10 +266,11 @@ where } } -impl, J: IntoIterator> ReserveItems> - for OwnedRegion +impl ReserveItems> for OwnedRegion where [T]: ToOwned, + S: AllocateStorage + std::ops::Index, Output = [T]>, + J: IntoIterator, { #[inline] fn reserve_items(&mut self, items: I) diff --git a/src/impls/storage.rs b/src/impls/storage.rs index 8bcdc51..143ee40 100644 --- a/src/impls/storage.rs +++ b/src/impls/storage.rs @@ -2,6 +2,7 @@ #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; +use std::ops::Range; use crate::CopyIter; @@ -48,20 +49,6 @@ pub trait AllocateStorage: Default { fn is_empty(&self) -> bool; } -/// Behavior for storage. -// TODO: This should really be `std::ops::Index`. -pub trait Storage: AllocateStorage { - /// Lookup the item at `index`. - fn index(&self, index: usize) -> &T; -} - -/// Behavior for slice storage. -// TODO: This should really be `std::ops::Index>`. -pub trait SliceStorage: AllocateStorage { - /// Lookup the slice in range `start..end`. - fn index_slice(&self, start: usize, end: usize) -> &[T]; -} - impl AllocateStorage for Vec { #[inline] fn with_capacity(capacity: usize) -> Self { @@ -97,14 +84,6 @@ impl AllocateStorage for Vec { } } -impl Storage for Vec { - #[inline] - #[must_use] - fn index(&self, index: usize) -> &T { - &self[index] - } -} - /// Push an item into storage. pub trait PushStorage { /// Push an item into storage. 
@@ -132,14 +111,6 @@ impl, T> PushStorage> for Vec { } } -impl SliceStorage for Vec { - #[inline] - #[must_use] - fn index_slice(&self, start: usize, end: usize) -> &[T] { - &self[start..end] - } -} - /// TODO #[derive(Debug)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -295,16 +266,17 @@ impl PushStorage<&[T]> for Doubling { } } -impl SliceStorage for Doubling { +impl std::ops::Index> for Doubling { + type Output = [T]; #[inline] - fn index_slice(&self, start: usize, end: usize) -> &[T] { + fn index(&self, range: Range) -> &Self::Output { let index = self .offsets .iter() - .position(|&o| o > start) + .position(|&o| o > range.start) .unwrap_or_else(|| self.offsets.len().saturating_sub(1)); - let start = start - self.offsets[index]; - let end = end - self.offsets[index]; + let start = range.start - self.offsets[index]; + let end = range.end - self.offsets[index]; &self.inner[index][start..end] } } @@ -345,7 +317,7 @@ mod tests { for i in 0..1000 { d.push_storage([i, i + 1, i + 3].as_slice()); let end = d.len(); - assert_eq!(&[i, i + 1, i + 3], d.index_slice(start, end)); + assert_eq!(&[i, i + 1, i + 3], &d[start..end]); start = end; } } diff --git a/src/lib.rs b/src/lib.rs index 877f563..954148b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -17,7 +17,6 @@ pub use impls::option::OptionRegion; pub use impls::result::ResultRegion; pub use impls::slice::SliceRegion; pub use impls::slice_copy::OwnedRegion; -pub use impls::storage::Storage; pub use impls::string::StringRegion; /// An index into a region. Automatically implemented for relevant types. From 72fc6dfedf998e2465db9e33f1e5c03e207b2929 Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Sun, 23 Jun 2024 21:40:12 -0400 Subject: [PATCH 05/15] Cleanup Signed-off-by: Moritz Hoffmann --- src/impls/deduplicate.rs | 18 +++++++----------- src/impls/offsets.rs | 10 +++++----- src/impls/slice_copy.rs | 30 +++++++++++++++--------------- src/impls/storage.rs | 6 +++--- src/lib.rs | 2 +- 5 files changed, 31 insertions(+), 35 deletions(-) diff --git a/src/impls/deduplicate.rs b/src/impls/deduplicate.rs index 5d57a49..e926d88 100644 --- a/src/impls/deduplicate.rs +++ b/src/impls/deduplicate.rs @@ -122,13 +122,13 @@ where /// /// The following example shows that two inserts into a copy region have a collapsible index: /// ``` -/// use flatcontainer::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs}; +/// use flatcontainer::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs, Sequential}; /// use flatcontainer::{Push, OwnedRegion, Region, StringRegion}; /// let mut r = >>::default(); /// /// let index = r.push(&b"abc"); -/// assert_eq!(index.0, 0); -/// assert_eq!(b"abc", r.index(0.into())); +/// assert_eq!(index, Sequential(0)); +/// assert_eq!(b"abc", r.index(Sequential(0))); /// ``` #[derive(Debug)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -249,7 +249,7 @@ where debug_assert_eq!(index.0, self.last_index); self.last_index = index.1; self.offsets.push(index.1); - (self.offsets.len() - 2).into() + Sequential(self.offsets.len() - 2) } } @@ -271,12 +271,6 @@ where #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct Sequential(pub usize); -impl From for Sequential { - fn from(value: usize) -> Self { - Self(value) - } -} - /// TODO #[derive(Default)] pub struct CombineSequential(R); @@ -289,7 +283,9 @@ where CombineSequential>: Region, { fn push(&mut self, item: T) -> Self::Index { - self.0.push(item).0 + let (a, b) = self.0.push(item); + assert_eq!(a, b); + a } } 
diff --git a/src/impls/offsets.rs b/src/impls/offsets.rs index 8539325..31d7b9f 100644 --- a/src/impls/offsets.rs +++ b/src/impls/offsets.rs @@ -4,10 +4,10 @@ use serde::{Deserialize, Serialize}; use crate::impls::deduplicate::Sequential; -use crate::impls::storage::AllocateStorage; +use crate::impls::storage::Storage; /// A container to store offsets. -pub trait OffsetContainer: AllocateStorage { +pub trait OffsetContainer: Storage { /// Lookup an index. May panic for invalid indexes. fn index(&self, index: usize) -> T; @@ -226,7 +226,7 @@ where spilled: OffsetList, } -impl AllocateStorage for OffsetStride { +impl Storage for OffsetStride { fn with_capacity(_capacity: usize) -> Self { Self::default() } @@ -254,7 +254,7 @@ impl AllocateStorage for OffsetStride { impl OffsetContainer for OffsetStride { fn index(&self, index: usize) -> Sequential { - self.index(index).into() + Sequential(self.index(index)) } fn push(&mut self, item: Sequential) { @@ -273,7 +273,7 @@ impl OffsetContainer for OffsetStride { } } -impl AllocateStorage for OffsetOptimized +impl Storage for OffsetOptimized where S: OffsetContainer, L: OffsetContainer, diff --git a/src/impls/slice_copy.rs b/src/impls/slice_copy.rs index 9fcea9f..460de1a 100644 --- a/src/impls/slice_copy.rs +++ b/src/impls/slice_copy.rs @@ -5,7 +5,7 @@ use std::marker::PhantomData; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -use crate::impls::storage::{AllocateStorage, PushStorage}; +use crate::impls::storage::{Storage, PushStorage}; use crate::{CopyIter, Push, Region, ReserveItems}; /// A container for owned types. @@ -53,7 +53,7 @@ impl Clone for OwnedRegion { impl Region for OwnedRegion where [T]: ToOwned, - S: AllocateStorage + std::ops::Index, Output = [T]>, + S: Storage + std::ops::Index, Output = [T]>, { type Owned = <[T] as ToOwned>::Owned; type ReadItem<'a> = &'a [T] where Self: 'a; @@ -103,7 +103,7 @@ where } } -impl> Default for OwnedRegion { +impl> Default for OwnedRegion { #[inline] fn default() -> Self { Self { @@ -116,7 +116,7 @@ impl> Default for OwnedRegion { impl Push<[T; N]> for OwnedRegion where [T]: ToOwned, - S: AllocateStorage + S: Storage + for<'a> PushStorage> + std::ops::Index, Output = [T]>, { @@ -131,7 +131,7 @@ where impl Push<&[T; N]> for OwnedRegion where T: Clone, - S: AllocateStorage + S: Storage + for<'a> PushStorage<&'a [T]> + std::ops::Index, Output = [T]>, { @@ -144,7 +144,7 @@ where impl Push<&&[T; N]> for OwnedRegion where T: Clone, - S: AllocateStorage + S: Storage + for<'a> PushStorage<&'a [T]> + std::ops::Index, Output = [T]>, { @@ -157,7 +157,7 @@ where impl<'b, T, S, const N: usize> ReserveItems<&'b [T; N]> for OwnedRegion where T: Clone, - S: AllocateStorage + std::ops::Index, Output = [T]>, + S: Storage + std::ops::Index, Output = [T]>, { #[inline] fn reserve_items(&mut self, items: I) @@ -171,7 +171,7 @@ where impl Push<&[T]> for OwnedRegion where T: Clone, - S: AllocateStorage + S: Storage + for<'a> PushStorage<&'a [T]> + std::ops::Index, Output = [T]>, { @@ -183,7 +183,7 @@ where } } -impl> Push<&&[T]> for OwnedRegion +impl> Push<&&[T]> for OwnedRegion where for<'a> Self: Push<&'a [T]>, { @@ -196,7 +196,7 @@ where impl<'b, T, S> ReserveItems<&'b [T]> for OwnedRegion where [T]: ToOwned, - S: AllocateStorage + std::ops::Index, Output = [T]>, + S: Storage + std::ops::Index, Output = [T]>, { #[inline] fn reserve_items(&mut self, items: I) @@ -210,7 +210,7 @@ where impl Push> for OwnedRegion where [T]: ToOwned, - S: AllocateStorage + S: Storage + for<'a> PushStorage<&'a mut Vec> + 
std::ops::Index, Output = [T]>, { @@ -225,7 +225,7 @@ where impl Push<&Vec> for OwnedRegion where T: Clone, - S: AllocateStorage + S: Storage + for<'a> PushStorage<&'a [T]> + std::ops::Index, Output = [T]>, { @@ -238,7 +238,7 @@ where impl<'a, T, S> ReserveItems<&'a Vec> for OwnedRegion where [T]: ToOwned, - S: AllocateStorage + std::ops::Index, Output = [T]>, + S: Storage + std::ops::Index, Output = [T]>, { #[inline] fn reserve_items(&mut self, items: I) @@ -254,7 +254,7 @@ where I: IntoIterator, ::IntoIter: ExactSizeIterator, T: Clone, - S: AllocateStorage + S: Storage + PushStorage> + std::ops::Index, Output = [T]>, { @@ -269,7 +269,7 @@ where impl ReserveItems> for OwnedRegion where [T]: ToOwned, - S: AllocateStorage + std::ops::Index, Output = [T]>, + S: Storage + std::ops::Index, Output = [T]>, J: IntoIterator, { #[inline] diff --git a/src/impls/storage.rs b/src/impls/storage.rs index 143ee40..e1b5e0b 100644 --- a/src/impls/storage.rs +++ b/src/impls/storage.rs @@ -7,7 +7,7 @@ use std::ops::Range; use crate::CopyIter; /// Behavior to allocate storage -pub trait AllocateStorage: Default { +pub trait Storage: Default { /// Allocate storage for at least `capacity` elements. #[must_use] fn with_capacity(capacity: usize) -> Self; @@ -49,7 +49,7 @@ pub trait AllocateStorage: Default { fn is_empty(&self) -> bool; } -impl AllocateStorage for Vec { +impl Storage for Vec { #[inline] fn with_capacity(capacity: usize) -> Self { Vec::with_capacity(capacity) @@ -209,7 +209,7 @@ impl Doubling { } } -impl AllocateStorage for Doubling { +impl Storage for Doubling { fn with_capacity(capacity: usize) -> Self { Self::with_capacity(capacity) } diff --git a/src/lib.rs b/src/lib.rs index 954148b..2b75773 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -690,7 +690,7 @@ mod tests { CombineSequential< TupleABRegion< ConsecutiveOffsetPairs>>, - CollapseSequence>>, + ConsecutiveOffsetPairs>, >, >, OffsetStride, From 4de078dae3faa910fbf346dedcf266532b59b098 Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Sun, 23 Jun 2024 22:21:00 -0400 Subject: [PATCH 06/15] better sequential error handling Signed-off-by: Moritz Hoffmann --- src/impls/deduplicate.rs | 20 ++++++++++++-------- src/impls/offsets.rs | 2 +- src/lib.rs | 5 +++++ 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/impls/deduplicate.rs b/src/impls/deduplicate.rs index e926d88..5102de7 100644 --- a/src/impls/deduplicate.rs +++ b/src/impls/deduplicate.rs @@ -3,7 +3,7 @@ #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -use crate::impls::offsets::{OffsetContainer, OffsetOptimized}; +use crate::impls::offsets::{OffsetContainer, OffsetOptimized, OffsetStride}; use crate::impls::tuple::TupleABRegion; use crate::{Push, Region, ReserveItems}; @@ -273,23 +273,27 @@ pub struct Sequential(pub usize); /// TODO #[derive(Default)] -pub struct CombineSequential(R); +pub struct CombineSequential(R, S); -impl Push for CombineSequential> +impl Push for CombineSequential, (OffsetStride, OffsetStride)> where A: Region, B: Region, TupleABRegion: Region + Push, - CombineSequential>: Region, + Self: Region, { fn push(&mut self, item: T) -> Self::Index { let (a, b) = self.0.push(item); - assert_eq!(a, b); - a + let pushed = self.1.0.push(a.0); + assert!(pushed, "0 ({:?}).push({a:?})", self.1.0); + let pushed = self.1.1.push(b.0); + assert!(pushed, "1 ({:?}).push({b:?})", self.1.1); + assert_eq!(self.1.0.len(), self.1.1.len()); + Sequential(self.1.0.len() - 1) } } -impl Region for CombineSequential> +impl Region for CombineSequential, 
(OffsetStride, OffsetStride)> where A: Region, B: Region, @@ -306,7 +310,7 @@ where { Self( as Region>::merge_regions( regions.map(|r| &r.0), - )) + ), Default::default()) } fn index(&self, index: Self::Index) -> Self::ReadItem<'_> { diff --git a/src/impls/offsets.rs b/src/impls/offsets.rs index 31d7b9f..af09148 100644 --- a/src/impls/offsets.rs +++ b/src/impls/offsets.rs @@ -259,7 +259,7 @@ impl OffsetContainer for OffsetStride { fn push(&mut self, item: Sequential) { let pushed = self.push(item.0); - debug_assert!(pushed); + debug_assert!(pushed, "Failed to push {item:?} into {self:?}"); } fn extend>(&mut self, iter: I) diff --git a/src/lib.rs b/src/lib.rs index 2b75773..d7800da 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -669,6 +669,7 @@ mod tests { #[test] fn test_my_understanding() { let item = (vec![1, 2, 3], vec![1, 2, 3]); + let item2 = (vec![1, 2, 3, 4], vec![1, 2, 3, 4]); let mut r = >, OwnedRegion>>::default(); let _index: ((usize, usize), (usize, usize)) = r.push(&item); @@ -683,6 +684,7 @@ mod tests { ConsecutiveOffsetPairs>>, ConsecutiveOffsetPairs>, >, + (OffsetStride, OffsetStride), >>::default(); let _index: Sequential = r.push(&item); @@ -690,8 +692,10 @@ mod tests { CombineSequential< TupleABRegion< ConsecutiveOffsetPairs>>, + // CollapseSequence>>, ConsecutiveOffsetPairs>, >, + (OffsetStride, OffsetStride), >, OffsetStride, >::default(); @@ -709,6 +713,7 @@ mod tests { println!("size {size}, capacity {capacity}, allocations {count}"); } + fs.copy(&item2); assert_eq!(&item.1, fs.get(0).1); } From 932f1db7ad8759770102b0fe08b651bdbd2d9da1 Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Sun, 23 Jun 2024 22:31:14 -0400 Subject: [PATCH 07/15] Remove sequential Signed-off-by: Moritz Hoffmann --- src/impls/deduplicate.rs | 57 +++++++++++++++++++--------------------- src/impls/offsets.rs | 50 +---------------------------------- src/impls/slice_copy.rs | 2 +- src/lib.rs | 27 ++++++++++--------- 4 files changed, 43 insertions(+), 93 deletions(-) diff --git a/src/impls/deduplicate.rs b/src/impls/deduplicate.rs index 5102de7..a8ab262 100644 --- a/src/impls/deduplicate.rs +++ b/src/impls/deduplicate.rs @@ -3,7 +3,7 @@ #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -use crate::impls::offsets::{OffsetContainer, OffsetOptimized, OffsetStride}; +use crate::impls::offsets::{OffsetContainer, OffsetOptimized}; use crate::impls::tuple::TupleABRegion; use crate::{Push, Region, ReserveItems}; @@ -122,13 +122,12 @@ where /// /// The following example shows that two inserts into a copy region have a collapsible index: /// ``` -/// use flatcontainer::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs, Sequential}; +/// use flatcontainer::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs}; /// use flatcontainer::{Push, OwnedRegion, Region, StringRegion}; /// let mut r = >>::default(); /// /// let index = r.push(&b"abc"); -/// assert_eq!(index, Sequential(0)); -/// assert_eq!(b"abc", r.index(Sequential(0))); +/// assert_eq!(b"abc", r.index(index)); /// ``` #[derive(Debug)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] @@ -184,7 +183,7 @@ where where Self: 'a; - type Index = Sequential; + type Index = usize; #[inline] fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self @@ -203,7 +202,7 @@ where #[inline] fn index(&self, index: Self::Index) -> Self::ReadItem<'_> { self.inner - .index((self.offsets.index(index.0), self.offsets.index(index.0 + 1))) + .index((self.offsets.index(index), self.offsets.index(index + 1))) } #[inline] @@ 
-249,7 +248,7 @@ where debug_assert_eq!(index.0, self.last_index); self.last_index = index.1; self.offsets.push(index.1); - Sequential(self.offsets.len() - 2) + self.offsets.len() - 2 } } @@ -266,51 +265,49 @@ where } } -/// TODO -#[derive(Copy, Clone, Debug, Ord, PartialOrd, Eq, PartialEq)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub struct Sequential(pub usize); - /// TODO #[derive(Default)] pub struct CombineSequential(R, S); -impl Push for CombineSequential, (OffsetStride, OffsetStride)> +impl Push for CombineSequential, (O1, O2)> where - A: Region, - B: Region, - TupleABRegion: Region + Push, - Self: Region, + A: Region, + B: Region, + O1: OffsetContainer, + O2: OffsetContainer, + TupleABRegion: Region + Push, + Self: Region, { fn push(&mut self, item: T) -> Self::Index { let (a, b) = self.0.push(item); - let pushed = self.1.0.push(a.0); - assert!(pushed, "0 ({:?}).push({a:?})", self.1.0); - let pushed = self.1.1.push(b.0); - assert!(pushed, "1 ({:?}).push({b:?})", self.1.1); - assert_eq!(self.1.0.len(), self.1.1.len()); - Sequential(self.1.0.len() - 1) + self.1 .0.push(a); + self.1 .1.push(b); + assert_eq!(self.1 .0.len(), self.1 .1.len()); + self.1 .0.len() - 1 } } -impl Region for CombineSequential, (OffsetStride, OffsetStride)> +impl Region for CombineSequential, (O1, O2)> where - A: Region, - B: Region, + A: Region, + B: Region, + O1: OffsetContainer, + O2: OffsetContainer, { type Owned = as Region>::Owned; type ReadItem<'a> = as Region>::ReadItem<'a> where Self: 'a; - type Index = Sequential; + type Index = usize; fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self where Self: 'a, { - Self( as Region>::merge_regions( - regions.map(|r| &r.0), - ), Default::default()) + Self( + as Region>::merge_regions(regions.map(|r| &r.0)), + Default::default(), + ) } fn index(&self, index: Self::Index) -> Self::ReadItem<'_> { diff --git a/src/impls/offsets.rs b/src/impls/offsets.rs index af09148..ea6491f 100644 --- a/src/impls/offsets.rs +++ b/src/impls/offsets.rs @@ -3,7 +3,6 @@ #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -use crate::impls::deduplicate::Sequential; use crate::impls::storage::Storage; /// A container to store offsets. 
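The `usize` index restored in the deduplicate.rs hunks above is exactly the `self.offsets.len() - 2` value computed in `push`, while `index` rebuilds the original pair from two adjacent offsets. A standalone sketch of that bookkeeping, not the crate's implementation (`ConsecutivePairs` is a made-up name, and the seed `0` stands in for whatever the real constructor initializes):

```rust
// `ConsecutivePairs` is a hypothetical, simplified model of what
// `ConsecutiveOffsetPairs` keeps once its index is a plain `usize` again:
// only the `end` of each pushed `(start, end)` pair, seeded with 0.
struct ConsecutivePairs {
    offsets: Vec<usize>,
}

impl ConsecutivePairs {
    fn new() -> Self {
        Self { offsets: vec![0] }
    }

    // Accepts a pair whose `start` matches the previous `end` and returns a
    // compact index, mirroring `self.offsets.len() - 2` in the patch.
    fn push(&mut self, (start, end): (usize, usize)) -> usize {
        assert_eq!(start, *self.offsets.last().unwrap());
        self.offsets.push(end);
        self.offsets.len() - 2
    }

    // Recovers the original pair from two adjacent offsets.
    fn index(&self, index: usize) -> (usize, usize) {
        (self.offsets[index], self.offsets[index + 1])
    }
}

fn main() {
    let mut pairs = ConsecutivePairs::new();
    let a = pairs.push((0, 3));
    let b = pairs.push((3, 7));
    assert_eq!(pairs.index(a), (0, 3));
    assert_eq!(pairs.index(b), (3, 7));
}
```

In the crate the offsets live in an `OffsetContainer` implementation such as `OffsetOptimized`, so regularly spaced offsets compress down to a stride description.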
@@ -226,53 +225,6 @@ where spilled: OffsetList, } -impl Storage for OffsetStride { - fn with_capacity(_capacity: usize) -> Self { - Self::default() - } - - fn clear(&mut self) { - self.clear(); - } - - fn len(&self) -> usize { - self.len() - } - - fn reserve(&mut self, _additional: usize) { - // Nop - } - - fn heap_size(&self, _callback: F) { - // Nop - } - - fn is_empty(&self) -> bool { - self.is_empty() - } -} - -impl OffsetContainer for OffsetStride { - fn index(&self, index: usize) -> Sequential { - Sequential(self.index(index)) - } - - fn push(&mut self, item: Sequential) { - let pushed = self.push(item.0); - debug_assert!(pushed, "Failed to push {item:?} into {self:?}"); - } - - fn extend>(&mut self, iter: I) - where - I::IntoIter: ExactSizeIterator, - { - for item in iter { - let pushed = self.push(item.0); - debug_assert!(pushed); - } - } -} - impl Storage for OffsetOptimized where S: OffsetContainer, @@ -375,7 +327,7 @@ mod tests { let mut r = SliceRegion::< ConsecutiveOffsetPairs, - OffsetStride, + OffsetOptimized, >::default(); let idx = copy(&mut r, ["abc"]); assert_eq!("abc", r.index(idx).get(0)) diff --git a/src/impls/slice_copy.rs b/src/impls/slice_copy.rs index 460de1a..e20873f 100644 --- a/src/impls/slice_copy.rs +++ b/src/impls/slice_copy.rs @@ -5,7 +5,7 @@ use std::marker::PhantomData; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -use crate::impls::storage::{Storage, PushStorage}; +use crate::impls::storage::{PushStorage, Storage}; use crate::{CopyIter, Push, Region, ReserveItems}; /// A container for owned types. diff --git a/src/lib.rs b/src/lib.rs index d7800da..29fb118 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -428,8 +428,8 @@ pub struct CopyIter(pub I); #[cfg(test)] mod tests { - use crate::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs, Sequential}; - use crate::impls::offsets::OffsetStride; + use crate::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs}; + use crate::impls::offsets::OffsetOptimized; use crate::impls::tuple::{TupleABRegion, TupleARegion}; use super::*; @@ -677,43 +677,44 @@ mod tests { ConsecutiveOffsetPairs>>, ConsecutiveOffsetPairs>, >>::default(); - let _index: (Sequential, Sequential) = r.push(&item); + let _index: (usize, usize) = r.push(&item); let mut r = >>, ConsecutiveOffsetPairs>, >, - (OffsetStride, OffsetStride), + (OffsetOptimized, OffsetOptimized), >>::default(); - let _index: Sequential = r.push(&item); + let _index: usize = r.push(&item); let mut fs = FlatStack::< CombineSequential< TupleABRegion< ConsecutiveOffsetPairs>>, - // CollapseSequence>>, - ConsecutiveOffsetPairs>, + CollapseSequence>>, >, - (OffsetStride, OffsetStride), + (OffsetOptimized, OffsetOptimized), >, - OffsetStride, + OffsetOptimized, >::default(); - for _ in 0..1000 { - fs.copy(&item); + for item in std::iter::repeat(&item) + .take(1000) + .chain(std::iter::once(&item2)) + { + fs.copy(item); let mut size = 0; let mut capacity = 0; let mut count = 0; fs.heap_size(|siz, cap| { size += siz; capacity += cap; - count += 1; + count += (cap > 0) as usize }); println!("size {size}, capacity {capacity}, allocations {count}"); } - fs.copy(&item2); assert_eq!(&item.1, fs.get(0).1); } From 77dc09e37289306b211cf35c025569483aa8a6c3 Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Sun, 23 Jun 2024 22:42:47 -0400 Subject: [PATCH 08/15] OffsetList without default parameters Signed-off-by: Moritz Hoffmann --- src/impls/offsets.rs | 10 +++++++--- src/impls/storage.rs | 2 +- src/lib.rs | 1 + 3 files changed, 9 insertions(+), 4 
deletions(-) diff --git a/src/impls/offsets.rs b/src/impls/offsets.rs index ea6491f..5ecf5df 100644 --- a/src/impls/offsets.rs +++ b/src/impls/offsets.rs @@ -22,7 +22,11 @@ pub trait OffsetContainer: Storage { /// A container for offsets that can represent strides of offsets. /// -/// Does not implement `OffsetContainer` because it cannot accept arbitrary pushes. +/// Does not implement `OffsetContainer` because it cannot accept arbitrary pushes. Instead, +/// its `push` method returns a boolean to indicate whether the push was successful or not. +/// +/// This type can absorb sequences of the form `0, stride, 2 * stride, 3 * stride, ...` and +/// saturates in a repeated last element. #[derive(Eq, PartialEq, Debug, Default, Clone, Copy)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum OffsetStride { @@ -127,7 +131,7 @@ impl OffsetStride { /// A list of unsigned integers that uses `u32` elements as long as they are small enough, and switches to `u64` once they are not. #[derive(Eq, PartialEq, Clone, Debug, Default)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub struct OffsetList, L = Vec> +pub struct OffsetList where S: OffsetContainer, L: OffsetContainer, @@ -395,7 +399,7 @@ mod tests { #[test] fn test_chonk() { - let mut ol = ::default(); + let mut ol = , Vec<_>>>::default(); ol.push(usize::MAX); assert_eq!(usize::MAX, ol.index(0)); } diff --git a/src/impls/storage.rs b/src/impls/storage.rs index e1b5e0b..0164af6 100644 --- a/src/impls/storage.rs +++ b/src/impls/storage.rs @@ -111,7 +111,7 @@ impl, T> PushStorage> for Vec { } } -/// TODO +/// A storage that maintains non-reallocating allocations and allocates double the size when needed. #[derive(Debug)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct Doubling { diff --git a/src/lib.rs b/src/lib.rs index 29fb118..b2d8902 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -715,6 +715,7 @@ mod tests { println!("size {size}, capacity {capacity}, allocations {count}"); } + println!("fs size {}", std::mem::size_of_val(&fs)); assert_eq!(&item.1, fs.get(0).1); } From af4fa51fe4bf231a87738343b7992322ee3e7acc Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Sun, 23 Jun 2024 22:44:03 -0400 Subject: [PATCH 09/15] Fix bench Signed-off-by: Moritz Hoffmann --- benches/bench.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/benches/bench.rs b/benches/bench.rs index 2e7cf3d..9903bab 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -5,7 +5,7 @@ extern crate test; use flatcontainer::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs}; -use flatcontainer::impls::offsets::OffsetStride; +use flatcontainer::impls::offsets::OffsetOptimized; use flatcontainer::impls::tuple::{TupleABCRegion, TupleABRegion}; use flatcontainer::{ ColumnsRegion, FlatStack, MirrorRegion, OwnedRegion, Push, Region, RegionPreference, @@ -87,7 +87,7 @@ fn string10_copy_region(bencher: &mut Bencher) { #[bench] fn string10_copy_region_collapse(bencher: &mut Bencher) { _bench_copy_region::< - SliceRegion>, OffsetStride>, + SliceRegion>, OffsetOptimized>, _, >(bencher, vec![format!("grawwwwrr!"); 1024]); } From 45a361dbdee15e9f8f8e35735c7274fb8e5e1b70 Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Thu, 27 Jun 2024 11:52:19 -0400 Subject: [PATCH 10/15] Add OffsetContainer to columns region Signed-off-by: Moritz Hoffmann --- src/impls/columns.rs | 58 ++++++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 23 deletions(-) diff --git a/src/impls/columns.rs 
b/src/impls/columns.rs index 1eb6225..f7e4fd7 100644 --- a/src/impls/columns.rs +++ b/src/impls/columns.rs @@ -8,7 +8,7 @@ use std::slice::Iter; use serde::{Deserialize, Serialize}; use crate::impls::deduplicate::ConsecutiveOffsetPairs; -use crate::impls::offsets::OffsetOptimized; +use crate::impls::offsets::{OffsetContainer, OffsetOptimized}; use crate::{CopyIter, IntoOwned}; use crate::{OwnedRegion, Push, Region}; @@ -54,24 +54,27 @@ use crate::{OwnedRegion, Push, Region}; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr( feature = "serde", - serde( - bound = "R: Serialize + for<'a> Deserialize<'a>, R::Index: Serialize + for<'a> Deserialize<'a>" - ) + serde(bound = " + R: Serialize + for<'a> Deserialize<'a>, + R::Index: Serialize + for<'a> Deserialize<'a>, + O: Serialize + for<'a> Deserialize<'a>, + ") )] -pub struct ColumnsRegion +pub struct ColumnsRegion where R: Region, { /// Indices to address rows in `inner`. For each row, we remember /// an index for each column. - indices: ConsecutiveOffsetPairs, OffsetOptimized>, + indices: ConsecutiveOffsetPairs, O>, /// Storage for columns. inner: Vec, } -impl Clone for ColumnsRegion +impl Clone for ColumnsRegion where R: Region + Clone, + O: Clone, { fn clone(&self) -> Self { Self { @@ -86,9 +89,10 @@ where } } -impl Region for ColumnsRegion +impl Region for ColumnsRegion where R: Region, + O: OffsetContainer, { type Owned = Vec; type ReadItem<'a> = ReadColumns<'a, R> where Self: 'a; @@ -162,9 +166,10 @@ where } } -impl Default for ColumnsRegion +impl Default for ColumnsRegion where R: Region, + O: OffsetContainer, { fn default() -> Self { Self { @@ -368,11 +373,12 @@ where } } -impl Push> for ColumnsRegion +impl Push> for ColumnsRegion where for<'a> R: Region + Push<::ReadItem<'a>>, + O: OffsetContainer, { - fn push(&mut self, item: ReadColumns<'_, R>) -> as Region>::Index { + fn push(&mut self, item: ReadColumns<'_, R>) -> as Region>::Index { // Ensure all required regions exist. while self.inner.len() < item.len() { self.inner.push(R::default()); @@ -386,11 +392,12 @@ where } } -impl<'a, R, T> Push<&'a [T]> for ColumnsRegion +impl<'a, R, O, T> Push<&'a [T]> for ColumnsRegion where R: Region + Push<&'a T>, + O: OffsetContainer, { - fn push(&mut self, item: &'a [T]) -> as Region>::Index { + fn push(&mut self, item: &'a [T]) -> as Region>::Index { // Ensure all required regions exist. while self.inner.len() < item.len() { self.inner.push(R::default()); @@ -404,11 +411,12 @@ where } } -impl Push<[T; N]> for ColumnsRegion +impl Push<[T; N]> for ColumnsRegion where R: Region + Push, + O: OffsetContainer, { - fn push(&mut self, item: [T; N]) -> as Region>::Index { + fn push(&mut self, item: [T; N]) -> as Region>::Index { // Ensure all required regions exist. while self.inner.len() < item.len() { self.inner.push(R::default()); @@ -422,11 +430,12 @@ where } } -impl<'a, R, T, const N: usize> Push<&'a [T; N]> for ColumnsRegion +impl<'a, R, O, T, const N: usize> Push<&'a [T; N]> for ColumnsRegion where R: Region + Push<&'a T>, + O: OffsetContainer, { - fn push(&mut self, item: &'a [T; N]) -> as Region>::Index { + fn push(&mut self, item: &'a [T; N]) -> as Region>::Index { // Ensure all required regions exist. 
while self.inner.len() < item.len() { self.inner.push(R::default()); @@ -440,11 +449,12 @@ where } } -impl Push> for ColumnsRegion +impl Push> for ColumnsRegion where R: Region + Push, + O: OffsetContainer, { - fn push(&mut self, item: Vec) -> as Region>::Index { + fn push(&mut self, item: Vec) -> as Region>::Index { // Ensure all required regions exist. while self.inner.len() < item.len() { self.inner.push(R::default()); @@ -458,11 +468,12 @@ where } } -impl<'a, R, T> Push<&'a Vec> for ColumnsRegion +impl<'a, R, O, T> Push<&'a Vec> for ColumnsRegion where R: Region + Push<&'a T>, + O: OffsetContainer, { - fn push(&mut self, item: &'a Vec) -> as Region>::Index { + fn push(&mut self, item: &'a Vec) -> as Region>::Index { // Ensure all required regions exist. while self.inner.len() < item.len() { self.inner.push(R::default()); @@ -476,14 +487,15 @@ where } } -impl Push> for ColumnsRegion +impl Push> for ColumnsRegion where R: Region + Push, + O: OffsetContainer, I: IntoIterator, I::IntoIter: ExactSizeIterator, { #[inline] - fn push(&mut self, item: CopyIter) -> as Region>::Index { + fn push(&mut self, item: CopyIter) -> as Region>::Index { let iter = item.0.into_iter().enumerate().map(|(index, value)| { // Ensure all required regions exist. if self.inner.len() <= index { From f0290455c5d242522e76d70f9611b1dd817d8422 Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Thu, 27 Jun 2024 16:02:51 -0400 Subject: [PATCH 11/15] FlatStack cleanup Signed-off-by: Moritz Hoffmann --- src/lib.rs | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index b2d8902..b67556b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -164,18 +164,17 @@ impl<'a, T: ToOwned + ?Sized> IntoOwned<'a> for &'a T { feature = "serde", serde(bound = " R: Serialize + for<'a> Deserialize<'a>, - R::Index: Serialize + for<'a> Deserialize<'a>, S: Serialize + for<'a> Deserialize<'a>, ") )] -pub struct FlatStack = Vec<::Index>> { +pub struct FlatStack::Index>> { /// The indices, which we use to lookup items in the region. indices: S, /// A region to index into. region: R, } -impl::Index>> Default for FlatStack { +impl Default for FlatStack { #[inline] fn default() -> Self { Self { @@ -319,8 +318,10 @@ impl FlatStack { } } -impl, S: OffsetContainer<::Index>> Extend - for FlatStack +impl Extend for FlatStack +where + R: Region + Push, + S: OffsetContainer<::Index>, { fn extend>(&mut self, iter: I) { let iter = iter.into_iter(); @@ -395,7 +396,11 @@ where } } -impl, T> FromIterator for FlatStack { +impl FromIterator for FlatStack +where + R: Region + Push, + S: OffsetContainer<::Index>, +{ fn from_iter>(iter: I) -> Self { let iter = iter.into_iter(); let mut c = Self::with_capacity(iter.size_hint().0); @@ -404,7 +409,7 @@ impl, T> FromIterator for FlatStack { } } -impl Clone for FlatStack { +impl Clone for FlatStack { fn clone(&self) -> Self { Self { region: self.region.clone(), @@ -496,7 +501,7 @@ mod tests { // Make sure that types are debug, even if we don't use this in the test. 
for<'a> R::ReadItem<'a>: Debug, { - let mut c = FlatStack::default(); + let mut c = FlatStack::<_>::default(); c.copy(t); let mut cc = c.clone(); From 392e683531444c5c4d4c0a2948808171fea290ac Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Tue, 2 Jul 2024 10:52:10 -0400 Subject: [PATCH 12/15] Move iteration into OffsetContainer Signed-off-by: Moritz Hoffmann --- src/impls/columns.rs | 14 ++-- src/impls/offsets.rs | 193 +++++++++++++++++++++++++++++++++++++++++++ src/impls/storage.rs | 45 +++++++++- 3 files changed, 244 insertions(+), 8 deletions(-) diff --git a/src/impls/columns.rs b/src/impls/columns.rs index f7e4fd7..b678f4a 100644 --- a/src/impls/columns.rs +++ b/src/impls/columns.rs @@ -527,7 +527,7 @@ mod tests { indices.push(index); } - for (&index, row) in indices.iter().zip(&data) { + for (index, row) in indices.iter().zip(&data) { assert!(row.iter().copied().eq(r.index(index).iter())); } } @@ -553,7 +553,7 @@ mod tests { indices.push(index); } - for (&index, row) in indices.iter().zip(&data) { + for (index, row) in indices.iter().zip(&data) { assert!(row.iter().copied().eq(r.index(index).iter())); } @@ -582,7 +582,7 @@ mod tests { indices.push(index); } - for (&index, row) in indices.iter().zip(&data) { + for (index, row) in indices.iter().zip(&data) { assert!(row.iter().eq(r.index(index).iter())); } @@ -610,8 +610,8 @@ mod tests { indices.push(index); } - for (&index, row) in indices.iter().zip(&data) { - assert!(row.iter().copied().eq(r.index(index).iter())); + for (index, row) in indices.iter().zip(&data) { + assert!(row.iter().eq(r.index(index).iter())); } println!("{r:?}"); @@ -638,8 +638,8 @@ mod tests { indices.push(index); } - for (&index, row) in indices.iter().zip(&data) { - assert!(row.iter().copied().eq(r.index(index).iter())); + for (index, row) in indices.iter().zip(&data) { + assert!(row.iter().eq(r.index(index).iter())); } assert_eq!("1", r.index(indices[1]).get(0)); diff --git a/src/impls/offsets.rs b/src/impls/offsets.rs index 5ecf5df..0ebc628 100644 --- a/src/impls/offsets.rs +++ b/src/impls/offsets.rs @@ -7,6 +7,11 @@ use crate::impls::storage::Storage; /// A container to store offsets. pub trait OffsetContainer: Storage { + /// Iterator over the elements. + type Iter<'a>: Iterator + where + Self: 'a; + /// Lookup an index. May panic for invalid indexes. fn index(&self, index: usize) -> T; @@ -18,6 +23,9 @@ pub trait OffsetContainer: Storage { fn extend>(&mut self, iter: I) where I::IntoIter: ExactSizeIterator; + + /// Returns an iterator over the elements. + fn iter(&self) -> Self::Iter<'_>; } /// A container for offsets that can represent strides of offsets. @@ -45,6 +53,7 @@ pub enum OffsetStride { impl OffsetStride { /// Accepts or rejects a newly pushed element. #[must_use] + #[inline] pub fn push(&mut self, item: usize) -> bool { match self { OffsetStride::Empty => { @@ -88,6 +97,7 @@ impl OffsetStride { /// Panics for out-of-bounds accesses, i.e., if `index` greater or equal to /// [`len`][OffsetStride::len]. #[must_use] + #[inline] pub fn index(&self, index: usize) -> usize { match self { OffsetStride::Empty => { @@ -107,6 +117,7 @@ impl OffsetStride { /// Returns the number of elements. #[must_use] + #[inline] pub fn len(&self) -> usize { match self { OffsetStride::Empty => 0, @@ -118,14 +129,47 @@ impl OffsetStride { /// Returns `true` if empty. #[must_use] + #[inline] pub fn is_empty(&self) -> bool { matches!(self, OffsetStride::Empty) } /// Removes all elements. 
+ #[inline] pub fn clear(&mut self) { *self = Self::default(); } + + /// Return an iterator over the elements. + #[must_use] + #[inline] + pub fn iter(&self) -> OffsetStrideIter { + OffsetStrideIter { + strided: *self, + index: 0, + } + } +} + +/// An iterator over the elements of an [`OffsetStride`]. +pub struct OffsetStrideIter { + strided: OffsetStride, + index: usize, +} + +impl Iterator for OffsetStrideIter { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + if self.index < self.strided.len() { + let item = self.strided.index(self.index); + self.index += 1; + Some(item) + } else { + None + } + } } /// A list of unsigned integers that uses `u32` elements as long as they are small enough, and switches to `u64` once they are not. @@ -149,6 +193,7 @@ where { /// Allocate a new list with a specified capacity. #[must_use] + #[inline] pub fn with_capacity(cap: usize) -> Self { Self { smol: S::with_capacity(cap), @@ -161,6 +206,7 @@ where /// # Panics /// /// Panics if `usize` does not fit in `u64`. + #[inline] pub fn push(&mut self, offset: usize) { if self.chonk.is_empty() { if let Ok(smol) = offset.try_into() { @@ -179,6 +225,7 @@ where /// /// Panics if the index is out of bounds, i.e., it is larger or equal to the length. #[must_use] + #[inline] pub fn index(&self, index: usize) -> usize { if index < self.smol.len() { self.smol.index(index).try_into().unwrap() @@ -189,33 +236,136 @@ where } /// The number of offsets in the list. #[must_use] + #[inline] pub fn len(&self) -> usize { self.smol.len() + self.chonk.len() } /// Returns `true` if this list contains no elements. #[must_use] + #[inline] pub fn is_empty(&self) -> bool { self.smol.is_empty() && self.chonk.is_empty() } /// Reserve space for `additional` elements. + #[inline] pub fn reserve(&mut self, additional: usize) { self.smol.reserve(additional); } /// Remove all elements. + #[inline] pub fn clear(&mut self) { self.smol.clear(); self.chonk.clear(); } + #[inline] fn heap_size(&self, mut callback: F) { self.smol.heap_size(&mut callback); self.chonk.heap_size(callback); } } +impl Storage for OffsetList +where + S: OffsetContainer, + L: OffsetContainer, +{ + #[inline] + fn with_capacity(capacity: usize) -> Self { + Self::with_capacity(capacity) + } + + #[inline] + fn reserve(&mut self, additional: usize) { + self.reserve(additional) + } + + #[inline] + fn clear(&mut self) { + self.clear() + } + + #[inline] + fn heap_size(&self, callback: F) { + self.heap_size(callback) + } + + #[inline] + fn len(&self) -> usize { + self.len() + } + + #[inline] + fn is_empty(&self) -> bool { + self.is_empty() + } +} + +impl OffsetContainer for OffsetList +where + S: OffsetContainer, + L: OffsetContainer, +{ + type Iter<'a> = OffsetListIter<'a, S, L> where Self: 'a; + + #[inline] + fn index(&self, index: usize) -> usize { + self.index(index) + } + + #[inline] + fn push(&mut self, item: usize) { + self.push(item) + } + + #[inline] + fn extend>(&mut self, iter: I) + where + I::IntoIter: ExactSizeIterator, + { + for item in iter { + self.push(item); + } + } + + #[inline] + fn iter(&self) -> Self::Iter<'_> { + OffsetListIter { + smol: self.smol.iter(), + chonk: self.chonk.iter(), + } + } +} + +/// An iterator over the elements of an [`OffsetList`]. 
+pub struct OffsetListIter<'a, S, L> +where + S: OffsetContainer + 'a, + L: OffsetContainer + 'a, +{ + smol: S::Iter<'a>, + chonk: L::Iter<'a>, +} + +impl<'a, S, L> Iterator for OffsetListIter<'a, S, L> +where + S: OffsetContainer + 'a, + L: OffsetContainer + 'a, +{ + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + self.smol + .next() + .map(|x| x as usize) + .or_else(|| self.chonk.next().map(|x| x as usize)) + } +} + /// An offset container implementation that first tries to recognize strides, and then spilles into /// a regular offset list. #[derive(Eq, PartialEq, Default, Debug, Clone)] @@ -234,30 +384,36 @@ where S: OffsetContainer, L: OffsetContainer, { + #[inline] fn with_capacity(_capacity: usize) -> Self { // `self.strided` doesn't have any capacity, and we don't know the structure of the data. Self::default() } + #[inline] fn clear(&mut self) { self.spilled.clear(); self.strided = OffsetStride::default(); } + #[inline] fn len(&self) -> usize { self.strided.len() + self.spilled.len() } + #[inline] fn is_empty(&self) -> bool { self.strided.is_empty() && self.spilled.is_empty() } + #[inline] fn reserve(&mut self, additional: usize) { if !self.spilled.is_empty() { self.spilled.reserve(additional); } } + #[inline] fn heap_size(&self, callback: F) { self.spilled.heap_size(callback); } @@ -268,6 +424,8 @@ where S: OffsetContainer, L: OffsetContainer, { + type Iter<'a> = OffsetOptimizedIter<'a, S , L> where Self: 'a; + fn index(&self, index: usize) -> usize { if index < self.strided.len() { self.strided.index(index) @@ -295,9 +453,40 @@ where self.push(item); } } + + fn iter(&self) -> Self::Iter<'_> { + OffsetOptimizedIter { + strided: self.strided.iter(), + spilled: self.spilled.iter(), + } + } +} + +/// An iterator over the elements of an [`OffsetOptimized`]. +pub struct OffsetOptimizedIter<'a, S, L> +where + S: OffsetContainer + 'a, + L: OffsetContainer + 'a, +{ + strided: OffsetStrideIter, + spilled: as OffsetContainer>::Iter<'a>, +} + +impl<'a, S, L> Iterator for OffsetOptimizedIter<'a, S, L> +where + S: OffsetContainer + 'a, + L: OffsetContainer + 'a, +{ + type Item = usize; + + fn next(&mut self) -> Option { + self.strided.next().or_else(|| self.spilled.next()) + } } impl OffsetContainer for Vec { + type Iter<'a> = std::iter::Copied> where Self: 'a; + fn index(&self, index: usize) -> T { self[index] } @@ -313,6 +502,10 @@ impl OffsetContainer for Vec { { Extend::extend(self, iter); } + + fn iter(&self) -> Self::Iter<'_> { + self.as_slice().iter().copied() + } } #[cfg(test)] diff --git a/src/impls/storage.rs b/src/impls/storage.rs index 0164af6..33d52ac 100644 --- a/src/impls/storage.rs +++ b/src/impls/storage.rs @@ -14,6 +14,7 @@ pub trait Storage: Default { /// Allocate storage large enough to absorb `regions`'s contents. #[must_use] + #[inline] fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self where Self: 'a, @@ -112,6 +113,8 @@ impl, T> PushStorage> for Vec { } /// A storage that maintains non-reallocating allocations and allocates double the size when needed. +/// +/// Not considered part of the stable interface of this crate. 
#[derive(Debug)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct Doubling { @@ -210,18 +213,22 @@ impl Doubling { } impl Storage for Doubling { + #[inline] fn with_capacity(capacity: usize) -> Self { Self::with_capacity(capacity) } + #[inline] fn reserve(&mut self, additional: usize) { self.reserve(additional); } + #[inline] fn clear(&mut self) { self.clear() } + #[inline] fn heap_size(&self, callback: F) { self.heap_size(callback); } @@ -281,11 +288,39 @@ impl std::ops::Index> for Doubling { } } +/// An iterator over the elements of a [`Doubling`]. +pub struct DoublingIter<'a, T: 'a> { + inner: Option>>, + remaining: std::slice::Iter<'a, Vec>, +} + +impl<'a, T: Copy> Iterator for DoublingIter<'a, T> { + type Item = T; + + #[inline] + fn next(&mut self) -> Option { + loop { + if let Some(inner) = &mut self.inner { + if let Some(item) = inner.next() { + return Some(item); + } + } + self.inner = self + .remaining + .next() + .map(|vec| vec.as_slice().iter().copied()); + self.inner.as_ref()?; + } + } +} + mod offsetcontainer { use crate::impls::offsets::OffsetContainer; - use crate::impls::storage::Doubling; + use crate::impls::storage::{Doubling, DoublingIter}; impl OffsetContainer for Doubling { + type Iter<'a> = DoublingIter<'a, T> where Self: 'a; + fn push(&mut self, item: T) { self.len += 1; self.reserve(1); @@ -302,6 +337,14 @@ mod offsetcontainer { fn index(&self, index: usize) -> T { *self.index(index) } + + fn iter(&self) -> Self::Iter<'_> { + let mut iter = self.inner.as_slice().iter(); + DoublingIter { + inner: iter.next().map(|vec| vec.as_slice().iter().copied()), + remaining: iter, + } + } } } From 728e96e1635f10fc311779c98c045bffc1ec5416 Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Tue, 2 Jul 2024 13:04:01 -0400 Subject: [PATCH 13/15] Remove non-finished ideas Signed-off-by: Moritz Hoffmann --- src/impls/deduplicate.rs | 74 ----------- src/impls/offsets.rs | 9 -- src/impls/storage.rs | 258 --------------------------------------- src/lib.rs | 57 +-------- 4 files changed, 1 insertion(+), 397 deletions(-) diff --git a/src/impls/deduplicate.rs b/src/impls/deduplicate.rs index a8ab262..7c19bc6 100644 --- a/src/impls/deduplicate.rs +++ b/src/impls/deduplicate.rs @@ -4,7 +4,6 @@ use serde::{Deserialize, Serialize}; use crate::impls::offsets::{OffsetContainer, OffsetOptimized}; -use crate::impls::tuple::TupleABRegion; use crate::{Push, Region, ReserveItems}; /// A region to deduplicate consecutive equal items. 
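The file trimmed here still centers on `CollapseSequence`, the "deduplicate consecutive equal items" region named in the context line above: pushing an item equal to the most recently pushed one hands back the previous index instead of growing the region. A simplified standalone sketch of that idea, not the crate's code (`CollapseLast` is a made-up name, and equality is checked on owned items rather than on region read items):

```rust
// `CollapseLast` is a hypothetical illustration of consecutive deduplication:
// only a push that differs from the previous item allocates a new slot.
struct CollapseLast<T> {
    items: Vec<T>,
}

impl<T: PartialEq> CollapseLast<T> {
    fn new() -> Self {
        Self { items: Vec::new() }
    }

    fn push(&mut self, item: T) -> usize {
        if self.items.last() != Some(&item) {
            self.items.push(item);
        }
        self.items.len() - 1
    }
}

fn main() {
    let mut region = CollapseLast::new();
    assert_eq!(region.push("abc"), 0);
    assert_eq!(region.push("abc"), 0); // equal to the previous item: same index
    assert_eq!(region.push("def"), 1);
}
```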
@@ -265,79 +264,6 @@ where } } -/// TODO -#[derive(Default)] -pub struct CombineSequential(R, S); - -impl Push for CombineSequential, (O1, O2)> -where - A: Region, - B: Region, - O1: OffsetContainer, - O2: OffsetContainer, - TupleABRegion: Region + Push, - Self: Region, -{ - fn push(&mut self, item: T) -> Self::Index { - let (a, b) = self.0.push(item); - self.1 .0.push(a); - self.1 .1.push(b); - assert_eq!(self.1 .0.len(), self.1 .1.len()); - self.1 .0.len() - 1 - } -} - -impl Region for CombineSequential, (O1, O2)> -where - A: Region, - B: Region, - O1: OffsetContainer, - O2: OffsetContainer, -{ - type Owned = as Region>::Owned; - type ReadItem<'a> = as Region>::ReadItem<'a> - where - Self: 'a; - type Index = usize; - - fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self - where - Self: 'a, - { - Self( - as Region>::merge_regions(regions.map(|r| &r.0)), - Default::default(), - ) - } - - fn index(&self, index: Self::Index) -> Self::ReadItem<'_> { - self.0.index((index, index)) - } - - fn reserve_regions<'a, I>(&mut self, regions: I) - where - Self: 'a, - I: Iterator + Clone, - { - self.0.reserve_regions(regions.map(|r| &r.0)); - } - - fn clear(&mut self) { - self.0.clear() - } - - fn heap_size(&self, callback: F) { - self.0.heap_size(callback) - } - - fn reborrow<'b, 'a: 'b>(item: Self::ReadItem<'a>) -> Self::ReadItem<'b> - where - Self: 'a, - { - as Region>::reborrow(item) - } -} - #[cfg(test)] mod tests { use crate::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs}; diff --git a/src/impls/offsets.rs b/src/impls/offsets.rs index 0ebc628..d8b8bd0 100644 --- a/src/impls/offsets.rs +++ b/src/impls/offsets.rs @@ -511,7 +511,6 @@ impl OffsetContainer for Vec { #[cfg(test)] mod tests { use crate::impls::deduplicate::ConsecutiveOffsetPairs; - use crate::impls::storage::Doubling; use crate::{Push, Region, SliceRegion, StringRegion}; use super::*; @@ -603,12 +602,4 @@ mod tests { let os = OffsetStride::default(); let _ = os.index(0); } - - #[test] - fn test_offset_optimized_doubling() { - let mut oo = , Doubling<_>>>::default(); - oo.push(9999999999); - assert_eq!(oo.len(), 1); - oo.reserve(1); - } } diff --git a/src/impls/storage.rs b/src/impls/storage.rs index 33d52ac..2dc33e1 100644 --- a/src/impls/storage.rs +++ b/src/impls/storage.rs @@ -1,9 +1,5 @@ //! Storage abstractions to represent slices of data. -#[cfg(feature = "serde")] -use serde::{Deserialize, Serialize}; -use std::ops::Range; - use crate::CopyIter; /// Behavior to allocate storage @@ -111,257 +107,3 @@ impl, T> PushStorage> for Vec { self.extend(item.0); } } - -/// A storage that maintains non-reallocating allocations and allocates double the size when needed. -/// -/// Not considered part of the stable interface of this crate. 
-#[derive(Debug)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub struct Doubling { - inner: Vec>, - offsets: Vec, - len: usize, -} - -impl Default for Doubling { - fn default() -> Self { - Self { - inner: Vec::default(), - offsets: Vec::default(), - len: 0, - } - } -} - -impl Doubling { - #[inline] - #[must_use] - fn with_capacity(capacity: usize) -> Self { - Self { - inner: vec![Vec::with_capacity(capacity)], - offsets: Vec::default(), - len: 0, - } - } - - #[inline] - fn reserve(&mut self, additional: usize) { - let (remaining, last_len) = self - .inner - .last() - .map_or((0, 0), |last| (last.capacity() - last.len(), last.len())); - if remaining < additional { - let len = 2 * last_len; - let len = std::cmp::max(additional, len); - let len = len.next_power_of_two(); - self.offsets - .push(last_len + *self.offsets.last().unwrap_or(&0)); - self.inner.push(Vec::with_capacity(len)); - } - } - - #[inline] - fn clear(&mut self) { - self.inner.drain(..self.len().saturating_sub(1)); - if let Some(last) = self.inner.last_mut() { - last.clear(); - } - } - - #[inline] - fn heap_size(&self, mut callback: F) { - let size_of_usize = std::mem::size_of::(); - callback( - self.offsets.len() * size_of_usize, - self.offsets.capacity() * size_of_usize, - ); - let size_of_t = std::mem::size_of::(); - for inner in &self.inner { - callback(inner.len() * size_of_t, inner.capacity() * size_of_t); - } - } - - #[inline] - fn extend>(&mut self, iter: I) { - let iter = iter.into_iter(); - let (lo, hi) = iter.size_hint(); - self.reserve(hi.unwrap_or(lo)); - Extend::extend(self.inner.last_mut().unwrap(), iter); - } - - #[inline] - #[must_use] - fn index(&self, index: usize) -> &T { - let slice_index = self - .offsets - .iter() - .position(|&o| o > index) - .unwrap_or_else(|| self.offsets.len().saturating_sub(1)); - let index = index - self.offsets[slice_index]; - &self.inner[slice_index][index] - } - - #[inline] - fn len(&self) -> usize { - *self.offsets.last().unwrap_or(&0) + self.inner.last().map_or(0, Vec::len) - } - - #[inline] - fn is_empty(&self) -> bool { - self.offsets.last().unwrap_or(&0) > &0 || self.inner.last().map_or(false, Vec::is_empty) - } -} - -impl Storage for Doubling { - #[inline] - fn with_capacity(capacity: usize) -> Self { - Self::with_capacity(capacity) - } - - #[inline] - fn reserve(&mut self, additional: usize) { - self.reserve(additional); - } - - #[inline] - fn clear(&mut self) { - self.clear() - } - - #[inline] - fn heap_size(&self, callback: F) { - self.heap_size(callback); - } - - #[inline] - fn len(&self) -> usize { - self.len() - } - - #[inline] - fn is_empty(&self) -> bool { - self.is_empty() - } -} - -impl PushStorage> for Doubling -where - I: IntoIterator, - I::IntoIter: ExactSizeIterator, -{ - #[inline] - fn push_storage(&mut self, item: CopyIter) { - self.extend(item.0); - } -} - -impl PushStorage<&mut Vec> for Doubling { - #[inline] - fn push_storage(&mut self, item: &mut Vec) { - self.len += item.len(); - self.reserve(item.len()); - self.inner.last_mut().unwrap().append(item); - } -} - -impl PushStorage<&[T]> for Doubling { - #[inline] - fn push_storage(&mut self, item: &[T]) { - self.len += item.len(); - self.reserve(item.len()); - self.inner.last_mut().unwrap().extend_from_slice(item); - } -} - -impl std::ops::Index> for Doubling { - type Output = [T]; - #[inline] - fn index(&self, range: Range) -> &Self::Output { - let index = self - .offsets - .iter() - .position(|&o| o > range.start) - .unwrap_or_else(|| self.offsets.len().saturating_sub(1)); - let 
start = range.start - self.offsets[index]; - let end = range.end - self.offsets[index]; - &self.inner[index][start..end] - } -} - -/// An iterator over the elements of a [`Doubling`]. -pub struct DoublingIter<'a, T: 'a> { - inner: Option>>, - remaining: std::slice::Iter<'a, Vec>, -} - -impl<'a, T: Copy> Iterator for DoublingIter<'a, T> { - type Item = T; - - #[inline] - fn next(&mut self) -> Option { - loop { - if let Some(inner) = &mut self.inner { - if let Some(item) = inner.next() { - return Some(item); - } - } - self.inner = self - .remaining - .next() - .map(|vec| vec.as_slice().iter().copied()); - self.inner.as_ref()?; - } - } -} - -mod offsetcontainer { - use crate::impls::offsets::OffsetContainer; - use crate::impls::storage::{Doubling, DoublingIter}; - - impl OffsetContainer for Doubling { - type Iter<'a> = DoublingIter<'a, T> where Self: 'a; - - fn push(&mut self, item: T) { - self.len += 1; - self.reserve(1); - self.inner.last_mut().unwrap().push(item); - } - - fn extend>(&mut self, iter: I) - where - I::IntoIter: ExactSizeIterator, - { - self.extend(iter); - } - - fn index(&self, index: usize) -> T { - *self.index(index) - } - - fn iter(&self) -> Self::Iter<'_> { - let mut iter = self.inner.as_slice().iter(); - DoublingIter { - inner: iter.next().map(|vec| vec.as_slice().iter().copied()), - remaining: iter, - } - } - } -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_doubling() { - let mut d: Doubling = Doubling::default(); - let mut start = 0; - - for i in 0..1000 { - d.push_storage([i, i + 1, i + 3].as_slice()); - let end = d.len(); - assert_eq!(&[i, i + 1, i + 3], &d[start..end]); - start = end; - } - } -} diff --git a/src/lib.rs b/src/lib.rs index b67556b..168d9e2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -11,7 +11,6 @@ pub mod impls; use crate::impls::offsets::OffsetContainer; pub use impls::columns::ColumnsRegion; -pub use impls::deduplicate::CombineSequential; pub use impls::mirror::MirrorRegion; pub use impls::option::OptionRegion; pub use impls::result::ResultRegion; @@ -434,8 +433,7 @@ pub struct CopyIter(pub I); #[cfg(test)] mod tests { use crate::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs}; - use crate::impls::offsets::OffsetOptimized; - use crate::impls::tuple::{TupleABRegion, TupleARegion}; + use crate::impls::tuple::{TupleARegion}; use super::*; @@ -671,59 +669,6 @@ mod tests { owned_roundtrip::(&mut c, index); } - #[test] - fn test_my_understanding() { - let item = (vec![1, 2, 3], vec![1, 2, 3]); - let item2 = (vec![1, 2, 3, 4], vec![1, 2, 3, 4]); - let mut r = >, OwnedRegion>>::default(); - let _index: ((usize, usize), (usize, usize)) = r.push(&item); - - let mut r = >>, - ConsecutiveOffsetPairs>, - >>::default(); - let _index: (usize, usize) = r.push(&item); - - let mut r = >>, - ConsecutiveOffsetPairs>, - >, - (OffsetOptimized, OffsetOptimized), - >>::default(); - let _index: usize = r.push(&item); - - let mut fs = FlatStack::< - CombineSequential< - TupleABRegion< - ConsecutiveOffsetPairs>>, - CollapseSequence>>, - >, - (OffsetOptimized, OffsetOptimized), - >, - OffsetOptimized, - >::default(); - - for item in std::iter::repeat(&item) - .take(1000) - .chain(std::iter::once(&item2)) - { - fs.copy(item); - let mut size = 0; - let mut capacity = 0; - let mut count = 0; - fs.heap_size(|siz, cap| { - size += siz; - capacity += cap; - count += (cap > 0) as usize - }); - - println!("size {size}, capacity {capacity}, allocations {count}"); - } - println!("fs size {}", std::mem::size_of_val(&fs)); - assert_eq!(&item.1, 
fs.get(0).1); - } - /// Test that items and owned variants can be reborrowed to shorten their lifetimes. fn _test_reborrow(item: R::ReadItem<'_>, owned: &R::Owned) where From d869d0f558132af42ac3ce99178d7554963ac90e Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Tue, 2 Jul 2024 13:08:32 -0400 Subject: [PATCH 14/15] Cleanup Signed-off-by: Moritz Hoffmann --- src/impls/offsets.rs | 6 +----- src/lib.rs | 2 +- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/impls/offsets.rs b/src/impls/offsets.rs index d8b8bd0..3f2a79d 100644 --- a/src/impls/offsets.rs +++ b/src/impls/offsets.rs @@ -175,11 +175,7 @@ impl Iterator for OffsetStrideIter { /// A list of unsigned integers that uses `u32` elements as long as they are small enough, and switches to `u64` once they are not. #[derive(Eq, PartialEq, Clone, Debug, Default)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub struct OffsetList -where - S: OffsetContainer, - L: OffsetContainer, -{ +pub struct OffsetList { /// Offsets that fit within a `u32`. pub smol: S, /// Offsets that either do not fit in a `u32`, or are inserted after some offset that did not fit. diff --git a/src/lib.rs b/src/lib.rs index 168d9e2..d193712 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -433,7 +433,7 @@ pub struct CopyIter(pub I); #[cfg(test)] mod tests { use crate::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs}; - use crate::impls::tuple::{TupleARegion}; + use crate::impls::tuple::TupleARegion; use super::*; From c24025764c63561243c89fccdac2203e67b94f1b Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Tue, 2 Jul 2024 13:11:56 -0400 Subject: [PATCH 15/15] Cleanup Signed-off-by: Moritz Hoffmann --- src/impls/storage.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/impls/storage.rs b/src/impls/storage.rs index 2dc33e1..f23c058 100644 --- a/src/impls/storage.rs +++ b/src/impls/storage.rs @@ -2,7 +2,11 @@ use crate::CopyIter; -/// Behavior to allocate storage +/// Behavior to allocate storage. +/// +/// This trait does not express opinions on how to populate itself and how to extract data. Clients +/// should use the [`PushStorage`] trait to insert data into storage, and appropriate +/// [`Index`](std::ops::Index) bounds to extract data. pub trait Storage: Default { /// Allocate storage for at least `capacity` elements. #[must_use]