diff --git a/src/impls/mod.rs b/src/impls.rs similarity index 93% rename from src/impls/mod.rs rename to src/impls.rs index 54f9ad7..a265d3a 100644 --- a/src/impls/mod.rs +++ b/src/impls.rs @@ -10,6 +10,7 @@ pub mod option; pub mod result; pub mod slice; pub mod slice_copy; +pub mod storage; pub mod string; pub mod tuple; mod vec; diff --git a/src/impls/columns.rs b/src/impls/columns.rs index e7b97a4..b678f4a 100644 --- a/src/impls/columns.rs +++ b/src/impls/columns.rs @@ -1,12 +1,14 @@ //! A region to contain a variable number of columns. use std::fmt::Debug; +use std::iter::Zip; +use std::slice::Iter; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; use crate::impls::deduplicate::ConsecutiveOffsetPairs; -use crate::impls::offsets::OffsetOptimized; +use crate::impls::offsets::{OffsetContainer, OffsetOptimized}; use crate::{CopyIter, IntoOwned}; use crate::{OwnedRegion, Push, Region}; @@ -52,24 +54,27 @@ use crate::{OwnedRegion, Push, Region}; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr( feature = "serde", - serde( - bound = "R: Serialize + for<'a> Deserialize<'a>, R::Index: Serialize + for<'a> Deserialize<'a>" - ) + serde(bound = " + R: Serialize + for<'a> Deserialize<'a>, + R::Index: Serialize + for<'a> Deserialize<'a>, + O: Serialize + for<'a> Deserialize<'a>, + ") )] -pub struct ColumnsRegion +pub struct ColumnsRegion where R: Region, { /// Indices to address rows in `inner`. For each row, we remember /// an index for each column. - indices: ConsecutiveOffsetPairs, OffsetOptimized>, + indices: ConsecutiveOffsetPairs, O>, /// Storage for columns. 
inner: Vec, } -impl Clone for ColumnsRegion +impl Clone for ColumnsRegion where R: Region + Clone, + O: Clone, { fn clone(&self) -> Self { Self { @@ -84,13 +89,14 @@ where } } -impl Region for ColumnsRegion +impl Region for ColumnsRegion where R: Region, + O: OffsetContainer, { type Owned = Vec; type ReadItem<'a> = ReadColumns<'a, R> where Self: 'a; - type Index = usize; + type Index = , OffsetOptimized> as Region>::Index; fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self where @@ -160,9 +166,10 @@ where } } -impl Default for ColumnsRegion +impl Default for ColumnsRegion where R: Region, + O: OffsetContainer, { fn default() -> Self { Self { @@ -321,13 +328,11 @@ where } /// An iterator over the elements of a row. -pub struct ReadColumnsIter<'a, R: Region>( - Result, std::slice::Iter<'a, R::Owned>>, -); +pub struct ReadColumnsIter<'a, R: Region>(Result, Iter<'a, R::Owned>>); /// An iterator over the elements of a row. pub struct ReadColumnsIterInner<'a, R: Region> { - iter: std::iter::Zip, std::slice::Iter<'a, R>>, + iter: Zip, Iter<'a, R>>, } impl<'a, R> Iterator for ReadColumnsIter<'a, R> @@ -342,8 +347,17 @@ where Err(slice) => slice.next().map(IntoOwned::borrow_as), } } + + fn size_hint(&self) -> (usize, Option) { + match &self.0 { + Ok(inner) => inner.size_hint(), + Err(slice) => slice.size_hint(), + } + } } +impl<'a, R> ExactSizeIterator for ReadColumnsIter<'a, R> where R: Region {} + impl<'a, R> Iterator for ReadColumnsIterInner<'a, R> where R: Region, @@ -353,13 +367,18 @@ where fn next(&mut self) -> Option { self.iter.next().map(|(&i, r)| r.index(i)) } + + fn size_hint(&self) -> (usize, Option) { + self.iter.size_hint() + } } -impl Push> for ColumnsRegion +impl Push> for ColumnsRegion where for<'a> R: Region + Push<::ReadItem<'a>>, + O: OffsetContainer, { - fn push(&mut self, item: ReadColumns<'_, R>) -> as Region>::Index { + fn push(&mut self, item: ReadColumns<'_, R>) -> as Region>::Index { // Ensure all required regions exist. 
while self.inner.len() < item.len() { self.inner.push(R::default()); @@ -373,11 +392,12 @@ where } } -impl<'a, R, T> Push<&'a [T]> for ColumnsRegion +impl<'a, R, O, T> Push<&'a [T]> for ColumnsRegion where R: Region + Push<&'a T>, + O: OffsetContainer, { - fn push(&mut self, item: &'a [T]) -> as Region>::Index { + fn push(&mut self, item: &'a [T]) -> as Region>::Index { // Ensure all required regions exist. while self.inner.len() < item.len() { self.inner.push(R::default()); @@ -391,11 +411,12 @@ where } } -impl Push<[T; N]> for ColumnsRegion +impl Push<[T; N]> for ColumnsRegion where R: Region + Push, + O: OffsetContainer, { - fn push(&mut self, item: [T; N]) -> as Region>::Index { + fn push(&mut self, item: [T; N]) -> as Region>::Index { // Ensure all required regions exist. while self.inner.len() < item.len() { self.inner.push(R::default()); @@ -409,11 +430,12 @@ where } } -impl<'a, R, T, const N: usize> Push<&'a [T; N]> for ColumnsRegion +impl<'a, R, O, T, const N: usize> Push<&'a [T; N]> for ColumnsRegion where R: Region + Push<&'a T>, + O: OffsetContainer, { - fn push(&mut self, item: &'a [T; N]) -> as Region>::Index { + fn push(&mut self, item: &'a [T; N]) -> as Region>::Index { // Ensure all required regions exist. while self.inner.len() < item.len() { self.inner.push(R::default()); @@ -427,11 +449,12 @@ where } } -impl Push> for ColumnsRegion +impl Push> for ColumnsRegion where R: Region + Push, + O: OffsetContainer, { - fn push(&mut self, item: Vec) -> as Region>::Index { + fn push(&mut self, item: Vec) -> as Region>::Index { // Ensure all required regions exist. 
while self.inner.len() < item.len() { self.inner.push(R::default()); @@ -445,11 +468,12 @@ where } } -impl<'a, R, T> Push<&'a Vec> for ColumnsRegion +impl<'a, R, O, T> Push<&'a Vec> for ColumnsRegion where R: Region + Push<&'a T>, + O: OffsetContainer, { - fn push(&mut self, item: &'a Vec) -> as Region>::Index { + fn push(&mut self, item: &'a Vec) -> as Region>::Index { // Ensure all required regions exist. while self.inner.len() < item.len() { self.inner.push(R::default()); @@ -463,13 +487,15 @@ where } } -impl Push> for ColumnsRegion +impl Push> for ColumnsRegion where R: Region + Push, + O: OffsetContainer, I: IntoIterator, + I::IntoIter: ExactSizeIterator, { #[inline] - fn push(&mut self, item: CopyIter) -> as Region>::Index { + fn push(&mut self, item: CopyIter) -> as Region>::Index { let iter = item.0.into_iter().enumerate().map(|(index, value)| { // Ensure all required regions exist. if self.inner.len() <= index { @@ -501,7 +527,7 @@ mod tests { indices.push(index); } - for (&index, row) in indices.iter().zip(&data) { + for (index, row) in indices.iter().zip(&data) { assert!(row.iter().copied().eq(r.index(index).iter())); } } @@ -527,7 +553,7 @@ mod tests { indices.push(index); } - for (&index, row) in indices.iter().zip(&data) { + for (index, row) in indices.iter().zip(&data) { assert!(row.iter().copied().eq(r.index(index).iter())); } @@ -556,7 +582,7 @@ mod tests { indices.push(index); } - for (&index, row) in indices.iter().zip(&data) { + for (index, row) in indices.iter().zip(&data) { assert!(row.iter().eq(r.index(index).iter())); } @@ -584,8 +610,8 @@ mod tests { indices.push(index); } - for (&index, row) in indices.iter().zip(&data) { - assert!(row.iter().copied().eq(r.index(index).iter())); + for (index, row) in indices.iter().zip(&data) { + assert!(row.iter().eq(r.index(index).iter())); } println!("{r:?}"); @@ -612,8 +638,8 @@ mod tests { indices.push(index); } - for (&index, row) in indices.iter().zip(&data) { - 
assert!(row.iter().copied().eq(r.index(index).iter())); + for (index, row) in indices.iter().zip(&data) { + assert!(row.iter().eq(r.index(index).iter())); } assert_eq!("1", r.index(indices[1]).get(0)); diff --git a/src/impls/deduplicate.rs b/src/impls/deduplicate.rs index e53073c..7c19bc6 100644 --- a/src/impls/deduplicate.rs +++ b/src/impls/deduplicate.rs @@ -125,7 +125,7 @@ where /// use flatcontainer::{Push, OwnedRegion, Region, StringRegion}; /// let mut r = >>::default(); /// -/// let index: usize = r.push(&b"abc"); +/// let index = r.push(&b"abc"); /// assert_eq!(b"abc", r.index(index)); /// ``` #[derive(Debug)] diff --git a/src/impls/offsets.rs b/src/impls/offsets.rs index 617721d..3f2a79d 100644 --- a/src/impls/offsets.rs +++ b/src/impls/offsets.rs @@ -3,37 +3,38 @@ #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; +use crate::impls::storage::Storage; + /// A container to store offsets. -pub trait OffsetContainer: Default + Extend { - /// Accepts a newly pushed element. - fn push(&mut self, item: T); +pub trait OffsetContainer: Storage { + /// Iterator over the elements. + type Iter<'a>: Iterator + where + Self: 'a; /// Lookup an index. May panic for invalid indexes. fn index(&self, index: usize) -> T; - /// Clear all contents. - fn clear(&mut self); - - /// Returns the number of elements. - fn len(&self) -> usize; - - /// Returns `true` if empty. - #[inline] - #[must_use] - fn is_empty(&self) -> bool { - self.len() == 0 - } + /// Accepts a newly pushed element. + fn push(&mut self, item: T); - /// Reserve space for `additional` elements. - fn reserve(&mut self, additional: usize); + /// Extend from iterator. Must be [`ExactSizeIterator`] to efficiently + /// pre-allocate. + fn extend>(&mut self, iter: I) + where + I::IntoIter: ExactSizeIterator; - /// Heap size, size - capacity - fn heap_size(&self, callback: F); + /// Returns an iterator over the elements. 
+ fn iter(&self) -> Self::Iter<'_>; } /// A container for offsets that can represent strides of offsets. /// -/// Does not implement `OffsetContainer` because it cannot accept arbitrary pushes. +/// Does not implement `OffsetContainer` because it cannot accept arbitrary pushes. Instead, +/// its `push` method returns a boolean to indicate whether the push was successful or not. +/// +/// This type can absorb sequences of the form `0, stride, 2 * stride, 3 * stride, ...` and +/// saturates in a repeated last element. #[derive(Eq, PartialEq, Debug, Default, Clone, Copy)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum OffsetStride { @@ -52,6 +53,7 @@ pub enum OffsetStride { impl OffsetStride { /// Accepts or rejects a newly pushed element. #[must_use] + #[inline] pub fn push(&mut self, item: usize) -> bool { match self { OffsetStride::Empty => { @@ -95,6 +97,7 @@ impl OffsetStride { /// Panics for out-of-bounds accesses, i.e., if `index` greater or equal to /// [`len`][OffsetStride::len]. #[must_use] + #[inline] pub fn index(&self, index: usize) -> usize { match self { OffsetStride::Empty => { @@ -114,6 +117,7 @@ impl OffsetStride { /// Returns the number of elements. #[must_use] + #[inline] pub fn len(&self) -> usize { match self { OffsetStride::Empty => 0, @@ -125,33 +129,71 @@ impl OffsetStride { /// Returns `true` if empty. #[must_use] + #[inline] pub fn is_empty(&self) -> bool { matches!(self, OffsetStride::Empty) } /// Removes all elements. + #[inline] pub fn clear(&mut self) { *self = Self::default(); } + + /// Return an iterator over the elements. + #[must_use] + #[inline] + pub fn iter(&self) -> OffsetStrideIter { + OffsetStrideIter { + strided: *self, + index: 0, + } + } +} + +/// An iterator over the elements of an [`OffsetStride`]. 
+pub struct OffsetStrideIter { + strided: OffsetStride, + index: usize, +} + +impl Iterator for OffsetStrideIter { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + if self.index < self.strided.len() { + let item = self.strided.index(self.index); + self.index += 1; + Some(item) + } else { + None + } + } } /// A list of unsigned integers that uses `u32` elements as long as they are small enough, and switches to `u64` once they are not. #[derive(Eq, PartialEq, Clone, Debug, Default)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub struct OffsetList { +pub struct OffsetList { /// Offsets that fit within a `u32`. - pub smol: Vec, + pub smol: S, /// Offsets that either do not fit in a `u32`, or are inserted after some offset that did not fit. - pub chonk: Vec, + pub chonk: L, } -impl OffsetList { +impl OffsetList +where + S: OffsetContainer, + L: OffsetContainer, +{ /// Allocate a new list with a specified capacity. #[must_use] + #[inline] pub fn with_capacity(cap: usize) -> Self { Self { - smol: Vec::with_capacity(cap), - chonk: Vec::new(), + smol: S::with_capacity(cap), + chonk: L::default(), } } @@ -160,6 +202,7 @@ impl OffsetList { /// # Panics /// /// Panics if `usize` does not fit in `u64`. + #[inline] pub fn push(&mut self, offset: usize) { if self.chonk.is_empty() { if let Ok(smol) = offset.try_into() { @@ -178,130 +221,286 @@ impl OffsetList { /// /// Panics if the index is out of bounds, i.e., it is larger or equal to the length. #[must_use] + #[inline] pub fn index(&self, index: usize) -> usize { if index < self.smol.len() { - self.smol[index].try_into().unwrap() + self.smol.index(index).try_into().unwrap() } else { - self.chonk[index - self.smol.len()].try_into().unwrap() + let index = index - self.smol.len(); + self.chonk.index(index).try_into().unwrap() } } /// The number of offsets in the list. 
#[must_use] + #[inline] pub fn len(&self) -> usize { self.smol.len() + self.chonk.len() } /// Returns `true` if this list contains no elements. #[must_use] + #[inline] pub fn is_empty(&self) -> bool { - self.len() == 0 + self.smol.is_empty() && self.chonk.is_empty() } /// Reserve space for `additional` elements. + #[inline] pub fn reserve(&mut self, additional: usize) { self.smol.reserve(additional); } /// Remove all elements. + #[inline] pub fn clear(&mut self) { self.smol.clear(); self.chonk.clear(); } + #[inline] fn heap_size(&self, mut callback: F) { self.smol.heap_size(&mut callback); self.chonk.heap_size(callback); } } -/// An offset container implementation that first tries to recognize strides, and then spilles into -/// a regular offset list. -#[derive(Eq, PartialEq, Default, Debug, Clone)] -#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub struct OffsetOptimized { - strided: OffsetStride, - spilled: OffsetList, +impl Storage for OffsetList +where + S: OffsetContainer, + L: OffsetContainer, +{ + #[inline] + fn with_capacity(capacity: usize) -> Self { + Self::with_capacity(capacity) + } + + #[inline] + fn reserve(&mut self, additional: usize) { + self.reserve(additional) + } + + #[inline] + fn clear(&mut self) { + self.clear() + } + + #[inline] + fn heap_size(&self, callback: F) { + self.heap_size(callback) + } + + #[inline] + fn len(&self) -> usize { + self.len() + } + + #[inline] + fn is_empty(&self) -> bool { + self.is_empty() + } } -impl OffsetContainer for OffsetOptimized { +impl OffsetContainer for OffsetList +where + S: OffsetContainer, + L: OffsetContainer, +{ + type Iter<'a> = OffsetListIter<'a, S, L> where Self: 'a; + + #[inline] + fn index(&self, index: usize) -> usize { + self.index(index) + } + + #[inline] fn push(&mut self, item: usize) { - if self.spilled.is_empty() { - let inserted = self.strided.push(item); - if !inserted { - self.spilled.push(item); - } - } else { - self.spilled.push(item); + self.push(item) + } + + 
#[inline] + fn extend>(&mut self, iter: I) + where + I::IntoIter: ExactSizeIterator, + { + for item in iter { + self.push(item); } } - fn index(&self, index: usize) -> usize { - if index < self.strided.len() { - self.strided.index(index) - } else { - self.spilled.index(index - self.strided.len()) + #[inline] + fn iter(&self) -> Self::Iter<'_> { + OffsetListIter { + smol: self.smol.iter(), + chonk: self.chonk.iter(), } } +} + +/// An iterator over the elements of an [`OffsetList`]. +pub struct OffsetListIter<'a, S, L> +where + S: OffsetContainer + 'a, + L: OffsetContainer + 'a, +{ + smol: S::Iter<'a>, + chonk: L::Iter<'a>, +} + +impl<'a, S, L> Iterator for OffsetListIter<'a, S, L> +where + S: OffsetContainer + 'a, + L: OffsetContainer + 'a, +{ + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + self.smol + .next() + .map(|x| x as usize) + .or_else(|| self.chonk.next().map(|x| x as usize)) + } +} + +/// An offset container implementation that first tries to recognize strides, and then spills into +/// a regular offset list. +#[derive(Eq, PartialEq, Default, Debug, Clone)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct OffsetOptimized, L = Vec> +where + S: OffsetContainer, + L: OffsetContainer, +{ + strided: OffsetStride, + spilled: OffsetList, +} + +impl Storage for OffsetOptimized +where + S: OffsetContainer, + L: OffsetContainer, +{ + #[inline] + fn with_capacity(_capacity: usize) -> Self { + // `self.strided` doesn't have any capacity, and we don't know the structure of the data. 
+ Self::default() + } + #[inline] fn clear(&mut self) { self.spilled.clear(); self.strided = OffsetStride::default(); } + #[inline] fn len(&self) -> usize { self.strided.len() + self.spilled.len() } + #[inline] + fn is_empty(&self) -> bool { + self.strided.is_empty() && self.spilled.is_empty() + } + + #[inline] fn reserve(&mut self, additional: usize) { if !self.spilled.is_empty() { self.spilled.reserve(additional); } } + #[inline] fn heap_size(&self, callback: F) { self.spilled.heap_size(callback); } } -impl Extend for OffsetOptimized { - fn extend>(&mut self, iter: T) { +impl OffsetContainer for OffsetOptimized +where + S: OffsetContainer, + L: OffsetContainer, +{ + type Iter<'a> = OffsetOptimizedIter<'a, S , L> where Self: 'a; + + fn index(&self, index: usize) -> usize { + if index < self.strided.len() { + self.strided.index(index) + } else { + self.spilled.index(index - self.strided.len()) + } + } + + fn push(&mut self, item: usize) { + if self.spilled.is_empty() { + let inserted = self.strided.push(item); + if !inserted { + self.spilled.push(item); + } + } else { + self.spilled.push(item); + } + } + + fn extend>(&mut self, iter: I) + where + I::IntoIter: ExactSizeIterator, + { for item in iter { self.push(item); } } + + fn iter(&self) -> Self::Iter<'_> { + OffsetOptimizedIter { + strided: self.strided.iter(), + spilled: self.spilled.iter(), + } + } } -impl OffsetContainer for Vec { - #[inline] - fn push(&mut self, item: T) { - self.push(item); +/// An iterator over the elements of an [`OffsetOptimized`]. 
+pub struct OffsetOptimizedIter<'a, S, L> +where + S: OffsetContainer + 'a, + L: OffsetContainer + 'a, +{ + strided: OffsetStrideIter, + spilled: as OffsetContainer>::Iter<'a>, +} + +impl<'a, S, L> Iterator for OffsetOptimizedIter<'a, S, L> +where + S: OffsetContainer + 'a, + L: OffsetContainer + 'a, +{ + type Item = usize; + + fn next(&mut self) -> Option { + self.strided.next().or_else(|| self.spilled.next()) } +} + +impl OffsetContainer for Vec { + type Iter<'a> = std::iter::Copied> where Self: 'a; - #[inline] - #[must_use] fn index(&self, index: usize) -> T { self[index] } #[inline] - fn clear(&mut self) { - self.clear(); - } - - #[inline] - #[must_use] - fn len(&self) -> usize { - self.len() + fn push(&mut self, item: T) { + self.push(item); } - #[inline] - fn reserve(&mut self, additional: usize) { - self.reserve(additional); + fn extend>(&mut self, iter: I) + where + I::IntoIter: ExactSizeIterator, + { + Extend::extend(self, iter); } - fn heap_size(&self, mut callback: F) { - let size_of_t = std::mem::size_of::(); - callback(self.len() * size_of_t, self.capacity() * size_of_t); + fn iter(&self) -> Self::Iter<'_> { + self.as_slice().iter().copied() } } @@ -328,7 +527,7 @@ mod tests { #[test] fn test_offset_optimized_clear() { - let mut oo = OffsetOptimized::default(); + let mut oo = ::default(); oo.push(0); assert_eq!(oo.len(), 1); oo.clear(); @@ -342,7 +541,7 @@ mod tests { #[test] fn test_offset_optimized_reserve() { - let mut oo = OffsetOptimized::default(); + let mut oo = ::default(); oo.push(9999999999); assert_eq!(oo.len(), 1); oo.reserve(1); @@ -350,7 +549,7 @@ mod tests { #[test] fn test_offset_optimized_heap_size() { - let mut oo = OffsetOptimized::default(); + let mut oo = ::default(); oo.push(9999999999); let mut cap = 0; oo.heap_size(|_, ca| { @@ -388,7 +587,7 @@ mod tests { #[test] fn test_chonk() { - let mut ol = OffsetList::default(); + let mut ol = , Vec<_>>>::default(); ol.push(usize::MAX); assert_eq!(usize::MAX, ol.index(0)); } diff --git 
a/src/impls/slice.rs b/src/impls/slice.rs index 51c9705..9fe04ad 100644 --- a/src/impls/slice.rs +++ b/src/impls/slice.rs @@ -375,6 +375,15 @@ impl<'a, C: Region, O: OffsetContainer> Iterator for ReadSliceIter<'a, } } +impl<'a, R, O> ExactSizeIterator for ReadSliceIter<'a, R, O> +where + R: Region, + O: OffsetContainer, + std::slice::Iter<'a, R::Owned>: ExactSizeIterator, + ReadSliceIterInner<'a, R, O>: ExactSizeIterator, +{ +} + impl<'a, C: Region, O: OffsetContainer> Iterator for ReadSliceIterInner<'a, C, O> { type Item = C::ReadItem<'a>; @@ -386,6 +395,14 @@ impl<'a, C: Region, O: OffsetContainer> Iterator for ReadSliceIterInne } } +impl<'a, R, O> ExactSizeIterator for ReadSliceIterInner<'a, R, O> +where + R: Region, + O: OffsetContainer, + Range: ExactSizeIterator, +{ +} + impl<'a, C, T, O> Push<&'a [T]> for SliceRegion where C: Region + Push<&'a T>, diff --git a/src/impls/slice_copy.rs b/src/impls/slice_copy.rs index 33aa134..e20873f 100644 --- a/src/impls/slice_copy.rs +++ b/src/impls/slice_copy.rs @@ -1,8 +1,11 @@ //! A region that stores slices of copy types. +use std::marker::PhantomData; + #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; +use crate::impls::storage::{PushStorage, Storage}; use crate::{CopyIter, Push, Region, ReserveItems}; /// A container for owned types. 
@@ -29,14 +32,16 @@ use crate::{CopyIter, Push, Region, ReserveItems}; /// ``` #[derive(Debug)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub struct OwnedRegion { - slices: Vec, +pub struct OwnedRegion> { + slices: S, + _marker: PhantomData, } -impl Clone for OwnedRegion { +impl Clone for OwnedRegion { fn clone(&self) -> Self { Self { slices: self.slices.clone(), + _marker: PhantomData, } } @@ -45,9 +50,10 @@ impl Clone for OwnedRegion { } } -impl Region for OwnedRegion +impl Region for OwnedRegion where [T]: ToOwned, + S: Storage + std::ops::Index, Output = [T]>, { type Owned = <[T] as ToOwned>::Owned; type ReadItem<'a> = &'a [T] where Self: 'a; @@ -59,7 +65,8 @@ where Self: 'a, { Self { - slices: Vec::with_capacity(regions.map(|r| r.slices.len()).sum()), + slices: S::merge_regions(regions.map(|r| &r.slices)), + _marker: PhantomData, } } @@ -74,7 +81,7 @@ where Self: 'a, I: Iterator + Clone, { - self.slices.reserve(regions.map(|r| r.slices.len()).sum()); + self.slices.reserve_regions(regions.map(|r| &r.slices)); } #[inline] @@ -83,12 +90,8 @@ where } #[inline] - fn heap_size(&self, mut callback: F) { - let size_of_t = std::mem::size_of::(); - callback( - self.slices.len() * size_of_t, - self.slices.capacity() * size_of_t, - ); + fn heap_size(&self, callback: F) { + self.slices.heap_size(callback); } #[inline] @@ -100,44 +103,62 @@ where } } -impl Default for OwnedRegion { +impl> Default for OwnedRegion { #[inline] fn default() -> Self { Self { - slices: Vec::default(), + slices: S::default(), + _marker: PhantomData, } } } -impl Push<[T; N]> for OwnedRegion +impl Push<[T; N]> for OwnedRegion where [T]: ToOwned, + S: Storage + + for<'a> PushStorage> + + std::ops::Index, Output = [T]>, { #[inline] fn push(&mut self, item: [T; N]) -> as Region>::Index { let start = self.slices.len(); - self.slices.extend(item); + self.slices.push_storage(CopyIter(item)); (start, self.slices.len()) } } -impl Push<&[T; N]> for OwnedRegion { +impl Push<&[T; N]> for 
OwnedRegion +where + T: Clone, + S: Storage + + for<'a> PushStorage<&'a [T]> + + std::ops::Index, Output = [T]>, +{ #[inline] fn push(&mut self, item: &[T; N]) -> as Region>::Index { - let start = self.slices.len(); - self.slices.extend_from_slice(item); - (start, self.slices.len()) + self.push(item.as_slice()) } } -impl Push<&&[T; N]> for OwnedRegion { +impl Push<&&[T; N]> for OwnedRegion +where + T: Clone, + S: Storage + + for<'a> PushStorage<&'a [T]> + + std::ops::Index, Output = [T]>, +{ #[inline] fn push(&mut self, item: &&[T; N]) -> as Region>::Index { self.push(*item) } } -impl<'b, T: Clone, const N: usize> ReserveItems<&'b [T; N]> for OwnedRegion { +impl<'b, T, S, const N: usize> ReserveItems<&'b [T; N]> for OwnedRegion +where + T: Clone, + S: Storage + std::ops::Index, Output = [T]>, +{ #[inline] fn reserve_items(&mut self, items: I) where @@ -147,28 +168,35 @@ impl<'b, T: Clone, const N: usize> ReserveItems<&'b [T; N]> for OwnedRegion { } } -impl Push<&[T]> for OwnedRegion { +impl Push<&[T]> for OwnedRegion +where + T: Clone, + S: Storage + + for<'a> PushStorage<&'a [T]> + + std::ops::Index, Output = [T]>, +{ #[inline] - fn push(&mut self, item: &[T]) -> as Region>::Index { + fn push(&mut self, item: &[T]) -> as Region>::Index { let start = self.slices.len(); - self.slices.extend_from_slice(item); + self.slices.push_storage(item); (start, self.slices.len()) } } -impl Push<&&[T]> for OwnedRegion +impl> Push<&&[T]> for OwnedRegion where for<'a> Self: Push<&'a [T]>, { #[inline] - fn push(&mut self, item: &&[T]) -> as Region>::Index { + fn push(&mut self, item: &&[T]) -> as Region>::Index { self.push(*item) } } -impl<'b, T> ReserveItems<&'b [T]> for OwnedRegion +impl<'b, T, S> ReserveItems<&'b [T]> for OwnedRegion where [T]: ToOwned, + S: Storage + std::ops::Index, Output = [T]>, { #[inline] fn reserve_items(&mut self, items: I) @@ -179,28 +207,38 @@ where } } -impl Push> for OwnedRegion +impl Push> for OwnedRegion where [T]: ToOwned, + S: Storage + + for<'a> 
PushStorage<&'a mut Vec> + + std::ops::Index, Output = [T]>, { #[inline] - fn push(&mut self, mut item: Vec) -> as Region>::Index { + fn push(&mut self, mut item: Vec) -> as Region>::Index { let start = self.slices.len(); - self.slices.append(&mut item); + self.slices.push_storage(&mut item); (start, self.slices.len()) } } -impl Push<&Vec> for OwnedRegion { +impl Push<&Vec> for OwnedRegion +where + T: Clone, + S: Storage + + for<'a> PushStorage<&'a [T]> + + std::ops::Index, Output = [T]>, +{ #[inline] - fn push(&mut self, item: &Vec) -> as Region>::Index { + fn push(&mut self, item: &Vec) -> as Region>::Index { self.push(item.as_slice()) } } -impl<'a, T> ReserveItems<&'a Vec> for OwnedRegion +impl<'a, T, S> ReserveItems<&'a Vec> for OwnedRegion where [T]: ToOwned, + S: Storage + std::ops::Index, Output = [T]>, { #[inline] fn reserve_items(&mut self, items: I) @@ -211,18 +249,28 @@ where } } -impl> Push> for OwnedRegion { +impl Push> for OwnedRegion +where + I: IntoIterator, + ::IntoIter: ExactSizeIterator, + T: Clone, + S: Storage + + PushStorage> + + std::ops::Index, Output = [T]>, +{ #[inline] - fn push(&mut self, item: CopyIter) -> as Region>::Index { + fn push(&mut self, item: CopyIter) -> as Region>::Index { let start = self.slices.len(); - self.slices.extend(item.0); + self.slices.push_storage(item); (start, self.slices.len()) } } -impl> ReserveItems> for OwnedRegion +impl ReserveItems> for OwnedRegion where [T]: ToOwned, + S: Storage + std::ops::Index, Output = [T]>, + J: IntoIterator, { #[inline] fn reserve_items(&mut self, items: I) @@ -269,8 +317,9 @@ mod tests { #[test] fn test_copy_iter() { let mut r = >::default(); - r.reserve_items(std::iter::once(CopyIter(std::iter::repeat(1).take(4)))); - let index = r.push(CopyIter(std::iter::repeat(1).take(4))); + let iter = [1; 4].into_iter(); + r.reserve_items(std::iter::once(CopyIter(iter.clone()))); + let index = r.push(CopyIter(iter)); assert_eq!([1, 1, 1, 1], r.index(index)); } } diff --git 
a/src/impls/storage.rs b/src/impls/storage.rs new file mode 100644 index 0000000..f23c058 --- /dev/null +++ b/src/impls/storage.rs @@ -0,0 +1,113 @@ +//! Storage abstractions to represent slices of data. + +use crate::CopyIter; + +/// Behavior to allocate storage. +/// +/// This trait does not express opinions on how to populate itself and how to extract data. Clients +/// should use the [`PushStorage`] trait to insert data into storage, and appropriate +/// [`Index`](std::ops::Index) bounds to extract data. +pub trait Storage: Default { + /// Allocate storage for at least `capacity` elements. + #[must_use] + fn with_capacity(capacity: usize) -> Self; + + /// Allocate storage large enough to absorb `regions`'s contents. + #[must_use] + #[inline] + fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self + where + Self: 'a, + { + Self::with_capacity(regions.map(Self::len).sum()) + } + + /// Reserve space for `additional` elements. + fn reserve(&mut self, additional: usize); + + /// Reserve space for `regions`. + #[inline] + fn reserve_regions<'a, I>(&mut self, regions: I) + where + Self: 'a, + I: Iterator + Clone, + { + self.reserve(regions.map(Self::len).sum()); + } + + /// Clear all contents, possibly retaining some allocations. + fn clear(&mut self); + + /// Observe the heap size information (size and capacity). + fn heap_size(&self, callback: F); + + /// Returns the number of elements. + #[must_use] + fn len(&self) -> usize; + + /// Returns `true` if empty, i.e., it doesn't contain any elements. 
+ #[must_use] + fn is_empty(&self) -> bool; +} + +impl Storage for Vec { + #[inline] + fn with_capacity(capacity: usize) -> Self { + Vec::with_capacity(capacity) + } + + #[inline] + fn reserve(&mut self, additional: usize) { + Vec::reserve(self, additional); + } + + #[inline] + fn clear(&mut self) { + self.clear(); + } + + #[inline] + fn heap_size(&self, mut callback: F) { + let size_of_t = std::mem::size_of::(); + callback(self.len() * size_of_t, self.capacity() * size_of_t); + } + + #[inline] + #[must_use] + fn len(&self) -> usize { + self.len() + } + + #[inline] + #[must_use] + fn is_empty(&self) -> bool { + self.is_empty() + } +} + +/// Push an item into storage. +pub trait PushStorage { + /// Push an item into storage. + fn push_storage(&mut self, item: T); +} + +impl PushStorage<&mut Vec> for Vec { + #[inline] + fn push_storage(&mut self, item: &mut Vec) { + self.append(item); + } +} + +impl PushStorage<&[T]> for Vec { + #[inline] + fn push_storage(&mut self, item: &[T]) { + self.extend_from_slice(item); + } +} + +impl, T> PushStorage> for Vec { + #[inline] + fn push_storage(&mut self, item: CopyIter) { + self.extend(item.0); + } +} diff --git a/src/lib.rs b/src/lib.rs index cb15176..d193712 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -9,6 +9,7 @@ use serde::{Deserialize, Serialize}; pub mod impls; +use crate::impls::offsets::OffsetContainer; pub use impls::columns::ColumnsRegion; pub use impls::mirror::MirrorRegion; pub use impls::option::OptionRegion; @@ -160,51 +161,46 @@ impl<'a, T: ToOwned + ?Sized> IntoOwned<'a> for &'a T { #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr( feature = "serde", - serde( - bound = "R: Serialize + for<'a> Deserialize<'a>, R::Index: Serialize + for<'a> Deserialize<'a>" - ) + serde(bound = " + R: Serialize + for<'a> Deserialize<'a>, + S: Serialize + for<'a> Deserialize<'a>, + ") )] -pub struct FlatStack { +pub struct FlatStack::Index>> { /// The indices, which we use to lookup items in the region. 
- indices: Vec, + indices: S, /// A region to index into. region: R, } -impl Default for FlatStack { +impl Default for FlatStack { #[inline] fn default() -> Self { Self { - indices: Vec::default(), + indices: S::default(), region: R::default(), } } } -impl Debug for FlatStack +impl::Index>> Debug for FlatStack where for<'a> R::ReadItem<'a>: Debug, + for<'a> &'a S: IntoIterator, { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { f.debug_list().entries(self.iter()).finish() } } -impl FlatStack { - /// Default implementation based on the preference of type `T`. - #[inline] - #[must_use] - pub fn default_impl>() -> Self { - Self::default() - } - +impl::Index>> FlatStack { /// Returns a flat stack that can absorb `capacity` indices without reallocation. /// /// Prefer [`Self::merge_capacity`] over this function to also pre-size the regions. #[must_use] pub fn with_capacity(capacity: usize) -> Self { Self { - indices: Vec::with_capacity(capacity), + indices: S::with_capacity(capacity), region: R::default(), } } @@ -213,10 +209,10 @@ impl FlatStack { #[must_use] pub fn merge_capacity<'a, I: Iterator + Clone + 'a>(stacks: I) -> Self where - R: 'a, + Self: 'a, { Self { - indices: Vec::with_capacity(stacks.clone().map(|s| s.indices.len()).sum()), + indices: S::merge_regions(stacks.clone().map(|s| &s.indices)), region: R::merge_regions(stacks.map(|r| &r.region)), } } @@ -235,7 +231,7 @@ impl FlatStack { #[inline] #[must_use] pub fn get(&self, offset: usize) -> R::ReadItem<'_> { - self.region.index(self.indices[offset]) + self.region.index(self.indices.index(offset)) } /// Returns the number of indices in the stack. @@ -252,12 +248,6 @@ impl FlatStack { self.indices.is_empty() } - /// Returns the total number of indices the stack can hold without reallocation. - #[must_use] - pub fn capacity(&self) -> usize { - self.indices.capacity() - } - /// Reserves space to hold `additional` indices. 
#[inline] pub fn reserve(&mut self, additional: usize) { @@ -289,22 +279,49 @@ impl FlatStack { self.region.reserve_regions(regions); } + /// Heap size, size - capacity + #[inline] + pub fn heap_size(&self, mut callback: F) { + self.region.heap_size(&mut callback); + self.indices.heap_size(callback); + } +} + +impl FlatStack +where + R: Region, + S: OffsetContainer<::Index>, +{ /// Iterate the items in this stack. #[inline] - pub fn iter(&self) -> Iter<'_, R> { + pub fn iter<'a>(&'a self) -> Iter<'a, R, <&'a S as IntoIterator>::IntoIter> + where + &'a S: IntoIterator, + { self.into_iter() } +} - /// Heap size, size - capacity +impl FlatStack { + /// Default implementation based on the preference of type `T`. #[inline] - pub fn heap_size(&self, mut callback: F) { - use crate::impls::offsets::OffsetContainer; - self.region.heap_size(&mut callback); - OffsetContainer::heap_size(&self.indices, callback); + #[must_use] + pub fn default_impl>() -> Self { + Self::default() + } + + /// Returns the total number of indices the stack can hold without reallocation. + #[must_use] + pub fn capacity(&self) -> usize { + self.indices.capacity() } } -impl> Extend for FlatStack { +impl Extend for FlatStack +where + R: Region + Push, + S: OffsetContainer<::Index>, +{ fn extend>(&mut self, iter: I) { let iter = iter.into_iter(); self.reserve(iter.size_hint().0); @@ -314,13 +331,16 @@ impl> Extend for FlatStack { } } -impl<'a, R: Region> IntoIterator for &'a FlatStack { +impl<'a, R: Region, S: OffsetContainer<::Index>> IntoIterator for &'a FlatStack +where + &'a S: IntoIterator::Index>, +{ type Item = R::ReadItem<'a>; - type IntoIter = Iter<'a, R>; + type IntoIter = Iter<'a, R, <&'a S as IntoIterator>::IntoIter>; fn into_iter(self) -> Self::IntoIter { Iter { - inner: self.indices.iter(), + inner: self.indices.into_iter(), region: &self.region, } } @@ -328,14 +348,22 @@ impl<'a, R: Region> IntoIterator for &'a FlatStack { /// An iterator over [`FlatStack`]. 
The iterator yields [`Region::ReadItem`] elements, which /// it obtains by looking up indices. -pub struct Iter<'a, R: Region> { +pub struct Iter<'a, R, S> +where + R: Region, + S: Iterator::Index>, +{ /// Iterator over indices. - inner: std::slice::Iter<'a, R::Index>, + inner: S, /// Region to map indices to read items. region: &'a R, } -impl<'a, R: Region> Iterator for Iter<'a, R> { +impl<'a, R, S> Iterator for Iter<'a, R, S> +where + R: Region, + S: Iterator::Index>, +{ type Item = R::ReadItem<'a>; fn next(&mut self) -> Option { @@ -347,9 +375,18 @@ impl<'a, R: Region> Iterator for Iter<'a, R> { } } -impl ExactSizeIterator for Iter<'_, R> {} +impl<'a, R, S> ExactSizeIterator for Iter<'a, R, S> +where + R: Region, + S: ExactSizeIterator::Index>, +{ +} -impl Clone for Iter<'_, R> { +impl<'a, R, S> Clone for Iter<'a, R, S> +where + R: Region, + S: Iterator::Index> + Clone, +{ fn clone(&self) -> Self { Self { inner: self.inner.clone(), @@ -358,7 +395,11 @@ impl Clone for Iter<'_, R> { } } -impl, T> FromIterator for FlatStack { +impl FromIterator for FlatStack +where + R: Region + Push, + S: OffsetContainer<::Index>, +{ fn from_iter>(iter: I) -> Self { let iter = iter.into_iter(); let mut c = Self::with_capacity(iter.size_hint().0); @@ -367,7 +408,7 @@ impl, T> FromIterator for FlatStack { } } -impl Clone for FlatStack { +impl Clone for FlatStack { fn clone(&self) -> Self { Self { region: self.region.clone(), @@ -458,7 +499,7 @@ mod tests { // Make sure that types are debug, even if we don't use this in the test. for<'a> R::ReadItem<'a>: Debug, { - let mut c = FlatStack::default(); + let mut c = FlatStack::<_>::default(); c.copy(t); let mut cc = c.clone();