diff --git a/benches/bench.rs b/benches/bench.rs index 4a0e143..3440738 100644 --- a/benches/bench.rs +++ b/benches/bench.rs @@ -4,7 +4,9 @@ extern crate test; -use flatcontainer::impls::tuple::TupleABCRegion; +use flatcontainer::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs}; +use flatcontainer::impls::offsets::OffsetOptimized; +use flatcontainer::impls::tuple::{TupleABCRegion, TupleABRegion}; use flatcontainer::{ Containerized, CopyOnto, CopyRegion, FlatStack, MirrorRegion, Region, ReserveItems, SliceRegion, StringRegion, @@ -80,15 +82,25 @@ fn str100_copy_region(bencher: &mut Bencher) { } #[bench] fn string10_copy_region(bencher: &mut Bencher) { - _bench_copy_region::, _>(bencher, vec![format!("grawwwwrr!"); 1024]); + _bench_copy_region::, _>(bencher, vec![format!("grawwwwrr!"); 1024]); +} +#[bench] +fn string10_copy_region_collapse(bencher: &mut Bencher) { + _bench_copy_region::< + SliceRegion>, OffsetOptimized>, + _, + >(bencher, vec![format!("grawwwwrr!"); 1024]); } #[bench] fn string20_copy_region(bencher: &mut Bencher) { - _bench_copy_region::, _>(bencher, vec![format!("grawwwwrr!!!!!!!!!!!"); 512]); + _bench_copy_region::, _>( + bencher, + vec![format!("grawwwwrr!!!!!!!!!!!"); 512], + ); } #[bench] fn vec_u_s_copy_region(bencher: &mut Bencher) { - _bench_copy_region::, _>( + _bench_copy_region::, StringRegion>>>, _>( bencher, vec![vec![(0u64, "grawwwwrr!".to_string()); 32]; 32], ); diff --git a/src/impls/columns.rs b/src/impls/columns.rs new file mode 100644 index 0000000..618939d --- /dev/null +++ b/src/impls/columns.rs @@ -0,0 +1,372 @@ +//! A region to contain a variable number of columns. + +use std::fmt::Debug; + +use crate::impls::deduplicate::ConsecutiveOffsetPairs; +use crate::impls::offsets::OffsetOptimized; +use crate::impls::slice_copy::CopyIter; +use crate::{CopyOnto, CopyRegion, Index, Region}; + +/// A region that can store a variable number of elements per row. 
+///
+/// The region is backed by a number of columns, where the number depends on
+/// the length of the longest row encountered. For each pushed row, the region
+/// remembers the indices into each column that it populated. Rows can have different
+/// lengths, which means that only the first columns will contain a value.
+///
+/// All columns have the same type `R`.
+///
+/// # Examples
+///
+/// Copy a table-like structure:
+/// ```
+/// ```
+// NOTE(review): this struct is private while `ReadColumns` is `pub` — confirm the
+// intended visibility.
+#[derive(Debug)]
+struct ColumnsRegion
+where
+    R: Region,
+    Idx: Index,
+{
+    /// Indices to address rows in `inner`. For each row, we remember
+    /// an index for each column.
+    indices: ConsecutiveOffsetPairs, OffsetOptimized>,
+    /// Storage for columns.
+    inner: Vec,
+}
+
+impl Region for ColumnsRegion
+where
+    R: Region,
+    Idx: Index,
+{
+    type ReadItem<'a> = ReadColumns<'a, R, Idx> where Self: 'a;
+    type Index = usize;
+
+    // Creates one merged column per column position of the widest input region and
+    // merges the inputs' `indices` regions.
+    fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self
+    where
+        Self: 'a,
+    {
+        // Width of the widest input; 0 when there are no inputs.
+        let cols = regions.clone().map(|r| r.inner.len()).max().unwrap_or(0);
+
+        let mut inner = Vec::with_capacity(cols);
+        for col in 0..cols {
+            // Narrower inputs have no column at `col`: `get` returns `None` and
+            // `flat_map` skips them.
+            inner.push(R::merge_regions(
+                regions.clone().flat_map(|r| r.inner.get(col)),
+            ));
+        }
+
+        Self {
+            indices: ConsecutiveOffsetPairs::merge_regions(regions.map(|r| &r.indices)),
+            inner,
+        }
+    }
+
+    // Looks up the per-column indices stored for row `index` and pairs them with the
+    // columns in a `ReadColumns` view.
+    fn index(&self, index: Self::Index) -> Self::ReadItem<'_> {
+        ReadColumns {
+            columns: &self.inner,
+            index: self.indices.index(index),
+        }
+    }
+
+    // Grows the column list to the widest input, then forwards the reservation to
+    // every column.
+    // NOTE(review): nothing is reserved for `indices` here — confirm this is intended.
+    fn reserve_regions<'a, I>(&mut self, regions: I)
+    where
+        Self: 'a,
+        I: Iterator + Clone,
+    {
+        for region in regions.clone() {
+            while self.inner.len() < region.inner.len() {
+                self.inner.push(R::default());
+            }
+        }
+        for (index, inner) in self.inner.iter_mut().enumerate() {
+            inner.reserve_regions(regions.clone().flat_map(|r| r.inner.get(index)))
+        }
+    }
+
+    // Clears every column and the row indices; the columns themselves are kept.
+    fn clear(&mut self) {
+        for inner in &mut self.inner {
+            inner.clear();
+        }
+        self.indices.clear();
+    }
+}
+
+impl Default for ColumnsRegion
+where
+    R: Region,
+    Idx: Index,
+{
+    fn
default() -> Self {
+        Self {
+            indices: Default::default(),
+            inner: Vec::default(),
+        }
+    }
+}
+
+/// Read the values of a row.
+///
+/// The view only holds two shared slices, so cloning and copying it is cheap.
+pub struct ReadColumns<'a, R, Idx>
+where
+    R: Region,
+    Idx: Index,
+{
+    /// Storage for columns.
+    columns: &'a [R],
+    /// Indices to retrieve values from columns.
+    index: &'a [Idx],
+}
+
+impl<'a, R, Idx> Clone for ReadColumns<'a, R, Idx>
+where
+    R: Region,
+    Idx: Index,
+{
+    fn clone(&self) -> Self {
+        Self {
+            columns: self.columns,
+            index: self.index,
+        }
+    }
+}
+
+// Implemented by hand with the same bounds as `Clone`: `#[derive(Copy)]` would add
+// `R: Copy` and `Idx: Copy` bounds to the impl, although only references are copied.
+impl<'a, R, Idx> Copy for ReadColumns<'a, R, Idx>
+where
+    R: Region,
+    Idx: Index,
+{
+}
+
+impl<'a, R, Idx> Debug for ReadColumns<'a, R, Idx>
+where
+    R: Region,
+    R::ReadItem<'a>: Debug,
+    Idx: Index,
+{
+    /// Formats the row as a list of its values.
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_list().entries(self.iter()).finish()
+    }
+}
+
+impl<'a, R, Idx> ReadColumns<'a, R, Idx>
+where
+    R: Region,
+    Idx: Index,
+{
+    /// Iterate the individual values of a row.
+    ///
+    /// The n-th stored index is resolved against the n-th column.
+    pub fn iter(&self) -> impl Iterator> {
+        self.index
+            .iter()
+            .zip(self.columns)
+            .map(|(idx, r)| r.index(*idx))
+    }
+
+    /// Get the element at `offset`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `offset` is not a valid position in this row.
+    pub fn get(&self, offset: usize) -> R::ReadItem<'a> {
+        self.columns[offset].index(self.index[offset])
+    }
+
+    /// Returns the length of this row.
+    pub fn len(&self) -> usize {
+        self.index.len()
+    }
+
+    /// Returns `true` if this row is empty.
+    pub fn is_empty(&self) -> bool {
+        self.index.is_empty()
+    }
+}
+
+impl<'a, R, Idx> CopyOnto> for ReadColumns<'a, R, Idx>
+where
+    R: Region,
+    Idx: Index,
+{
+    fn copy_onto(
+        self,
+        target: &mut ColumnsRegion,
+    ) -> as Region>::Index {
+        // Ensure all required regions exist.
+ while target.inner.len() < self.len() { + target.inner.push(R::default()); + } + + let iter = self + .iter() + .zip(&mut target.inner) + .map(|(value, region)| value.copy_onto(region)); + CopyIter(iter).copy_onto(&mut target.indices) + } +} + +impl<'a, R, Idx, T> CopyOnto> for &'a [T] +where + R: Region, + Idx: Index, + &'a T: CopyOnto, +{ + fn copy_onto( + self, + target: &mut ColumnsRegion, + ) -> as Region>::Index { + // Ensure all required regions exist. + while target.inner.len() < self.len() { + target.inner.push(R::default()); + } + + let iter = self + .iter() + .zip(&mut target.inner) + .map(|(value, region)| value.copy_onto(region)); + CopyIter(iter).copy_onto(&mut target.indices) + } +} + +impl CopyOnto> for Vec +where + R: Region, + Idx: Index, + T: CopyOnto, +{ + fn copy_onto( + self, + target: &mut ColumnsRegion, + ) -> as Region>::Index { + // Ensure all required regions exist. + while target.inner.len() < self.len() { + target.inner.push(R::default()); + } + + let iter = self + .into_iter() + .zip(&mut target.inner) + .map(|(value, region)| value.copy_onto(region)); + CopyIter(iter).copy_onto(&mut target.indices) + } +} + +impl<'a, R, Idx, T> CopyOnto> for &'a Vec +where + R: Region, + Idx: Index, + &'a T: CopyOnto, +{ + fn copy_onto( + self, + target: &mut ColumnsRegion, + ) -> as Region>::Index { + // Ensure all required regions exist. 
+ while target.inner.len() < self.len() { + target.inner.push(R::default()); + } + + let iter = self + .iter() + .zip(&mut target.inner) + .map(|(value, region)| value.copy_onto(region)); + CopyIter(iter).copy_onto(&mut target.indices) + } +} + +#[cfg(test)] +mod tests { + use crate::impls::columns::ColumnsRegion; + use crate::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs}; + use crate::{CopyOnto, MirrorRegion, Region, StringRegion}; + + #[test] + fn test_matrix() { + let data = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]; + + let mut r = ColumnsRegion::, _>::default(); + + let mut indices = Vec::with_capacity(data.len()); + + for row in &data { + let index = row.as_slice().copy_onto(&mut r); + indices.push(index); + } + + for (&index, row) in indices.iter().zip(&data) { + assert!(row.iter().copied().eq(r.index(index).iter())); + } + } + + #[test] + fn test_ragged() { + let data = [ + [].as_slice(), + [1].as_slice(), + [2, 3].as_slice(), + [4, 5, 6].as_slice(), + [7, 8].as_slice(), + [9].as_slice(), + [].as_slice(), + ]; + + let mut r = ColumnsRegion::, _>::default(); + + let mut indices = Vec::with_capacity(data.len()); + + for row in &data { + let index = (*row).copy_onto(&mut r); + indices.push(index); + } + + for (&index, row) in indices.iter().zip(&data) { + assert!(row.iter().copied().eq(r.index(index).iter())); + } + + println!("{r:?}"); + } + + #[test] + fn test_ragged_string_vec() { + let data = vec![ + vec![], + vec!["1".to_string()], + vec!["2".to_string(), "3".to_string()], + vec!["4".to_string(), "5".to_string(), "6".to_string()], + vec!["7".to_string(), "8".to_string()], + vec!["9".to_string()], + vec![], + ]; + + let mut r = + ColumnsRegion::>, _>::default(); + + let mut indices = Vec::with_capacity(data.len()); + + for row in &data { + let index = row.copy_onto(&mut r); + indices.push(index); + } + + for (&index, row) in indices.iter().zip(&data) { + assert!(row.iter().eq(r.index(index).iter())); + } + + println!("{r:?}"); + } + + #[test] + fn 
test_ragged_str_vec() { + let data = [ + vec![], + vec!["1"], + vec!["2", "3"], + vec!["4", "5", "6"], + vec!["7", "8"], + vec!["9"], + vec![], + ]; + + let mut r = ColumnsRegion::, _>::default(); + + let mut indices = Vec::with_capacity(data.len()); + + for row in &data { + let index = row.copy_onto(&mut r); + indices.push(index); + } + + for (&index, row) in indices.iter().zip(&data) { + assert!(row.iter().copied().eq(r.index(index).iter())); + } + + println!("{r:?}"); + } +} diff --git a/src/impls/deduplicate.rs b/src/impls/deduplicate.rs new file mode 100644 index 0000000..2f4cda2 --- /dev/null +++ b/src/impls/deduplicate.rs @@ -0,0 +1,211 @@ +//! Simple deduplication of equal consecutive items. + +use crate::impls::offsets::OffsetContainer; +use crate::{CopyOnto, Region}; + +/// A region to deduplicate consecutive equal items. +#[derive(Debug, Clone)] +pub struct CollapseSequence { + /// Inner region. + inner: R, + /// The index of the last pushed item. + last_index: Option, +} + +impl Default for CollapseSequence { + fn default() -> Self { + Self { + inner: R::default(), + last_index: None, + } + } +} + +impl Region for CollapseSequence +where + for<'a, 'b> R::ReadItem<'a>: PartialEq>, +{ + type ReadItem<'a> = R::ReadItem<'a> where Self: 'a; + type Index = R::Index; + + fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self + where + Self: 'a, + { + Self { + inner: R::merge_regions(regions.map(|r| &r.inner)), + last_index: None, + } + } + + fn index(&self, index: Self::Index) -> Self::ReadItem<'_> { + self.inner.index(index) + } + + fn reserve_regions<'a, I>(&mut self, regions: I) + where + Self: 'a, + I: Iterator + Clone, + { + self.inner.reserve_regions(regions.map(|r| &r.inner)); + } + + fn clear(&mut self) { + self.inner.clear(); + self.last_index = None; + } +} + +impl> CopyOnto> for T +where + for<'a> T: PartialEq>, + for<'a, 'b> R::ReadItem<'a>: PartialEq>, +{ + fn copy_onto(self, target: &mut CollapseSequence) -> as Region>::Index { + if let 
Some(last_index) = target.last_index {
+            // Equal to the most recently pushed item: hand out its index again.
+            if self == target.inner.index(last_index) {
+                return last_index;
+            }
+        }
+        let index = self.copy_onto(&mut target.inner);
+        target.last_index = Some(index);
+        index
+    }
+}
+
+/// Transform an index of `(usize, usize)` to a sequence of `0..`. Requires the pairs to
+/// be dense, i.e., `(i, j)` is followed by `(j, k)`.
+///
+/// Defers to region `R` for storing items, and uses offset container `O` to
+/// remember indices. By default, `O` is `Vec`.
+#[derive(Debug, Clone)]
+pub struct ConsecutiveOffsetPairs>
+where
+    R: Region,
+    O: OffsetContainer,
+{
+    /// Wrapped region
+    inner: R,
+    /// Storage for offsets. Always stores element 0.
+    offsets: O,
+    /// The most recent end of the index pair of region `R`.
+    last_index: usize,
+}
+
+impl, O: OffsetContainer> Default
+    for ConsecutiveOffsetPairs
+{
+    fn default() -> Self {
+        let mut d = Self {
+            inner: Default::default(),
+            offsets: Default::default(),
+            last_index: 0,
+        };
+        // Establish the invariant that `offsets` always starts with 0.
+        d.offsets.push(0);
+        d
+    }
+}
+
+impl, O: OffsetContainer> Region
+    for ConsecutiveOffsetPairs
+{
+    type ReadItem<'a> = R::ReadItem<'a>
+    where
+        Self: 'a;
+
+    type Index = usize;
+
+    // Only the inner region is built from the inputs; the offsets start fresh with the
+    // leading 0.
+    fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self
+    where
+        Self: 'a,
+    {
+        let mut offsets = O::default();
+        offsets.push(0);
+        Self {
+            inner: R::merge_regions(regions.clone().map(|r| &r.inner)),
+            offsets,
+            last_index: 0,
+        }
+    }
+
+    // Item `index` is addressed in the inner region by the pair of consecutive offsets
+    // stored at positions `index` and `index + 1`.
+    fn index(&self, index: Self::Index) -> Self::ReadItem<'_> {
+        self.inner
+            .index((self.offsets.index(index), self.offsets.index(index + 1)))
+    }
+
+    // Forwards the reservation to the inner region.
+    fn reserve_regions<'a, I>(&mut self, regions: I)
+    where
+        Self: 'a,
+        I: Iterator + Clone,
+    {
+        self.inner.reserve_regions(regions.map(|r| &r.inner));
+    }
+
+    fn clear(&mut self) {
+        self.last_index = 0;
+        self.inner.clear();
+        self.offsets.clear();
+        // Restore the invariant that element 0 is always stored.
+        self.offsets.push(0);
+    }
+}
+
+impl, O: OffsetContainer, T: CopyOnto>
+    CopyOnto> for T
+{
+    fn copy_onto(
+        self,
+        target: &mut ConsecutiveOffsetPairs,
+    ) -> as Region>::Index {
+
let index = self.copy_onto(&mut target.inner); + debug_assert_eq!(index.0, target.last_index); + target.last_index = index.1; + target.offsets.push(index.1); + target.offsets.len() - 2 + } +} + +#[cfg(test)] +mod tests { + use crate::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs}; + use crate::impls::offsets::OffsetOptimized; + use crate::{CopyOnto, FlatStack, Region, StringRegion}; + + #[test] + fn test_dedup_flatstack() { + let mut fs = FlatStack::>::default(); + + fs.copy("abc"); + fs.copy("abc"); + + println!("{fs:?}"); + } + + #[test] + fn test_dedup_region() { + let mut r = CollapseSequence::::default(); + + fn copy(r: &mut R, item: impl CopyOnto) -> R::Index { + item.copy_onto(r) + } + + assert_eq!(copy(&mut r, "abc"), copy(&mut r, "abc")); + + println!("{r:?}"); + } + + #[test] + fn test_offset_optimized() { + let mut r = + CollapseSequence::>::default(); + + fn copy(r: &mut R, item: impl CopyOnto) -> R::Index { + item.copy_onto(r) + } + + for _ in 0..1000 { + copy(&mut r, "abc"); + } + + println!("{r:?}"); + } +} diff --git a/src/impls/mirror.rs b/src/impls/mirror.rs index 32b0c9a..a4f8520 100644 --- a/src/impls/mirror.rs +++ b/src/impls/mirror.rs @@ -44,6 +44,13 @@ impl> Region for MirrorRegion { type ReadItem<'a> = T where T: 'a; type Index = T; + fn merge_regions<'a>(_regions: impl Iterator + Clone) -> Self + where + Self: 'a, + { + Self::default() + } + #[inline] fn index(&self, index: Self::Index) -> Self::ReadItem<'_> { index @@ -64,44 +71,44 @@ impl> Region for MirrorRegion { } } -macro_rules! 
implement_for { - ($index_type:ty) => { - impl Containerized for $index_type { - type Region = MirrorRegion; - } +impl CopyOnto> for T { + #[inline(always)] + fn copy_onto(self, _target: &mut MirrorRegion) -> T { + self + } +} - impl CopyOnto> for $index_type { - #[inline(always)] - fn copy_onto(self, _target: &mut MirrorRegion) -> $index_type { - self - } - } +impl<'a, T: Index> CopyOnto> for &'a T { + #[inline(always)] + fn copy_onto(self, _target: &mut MirrorRegion) -> T { + *self + } +} - impl<'a> CopyOnto> for &'a $index_type { - #[inline(always)] - fn copy_onto(self, _target: &mut MirrorRegion<$index_type>) -> $index_type { - *self - } - } +impl ReserveItems> for T { + #[inline(always)] + fn reserve_items(_target: &mut MirrorRegion, _items: I) + where + I: Iterator + Clone, + { + // No storage + } +} - impl<'a> ReserveItems> for $index_type { - #[inline(always)] - fn reserve_items(_target: &mut MirrorRegion<$index_type>, _items: I) - where - I: Iterator + Clone, - { - // No storage - } - } +impl<'a, T: Index> ReserveItems> for &'a T { + #[inline(always)] + fn reserve_items(_target: &mut MirrorRegion, _items: I) + where + I: Iterator + Clone, + { + // No storage + } +} - impl<'a> ReserveItems> for &'a $index_type { - #[inline(always)] - fn reserve_items(_target: &mut MirrorRegion<$index_type>, _items: I) - where - I: Iterator + Clone, - { - // No storage - } +macro_rules! implement_for { + ($index_type:ty) => { + impl Containerized for $index_type { + type Region = MirrorRegion; } }; } diff --git a/src/impls/mod.rs b/src/impls/mod.rs index 4ad472e..fdcf043 100644 --- a/src/impls/mod.rs +++ b/src/impls/mod.rs @@ -1,6 +1,9 @@ //! Various region implementations. +pub mod columns; +pub mod deduplicate; pub mod mirror; +pub mod offsets; pub mod option; pub mod result; pub mod slice; diff --git a/src/impls/offsets.rs b/src/impls/offsets.rs new file mode 100644 index 0000000..c0811f5 --- /dev/null +++ b/src/impls/offsets.rs @@ -0,0 +1,267 @@ +//! 
Types to represent offsets.
+
+/// A container of offsets of type `T` that supports appending and positional lookup.
+pub trait OffsetContainer: Default + Extend {
+    /// Accepts a newly pushed element.
+    fn push(&mut self, item: T);
+
+    /// Lookup an index
+    fn index(&self, index: usize) -> T;
+
+    /// Clear all contents.
+    fn clear(&mut self);
+
+    /// Returns the number of elements.
+    fn len(&self) -> usize;
+
+    /// Returns `true` if empty.
+    #[inline]
+    fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+
+    /// Reserve space for `additional` elements.
+    fn reserve(&mut self, additional: usize);
+}
+
+/// Compact representation of a regularly spaced sequence of offsets that starts at 0.
+#[derive(Debug, Default)]
+enum OffsetStride {
+    /// No offsets.
+    #[default]
+    Empty,
+    /// The single offset `0`.
+    Zero,
+    /// `Striding(stride, count)`: the `count` offsets `0, stride, 2 * stride, ...`.
+    Striding(usize, usize),
+    /// `Saturated(stride, count, reps)`: the striding sequence followed by `reps`
+    /// repetitions of its last offset, `stride * (count - 1)`.
+    Saturated(usize, usize, usize),
+}
+
+impl OffsetStride {
+    /// Accepts or rejects a newly pushed element.
+    ///
+    /// Returns `true` if `item` continues the current pattern and was absorbed, and
+    /// `false` if it does not fit; in that case `self` is left unchanged.
+    fn push(&mut self, item: usize) -> bool {
+        match self {
+            // Only 0 can start the sequence.
+            OffsetStride::Empty => {
+                if item == 0 {
+                    *self = OffsetStride::Zero;
+                    true
+                } else {
+                    false
+                }
+            }
+            // The second offset defines the stride; any value is accepted.
+            OffsetStride::Zero => {
+                *self = OffsetStride::Striding(item, 2);
+                true
+            }
+            OffsetStride::Striding(stride, count) => {
+                if item == *stride * *count {
+                    // The next multiple of the stride extends the run.
+                    *count += 1;
+                    true
+                } else if item == *stride * (*count - 1) {
+                    // A repetition of the last offset starts the saturated tail.
+                    *self = OffsetStride::Saturated(*stride, *count, 1);
+                    true
+                } else {
+                    false
+                }
+            }
+            // Once saturated, only further repetitions of the last offset fit.
+            OffsetStride::Saturated(stride, count, reps) => {
+                if item == *stride * (*count - 1) {
+                    *reps += 1;
+                    true
+                } else {
+                    false
+                }
+            }
+        }
+    }
+
+    /// Returns the offset at position `index`.
+    ///
+    /// Panics on `Empty`. `index` is not checked against `len`.
+    fn index(&self, index: usize) -> usize {
+        match self {
+            OffsetStride::Empty => {
+                panic!("Empty OffsetStride")
+            }
+            OffsetStride::Zero => 0,
+            OffsetStride::Striding(stride, _steps) => *stride * index,
+            OffsetStride::Saturated(stride, steps, _reps) => {
+                if index < *steps {
+                    *stride * index
+                } else {
+                    // Positions in the saturated tail all hold the last strided offset.
+                    *stride * (*steps - 1)
+                }
+            }
+        }
+    }
+
+    /// Returns the number of offsets represented.
+    fn len(&self) -> usize {
+        match self {
+            OffsetStride::Empty => 0,
+            OffsetStride::Zero => 1,
+            OffsetStride::Striding(_stride, steps) => *steps,
+            OffsetStride::Saturated(_stride, steps, reps) => *steps + *reps,
+        }
+    }
+}
+
+/// A list of unsigned integers that uses
`u32` elements as long as they are small enough, and switches to `u64` once they are not. +#[derive(Eq, PartialEq, Ord, PartialOrd, Clone, Debug, Default)] +pub struct OffsetList { + /// Length of a prefix of zero elements. + pub zero_prefix: usize, + /// Offsets that fit within a `u32`. + pub smol: Vec, + /// Offsets that either do not fit in a `u32`, or are inserted after some offset that did not fit. + pub chonk: Vec, +} + +impl OffsetList { + // TODO + // /// Allocate a new list with a specified capacity. + // pub fn with_capacity(cap: usize) -> Self { + // Self { + // zero_prefix: 0, + // smol: Vec::with_capacity(cap), + // chonk: Vec::new(), + // } + // } + /// Inserts the offset, as a `u32` if that is still on the table. + pub fn push(&mut self, offset: usize) { + if self.smol.is_empty() && self.chonk.is_empty() && offset == 0 { + self.zero_prefix += 1; + } else if self.chonk.is_empty() { + if let Ok(smol) = offset.try_into() { + self.smol.push(smol); + } else { + self.chonk.push(offset.try_into().unwrap()) + } + } else { + self.chonk.push(offset.try_into().unwrap()) + } + } + /// Like `std::ops::Index`, which we cannot implement as it must return a `&usize`. + pub fn index(&self, index: usize) -> usize { + if index < self.zero_prefix { + 0 + } else if index - self.zero_prefix < self.smol.len() { + self.smol[index - self.zero_prefix].try_into().unwrap() + } else { + self.chonk[index - self.zero_prefix - self.smol.len()] + .try_into() + .unwrap() + } + } + /// The number of offsets in the list. + pub fn len(&self) -> usize { + self.zero_prefix + self.smol.len() + self.chonk.len() + } + + /// Returns `true` if this list contains no elements. + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + /// Reserve space for `additional` elements. + pub fn reserve(&mut self, additional: usize) { + self.smol.reserve(additional) + } + + /// Remove all elements. 
+    pub fn clear(&mut self) {
+        // The zero prefix counts towards `len`; reset it together with the vectors so
+        // that a cleared list is empty again.
+        self.zero_prefix = 0;
+        self.smol.clear();
+        self.chonk.clear();
+    }
+}
+
+/// An offset container that keeps a leading, regularly spaced run of offsets in a
+/// compact form and spills all later offsets into an [`OffsetList`].
+#[derive(Default, Debug)]
+pub struct OffsetOptimized {
+    /// Compact prefix; absorbs offsets as long as they follow its pattern.
+    strided: OffsetStride,
+    /// Every offset from the first one that `strided` rejected onwards.
+    spilled: OffsetList,
+}
+
+impl OffsetContainer for OffsetOptimized {
+    fn push(&mut self, item: usize) {
+        if !self.spilled.is_empty() {
+            // Once spilling has started, all further offsets go to `spilled` to keep
+            // the positions contiguous.
+            self.spilled.push(item);
+        } else {
+            let inserted = self.strided.push(item);
+            if !inserted {
+                self.spilled.push(item);
+            }
+        }
+    }
+
+    fn index(&self, index: usize) -> usize {
+        // Positions below `strided.len()` live in the compact prefix, the rest in
+        // `spilled`, shifted by the prefix length.
+        if index < self.strided.len() {
+            self.strided.index(index)
+        } else {
+            self.spilled.index(index - self.strided.len())
+        }
+    }
+
+    fn clear(&mut self) {
+        self.spilled.clear();
+        self.strided = OffsetStride::default();
+    }
+
+    fn len(&self) -> usize {
+        self.strided.len() + self.spilled.len()
+    }
+
+    fn reserve(&mut self, additional: usize) {
+        // Only `spilled` allocates; nothing is reserved while it is unused.
+        if !self.spilled.is_empty() {
+            self.spilled.reserve(additional);
+        }
+    }
+}
+
+impl Extend for OffsetOptimized {
+    fn extend>(&mut self, iter: T) {
+        for item in iter {
+            self.push(item);
+        }
+    }
+}
+
+impl OffsetContainer for Vec {
+    #[inline]
+    fn push(&mut self, item: T) {
+        self.push(item)
+    }
+
+    #[inline]
+    fn index(&self, index: usize) -> T {
+        self[index]
+    }
+
+    #[inline]
+    fn clear(&mut self) {
+        self.clear()
+    }
+
+    #[inline]
+    fn len(&self) -> usize {
+        self.len()
+    }
+
+    #[inline]
+    fn reserve(&mut self, additional: usize) {
+        self.reserve(additional)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::impls::deduplicate::ConsecutiveOffsetPairs;
+    use crate::impls::offsets::OffsetOptimized;
+    use crate::{CopyOnto, Region, SliceRegion, StringRegion};
+
+    #[test]
+    fn test_offset_optimized() {
+        fn copy(r: &mut R, item: impl CopyOnto) -> R::Index {
+            item.copy_onto(r)
+        }
+
+        let mut r = SliceRegion::<
+            ConsecutiveOffsetPairs,
+            OffsetOptimized,
+        >::default();
+        let idx = copy(&mut r, ["abc"]);
+        assert_eq!("abc", r.index(idx).get(0))
+    }
+}
diff --git a/src/impls/option.rs b/src/impls/option.rs
index 1ce2b26..ae476c2 100644
--- a/src/impls/option.rs +++ b/src/impls/option.rs @@ -35,6 +35,15 @@ impl Region for OptionRegion { type ReadItem<'a> = Option> where Self: 'a; type Index = Option; + fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self + where + Self: 'a, + { + Self { + inner: R::merge_regions(regions.map(|r| &r.inner)), + } + } + #[inline] fn index(&self, index: Self::Index) -> Self::ReadItem<'_> { index.map(|t| self.inner.index(t)) diff --git a/src/impls/result.rs b/src/impls/result.rs index c400a60..79bb2b9 100644 --- a/src/impls/result.rs +++ b/src/impls/result.rs @@ -40,6 +40,16 @@ where type ReadItem<'a> = Result, E::ReadItem<'a>> where Self: 'a; type Index = Result; + fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self + where + Self: 'a, + { + Self { + oks: T::merge_regions(regions.clone().map(|r| &r.oks)), + errs: E::merge_regions(regions.map(|r| &r.errs)), + } + } + #[inline] fn index(&self, index: Self::Index) -> Self::ReadItem<'_> { match index { diff --git a/src/impls/slice.rs b/src/impls/slice.rs index 3da7af1..0545dc6 100644 --- a/src/impls/slice.rs +++ b/src/impls/slice.rs @@ -1,11 +1,12 @@ //! A region that stores slices. use std::fmt::{Debug, Formatter}; -use std::ops::Deref; +use std::ops::{Deref, Range}; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; +use crate::impls::offsets::OffsetContainer; use crate::{Containerized, CopyOnto, Region, ReserveItems}; impl Containerized for Vec { @@ -51,21 +52,34 @@ impl Containerized for [T; N] { /// ``` #[derive(Debug, Clone)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub struct SliceRegion { +pub struct SliceRegion = Vec<::Index>> { /// Container of slices. - slices: Vec, + slices: O, /// Inner region. 
inner: C,
 }
 
-impl Region for SliceRegion {
-    type ReadItem<'a> = ReadSlice<'a, C> where Self: 'a;
+impl> Region for SliceRegion {
+    type ReadItem<'a> = ReadSlice<'a, C, O> where Self: 'a;
     type Index = (usize, usize);
 
+    fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self
+    where
+        Self: 'a,
+    {
+        Self {
+            slices: O::default(),
+            inner: C::merge_regions(regions.map(|r| &r.inner)),
+        }
+    }
+
     #[inline]
     fn index(&self, (start, end): Self::Index) -> Self::ReadItem<'_> {
-        let slice = &self.slices[start..end];
-        ReadSlice(&self.inner, slice)
+        ReadSlice {
+            region: self,
+            start,
+            end,
+        }
     }
 
     #[inline]
@@ -86,33 +100,47 @@ impl Region for SliceRegion {
     }
 }
 
-impl Default for SliceRegion {
+impl> Default for SliceRegion {
     fn default() -> Self {
         Self {
-            slices: Vec::default(),
+            slices: O::default(),
             inner: C::default(),
         }
     }
 }
 
 /// A helper to read data out of a slice region.
-pub struct ReadSlice<'a, C: Region>(pub &'a C, pub &'a [C::Index]);
+pub struct ReadSlice<'a, C: Region, O: OffsetContainer = Vec<::Index>> {
+    region: &'a SliceRegion,
+    start: usize,
+    end: usize,
+}
 
-impl<'a, C: Region> ReadSlice<'a, C> {
+impl<'a, C: Region, O: OffsetContainer> ReadSlice<'a, C, O> {
     /// Read the n-th item from the underlying region.
     #[inline]
     pub fn get(&self, index: usize) -> C::ReadItem<'_> {
-        self.0.index(self.1[index])
+        if index >= self.end - self.start { // `index` must lie in `0..len`
+            panic!(
+                "Index {index} out of bounds {} ({}..{})",
+                self.end - self.start,
+                self.start,
+                self.end
+            );
+        }
+        self.region
+            .inner
+            .index(self.region.slices.index(self.start + index))
     }
 
     /// The number in this slice.
     pub fn len(&self) -> usize {
-        self.1.len()
+        self.end - self.start // length of this slice, not of the whole region
     }
 
     /// Returns `true` if the slice is empty.
     pub fn is_empty(&self) -> bool {
-        self.1.is_empty()
+        self.start == self.end // empty iff the slice's own range is empty
     }
 
     /// Returns an iterator over all contained items.
@@ -121,7 +149,7 @@ impl<'a, C: Region> ReadSlice<'a, C> { } } -impl<'a, C: Region> Debug for ReadSlice<'a, C> +impl<'a, C: Region, O: OffsetContainer> Debug for ReadSlice<'a, C, O> where C::ReadItem<'a>: Debug, { @@ -130,44 +158,49 @@ where } } -impl<'a, C: Region> Clone for ReadSlice<'a, C> { +impl<'a, C: Region, O: OffsetContainer> Clone for ReadSlice<'a, C, O> { #[inline] fn clone(&self) -> Self { *self } } -impl<'a, C: Region> Copy for ReadSlice<'a, C> {} +impl<'a, C: Region, O: OffsetContainer> Copy for ReadSlice<'a, C, O> {} -impl<'a, C: Region> IntoIterator for ReadSlice<'a, C> { +impl<'a, C: Region, O: OffsetContainer> IntoIterator for ReadSlice<'a, C, O> { type Item = C::ReadItem<'a>; - type IntoIter = ReadSliceIter<'a, C>; + type IntoIter = ReadSliceIter<'a, C, O>; fn into_iter(self) -> Self::IntoIter { - ReadSliceIter(self.0, self.1.iter()) + ReadSliceIter(self.region, self.start..self.end) } } /// An iterator over the items read from a slice region. #[derive(Debug, Clone)] -pub struct ReadSliceIter<'a, C: Region>(&'a C, std::slice::Iter<'a, C::Index>); +pub struct ReadSliceIter<'a, C: Region, O: OffsetContainer>( + &'a SliceRegion, + Range, +); -impl<'a, C: Region> Iterator for ReadSliceIter<'a, C> { +impl<'a, C: Region, O: OffsetContainer> Iterator for ReadSliceIter<'a, C, O> { type Item = C::ReadItem<'a>; #[inline] fn next(&mut self) -> Option { - self.1.next().map(|idx| self.0.index(*idx)) + self.1 + .next() + .map(|idx| self.0.inner.index(self.0.slices.index(idx))) } } -impl<'a, C, T: 'a> CopyOnto> for &'a [T] +impl<'a, C, T: 'a, O: OffsetContainer> CopyOnto> for &'a [T] where C: Region, &'a T: CopyOnto, { #[inline] - fn copy_onto(self, target: &mut SliceRegion) -> as Region>::Index { + fn copy_onto(self, target: &mut SliceRegion) -> as Region>::Index { let start = target.slices.len(); target .slices @@ -176,11 +209,11 @@ where } } -impl<'a, T, R: Region> ReserveItems> for &'a [T] +impl<'a, T, R: Region, O: OffsetContainer> ReserveItems> for &'a 
[T] where &'a T: ReserveItems + 'a, { - fn reserve_items(target: &mut SliceRegion, items: I) + fn reserve_items(target: &mut SliceRegion, items: I) where I: Iterator + Clone, { @@ -189,22 +222,23 @@ where } } -impl<'a, C, T> CopyOnto> for &'a Vec +impl<'a, C, T, O: OffsetContainer> CopyOnto> for &'a Vec where C: Region, - &'a [T]: CopyOnto>, + &'a [T]: CopyOnto>, { #[inline] - fn copy_onto(self, target: &mut SliceRegion) -> as Region>::Index { + fn copy_onto(self, target: &mut SliceRegion) -> as Region>::Index { self.as_slice().copy_onto(target) } } -impl<'a, T: 'a, R: Region> ReserveItems> for &'a Vec +impl<'a, T: 'a, R: Region, O: OffsetContainer> ReserveItems> + for &'a Vec where &'a T: ReserveItems, { - fn reserve_items(target: &mut SliceRegion, items: I) + fn reserve_items(target: &mut SliceRegion, items: I) where I: Iterator + Clone, { @@ -212,49 +246,56 @@ where } } -impl CopyOnto> for Vec +impl> CopyOnto> for Vec where C: Region, - for<'a> &'a [T]: CopyOnto>, + T: CopyOnto, { #[inline] - fn copy_onto(self, target: &mut SliceRegion) -> as Region>::Index { - self.as_slice().copy_onto(target) + fn copy_onto(self, target: &mut SliceRegion) -> as Region>::Index { + let start = target.slices.len(); + target + .slices + .extend(self.into_iter().map(|t| t.copy_onto(&mut target.inner))); + (start, target.slices.len()) } } -impl<'a, C: Region + 'a> CopyOnto> for ReadSlice<'a, C> +impl<'a, C: Region + 'a, O: OffsetContainer> CopyOnto> + for ReadSlice<'a, C, O> where C::ReadItem<'a>: CopyOnto, { #[inline] - fn copy_onto(self, target: &mut SliceRegion) -> as Region>::Index { - let ReadSlice(container, indexes) = self; - let start = target.slices.len(); - target.slices.extend( - indexes - .iter() - .map(|&index| container.index(index).copy_onto(&mut target.inner)), - ); - (start, target.slices.len()) + fn copy_onto(self, target: &mut SliceRegion) -> as Region>::Index { + let ReadSlice { region, start, end } = self; + let start_len = target.slices.len(); + for index in 
start..end { + let index = region.slices.index(index); + let index = region.inner.index(index).copy_onto(&mut target.inner); + target.slices.push(index); + } + (start_len, target.slices.len()) } } -impl<'a, T, R: Region, const N: usize> CopyOnto> for &'a [T; N] +impl<'a, T, R: Region, O: OffsetContainer, const N: usize> CopyOnto> + for &'a [T; N] where - for<'b> &'b [T]: CopyOnto>, + for<'b> &'b [T]: CopyOnto>, { #[inline] - fn copy_onto(self, target: &mut SliceRegion) -> as Region>::Index { + fn copy_onto(self, target: &mut SliceRegion) -> as Region>::Index { self.as_slice().copy_onto(target) } } -impl<'a, T: 'a, R: Region, const N: usize> ReserveItems> for &'a [T; N] +impl<'a, T: 'a, R: Region, O: OffsetContainer, const N: usize> + ReserveItems> for &'a [T; N] where &'a T: ReserveItems, { - fn reserve_items(target: &mut SliceRegion, items: I) + fn reserve_items(target: &mut SliceRegion, items: I) where I: Iterator + Clone, { @@ -262,30 +303,13 @@ where } } -impl CopyOnto> for [T; N] +impl, const N: usize> CopyOnto> + for [T; N] where - for<'a> &'a [T]: CopyOnto>, + for<'a> &'a [T]: CopyOnto>, { #[inline] - fn copy_onto(self, target: &mut SliceRegion) -> as Region>::Index { + fn copy_onto(self, target: &mut SliceRegion) -> as Region>::Index { self.as_slice().copy_onto(target) } } - -impl<'a, C: Region + 'a> ReserveItems> for &'a (C, &'a [C::Index]) -where - C::ReadItem<'a>: ReserveItems, -{ - fn reserve_items(target: &mut SliceRegion, items: I) - where - I: Iterator + Clone, - { - target - .slices - .reserve(items.clone().map(|(_c, is)| is.len()).sum()); - ReserveItems::reserve_items( - &mut target.inner, - items.flat_map(|(c, is)| is.iter().map(|i| c.index(*i))), - ) - } -} diff --git a/src/impls/slice_copy.rs b/src/impls/slice_copy.rs index db66276..9309c53 100644 --- a/src/impls/slice_copy.rs +++ b/src/impls/slice_copy.rs @@ -32,6 +32,15 @@ impl Region for CopyRegion { type ReadItem<'a> = &'a [T] where Self: 'a; type Index = (usize, usize); + fn 
merge_regions<'a>(regions: impl Iterator + Clone) -> Self + where + Self: 'a, + { + Self { + slices: Vec::with_capacity(regions.map(|r| r.slices.len()).sum()), + } + } + #[inline] fn index(&self, (start, end): Self::Index) -> Self::ReadItem<'_> { &self.slices[start..end] @@ -63,6 +72,7 @@ impl CopyOnto> for &[T] where T: Copy, { + #[inline] fn copy_onto(self, target: &mut CopyRegion) -> as Region>::Index { let start = target.slices.len(); target.slices.extend_from_slice(self); @@ -75,7 +85,7 @@ impl ReserveItems> for &[T] { where I: Iterator + Clone, { - target.slices.reserve(items.clone().map(|i| i.len()).sum()); + target.slices.reserve(items.map(|i| i.len()).sum()); } } @@ -83,18 +93,43 @@ impl CopyOnto> for &Vec where T: Copy, { + #[inline] + fn copy_onto(self, target: &mut CopyRegion) -> as Region>::Index { + self.as_slice().copy_onto(target) + } +} + +impl ReserveItems> for &Vec { + fn reserve_items(target: &mut CopyRegion, items: I) + where + I: Iterator + Clone, + { + ReserveItems::reserve_items(target, items.map(Vec::as_slice)) + } +} + +/// A type to wrap iterators. 
+pub struct CopyIter(pub I); + +impl> CopyOnto> for CopyIter +where + T: Copy, +{ + #[inline] fn copy_onto(self, target: &mut CopyRegion) -> as Region>::Index { let start = target.slices.len(); - target.slices.extend_from_slice(self); + target.slices.extend(self.0); (start, target.slices.len()) } } -impl ReserveItems> for &Vec { +impl> ReserveItems> for CopyIter { fn reserve_items(target: &mut CopyRegion, items: I) where I: Iterator + Clone, { - target.slices.reserve(items.clone().map(|i| i.len()).sum()); + target + .slices + .reserve(items.flat_map(|i| i.0.into_iter()).count()); } } diff --git a/src/impls/string.rs b/src/impls/string.rs index 9069e66..693ae00 100644 --- a/src/impls/string.rs +++ b/src/impls/string.rs @@ -34,6 +34,15 @@ impl Region for StringRegion { type ReadItem<'a> = &'a str where Self: 'a ; type Index = as Region>::Index; + fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self + where + Self: 'a, + { + Self { + inner: CopyRegion::merge_regions(regions.map(|r| &r.inner)), + } + } + #[inline] fn index(&self, index: Self::Index) -> Self::ReadItem<'_> { unsafe { std::str::from_utf8_unchecked(self.inner.index(index)) } diff --git a/src/impls/tuple.rs b/src/impls/tuple.rs index b4c7d6a..c143a96 100644 --- a/src/impls/tuple.rs +++ b/src/impls/tuple.rs @@ -31,6 +31,14 @@ macro_rules! tuple_flatcontainer { type Index = ($($name::Index,)*); + fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self + where + Self: 'a { + Self { + $([]: $name::merge_regions(regions.clone().map(|r| &r.[]))),* + } + } + #[inline] fn index(&self, index: Self::Index) -> Self::ReadItem<'_> { let ($($name,)*) = index; ( diff --git a/src/lib.rs b/src/lib.rs index 5c5ee07..ed84181 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -74,6 +74,11 @@ pub trait Region: Default { /// as an opaque type, even if known. type Index: Index; + /// Construct a region that can absorb the contents of `regions` in the future. 
+ fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self + where + Self: 'a; + /// Index into the container. The index must be obtained by /// pushing data into the container. fn index(&self, index: Self::Index) -> Self::ReadItem<'_>; @@ -101,17 +106,6 @@ pub trait CopyOnto { fn copy_onto(self, target: &mut C) -> C::Index; } -// Blanket implementation for `Box`. This might be a bad idea because it precludes blanket -// implementations. -impl CopyOnto for Box -where - for<'a> &'a T: CopyOnto, -{ - fn copy_onto(self, target: &mut R) -> R::Index { - self.as_ref().copy_onto(target) - } -} - /// Reserve space in the receiving region. pub trait ReserveItems { /// Ensure that the region can absorb `items` without reallocation. @@ -121,7 +115,6 @@ pub trait ReserveItems { } /// A container for indices into a region. -#[derive(Clone)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr( feature = "serde", @@ -162,6 +155,27 @@ impl FlatStack { Self::default() } + /// Returns a flat stack that can absorb `capacity` indices without reallocation. + /// + /// Prefer [`Self::merge_capacity`] over this function to also pre-size the regions. + pub fn with_capacity(capacity: usize) -> Self { + Self { + indices: Vec::with_capacity(capacity), + region: R::default(), + } + } + + /// Returns a flat stack that can absorb the contents of `stacks` without reallocation. + pub fn merge_capacity<'a, I: Iterator + Clone + 'a>(stacks: I) -> Self + where + R: 'a, + { + Self { + indices: Vec::with_capacity(stacks.clone().map(|s| s.indices.len()).sum()), + region: R::merge_regions(stacks.map(|r| &r.region)), + } + } + /// Appends the element to the back of the stack.
#[inline] pub fn copy(&mut self, item: impl CopyOnto) { @@ -225,6 +239,16 @@ impl FlatStack { } } +impl, R: Region> Extend for FlatStack { + fn extend>(&mut self, iter: I) { + let iter = iter.into_iter(); + self.reserve(iter.size_hint().0); + for item in iter { + self.indices.push(item.copy_onto(&mut self.region)); + } + } +} + impl<'a, R: Region> IntoIterator for &'a FlatStack { type Item = R::ReadItem<'a>; type IntoIter = Iter<'a, R>; @@ -260,15 +284,39 @@ impl<'a, R: Region> Iterator for Iter<'a, R> { impl<'a, R: Region> ExactSizeIterator for Iter<'a, R> {} +impl> FromIterator for FlatStack { + fn from_iter>(iter: I) -> Self { + let iter = iter.into_iter(); + let mut c = Self::with_capacity(iter.size_hint().0); + c.extend(iter); + c + } +} + +impl Clone for FlatStack { + fn clone(&self) -> Self { + let mut clone = Self::merge_capacity(std::iter::once(self)); + clone.extend(self.iter()); + clone + } +} + #[cfg(test)] mod tests { + use crate::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs}; + use crate::impls::tuple::TupleARegion; + use super::*; + fn copy(r: &mut R, item: impl CopyOnto) -> R::Index { + item.copy_onto(r) + } + #[test] fn test_readme() { let r: Result<_, u16> = Ok("abc"); let mut c = FlatStack::default_impl::>(); - c.copy(&r); + c.copy(r); assert_eq!(r, c.get(0)); } @@ -292,10 +340,10 @@ mod tests { #[test] fn test_vec() { - let mut c = SliceRegion::default(); + let mut c = SliceRegion::>::default(); let slice = &[1u8, 2, 3]; let idx = slice.copy_onto(&mut c); - assert_eq!(slice, c.index(idx).1) + assert!(slice.iter().copied().eq(c.index(idx))); } #[test] @@ -303,7 +351,7 @@ mod tests { let mut c: SliceRegion> = SliceRegion::default(); let slice = &[1u8, 2, 3][..]; let idx = slice.copy_onto(&mut c); - assert_eq!(slice, c.index(idx).1) + assert!(slice.iter().copied().eq(c.index(idx))); } struct Person { @@ -338,6 +386,23 @@ mod tests { < as Containerized>::Region as Region>::Index, ); + fn merge_regions<'a>(regions: impl Iterator + 
Clone) -> Self + where + Self: 'a, + { + Self { + name_container: ::Region::merge_regions( + regions.clone().map(|r| &r.name_container), + ), + age_container: ::Region::merge_regions( + regions.clone().map(|r| &r.age_container), + ), + hobbies: as Containerized>::Region::merge_regions( + regions.map(|r| &r.hobbies), + ), + } + } + fn index(&self, (name, age, hobbies): Self::Index) -> Self::ReadItem<'_> { PersonRef { name: self.name_container.index(name), @@ -409,9 +474,9 @@ mod tests { let person_ref = c.get(0); assert_eq!("Moritz", person_ref.name); assert_eq!(123, person_ref.age); - assert_eq!(2, person_ref.hobbies.1.len()); - for (idx, hobby) in person_ref.hobbies.1.iter().zip(hobbies) { - assert_eq!(hobby, person_ref.hobbies.0.index(*idx)); + assert_eq!(2, person_ref.hobbies.len()); + for (copied_hobby, hobby) in person_ref.hobbies.iter().zip(hobbies) { + assert_eq!(copied_hobby, hobby); } let mut cc = FlatStack::default_impl::(); @@ -421,17 +486,17 @@ mod tests { let person_ref = cc.get(0); assert_eq!("Moritz", person_ref.name); assert_eq!(123, person_ref.age); - assert_eq!(2, person_ref.hobbies.1.len()); - for (idx, hobby) in person_ref.hobbies.1.iter().zip(hobbies) { - assert_eq!(hobby, person_ref.hobbies.0.index(*idx)); + assert_eq!(2, person_ref.hobbies.len()); + for (copied_hobby, hobby) in person_ref.hobbies.iter().zip(hobbies) { + assert_eq!(copied_hobby, hobby); } } #[test] fn test_result() { let r: Result<_, u16> = Ok("abc"); - let mut c = ResultRegion::default(); - let idx = r.copy_onto(&mut c); + let mut c = ResultRegion::>::default(); + let idx = copy(&mut c, r); assert_eq!(r, c.index(idx)); } @@ -500,35 +565,41 @@ mod tests { test_copy::<_, MirrorRegion<_>>(std::num::Wrapping(0isize)); test_copy::<_, MirrorRegion<_>>(&std::num::Wrapping(0isize)); - test_copy::<_, ResultRegion<_, _>>(Result::::Ok(0)); - test_copy::<_, ResultRegion<_, _>>(&Result::::Ok(0)); + test_copy::<_, ResultRegion, MirrorRegion<_>>>(Result::::Ok(0)); + test_copy::<_, 
ResultRegion, MirrorRegion<_>>>(&Result::::Ok(0)); - test_copy::<_, SliceRegion<_>>([0u8].as_slice()); - test_copy::<_, SliceRegion<_>>(vec![0u8]); - test_copy::<_, SliceRegion<_>>(&vec![0u8]); + test_copy::<_, SliceRegion>>([0u8].as_slice()); + test_copy::<_, SliceRegion>>(vec![0u8]); + test_copy::<_, SliceRegion>>(&vec![0u8]); - test_copy::<_, SliceRegion<_>>(["a"].as_slice()); - test_copy::<_, SliceRegion<_>>(vec!["a"]); - test_copy::<_, SliceRegion<_>>(&vec!["a"]); + test_copy::<_, SliceRegion>(["a"].as_slice()); + test_copy::<_, SliceRegion>(vec!["a"]); + test_copy::<_, SliceRegion>(&vec!["a"]); - test_copy::<_, SliceRegion<_>>([("a",)].as_slice()); - test_copy::<_, SliceRegion<_>>(vec![("a",)]); - test_copy::<_, SliceRegion<_>>(&vec![("a",)]); + test_copy::<_, SliceRegion>>([("a",)].as_slice()); + test_copy::<_, SliceRegion>>(vec![("a",)]); + test_copy::<_, SliceRegion>>(&vec![("a",)]); test_copy::<_, CopyRegion<_>>([0u8].as_slice()); test_copy::<_, <(u8, u8) as Containerized>::Region>((1, 2)); + + test_copy::<_, ConsecutiveOffsetPairs>>([1, 2, 3].as_slice()); + + test_copy::<_, CollapseSequence>>([1, 2, 3].as_slice()); } #[test] fn slice_region_read_item() { + fn is_clone(_: &T) {} + let mut c = FlatStack::>>::default(); c.copy(vec![1, 2, 3]); let mut r = SliceRegion::>::default(); let idx = [1, 2, 3].copy_onto(&mut r); let read_item = r.index(idx); - let _read_item2 = read_item.clone(); + is_clone(&read_item); let _read_item3 = read_item; assert_eq!(vec![1, 2, 3], read_item.into_iter().collect::>()); }