diff --git a/src/impls/deduplicate.rs b/src/impls/deduplicate.rs index 2f4cda2..35c1ab6 100644 --- a/src/impls/deduplicate.rs +++ b/src/impls/deduplicate.rs @@ -1,7 +1,8 @@ //! Simple deduplication of equal consecutive items. -use crate::impls::offsets::OffsetContainer; +use crate::impls::offsets::{OffsetContainer, OffsetRegion}; use crate::{CopyOnto, Region}; +use crate::impls::vec::CopyVector; /// A region to deduplicate consecutive equal items. #[derive(Debug, Clone)] @@ -79,10 +80,10 @@ where /// Defers to region `R` for storing items, and uses offset container `O` to /// rember indices. By default, `O` is `Vec`. #[derive(Debug, Clone)] -pub struct ConsecutiveOffsetPairs> +pub struct ConsecutiveOffsetPairs> where R: Region, - O: OffsetContainer, + O: OffsetRegion, usize: CopyOnto, { /// Wrapped region inner: R, @@ -92,8 +93,9 @@ where last_index: usize, } -impl, O: OffsetContainer> Default +impl, O: OffsetRegion> Default for ConsecutiveOffsetPairs +where usize: CopyOnto, { fn default() -> Self { let mut d = Self { @@ -101,13 +103,14 @@ impl, O: OffsetContainer> Default offsets: Default::default(), last_index: 0, }; - d.offsets.push(0); + 0.copy_onto(&mut d.offsets); d } } -impl, O: OffsetContainer> Region +impl, O: OffsetRegion> Region for ConsecutiveOffsetPairs + where usize: CopyOnto, { type ReadItem<'a> = R::ReadItem<'a> where @@ -120,7 +123,7 @@ impl, O: OffsetContainer> Region Self: 'a, { let mut offsets = O::default(); - offsets.push(0); + 0.copy_onto(&mut offsets); Self { inner: R::merge_regions(regions.clone().map(|r| &r.inner)), offsets, @@ -145,12 +148,13 @@ impl, O: OffsetContainer> Region self.last_index = 0; self.inner.clear(); self.offsets.clear(); - self.offsets.push(0); + 0.copy_onto(&mut self.offsets); } } -impl, O: OffsetContainer, T: CopyOnto> +impl, O: OffsetRegion, T: CopyOnto> CopyOnto> for T + where usize: CopyOnto, { fn copy_onto( self, @@ -159,7 +163,7 @@ impl, O: OffsetContainer, T: CopyOnto Self: Region = usize, Index=usize> + 'a, + usize: CopyOnto, +{ +} + +impl OffsetRegion for R where for<'a> Self: Region = usize, Index=usize> + 'a, +usize: CopyOnto, +{} + /// TODO pub trait OffsetContainer: Default + Extend { /// Accepts a newly pushed element. @@ -101,29 +114,69 @@ impl OffsetStride { /// A list of unsigned integers that uses `u32` elements as long as they are small enough, and switches to `u64` once they are not. #[derive(Eq, PartialEq, Ord, PartialOrd, Clone, Debug, Default)] pub struct OffsetList { - /// Length of a prefix of zero elements. - pub zero_prefix: usize, /// Offsets that fit within a `u32`. pub smol: Vec, /// Offsets that either do not fit in a `u32`, or are inserted after some offset that did not fit. pub chonk: Vec, } +impl Region for OffsetList { + type ReadItem<'a> = usize where Self: 'a; + type Index = usize; + + fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self + where + Self: 'a, + { + Self { + smol: Vec::with_capacity(regions.clone().map(|r| r.smol.len()).sum()), + chonk: Vec::with_capacity(regions.clone().map(|r| r.chonk.len()).sum()), + } + } + + fn index(&self, index: Self::Index) -> Self::ReadItem<'_> { + if index < self.smol.len() { + self.smol[index].try_into().unwrap() + } else { + self.chonk[index - self.smol.len()].try_into().unwrap() + } + } + + fn reserve_regions<'a, I>(&mut self, regions: I) + where + Self: 'a, + I: Iterator + Clone, + { + self.smol + .reserve(regions.clone().map(|r| r.smol.len()).sum()); + self.chonk + .reserve(regions.clone().map(|r| r.chonk.len()).sum()); + } + + fn clear(&mut self) { + self.clear(); + } +} + +impl CopyOnto for usize { + fn copy_onto(self, target: &mut OffsetList) -> usize { + target.push(self); + target.len() - 1 + } +} + impl OffsetList { // TODO // /// Allocate a new list with a specified capacity. // pub fn with_capacity(cap: usize) -> Self { // Self { - // zero_prefix: 0, // smol: Vec::with_capacity(cap), // chonk: Vec::new(), // } // } /// Inserts the offset, as a `u32` if that is still on the table. pub fn push(&mut self, offset: usize) { - if self.smol.is_empty() && self.chonk.is_empty() && offset == 0 { - self.zero_prefix += 1; - } else if self.chonk.is_empty() { + if self.chonk.is_empty() { if let Ok(smol) = offset.try_into() { self.smol.push(smol); } else { @@ -133,21 +186,17 @@ impl OffsetList { self.chonk.push(offset.try_into().unwrap()) } } - /// Like `std::ops::Index`, which we cannot implement as it must return a `&usize`. - pub fn index(&self, index: usize) -> usize { - if index < self.zero_prefix { - 0 - } else if index - self.zero_prefix < self.smol.len() { - self.smol[index - self.zero_prefix].try_into().unwrap() - } else { - self.chonk[index - self.zero_prefix - self.smol.len()] - .try_into() - .unwrap() - } - } + // /// Like `std::ops::Index`, which we cannot implement as it must return a `&usize`. + // pub fn index(&self, index: usize) -> usize { + // if index < self.smol.len() { + // self.smol[index].try_into().unwrap() + // } else { + // self.chonk[index - self.smol.len()].try_into().unwrap() + // } + // } /// The number of offsets in the list. pub fn len(&self) -> usize { - self.zero_prefix + self.smol.len() + self.chonk.len() + self.smol.len() + self.chonk.len() } /// Returns `true` if this list contains no elements. @@ -174,6 +223,49 @@ pub struct OffsetOptimized { spilled: OffsetList, } +impl Region for OffsetOptimized { + type ReadItem<'a> = usize where Self: 'a; + type Index = usize; + + fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self + where + Self: 'a, + { + Self { + strided: OffsetStride::default(), + spilled: OffsetList::merge_regions(regions.map(|r| &r.spilled)), + } + } + + fn index(&self, index: Self::Index) -> Self::ReadItem<'_> { + if index < self.strided.len() { + self.strided.index(index) + } else { + self.spilled.index(index - self.strided.len()) + } + } + + fn reserve_regions<'a, I>(&mut self, regions: I) + where + Self: 'a, + I: Iterator + Clone, + { + self.spilled.reserve_regions(regions.map(|r| &r.spilled)); + } + + fn clear(&mut self) { + self.strided = OffsetStride::default(); + self.spilled.clear(); + } +} + +impl CopyOnto for usize { + fn copy_onto(self, target: &mut OffsetOptimized) -> usize { + target.push(self); + target.len() - 1 + } +} + impl OffsetContainer for OffsetOptimized { fn push(&mut self, item: usize) { if !self.spilled.is_empty() { @@ -195,8 +287,8 @@ impl OffsetContainer for OffsetOptimized { } fn clear(&mut self) { - self.spilled.clear(); self.strided = OffsetStride::default(); + self.spilled.clear(); } fn len(&self) -> usize { diff --git a/src/impls/vec.rs b/src/impls/vec.rs new file mode 100644 index 0000000..2ab3932 --- /dev/null +++ b/src/impls/vec.rs @@ -0,0 +1,119 @@ +//! Region delegating to a vector. + +use std::ops::{Deref, DerefMut}; +use crate::{CopyOnto, Region}; + +#[derive(Debug)] +pub struct Vector(pub Vec); + +impl Default for Vector { + fn default() -> Self { + Self(Vec::default()) + } +} + + +impl Deref for Vector { + type Target = Vec; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for Vector { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl Region for Vector { + type ReadItem<'a> = &'a T where Self: 'a; + type Index = usize; + + fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self where Self: 'a { + Self::with_capacity(regions.map(Vec::len).sum()) + } + + fn index(&self, index: Self::Index) -> Self::ReadItem<'_> { + &self[index] + } + + fn reserve_regions<'a, I>(&mut self, regions: I) where Self: 'a, I: Iterator + Clone { + self.reserve(regions.map(Vec::len).sum()); + } + + fn clear(&mut self) { + self.clear() + } +} + +impl CopyOnto> for T { + fn copy_onto(self, target: &mut Vector) -> usize { + target.push(self); + target.len() - 1 + } +} + +impl CopyOnto> for &T { + fn copy_onto(self, target: &mut Vector) -> usize { + self.clone().copy_onto(target) + } +} + +#[derive(Debug)] +pub struct CopyVector(pub Vec); + +impl Default for CopyVector { + fn default() -> Self { + Self(Vec::default()) + } +} + +impl Deref for CopyVector { + type Target = Vec; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for CopyVector { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl Region for CopyVector { + type ReadItem<'a> = T where Self: 'a; + type Index = usize; + + fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self where Self: 'a { + Self::with_capacity(regions.map(Vec::len).sum()) + } + + fn index(&self, index: Self::Index) -> Self::ReadItem<'_> { + &self[index] + } + + fn reserve_regions<'a, I>(&mut self, regions: I) where Self: 'a, I: Iterator + Clone { + self.reserve(regions.map(Vec::len).sum()); + } + + fn clear(&mut self) { + self.clear() + } +} + +impl CopyOnto> for T { + fn copy_onto(self, target: &mut CopyVector) -> usize { + target.push(self); + target.len() - 1 + } +} + +impl CopyOnto> for &T { + fn copy_onto(self, target: &mut CopyVector) -> usize { + self.copied().copy_onto(target) + } +}