Skip to content

Commit

Permalink
Make deduplication work.
Browse files Browse the repository at this point in the history
Signed-off-by: Moritz Hoffmann <[email protected]>
  • Loading branch information
antiguru committed Feb 9, 2024
1 parent a5c1a15 commit 1540573
Show file tree
Hide file tree
Showing 6 changed files with 359 additions and 242 deletions.
20 changes: 16 additions & 4 deletions benches/bench.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@

extern crate test;

use flatcontainer::impls::tuple::TupleABCRegion;
use flatcontainer::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs};
use flatcontainer::impls::offsets::OffsetOptimized;
use flatcontainer::impls::tuple::{TupleABCRegion, TupleABRegion};
use flatcontainer::{
Containerized, CopyOnto, CopyRegion, FlatStack, MirrorRegion, Region, ReserveItems,
SliceRegion, StringRegion,
Expand Down Expand Up @@ -80,15 +82,25 @@ fn str100_copy_region(bencher: &mut Bencher) {
}
#[bench]
fn string10_copy_region(bencher: &mut Bencher) {
_bench_copy_region::<SliceRegion<_>, _>(bencher, vec![format!("grawwwwrr!"); 1024]);
_bench_copy_region::<SliceRegion<StringRegion>, _>(bencher, vec![format!("grawwwwrr!"); 1024]);
}
/// Runs `_bench_copy_region` over 1024 copies of the string `"grawwwwrr!"`,
/// targeting a `SliceRegion` whose inner region is
/// `CollapseSequence<ConsecutiveOffsetPairs<StringRegion>>` and whose offset
/// container is `OffsetOptimized`.
#[bench]
fn string10_copy_region_collapse(bencher: &mut Bencher) {
_bench_copy_region::<
SliceRegion<CollapseSequence<ConsecutiveOffsetPairs<StringRegion>>, OffsetOptimized>,
_,
>(bencher, vec![format!("grawwwwrr!"); 1024]);
}
#[bench]
fn string20_copy_region(bencher: &mut Bencher) {
_bench_copy_region::<SliceRegion<_>, _>(bencher, vec![format!("grawwwwrr!!!!!!!!!!!"); 512]);
_bench_copy_region::<SliceRegion<StringRegion>, _>(
bencher,
vec![format!("grawwwwrr!!!!!!!!!!!"); 512],
);
}
#[bench]
fn vec_u_s_copy_region(bencher: &mut Bencher) {
_bench_copy_region::<SliceRegion<_>, _>(
_bench_copy_region::<SliceRegion<SliceRegion<TupleABRegion<MirrorRegion<_>, StringRegion>>>, _>(
bencher,
vec![vec![(0u64, "grawwwwrr!".to_string()); 32]; 32],
);
Expand Down
177 changes: 177 additions & 0 deletions src/impls/deduplicate.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
//! Simple deduplication of equal consecutive items.
use crate::impls::offsets::OffsetContainer;
use crate::{CopyOnto, Region};

/// A region to deduplicate consecutive equal items.
///
/// Wraps an inner region `R`. When an item is copied in and it compares equal
/// to the item stored at the most recently returned index, that index is
/// returned again and nothing is written to the inner region. Only the
/// immediately preceding item is considered; earlier items are not searched.
#[derive(Debug)]
pub struct CollapseSequence<R: Region> {
// The wrapped region; reads and new writes are forwarded to it.
inner: R,
// Index returned by the last copy that reached `inner`, or `None` if nothing
// has been copied since construction or the last `clear`.
last_index: Option<R::Index>,
}

impl<R: Region> Default for CollapseSequence<R> {
fn default() -> Self {
Self {
inner: R::default(),
last_index: None,
}
}
}

impl<R: Region> Region for CollapseSequence<R>
where
    for<'a, 'b> R::ReadItem<'a>: PartialEq<R::ReadItem<'b>>,
{
    type ReadItem<'a> = R::ReadItem<'a> where Self: 'a;
    type Index = R::Index;

    /// Reads the item stored at `index` from the wrapped region.
    fn index(&self, index: Self::Index) -> Self::ReadItem<'_> {
        <R as Region>::index(&self.inner, index)
    }

    /// Forwards the reservation to the wrapped region, handing it the inner
    /// region of every wrapper in `regions`.
    fn reserve_regions<'a, I>(&mut self, regions: I)
    where
        Self: 'a,
        I: Iterator<Item = &'a Self> + Clone,
    {
        let inner_regions = regions.map(|collapsed| &collapsed.inner);
        self.inner.reserve_regions(inner_regions);
    }

    /// Forgets the remembered index and clears the wrapped region.
    fn clear(&mut self) {
        self.last_index = None;
        self.inner.clear();
    }
}

impl<R: Region, T: CopyOnto<R>> CopyOnto<CollapseSequence<R>> for T
where
    for<'a> R::ReadItem<'a>: PartialEq<T>,
    for<'a, 'b> R::ReadItem<'a>: PartialEq<R::ReadItem<'b>>,
{
    /// Copies `self` into `target`, unless it equals the item at the index
    /// remembered from the previous copy; in that case the remembered index is
    /// returned and the inner region is left untouched.
    fn copy_onto(self, target: &mut CollapseSequence<R>) -> <CollapseSequence<R> as Region>::Index {
        match target.last_index {
            // Same as the previous item: reuse its index.
            Some(previous) if target.inner.index(previous) == self => previous,
            // First item, or different from the previous one: store it and
            // remember where it went.
            _ => {
                let fresh = <T as CopyOnto<R>>::copy_onto(self, &mut target.inner);
                target.last_index = Some(fresh);
                fresh
            }
        }
    }
}

/// A region that exposes a single `usize` index for an inner region whose
/// index is a `(usize, usize)` pair.
///
/// The second component of every index returned by the inner region is
/// appended to `offsets`. Reading index `i` looks up the inner region at
/// `(offsets[i], offsets[i + 1])`, so the inner region is expected to hand out
/// pairs where each start equals the previous end.
#[derive(Debug)]
pub struct ConsecutiveOffsetPairs<
R: Region<Index = (usize, usize)>,
O: OffsetContainer<usize> = Vec<usize>,
> {
// The wrapped region holding the actual data.
inner: R,
// Recorded offsets; a default or cleared region contains the single entry 0.
offsets: O,
// Value the start of each index returned by `inner` is asserted to equal
// when copying.
last_index: usize,
}

impl<R: Region<Index = (usize, usize)>, O: OffsetContainer<usize>> Default
    for ConsecutiveOffsetPairs<R, O>
{
    /// Creates an empty region whose offset container already holds the
    /// leading `0`, so the first copied item occupies `(offsets[0], offsets[1])`.
    fn default() -> Self {
        let inner = R::default();
        let mut offsets = O::default();
        offsets.push(0);
        Self {
            inner,
            offsets,
            last_index: 0,
        }
    }
}

impl<R: Region<Index = (usize, usize)>, O: OffsetContainer<usize>> Region
    for ConsecutiveOffsetPairs<R, O>
{
    type ReadItem<'a> = R::ReadItem<'a>
    where
        Self: 'a;

    type Index = usize;

    /// Reads item `index` by looking up the inner region at the pair of
    /// neighbouring offsets `(offsets[index], offsets[index + 1])`.
    fn index(&self, index: Self::Index) -> Self::ReadItem<'_> {
        self.inner
            .index((self.offsets.index(index), self.offsets.index(index + 1)))
    }

    /// Forwards the reservation to the inner region, handing it the inner
    /// region of every wrapper in `regions`.
    fn reserve_regions<'a, I>(&mut self, regions: I)
    where
        Self: 'a,
        I: Iterator<Item = &'a Self> + Clone,
    {
        self.inner.reserve_regions(regions.map(|r| &r.inner));
    }

    /// Clears the inner region and restores the state of a default region:
    /// `offsets` holds only the leading `0` and the expected next start is 0.
    fn clear(&mut self) {
        // Must be reset together with the offsets, otherwise the consistency
        // assertion in `copy_onto` would compare against a stale end offset.
        self.last_index = 0;
        self.inner.clear();
        self.offsets.clear();
        self.offsets.push(0);
    }
}

impl<R: Region<Index = (usize, usize)>, O: OffsetContainer<usize>, T: CopyOnto<R>>
    CopyOnto<ConsecutiveOffsetPairs<R, O>> for T
{
    /// Copies `self` into the inner region, records the end offset and returns
    /// the position of the new item.
    ///
    /// # Panics
    ///
    /// Panics if the index returned by the inner region does not start where
    /// the previously returned index ended. This happens when the inner region
    /// does not allocate consecutively, for example because it deduplicates.
    fn copy_onto(
        self,
        target: &mut ConsecutiveOffsetPairs<R, O>,
    ) -> <ConsecutiveOffsetPairs<R, O> as Region>::Index {
        let index = self.copy_onto(&mut target.inner);
        assert_eq!(index.0, target.last_index);
        // Remember where this item ended so the next copy can be checked
        // against it; without this every copy after the first would be
        // compared against 0.
        target.last_index = index.1;
        target.offsets.push(index.1);
        // `offsets` holds one more entry than there are items (the leading 0),
        // and the new end was just pushed, so the new item sits at `len - 2`.
        target.offsets.len() - 2
    }
}

#[cfg(test)]
mod tests {
    use crate::impls::deduplicate::{CollapseSequence, ConsecutiveOffsetPairs};
    use crate::impls::offsets::OffsetOptimized;
    use crate::{CopyOnto, FlatStack, Region, StringRegion};

    /// Copies `item` into `r`, pinning the target region type for inference.
    fn copy<R: Region>(r: &mut R, item: impl CopyOnto<R>) -> R::Index {
        item.copy_onto(r)
    }

    #[test]
    fn test_dedup_flatstack() {
        let mut fs = FlatStack::<CollapseSequence<StringRegion>>::default();

        fs.copy("abc");
        fs.copy("abc");

        println!("{fs:?}");
        println!("{:?}", fs.as_parts());
    }

    #[test]
    fn test_dedup_region() {
        let mut r = CollapseSequence::<StringRegion>::default();

        assert_eq!(copy(&mut r, "abc"), copy(&mut r, "abc"));

        println!("{r:?}");
    }

    #[test]
    fn test_offset_optimized() {
        // Deduplication has to wrap the offset region, not the other way
        // round: the offset region requires consecutive inner indexes.
        let mut r =
            CollapseSequence::<ConsecutiveOffsetPairs<StringRegion, OffsetOptimized>>::default();

        for _ in 0..1000 {
            copy(&mut r, "abc");
        }

        println!("{r:?}");
    }

    #[test]
    fn test_consecutive_offsets_distinct_items() {
        let mut r = ConsecutiveOffsetPairs::<StringRegion, OffsetOptimized>::default();

        let first = copy(&mut r, "abc");
        let second = copy(&mut r, "defgh");
        assert_eq!(first, 0);
        assert_eq!(second, 1);
        assert_eq!(r.index(first), "abc");
        assert_eq!(r.index(second), "defgh");

        // After clearing, the region behaves like a fresh one.
        r.clear();
        let again = copy(&mut r, "xyz");
        assert_eq!(again, 0);
        assert_eq!(r.index(again), "xyz");
    }
}
3 changes: 2 additions & 1 deletion src/impls/mod.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
//! Various region implementations.
pub mod dedup_one;
pub mod deduplicate;
pub mod mirror;
pub mod offsets;
pub mod option;
pub mod result;
pub mod slice;
Expand Down
Loading

0 comments on commit 1540573

Please sign in to comment.