Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
Signed-off-by: Moritz Hoffmann <[email protected]>
  • Loading branch information
antiguru committed Feb 12, 2024
1 parent 549c190 commit 79f266a
Show file tree
Hide file tree
Showing 4 changed files with 246 additions and 30 deletions.
24 changes: 14 additions & 10 deletions src/impls/deduplicate.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
//! Simple deduplication of equal consecutive items.
use crate::impls::offsets::OffsetContainer;
use crate::impls::offsets::{OffsetContainer, OffsetRegion};
use crate::{CopyOnto, Region};
use crate::impls::vec::CopyVector;

/// A region to deduplicate consecutive equal items.
#[derive(Debug, Clone)]
Expand Down Expand Up @@ -79,10 +80,10 @@ where
/// Defers to region `R` for storing items, and uses offset container `O` to
/// remember indices. By default, `O` is `CopyVector<usize>`.
#[derive(Debug, Clone)]
pub struct ConsecutiveOffsetPairs<R, O = Vec<usize>>
pub struct ConsecutiveOffsetPairs<R, O = CopyVector<usize>>
where
R: Region<Index = (usize, usize)>,
O: OffsetContainer<usize>,
O: OffsetRegion, usize: CopyOnto<O>,
{
/// Wrapped region
inner: R,
Expand All @@ -92,22 +93,24 @@ where
last_index: usize,
}

impl<R: Region<Index = (usize, usize)>, O: OffsetContainer<usize>> Default
impl<R: Region<Index = (usize, usize)>, O: OffsetRegion> Default
for ConsecutiveOffsetPairs<R, O>
where usize: CopyOnto<O>,
{
fn default() -> Self {
let mut d = Self {
inner: Default::default(),
offsets: Default::default(),
last_index: 0,
};
d.offsets.push(0);
0.copy_onto(&mut d.offsets);
d
}
}

impl<R: Region<Index = (usize, usize)>, O: OffsetContainer<usize>> Region
impl<R: Region<Index = (usize, usize)>, O: OffsetRegion> Region
for ConsecutiveOffsetPairs<R, O>
where usize: CopyOnto<O>,
{
type ReadItem<'a> = R::ReadItem<'a>
where
Expand All @@ -120,7 +123,7 @@ impl<R: Region<Index = (usize, usize)>, O: OffsetContainer<usize>> Region
Self: 'a,
{
let mut offsets = O::default();
offsets.push(0);
0.copy_onto(&mut offsets);
Self {
inner: R::merge_regions(regions.clone().map(|r| &r.inner)),
offsets,
Expand All @@ -145,12 +148,13 @@ impl<R: Region<Index = (usize, usize)>, O: OffsetContainer<usize>> Region
self.last_index = 0;
self.inner.clear();
self.offsets.clear();
self.offsets.push(0);
0.copy_onto(&mut self.offsets);
}
}

impl<R: Region<Index = (usize, usize)>, O: OffsetContainer<usize>, T: CopyOnto<R>>
impl<R: Region<Index = (usize, usize)>, O: OffsetRegion, T: CopyOnto<R>>
CopyOnto<ConsecutiveOffsetPairs<R, O>> for T
where usize: CopyOnto<O>,
{
fn copy_onto(
self,
Expand All @@ -159,7 +163,7 @@ impl<R: Region<Index = (usize, usize)>, O: OffsetContainer<usize>, T: CopyOnto<R
let index = self.copy_onto(&mut target.inner);
debug_assert_eq!(index.0, target.last_index);
target.last_index = index.1;
target.offsets.push(index.1);
index.1.copy_onto(&mut target.offsets);
target.offsets.len() - 2
}
}
Expand Down
1 change: 1 addition & 0 deletions src/impls/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@ pub mod slice;
pub mod slice_copy;
pub mod string;
pub mod tuple;
pub mod vec;
132 changes: 112 additions & 20 deletions src/impls/offsets.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
//! Types to represent offsets.
use crate::{CopyOnto, Region};

/// Marker trait for regions that hold `usize` offsets.
///
/// A type qualifies when it is a [`Region`] that is indexed by `usize`, reads
/// items back as plain `usize` values, and accepts `usize` via [`CopyOnto`].
// NOTE(review): `for<'a> Self: ... + 'a` demands that `Self` outlive every
// lifetime, which is effectively a `'static` bound -- confirm this is intended.
pub trait OffsetRegion
where
for<'a> Self: Region<ReadItem<'a> = usize, Index=usize> + 'a,
usize: CopyOnto<Self>,
{
}

/// Blanket implementation: every type that satisfies the bounds below is
/// automatically an `OffsetRegion`; the trait has no items to implement.
impl<R> OffsetRegion for R where for<'a> Self: Region<ReadItem<'a> = usize, Index=usize> + 'a,
usize: CopyOnto<Self>,
{}

/// TODO
pub trait OffsetContainer<T>: Default + Extend<T> {
/// Accepts a newly pushed element.
Expand Down Expand Up @@ -101,29 +114,69 @@ impl OffsetStride {
/// A list of unsigned integers that uses `u32` elements as long as they are small enough, and switches to `u64` once they are not.
#[derive(Eq, PartialEq, Ord, PartialOrd, Clone, Debug, Default)]
pub struct OffsetList {
/// Length of a prefix of zero elements.
pub zero_prefix: usize,
/// Offsets that fit within a `u32`.
pub smol: Vec<u32>,
/// Offsets that either do not fit in a `u32`, or are inserted after some offset that did not fit.
pub chonk: Vec<u64>,
}

/// Exposes an [`OffsetList`] as a region of `usize` values addressed by position.
impl Region for OffsetList {
    type ReadItem<'a> = usize where Self: 'a;
    type Index = usize;

    /// Builds an empty list whose `smol` and `chonk` buffers are pre-sized to
    /// the summed lengths of the matching buffers in `regions`.
    fn merge_regions<'a>(regions: impl Iterator<Item = &'a Self> + Clone) -> Self
    where
        Self: 'a,
    {
        let smol_capacity: usize = regions.clone().map(|list| list.smol.len()).sum();
        let chonk_capacity: usize = regions.clone().map(|list| list.chonk.len()).sum();
        Self {
            smol: Vec::with_capacity(smol_capacity),
            chonk: Vec::with_capacity(chonk_capacity),
        }
    }

    /// Reads the offset stored at `index`.
    ///
    /// Positions below `smol.len()` come from the `u32` buffer, all later
    /// positions from the `u64` buffer. Panics if `index` is out of bounds or
    /// the stored value does not fit in a `usize`.
    fn index(&self, index: Self::Index) -> Self::ReadItem<'_> {
        match index.checked_sub(self.smol.len()) {
            // At or past the end of the `u32` buffer: continue in the `u64` buffer.
            Some(chonk_position) => self.chonk[chonk_position].try_into().unwrap(),
            None => self.smol[index].try_into().unwrap(),
        }
    }

    /// Reserves room in each buffer for the summed lengths of the matching
    /// buffers in `regions`.
    fn reserve_regions<'a, I>(&mut self, regions: I)
    where
        Self: 'a,
        I: Iterator<Item = &'a Self> + Clone,
    {
        let extra_smol: usize = regions.clone().map(|list| list.smol.len()).sum();
        self.smol.reserve(extra_smol);

        let extra_chonk: usize = regions.clone().map(|list| list.chonk.len()).sum();
        self.chonk.reserve(extra_chonk);
    }

    /// Removes all offsets.
    fn clear(&mut self) {
        // NOTE(review): presumably resolves to an inherent `OffsetList::clear`
        // (not visible here); without one this call would recurse -- confirm.
        self.clear();
    }
}

/// Allows a `usize` offset to be appended to an [`OffsetList`].
impl CopyOnto<OffsetList> for usize {
    /// Pushes `self` onto `target` and returns the list's length after the
    /// push minus one, i.e. the position of the newly stored offset.
    fn copy_onto(self, target: &mut OffsetList) -> usize {
        target.push(self);
        let count = target.len();
        count - 1
    }
}

impl OffsetList {
// TODO
// /// Allocate a new list with a specified capacity.
// pub fn with_capacity(cap: usize) -> Self {
// Self {
// zero_prefix: 0,
// smol: Vec::with_capacity(cap),
// chonk: Vec::new(),
// }
// }
/// Inserts the offset, as a `u32` if that is still on the table.
pub fn push(&mut self, offset: usize) {
if self.smol.is_empty() && self.chonk.is_empty() && offset == 0 {
self.zero_prefix += 1;
} else if self.chonk.is_empty() {
if self.chonk.is_empty() {
if let Ok(smol) = offset.try_into() {
self.smol.push(smol);
} else {
Expand All @@ -133,21 +186,17 @@ impl OffsetList {
self.chonk.push(offset.try_into().unwrap())
}
}
/// Like `std::ops::Index`, which we cannot implement as it must return a `&usize`.
pub fn index(&self, index: usize) -> usize {
if index < self.zero_prefix {
0
} else if index - self.zero_prefix < self.smol.len() {
self.smol[index - self.zero_prefix].try_into().unwrap()
} else {
self.chonk[index - self.zero_prefix - self.smol.len()]
.try_into()
.unwrap()
}
}
// /// Like `std::ops::Index`, which we cannot implement as it must return a `&usize`.
// pub fn index(&self, index: usize) -> usize {
// if index < self.smol.len() {
// self.smol[index].try_into().unwrap()
// } else {
// self.chonk[index - self.smol.len()].try_into().unwrap()
// }
// }
/// The number of offsets in the list.
pub fn len(&self) -> usize {
self.zero_prefix + self.smol.len() + self.chonk.len()
self.smol.len() + self.chonk.len()
}

/// Returns `true` if this list contains no elements.
Expand All @@ -174,6 +223,49 @@ pub struct OffsetOptimized {
spilled: OffsetList,
}

/// Exposes [`OffsetOptimized`] as a region of `usize` offsets.
impl Region for OffsetOptimized {
    type ReadItem<'a> = usize where Self: 'a;
    type Index = usize;

    /// Creates a region with a default strided part and a spilled list
    /// obtained by merging the spilled lists of `regions`.
    fn merge_regions<'a>(regions: impl Iterator<Item = &'a Self> + Clone) -> Self
    where
        Self: 'a,
    {
        let strided = OffsetStride::default();
        let spilled = OffsetList::merge_regions(regions.map(|region| &region.spilled));
        Self { strided, spilled }
    }

    /// Reads the offset at `index`: positions below the strided length are
    /// answered by the strided part, all later ones by the spilled list.
    fn index(&self, index: Self::Index) -> Self::ReadItem<'_> {
        let strided_len = self.strided.len();
        match index.checked_sub(strided_len) {
            Some(spilled_index) => self.spilled.index(spilled_index),
            None => self.strided.index(index),
        }
    }

    /// Reserves space in the spilled list based on the spilled lists of
    /// `regions`; the strided part is left untouched.
    fn reserve_regions<'a, I>(&mut self, regions: I)
    where
        Self: 'a,
        I: Iterator<Item = &'a Self> + Clone,
    {
        let spilled_lists = regions.map(|region| &region.spilled);
        self.spilled.reserve_regions(spilled_lists);
    }

    /// Resets the strided part to its default and empties the spilled list.
    fn clear(&mut self) {
        self.strided = Default::default();
        self.spilled.clear();
    }
}

/// Allows a `usize` offset to be appended to an [`OffsetOptimized`].
impl CopyOnto<OffsetOptimized> for usize {
    /// Pushes `self` onto `target` and returns the container's length after
    /// the push minus one.
    fn copy_onto(self, target: &mut OffsetOptimized) -> usize {
        target.push(self);
        let count = target.len();
        count - 1
    }
}

impl OffsetContainer<usize> for OffsetOptimized {
fn push(&mut self, item: usize) {
if !self.spilled.is_empty() {
Expand All @@ -195,8 +287,8 @@ impl OffsetContainer<usize> for OffsetOptimized {
}

fn clear(&mut self) {
self.spilled.clear();
self.strided = OffsetStride::default();
self.spilled.clear();
}

fn len(&self) -> usize {
Expand Down
119 changes: 119 additions & 0 deletions src/impls/vec.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
//! Region delegating to a vector.
use std::ops::{Deref, DerefMut};
use crate::{CopyOnto, Region};

/// A region that keeps its items in a plain `Vec<T>`.
///
/// The wrapped vector is a public field. `Clone` is derived so that this
/// region can be cloned like the other regions in the crate.
#[derive(Debug, Clone)]
pub struct Vector<T>(pub Vec<T>);

impl<T> Default for Vector<T> {
fn default() -> Self {
Self(Vec::default())
}
}


impl<T> Deref for Vector<T> {
type Target = Vec<T>;

fn deref(&self) -> &Self::Target {
&self.0
}
}

impl<T> DerefMut for Vector<T> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}

/// Region implementation that hands out references into the wrapped vector.
impl<T: Clone> Region for Vector<T> {
    type ReadItem<'a> = &'a T where Self: 'a;
    type Index = usize;

    /// Creates an empty region with capacity for the combined number of items
    /// held by `regions`.
    fn merge_regions<'a>(regions: impl Iterator<Item = &'a Self> + Clone) -> Self
    where
        Self: 'a,
    {
        // `Vec::with_capacity` is an associated function and therefore not
        // reachable through `Deref`; construct the newtype explicitly.
        Self(Vec::with_capacity(regions.map(|region| region.0.len()).sum()))
    }

    /// Returns a reference to the item at `index`; panics when out of bounds.
    fn index(&self, index: Self::Index) -> Self::ReadItem<'_> {
        &self.0[index]
    }

    /// Reserves room for the combined number of items held by `regions`.
    fn reserve_regions<'a, I>(&mut self, regions: I)
    where
        Self: 'a,
        I: Iterator<Item = &'a Self> + Clone,
    {
        self.0.reserve(regions.map(|region| region.0.len()).sum());
    }

    /// Removes all items.
    fn clear(&mut self) {
        // Go through the field: `self.clear()` would pick `Region::clear`
        // again (it matches before auto-deref reaches `Vec`) and never return.
        self.0.clear();
    }
}

/// Moves an owned item into a [`Vector`].
impl<T: Clone> CopyOnto<Vector<T>> for T {
    /// Appends `self` to `target` and returns the position it was stored at.
    fn copy_onto(self, target: &mut Vector<T>) -> usize {
        let items = &mut target.0;
        items.push(self);
        items.len() - 1
    }
}

/// Clones a borrowed item into a [`Vector`].
impl<T: Clone> CopyOnto<Vector<T>> for &T {
    /// Clones the referenced item and stores the clone via the owned-item
    /// implementation, returning the index that implementation reports.
    fn copy_onto(self, target: &mut Vector<T>) -> usize {
        let owned = T::clone(self);
        <T as CopyOnto<Vector<T>>>::copy_onto(owned, target)
    }
}

/// A region that keeps `Copy` items in a plain `Vec<T>` and reads them back
/// by value.
///
/// The wrapped vector is a public field. `Clone` is derived so that
/// containers using this type as their offset storage can still be cloned.
#[derive(Debug, Clone)]
pub struct CopyVector<T>(pub Vec<T>);

impl<T> Default for CopyVector<T> {
fn default() -> Self {
Self(Vec::default())
}
}

impl<T> Deref for CopyVector<T> {
type Target = Vec<T>;

fn deref(&self) -> &Self::Target {
&self.0
}
}

impl<T> DerefMut for CopyVector<T> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}

/// Region implementation that returns stored items by value.
impl<T: Copy> Region for CopyVector<T> {
    type ReadItem<'a> = T where Self: 'a;
    type Index = usize;

    /// Creates an empty region with capacity for the combined number of items
    /// held by `regions`.
    fn merge_regions<'a>(regions: impl Iterator<Item = &'a Self> + Clone) -> Self
    where
        Self: 'a,
    {
        // `Vec::with_capacity` is an associated function and therefore not
        // reachable through `Deref`; construct the newtype explicitly.
        Self(Vec::with_capacity(regions.map(|region| region.0.len()).sum()))
    }

    /// Returns a copy of the item at `index`; panics when out of bounds.
    fn index(&self, index: Self::Index) -> Self::ReadItem<'_> {
        // `ReadItem` is `T`, not `&T`, so copy the element out.
        self.0[index]
    }

    /// Reserves room for the combined number of items held by `regions`.
    fn reserve_regions<'a, I>(&mut self, regions: I)
    where
        Self: 'a,
        I: Iterator<Item = &'a Self> + Clone,
    {
        self.0.reserve(regions.map(|region| region.0.len()).sum());
    }

    /// Removes all items.
    fn clear(&mut self) {
        // Go through the field: `self.clear()` would pick `Region::clear`
        // again (it matches before auto-deref reaches `Vec`) and never return.
        self.0.clear();
    }
}

/// Stores a `Copy` item in a [`CopyVector`].
impl<T: Copy> CopyOnto<CopyVector<T>> for T {
    /// Appends `self` to `target` and returns the position it was stored at.
    fn copy_onto(self, target: &mut CopyVector<T>) -> usize {
        let items = &mut target.0;
        items.push(self);
        items.len() - 1
    }
}

/// Stores a copy of a borrowed item in a [`CopyVector`].
impl<T: Copy> CopyOnto<CopyVector<T>> for &T {
    /// Copies the referenced item and stores it via the by-value
    /// implementation, returning the index that implementation reports.
    fn copy_onto(self, target: &mut CopyVector<T>) -> usize {
        // A plain `&T` has no `copied()` method (that lives on `Option` and
        // iterators); dereferencing is the way to copy out of the reference.
        <T as CopyOnto<CopyVector<T>>>::copy_onto(*self, target)
    }
}

0 comments on commit 79f266a

Please sign in to comment.