From 7f2b7d53668a0e5cc042f26027cce5593d5461f5 Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Mon, 26 Feb 2024 09:55:53 -0500 Subject: [PATCH] Introduce storage trait to abstract storage of slices Signed-off-by: Moritz Hoffmann --- src/impls/mod.rs | 1 + src/impls/slice_copy.rs | 115 +++++++++++----------- src/impls/storage.rs | 205 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 267 insertions(+), 54 deletions(-) create mode 100644 src/impls/storage.rs diff --git a/src/impls/mod.rs b/src/impls/mod.rs index be5a6a1..6f71cca 100644 --- a/src/impls/mod.rs +++ b/src/impls/mod.rs @@ -9,5 +9,6 @@ pub mod option; pub mod result; pub mod slice; pub mod slice_copy; +pub mod storage; pub mod string; pub mod tuple; diff --git a/src/impls/slice_copy.rs b/src/impls/slice_copy.rs index b25d1b3..24eb55c 100644 --- a/src/impls/slice_copy.rs +++ b/src/impls/slice_copy.rs @@ -1,8 +1,11 @@ //! A region that stores slices of copy types. +use std::marker::PhantomData; + #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; +use crate::impls::storage::Storage; use crate::{CopyIter, CopyOnto, Region, ReserveItems}; /// A container for owned types. 
@@ -29,11 +32,13 @@ use crate::{CopyIter, CopyOnto, Region, ReserveItems}; /// ``` #[derive(Debug, Clone)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] -pub struct OwnedRegion { - slices: Vec, +pub struct OwnedRegion = Vec> { + slices: S, + offset: usize, + _marker: PhantomData, } -impl Region for OwnedRegion { +impl> Region for OwnedRegion { type ReadItem<'a> = &'a [T] where Self: 'a; type Index = (usize, usize); @@ -42,13 +47,15 @@ impl Region for OwnedRegion { Self: 'a, { Self { - slices: Vec::with_capacity(regions.map(|r| r.slices.len()).sum()), + slices: S::merge_regions(regions.map(|r| &r.slices)), + offset: 0, + _marker: PhantomData, } } #[inline] fn index(&self, (start, end): Self::Index) -> Self::ReadItem<'_> { - &self.slices[start..end] + self.slices.index(start, end) } fn reserve_regions<'a, I>(&mut self, regions: I) @@ -56,7 +63,7 @@ impl Region for OwnedRegion { Self: 'a, I: Iterator + Clone, { - self.slices.reserve(regions.map(|r| r.slices.len()).sum()); + self.slices.reserve_regions(regions.map(|r| &r.slices)); } #[inline] @@ -64,50 +71,48 @@ impl Region for OwnedRegion { self.slices.clear(); } - fn heap_size(&self, mut callback: F) { - let size_of_t = std::mem::size_of::(); - callback( - self.slices.len() * size_of_t, - self.slices.capacity() * size_of_t, - ); + fn heap_size(&self, callback: F) { + self.slices.heap_size(callback); } } -impl Default for OwnedRegion { +impl> Default for OwnedRegion { fn default() -> Self { Self { - slices: Vec::default(), + slices: S::default(), + offset: 0, + _marker: PhantomData, } } } -impl CopyOnto> for [T; N] { +impl, const N: usize> CopyOnto> for [T; N] { #[inline] - fn copy_onto(self, target: &mut OwnedRegion) -> as Region>::Index { - let start = target.slices.len(); - target.slices.extend(self); - (start, target.slices.len()) + fn copy_onto(self, target: &mut OwnedRegion) -> as Region>::Index { + let start = target.offset; + target.offset = target.slices.extend(self); + (start, target.offset) 
} } -impl CopyOnto> for &[T; N] { +impl, const N: usize> CopyOnto> for &[T; N] { #[inline] - fn copy_onto(self, target: &mut OwnedRegion) -> as Region>::Index { - let start = target.slices.len(); - target.slices.extend_from_slice(self); - (start, target.slices.len()) + fn copy_onto(self, target: &mut OwnedRegion) -> as Region>::Index { + let start = target.offset; + target.offset = target.slices.extend_from_slice(self); + (start, target.offset) } } -impl CopyOnto> for &&[T; N] { +impl, const N: usize> CopyOnto> for &&[T; N] { #[inline] - fn copy_onto(self, target: &mut OwnedRegion) -> as Region>::Index { + fn copy_onto(self, target: &mut OwnedRegion) -> as Region>::Index { (*self).copy_onto(target) } } -impl ReserveItems> for &[T; N] { - fn reserve_items(target: &mut OwnedRegion, items: I) +impl, const N: usize> ReserveItems> for &[T; N] { + fn reserve_items(target: &mut OwnedRegion, items: I) where I: Iterator + Clone, { @@ -115,27 +120,27 @@ impl ReserveItems> for &[T; N] { } } -impl CopyOnto> for &[T] { +impl> CopyOnto> for &[T] { #[inline] - fn copy_onto(self, target: &mut OwnedRegion) -> as Region>::Index { - let start = target.slices.len(); - target.slices.extend_from_slice(self); - (start, target.slices.len()) + fn copy_onto(self, target: &mut OwnedRegion) -> as Region>::Index { + let start = target.offset; + target.offset = target.slices.extend_from_slice(self); + (start, target.offset) } } -impl CopyOnto> for &&[T] +impl> CopyOnto> for &&[T] where - for<'a> &'a [T]: CopyOnto>, + for<'a> &'a [T]: CopyOnto>, { #[inline] - fn copy_onto(self, target: &mut OwnedRegion) -> as Region>::Index { + fn copy_onto(self, target: &mut OwnedRegion) -> as Region>::Index { (*self).copy_onto(target) } } -impl ReserveItems> for &[T] { - fn reserve_items(target: &mut OwnedRegion, items: I) +impl> ReserveItems> for &[T] { + fn reserve_items(target: &mut OwnedRegion, items: I) where I: Iterator + Clone, { @@ -143,24 +148,24 @@ impl ReserveItems> for &[T] { } } -impl CopyOnto> 
for Vec { +impl> CopyOnto> for Vec { #[inline] - fn copy_onto(mut self, target: &mut OwnedRegion) -> as Region>::Index { - let start = target.slices.len(); - target.slices.append(&mut self); - (start, target.slices.len()) + fn copy_onto(mut self, target: &mut OwnedRegion) -> as Region>::Index { + let start = target.offset; + target.offset = target.slices.append(&mut self); + (start, target.offset) } } -impl CopyOnto> for &Vec { +impl> CopyOnto> for &Vec { #[inline] - fn copy_onto(self, target: &mut OwnedRegion) -> as Region>::Index { + fn copy_onto(self, target: &mut OwnedRegion) -> as Region>::Index { self.as_slice().copy_onto(target) } } -impl ReserveItems> for &Vec { - fn reserve_items(target: &mut OwnedRegion, items: I) +impl> ReserveItems> for &Vec { + fn reserve_items(target: &mut OwnedRegion, items: I) where I: Iterator + Clone, { @@ -168,17 +173,19 @@ impl ReserveItems> for &Vec { } } -impl> CopyOnto> for CopyIter { +impl, I: IntoIterator> CopyOnto> + for CopyIter +{ #[inline] - fn copy_onto(self, target: &mut OwnedRegion) -> as Region>::Index { - let start = target.slices.len(); - target.slices.extend(self.0); - (start, target.slices.len()) + fn copy_onto(self, target: &mut OwnedRegion) -> as Region>::Index { + let start = target.offset; + target.offset = target.slices.extend(self.0); + (start, target.offset) } } -impl> ReserveItems> for CopyIter { - fn reserve_items(target: &mut OwnedRegion, items: I) +impl, J: IntoIterator> ReserveItems> for CopyIter { + fn reserve_items(target: &mut OwnedRegion, items: I) where I: Iterator + Clone, { diff --git a/src/impls/storage.rs b/src/impls/storage.rs new file mode 100644 index 0000000..e2e4f83 --- /dev/null +++ b/src/impls/storage.rs @@ -0,0 +1,205 @@ +//! Storage abstractions to represent slices of data. 
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

/// Backing storage for slices of `T` that are addressed by absolute element offsets.
///
/// All appending methods return the total number of elements stored after the call.
/// A caller that remembers the previous total as `start` can later read the appended
/// slice back with [`Storage::index`] using `(start, returned_total)`.
pub trait Storage<T>: Default {
    /// Creates an empty storage with room for at least `capacity` elements.
    fn with_capacity(capacity: usize) -> Self;

    /// Creates an empty storage with room for the combined number of elements
    /// currently held by `regions`.
    fn merge_regions<'a>(regions: impl Iterator<Item = &'a Self> + Clone) -> Self
    where
        Self: 'a;

    /// Ensures that `additional` more elements can be appended as one slice.
    fn reserve(&mut self, additional: usize);

    /// Reserves room for the combined number of elements currently held by `regions`.
    fn reserve_regions<'a, I>(&mut self, regions: I)
    where
        Self: 'a,
        I: Iterator<Item = &'a Self> + Clone;

    /// Removes all elements; offsets handed out earlier must not be used afterwards.
    fn clear(&mut self);

    /// Reports heap usage by invoking `callback(used_bytes, allocated_bytes)`,
    /// possibly several times (once per allocation).
    fn heap_size<F: FnMut(usize, usize)>(&self, callback: F);

    /// Appends all items of `iter` as one slice and returns the new total element count.
    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) -> usize;

    /// Moves all elements out of `data` (leaving it empty), appends them as one slice,
    /// and returns the new total element count.
    fn append(&mut self, data: &mut Vec<T>) -> usize;

    /// Appends a clone of `slice` and returns the new total element count.
    fn extend_from_slice(&mut self, slice: &[T]) -> usize
    where
        T: Clone;

    /// Returns the elements in the absolute range `start..end`.
    ///
    /// The range is expected to be one previously produced by an appending method
    /// (or an empty range); other ranges may panic.
    fn index(&self, start: usize, end: usize) -> &[T];
}

/// A single contiguous allocation; offsets are plain indices into the vector.
impl<T> Storage<T> for Vec<T> {
    fn with_capacity(capacity: usize) -> Self {
        Vec::with_capacity(capacity)
    }

    fn merge_regions<'a>(regions: impl Iterator<Item = &'a Self> + Clone) -> Self
    where
        Self: 'a,
    {
        Vec::with_capacity(regions.map(Vec::len).sum())
    }

    fn reserve(&mut self, additional: usize) {
        Vec::reserve(self, additional);
    }

    fn reserve_regions<'a, I>(&mut self, regions: I)
    where
        Self: 'a,
        I: Iterator<Item = &'a Self> + Clone,
    {
        Vec::reserve(self, regions.map(Vec::len).sum());
    }

    fn clear(&mut self) {
        // Fully qualified so it is obvious this is the inherent method, not recursion.
        Vec::clear(self);
    }

    fn heap_size<F: FnMut(usize, usize)>(&self, mut callback: F) {
        let size_of_t = std::mem::size_of::<T>();
        callback(self.len() * size_of_t, self.capacity() * size_of_t);
    }

    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) -> usize {
        Extend::extend(self, iter);
        self.len()
    }

    fn append(&mut self, data: &mut Vec<T>) -> usize {
        Vec::append(self, data);
        self.len()
    }

    fn extend_from_slice(&mut self, slice: &[T]) -> usize
    where
        T: Clone,
    {
        Vec::extend_from_slice(self, slice);
        self.len()
    }

    fn index(&self, start: usize, end: usize) -> &[T] {
        &self[start..end]
    }
}

/// Storage that grows by opening additional chunks of increasing capacity instead of
/// reallocating the chunks it already has.
///
/// Invariant: `offsets.len() == inner.len()` and `offsets[i]` is the absolute offset of
/// the first element of `inner[i]`. Each appended slice lives entirely in one chunk.
#[derive(Debug)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
struct Doubling<T> {
    inner: Vec<Vec<T>>,
    offsets: Vec<usize>,
}

impl<T> Default for Doubling<T> {
    fn default() -> Self {
        Self {
            inner: Vec::default(),
            offsets: Vec::default(),
        }
    }
}

impl<T> Doubling<T> {
    /// Total number of elements stored across all chunks.
    fn len(&self) -> usize {
        match (self.offsets.last(), self.inner.last()) {
            (Some(&offset), Some(chunk)) => offset + chunk.len(),
            _ => 0,
        }
    }
}

impl<T> Storage<T> for Doubling<T> {
    fn with_capacity(capacity: usize) -> Self {
        Self {
            inner: vec![Vec::with_capacity(capacity)],
            // The initial chunk starts at absolute offset 0; keeps `offsets` aligned
            // with `inner`.
            offsets: vec![0],
        }
    }

    fn merge_regions<'a>(regions: impl Iterator<Item = &'a Self> + Clone) -> Self
    where
        Self: 'a,
    {
        Self::with_capacity(regions.flat_map(|r| &r.inner).map(Vec::len).sum())
    }

    /// Opens a new chunk if the last one cannot take `additional` more elements.
    ///
    /// The new chunk's capacity is the larger of `additional` and twice the last
    /// chunk's length, rounded up to a power of two.
    fn reserve(&mut self, additional: usize) {
        let (remaining, last_len) = self
            .inner
            .last()
            .map_or((0, 0), |last| (last.capacity() - last.len(), last.len()));
        if remaining < additional {
            let capacity = std::cmp::max(additional, 2 * last_len).next_power_of_two();
            // The new chunk begins right after everything stored so far.
            let offset = self.len();
            self.offsets.push(offset);
            self.inner.push(Vec::with_capacity(capacity));
        }
    }

    fn reserve_regions<'a, I>(&mut self, regions: I)
    where
        Self: 'a,
        I: Iterator<Item = &'a Self> + Clone,
    {
        self.reserve(regions.flat_map(|r| &r.inner).map(Vec::len).sum());
    }

    fn clear(&mut self) {
        self.inner.clear();
        // Offsets describe the chunks; dropping one without the other would make
        // subsequent appends report totals based on stale offsets.
        self.offsets.clear();
    }

    /// Reports one `(used, allocated)` pair per element chunk. The bookkeeping vectors
    /// (`inner` itself and `offsets`) are not included.
    fn heap_size<F: FnMut(usize, usize)>(&self, mut callback: F) {
        let size_of_t = std::mem::size_of::<T>();
        for chunk in &self.inner {
            callback(chunk.len() * size_of_t, chunk.capacity() * size_of_t);
        }
    }

    fn extend<I: IntoIterator<Item = T>>(&mut self, iter: I) -> usize {
        // Collect first so the whole slice is known to fit into a single chunk, then
        // move (not clone) the elements over.
        let mut buffer: Vec<T> = iter.into_iter().collect();
        self.append(&mut buffer)
    }

    fn append(&mut self, data: &mut Vec<T>) -> usize {
        // Empty input needs no chunk; `reserve(0)` would not allocate one either.
        if !data.is_empty() {
            self.reserve(data.len());
            self.inner
                .last_mut()
                .expect("reserve opens a chunk for non-empty input")
                .append(data);
        }
        self.len()
    }

    fn extend_from_slice(&mut self, slice: &[T]) -> usize
    where
        T: Clone,
    {
        // Empty input needs no chunk; `reserve(0)` would not allocate one either.
        if !slice.is_empty() {
            self.reserve(slice.len());
            self.inner
                .last_mut()
                .expect("reserve opens a chunk for non-empty input")
                .extend_from_slice(slice);
        }
        self.len()
    }

    fn index(&self, start: usize, end: usize) -> &[T] {
        // `offsets` is sorted, so the owning chunk is the last one whose offset is
        // `<= start`. Taking the last among equal offsets skips chunks that were left
        // empty because they were too small.
        match self
            .offsets
            .partition_point(|&offset| offset <= start)
            .checked_sub(1)
        {
            Some(chunk) => {
                let base = self.offsets[chunk];
                &self.inner[chunk][start - base..end - base]
            }
            None => {
                // No chunk exists yet: only the empty range `0..0` is valid, anything
                // else panics like an out-of-bounds slice index.
                let empty: &[T] = &[];
                &empty[start..end]
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_doubling() {
        let mut d: Doubling<usize> = Doubling::default();
        let mut start = 0;

        for i in 0..1000 {
            let end = d.extend_from_slice(&[i, i + 1, i + 3]);
            assert_eq!(&[i, i + 1, i + 3], d.index(start, end));
            start = end;
        }
    }

    #[test]
    fn test_doubling_reads_older_chunks() {
        let mut d: Doubling<usize> = Doubling::default();
        let mut ranges = Vec::new();
        let mut start = 0;

        for i in 0..1000 {
            let end = d.extend_from_slice(&[i, i + 1, i + 3]);
            ranges.push((start, end));
            start = end;
        }
        for (i, &(start, end)) in ranges.iter().enumerate() {
            assert_eq!(&[i, i + 1, i + 3], d.index(start, end));
        }
    }
}