diff --git a/src/impls/columns.rs b/src/impls/columns.rs index f7e4fd7..b678f4a 100644 --- a/src/impls/columns.rs +++ b/src/impls/columns.rs @@ -527,7 +527,7 @@ mod tests { indices.push(index); } - for (&index, row) in indices.iter().zip(&data) { + for (index, row) in indices.iter().zip(&data) { assert!(row.iter().copied().eq(r.index(index).iter())); } } @@ -553,7 +553,7 @@ mod tests { indices.push(index); } - for (&index, row) in indices.iter().zip(&data) { + for (index, row) in indices.iter().zip(&data) { assert!(row.iter().copied().eq(r.index(index).iter())); } @@ -582,7 +582,7 @@ mod tests { indices.push(index); } - for (&index, row) in indices.iter().zip(&data) { + for (index, row) in indices.iter().zip(&data) { assert!(row.iter().eq(r.index(index).iter())); } @@ -610,8 +610,8 @@ mod tests { indices.push(index); } - for (&index, row) in indices.iter().zip(&data) { - assert!(row.iter().copied().eq(r.index(index).iter())); + for (index, row) in indices.iter().zip(&data) { + assert!(row.iter().eq(r.index(index).iter())); } println!("{r:?}"); @@ -638,8 +638,8 @@ mod tests { indices.push(index); } - for (&index, row) in indices.iter().zip(&data) { - assert!(row.iter().copied().eq(r.index(index).iter())); + for (index, row) in indices.iter().zip(&data) { + assert!(row.iter().eq(r.index(index).iter())); } assert_eq!("1", r.index(indices[1]).get(0)); diff --git a/src/impls/offsets.rs b/src/impls/offsets.rs index 5ecf5df..0ebc628 100644 --- a/src/impls/offsets.rs +++ b/src/impls/offsets.rs @@ -7,6 +7,11 @@ use crate::impls::storage::Storage; /// A container to store offsets. pub trait OffsetContainer: Storage { + /// Iterator over the elements. + type Iter<'a>: Iterator + where + Self: 'a; + /// Lookup an index. May panic for invalid indexes. fn index(&self, index: usize) -> T; @@ -18,6 +23,9 @@ pub trait OffsetContainer: Storage { fn extend>(&mut self, iter: I) where I::IntoIter: ExactSizeIterator; + + /// Returns an iterator over the elements. + fn iter(&self) -> Self::Iter<'_>; } /// A container for offsets that can represent strides of offsets. @@ -45,6 +53,7 @@ pub enum OffsetStride { impl OffsetStride { /// Accepts or rejects a newly pushed element. #[must_use] + #[inline] pub fn push(&mut self, item: usize) -> bool { match self { OffsetStride::Empty => { @@ -88,6 +97,7 @@ impl OffsetStride { /// Panics for out-of-bounds accesses, i.e., if `index` greater or equal to /// [`len`][OffsetStride::len]. #[must_use] + #[inline] pub fn index(&self, index: usize) -> usize { match self { OffsetStride::Empty => { @@ -107,6 +117,7 @@ impl OffsetStride { /// Returns the number of elements. #[must_use] + #[inline] pub fn len(&self) -> usize { match self { OffsetStride::Empty => 0, @@ -118,14 +129,47 @@ impl OffsetStride { /// Returns `true` if empty. #[must_use] + #[inline] pub fn is_empty(&self) -> bool { matches!(self, OffsetStride::Empty) } /// Removes all elements. + #[inline] pub fn clear(&mut self) { *self = Self::default(); } + + /// Return an iterator over the elements. + #[must_use] + #[inline] + pub fn iter(&self) -> OffsetStrideIter { + OffsetStrideIter { + strided: *self, + index: 0, + } + } +} + +/// An iterator over the elements of an [`OffsetStride`]. +pub struct OffsetStrideIter { + strided: OffsetStride, + index: usize, +} + +impl Iterator for OffsetStrideIter { + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + if self.index < self.strided.len() { + let item = self.strided.index(self.index); + self.index += 1; + Some(item) + } else { + None + } + } } /// A list of unsigned integers that uses `u32` elements as long as they are small enough, and switches to `u64` once they are not. @@ -149,6 +193,7 @@ where { /// Allocate a new list with a specified capacity. #[must_use] + #[inline] pub fn with_capacity(cap: usize) -> Self { Self { smol: S::with_capacity(cap), @@ -161,6 +206,7 @@ where /// # Panics /// /// Panics if `usize` does not fit in `u64`. + #[inline] pub fn push(&mut self, offset: usize) { if self.chonk.is_empty() { if let Ok(smol) = offset.try_into() { @@ -179,6 +225,7 @@ where /// /// Panics if the index is out of bounds, i.e., it is larger or equal to the length. #[must_use] + #[inline] pub fn index(&self, index: usize) -> usize { if index < self.smol.len() { self.smol.index(index).try_into().unwrap() @@ -189,33 +236,136 @@ where } /// The number of offsets in the list. #[must_use] + #[inline] pub fn len(&self) -> usize { self.smol.len() + self.chonk.len() } /// Returns `true` if this list contains no elements. #[must_use] + #[inline] pub fn is_empty(&self) -> bool { self.smol.is_empty() && self.chonk.is_empty() } /// Reserve space for `additional` elements. + #[inline] pub fn reserve(&mut self, additional: usize) { self.smol.reserve(additional); } /// Remove all elements. + #[inline] pub fn clear(&mut self) { self.smol.clear(); self.chonk.clear(); } + #[inline] fn heap_size(&self, mut callback: F) { self.smol.heap_size(&mut callback); self.chonk.heap_size(callback); } } +impl Storage for OffsetList +where + S: OffsetContainer, + L: OffsetContainer, +{ + #[inline] + fn with_capacity(capacity: usize) -> Self { + Self::with_capacity(capacity) + } + + #[inline] + fn reserve(&mut self, additional: usize) { + self.reserve(additional) + } + + #[inline] + fn clear(&mut self) { + self.clear() + } + + #[inline] + fn heap_size(&self, callback: F) { + self.heap_size(callback) + } + + #[inline] + fn len(&self) -> usize { + self.len() + } + + #[inline] + fn is_empty(&self) -> bool { + self.is_empty() + } +} + +impl OffsetContainer for OffsetList +where + S: OffsetContainer, + L: OffsetContainer, +{ + type Iter<'a> = OffsetListIter<'a, S, L> where Self: 'a; + + #[inline] + fn index(&self, index: usize) -> usize { + self.index(index) + } + + #[inline] + fn push(&mut self, item: usize) { + self.push(item) + } + + #[inline] + fn extend>(&mut self, iter: I) + where + I::IntoIter: ExactSizeIterator, + { + for item in iter { + self.push(item); + } + } + + #[inline] + fn iter(&self) -> Self::Iter<'_> { + OffsetListIter { + smol: self.smol.iter(), + chonk: self.chonk.iter(), + } + } +} + +/// An iterator over the elements of an [`OffsetList`]. +pub struct OffsetListIter<'a, S, L> +where + S: OffsetContainer + 'a, + L: OffsetContainer + 'a, +{ + smol: S::Iter<'a>, + chonk: L::Iter<'a>, +} + +impl<'a, S, L> Iterator for OffsetListIter<'a, S, L> +where + S: OffsetContainer + 'a, + L: OffsetContainer + 'a, +{ + type Item = usize; + + #[inline] + fn next(&mut self) -> Option { + self.smol + .next() + .map(|x| x as usize) + .or_else(|| self.chonk.next().map(|x| x as usize)) + } +} + /// An offset container implementation that first tries to recognize strides, and then spilles into /// a regular offset list. #[derive(Eq, PartialEq, Default, Debug, Clone)] @@ -234,30 +384,36 @@ where S: OffsetContainer, L: OffsetContainer, { + #[inline] fn with_capacity(_capacity: usize) -> Self { // `self.strided` doesn't have any capacity, and we don't know the structure of the data. Self::default() } + #[inline] fn clear(&mut self) { self.spilled.clear(); self.strided = OffsetStride::default(); } + #[inline] fn len(&self) -> usize { self.strided.len() + self.spilled.len() } + #[inline] fn is_empty(&self) -> bool { self.strided.is_empty() && self.spilled.is_empty() } + #[inline] fn reserve(&mut self, additional: usize) { if !self.spilled.is_empty() { self.spilled.reserve(additional); } } + #[inline] fn heap_size(&self, callback: F) { self.spilled.heap_size(callback); } @@ -268,6 +424,8 @@ where S: OffsetContainer, L: OffsetContainer, { + type Iter<'a> = OffsetOptimizedIter<'a, S , L> where Self: 'a; + fn index(&self, index: usize) -> usize { if index < self.strided.len() { self.strided.index(index) @@ -295,9 +453,40 @@ where self.push(item); } } + + fn iter(&self) -> Self::Iter<'_> { + OffsetOptimizedIter { + strided: self.strided.iter(), + spilled: self.spilled.iter(), + } + } +} + +/// An iterator over the elements of an [`OffsetOptimized`]. +pub struct OffsetOptimizedIter<'a, S, L> +where + S: OffsetContainer + 'a, + L: OffsetContainer + 'a, +{ + strided: OffsetStrideIter, + spilled: as OffsetContainer>::Iter<'a>, +} + +impl<'a, S, L> Iterator for OffsetOptimizedIter<'a, S, L> +where + S: OffsetContainer + 'a, + L: OffsetContainer + 'a, +{ + type Item = usize; + + fn next(&mut self) -> Option { + self.strided.next().or_else(|| self.spilled.next()) + } } impl OffsetContainer for Vec { + type Iter<'a> = std::iter::Copied> where Self: 'a; + fn index(&self, index: usize) -> T { self[index] } @@ -313,6 +502,10 @@ impl OffsetContainer for Vec { { Extend::extend(self, iter); } + + fn iter(&self) -> Self::Iter<'_> { + self.as_slice().iter().copied() + } } #[cfg(test)] diff --git a/src/impls/storage.rs b/src/impls/storage.rs index 0164af6..33d52ac 100644 --- a/src/impls/storage.rs +++ b/src/impls/storage.rs @@ -14,6 +14,7 @@ pub trait Storage: Default { /// Allocate storage large enough to absorb `regions`'s contents. #[must_use] + #[inline] fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self where Self: 'a, @@ -112,6 +113,8 @@ impl, T> PushStorage> for Vec { } /// A storage that maintains non-reallocating allocations and allocates double the size when needed. +/// +/// Not considered part of the stable interface of this crate. #[derive(Debug)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub struct Doubling { @@ -210,18 +213,22 @@ impl Doubling { } impl Storage for Doubling { + #[inline] fn with_capacity(capacity: usize) -> Self { Self::with_capacity(capacity) } + #[inline] fn reserve(&mut self, additional: usize) { self.reserve(additional); } + #[inline] fn clear(&mut self) { self.clear() } + #[inline] fn heap_size(&self, callback: F) { self.heap_size(callback); } @@ -281,11 +288,39 @@ impl std::ops::Index> for Doubling { } } +/// An iterator over the elements of a [`Doubling`]. +pub struct DoublingIter<'a, T: 'a> { + inner: Option>>, + remaining: std::slice::Iter<'a, Vec>, +} + +impl<'a, T: Copy> Iterator for DoublingIter<'a, T> { + type Item = T; + + #[inline] + fn next(&mut self) -> Option { + loop { + if let Some(inner) = &mut self.inner { + if let Some(item) = inner.next() { + return Some(item); + } + } + self.inner = self + .remaining + .next() + .map(|vec| vec.as_slice().iter().copied()); + self.inner.as_ref()?; + } + } +} + mod offsetcontainer { use crate::impls::offsets::OffsetContainer; - use crate::impls::storage::Doubling; + use crate::impls::storage::{Doubling, DoublingIter}; impl OffsetContainer for Doubling { + type Iter<'a> = DoublingIter<'a, T> where Self: 'a; + fn push(&mut self, item: T) { self.len += 1; self.reserve(1); @@ -302,6 +337,14 @@ mod offsetcontainer { fn index(&self, index: usize) -> T { *self.index(index) } + + fn iter(&self) -> Self::Iter<'_> { + let mut iter = self.inner.as_slice().iter(); + DoublingIter { + inner: iter.next().map(|vec| vec.as_slice().iter().copied()), + remaining: iter, + } + } } }