From d3ffc2e3cd81c724955c1b5082f63b744290948f Mon Sep 17 00:00:00 2001 From: Moritz Hoffmann Date: Fri, 24 May 2024 11:00:59 -0400 Subject: [PATCH] Further cleanup Signed-off-by: Moritz Hoffmann --- src/impls/codec.rs | 62 +++++++++++++++++----------------------- src/impls/columns.rs | 8 ++---- src/impls/deduplicate.rs | 9 ++++-- src/impls/result.rs | 18 ++++-------- src/impls/slice.rs | 56 ++++++++++++++---------------------- src/impls/slice_copy.rs | 2 +- src/impls/string.rs | 25 ++++++---------- src/impls/tuple.rs | 43 ++++++++++++---------------- 8 files changed, 92 insertions(+), 131 deletions(-) diff --git a/src/impls/codec.rs b/src/impls/codec.rs index 7196911..29be462 100644 --- a/src/impls/codec.rs +++ b/src/impls/codec.rs @@ -28,46 +28,37 @@ fn consolidate_from(vec: &mut Vec<(T, usize)>, offset: usize) { /// Sorts and consolidates a slice, returning the valid prefix length. fn consolidate_slice(slice: &mut [(T, usize)]) -> usize { - // We could do an insertion-sort like initial scan which builds up sorted, consolidated runs. - // In a world where there are not many results, we may never even need to call in to merge sort. - slice.sort_by(|x, y| x.0.cmp(&y.0)); - - let slice_ptr = slice.as_mut_ptr(); - - // Counts the number of distinct known-non-zero accumulations. Indexes the write location. - let mut offset = 0; - for index in 1..slice.len() { - // The following unsafe block elides various bounds checks, using the reasoning that `offset` - // is always strictly less than `index` at the beginning of each iteration. This is initially - // true, and in each iteration `offset` can increase by at most one (whereas `index` always - // increases by one). As `index` is always in bounds, and `offset` starts at zero, it too is - // always in bounds. - // - // LLVM appears to struggle to optimize out Rust's split_at_mut, which would prove disjointness - // using run-time tests. - unsafe { - assert!(offset < index); - - // LOOP INVARIANT: offset < index - let ptr1 = slice_ptr.add(offset); - let ptr2 = slice_ptr.add(index); - - if (*ptr1).0 == (*ptr2).0 { - (*ptr1).1 += (*ptr2).1; + if slice.len() > 1 { + // We could do an insertion-sort like initial scan which builds up sorted, consolidated runs. + // In a world where there are not many results, we may never even need to call in to merge sort. + slice.sort_by(|x, y| x.0.cmp(&y.0)); + + // Counts the number of distinct known-non-zero accumulations. Indexes the write location. + let mut offset = 0; + let mut accum = slice[offset].1; + + for index in 1..slice.len() { + if slice[index].0 == slice[index - 1].0 { + accum += slice[index].1; } else { - if (*ptr1).1 != 0 { + if accum != 0 { + slice.swap(offset, index - 1); + slice[offset].1.clone_from(&accum); offset += 1; } - let ptr1 = slice_ptr.add(offset); - std::ptr::swap(ptr1, ptr2); + accum.clone_from(&slice[index].1); } } - } - if offset < slice.len() && slice[offset].1 != 0 { - offset += 1; - } + if accum != 0 { + slice.swap(offset, slice.len() - 1); + slice[offset].1 = accum; + offset += 1; + } - offset + offset + } else { + slice.iter().filter(|x| x.1 != 0).count() + } } /// A region that encodes its data in a codec `C`. @@ -123,8 +114,7 @@ where impl Push<&[u8]> for CodecRegion where - for<'a> R: Region = &'a [u8]> + 'a, - for<'a> R: Push<&'a [u8]>, + for<'a> R: Region = &'a [u8]> + Push<&'a [u8]> + 'a, { fn push(&mut self, item: &[u8]) -> as Region>::Index { self.codec.encode(item, &mut self.inner) diff --git a/src/impls/columns.rs b/src/impls/columns.rs index dd1d799..be3e40a 100644 --- a/src/impls/columns.rs +++ b/src/impls/columns.rs @@ -141,7 +141,7 @@ where { fn default() -> Self { Self { - indices: Default::default(), + indices: ConsecutiveOffsetPairs::default(), inner: Vec::default(), } } @@ -240,8 +240,7 @@ where impl Push> for ColumnsRegion where - R: Region, - for<'a> R: Push<::ReadItem<'a>>, + for<'a> R: Region + Push<::ReadItem<'a>>, { fn push(&mut self, item: ReadColumns<'_, R>) -> as Region>::Index { // Ensure all required regions exist. @@ -259,8 +258,7 @@ where impl<'a, R, T> Push<&'a [T]> for ColumnsRegion where - R: Region, - R: Push<&'a T>, + R: Region + Push<&'a T>, { fn push(&mut self, item: &'a [T]) -> as Region>::Index { // Ensure all required regions exist. diff --git a/src/impls/deduplicate.rs b/src/impls/deduplicate.rs index 56ee824..199bb25 100644 --- a/src/impls/deduplicate.rs +++ b/src/impls/deduplicate.rs @@ -72,8 +72,9 @@ impl Region for CollapseSequence { } } -impl> Push for CollapseSequence +impl Push for CollapseSequence where + R: Region + Push, for<'a> T: PartialEq>, { fn push(&mut self, item: T) -> as Region>::Index { @@ -182,8 +183,10 @@ impl, O: OffsetContainer> Region } } -impl + Push, O: OffsetContainer, T> Push - for ConsecutiveOffsetPairs +impl Push for ConsecutiveOffsetPairs +where + R: Region + Push, + O: OffsetContainer, { #[inline] fn push(&mut self, item: T) -> as Region>::Index { diff --git a/src/impls/result.rs b/src/impls/result.rs index 4888d44..dd579ae 100644 --- a/src/impls/result.rs +++ b/src/impls/result.rs @@ -82,10 +82,8 @@ where impl Push> for ResultRegion where - TC: Region, - EC: Region, - TC: Push, - EC: Push, + TC: Region + Push, + EC: Region + Push, { #[inline] fn push(&mut self, item: Result) -> as Region>::Index { @@ -98,10 +96,8 @@ where impl<'a, T: 'a, TC, E: 'a, EC> Push<&'a Result> for ResultRegion where - TC: Region, - EC: Region, - TC: Push<&'a T>, - EC: Push<&'a E>, + TC: Region + Push<&'a T>, + EC: Region + Push<&'a E>, { #[inline] fn push(&mut self, item: &'a Result) -> as Region>::Index { @@ -114,10 +110,8 @@ where impl<'a, T: 'a, TC, E: 'a, EC> ReserveItems<&'a Result> for ResultRegion where - TC: Region, - EC: Region, - TC: ReserveItems<&'a T>, - EC: ReserveItems<&'a E>, + TC: Region + ReserveItems<&'a T>, + EC: Region + ReserveItems<&'a E>, { fn reserve_items(&mut self, items: I) where diff --git a/src/impls/slice.rs b/src/impls/slice.rs index 1726020..3afbce1 100644 --- a/src/impls/slice.rs +++ b/src/impls/slice.rs @@ -213,14 +213,13 @@ impl<'a, C: Region, O: OffsetContainer> Iterator for ReadSliceIter<'a, } } -impl Push<&[T]> for SliceRegion +impl<'a, C, T, O> Push<&'a [T]> for SliceRegion where - C: Region, - for<'a> C: Push<&'a T>, + C: Region + Push<&'a T>, O: OffsetContainer, { #[inline] - fn push(&mut self, item: &[T]) -> as Region>::Index { + fn push(&mut self, item: &'a [T]) -> as Region>::Index { let start = self.slices.len(); self.slices.extend(item.iter().map(|t| self.inner.push(t))); (start, self.slices.len()) @@ -229,8 +228,7 @@ where impl<'a, T, R, O> ReserveItems<&'a [T]> for SliceRegion where - R: ReserveItems<&'a T>, - R: Region, + R: Region + ReserveItems<&'a T>, O: OffsetContainer, { fn reserve_items(&mut self, items: I) @@ -244,8 +242,7 @@ where impl Push> for SliceRegion where - C: Region, - C: Push, + C: Region + Push, O: OffsetContainer, { #[inline] @@ -259,8 +256,7 @@ where impl Push<&Vec> for SliceRegion where - C: Region, - for<'a> SliceRegion: Push<&'a [T]>, + for<'a> C: Region + Push<&'a T>, O: OffsetContainer, { #[inline] @@ -269,40 +265,37 @@ where } } -impl Push<&&Vec> for SliceRegion +impl<'a, C, T, O> Push<&&'a Vec> for SliceRegion where - C: Region, - for<'a> SliceRegion: Push<&'a [T]>, + C: Region + Push<&'a T>, O: OffsetContainer, { #[inline] - fn push(&mut self, item: &&Vec) -> as Region>::Index { + fn push(&mut self, item: &&'a Vec) -> as Region>::Index { self.push(item.as_slice()) } } -impl<'b, T, R, O> ReserveItems<&'b Vec> for SliceRegion +impl<'a, T, R, O> ReserveItems<&'a Vec> for SliceRegion where - for<'a> R: ReserveItems<&'a T>, - R: Region, + R: Region + ReserveItems<&'a T>, O: OffsetContainer, { fn reserve_items(&mut self, items: I) where - I: Iterator> + Clone, + I: Iterator> + Clone, { self.reserve_items(items.map(Deref::deref)); } } -impl Push> for SliceRegion +impl<'a, C, O> Push> for SliceRegion where - for<'a> C: Region, - for<'a> C: Push<::ReadItem<'a>>, + C: Region + Push<::ReadItem<'a>>, O: OffsetContainer, { #[inline] - fn push(&mut self, item: ReadSlice<'_, C, O>) -> as Region>::Index { + fn push(&mut self, item: ReadSlice<'a, C, O>) -> as Region>::Index { let ReadSlice { region, start, end } = item; let start_len = self.slices.len(); for index in start..end { @@ -316,8 +309,7 @@ where impl Push<[T; N]> for SliceRegion where - for<'a> R: Region, - for<'a> Self: Push<&'a [T]>, + for<'a> R: Region + Push<&'a T>, O: OffsetContainer, { #[inline] @@ -328,8 +320,7 @@ where impl<'a, T, R, O, const N: usize> Push<&'a [T; N]> for SliceRegion where - SliceRegion: Push<&'a [T]>, - R: Region, + R: Region + Push<&'a T>, O: OffsetContainer, { #[inline] @@ -338,22 +329,20 @@ where } } -impl Push<&&[T; N]> for SliceRegion +impl<'a, T, R, O, const N: usize> Push<&&'a [T; N]> for SliceRegion where - for<'b> SliceRegion: Push<&'b [T]>, - R: Region, + R: Region + Push<&'a T>, O: OffsetContainer, { #[inline] - fn push(&mut self, item: &&[T; N]) -> as Region>::Index { + fn push(&mut self, item: &&'a [T; N]) -> as Region>::Index { self.push(item.as_slice()) } } impl<'a, T, R, O, const N: usize> ReserveItems<&'a [T; N]> for SliceRegion where - R: ReserveItems<&'a T>, - R: Region, + R: Region + ReserveItems<&'a T>, O: OffsetContainer, { fn reserve_items(&mut self, items: I) @@ -366,8 +355,7 @@ where impl<'a, R, O> ReserveItems> for SliceRegion where - R: ReserveItems<::ReadItem<'a>> + 'a, - R: Region, + R: Region + ReserveItems<::ReadItem<'a>> + 'a, O: OffsetContainer, { fn reserve_items(&mut self, items: I) diff --git a/src/impls/slice_copy.rs b/src/impls/slice_copy.rs index b84d778..ac54c8e 100644 --- a/src/impls/slice_copy.rs +++ b/src/impls/slice_copy.rs @@ -126,7 +126,7 @@ impl Push<&[T]> for OwnedRegion { impl Push<&&[T]> for OwnedRegion where - for<'a> OwnedRegion: Push<&'a [T]>, + for<'a> Self: Push<&'a [T]>, { #[inline] fn push(&mut self, item: &&[T]) -> as Region>::Index { diff --git a/src/impls/string.rs b/src/impls/string.rs index 513bdb0..26c68a8 100644 --- a/src/impls/string.rs +++ b/src/impls/string.rs @@ -89,8 +89,7 @@ impl Containerized for &str { impl Push for StringRegion where - for<'a> R: Region = &'a [u8]> + 'a, - for<'a> R: Push<&'a [u8]>, + for<'a> R: Region = &'a [u8]> + Push<&'a [u8]> + 'a, { #[inline] fn push(&mut self, item: String) -> as Region>::Index { @@ -100,8 +99,7 @@ where impl Push<&String> for StringRegion where - for<'a> R: Region = &'a [u8]> + 'a, - for<'a> R: Push<&'a [u8]>, + for<'a> R: Region = &'a [u8]> + Push<&'a [u8]> + 'a, { #[inline] fn push(&mut self, item: &String) -> as Region>::Index { @@ -111,8 +109,7 @@ where impl<'b, R> ReserveItems<&'b String> for StringRegion where - for<'a> R: Region = &'a [u8]> + 'a, - for<'a> R: ReserveItems<&'a [u8]>, + for<'a> R: Region = &'a [u8]> + ReserveItems<&'a [u8]> + 'a, { fn reserve_items(&mut self, items: I) where @@ -124,8 +121,7 @@ where impl Push<&str> for StringRegion where - for<'a> R: Region = &'a [u8]> + 'a, - for<'a> R: Push<&'a [u8]>, + for<'a> R: Region = &'a [u8]> + Push<&'a [u8]> + 'a, { #[inline] fn push(&mut self, item: &str) -> as Region>::Index { @@ -135,8 +131,7 @@ where impl Push<&&str> for StringRegion where - for<'a> R: Region = &'a [u8]> + 'a, - for<'a> R: Push<&'a [u8]>, + for<'a> R: Region = &'a [u8]> + Push<&'a [u8]> + 'a, { #[inline] fn push(&mut self, item: &&str) -> as Region>::Index { @@ -146,8 +141,7 @@ where impl<'b, R> ReserveItems<&'b str> for StringRegion where - for<'a> R: Region = &'a [u8]> + 'a, - for<'a> R: ReserveItems<&'a [u8]>, + for<'a> R: Region = &'a [u8]> + ReserveItems<&'a [u8]> + 'a, { #[inline] fn reserve_items(&mut self, items: I) @@ -158,15 +152,14 @@ where } } -impl<'b, R> ReserveItems<&'b &'b str> for StringRegion +impl<'a, R> ReserveItems<&'a &'a str> for StringRegion where - for<'a> R: Region = &'a [u8]> + 'a, - for<'a> R: ReserveItems<&'a [u8]>, + for<'b> R: Region = &'b [u8]> + ReserveItems<&'b [u8]> + 'b, { #[inline] fn reserve_items(&mut self, items: I) where - I: Iterator + Clone, + I: Iterator + Clone, { self.reserve_items(items.copied()); } diff --git a/src/impls/tuple.rs b/src/impls/tuple.rs index 30bfb04..b6d3417 100644 --- a/src/impls/tuple.rs +++ b/src/impls/tuple.rs @@ -33,7 +33,8 @@ macro_rules! tuple_flatcontainer { fn merge_regions<'a>(regions: impl Iterator + Clone) -> Self where - Self: 'a { + Self: 'a, + { Self { $([]: $name::merge_regions(regions.clone().map(|r| &r.[]))),* } @@ -50,7 +51,8 @@ macro_rules! tuple_flatcontainer { fn reserve_regions<'a, It>(&mut self, regions: It) where Self: 'a, - It: Iterator + Clone { + It: Iterator + Clone, + { $(self.[].reserve_regions(regions.clone().map(|r| &r.[]));)* } @@ -66,11 +68,9 @@ macro_rules! tuple_flatcontainer { #[allow(non_camel_case_types)] #[allow(non_snake_case)] - impl<$($name, [<$name _C>]: Region ),*> - Push<($($name,)*)> - for []<$([<$name _C>]),*> - where - $([<$name _C>]: Push<$name>),* + impl<$($name, [<$name _C>]: Region ),*> Push<($($name,)*)> for []<$([<$name _C>]),*> + where + $([<$name _C>]: Push<$name>),* { fn push(&mut self, item: ($($name,)*)) -> <[]<$([<$name _C>]),*> as Region>::Index { @@ -81,11 +81,9 @@ macro_rules! tuple_flatcontainer { #[allow(non_camel_case_types)] #[allow(non_snake_case)] - impl<'a, $($name, [<$name _C>]: Region ),*> - Push<&'a ($($name,)*)> - for []<$([<$name _C>]),*> - where - $([<$name _C>]: Push<&'a $name>),* + impl<'a, $($name, [<$name _C>]),*> Push<&'a ($($name,)*)> for []<$([<$name _C>]),*> + where + $([<$name _C>]: Region + Push<&'a $name>),* { fn push(&mut self, item: &'a ($($name,)*)) -> <[]<$([<$name _C>]),*> as Region>::Index { @@ -96,30 +94,27 @@ macro_rules! tuple_flatcontainer { #[allow(non_camel_case_types)] #[allow(non_snake_case)] - impl<'a, $($name, [<$name _C>]: Region ),*> - ReserveItems<&'a ($($name,)*)> - for []<$([<$name _C>]),*> - where - $([<$name _C>]: ReserveItems<&'a $name>),* + impl<'a, $($name, [<$name _C>]),*> ReserveItems<&'a ($($name,)*)> for []<$([<$name _C>]),*> + where + $([<$name _C>]: Region + ReserveItems<&'a $name>),* { fn reserve_items(&mut self, items: It) where - It: Iterator + Clone { + It: Iterator + Clone, + { tuple_flatcontainer!(reserve_items self items $($name)* @ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31); } } #[allow(non_camel_case_types)] #[allow(non_snake_case)] - impl<$($name, [<$name _C>]: Region ),*> - ReserveItems<($($name,)*)> - for []<$([<$name _C>]),*> - where - $([<$name _C>]: ReserveItems<$name>),* + impl<$($name, [<$name _C>]),*> ReserveItems<($($name,)*)> for []<$([<$name _C>]),*> + where + $([<$name _C>]: Region + ReserveItems<$name>),* { fn reserve_items(&mut self, items: It) where - It: Iterator + Clone + It: Iterator + Clone, { tuple_flatcontainer!(reserve_items_owned self items $($name)* @ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31); }