From 33f68e40456aac30bda8258496b9a5a6382e67fb Mon Sep 17 00:00:00 2001 From: konsti Date: Thu, 14 Nov 2024 14:19:46 +0100 Subject: [PATCH] Add `FromIter` for `Ranges` (#278) * Add `FromIter` for `Ranges` Add a method to construct ranges from an iterator of arbitrary segments. This allows to `.collect()` an iterator of tuples of bounds. This is more ergonomic than folding the previous ranges with the next segment each time, and also faster. Split out from https://github.com/pubgrub-rs/pubgrub/pull/273 Closes https://github.com/astral-sh/pubgrub/pull/33 Fixes https://github.com/pubgrub-rs/pubgrub/issues/249 * Fix ascii art alignment * Fix algorithm with new proptest * Sorting comment * Review --- version-ranges/src/lib.rs | 190 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 190 insertions(+) diff --git a/version-ranges/src/lib.rs b/version-ranges/src/lib.rs index 4fe8f134..8b19881b 100644 --- a/version-ranges/src/lib.rs +++ b/version-ranges/src/lib.rs @@ -879,6 +879,186 @@ impl IntoIterator for Ranges { } } +impl FromIterator<(Bound, Bound)> for Ranges { + /// Constructor from arbitrary, unsorted and potentially overlapping ranges. + /// + /// This is equivalent, but faster, to computing the [`Ranges::union`] of the + /// [`Ranges::from_range_bounds`] of each segment. + fn from_iter, Bound)>>(iter: T) -> Self { + // We have three constraints we need to fulfil: + // 1. The segments are sorted, from lowest to highest (through `Ord`): By sorting. + // 2. Each segment contains at least one version (start < end): By skipping invalid + // segments. + // 3. There is at least one version between two segments: By merging overlapping elements. + // + // Technically, the implementation has a O(n²) worst case complexity since we're inserting + // and removing. This has two motivations: One is that we don't have any performance + // critical usages of this method as of this writing, so we have no real world benchmark. + // The other is that we get the elements from an iterator, so to avoid moving elements + // around we would first need to build a different, sorted collection with extra + // allocation(s), before we could build our real segments. --Konsti + + // For this implementation, we choose to only build a single smallvec and insert or remove + // in it, instead of e.g. collecting the segments into a sorted datastructure first and then + // construction the second smallvec without shifting. + let mut segments: SmallVec<[Interval; 1]> = SmallVec::new(); + + for segment in iter { + if !valid_segment(&segment.start_bound(), &segment.end_bound()) { + continue; + } + // Find where to insert the new segment + let insertion_point = segments.partition_point(|elem: &Interval| { + cmp_bounds_start(elem.start_bound(), segment.start_bound()) + .unwrap() + .is_lt() + }); + // Is it overlapping with the previous segment? + let previous_overlapping = insertion_point > 0 + && !end_before_start_with_gap( + &segments[insertion_point - 1].end_bound(), + &segment.start_bound(), + ); + + // Is it overlapping with the following segment? We'll check if there's more than one + // overlap later. + let next_overlapping = insertion_point < segments.len() + && !end_before_start_with_gap( + &segment.end_bound(), + &segments[insertion_point].start_bound(), + ); + + match (previous_overlapping, next_overlapping) { + (true, true) => { + // previous: |------| + // segment: |------| + // following: |------| + // final: |---------------| + // + // OR + // + // previous: |------| + // segment: |-----------| + // following: |----| + // final: |---------------| + // + // OR + // + // previous: |------| + // segment: |----------------| + // following: |----| |------| + // final: |------------------------| + // We merge all three segments into one, which is effectively removing one of + // two previously inserted and changing the bounds on the other. + + // Remove all elements covered by the final element + let mut following = segments.remove(insertion_point); + while insertion_point < segments.len() + && !end_before_start_with_gap( + &segment.end_bound(), + &segments[insertion_point].start_bound(), + ) + { + following = segments.remove(insertion_point); + } + + // Set end to max(segment.end, .end) + if cmp_bounds_end(segment.end_bound(), following.end_bound()) + .unwrap() + .is_lt() + { + segments[insertion_point - 1].1 = following.1; + } else { + segments[insertion_point - 1].1 = segment.1; + } + } + (true, false) => { + // previous: |------| + // segment: |------| + // following: |------| + // + // OR + // + // previous: |----------| + // segment: |---| + // following: |------| + // + // final: |----------| |------| + // We can reuse the existing element by extending it. + + // Set end to max(segment.end, .end) + if cmp_bounds_end( + segments[insertion_point - 1].end_bound(), + segment.end_bound(), + ) + .unwrap() + .is_lt() + { + segments[insertion_point - 1].1 = segment.1; + } + } + (false, true) => { + // previous: |------| + // segment: |------| + // following: |------| + // final: |------| |----------| + // + // OR + // + // previous: |------| + // segment: |----------| + // following: |---| + // final: |------| |----------| + // + // OR + // + // previous: |------| + // segment: |------------| + // following: |---| |------| + // + // final: |------| |-----------------| + // We can reuse the existing element by extending it. + + // Remove all fully covered segments so the next element is the last one that + // overlaps. + while insertion_point + 1 < segments.len() + && !end_before_start_with_gap( + &segment.end_bound(), + &segments[insertion_point + 1].start_bound(), + ) + { + // We know that the one after also overlaps, so we can drop the current + // following. + segments.remove(insertion_point); + } + + // Set end to max(segment.end, .end) + if cmp_bounds_end(segments[insertion_point].end_bound(), segment.end_bound()) + .unwrap() + .is_lt() + { + segments[insertion_point].1 = segment.1; + } + segments[insertion_point].0 = segment.0; + } + (false, false) => { + // previous: |------| + // segment: |------| + // following: |------| + // + // final: |------| |------| |------| + + // This line is O(n), which makes the algorithm O(n²), but it should be good + // enough for now. + segments.insert(insertion_point, segment); + } + } + } + + Self { segments }.check_invariants() + } +} + // REPORT ###################################################################### impl Display for Ranges { @@ -1183,6 +1363,16 @@ pub mod tests { } assert!(simp.segments.len() <= range.segments.len()) } + + #[test] + fn from_iter_valid(segments in proptest::collection::vec(any::<(Bound, Bound)>(), ..30)) { + let mut expected = Ranges::empty(); + for segment in &segments { + expected = expected.union(&Ranges::from_range_bounds(*segment)); + } + let actual = Ranges::from_iter(segments.clone()); + assert_eq!(expected, actual, "{segments:?}"); + } } #[test]