From fe7e010f5df4ea148dafb0a07576f855cd149ae3 Mon Sep 17 00:00:00 2001 From: konstin Date: Fri, 8 Nov 2024 10:25:04 +0100 Subject: [PATCH 1/5] Add `FromIter` for `Ranges` Add a method to construct ranges from an iterator of arbitrary segments. This allows to `.collect()` an iterator of tuples of bounds. This is more ergonomic than folding the previous ranges with the next segment each time, and also faster. Split out from https://github.com/pubgrub-rs/pubgrub/pull/273 Closes https://github.com/astral-sh/pubgrub/pull/33 Fixes https://github.com/pubgrub-rs/pubgrub/issues/249 --- version-ranges/src/lib.rs | 96 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) diff --git a/version-ranges/src/lib.rs b/version-ranges/src/lib.rs index 4fe8f134..b612b5b4 100644 --- a/version-ranges/src/lib.rs +++ b/version-ranges/src/lib.rs @@ -879,6 +879,87 @@ impl IntoIterator for Ranges { } } +impl FromIterator<(Bound, Bound)> for Ranges { + /// Constructor from arbitrary, unsorted and potentially overlapping ranges. + /// + /// This is equivalent, but faster, to computing the [`Ranges::union`] of the + /// [`Ranges::from_range_bounds`] of each segment. + fn from_iter, Bound)>>(iter: T) -> Self { + // We have three constraints we need to fulfil: + // 1. The segments are sorted, from lowest to highest (through `Ord`): By sorting. + // 2. Each segment contains at least one version (start < end): By `union`. + // 3. There is at least one version between two segments: By `union`. + let mut segments: SmallVec<[Interval; 1]> = SmallVec::new(); + + for segment in iter { + if !valid_segment(&segment.start_bound(), &segment.end_bound()) { + continue; + } + // Find where to insert the new segment + let insertion_point = segments.partition_point(|elem: &Interval| { + cmp_bounds_start(elem.start_bound(), segment.start_bound()) + .unwrap() + .is_lt() + }); + // Is it overlapping with the previous segment? + let previous_overlapping = insertion_point > 0 + && !end_before_start_with_gap( + &segments[insertion_point - 1].end_bound(), + &segment.start_bound(), + ); + + // Is it overlapping with the following segment? + let next_overlapping = insertion_point < segments.len() + && !end_before_start_with_gap( + &segment.end_bound(), + &segments[insertion_point].start_bound(), + ); + + match (previous_overlapping, next_overlapping) { + (true, true) => { + // previous: |-------| + // segment: |-------| + // following: |-----| + // + // final: |--------------------| + // We merge all three segments into one, which is effectively removing one of + // two previously inserted and changing the bounds on the other. + let following = segments.remove(insertion_point); + segments[insertion_point - 1].1 = following.1; + } + (true, false) => { + // previous: |-----| + // segment: |-----| + // following: |-----| + // + // final: |---------| |-----| + // We can reuse the existing element by extending it. + segments[insertion_point - 1].1 = segment.1; + } + (false, true) => { + // previous: |-----| + // segment: |-----| + // following: |-----| + // + // final: |-----| |---------| + // We can reuse the existing element by extending it. + segments[insertion_point].0 = segment.0; + } + (false, false) => { + // previous: |-----| + // segment: |-----| + // following: |-----| + // + // final: |-----| |-----| |-----| + segments.insert(insertion_point, segment); + } + } + } + + Self { segments }.check_invariants() + } +} + // REPORT ###################################################################### impl Display for Ranges { @@ -1183,6 +1264,12 @@ pub mod tests { } assert!(simp.segments.len() <= range.segments.len()) } + + #[test] + fn from_iter_valid(segments in proptest::collection::vec(any::<(Bound, Bound)>(), ..30)) { + // We check invariants in the method. + Ranges::from_iter(segments.clone()); + } } #[test] @@ -1219,6 +1306,15 @@ pub mod tests { ); } + #[test] + fn from_iter_regression() { + Ranges::from_iter([ + (Included(0), Included(0)), + (Excluded(1u32), Unbounded), + (Included(0), Included(0)), + ]); + } + #[test] fn version_ord() { let versions: &[Ranges] = &[ From 9e76cbbbbd9ac70aa3ae38e8403d232556e74f7f Mon Sep 17 00:00:00 2001 From: konstin Date: Fri, 8 Nov 2024 16:40:37 +0100 Subject: [PATCH 2/5] Fix ascii art alignment --- version-ranges/src/lib.rs | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/version-ranges/src/lib.rs b/version-ranges/src/lib.rs index b612b5b4..dc6bc755 100644 --- a/version-ranges/src/lib.rs +++ b/version-ranges/src/lib.rs @@ -917,40 +917,40 @@ impl FromIterator<(Bound, Bound)> for Ranges { match (previous_overlapping, next_overlapping) { (true, true) => { - // previous: |-------| - // segment: |-------| - // following: |-----| + // previous: |------| + // segment: |------| + // following: |------| // - // final: |--------------------| + // final: |---------------| // We merge all three segments into one, which is effectively removing one of // two previously inserted and changing the bounds on the other. let following = segments.remove(insertion_point); segments[insertion_point - 1].1 = following.1; } (true, false) => { - // previous: |-----| - // segment: |-----| - // following: |-----| + // previous: |------| + // segment: |------| + // following: |------| // - // final: |---------| |-----| + // final: |----------| |------| // We can reuse the existing element by extending it. segments[insertion_point - 1].1 = segment.1; } (false, true) => { - // previous: |-----| - // segment: |-----| - // following: |-----| + // previous: |------| + // segment: |------| + // following: |------| // - // final: |-----| |---------| + // final: |------| |---------| // We can reuse the existing element by extending it. segments[insertion_point].0 = segment.0; } (false, false) => { - // previous: |-----| - // segment: |-----| - // following: |-----| + // previous: |------| + // segment: |------| + // following: |------| // - // final: |-----| |-----| |-----| + // final: |------| |------| |------| segments.insert(insertion_point, segment); } } From dae485bbb248e2cf6fcc1da816be7d95271f2553 Mon Sep 17 00:00:00 2001 From: konstin Date: Sun, 10 Nov 2024 10:53:02 +0100 Subject: [PATCH 3/5] Fix algorithm with new proptest --- version-ranges/src/lib.rs | 117 ++++++++++++++++++++++++++++++++------ 1 file changed, 100 insertions(+), 17 deletions(-) diff --git a/version-ranges/src/lib.rs b/version-ranges/src/lib.rs index dc6bc755..24d30cf6 100644 --- a/version-ranges/src/lib.rs +++ b/version-ranges/src/lib.rs @@ -879,7 +879,7 @@ impl IntoIterator for Ranges { } } -impl FromIterator<(Bound, Bound)> for Ranges { +impl FromIterator<(Bound, Bound)> for Ranges { /// Constructor from arbitrary, unsorted and potentially overlapping ranges. /// /// This is equivalent, but faster, to computing the [`Ranges::union`] of the @@ -889,6 +889,10 @@ impl FromIterator<(Bound, Bound)> for Ranges { // 1. The segments are sorted, from lowest to highest (through `Ord`): By sorting. // 2. Each segment contains at least one version (start < end): By `union`. // 3. There is at least one version between two segments: By `union`. + + // For this implementation, we choose to only build a single smallvec and insert or remove + // in it, instead of e.g. collecting the segments into one smallvec, sorting that and then + // construction the second smallvec without shifting. let mut segments: SmallVec<[Interval; 1]> = SmallVec::new(); for segment in iter { @@ -908,7 +912,8 @@ impl FromIterator<(Bound, Bound)> for Ranges { &segment.start_bound(), ); - // Is it overlapping with the following segment? + // Is it overlapping with the following segment? We'll check if there's more than one + // overlap later. let next_overlapping = insertion_point < segments.len() && !end_before_start_with_gap( &segment.end_bound(), @@ -920,29 +925,112 @@ impl FromIterator<(Bound, Bound)> for Ranges { // previous: |------| // segment: |------| // following: |------| + // final: |---------------| + // + // OR // + // previous: |------| + // segment: |-----------| + // following: |----| // final: |---------------| + // + // OR + // + // previous: |------| + // segment: |----------------| + // following: |----| |------| + // final: |------------------------| // We merge all three segments into one, which is effectively removing one of // two previously inserted and changing the bounds on the other. - let following = segments.remove(insertion_point); - segments[insertion_point - 1].1 = following.1; + + // Remove all elements covered by the final element + let mut following = segments.remove(insertion_point); + while insertion_point < segments.len() + && !end_before_start_with_gap( + &segment.end_bound(), + &segments[insertion_point].start_bound(), + ) + { + following = segments.remove(insertion_point); + } + + // Set end to max(segment.end, .end) + if cmp_bounds_end(segment.end_bound(), following.end_bound()) + .unwrap() + .is_lt() + { + segments[insertion_point - 1].1 = following.1; + } else { + segments[insertion_point - 1].1 = segment.1; + } } (true, false) => { // previous: |------| // segment: |------| // following: |------| // + // OR + // + // previous: |----------| + // segment: |---| + // following: |------| + // // final: |----------| |------| // We can reuse the existing element by extending it. - segments[insertion_point - 1].1 = segment.1; + + // Set end to max(segment.end, .end) + if cmp_bounds_end( + segments[insertion_point - 1].end_bound(), + segment.end_bound(), + ) + .unwrap() + .is_lt() + { + segments[insertion_point - 1].1 = segment.1; + } } (false, true) => { // previous: |------| // segment: |------| // following: |------| + // final: |------| |----------| + // + // OR + // + // previous: |------| + // segment: |----------| + // following: |---| + // final: |------| |----------| + // + // OR + // + // previous: |------| + // segment: |------------| + // following: |---| |------| // - // final: |------| |---------| + // final: |------| |-----------------| // We can reuse the existing element by extending it. + + // Remove all fully covered segments so the next element is the last one that + // overlaps. + while insertion_point + 1 < segments.len() + && !end_before_start_with_gap( + &segment.end_bound(), + &segments[insertion_point + 1].start_bound(), + ) + { + // We know that the one after also overlaps, so we can drop the current + // following. + segments.remove(insertion_point); + } + + // Set end to max(segment.end, .end) + if cmp_bounds_end(segments[insertion_point].end_bound(), segment.end_bound()) + .unwrap() + .is_lt() + { + segments[insertion_point].1 = segment.1; + } segments[insertion_point].0 = segment.0; } (false, false) => { @@ -1267,8 +1355,12 @@ pub mod tests { #[test] fn from_iter_valid(segments in proptest::collection::vec(any::<(Bound, Bound)>(), ..30)) { - // We check invariants in the method. - Ranges::from_iter(segments.clone()); + let mut expected = Ranges::empty(); + for segment in &segments { + expected = expected.union(&Ranges::from_range_bounds(*segment)); + } + let actual = Ranges::from_iter(segments.clone()); + assert_eq!(expected, actual, "{segments:?}"); } } @@ -1306,15 +1398,6 @@ pub mod tests { ); } - #[test] - fn from_iter_regression() { - Ranges::from_iter([ - (Included(0), Included(0)), - (Excluded(1u32), Unbounded), - (Included(0), Included(0)), - ]); - } - #[test] fn version_ord() { let versions: &[Ranges] = &[ From c8960a540eed5af44594b1cbfac00ba0bc210404 Mon Sep 17 00:00:00 2001 From: konstin Date: Sun, 10 Nov 2024 10:55:11 +0100 Subject: [PATCH 4/5] Sorting comment --- version-ranges/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/version-ranges/src/lib.rs b/version-ranges/src/lib.rs index 24d30cf6..607cf923 100644 --- a/version-ranges/src/lib.rs +++ b/version-ranges/src/lib.rs @@ -891,7 +891,7 @@ impl FromIterator<(Bound, Bound)> for Ranges { // 3. There is at least one version between two segments: By `union`. // For this implementation, we choose to only build a single smallvec and insert or remove - // in it, instead of e.g. collecting the segments into one smallvec, sorting that and then + // in it, instead of e.g. collecting the segments into a sorted datastructure first and then // construction the second smallvec without shifting. let mut segments: SmallVec<[Interval; 1]> = SmallVec::new(); From fe37b9106a5971cdab5dff693e303855a75f3423 Mon Sep 17 00:00:00 2001 From: konstin Date: Thu, 14 Nov 2024 14:16:21 +0100 Subject: [PATCH 5/5] Review --- version-ranges/src/lib.rs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/version-ranges/src/lib.rs b/version-ranges/src/lib.rs index 607cf923..8b19881b 100644 --- a/version-ranges/src/lib.rs +++ b/version-ranges/src/lib.rs @@ -887,8 +887,16 @@ impl FromIterator<(Bound, Bound)> for Ranges { fn from_iter, Bound)>>(iter: T) -> Self { // We have three constraints we need to fulfil: // 1. The segments are sorted, from lowest to highest (through `Ord`): By sorting. - // 2. Each segment contains at least one version (start < end): By `union`. - // 3. There is at least one version between two segments: By `union`. + // 2. Each segment contains at least one version (start < end): By skipping invalid + // segments. + // 3. There is at least one version between two segments: By merging overlapping elements. + // + // Technically, the implementation has a O(n²) worst case complexity since we're inserting + // and removing. This has two motivations: One is that we don't have any performance + // critical usages of this method as of this writing, so we have no real world benchmark. + // The other is that we get the elements from an iterator, so to avoid moving elements + // around we would first need to build a different, sorted collection with extra + // allocation(s), before we could build our real segments. --Konsti // For this implementation, we choose to only build a single smallvec and insert or remove // in it, instead of e.g. collecting the segments into a sorted datastructure first and then @@ -1039,6 +1047,9 @@ impl FromIterator<(Bound, Bound)> for Ranges { // following: |------| // // final: |------| |------| |------| + + // This line is O(n), which makes the algorithm O(n²), but it should be good + // enough for now. segments.insert(insertion_point, segment); } }