From e670baf5a8952d9191906696b134172529b2b6b5 Mon Sep 17 00:00:00 2001 From: konstin Date: Thu, 12 Dec 2024 16:12:14 +0100 Subject: [PATCH] Return and track affected and culprit on conflicts Whenever we either discard a version due to its dependencies or perform conflict resolution, we return the last conflict that led to discarding them. In cargo, we use this information for prioritization, which speeds up resolution (`cargo run -r -- -m pub --with-solana --filter solana-archiver-lib -t 16` goes from 90s to 20s on my machine). Configurations that are noticeably slower for the solana test case: * All incompatibilities unit propagation * Only the last root cause in unit propagation * No incompatibilities from unit propagation * No incompatibilities from `add_version` * Only affect counts (without culprit counts) * Backtracking with the same heuristic as https://github.com/astral-sh/uv/pull/9843 (backtracking once after affected hits 5) In uv, we use this to re-prioritize and backtrack when a package decision accumulated to many conflicts. Since we have our own solver loop, we add the incompatibility to our own tracking instead. Built on https://github.com/pubgrub-rs/pubgrub/pull/291 ## Benchmarks Main: ``` index commit hash: 82086e46740d7a9303216bfac093e7268a95121f index commit time: 2024-11-30T18:18:14Z index size: 32 solana in index: 32 Pub CPU time: 1215.49s == 20.26min Cargo CPU time: skipped Cargo check lock CPU time: skipped Pub check lock CPU time: skipped Wall time: 80.58s == 1.34min ``` With https://github.com/pubgrub-rs/pubgrub/pull/291: ``` index commit hash: 82086e46740d7a9303216bfac093e7268a95121f index commit time: 2024-11-30T18:18:14Z index size: 32 solana in index: 32 Pub CPU time: 467.73s == 7.80min Cargo CPU time: skipped Cargo check lock CPU time: skipped Pub check lock CPU time: skipped Wall time: 34.76s == 0.58min ``` This PR: ``` index commit hash: 82086e46740d7a9303216bfac093e7268a95121f index commit time: 2024-11-30T18:18:14Z index size: 32 solana in index: 32 Pub CPU time: 271.79s == 4.53min Cargo CPU time: skipped Cargo check lock CPU time: skipped Pub check lock CPU time: skipped Wall time: 20.17s == 0.34min ``` --- examples/caching_dependency_provider.rs | 15 ++++-- src/internal/core.rs | 19 +++++-- src/internal/partial_solution.rs | 67 ++++++++++++++--------- src/lib.rs | 6 +-- src/provider.rs | 23 +++++--- src/solver.rs | 71 +++++++++++++++++++++++-- tests/proptest.rs | 22 +++++--- 7 files changed, 171 insertions(+), 52 deletions(-) diff --git a/examples/caching_dependency_provider.rs b/examples/caching_dependency_provider.rs index f6798a29..e9728902 100644 --- a/examples/caching_dependency_provider.rs +++ b/examples/caching_dependency_provider.rs @@ -2,7 +2,10 @@ use std::cell::RefCell; -use pubgrub::{resolve, Dependencies, DependencyProvider, OfflineDependencyProvider, Ranges}; +use pubgrub::{ + resolve, Dependencies, DependencyProvider, OfflineDependencyProvider, + PackageResolutionStatistics, Ranges, +}; type NumVS = Ranges; @@ -57,8 +60,14 @@ impl> DependencyProvider for CachingDependenc type Priority = DP::Priority; - fn prioritize(&self, package: &DP::P, ranges: &DP::VS) -> Self::Priority { - self.remote_dependencies.prioritize(package, ranges) + fn prioritize( + &self, + package: &Self::P, + range: &Self::VS, + package_statistics: &PackageResolutionStatistics, + ) -> Self::Priority { + self.remote_dependencies + .prioritize(package, range, package_statistics) } type Err = DP::Err; diff --git a/src/internal/core.rs b/src/internal/core.rs index 9e6d2181..a0ef8ff5 100644 --- a/src/internal/core.rs +++ b/src/internal/core.rs @@ -7,8 +7,8 @@ use std::collections::HashSet as Set; use std::sync::Arc; use crate::internal::{ - Arena, DecisionLevel, HashArena, Id, IncompDpId, Incompatibility, PartialSolution, Relation, - SatisfierSearch, SmallVec, + Arena, DecisionLevel, HashArena, Id, IncompDpId, IncompId, Incompatibility, PartialSolution, + Relation, SatisfierSearch, SmallVec, }; use crate::{DependencyProvider, DerivationTree, Map, NoSolutionError, VersionSet}; @@ -79,7 +79,7 @@ impl State { package: Id, version: DP::V, dependencies: impl IntoIterator, - ) { + ) -> Option> { let dep_incompats = self.add_incompatibility_from_dependencies(package, version.clone(), dependencies); self.partial_solution.add_package_version_dependencies( @@ -124,8 +124,16 @@ impl State { /// Unit propagation is the core mechanism of the solving algorithm. /// CF + /// + /// For each package with a satisfied incompatibility, returns the package and the root cause + /// incompatibility. #[cold] - pub fn unit_propagation(&mut self, package: Id) -> Result<(), NoSolutionError> { + #[allow(clippy::type_complexity)] // Type definitions don't support impl trait. + pub fn unit_propagation( + &mut self, + package: Id, + ) -> Result, IncompDpId)>, NoSolutionError> { + let mut root_causes = Vec::new(); self.unit_propagation_buffer.clear(); self.unit_propagation_buffer.push(package); while let Some(current_package) = self.unit_propagation_buffer.pop() { @@ -183,6 +191,7 @@ impl State { .map_err(|terminal_incompat_id| { self.build_derivation_tree(terminal_incompat_id) })?; + root_causes.push((package, root_cause)); self.unit_propagation_buffer.clear(); self.unit_propagation_buffer.push(package_almost); // Add to the partial solution with incompat as cause. @@ -198,7 +207,7 @@ impl State { } } // If there are no more changed packages, unit propagation is done. - Ok(()) + Ok(root_causes) } /// Return the root cause or the terminal incompatibility. diff --git a/src/internal/partial_solution.rs b/src/internal/partial_solution.rs index d03506d6..c0d037db 100644 --- a/src/internal/partial_solution.rs +++ b/src/internal/partial_solution.rs @@ -295,7 +295,7 @@ impl PartialSolution { #[cold] pub fn pick_highest_priority_pkg( &mut self, - prioritizer: impl Fn(Id, &DP::VS) -> DP::Priority, + mut prioritizer: impl FnMut(Id, &DP::VS) -> DP::Priority, ) -> Option> { let check_all = self.changed_this_decision_level == self.current_decision_level.0.saturating_sub(1) as usize; @@ -331,7 +331,21 @@ impl PartialSolution { .map(|(&p, pa)| match &pa.assignments_intersection { AssignmentsIntersection::Decision((_, v, _)) => (p, v.clone()), AssignmentsIntersection::Derivations(_) => { - panic!("Derivations in the Decision part") + let mut context = String::new(); + for (id, assignment) in self + .package_assignments + .iter() + .take(self.current_decision_level.0 as usize) + { + context.push_str(&format!( + " * {:?} {:?}\n", + id, assignment.assignments_intersection + )); + } + panic!( + "Derivations in the Decision part. Decision level {}\n{}", + self.current_decision_level.0, context + ) } }) } @@ -414,34 +428,39 @@ impl PartialSolution { version: DP::V, new_incompatibilities: std::ops::Range>, store: &Arena>, - ) { + ) -> Option> { if !self.has_ever_backtracked { - // Nothing has yet gone wrong during this resolution. This call is unlikely to be the first problem. + // Fast path: Nothing has yet gone wrong during this resolution. This call is unlikely to be the first problem. // So let's live with a little bit of risk and add the decision without checking the dependencies. // The worst that can happen is we will have to do a full backtrack which only removes this one decision. log::info!("add_decision: {package:?} @ {version} without checking dependencies"); self.add_decision(package, version); + return None; + } + + // Check if any of the dependencies preclude deciding on this crate version. + let package_term = Term::exact(version.clone()); + let relation = |incompat: IncompId| { + store[incompat].relation(|p| { + // The current package isn't part of the package assignments yet. + if p == package { + Some(&package_term) + } else { + self.term_intersection_for_package(p) + } + }) + }; + if let Some(satisfied) = Id::range_to_iter(new_incompatibilities) + .find(|incompat| relation(*incompat) == Relation::Satisfied) + { + log::info!( + "rejecting decision {package:?} @ {version} because its dependencies conflict" + ); + Some(satisfied) } else { - // Check if any of the new dependencies preclude deciding on this crate version. - let exact = Term::exact(version.clone()); - let not_satisfied = |incompat: &Incompatibility| { - incompat.relation(|p| { - if p == package { - Some(&exact) - } else { - self.term_intersection_for_package(p) - } - }) != Relation::Satisfied - }; - - // Check none of the dependencies (new_incompatibilities) - // would create a conflict (be satisfied). - if store[new_incompatibilities].iter().all(not_satisfied) { - log::info!("add_decision: {package:?} @ {version}"); - self.add_decision(package, version); - } else { - log::info!("not adding {package:?} @ {version} because of its dependencies",); - } + log::info!("adding decision: {package:?} @ {version}"); + self.add_decision(package, version); + None } } diff --git a/src/lib.rs b/src/lib.rs index 38ed470f..8ce89d6e 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -71,7 +71,7 @@ //! and [SemanticVersion] for versions. //! This may be done quite easily by implementing the three following functions. //! ``` -//! # use pubgrub::{DependencyProvider, Dependencies, SemanticVersion, Ranges, DependencyConstraints, Map}; +//! # use pubgrub::{DependencyProvider, Dependencies, SemanticVersion, Ranges, DependencyConstraints, Map, PackageResolutionStatistics}; //! # use std::error::Error; //! # use std::borrow::Borrow; //! # use std::convert::Infallible; @@ -86,7 +86,7 @@ //! } //! //! type Priority = usize; -//! fn prioritize(&self, package: &String, range: &SemVS) -> Self::Priority { +//! fn prioritize(&self, package: &String, range: &SemVS, conflicts_counts: &PackageResolutionStatistics) -> Self::Priority { //! unimplemented!() //! } //! @@ -227,7 +227,7 @@ pub use report::{ DefaultStringReportFormatter, DefaultStringReporter, DerivationTree, Derived, External, ReportFormatter, Reporter, }; -pub use solver::{resolve, Dependencies, DependencyProvider}; +pub use solver::{resolve, Dependencies, DependencyProvider, PackageResolutionStatistics}; pub use term::Term; pub use type_aliases::{DependencyConstraints, Map, SelectedDependencies, Set}; pub use version::{SemanticVersion, VersionParseError}; diff --git a/src/provider.rs b/src/provider.rs index 83268795..aad87175 100644 --- a/src/provider.rs +++ b/src/provider.rs @@ -2,6 +2,7 @@ use std::cmp::Reverse; use std::collections::BTreeMap; use std::convert::Infallible; +use crate::solver::PackageResolutionStatistics; use crate::{Dependencies, DependencyConstraints, DependencyProvider, Map, Package, VersionSet}; /// A basic implementation of [DependencyProvider]. @@ -92,15 +93,23 @@ impl DependencyProvider for OfflineDependencyProvide .and_then(|versions| versions.keys().rev().find(|v| range.contains(v)).cloned())) } - type Priority = Reverse; + type Priority = (u32, Reverse); #[inline] - fn prioritize(&self, package: &P, range: &VS) -> Self::Priority { - Reverse( - self.dependencies - .get(package) - .map(|versions| versions.keys().filter(|v| range.contains(v)).count()) - .unwrap_or(0), + fn prioritize( + &self, + package: &Self::P, + range: &Self::VS, + package_statistics: &PackageResolutionStatistics, + ) -> Self::Priority { + ( + package_statistics.conflict_count(), + Reverse( + self.dependencies + .get(package) + .map(|versions| versions.keys().filter(|v| range.contains(v)).count()) + .unwrap_or(0), + ), ) } diff --git a/src/solver.rs b/src/solver.rs index fbbf3ebe..82b6e2be 100644 --- a/src/solver.rs +++ b/src/solver.rs @@ -68,6 +68,31 @@ use log::{debug, info}; use crate::internal::{Id, Incompatibility, State}; use crate::{DependencyConstraints, Map, Package, PubGrubError, SelectedDependencies, VersionSet}; +/// Statistics on how often a package conflicted with other packages. +#[derive(Debug, Default, Clone)] +pub struct PackageResolutionStatistics { + unit_propagation_affected: u32, + unit_propagation_culprit: u32, + dependencies_affected: u32, + dependencies_culprit: u32, +} + +impl PackageResolutionStatistics { + /// The number of conflicts this package was involved in. + /// + /// Processing packages with a high conflict count earlier usually speeds up resolution. + /// + /// Whenever a package is part of the root cause incompatibility of a conflict, we increase its + /// count by one. Since the structure of the incompatibilities may change, this count too may + /// change in the future. + pub fn conflict_count(&self) -> u32 { + self.unit_propagation_affected + + self.unit_propagation_culprit + + self.dependencies_affected + + self.dependencies_culprit + } +} + /// Main function of the library. /// Finds a set of packages satisfying dependency bounds for a given package + version pair. #[cold] @@ -77,6 +102,7 @@ pub fn resolve( version: impl Into, ) -> Result, PubGrubError> { let mut state: State = State::init(package.clone(), version.into()); + let mut conflict_tracker: Map, PackageResolutionStatistics> = Map::default(); let mut added_dependencies: Map, Set> = Map::default(); let mut next = state.root_package; loop { @@ -88,7 +114,22 @@ pub fn resolve( "unit_propagation: {:?} = '{}'", &next, state.package_store[next] ); - state.unit_propagation(next)?; + let root_causes = state.unit_propagation(next)?; + for (affected, incompat) in root_causes { + conflict_tracker + .entry(affected) + .or_default() + .unit_propagation_affected += 1; + for (conflict_package, _) in state.incompatibility_store[incompat].iter() { + if conflict_package == affected { + continue; + } + conflict_tracker + .entry(conflict_package) + .or_default() + .unit_propagation_culprit += 1; + } + } debug!( "Partial solution after unit propagation: {}", @@ -97,7 +138,11 @@ pub fn resolve( let Some(highest_priority_pkg) = state.partial_solution.pick_highest_priority_pkg(|p, r| { - dependency_provider.prioritize(&state.package_store[p], r) + dependency_provider.prioritize( + &state.package_store[p], + r, + conflict_tracker.entry(p).or_default(), + ) }) else { return Ok(state @@ -171,7 +216,20 @@ pub fn resolve( }; // Add that package and version if the dependencies are not problematic. - state.add_package_version_dependencies(p, v.clone(), dependencies); + if let Some(conflict) = + state.add_package_version_dependencies(p, v.clone(), dependencies) + { + conflict_tracker.entry(p).or_default().dependencies_affected += 1; + for (incompat_package, _) in state.incompatibility_store[conflict].iter() { + if incompat_package == p { + continue; + } + conflict_tracker + .entry(incompat_package) + .or_default() + .unit_propagation_culprit += 1; + } + } } else { // `dep_incompats` are already in `incompatibilities` so we know there are not satisfied // terms and can add the decision directly. @@ -249,7 +307,12 @@ pub trait DependencyProvider { /// /// Note: the resolver may call this even when the range has not changed, /// if it is more efficient for the resolvers internal data structures. - fn prioritize(&self, package: &Self::P, range: &Self::VS) -> Self::Priority; + fn prioritize( + &self, + package: &Self::P, + range: &Self::VS, + package_conflicts_counts: &PackageResolutionStatistics, + ) -> Self::Priority; /// The type returned from `prioritize`. The resolver does not care what type this is /// as long as it can pick a largest one and clone it. /// diff --git a/tests/proptest.rs b/tests/proptest.rs index 65d4753b..74305aad 100644 --- a/tests/proptest.rs +++ b/tests/proptest.rs @@ -13,8 +13,8 @@ use proptest::string::string_regex; use pubgrub::{ resolve, DefaultStringReporter, Dependencies, DependencyProvider, DerivationTree, External, - OfflineDependencyProvider, Package, PubGrubError, Ranges, Reporter, SelectedDependencies, - VersionSet, + OfflineDependencyProvider, Package, PackageResolutionStatistics, PubGrubError, Ranges, + Reporter, SelectedDependencies, VersionSet, }; use crate::sat_dependency_provider::SatResolve; @@ -49,8 +49,13 @@ impl DependencyProvider for OldestVersionsDependency type Priority = as DependencyProvider>::Priority; - fn prioritize(&self, package: &P, range: &VS) -> Self::Priority { - self.0.prioritize(package, range) + fn prioritize( + &self, + package: &Self::P, + range: &Self::VS, + package_statistics: &PackageResolutionStatistics, + ) -> Self::Priority { + self.0.prioritize(package, range, package_statistics) } type Err = Infallible; @@ -104,8 +109,13 @@ impl DependencyProvider for TimeoutDependencyProvider Self::Priority { - self.dp.prioritize(package, range) + fn prioritize( + &self, + package: &Self::P, + range: &Self::VS, + package_statistics: &PackageResolutionStatistics, + ) -> Self::Priority { + self.dp.prioritize(package, range, package_statistics) } type Err = DP::Err;