From ad93d54bf3aa83ccafb2159c40573fcb252f8bfc Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Sat, 20 Jul 2024 14:13:50 -0400 Subject: [PATCH 1/3] cache "concrete" dists by Distribution instead of InstallRequirement --- news/12863.trivial.rst | 1 + src/pip/_internal/distributions/__init__.py | 5 + src/pip/_internal/distributions/base.py | 19 +++- src/pip/_internal/distributions/installed.py | 14 ++- src/pip/_internal/distributions/sdist.py | 20 +++- src/pip/_internal/distributions/wheel.py | 4 +- src/pip/_internal/metadata/base.py | 21 ++++ .../_internal/metadata/importlib/_dists.py | 19 +++- src/pip/_internal/metadata/importlib/_envs.py | 8 +- src/pip/_internal/metadata/pkg_resources.py | 15 ++- src/pip/_internal/operations/prepare.py | 47 +++++---- src/pip/_internal/req/req_install.py | 96 ++++++++++++++----- .../resolution/resolvelib/resolver.py | 2 +- .../metadata/test_metadata_pkg_resources.py | 1 + tests/unit/test_req.py | 31 +++++- 15 files changed, 231 insertions(+), 72 deletions(-) create mode 100644 news/12863.trivial.rst diff --git a/news/12863.trivial.rst b/news/12863.trivial.rst new file mode 100644 index 00000000000..dc36a82a0df --- /dev/null +++ b/news/12863.trivial.rst @@ -0,0 +1 @@ +Cache "concrete" dists by ``Distribution`` instead of ``InstallRequirement``. diff --git a/src/pip/_internal/distributions/__init__.py b/src/pip/_internal/distributions/__init__.py index 9a89a838b9a..f6089daf40d 100644 --- a/src/pip/_internal/distributions/__init__.py +++ b/src/pip/_internal/distributions/__init__.py @@ -1,4 +1,5 @@ from pip._internal.distributions.base import AbstractDistribution +from pip._internal.distributions.installed import InstalledDistribution from pip._internal.distributions.sdist import SourceDistribution from pip._internal.distributions.wheel import WheelDistribution from pip._internal.req.req_install import InstallRequirement @@ -8,6 +9,10 @@ def make_distribution_for_install_requirement( install_req: InstallRequirement, ) -> AbstractDistribution: """Returns a Distribution for the given InstallRequirement""" + # Only pre-installed requirements will have a .satisfied_by dist. + if install_req.satisfied_by: + return InstalledDistribution(install_req) + # Editable requirements will always be source distributions. They use the # legacy logic until we create a modern standard for them. if install_req.editable: diff --git a/src/pip/_internal/distributions/base.py b/src/pip/_internal/distributions/base.py index 6e4d0c91a90..0a132b88f98 100644 --- a/src/pip/_internal/distributions/base.py +++ b/src/pip/_internal/distributions/base.py @@ -37,11 +37,17 @@ def build_tracker_id(self) -> Optional[str]: If None, then this dist has no work to do in the build tracker, and ``.prepare_distribution_metadata()`` will not be called.""" - raise NotImplementedError() + ... @abc.abstractmethod def get_metadata_distribution(self) -> BaseDistribution: - raise NotImplementedError() + """Generate a concrete ``BaseDistribution`` instance for this artifact. + + The implementation should also cache the result with + ``self.req.cache_concrete_dist()`` so the distribution is available to other + users of the ``InstallRequirement``. This method is not called within the build + tracker context, so it should not identify any new setup requirements.""" + ... @abc.abstractmethod def prepare_distribution_metadata( @@ -50,4 +56,11 @@ def prepare_distribution_metadata( build_isolation: bool, check_build_deps: bool, ) -> None: - raise NotImplementedError() + """Generate the information necessary to extract metadata from the artifact. + + This method will be executed within the context of ``BuildTracker#track()``, so + it needs to fully identify any setup requirements so they can be added to the + same active set of tracked builds, while ``.get_metadata_distribution()`` takes + care of generating and caching the ``BaseDistribution`` to expose to the rest of + the resolve.""" + ... diff --git a/src/pip/_internal/distributions/installed.py b/src/pip/_internal/distributions/installed.py index ab8d53be740..83e99fca9ca 100644 --- a/src/pip/_internal/distributions/installed.py +++ b/src/pip/_internal/distributions/installed.py @@ -1,9 +1,11 @@ -from typing import Optional +from typing import TYPE_CHECKING, Optional from pip._internal.distributions.base import AbstractDistribution -from pip._internal.index.package_finder import PackageFinder from pip._internal.metadata import BaseDistribution +if TYPE_CHECKING: + from pip._internal.index.package_finder import PackageFinder + class InstalledDistribution(AbstractDistribution): """Represents an installed package. @@ -17,12 +19,14 @@ def build_tracker_id(self) -> Optional[str]: return None def get_metadata_distribution(self) -> BaseDistribution: - assert self.req.satisfied_by is not None, "not actually installed" - return self.req.satisfied_by + dist = self.req.satisfied_by + assert dist is not None, "not actually installed" + self.req.cache_concrete_dist(dist) + return dist def prepare_distribution_metadata( self, - finder: PackageFinder, + finder: "PackageFinder", build_isolation: bool, check_build_deps: bool, ) -> None: diff --git a/src/pip/_internal/distributions/sdist.py b/src/pip/_internal/distributions/sdist.py index 28ea5cea16c..3e4a3e8d371 100644 --- a/src/pip/_internal/distributions/sdist.py +++ b/src/pip/_internal/distributions/sdist.py @@ -1,10 +1,10 @@ import logging -from typing import TYPE_CHECKING, Iterable, Optional, Set, Tuple +from typing import TYPE_CHECKING, Iterable, Set, Tuple from pip._internal.build_env import BuildEnvironment from pip._internal.distributions.base import AbstractDistribution from pip._internal.exceptions import InstallationError -from pip._internal.metadata import BaseDistribution +from pip._internal.metadata import BaseDistribution, get_directory_distribution from pip._internal.utils.subprocess import runner_with_spinner_message if TYPE_CHECKING: @@ -21,13 +21,19 @@ class SourceDistribution(AbstractDistribution): """ @property - def build_tracker_id(self) -> Optional[str]: + def build_tracker_id(self) -> str: """Identify this requirement uniquely by its link.""" assert self.req.link return self.req.link.url_without_fragment def get_metadata_distribution(self) -> BaseDistribution: - return self.req.get_dist() + assert ( + self.req.metadata_directory + ), "Set as part of .prepare_distribution_metadata()" + dist = get_directory_distribution(self.req.metadata_directory) + self.req.cache_concrete_dist(dist) + self.req.validate_sdist_metadata() + return dist def prepare_distribution_metadata( self, @@ -66,7 +72,11 @@ def prepare_distribution_metadata( self._raise_conflicts("the backend dependencies", conflicting) if missing: self._raise_missing_reqs(missing) - self.req.prepare_metadata() + + # NB: we must still call .cache_concrete_dist() and .validate_sdist_metadata() + # before the InstallRequirement itself has been updated with the metadata from + # this directory! + self.req.prepare_metadata_directory() def _prepare_build_backend(self, finder: "PackageFinder") -> None: # Isolate in a BuildEnvironment and install the build-time diff --git a/src/pip/_internal/distributions/wheel.py b/src/pip/_internal/distributions/wheel.py index bfadd39dcb7..e75c7910379 100644 --- a/src/pip/_internal/distributions/wheel.py +++ b/src/pip/_internal/distributions/wheel.py @@ -31,7 +31,9 @@ def get_metadata_distribution(self) -> BaseDistribution: assert self.req.local_file_path, "Set as part of preparation during download" assert self.req.name, "Wheels are never unnamed" wheel = FilesystemWheel(self.req.local_file_path) - return get_wheel_distribution(wheel, canonicalize_name(self.req.name)) + dist = get_wheel_distribution(wheel, canonicalize_name(self.req.name)) + self.req.cache_concrete_dist(dist) + return dist def prepare_distribution_metadata( self, diff --git a/src/pip/_internal/metadata/base.py b/src/pip/_internal/metadata/base.py index 9eabcdb278b..7e8c5b6f177 100644 --- a/src/pip/_internal/metadata/base.py +++ b/src/pip/_internal/metadata/base.py @@ -97,6 +97,15 @@ class RequiresEntry(NamedTuple): class BaseDistribution(Protocol): + @property + def is_concrete(self) -> bool: + """Whether the distribution really exists somewhere on disk. + + If this is false, it has been synthesized from metadata, e.g. via + ``.from_metadata_file_contents()``, or ``.from_wheel()`` against + a ``MemoryWheel``.""" + raise NotImplementedError() + @classmethod def from_directory(cls, directory: str) -> "BaseDistribution": """Load the distribution from a metadata directory. @@ -667,6 +676,10 @@ def iter_installed_distributions( class Wheel(Protocol): location: str + @property + def is_concrete(self) -> bool: + raise NotImplementedError() + def as_zipfile(self) -> zipfile.ZipFile: raise NotImplementedError() @@ -675,6 +688,10 @@ class FilesystemWheel(Wheel): def __init__(self, location: str) -> None: self.location = location + @property + def is_concrete(self) -> bool: + return True + def as_zipfile(self) -> zipfile.ZipFile: return zipfile.ZipFile(self.location, allowZip64=True) @@ -684,5 +701,9 @@ def __init__(self, location: str, stream: IO[bytes]) -> None: self.location = location self.stream = stream + @property + def is_concrete(self) -> bool: + return False + def as_zipfile(self) -> zipfile.ZipFile: return zipfile.ZipFile(self.stream, allowZip64=True) diff --git a/src/pip/_internal/metadata/importlib/_dists.py b/src/pip/_internal/metadata/importlib/_dists.py index 36cd326232e..63b5ff1a012 100644 --- a/src/pip/_internal/metadata/importlib/_dists.py +++ b/src/pip/_internal/metadata/importlib/_dists.py @@ -102,16 +102,22 @@ def __init__( dist: importlib.metadata.Distribution, info_location: Optional[BasePath], installed_location: Optional[BasePath], + concrete: bool, ) -> None: self._dist = dist self._info_location = info_location self._installed_location = installed_location + self._concrete = concrete + + @property + def is_concrete(self) -> bool: + return self._concrete @classmethod def from_directory(cls, directory: str) -> BaseDistribution: info_location = pathlib.Path(directory) dist = importlib.metadata.Distribution.at(info_location) - return cls(dist, info_location, info_location.parent) + return cls(dist, info_location, info_location.parent, concrete=True) @classmethod def from_metadata_file_contents( @@ -128,7 +134,7 @@ def from_metadata_file_contents( metadata_path.write_bytes(metadata_contents) # Construct dist pointing to the newly created directory. dist = importlib.metadata.Distribution.at(metadata_path.parent) - return cls(dist, metadata_path.parent, None) + return cls(dist, metadata_path.parent, None, concrete=False) @classmethod def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution: @@ -137,7 +143,14 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution: dist = WheelDistribution.from_zipfile(zf, name, wheel.location) except zipfile.BadZipFile as e: raise InvalidWheel(wheel.location, name) from e - return cls(dist, dist.info_location, pathlib.PurePosixPath(wheel.location)) + except UnsupportedWheel as e: + raise UnsupportedWheel(f"{name} has an invalid wheel, {e}") + return cls( + dist, + dist.info_location, + pathlib.PurePosixPath(wheel.location), + concrete=wheel.is_concrete, + ) @property def location(self) -> Optional[str]: diff --git a/src/pip/_internal/metadata/importlib/_envs.py b/src/pip/_internal/metadata/importlib/_envs.py index 70cb7a6009a..b7355d4935c 100644 --- a/src/pip/_internal/metadata/importlib/_envs.py +++ b/src/pip/_internal/metadata/importlib/_envs.py @@ -80,7 +80,7 @@ def find(self, location: str) -> Iterator[BaseDistribution]: installed_location: Optional[BasePath] = None else: installed_location = info_location.parent - yield Distribution(dist, info_location, installed_location) + yield Distribution(dist, info_location, installed_location, concrete=True) def find_linked(self, location: str) -> Iterator[BaseDistribution]: """Read location in egg-link files and return distributions in there. @@ -104,7 +104,7 @@ def find_linked(self, location: str) -> Iterator[BaseDistribution]: continue target_location = str(path.joinpath(target_rel)) for dist, info_location in self._find_impl(target_location): - yield Distribution(dist, info_location, path) + yield Distribution(dist, info_location, path, concrete=True) def _find_eggs_in_dir(self, location: str) -> Iterator[BaseDistribution]: from pip._vendor.pkg_resources import find_distributions @@ -116,7 +116,7 @@ def _find_eggs_in_dir(self, location: str) -> Iterator[BaseDistribution]: if not entry.name.endswith(".egg"): continue for dist in find_distributions(entry.path): - yield legacy.Distribution(dist) + yield legacy.Distribution(dist, concrete=True) def _find_eggs_in_zip(self, location: str) -> Iterator[BaseDistribution]: from pip._vendor.pkg_resources import find_eggs_in_zip @@ -128,7 +128,7 @@ def _find_eggs_in_zip(self, location: str) -> Iterator[BaseDistribution]: except zipimport.ZipImportError: return for dist in find_eggs_in_zip(importer, location): - yield legacy.Distribution(dist) + yield legacy.Distribution(dist, concrete=True) def find_eggs(self, location: str) -> Iterator[BaseDistribution]: """Find eggs in a location. diff --git a/src/pip/_internal/metadata/pkg_resources.py b/src/pip/_internal/metadata/pkg_resources.py index 4ea84f93a6f..428b1f4a7dd 100644 --- a/src/pip/_internal/metadata/pkg_resources.py +++ b/src/pip/_internal/metadata/pkg_resources.py @@ -81,8 +81,9 @@ def run_script(self, script_name: str, namespace: str) -> None: class Distribution(BaseDistribution): - def __init__(self, dist: pkg_resources.Distribution) -> None: + def __init__(self, dist: pkg_resources.Distribution, concrete: bool) -> None: self._dist = dist + self._concrete = concrete # This is populated lazily, to avoid loading metadata for all possible # distributions eagerly. self.__extra_mapping: Optional[Mapping[NormalizedName, str]] = None @@ -96,6 +97,10 @@ def _extra_mapping(self) -> Mapping[NormalizedName, str]: return self.__extra_mapping + @property + def is_concrete(self) -> bool: + return self._concrete + @classmethod def from_directory(cls, directory: str) -> BaseDistribution: dist_dir = directory.rstrip(os.sep) @@ -114,7 +119,7 @@ def from_directory(cls, directory: str) -> BaseDistribution: dist_name = os.path.splitext(dist_dir_name)[0].split("-")[0] dist = dist_cls(base_dir, project_name=dist_name, metadata=metadata) - return cls(dist) + return cls(dist, concrete=True) @classmethod def from_metadata_file_contents( @@ -131,7 +136,7 @@ def from_metadata_file_contents( metadata=InMemoryMetadata(metadata_dict, filename), project_name=project_name, ) - return cls(dist) + return cls(dist, concrete=False) @classmethod def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution: @@ -152,7 +157,7 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution: metadata=InMemoryMetadata(metadata_dict, wheel.location), project_name=name, ) - return cls(dist) + return cls(dist, concrete=wheel.is_concrete) @property def location(self) -> Optional[str]: @@ -264,7 +269,7 @@ def from_paths(cls, paths: Optional[List[str]]) -> BaseEnvironment: def _iter_distributions(self) -> Iterator[BaseDistribution]: for dist in self._ws: - yield Distribution(dist) + yield Distribution(dist, concrete=True) def _search_distribution(self, name: str) -> Optional[BaseDistribution]: """Find a distribution matching the ``name`` in the environment. diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py index e6aa3447200..7ad301a3c06 100644 --- a/src/pip/_internal/operations/prepare.py +++ b/src/pip/_internal/operations/prepare.py @@ -14,7 +14,6 @@ from pip._vendor.packaging.utils import canonicalize_name from pip._internal.distributions import make_distribution_for_install_requirement -from pip._internal.distributions.installed import InstalledDistribution from pip._internal.exceptions import ( DirectoryUrlHashUnsupported, HashMismatch, @@ -190,6 +189,8 @@ def _check_download_dir( ) -> Optional[str]: """Check download_dir for previously downloaded file with correct hash If a correct file is found return its path else None + + If a file is found at the given path, but with an invalid hash, the file is deleted. """ download_path = os.path.join(download_dir, link.filename) @@ -520,7 +521,9 @@ def prepare_linked_requirement( # The file is not available, attempt to fetch only metadata metadata_dist = self._fetch_metadata_only(req) if metadata_dist is not None: - req.needs_more_preparation = True + # These reqs now have the dependency information from the downloaded + # metadata, without having downloaded the actual dist at all. + req.cache_virtual_metadata_only_dist(metadata_dist) return metadata_dist # None of the optimizations worked, fully prepare the requirement @@ -530,27 +533,27 @@ def prepare_linked_requirements_more( self, reqs: Iterable[InstallRequirement], parallel_builds: bool = False ) -> None: """Prepare linked requirements more, if needed.""" - reqs = [req for req in reqs if req.needs_more_preparation] + partially_downloaded_reqs: List[InstallRequirement] = [] for req in reqs: + if req.is_concrete: + continue + # Determine if any of these requirements were already downloaded. if self.download_dir is not None and req.link.is_wheel: hashes = self._get_linked_req_hashes(req) - file_path = _check_download_dir(req.link, self.download_dir, hashes) + # If the file is there, but doesn't match the hash, delete it and print + # a warning. We will be downloading it again via + # partially_downloaded_reqs. + file_path = _check_download_dir( + req.link, self.download_dir, hashes, warn_on_hash_mismatch=True + ) if file_path is not None: + # If the hash does match, then we still need to generate a concrete + # dist, but we don't have to download the wheel again. self._downloaded[req.link.url] = file_path - req.needs_more_preparation = False - # Prepare requirements we found were already downloaded for some - # reason. The other downloads will be completed separately. - partially_downloaded_reqs: List[InstallRequirement] = [] - for req in reqs: - if req.needs_more_preparation: - partially_downloaded_reqs.append(req) - else: - self._prepare_linked_requirement(req, parallel_builds) + partially_downloaded_reqs.append(req) - # TODO: separate this part out from RequirementPreparer when the v1 - # resolver can be removed! self._complete_partial_requirements( partially_downloaded_reqs, parallel_builds=parallel_builds, @@ -651,6 +654,7 @@ def _prepare_linked_requirement( def save_linked_requirement(self, req: InstallRequirement) -> None: assert self.download_dir is not None assert req.link is not None + assert req.is_concrete link = req.link if link.is_vcs or (link.is_existing_dir() and req.editable): # Make a .zip of the source_dir we already created. @@ -705,6 +709,8 @@ def prepare_editable_requirement( req.check_if_exists(self.use_user_site) + # This should already have been populated by the preparation of the source dist. + assert req.is_concrete return dist def prepare_installed_requirement( @@ -729,4 +735,13 @@ def prepare_installed_requirement( "completely repeatable environment, install into an " "empty virtualenv." ) - return InstalledDistribution(req).get_metadata_distribution() + dist = _get_prepared_distribution( + req, + self.build_tracker, + self.finder, + self.build_isolation, + self.check_build_deps, + ) + + assert req.is_concrete + return dist diff --git a/src/pip/_internal/req/req_install.py b/src/pip/_internal/req/req_install.py index 834bc513356..2710142f26a 100644 --- a/src/pip/_internal/req/req_install.py +++ b/src/pip/_internal/req/req_install.py @@ -7,7 +7,16 @@ import zipfile from optparse import Values from pathlib import Path -from typing import Any, Collection, Dict, Iterable, List, Optional, Sequence, Union +from typing import ( + TYPE_CHECKING, + Collection, + Dict, + Iterable, + List, + Optional, + Sequence, + Union, +) from pip._vendor.packaging.markers import Marker from pip._vendor.packaging.requirements import Requirement @@ -23,10 +32,7 @@ from pip._internal.metadata import ( BaseDistribution, get_default_environment, - get_directory_distribution, - get_wheel_distribution, ) -from pip._internal.metadata.base import FilesystemWheel from pip._internal.models.direct_url import DirectUrl from pip._internal.models.link import Link from pip._internal.operations.build.metadata import generate_metadata @@ -59,6 +65,9 @@ from pip._internal.utils.virtualenv import running_under_virtualenv from pip._internal.vcs import vcs +if TYPE_CHECKING: + import email.message + logger = logging.getLogger(__name__) @@ -150,6 +159,7 @@ def __init__( self.hash_options = hash_options if hash_options else {} self.config_settings = config_settings # Set to True after successful preparation of this requirement + # TODO: this is only used in the legacy resolver: remove this! self.prepared = False # User supplied requirement are explicitly requested for installation # by the user via CLI arguments or requirements files, as opposed to, @@ -191,8 +201,11 @@ def __init__( ) self.use_pep517 = True - # This requirement needs more preparation before it can be built - self.needs_more_preparation = False + # When a dist is computed for this requirement, cache it here so it's visible + # everywhere within pip and isn't computed more than once. This may be + # a "virtual" dist without a physical location on the filesystem, or + # a "concrete" dist which has been fully downloaded. + self._dist: Optional[BaseDistribution] = None # This requirement needs to be unpacked before it can be installed. self._archive_source: Optional[Path] = None @@ -550,11 +563,11 @@ def isolated_editable_sanity_check(self) -> None: f"Consider using a build backend that supports PEP 660." ) - def prepare_metadata(self) -> None: + def prepare_metadata_directory(self) -> None: """Ensure that project metadata is available. - Under PEP 517 and PEP 660, call the backend hook to prepare the metadata. - Under legacy processing, call setup.py egg-info. + Under PEP 517 and PEP 660, call the backend hook to prepare the metadata + directory. Under legacy processing, call setup.py egg-info. """ assert self.source_dir, f"No source dir for {self}" details = self.name or f"from {self.link}" @@ -586,6 +599,8 @@ def prepare_metadata(self) -> None: details=details, ) + def validate_sdist_metadata(self) -> None: + """Ensure that we have a dist, and ensure it corresponds to expectations.""" # Act on the newly generated metadata, based on the name and version. if not self.name: self._set_requirement() @@ -595,25 +610,54 @@ def prepare_metadata(self) -> None: self.assert_source_matches_version() @property - def metadata(self) -> Any: - if not hasattr(self, "_metadata"): - self._metadata = self.get_dist().metadata - - return self._metadata + def metadata(self) -> "email.message.Message": + return self.get_dist().metadata def get_dist(self) -> BaseDistribution: - if self.metadata_directory: - return get_directory_distribution(self.metadata_directory) - elif self.local_file_path and self.is_wheel: - assert self.req is not None - return get_wheel_distribution( - FilesystemWheel(self.local_file_path), - canonicalize_name(self.req.name), - ) - raise AssertionError( - f"InstallRequirement {self} has no metadata directory and no wheel: " - f"can't make a distribution." - ) + """Retrieve the dist resolved from this requirement. + + :raises AssertionError: if the resolver has not yet been executed. + """ + if self._dist is None: + raise AssertionError(f"{self!r} has no dist associated.") + return self._dist + + def cache_virtual_metadata_only_dist(self, dist: BaseDistribution) -> None: + """Associate a "virtual" metadata-only dist to this requirement. + + This dist cannot be installed, but it can be used to complete the resolve + process. + + :raises AssertionError: if a dist has already been associated. + :raises AssertionError: if the provided dist is "concrete", i.e. exists + somewhere on the filesystem. + """ + assert self._dist is None, self + assert not dist.is_concrete, dist + self._dist = dist + + def cache_concrete_dist(self, dist: BaseDistribution) -> None: + """Associate a "concrete" dist to this requirement. + + A concrete dist exists somewhere on the filesystem and can be installed. + + :raises AssertionError: if a concrete dist has already been associated. + :raises AssertionError: if the provided dist is not concrete. + """ + if self._dist is not None: + # If we set a dist twice for the same requirement, we must be hydrating + # a concrete dist for what was previously virtual. This will occur in the + # case of `install --dry-run` when PEP 658 metadata is available. + + # TODO(#12186): avoid setting dist twice! + # assert not self._dist.is_concrete + pass + assert dist.is_concrete + self._dist = dist + + @property + def is_concrete(self) -> bool: + return self._dist is not None and self._dist.is_concrete def assert_source_matches_version(self) -> None: assert self.source_dir, f"No source dir for {self}" diff --git a/src/pip/_internal/resolution/resolvelib/resolver.py b/src/pip/_internal/resolution/resolvelib/resolver.py index c12beef0b2a..c22d1707b5e 100644 --- a/src/pip/_internal/resolution/resolvelib/resolver.py +++ b/src/pip/_internal/resolution/resolvelib/resolver.py @@ -179,7 +179,7 @@ def resolve( self.factory.preparer.prepare_linked_requirements_more(reqs) for req in reqs: req.prepared = True - req.needs_more_preparation = False + assert req.is_concrete return req_set def get_installation_order( diff --git a/tests/unit/metadata/test_metadata_pkg_resources.py b/tests/unit/metadata/test_metadata_pkg_resources.py index 6044c14e4ca..195442cbe1e 100644 --- a/tests/unit/metadata/test_metadata_pkg_resources.py +++ b/tests/unit/metadata/test_metadata_pkg_resources.py @@ -104,6 +104,7 @@ def test_wheel_metadata_works() -> None: metadata=InMemoryMetadata({"METADATA": metadata.as_bytes()}, ""), project_name=name, ), + concrete=False, ) assert name == dist.canonical_name == dist.raw_name diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py index 8a95c058706..5757850c6b3 100644 --- a/tests/unit/test_req.py +++ b/tests/unit/test_req.py @@ -23,6 +23,7 @@ PreviousBuildDirError, ) from pip._internal.index.package_finder import PackageFinder +from pip._internal.metadata import get_metadata_distribution from pip._internal.models.direct_url import ArchiveInfo, DirectUrl, DirInfo, VcsInfo from pip._internal.models.link import Link from pip._internal.network.session import PipSession @@ -144,7 +145,11 @@ def test_no_reuse_existing_build_dir(self, data: TestData) -> None: ): resolver.resolve(reqset.all_requirements, True) - def test_environment_marker_extras(self, data: TestData) -> None: + def test_environment_marker_extras( + self, + data: TestData, + monkeypatch: pytest.MonkeyPatch, + ) -> None: """ Test that the environment marker extras are used with non-wheel installs. @@ -154,6 +159,13 @@ def test_environment_marker_extras(self, data: TestData) -> None: os.fspath(data.packages.joinpath("LocalEnvironMarker")), ) req.user_supplied = True + + def cache_concrete_dist(self, dist): # type: ignore[no-untyped-def] + self._dist = dist + + monkeypatch.setattr( + req, "cache_concrete_dist", partial(cache_concrete_dist, req) + ) reqset.add_unnamed_requirement(req) finder = make_test_finder(find_links=[data.find_links]) with self._basic_resolver(finder) as resolver: @@ -499,12 +511,23 @@ def test_download_info_local_dir(self, data: TestData) -> None: assert req.download_info.url.startswith("file://") assert isinstance(req.download_info.info, DirInfo) - def test_download_info_local_editable_dir(self, data: TestData) -> None: + def test_download_info_local_editable_dir( + self, + data: TestData, + monkeypatch: pytest.MonkeyPatch, + ) -> None: """Test that download_info is set for requirements from a local editable dir.""" finder = make_test_finder() with self._basic_resolver(finder) as resolver: ireq_url = data.packages.joinpath("FSPkg").as_uri() ireq = get_processed_req_from_line(f"-e {ireq_url}#egg=FSPkg") + + def cache_concrete_dist(self, dist): # type: ignore[no-untyped-def] + self._dist = dist + + monkeypatch.setattr( + ireq, "cache_concrete_dist", partial(cache_concrete_dist, ireq) + ) reqset = resolver.resolve([ireq], True) assert len(reqset.all_requirements) == 1 req = reqset.all_requirements[0] @@ -909,7 +932,9 @@ def test_mismatched_versions(caplog: pytest.LogCaptureFixture) -> None: metadata = email.message.Message() metadata["name"] = "simplewheel" metadata["version"] = "1.0" - req._metadata = metadata + req._dist = get_metadata_distribution( + bytes(metadata), "simplewheel-1.0.whl", "simplewheel" + ) req.assert_source_matches_version() assert caplog.records[-1].message == ( From 52133baf6c9d41f6264f64e1e71e93116ac75b4b Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Mon, 22 Jul 2024 19:09:04 -0400 Subject: [PATCH 2/3] refactor much of RequirementPreparer and add lengthy docs --- news/12871.trivial.rst | 1 + src/pip/_internal/distributions/__init__.py | 9 +- src/pip/_internal/operations/prepare.py | 324 +++++++++++--------- 3 files changed, 181 insertions(+), 153 deletions(-) create mode 100644 news/12871.trivial.rst diff --git a/news/12871.trivial.rst b/news/12871.trivial.rst new file mode 100644 index 00000000000..186e2bcb3c4 --- /dev/null +++ b/news/12871.trivial.rst @@ -0,0 +1 @@ +Refactor much of ``RequirementPreparer`` to avoid duplicated code paths for metadata-only requirements. diff --git a/src/pip/_internal/distributions/__init__.py b/src/pip/_internal/distributions/__init__.py index f6089daf40d..a870b227d1e 100644 --- a/src/pip/_internal/distributions/__init__.py +++ b/src/pip/_internal/distributions/__init__.py @@ -8,7 +8,14 @@ def make_distribution_for_install_requirement( install_req: InstallRequirement, ) -> AbstractDistribution: - """Returns a Distribution for the given InstallRequirement""" + """Returns an AbstractDistribution for the given InstallRequirement. + + As AbstractDistribution only covers installable artifacts, this method may only be + invoked at the conclusion of a resolve, when the RequirementPreparer has downloaded + the file corresponding to the resolved dist. Commands which intend to consume + metadata-only resolves without downloading should not call this method or + consume AbstractDistribution objects. + """ # Only pre-installed requirements will have a .satisfied_by dist. if install_req.satisfied_by: return InstalledDistribution(install_req) diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py index 7ad301a3c06..17e0c9c28da 100644 --- a/src/pip/_internal/operations/prepare.py +++ b/src/pip/_internal/operations/prepare.py @@ -9,7 +9,7 @@ import shutil from dataclasses import dataclass from pathlib import Path -from typing import Dict, Iterable, List, Optional +from typing import Dict, Iterable, Optional from pip._vendor.packaging.utils import canonicalize_name @@ -63,7 +63,17 @@ def _get_prepared_distribution( build_isolation: bool, check_build_deps: bool, ) -> BaseDistribution: - """Prepare a distribution for installation.""" + """Prepare a distribution for installation. + + This method will only be called by the preparer at the end of the resolve, and only + for commands which need installable artifacts (not just resolved metadata). If the + dist was previously metadata-only, the preparer must have downloaded the file + corresponding to the dist and set ``req.local_file_path``. + + This method will execute ``req.cache_concrete_dist()``, so that after invocation, + ``req.is_concrete`` will be True, because ``req.get_dist()`` will return a concrete + ``Distribution``. + """ abstract_dist = make_distribution_for_install_requirement(req) tracker_id = abstract_dist.build_tracker_id if tracker_id is not None: @@ -305,11 +315,7 @@ def _ensure_link_req_src_dir( self, req: InstallRequirement, parallel_builds: bool ) -> None: """Ensure source_dir of a linked InstallRequirement.""" - # Since source_dir is only set for editable requirements. - if req.link.is_wheel: - # We don't need to unpack wheels, so no need for a source - # directory. - return + assert not req.link.is_wheel assert req.source_dir is None if req.link.is_existing_dir(): # build local directories in-tree @@ -356,6 +362,47 @@ def _get_linked_req_hashes(self, req: InstallRequirement) -> Hashes: # showing the user what the hash should be. return req.hashes(trust_internet=False) or MissingHashes() + def _rewrite_link_and_hashes_for_cached_wheel( + self, req: InstallRequirement, hashes: Hashes + ) -> Optional[Hashes]: + """Check the hash of the requirement's source eagerly and rewrite its link. + + ``req.link`` is unconditionally rewritten to the cached wheel source so that the + requirement corresponds to where it was actually downloaded from instead of the + local cache entry. + + :returns: None if the source hash validated successfully. + """ + assert hashes + assert req.is_wheel_from_cache + assert req.download_info is not None + assert req.link.is_wheel + assert req.link.is_file + + # TODO: is it possible to avoid providing a "wrong" req.link in the first place + # in the resolver, instead of having to patch it up afterwards? + req.link = req.cached_wheel_source_link + + # We need to verify hashes, and we have found the requirement in the cache + # of locally built wheels. + if ( + isinstance(req.download_info.info, ArchiveInfo) + and req.download_info.info.hashes + and hashes.has_one_of(req.download_info.info.hashes) + ): + # At this point we know the requirement was built from a hashable source + # artifact, and we verified that the cache entry's hash of the original + # artifact matches one of the hashes we expect. We don't verify hashes + # against the cached wheel, because the wheel is not the original. + return None + + logger.warning( + "The hashes of the source archive found in cache entry " + "don't match, ignoring cached built wheel " + "and re-downloading source." + ) + return hashes + def _fetch_metadata_only( self, req: InstallRequirement, @@ -447,7 +494,7 @@ def _fetch_metadata_using_lazy_wheel( def _complete_partial_requirements( self, - partially_downloaded_reqs: Iterable[InstallRequirement], + metadata_only_reqs: Iterable[InstallRequirement], parallel_builds: bool = False, ) -> None: """Download any requirements which were only fetched by metadata.""" @@ -458,10 +505,9 @@ def _complete_partial_requirements( # Map each link to the requirement that owns it. This allows us to set # `req.local_file_path` on the appropriate requirement after passing # all the links at once into BatchDownloader. - links_to_fully_download: Dict[Link, InstallRequirement] = {} - for req in partially_downloaded_reqs: - assert req.link - links_to_fully_download[req.link] = req + links_to_fully_download: Dict[Link, InstallRequirement] = { + req.link: req for req in metadata_only_reqs + } batch_download = self._batch_download( links_to_fully_download.keys(), @@ -486,9 +532,33 @@ def _complete_partial_requirements( # This step is necessary to ensure all lazy wheels are processed # successfully by the 'download', 'wheel', and 'install' commands. - for req in partially_downloaded_reqs: + for req in metadata_only_reqs: self._prepare_linked_requirement(req, parallel_builds) + def _check_download_path(self, req: InstallRequirement) -> None: + """Check if the relevant file is already available in the download directory. + + If so, check its hash, and delete the file if the hash doesn't match.""" + if self.download_dir is None: + return + if not req.link.is_wheel: + return + + hashes = self._get_linked_req_hashes(req) + if file_path := _check_download_dir( + req.link, + self.download_dir, + hashes, + # When a locally built wheel has been found in cache, we don't warn + # about re-downloading when the already downloaded wheel hash does + # not match. This is because the hash must be checked against the + # original link, not the cached link. It that case the already + # downloaded file will be removed and re-fetched from cache (which + # implies a hash check against the cache entry's origin.json). + warn_on_hash_mismatch=not req.is_wheel_from_cache, + ): + self._downloaded[req.link.url] = file_path + def prepare_linked_requirement( self, req: InstallRequirement, parallel_builds: bool = False ) -> BaseDistribution: @@ -496,35 +566,15 @@ def prepare_linked_requirement( assert req.link self._log_preparing_link(req) with indent_log(): - # Check if the relevant file is already available - # in the download directory - file_path = None - if self.download_dir is not None and req.link.is_wheel: - hashes = self._get_linked_req_hashes(req) - file_path = _check_download_dir( - req.link, - self.download_dir, - hashes, - # When a locally built wheel has been found in cache, we don't warn - # about re-downloading when the already downloaded wheel hash does - # not match. This is because the hash must be checked against the - # original link, not the cached link. It that case the already - # downloaded file will be removed and re-fetched from cache (which - # implies a hash check against the cache entry's origin.json). - warn_on_hash_mismatch=not req.is_wheel_from_cache, - ) + # See if the file is already downloaded, and check its hash if so. + self._check_download_path(req) - if file_path is not None: - # The file is already available, so mark it as downloaded - self._downloaded[req.link.url] = file_path - else: - # The file is not available, attempt to fetch only metadata - metadata_dist = self._fetch_metadata_only(req) - if metadata_dist is not None: - # These reqs now have the dependency information from the downloaded - # metadata, without having downloaded the actual dist at all. - req.cache_virtual_metadata_only_dist(metadata_dist) - return metadata_dist + # First try to fetch only metadata. + if metadata_dist := self._fetch_metadata_only(req): + # These reqs now have the dependency information from the downloaded + # metadata, without having downloaded the actual dist at all. + req.cache_virtual_metadata_only_dist(metadata_dist) + return metadata_dist # None of the optimizations worked, fully prepare the requirement return self._prepare_linked_requirement(req, parallel_builds) @@ -533,73 +583,37 @@ def prepare_linked_requirements_more( self, reqs: Iterable[InstallRequirement], parallel_builds: bool = False ) -> None: """Prepare linked requirements more, if needed.""" - partially_downloaded_reqs: List[InstallRequirement] = [] - for req in reqs: - if req.is_concrete: - continue - - # Determine if any of these requirements were already downloaded. - if self.download_dir is not None and req.link.is_wheel: - hashes = self._get_linked_req_hashes(req) - # If the file is there, but doesn't match the hash, delete it and print - # a warning. We will be downloading it again via - # partially_downloaded_reqs. - file_path = _check_download_dir( - req.link, self.download_dir, hashes, warn_on_hash_mismatch=True - ) - if file_path is not None: - # If the hash does match, then we still need to generate a concrete - # dist, but we don't have to download the wheel again. - self._downloaded[req.link.url] = file_path - - partially_downloaded_reqs.append(req) - + metadata_only_reqs = [req for req in reqs if not req.is_concrete] self._complete_partial_requirements( - partially_downloaded_reqs, + metadata_only_reqs, parallel_builds=parallel_builds, ) - def _prepare_linked_requirement( - self, req: InstallRequirement, parallel_builds: bool - ) -> BaseDistribution: - assert req.link - link = req.link - - hashes = self._get_linked_req_hashes(req) - - if hashes and req.is_wheel_from_cache: - assert req.download_info is not None - assert link.is_wheel - assert link.is_file - # We need to verify hashes, and we have found the requirement in the cache - # of locally built wheels. - if ( - isinstance(req.download_info.info, ArchiveInfo) - and req.download_info.info.hashes - and hashes.has_one_of(req.download_info.info.hashes) - ): - # At this point we know the requirement was built from a hashable source - # artifact, and we verified that the cache entry's hash of the original - # artifact matches one of the hashes we expect. We don't verify hashes - # against the cached wheel, because the wheel is not the original. - hashes = None - else: - logger.warning( - "The hashes of the source archive found in cache entry " - "don't match, ignoring cached built wheel " - "and re-downloading source." - ) - req.link = req.cached_wheel_source_link - link = req.link + def _ensure_local_file_path( + self, req: InstallRequirement, hashes: Optional[Hashes] + ) -> None: + """Ensure that ``req.link`` is downloaded locally, matches the expected hash, + and that ``req.local_file_path`` is set to the download location.""" + if req.link.is_existing_dir(): + return - self._ensure_link_req_src_dir(req, parallel_builds) + # NB: req.local_file_path may be set already, if it was: + # (1) sourced from a local file (such as a local .tar.gz path), + # (2) also in the wheel cache (e.g. built from an sdist). + # We will overwrite it if so, since the local file path will still point to the + # .tar.gz source instead of the wheel cache entry. - if link.is_existing_dir(): - local_file = None - elif link.url not in self._downloaded: + local_file: Optional[File] = None + # The file may have already been downloaded in batch if it was + # a metadata-only requirement, or if it was already in the download directory. + if file_path := self._downloaded.get(req.link.url, None): + if hashes: + hashes.check_against_path(file_path) + local_file = File(file_path, content_type=None) + else: try: local_file = unpack_url( - link, + req.link, req.source_dir, self._download, self.verbosity, @@ -609,39 +623,13 @@ def _prepare_linked_requirement( except NetworkConnectionError as exc: raise InstallationError( f"Could not install requirement {req} because of HTTP " - f"error {exc} for URL {link}" - ) - else: - file_path = self._downloaded[link.url] - if hashes: - hashes.check_against_path(file_path) - local_file = File(file_path, content_type=None) + f"error {exc} for URL {req.link}" + ) from exc - # If download_info is set, we got it from the wheel cache. - if req.download_info is None: - # Editables don't go through this function (see - # prepare_editable_requirement). - assert not req.editable - req.download_info = direct_url_from_link(link, req.source_dir) - # Make sure we have a hash in download_info. If we got it as part of the - # URL, it will have been verified and we can rely on it. Otherwise we - # compute it from the downloaded file. - # FIXME: https://github.com/pypa/pip/issues/11943 - if ( - isinstance(req.download_info.info, ArchiveInfo) - and not req.download_info.info.hashes - and local_file - ): - hash = hash_file(local_file.path)[0].hexdigest() - # We populate info.hash for backward compatibility. - # This will automatically populate info.hashes. - req.download_info.info.hash = f"sha256={hash}" - - # For use in later processing, - # preserve the file path on the requirement. - if local_file: + if local_file is not None: req.local_file_path = local_file.path + def _prepare_and_finalize_dist(self, req: InstallRequirement) -> BaseDistribution: dist = _get_prepared_distribution( req, self.build_tracker, @@ -649,8 +637,60 @@ def _prepare_linked_requirement( self.build_isolation, self.check_build_deps, ) + assert dist.is_concrete + assert req.is_concrete + assert req.get_dist() is dist return dist + def _prepare_linked_requirement( + self, req: InstallRequirement, parallel_builds: bool + ) -> BaseDistribution: + """Ensure the dist pointing to the fully-resolved requirement is downloaded and + installable.""" + assert req.link, "this requirement must have a download link to fully prepare" + + hashes: Optional[Hashes] = self._get_linked_req_hashes(req) + + if hashes and req.is_wheel_from_cache: + hashes = self._rewrite_link_and_hashes_for_cached_wheel(req, hashes) + + # req.source_dir is only set for editable requirements. We don't need to unpack + # wheels, so no need for a source directory. + if not req.link.is_wheel: + self._ensure_link_req_src_dir(req, parallel_builds) + + # Ensure the dist is downloaded, check its hash, and unpack it into the source + # directory (if applicable). + self._ensure_local_file_path(req, hashes) + + # Set req.download_info for --report output. + if req.download_info is None: + # If download_info is set, we got it from the wheel cache. + self._ensure_download_info(req) + + # Build (if necessary) and prepare the distribution for installation. + return self._prepare_and_finalize_dist(req) + + def _ensure_download_info(self, req: InstallRequirement) -> None: + """Ensure that ``req.download_info`` is set, for uses such as --report.""" + assert req.download_info is None + # Editables don't go through this function (see prepare_editable_requirement). + assert not req.editable + req.download_info = direct_url_from_link(req.link, req.source_dir) + # Make sure we have a hash in download_info. If we got it as part of the + # URL, it will have been verified and we can rely on it. Otherwise we + # compute it from the downloaded file. + # FIXME: https://github.com/pypa/pip/issues/11943 + if ( + isinstance(req.download_info.info, ArchiveInfo) + and not req.download_info.info.hashes + and req.local_file_path + ): + hash = hash_file(req.local_file_path)[0].hexdigest() + # We populate info.hash for backward compatibility. + # This will automatically populate info.hashes. + req.download_info.info.hash = f"sha256={hash}" + def save_linked_requirement(self, req: InstallRequirement) -> None: assert self.download_dir is not None assert req.link is not None @@ -698,20 +738,9 @@ def prepare_editable_requirement( req.update_editable() assert req.source_dir req.download_info = direct_url_for_editable(req.unpacked_source_directory) - - dist = _get_prepared_distribution( - req, - self.build_tracker, - self.finder, - self.build_isolation, - self.check_build_deps, - ) - req.check_if_exists(self.use_user_site) - # This should already have been populated by the preparation of the source dist. - assert req.is_concrete - return dist + return self._prepare_and_finalize_dist(req) def prepare_installed_requirement( self, @@ -735,13 +764,4 @@ def prepare_installed_requirement( "completely repeatable environment, install into an " "empty virtualenv." ) - dist = _get_prepared_distribution( - req, - self.build_tracker, - self.finder, - self.build_isolation, - self.check_build_deps, - ) - - assert req.is_concrete - return dist + return self._prepare_and_finalize_dist(req) From e9849f851ef9b5ee50058738d981c9de6360f236 Mon Sep 17 00:00:00 2001 From: Danny McClanahan <1305167+cosmicexplorer@users.noreply.github.com> Date: Mon, 22 Jul 2024 18:40:08 -0400 Subject: [PATCH 3/3] use .metadata distribution info when possible When performing `install --dry-run` and PEP 658 .metadata files are available to guide the resolve, do not download the associated wheels. Rather use the distribution information directly from the .metadata files when reporting the results on the CLI and in the --report file. - describe the new --dry-run behavior - finalize linked requirements immediately after resolve - introduce is_concrete - funnel InstalledDistribution through _get_prepared_distribution() too - add test for new install --dry-run functionality (no downloading) --- news/12186.bugfix.rst | 1 + src/pip/_internal/commands/download.py | 5 +- src/pip/_internal/commands/install.py | 7 +- src/pip/_internal/commands/wheel.py | 5 +- src/pip/_internal/operations/check.py | 5 +- src/pip/_internal/operations/prepare.py | 59 ++++- src/pip/_internal/req/req_install.py | 5 +- .../resolution/resolvelib/resolver.py | 5 - tests/conftest.py | 37 ++- tests/functional/test_download.py | 3 +- tests/functional/test_install_metadata.py | 236 ++++++++++++++++++ 11 files changed, 342 insertions(+), 26 deletions(-) create mode 100644 news/12186.bugfix.rst create mode 100644 tests/functional/test_install_metadata.py diff --git a/news/12186.bugfix.rst b/news/12186.bugfix.rst new file mode 100644 index 00000000000..b51d84a0cb2 --- /dev/null +++ b/news/12186.bugfix.rst @@ -0,0 +1 @@ +Avoid downloading any dists in ``install --dry-run`` if PEP 658 ``.metadata`` files or lazy wheels are available. diff --git a/src/pip/_internal/commands/download.py b/src/pip/_internal/commands/download.py index 917bbb91d83..5528c1866a0 100644 --- a/src/pip/_internal/commands/download.py +++ b/src/pip/_internal/commands/download.py @@ -130,6 +130,9 @@ def run(self, options: Values, args: List[str]) -> int: self.trace_basic_info(finder) requirement_set = resolver.resolve(reqs, check_supported_wheels=True) + preparer.finalize_linked_requirements( + requirement_set.requirements.values(), require_dist_files=True + ) downloaded: List[str] = [] for req in requirement_set.requirements.values(): @@ -138,8 +141,6 @@ def run(self, options: Values, args: List[str]) -> int: preparer.save_linked_requirement(req) downloaded.append(req.name) - preparer.prepare_linked_requirements_more(requirement_set.requirements.values()) - if downloaded: write_output("Successfully downloaded %s", " ".join(downloaded)) diff --git a/src/pip/_internal/commands/install.py b/src/pip/_internal/commands/install.py index ad45a2f2a57..90d73842ffc 100644 --- a/src/pip/_internal/commands/install.py +++ b/src/pip/_internal/commands/install.py @@ -85,7 +85,8 @@ def add_options(self) -> None: help=( "Don't actually install anything, just print what would be. " "Can be used in combination with --ignore-installed " - "to 'resolve' the requirements." + "to 'resolve' the requirements. If package metadata is available " + "or cached, --dry-run also avoids downloading the dependency at all." ), ) self.cmd_opts.add_option( @@ -379,6 +380,10 @@ def run(self, options: Values, args: List[str]) -> int: requirement_set = resolver.resolve( reqs, check_supported_wheels=not options.target_dir ) + preparer.finalize_linked_requirements( + requirement_set.requirements.values(), + require_dist_files=not options.dry_run, + ) if options.json_report_file: report = InstallationReport(requirement_set.requirements_to_install) diff --git a/src/pip/_internal/commands/wheel.py b/src/pip/_internal/commands/wheel.py index 278719f4e0c..f3a81db6602 100644 --- a/src/pip/_internal/commands/wheel.py +++ b/src/pip/_internal/commands/wheel.py @@ -145,6 +145,9 @@ def run(self, options: Values, args: List[str]) -> int: self.trace_basic_info(finder) requirement_set = resolver.resolve(reqs, check_supported_wheels=True) + preparer.finalize_linked_requirements( + requirement_set.requirements.values(), require_dist_files=True + ) reqs_to_build: List[InstallRequirement] = [] for req in requirement_set.requirements.values(): @@ -153,8 +156,6 @@ def run(self, options: Values, args: List[str]) -> int: elif should_build_for_wheel_command(req): reqs_to_build.append(req) - preparer.prepare_linked_requirements_more(requirement_set.requirements.values()) - # build wheels build_successes, build_failures = build( reqs_to_build, diff --git a/src/pip/_internal/operations/check.py b/src/pip/_internal/operations/check.py index 4b6fbc4c375..4391d3e0121 100644 --- a/src/pip/_internal/operations/check.py +++ b/src/pip/_internal/operations/check.py @@ -23,7 +23,6 @@ from pip._vendor.packaging.utils import NormalizedName, canonicalize_name from pip._vendor.packaging.version import Version -from pip._internal.distributions import make_distribution_for_install_requirement from pip._internal.metadata import get_default_environment from pip._internal.metadata.base import BaseDistribution from pip._internal.req.req_install import InstallRequirement @@ -154,8 +153,8 @@ def _simulate_installation_of( # Modify it as installing requirement_set would (assuming no errors) for inst_req in to_install: - abstract_dist = make_distribution_for_install_requirement(inst_req) - dist = abstract_dist.get_metadata_distribution() + assert inst_req.is_concrete + dist = inst_req.get_dist() name = dist.canonical_name package_set[name] = PackageDetails(dist.version, list(dist.iter_dependencies())) diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py index 17e0c9c28da..340e4a37f9d 100644 --- a/src/pip/_internal/operations/prepare.py +++ b/src/pip/_internal/operations/prepare.py @@ -579,16 +579,71 @@ def prepare_linked_requirement( # None of the optimizations worked, fully prepare the requirement return self._prepare_linked_requirement(req, parallel_builds) - def prepare_linked_requirements_more( + def _extract_download_info(self, reqs: Iterable[InstallRequirement]) -> None: + """ + `pip install --report` extracts the download info from each requirement for its + JSON output, so we need to make sure every requirement has this before finishing + the resolve. But .download_info will only be populated by the point this method + is called for requirements already found in the wheel cache, so we need to + synthesize it for uncached results. Luckily, a DirectUrl can be parsed directly + from a url without any other context. However, this also means the download info + will only contain a hash if the link itself declares the hash. + """ + for req in reqs: + if req.download_info is None: + self._ensure_download_info(req) + + def _force_fully_prepared( + self, reqs: Iterable[InstallRequirement], assert_has_dist_files: bool + ) -> None: + """ + The legacy resolver seems to prepare requirements differently that can leave + them half-done in certain code paths. I'm not quite sure how it's doing things, + but at least we can do this to make sure they do things right. + """ + for req in reqs: + req.prepared = True + if assert_has_dist_files: + assert req.is_concrete + + def _ensure_dist_files( self, reqs: Iterable[InstallRequirement], parallel_builds: bool = False ) -> None: - """Prepare linked requirements more, if needed.""" + """Download any metadata-only linked requirements.""" metadata_only_reqs = [req for req in reqs if not req.is_concrete] self._complete_partial_requirements( metadata_only_reqs, parallel_builds=parallel_builds, ) + def finalize_linked_requirements( + self, + reqs: Iterable[InstallRequirement], + require_dist_files: bool, + parallel_builds: bool = False, + ) -> None: + """Prepare linked requirements more, if needed. + + Neighboring .metadata files as per PEP 658 or lazy wheels via fast-deps will be + preferred to extract metadata from any concrete requirement (one that has been + mapped to a Link) without downloading the underlying wheel or sdist. When ``pip + install --dry-run`` is called, we want to avoid ever downloading the underlying + dist, but we still need to provide all of the results that pip commands expect + from the typical resolve process. + + Those expectations vary, but one distinction lies in whether the command needs + an actual physical dist somewhere on the filesystem, or just the metadata about + it from the resolver (as in ``pip install --report``). If the command requires + actual physical filesystem locations for the resolved dists, it must call this + method with ``require_dist_files=True`` to fully download anything + that remains. + """ + if require_dist_files: + self._ensure_dist_files(reqs, parallel_builds=parallel_builds) + else: + self._extract_download_info(reqs) + self._force_fully_prepared(reqs, assert_has_dist_files=require_dist_files) + def _ensure_local_file_path( self, req: InstallRequirement, hashes: Optional[Hashes] ) -> None: diff --git a/src/pip/_internal/req/req_install.py b/src/pip/_internal/req/req_install.py index 2710142f26a..a5800c9e3bd 100644 --- a/src/pip/_internal/req/req_install.py +++ b/src/pip/_internal/req/req_install.py @@ -648,10 +648,7 @@ def cache_concrete_dist(self, dist: BaseDistribution) -> None: # If we set a dist twice for the same requirement, we must be hydrating # a concrete dist for what was previously virtual. This will occur in the # case of `install --dry-run` when PEP 658 metadata is available. - - # TODO(#12186): avoid setting dist twice! - # assert not self._dist.is_concrete - pass + assert not self._dist.is_concrete assert dist.is_concrete self._dist = dist diff --git a/src/pip/_internal/resolution/resolvelib/resolver.py b/src/pip/_internal/resolution/resolvelib/resolver.py index c22d1707b5e..4880a5b9802 100644 --- a/src/pip/_internal/resolution/resolvelib/resolver.py +++ b/src/pip/_internal/resolution/resolvelib/resolver.py @@ -175,11 +175,6 @@ def resolve( req_set.add_named_requirement(ireq) - reqs = req_set.all_requirements - self.factory.preparer.prepare_linked_requirements_more(reqs) - for req in reqs: - req.prepared = True - assert req.is_concrete return req_set def get_installation_order( diff --git a/tests/conftest.py b/tests/conftest.py index da4ab5b9dfb..3a168c484a3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -748,6 +748,9 @@ class FakePackage: requires_dist: Tuple[str, ...] = () # This will override the Name specified in the actual dist's METADATA. metadata_name: Optional[str] = None + # Whether to delete the file this points to, which causes any attempt to fetch this + # package to fail unless it is processed as a metadata-only dist. + delete_linked_file: bool = False def metadata_filename(self) -> str: """This is specified by PEP 658.""" @@ -837,6 +840,27 @@ def fake_packages() -> Dict[str, List[FakePackage]]: ("simple==1.0",), ), ], + "complex-dist": [ + FakePackage( + "complex-dist", + "0.1", + "complex_dist-0.1-py2.py3-none-any.whl", + MetadataKind.Unhashed, + # Validate that the wheel isn't fetched if metadata is available and + # --dry-run is on, when the metadata presents no hash itself. + delete_linked_file=True, + ), + ], + "corruptwheel": [ + FakePackage( + "corruptwheel", + "1.0", + "corruptwheel-1.0-py2.py3-none-any.whl", + # Validate that the wheel isn't fetched if metadata is available and + # --dry-run is on, when the metadata *does* present a hash. + MetadataKind.Sha256, + ), + ], "has-script": [ # Ensure we check PEP 658 metadata hashing errors for wheel files. FakePackage( @@ -922,10 +946,10 @@ def html_index_for_packages( f' {package_link.filename}
' # noqa: E501 ) # (3.2) Copy over the corresponding file in `shared_data.packages`. - shutil.copy( - shared_data.packages / package_link.filename, - pkg_subdir / package_link.filename, - ) + cached_file = shared_data.packages / package_link.filename + new_file = pkg_subdir / package_link.filename + if not package_link.delete_linked_file: + shutil.copy(cached_file, new_file) # (3.3) Write a metadata file, if applicable. if package_link.metadata != MetadataKind.NoFile: with open(pkg_subdir / package_link.metadata_filename(), "wb") as f: @@ -980,7 +1004,8 @@ def html_index_with_onetime_server( """Serve files from a generated pypi index, erroring if a file is downloaded more than once. - Provide `-i http://localhost:8000` to pip invocations to point them at this server. + Provide `-i http://localhost:` to pip invocations to point them at + this server. """ class InDirectoryServer(http.server.ThreadingHTTPServer): @@ -995,7 +1020,7 @@ def finish_request(self: "Self", request: Any, client_address: Any) -> None: class Handler(OneTimeDownloadHandler): _seen_paths: ClassVar[Set[str]] = set() - with InDirectoryServer(("", 8000), Handler) as httpd: + with InDirectoryServer(("", 0), Handler) as httpd: server_thread = threading.Thread(target=httpd.serve_forever) server_thread.start() diff --git a/tests/functional/test_download.py b/tests/functional/test_download.py index d469e71c360..53c815ae98b 100644 --- a/tests/functional/test_download.py +++ b/tests/functional/test_download.py @@ -1273,6 +1273,7 @@ def download_server_html_index( ) -> Callable[..., Tuple[TestPipResult, Path]]: """Execute `pip download` against a generated PyPI index.""" download_dir = tmpdir / "download_dir" + _, server_port = html_index_with_onetime_server.server_address def run_for_generated_index( args: List[str], @@ -1287,7 +1288,7 @@ def run_for_generated_index( "-d", str(download_dir), "-i", - "http://localhost:8000", + f"http://localhost:{server_port}", *args, ] result = script.pip(*pip_args, allow_error=allow_error) diff --git a/tests/functional/test_install_metadata.py b/tests/functional/test_install_metadata.py new file mode 100644 index 00000000000..a7c4dd51807 --- /dev/null +++ b/tests/functional/test_install_metadata.py @@ -0,0 +1,236 @@ +import json +import re +from pathlib import Path +from typing import Any, Callable, Dict, Iterator, List, Tuple + +import pytest +from pip._vendor.packaging.requirements import Requirement + +from pip._internal.models.direct_url import DirectUrl +from pip._internal.utils.urls import path_to_url +from tests.lib import ( + PipTestEnvironment, + TestPipResult, +) + + +@pytest.fixture +def install_with_generated_html_index( + script: PipTestEnvironment, + html_index_for_packages: Path, + tmpdir: Path, +) -> Callable[..., Tuple[TestPipResult, Dict[str, Any]]]: + """Execute `pip download` against a generated PyPI index.""" + output_file = tmpdir / "output_file.json" + + def run_for_generated_index( + args: List[str], + *, + dry_run: bool = True, + allow_error: bool = False, + ) -> Tuple[TestPipResult, Dict[str, Any]]: + """ + Produce a PyPI directory structure pointing to the specified packages, then + execute `pip install --report ... -i ...` pointing to our generated index. + """ + pip_args = [ + "install", + *(("--dry-run",) if dry_run else ()), + "--ignore-installed", + "--report", + str(output_file), + "-i", + path_to_url(str(html_index_for_packages)), + *args, + ] + result = script.pip(*pip_args, allow_error=allow_error) + try: + with open(output_file, "rb") as f: + report = json.load(f) + except FileNotFoundError: + if allow_error: + report = {} + else: + raise + return (result, report) + + return run_for_generated_index + + +def iter_dists(report: Dict[str, Any]) -> Iterator[Tuple[Requirement, DirectUrl]]: + """Parse a (req,url) tuple from each installed dist in the --report json.""" + for inst in report["install"]: + metadata = inst["metadata"] + name = metadata["name"] + version = metadata["version"] + req = Requirement(f"{name}=={version}") + direct_url = DirectUrl.from_dict(inst["download_info"]) + yield (req, direct_url) + + +@pytest.mark.parametrize( + "requirement_to_install, expected_outputs", + [ + ("simple2==1.0", ["simple2==1.0", "simple==1.0"]), + ("simple==2.0", ["simple==2.0"]), + ( + "colander", + ["colander==0.9.9", "translationstring==1.1"], + ), + ( + "compilewheel", + ["compilewheel==1.0", "simple==1.0"], + ), + ], +) +def test_install_with_metadata( + install_with_generated_html_index: Callable[ + ..., Tuple[TestPipResult, Dict[str, Any]] + ], + requirement_to_install: str, + expected_outputs: List[str], +) -> None: + """Verify that if a data-dist-info-metadata attribute is present, then it is used + instead of the actual dist's METADATA.""" + _, report = install_with_generated_html_index( + [requirement_to_install], + ) + installed = sorted(str(r) for r, _ in iter_dists(report)) + assert installed == expected_outputs + + +@pytest.mark.parametrize( + "requirement_to_install, real_hash", + [ + ( + "simple==3.0", + "95e0f200b6302989bcf2cead9465cf229168295ea330ca30d1ffeab5c0fed996", + ), + ( + "has-script", + "16ba92d7f6f992f6de5ecb7d58c914675cf21f57f8e674fb29dcb4f4c9507e5b", + ), + ], +) +def test_incorrect_metadata_hash( + install_with_generated_html_index: Callable[ + ..., Tuple[TestPipResult, Dict[str, Any]] + ], + requirement_to_install: str, + real_hash: str, +) -> None: + """Verify that if a hash for data-dist-info-metadata is provided, it must match the + actual hash of the metadata file.""" + result, _ = install_with_generated_html_index( + [requirement_to_install], + allow_error=True, + ) + assert result.returncode != 0 + expected_msg = f"""\ + Expected sha256 wrong-hash + Got {real_hash}""" + assert expected_msg in result.stderr + + +@pytest.mark.parametrize( + "requirement_to_install, expected_url", + [ + ("simple2==2.0", "simple2-2.0.tar.gz.metadata"), + ("priority", "priority-1.0-py2.py3-none-any.whl.metadata"), + ], +) +def test_metadata_not_found( + install_with_generated_html_index: Callable[ + ..., Tuple[TestPipResult, Dict[str, Any]] + ], + requirement_to_install: str, + expected_url: str, +) -> None: + """Verify that if a data-dist-info-metadata attribute is provided, that pip will + fetch the .metadata file at the location specified by PEP 658, and error + if unavailable.""" + result, _ = install_with_generated_html_index( + [requirement_to_install], + allow_error=True, + ) + assert result.returncode != 0 + expected_re = re.escape(expected_url) + pattern = re.compile( + f"ERROR: 404 Client Error: FileNotFoundError for url:.*{expected_re}" + ) + assert pattern.search(result.stderr), (pattern, result.stderr) + + +def test_produces_error_for_mismatched_package_name_in_metadata( + install_with_generated_html_index: Callable[ + ..., Tuple[TestPipResult, Dict[str, Any]] + ], +) -> None: + """Verify that the package name from the metadata matches the requested package.""" + result, _ = install_with_generated_html_index( + ["simple2==3.0"], + allow_error=True, + ) + assert result.returncode != 0 + assert ( + "simple2-3.0.tar.gz has inconsistent Name: expected 'simple2', but metadata " + "has 'not-simple2'" + ) in result.stdout + + +@pytest.mark.parametrize( + "requirement", + [ + "requires-simple-extra==0.1", + "REQUIRES_SIMPLE-EXTRA==0.1", + "REQUIRES....simple-_-EXTRA==0.1", + ], +) +def test_canonicalizes_package_name_before_verifying_metadata( + install_with_generated_html_index: Callable[ + ..., Tuple[TestPipResult, Dict[str, Any]] + ], + requirement: str, +) -> None: + """Verify that the package name from the command line and the package's + METADATA are both canonicalized before comparison, while the name from the METADATA + is always used verbatim to represent the installed candidate in --report. + + Regression test for https://github.com/pypa/pip/issues/12038 + """ + _, report = install_with_generated_html_index( + [requirement], + ) + reqs = [str(r) for r, _ in iter_dists(report)] + assert reqs == ["Requires_Simple.Extra==0.1"] + + +@pytest.mark.parametrize( + "requirement,err_string", + [ + # It's important that we verify pip won't even attempt to fetch the file, so we + # construct an input that will cause it to error if it tries at all. + ("complex-dist==0.1", "404 Client Error: FileNotFoundError"), + ("corruptwheel==1.0", ".whl is invalid."), + ], +) +def test_dry_run_avoids_downloading_metadata_only_dists( + install_with_generated_html_index: Callable[ + ..., Tuple[TestPipResult, Dict[str, Any]] + ], + requirement: str, + err_string: str, +) -> None: + """Verify that the underlying dist files are not downloaded at all when + `install --dry-run` is used to resolve dists with PEP 658 metadata.""" + _, report = install_with_generated_html_index( + [requirement], + ) + assert [requirement] == [str(r) for r, _ in iter_dists(report)] + result, _ = install_with_generated_html_index( + [requirement], + dry_run=False, + allow_error=True, + ) + assert result.returncode != 0 + assert err_string in result.stderr