diff --git a/news/11512.bugfix.rst b/news/11512.bugfix.rst new file mode 100644 index 00000000000..a2169ec6e10 --- /dev/null +++ b/news/11512.bugfix.rst @@ -0,0 +1 @@ +Avoid downloading wheels when performing a ``--dry-run`` install when .metadata files are used. diff --git a/src/pip/_internal/commands/download.py b/src/pip/_internal/commands/download.py index 54247a78a65..dc95a8f7a32 100644 --- a/src/pip/_internal/commands/download.py +++ b/src/pip/_internal/commands/download.py @@ -138,7 +138,9 @@ def run(self, options: Values, args: List[str]) -> int: preparer.save_linked_requirement(req) downloaded.append(req.name) - preparer.prepare_linked_requirements_more(requirement_set.requirements.values()) + preparer.finalize_linked_requirements( + requirement_set.requirements.values(), hydrate_virtual_reqs=True + ) requirement_set.warn_legacy_versions_and_specifiers() if downloaded: diff --git a/src/pip/_internal/commands/install.py b/src/pip/_internal/commands/install.py index f6a300804f4..eb1729acf87 100644 --- a/src/pip/_internal/commands/install.py +++ b/src/pip/_internal/commands/install.py @@ -84,7 +84,8 @@ def add_options(self) -> None: help=( "Don't actually install anything, just print what would be. " "Can be used in combination with --ignore-installed " - "to 'resolve' the requirements." + "to 'resolve' the requirements. If PEP 658 or fast-deps metadata is " + "available, --dry-run also avoids downloading the dependency at all." 
), ) self.cmd_opts.add_option( @@ -377,6 +378,10 @@ def run(self, options: Values, args: List[str]) -> int: requirement_set = resolver.resolve( reqs, check_supported_wheels=not options.target_dir ) + preparer.finalize_linked_requirements( + requirement_set.requirements.values(), + hydrate_virtual_reqs=not options.dry_run, + ) if options.json_report_file: report = InstallationReport(requirement_set.requirements_to_install) diff --git a/src/pip/_internal/commands/wheel.py b/src/pip/_internal/commands/wheel.py index ed578aa2500..4245f17d376 100644 --- a/src/pip/_internal/commands/wheel.py +++ b/src/pip/_internal/commands/wheel.py @@ -153,7 +153,9 @@ def run(self, options: Values, args: List[str]) -> int: elif should_build_for_wheel_command(req): reqs_to_build.append(req) - preparer.prepare_linked_requirements_more(requirement_set.requirements.values()) + preparer.finalize_linked_requirements( + requirement_set.requirements.values(), hydrate_virtual_reqs=True + ) requirement_set.warn_legacy_versions_and_specifiers() # build wheels diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py index cb121bcb252..839cbbec916 100644 --- a/src/pip/_internal/operations/prepare.py +++ b/src/pip/_internal/operations/prepare.py @@ -480,19 +480,7 @@ def _complete_partial_requirements( logger.debug("Downloading link %s to %s", link, filepath) req = links_to_fully_download[link] req.local_file_path = filepath - # TODO: This needs fixing for sdists - # This is an emergency fix for #11847, which reports that - # distributions get downloaded twice when metadata is loaded - # from a PEP 658 standalone metadata file. Setting _downloaded - # fixes this for wheels, but breaks the sdist case (tests - # test_download_metadata). As PyPI is currently only serving - # metadata for wheels, this is not an immediate issue. 
- # Fixing the problem properly looks like it will require a - # complete refactoring of the `prepare_linked_requirements_more` - # logic, and I haven't a clue where to start on that, so for now - # I have fixed the issue *just* for wheels. - if req.is_wheel: - self._downloaded[req.link.url] = filepath + self._downloaded[req.link.url] = filepath # This step is necessary to ensure all lazy wheels are processed # successfully by the 'download', 'wheel', and 'install' commands. @@ -531,16 +519,67 @@ def prepare_linked_requirement( # The file is not available, attempt to fetch only metadata metadata_dist = self._fetch_metadata_only(req) if metadata_dist is not None: + # These reqs now have the dependency information from the downloaded + # metadata, without having downloaded the actual dist at all. + req.dist_from_metadata = metadata_dist req.needs_more_preparation = True return metadata_dist # None of the optimizations worked, fully prepare the requirement return self._prepare_linked_requirement(req, parallel_builds) - def prepare_linked_requirements_more( - self, reqs: Iterable[InstallRequirement], parallel_builds: bool = False + def _ensure_download_info(self, reqs: Iterable[InstallRequirement]) -> None: + """ + `pip install --report` extracts the download info from each requirement for its + JSON output, so we need to make sure every requirement has this before finishing + the resolve. But .download_info will only be populated by the point this method + is called for requirements already found in the wheel cache, so we need to + synthesize it for uncached results. Luckily, a DirectUrl can be parsed directly + from a url without any other context. However, this also means the download info + will only contain a hash if the link itself declares the hash. + """ + for req in reqs: + # If download_info is set, we got it from the wheel cache. 
+ if req.download_info is None: + req.download_info = direct_url_from_link(req.link, req.source_dir) + + def _force_fully_prepared(self, reqs: Iterable[InstallRequirement]) -> None: + """ + The legacy resolver seems to prepare requirements differently, in a way that + can leave them half-done in certain code paths. I'm not quite sure how it's + doing things, but at least we can do this to make sure they are fully prepared. + """ + for req in reqs: + req.prepared = True + req.needs_more_preparation = False + + def finalize_linked_requirements( + self, + reqs: Iterable[InstallRequirement], + hydrate_virtual_reqs: bool, + parallel_builds: bool = False, ) -> None: - """Prepare linked requirements more, if needed.""" + """Prepare linked requirements more, if needed. + + Neighboring .metadata files as per PEP 658 or lazy wheels via fast-deps will be + preferred to extract metadata from any concrete requirement (one that has been + mapped to a Link) without downloading the underlying wheel or sdist. When ``pip + install --dry-run`` is called, we want to avoid ever downloading the underlying + dist, but we still need to provide all of the results that pip commands expect + from the typical resolve process. + + Those expectations vary, but one distinction lies in whether the command needs + an actual physical dist somewhere on the filesystem, or just the metadata about + it from the resolver (as in ``pip install --report``). If the command requires + actual physical filesystem locations for the resolved dists, it must call this + method with ``hydrate_virtual_reqs=True`` to fully download anything + that remains. + """ + if not hydrate_virtual_reqs: + self._ensure_download_info(reqs) + self._force_fully_prepared(reqs) + return + reqs = [req for req in reqs if req.needs_more_preparation] for req in reqs: # Determine if any of these requirements were already downloaded. 
@@ -549,6 +588,8 @@ def prepare_linked_requirements_more( file_path = _check_download_dir(req.link, self.download_dir, hashes) if file_path is not None: self._downloaded[req.link.url] = file_path + # This is a wheel, so we know there's nothing more we need to do to + # prepare it. req.needs_more_preparation = False # Prepare requirements we found were already downloaded for some @@ -566,6 +607,8 @@ def prepare_linked_requirements_more( partially_downloaded_reqs, parallel_builds=parallel_builds, ) + # NB: Must call this method before returning! + self._force_fully_prepared(reqs) def _prepare_linked_requirement( self, req: InstallRequirement, parallel_builds: bool diff --git a/src/pip/_internal/req/req_install.py b/src/pip/_internal/req/req_install.py index 1f479713a94..302afafed51 100644 --- a/src/pip/_internal/req/req_install.py +++ b/src/pip/_internal/req/req_install.py @@ -183,6 +183,10 @@ def __init__( # This requirement needs more preparation before it can be built self.needs_more_preparation = False + # Distribution from the .metadata file referenced by the PEP 658 + # data-dist-info-metadata attribute. 
+ self.dist_from_metadata: Optional[BaseDistribution] = None + def __str__(self) -> str: if self.req: s = str(self.req) @@ -230,7 +234,7 @@ def name(self) -> Optional[str]: return None return self.req.name - @functools.lru_cache() # use cached_property in python 3.8+ + @functools.lru_cache(maxsize=None) # TODO: use cached_property in python 3.8+ def supports_pyproject_editable(self) -> bool: if not self.use_pep517: return False @@ -583,6 +587,7 @@ def prepare_metadata(self) -> None: @property def metadata(self) -> Any: + # TODO: use cached_property in python 3.8+ if not hasattr(self, "_metadata"): self._metadata = self.get_dist().metadata @@ -595,6 +600,8 @@ def get_dist(self) -> BaseDistribution: return get_wheel_distribution( FilesystemWheel(self.local_file_path), canonicalize_name(self.name) ) + elif self.dist_from_metadata: + return self.dist_from_metadata raise AssertionError( f"InstallRequirement {self} has no metadata directory and no wheel: " f"can't make a distribution." diff --git a/src/pip/_internal/resolution/resolvelib/resolver.py b/src/pip/_internal/resolution/resolvelib/resolver.py index d5b238608b2..32a3d3ff70f 100644 --- a/src/pip/_internal/resolution/resolvelib/resolver.py +++ b/src/pip/_internal/resolution/resolvelib/resolver.py @@ -157,11 +157,6 @@ def resolve( req_set.add_named_requirement(ireq) - reqs = req_set.all_requirements - self.factory.preparer.prepare_linked_requirements_more(reqs) - for req in reqs: - req.prepared = True - req.needs_more_preparation = False return req_set def get_installation_order( diff --git a/tests/functional/test_download.py b/tests/functional/test_download.py index 8da185c066e..704eba2d1a5 100644 --- a/tests/functional/test_download.py +++ b/tests/functional/test_download.py @@ -1,24 +1,17 @@ import os -import re import shutil import textwrap -import uuid -from dataclasses import dataclass -from enum import Enum from hashlib import sha256 from pathlib import Path -from textwrap import dedent -from typing import 
Callable, Dict, List, Optional, Tuple +from typing import List import pytest from pip._internal.cli.status_codes import ERROR -from pip._internal.utils.urls import path_to_url from tests.conftest import MockServer, ScriptFactory from tests.lib import ( PipTestEnvironment, TestData, - TestPipResult, create_basic_sdist_for_package, create_really_basic_wheel, ) @@ -1235,413 +1228,3 @@ def test_download_use_pep517_propagation( downloads = os.listdir(download_dir) assert len(downloads) == 2 - - -class MetadataKind(Enum): - """All the types of values we might be provided for the data-dist-info-metadata - attribute from PEP 658.""" - - # Valid: will read metadata from the dist instead. - No = "none" - # Valid: will read the .metadata file, but won't check its hash. - Unhashed = "unhashed" - # Valid: will read the .metadata file and check its hash matches. - Sha256 = "sha256" - # Invalid: will error out after checking the hash. - WrongHash = "wrong-hash" - # Invalid: will error out after failing to fetch the .metadata file. - NoFile = "no-file" - - -@dataclass(frozen=True) -class Package: - """Mock package structure used to generate a PyPI repository. - - Package name and version should correspond to sdists (.tar.gz files) in our test - data.""" - - name: str - version: str - filename: str - metadata: MetadataKind - # This will override any dependencies specified in the actual dist's METADATA. - requires_dist: Tuple[str, ...] = () - # This will override the Name specified in the actual dist's METADATA. 
- metadata_name: Optional[str] = None - - def metadata_filename(self) -> str: - """This is specified by PEP 658.""" - return f"{self.filename}.metadata" - - def generate_additional_tag(self) -> str: - """This gets injected into the tag in the generated PyPI index page for this - package.""" - if self.metadata == MetadataKind.No: - return "" - if self.metadata in [MetadataKind.Unhashed, MetadataKind.NoFile]: - return 'data-dist-info-metadata="true"' - if self.metadata == MetadataKind.WrongHash: - return 'data-dist-info-metadata="sha256=WRONG-HASH"' - assert self.metadata == MetadataKind.Sha256 - checksum = sha256(self.generate_metadata()).hexdigest() - return f'data-dist-info-metadata="sha256={checksum}"' - - def requires_str(self) -> str: - if not self.requires_dist: - return "" - joined = " and ".join(self.requires_dist) - return f"Requires-Dist: {joined}" - - def generate_metadata(self) -> bytes: - """This is written to `self.metadata_filename()` and will override the actual - dist's METADATA, unless `self.metadata == MetadataKind.NoFile`.""" - return dedent( - f"""\ - Metadata-Version: 2.1 - Name: {self.metadata_name or self.name} - Version: {self.version} - {self.requires_str()} - """ - ).encode("utf-8") - - -@pytest.fixture(scope="function") -def write_index_html_content(tmpdir: Path) -> Callable[[str], Path]: - """Generate a PyPI package index.html within a temporary local directory.""" - html_dir = tmpdir / "index_html_content" - html_dir.mkdir() - - def generate_index_html_subdir(index_html: str) -> Path: - """Create a new subdirectory after a UUID and write an index.html.""" - new_subdir = html_dir / uuid.uuid4().hex - new_subdir.mkdir() - - with open(new_subdir / "index.html", "w") as f: - f.write(index_html) - - return new_subdir - - return generate_index_html_subdir - - -@pytest.fixture(scope="function") -def html_index_for_packages( - shared_data: TestData, - write_index_html_content: Callable[[str], Path], -) -> Callable[..., Path]: - """Generate a 
PyPI HTML package index within a local directory pointing to - blank data.""" - - def generate_html_index_for_packages(packages: Dict[str, List[Package]]) -> Path: - """ - Produce a PyPI directory structure pointing to the specified packages. - """ - # (1) Generate the content for a PyPI index.html. - pkg_links = "\n".join( - f' {pkg}' for pkg in packages.keys() - ) - index_html = f"""\ - - - - - Simple index - - -{pkg_links} - -""" - # (2) Generate the index.html in a new subdirectory of the temp directory. - index_html_subdir = write_index_html_content(index_html) - - # (3) Generate subdirectories for individual packages, each with their own - # index.html. - for pkg, links in packages.items(): - pkg_subdir = index_html_subdir / pkg - pkg_subdir.mkdir() - - download_links: List[str] = [] - for package_link in links: - # (3.1) Generate the tag which pip can crawl pointing to this - # specific package version. - download_links.append( - f' {package_link.filename}
' # noqa: E501 - ) - # (3.2) Copy over the corresponding file in `shared_data.packages`. - shutil.copy( - shared_data.packages / package_link.filename, - pkg_subdir / package_link.filename, - ) - # (3.3) Write a metadata file, if applicable. - if package_link.metadata != MetadataKind.NoFile: - with open(pkg_subdir / package_link.metadata_filename(), "wb") as f: - f.write(package_link.generate_metadata()) - - # (3.4) After collating all the download links and copying over the files, - # write an index.html with the generated download links for each - # copied file for this specific package name. - download_links_str = "\n".join(download_links) - pkg_index_content = f"""\ - - - - - Links for {pkg} - - -

Links for {pkg}

-{download_links_str} - -""" - with open(pkg_subdir / "index.html", "w") as f: - f.write(pkg_index_content) - - return index_html_subdir - - return generate_html_index_for_packages - - -@pytest.fixture(scope="function") -def download_generated_html_index( - script: PipTestEnvironment, - html_index_for_packages: Callable[[Dict[str, List[Package]]], Path], - tmpdir: Path, -) -> Callable[..., Tuple[TestPipResult, Path]]: - """Execute `pip download` against a generated PyPI index.""" - download_dir = tmpdir / "download_dir" - - def run_for_generated_index( - packages: Dict[str, List[Package]], - args: List[str], - allow_error: bool = False, - ) -> Tuple[TestPipResult, Path]: - """ - Produce a PyPI directory structure pointing to the specified packages, then - execute `pip download -i ...` pointing to our generated index. - """ - index_dir = html_index_for_packages(packages) - pip_args = [ - "download", - "-d", - str(download_dir), - "-i", - path_to_url(str(index_dir)), - *args, - ] - result = script.pip(*pip_args, allow_error=allow_error) - return (result, download_dir) - - return run_for_generated_index - - -# The package database we generate for testing PEP 658 support. -_simple_packages: Dict[str, List[Package]] = { - "simple": [ - Package("simple", "1.0", "simple-1.0.tar.gz", MetadataKind.Sha256), - Package("simple", "2.0", "simple-2.0.tar.gz", MetadataKind.No), - # This will raise a hashing error. - Package("simple", "3.0", "simple-3.0.tar.gz", MetadataKind.WrongHash), - ], - "simple2": [ - # Override the dependencies here in order to force pip to download - # simple-1.0.tar.gz as well. - Package( - "simple2", - "1.0", - "simple2-1.0.tar.gz", - MetadataKind.Unhashed, - ("simple==1.0",), - ), - # This will raise an error when pip attempts to fetch the metadata file. - Package("simple2", "2.0", "simple2-2.0.tar.gz", MetadataKind.NoFile), - # This has a METADATA file with a mismatched name. 
- Package( - "simple2", - "3.0", - "simple2-3.0.tar.gz", - MetadataKind.Sha256, - metadata_name="not-simple2", - ), - ], - "colander": [ - # Ensure we can read the dependencies from a metadata file within a wheel - # *without* PEP 658 metadata. - Package( - "colander", "0.9.9", "colander-0.9.9-py2.py3-none-any.whl", MetadataKind.No - ), - ], - "compilewheel": [ - # Ensure we can override the dependencies of a wheel file by injecting PEP - # 658 metadata. - Package( - "compilewheel", - "1.0", - "compilewheel-1.0-py2.py3-none-any.whl", - MetadataKind.Unhashed, - ("simple==1.0",), - ), - ], - "has-script": [ - # Ensure we check PEP 658 metadata hashing errors for wheel files. - Package( - "has-script", - "1.0", - "has.script-1.0-py2.py3-none-any.whl", - MetadataKind.WrongHash, - ), - ], - "translationstring": [ - Package( - "translationstring", "1.1", "translationstring-1.1.tar.gz", MetadataKind.No - ), - ], - "priority": [ - # Ensure we check for a missing metadata file for wheels. - Package( - "priority", "1.0", "priority-1.0-py2.py3-none-any.whl", MetadataKind.NoFile - ), - ], - "requires-simple-extra": [ - # Metadata name is not canonicalized. 
- Package( - "requires-simple-extra", - "0.1", - "requires_simple_extra-0.1-py2.py3-none-any.whl", - MetadataKind.Sha256, - metadata_name="Requires_Simple.Extra", - ), - ], -} - - -@pytest.mark.parametrize( - "requirement_to_download, expected_outputs", - [ - ("simple2==1.0", ["simple-1.0.tar.gz", "simple2-1.0.tar.gz"]), - ("simple==2.0", ["simple-2.0.tar.gz"]), - ( - "colander", - ["colander-0.9.9-py2.py3-none-any.whl", "translationstring-1.1.tar.gz"], - ), - ( - "compilewheel", - ["compilewheel-1.0-py2.py3-none-any.whl", "simple-1.0.tar.gz"], - ), - ], -) -def test_download_metadata( - download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]], - requirement_to_download: str, - expected_outputs: List[str], -) -> None: - """Verify that if a data-dist-info-metadata attribute is present, then it is used - instead of the actual dist's METADATA.""" - _, download_dir = download_generated_html_index( - _simple_packages, - [requirement_to_download], - ) - assert sorted(os.listdir(download_dir)) == expected_outputs - - -@pytest.mark.parametrize( - "requirement_to_download, real_hash", - [ - ( - "simple==3.0", - "95e0f200b6302989bcf2cead9465cf229168295ea330ca30d1ffeab5c0fed996", - ), - ( - "has-script", - "16ba92d7f6f992f6de5ecb7d58c914675cf21f57f8e674fb29dcb4f4c9507e5b", - ), - ], -) -def test_incorrect_metadata_hash( - download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]], - requirement_to_download: str, - real_hash: str, -) -> None: - """Verify that if a hash for data-dist-info-metadata is provided, it must match the - actual hash of the metadata file.""" - result, _ = download_generated_html_index( - _simple_packages, - [requirement_to_download], - allow_error=True, - ) - assert result.returncode != 0 - expected_msg = f"""\ - Expected sha256 WRONG-HASH - Got {real_hash}""" - assert expected_msg in result.stderr - - -@pytest.mark.parametrize( - "requirement_to_download, expected_url", - [ - ("simple2==2.0", 
"simple2-2.0.tar.gz.metadata"), - ("priority", "priority-1.0-py2.py3-none-any.whl.metadata"), - ], -) -def test_metadata_not_found( - download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]], - requirement_to_download: str, - expected_url: str, -) -> None: - """Verify that if a data-dist-info-metadata attribute is provided, that pip will - fetch the .metadata file at the location specified by PEP 658, and error - if unavailable.""" - result, _ = download_generated_html_index( - _simple_packages, - [requirement_to_download], - allow_error=True, - ) - assert result.returncode != 0 - expected_re = re.escape(expected_url) - pattern = re.compile( - f"ERROR: 404 Client Error: FileNotFoundError for url:.*{expected_re}" - ) - assert pattern.search(result.stderr), (pattern, result.stderr) - - -def test_produces_error_for_mismatched_package_name_in_metadata( - download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]], -) -> None: - """Verify that the package name from the metadata matches the requested package.""" - result, _ = download_generated_html_index( - _simple_packages, - ["simple2==3.0"], - allow_error=True, - ) - assert result.returncode != 0 - assert ( - "simple2-3.0.tar.gz has inconsistent Name: expected 'simple2', but metadata " - "has 'not-simple2'" - ) in result.stdout - - -@pytest.mark.parametrize( - "requirement", - ( - "requires-simple-extra==0.1", - "REQUIRES_SIMPLE-EXTRA==0.1", - "REQUIRES....simple-_-EXTRA==0.1", - ), -) -def test_canonicalizes_package_name_before_verifying_metadata( - download_generated_html_index: Callable[..., Tuple[TestPipResult, Path]], - requirement: str, -) -> None: - """Verify that the package name from the command line and the package's - METADATA are both canonicalized before comparison. 
- - Regression test for https://github.com/pypa/pip/issues/12038 - """ - result, download_dir = download_generated_html_index( - _simple_packages, - [requirement], - allow_error=True, - ) - assert result.returncode == 0 - assert os.listdir(download_dir) == [ - "requires_simple_extra-0.1-py2.py3-none-any.whl", - ] diff --git a/tests/functional/test_install.py b/tests/functional/test_install.py index eabddfe58fa..758b6ef8dbd 100644 --- a/tests/functional/test_install.py +++ b/tests/functional/test_install.py @@ -7,7 +7,7 @@ import textwrap from os.path import curdir, join, pardir from pathlib import Path -from typing import Dict, List, Tuple +from typing import Callable, Dict, Iterable, List, Optional, Tuple import pytest @@ -20,6 +20,7 @@ PipTestEnvironment, ResolverVariant, TestData, + TestPipResult, _create_svn_repo, _create_test_package, create_basic_wheel_for_package, @@ -2371,14 +2372,69 @@ def test_install_logs_pip_version_in_debug( assert_re_match(pattern, result.stdout) -def test_install_dry_run(script: PipTestEnvironment, data: TestData) -> None: - """Test that pip install --dry-run logs what it would install.""" - result = script.pip( - "install", "--dry-run", "--find-links", data.find_links, "simple" - ) +@pytest.fixture +def install_find_links( + script: PipTestEnvironment, + data: TestData, +) -> Callable[[Iterable[str], bool, Optional[Path]], TestPipResult]: + def install( + args: Iterable[str], dry_run: bool, target_dir: Optional[Path] + ) -> TestPipResult: + return script.pip( + "install", + *( + ( + "--target", + str(target_dir), + ) + if target_dir is not None + else () + ), + *(("--dry-run",) if dry_run else ()), + "--no-index", + "--find-links", + data.find_links, + *args, + ) + + return install + + +@pytest.mark.parametrize( + "with_target_dir", + (True, False), +) +def test_install_dry_run_nothing_installed( + script: PipTestEnvironment, + tmpdir: Path, + install_find_links: Callable[[Iterable[str], bool, Optional[Path]], TestPipResult], + 
with_target_dir: bool, +) -> None: + """Test that pip install --dry-run logs what it would install, but doesn't actually + install anything.""" + if with_target_dir: + install_dir = tmpdir / "fake-install" + install_dir.mkdir() + else: + install_dir = None + + result = install_find_links(["simple"], True, install_dir) assert "Would install simple-3.0" in result.stdout assert "Successfully installed" not in result.stdout + script.assert_not_installed("simple") + if with_target_dir: + assert not os.listdir(install_dir) + + # Ensure that the same install command would normally have worked if not for + # --dry-run. + install_find_links(["simple"], False, install_dir) + if with_target_dir: + assert os.listdir(install_dir) + else: + # This won't be true if we had provided a target dir to install into. + script.assert_installed(simple="3.0") + @pytest.mark.skipif( sys.version_info < (3, 11), diff --git a/tests/functional/test_install_metadata.py b/tests/functional/test_install_metadata.py new file mode 100644 index 00000000000..8cf231f38d4 --- /dev/null +++ b/tests/functional/test_install_metadata.py @@ -0,0 +1,519 @@ +import json +import re +import shutil +import uuid +from dataclasses import dataclass +from enum import Enum +from hashlib import sha256 +from pathlib import Path +from textwrap import dedent +from typing import Any, Callable, Dict, Iterator, List, Optional, Tuple + +import pytest +from pip._vendor.packaging.requirements import Requirement + +from pip._internal.models.direct_url import DirectUrl +from pip._internal.utils.urls import path_to_url +from tests.lib import ( + PipTestEnvironment, + TestData, + TestPipResult, +) + + +class MetadataKind(Enum): + """All the types of values we might be provided for the data-dist-info-metadata + attribute from PEP 658.""" + + # Valid: will read metadata from the dist instead. + No = "none" + # Valid: will read the .metadata file, but won't check its hash. 
+ Unhashed = "unhashed" + # Valid: will read the .metadata file and check its hash matches. + Sha256 = "sha256" + # Invalid: will error out after checking the hash. + WrongHash = "wrong-hash" + # Invalid: will error out after failing to fetch the .metadata file. + NoFile = "no-file" + + +@dataclass(frozen=True) +class FakePackage: + """Mock package structure used to generate a PyPI repository. + + FakePackage name and version should correspond to sdists (.tar.gz files) in our test + data.""" + + name: str + version: str + filename: str + metadata: MetadataKind + # This will override any dependencies specified in the actual dist's METADATA. + requires_dist: Tuple[str, ...] = () + # This will override the Name specified in the actual dist's METADATA. + metadata_name: Optional[str] = None + # Whether to delete the file this points to, which causes any attempt to fetch this + # package to fail unless it is processed as a metadata-only dist. + delete_linked_file: bool = False + + def metadata_filename(self) -> str: + """This is specified by PEP 658.""" + return f"{self.filename}.metadata" + + def generate_additional_tag(self) -> str: + """This gets injected into the tag in the generated PyPI index page for this + package.""" + if self.metadata == MetadataKind.No: + return "" + if self.metadata in [MetadataKind.Unhashed, MetadataKind.NoFile]: + return 'data-dist-info-metadata="true"' + if self.metadata == MetadataKind.WrongHash: + return 'data-dist-info-metadata="sha256=WRONG-HASH"' + assert self.metadata == MetadataKind.Sha256 + checksum = sha256(self.generate_metadata()).hexdigest() + return f'data-dist-info-metadata="sha256={checksum}"' + + def requires_str(self) -> str: + if not self.requires_dist: + return "" + joined = " and ".join(self.requires_dist) + return f"Requires-Dist: {joined}" + + def generate_metadata(self) -> bytes: + """This is written to `self.metadata_filename()` and will override the actual + dist's METADATA, unless `self.metadata == 
MetadataKind.NoFile`.""" + return dedent( + f"""\ + Metadata-Version: 2.1 + Name: {self.metadata_name or self.name} + Version: {self.version} + {self.requires_str()} + """ + ).encode("utf-8") + + +@pytest.fixture(scope="function") +def write_index_html_content(tmpdir: Path) -> Callable[[str], Path]: + """Generate a PyPI package index.html within a temporary local directory.""" + html_dir = tmpdir / "index_html_content" + html_dir.mkdir() + + def generate_index_html_subdir(index_html: str) -> Path: + """Create a new subdirectory after a UUID and write an index.html.""" + new_subdir = html_dir / uuid.uuid4().hex + new_subdir.mkdir() + + (new_subdir / "index.html").write_text(index_html) + + return new_subdir + + return generate_index_html_subdir + + +@pytest.fixture(scope="function") +def html_index_for_packages( + shared_data: TestData, + write_index_html_content: Callable[[str], Path], +) -> Callable[..., Path]: + """Generate a PyPI HTML package index within a local directory pointing to + blank data.""" + + def generate_html_index_for_packages( + packages: Dict[str, List[FakePackage]] + ) -> Path: + """ + Produce a PyPI directory structure pointing to the specified packages. + """ + # (1) Generate the content for a PyPI index.html. + pkg_links = "\n".join( + f' {pkg}' for pkg in packages.keys() + ) + index_html = f"""\ + + + + + Simple index + + +{pkg_links} + +""" + # (2) Generate the index.html in a new subdirectory of the temp directory. + index_html_subdir = write_index_html_content(index_html) + + # (3) Generate subdirectories for individual packages, each with their own + # index.html. + for pkg, links in packages.items(): + pkg_subdir = index_html_subdir / pkg + pkg_subdir.mkdir() + + download_links: List[str] = [] + for package_link in links: + # (3.1) Generate the tag which pip can crawl pointing to this + # specific package version. + download_links.append( + f' {package_link.filename}
' # noqa: E501 + ) + # (3.2) Copy over the corresponding file in `shared_data.packages`. + cached_file = shared_data.packages / package_link.filename + new_file = pkg_subdir / package_link.filename + if not package_link.delete_linked_file: + shutil.copy(cached_file, new_file) + # (3.3) Write a metadata file, if applicable. + if package_link.metadata != MetadataKind.NoFile: + (pkg_subdir / package_link.metadata_filename()).write_bytes( + package_link.generate_metadata() + ) + + # (3.4) After collating all the download links and copying over the files, + # write an index.html with the generated download links for each + # copied file for this specific package name. + download_links_str = "\n".join(download_links) + pkg_index_content = f"""\ + + + + + Links for {pkg} + + +

Links for {pkg}

+{download_links_str} + +""" + (pkg_subdir / "index.html").write_text(pkg_index_content) + + return index_html_subdir + + return generate_html_index_for_packages + + +@pytest.fixture(scope="function") +def install_with_generated_html_index( + script: PipTestEnvironment, + html_index_for_packages: Callable[[Dict[str, List[FakePackage]]], Path], + tmpdir: Path, +) -> Callable[..., Tuple[TestPipResult, Dict[str, Any]]]: + """Execute `pip download` against a generated PyPI index.""" + output_file = tmpdir / "output_file.json" + + def run_for_generated_index( + packages: Dict[str, List[FakePackage]], + args: List[str], + *, + dry_run: bool = True, + allow_error: bool = False, + ) -> Tuple[TestPipResult, Dict[str, Any]]: + """ + Produce a PyPI directory structure pointing to the specified packages, then + execute `pip install --report ... -i ...` pointing to our generated index. + """ + index_dir = html_index_for_packages(packages) + pip_args = [ + "install", + *(("--dry-run",) if dry_run else ()), + "--ignore-installed", + "--report", + str(output_file), + "-i", + path_to_url(str(index_dir)), + *args, + ] + result = script.pip(*pip_args, allow_error=allow_error) + try: + with open(output_file, "rb") as f: + report = json.load(f) + except FileNotFoundError: + if allow_error: + report = {} + else: + raise + return (result, report) + + return run_for_generated_index + + +def iter_dists(report: Dict[str, Any]) -> Iterator[Tuple[Requirement, DirectUrl]]: + """Parse a (req,url) tuple from each installed dist in the --report json.""" + for inst in report["install"]: + metadata = inst["metadata"] + name = metadata["name"] + version = metadata["version"] + req = Requirement(f"{name}=={version}") + direct_url = DirectUrl.from_dict(inst["download_info"]) + yield (req, direct_url) + + +# The package database we generate for testing PEP 658 support. 
_simple_packages: Dict[str, List[FakePackage]] = {
    "simple": [
        FakePackage("simple", "1.0", "simple-1.0.tar.gz", MetadataKind.Sha256),
        FakePackage("simple", "2.0", "simple-2.0.tar.gz", MetadataKind.No),
        # This will raise a hashing error.
        FakePackage("simple", "3.0", "simple-3.0.tar.gz", MetadataKind.WrongHash),
    ],
    "simple2": [
        # Override the dependencies here in order to force pip to download
        # simple-1.0.tar.gz as well.
        FakePackage(
            "simple2",
            "1.0",
            "simple2-1.0.tar.gz",
            MetadataKind.Unhashed,
            ("simple==1.0",),
        ),
        # This will raise an error when pip attempts to fetch the metadata file.
        FakePackage("simple2", "2.0", "simple2-2.0.tar.gz", MetadataKind.NoFile),
        # This has a METADATA file with a mismatched name.
        FakePackage(
            "simple2",
            "3.0",
            "simple2-3.0.tar.gz",
            MetadataKind.Sha256,
            metadata_name="not-simple2",
        ),
    ],
    "colander": [
        # Ensure we can read the dependencies from a metadata file within a wheel
        # *without* PEP 658 metadata.
        FakePackage(
            "colander", "0.9.9", "colander-0.9.9-py2.py3-none-any.whl", MetadataKind.No
        ),
    ],
    "compilewheel": [
        # Ensure we can override the dependencies of a wheel file by injecting PEP
        # 658 metadata.
        FakePackage(
            "compilewheel",
            "1.0",
            "compilewheel-1.0-py2.py3-none-any.whl",
            MetadataKind.Unhashed,
            ("simple==1.0",),
        ),
    ],
    "complex-dist": [
        FakePackage(
            "complex-dist",
            "0.1",
            "complex_dist-0.1-py2.py3-none-any.whl",
            MetadataKind.Unhashed,
            # Validate that the wheel isn't fetched if metadata is available and
            # --dry-run is on, when the metadata presents no hash itself.
            delete_linked_file=True,
        ),
    ],
    "corruptwheel": [
        FakePackage(
            "corruptwheel",
            "1.0",
            "corruptwheel-1.0-py2.py3-none-any.whl",
            # Validate that the wheel isn't fetched if metadata is available and
            # --dry-run is on, when the metadata *does* present a hash.
            MetadataKind.Sha256,
        ),
    ],
    "has-script": [
        # Ensure we check PEP 658 metadata hashing errors for wheel files.
        FakePackage(
            "has-script",
            "1.0",
            "has.script-1.0-py2.py3-none-any.whl",
            MetadataKind.WrongHash,
        ),
    ],
    "translationstring": [
        FakePackage(
            "translationstring", "1.1", "translationstring-1.1.tar.gz", MetadataKind.No
        ),
    ],
    "priority": [
        # Ensure we check for a missing metadata file for wheels.
        FakePackage(
            "priority", "1.0", "priority-1.0-py2.py3-none-any.whl", MetadataKind.NoFile
        ),
    ],
    "requires-simple-extra": [
        # Metadata name is not canonicalized.
        FakePackage(
            "requires-simple-extra",
            "0.1",
            "requires_simple_extra-0.1-py2.py3-none-any.whl",
            MetadataKind.Sha256,
            metadata_name="Requires_Simple.Extra",
        ),
    ],
}


@pytest.mark.parametrize(
    "requirement_to_install, expected_outputs",
    [
        ("simple2==1.0", ["simple2==1.0", "simple==1.0"]),
        ("simple==2.0", ["simple==2.0"]),
        (
            "colander",
            ["colander==0.9.9", "translationstring==1.1"],
        ),
        (
            "compilewheel",
            ["compilewheel==1.0", "simple==1.0"],
        ),
    ],
)
def test_install_with_metadata(
    install_with_generated_html_index: Callable[
        ..., Tuple[TestPipResult, Dict[str, Any]]
    ],
    requirement_to_install: str,
    expected_outputs: List[str],
) -> None:
    """Verify that if a data-dist-info-metadata attribute is present, then it is used
    instead of the actual dist's METADATA."""
    _, report = install_with_generated_html_index(
        _simple_packages,
        [requirement_to_install],
    )
    installed = sorted(str(r) for r, _ in iter_dists(report))
    assert installed == expected_outputs


@pytest.mark.parametrize(
    "requirement_to_install, real_hash",
    [
        (
            "simple==3.0",
            "95e0f200b6302989bcf2cead9465cf229168295ea330ca30d1ffeab5c0fed996",
        ),
        (
            "has-script",
            "16ba92d7f6f992f6de5ecb7d58c914675cf21f57f8e674fb29dcb4f4c9507e5b",
        ),
    ],
)
def test_incorrect_metadata_hash(
    install_with_generated_html_index: Callable[
        ..., Tuple[TestPipResult, Dict[str, Any]]
    ],
    requirement_to_install: str,
    real_hash: str,
) -> None:
    """Verify that if a hash for data-dist-info-metadata is provided, it must match the
    actual hash of the metadata file."""
    result, _ = install_with_generated_html_index(
        _simple_packages,
        [requirement_to_install],
        allow_error=True,
    )
    assert result.returncode != 0
    # NOTE(review): the internal whitespace of this expected message was garbled in
    # the source. Reconstructed to match pip's HashMismatch output format
    # ("        Expected ..." / "             Got        ...") — confirm against
    # pip/_internal/exceptions.py before trusting the exact padding.
    expected_msg = f"""\
        Expected sha256 WRONG-HASH
             Got        {real_hash}"""
    assert expected_msg in result.stderr


@pytest.mark.parametrize(
    "requirement_to_install, expected_url",
    [
        ("simple2==2.0", "simple2-2.0.tar.gz.metadata"),
        ("priority", "priority-1.0-py2.py3-none-any.whl.metadata"),
    ],
)
def test_metadata_not_found(
    install_with_generated_html_index: Callable[
        ..., Tuple[TestPipResult, Dict[str, Any]]
    ],
    requirement_to_install: str,
    expected_url: str,
) -> None:
    """Verify that if a data-dist-info-metadata attribute is provided, that pip will
    fetch the .metadata file at the location specified by PEP 658, and error
    if unavailable."""
    result, _ = install_with_generated_html_index(
        _simple_packages,
        [requirement_to_install],
        allow_error=True,
    )
    assert result.returncode != 0
    expected_re = re.escape(expected_url)
    pattern = re.compile(
        f"ERROR: 404 Client Error: FileNotFoundError for url:.*{expected_re}"
    )
    assert pattern.search(result.stderr), (pattern, result.stderr)


def test_produces_error_for_mismatched_package_name_in_metadata(
    install_with_generated_html_index: Callable[
        ..., Tuple[TestPipResult, Dict[str, Any]]
    ],
) -> None:
    """Verify that the package name from the metadata matches the requested package."""
    result, _ = install_with_generated_html_index(
        _simple_packages,
        ["simple2==3.0"],
        allow_error=True,
    )
    assert result.returncode != 0
    assert (
        "simple2-3.0.tar.gz has inconsistent Name: expected 'simple2', but metadata "
        "has 'not-simple2'"
    ) in result.stdout


@pytest.mark.parametrize(
    "requirement",
    (
        "requires-simple-extra==0.1",
        "REQUIRES_SIMPLE-EXTRA==0.1",
        "REQUIRES....simple-_-EXTRA==0.1",
    ),
)
def test_canonicalizes_package_name_before_verifying_metadata(
    install_with_generated_html_index: Callable[
        ..., Tuple[TestPipResult, Dict[str, Any]]
    ],
    requirement: str,
) -> None:
    """Verify that the package name from the command line and the package's
    METADATA are both canonicalized before comparison, while the name from the METADATA
    is always used verbatim to represent the installed candidate in --report.

    Regression test for https://github.com/pypa/pip/issues/12038
    """
    _, report = install_with_generated_html_index(
        _simple_packages,
        [requirement],
    )
    reqs = [str(r) for r, _ in iter_dists(report)]
    assert reqs == ["Requires_Simple.Extra==0.1"]


@pytest.mark.parametrize(
    "requirement,err_string",
    (
        ("complex-dist==0.1", "404 Client Error: FileNotFoundError"),
        ("corruptwheel==1.0", ".whl is invalid."),
    ),
)
def test_dry_run_avoids_downloading_metadata_only_dists(
    install_with_generated_html_index: Callable[
        ..., Tuple[TestPipResult, Dict[str, Any]]
    ],
    requirement: str,
    err_string: str,
) -> None:
    """Verify that the underlying dist files are not downloaded at all when
    `install --dry-run` is used to resolve dists with PEP 658 metadata."""
    _, report = install_with_generated_html_index(
        _simple_packages,
        [requirement],
    )
    assert [requirement] == list(str(r) for r, _ in iter_dists(report))
    result, _ = install_with_generated_html_index(
        _simple_packages,
        [requirement],
        dry_run=False,
        allow_error=True,
    )
    assert result.returncode != 0
    assert err_string in result.stderr