diff --git a/news/12186.bugfix.rst b/news/12186.bugfix.rst
new file mode 100644
index 00000000000..b51d84a0cb2
--- /dev/null
+++ b/news/12186.bugfix.rst
@@ -0,0 +1 @@
+Avoid downloading any dists in ``install --dry-run`` if PEP 658 ``.metadata`` files or lazy wheels are available.
diff --git a/news/12256.feature.rst b/news/12256.feature.rst
new file mode 100644
index 00000000000..0b5bb356188
--- /dev/null
+++ b/news/12256.feature.rst
@@ -0,0 +1 @@
+Cache computed metadata from sdists and lazy wheels in ``~/.cache/pip/link-metadata`` when ``--use-feature=metadata-cache`` is enabled.
diff --git a/news/12257.feature.rst b/news/12257.feature.rst
new file mode 100644
index 00000000000..21ea8cf5942
--- /dev/null
+++ b/news/12257.feature.rst
@@ -0,0 +1 @@
+Store HTTP caching headers in ``~/.cache/pip/fetch-resolve`` to reduce bandwidth usage when ``--use-feature=metadata-cache`` is enabled.
diff --git a/news/12863.trivial.rst b/news/12863.trivial.rst
new file mode 100644
index 00000000000..dc36a82a0df
--- /dev/null
+++ b/news/12863.trivial.rst
@@ -0,0 +1 @@
+Cache "concrete" dists by ``Distribution`` instead of ``InstallRequirement``.
diff --git a/news/12871.trivial.rst b/news/12871.trivial.rst
new file mode 100644
index 00000000000..186e2bcb3c4
--- /dev/null
+++ b/news/12871.trivial.rst
@@ -0,0 +1 @@
+Refactor much of ``RequirementPreparer`` to avoid duplicated code paths for metadata-only requirements.
diff --git a/src/pip/_internal/cache.py b/src/pip/_internal/cache.py
index ce98f288395..0f1c54c328c 100644
--- a/src/pip/_internal/cache.py
+++ b/src/pip/_internal/cache.py
@@ -2,12 +2,14 @@
from __future__ import annotations
+import abc
import hashlib
import json
import logging
import os
+import re
+from collections.abc import Iterator
from pathlib import Path
-from typing import Any
from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version
from pip._vendor.packaging.utils import canonicalize_name
@@ -16,21 +18,71 @@
from pip._internal.models.direct_url import DirectUrl
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
+from pip._internal.req.req_install import InstallRequirement
from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
from pip._internal.utils.urls import path_to_url
+from pip._internal.vcs import vcs
logger = logging.getLogger(__name__)
+_egg_info_re = re.compile(r"([a-z0-9_.]+)-([a-z0-9_.!+-]+)", re.IGNORECASE)
+
ORIGIN_JSON_NAME = "origin.json"
+def _contains_egg_info(s: str) -> bool:
+ """Determine whether the string looks like an egg_info.
+
+ :param s: The string to parse. E.g. foo-2.1
+ """
+ return bool(_egg_info_re.search(s))
+
+
+def should_cache(
+ req: InstallRequirement,
+) -> bool:
+ """
+ Return whether a built InstallRequirement can be stored in the persistent
+ wheel cache, assuming the wheel cache is available, and _should_build()
+ has determined a wheel needs to be built.
+ """
+ if not req.link:
+ return False
+
+ if req.link.is_wheel:
+ return False
+
+ if req.editable or not req.source_dir:
+ # never cache editable requirements or ones without a source directory
+ return False
+
+ if req.link and req.link.is_vcs:
+ # VCS checkout. Do not cache
+ # unless it points to an immutable commit hash.
+ assert not req.editable
+ assert req.source_dir
+ vcs_backend = vcs.get_backend_for_scheme(req.link.scheme)
+ assert vcs_backend
+ if vcs_backend.is_immutable_rev_checkout(req.link.url, req.source_dir):
+ return True
+ return False
+
+ assert req.link
+ base, ext = req.link.splitext()
+ if _contains_egg_info(base):
+ return True
+
+ # Otherwise, do not cache.
+ return False
+
+
def _hash_dict(d: dict[str, str]) -> str:
"""Return a stable sha224 of a dictionary."""
s = json.dumps(d, sort_keys=True, separators=(",", ":"), ensure_ascii=True)
return hashlib.sha224(s.encode("ascii")).hexdigest()
-class Cache:
+class Cache(abc.ABC):
"""An abstract class - provides cache directories for data from links
:param cache_dir: The root of the cache.
@@ -41,7 +93,9 @@ def __init__(self, cache_dir: str) -> None:
assert not cache_dir or os.path.isabs(cache_dir)
self.cache_dir = cache_dir or None
- def _get_cache_path_parts(self, link: Link) -> list[str]:
+ def _get_cache_path_parts(
+ self, link: Link, *, interpreter_dependent: bool
+ ) -> list[str]:
"""Get parts of part that must be os.path.joined with cache_dir"""
# We want to generate an url to use as our cache key, we don't want to
@@ -53,13 +107,14 @@ def _get_cache_path_parts(self, link: Link) -> list[str]:
if link.subdirectory_fragment:
key_parts["subdirectory"] = link.subdirectory_fragment
- # Include interpreter name, major and minor version in cache key
- # to cope with ill-behaved sdists that build a different wheel
- # depending on the python version their setup.py is being run on,
- # and don't encode the difference in compatibility tags.
- # https://github.com/pypa/pip/issues/7296
- key_parts["interpreter_name"] = interpreter_name()
- key_parts["interpreter_version"] = interpreter_version()
+ if interpreter_dependent:
+ # Include interpreter name, major and minor version in cache key
+ # to cope with ill-behaved sdists that build a different wheel
+ # depending on the python version their setup.py is being run on,
+ # and don't encode the difference in compatibility tags.
+ # https://github.com/pypa/pip/issues/7296
+ key_parts["interpreter_name"] = interpreter_name()
+ key_parts["interpreter_version"] = interpreter_version()
# Encode our key url with sha224; we'll use this because it has similar
# security properties to sha256, but with a shorter total output (and
@@ -74,20 +129,37 @@ def _get_cache_path_parts(self, link: Link) -> list[str]:
return parts
- def _get_candidates(self, link: Link, canonical_package_name: str) -> list[Any]:
- can_not_cache = not self.cache_dir or not canonical_package_name or not link
- if can_not_cache:
- return []
+ @abc.abstractmethod
+ def get_path_for_link(self, link: Link) -> str:
+ """Return a directory to store cached items in for link."""
+ ...
- path = self.get_path_for_link(link)
- if os.path.isdir(path):
- return [(candidate, path) for candidate in os.listdir(path)]
- return []
+ def cache_path(self, link: Link) -> Path:
+ return Path(self.get_path_for_link(link))
+
+
+class LinkMetadataCache(Cache):
+ """Persistently store the metadata of dists found at each link."""
def get_path_for_link(self, link: Link) -> str:
- """Return a directory to store cached items in for link."""
- raise NotImplementedError()
+ parts = self._get_cache_path_parts(link, interpreter_dependent=True)
+ assert self.cache_dir
+ return os.path.join(self.cache_dir, "link-metadata", *parts)
+
+
+class FetchResolveCache(Cache):
+ def get_path_for_link(self, link: Link) -> str:
+ # We are parsing index pages to extract links from them, not executing any
+ # Python code, so these caches are interpreter-independent.
+ parts = self._get_cache_path_parts(link, interpreter_dependent=False)
+ assert self.cache_dir
+ return os.path.join(self.cache_dir, "fetch-resolve", *parts)
+
+class WheelCacheBase(Cache):
+ """Specializations to the cache concept for wheels."""
+
+ @abc.abstractmethod
def get(
self,
link: Link,
@@ -97,10 +169,27 @@ def get(
"""Returns a link to a cached item if it exists, otherwise returns the
passed link.
"""
- raise NotImplementedError()
+ ...
+
+ def _can_cache(self, link: Link, canonical_package_name: str) -> bool:
+ return bool(self.cache_dir and canonical_package_name and link)
+
+ def _get_candidates(
+ self, link: Link, canonical_package_name: str
+ ) -> Iterator[tuple[str, str]]:
+ if not self._can_cache(link, canonical_package_name):
+ return
+ path = self.get_path_for_link(link)
+ if not os.path.isdir(path):
+ return
+
+ for candidate in os.scandir(path):
+ if candidate.is_file():
+ yield (candidate.name, path)
-class SimpleWheelCache(Cache):
+
+class SimpleWheelCache(WheelCacheBase):
"""A cache of wheels for future installs."""
def __init__(self, cache_dir: str) -> None:
@@ -121,7 +210,7 @@ def get_path_for_link(self, link: Link) -> str:
:param link: The link of the sdist for which this will cache wheels.
"""
- parts = self._get_cache_path_parts(link)
+ parts = self._get_cache_path_parts(link, interpreter_dependent=True)
assert self.cache_dir
# Store wheels within the root cache_dir
return os.path.join(self.cache_dir, "wheels", *parts)
@@ -132,7 +221,7 @@ def get(
package_name: str | None,
supported_tags: list[Tag],
) -> Link:
- candidates = []
+ candidates: list[tuple[int, str, str]] = []
if not package_name:
return link
@@ -206,7 +295,7 @@ def __init__(
)
-class WheelCache(Cache):
+class WheelCache(WheelCacheBase):
"""Wraps EphemWheelCache and SimpleWheelCache into a single Cache
This Cache allows for graceful degradation, using the ephem wheel cache
@@ -224,6 +313,15 @@ def get_path_for_link(self, link: Link) -> str:
def get_ephem_path_for_link(self, link: Link) -> str:
return self._ephem_cache.get_path_for_link(link)
+ def resolve_cache_dir(self, req: InstallRequirement) -> str:
+ """Return the persistent or temporary cache directory where the built or
+ downloaded wheel should be stored."""
+ cache_available = bool(self.cache_dir)
+ assert req.link, req
+ if cache_available and should_cache(req):
+ return self.get_path_for_link(req.link)
+ return self.get_ephem_path_for_link(req.link)
+
def get(
self,
link: Link,
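
The two new cache classes above (``LinkMetadataCache`` and ``FetchResolveCache``) reuse the key-derivation scheme of the existing wheel cache. A minimal standalone sketch of that derivation, mirroring ``_hash_dict()`` and ``_get_cache_path_parts()`` (the four-level digest split below matches the layout pip already uses for cached wheels):

    import hashlib
    import json

    def cache_path_parts(url: str) -> list[str]:
        # key_parts would also carry interpreter name/version when
        # interpreter_dependent=True (as LinkMetadataCache requests).
        key_parts = {"url": url}
        s = json.dumps(key_parts, sort_keys=True, separators=(",", ":"), ensure_ascii=True)
        hashed = hashlib.sha224(s.encode("ascii")).hexdigest()
        # Shard the digest into nested directories so no single cache
        # directory grows too large.
        return [hashed[:2], hashed[2:4], hashed[4:6], hashed[6:]]

    print(cache_path_parts("https://pypi.org/simple/pip/"))
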
diff --git a/src/pip/_internal/cli/cmdoptions.py b/src/pip/_internal/cli/cmdoptions.py
index 3519dadf13d..68f3927d808 100644
--- a/src/pip/_internal/cli/cmdoptions.py
+++ b/src/pip/_internal/cli/cmdoptions.py
@@ -1072,6 +1072,8 @@ def check_list_path_option(options: Values) -> None:
default=[],
choices=[
"fast-deps",
+ "metadata-cache",
+ "truststore",
]
+ ALWAYS_ENABLED_FEATURES,
help="Enable new functionality, that may be backward incompatible.",
diff --git a/src/pip/_internal/cli/req_command.py b/src/pip/_internal/cli/req_command.py
index dc1328ff019..864cc2959c5 100644
--- a/src/pip/_internal/cli/req_command.py
+++ b/src/pip/_internal/cli/req_command.py
@@ -13,7 +13,7 @@
from typing import Any
from pip._internal.build_env import SubprocessBuildEnvironmentInstaller
-from pip._internal.cache import WheelCache
+from pip._internal.cache import FetchResolveCache, LinkMetadataCache, WheelCache
from pip._internal.cli import cmdoptions
from pip._internal.cli.index_command import IndexGroupCommand
from pip._internal.cli.index_command import SessionCommandMixin as SessionCommandMixin
@@ -132,6 +132,16 @@ def make_requirement_preparer(
"fast-deps has no effect when used with the legacy resolver."
)
+ if options.cache_dir and "metadata-cache" in options.features_enabled:
+ logger.warning(
+ "pip is using a local cache for metadata information. "
+ "This experimental feature is enabled through "
+ "--use-feature=metadata-cache and it is not ready for "
+ "production."
+ )
+ metadata_cache = LinkMetadataCache(options.cache_dir)
+ else:
+ metadata_cache = None
return RequirementPreparer(
build_dir=temp_build_dir_path,
src_dir=options.src_dir,
@@ -149,6 +159,7 @@ def make_requirement_preparer(
verbosity=verbosity,
legacy_resolver=legacy_resolver,
resume_retries=options.resume_retries,
+ metadata_cache=metadata_cache,
)
@classmethod
@@ -344,8 +355,13 @@ def _build_package_finder(
ignore_requires_python=ignore_requires_python,
)
+ if options.cache_dir and "metadata-cache" in options.features_enabled:
+ fetch_resolve_cache = FetchResolveCache(options.cache_dir)
+ else:
+ fetch_resolve_cache = None
return PackageFinder.create(
link_collector=link_collector,
selection_prefs=selection_prefs,
target_python=target_python,
+ fetch_resolve_cache=fetch_resolve_cache,
)
diff --git a/src/pip/_internal/commands/download.py b/src/pip/_internal/commands/download.py
index 900fb403d6f..a79fdec3050 100644
--- a/src/pip/_internal/commands/download.py
+++ b/src/pip/_internal/commands/download.py
@@ -129,6 +129,9 @@ def run(self, options: Values, args: list[str]) -> int:
self.trace_basic_info(finder)
requirement_set = resolver.resolve(reqs, check_supported_wheels=True)
+ preparer.finalize_linked_requirements(
+ requirement_set.requirements.values(), require_dist_files=True
+ )
downloaded: list[str] = []
for req in requirement_set.requirements.values():
@@ -137,8 +140,6 @@ def run(self, options: Values, args: list[str]) -> int:
preparer.save_linked_requirement(req)
downloaded.append(req.name)
- preparer.prepare_linked_requirements_more(requirement_set.requirements.values())
-
if downloaded:
write_output("Successfully downloaded %s", " ".join(downloaded))
diff --git a/src/pip/_internal/commands/install.py b/src/pip/_internal/commands/install.py
index 1ef7a0f4410..3f5d3e6c256 100644
--- a/src/pip/_internal/commands/install.py
+++ b/src/pip/_internal/commands/install.py
@@ -99,7 +99,8 @@ def add_options(self) -> None:
help=(
"Don't actually install anything, just print what would be. "
"Can be used in combination with --ignore-installed "
- "to 'resolve' the requirements."
+ "to 'resolve' the requirements. If package metadata is available "
+ "or cached, --dry-run also avoids downloading the dependency at all."
),
)
self.cmd_opts.add_option(
@@ -393,6 +394,10 @@ def run(self, options: Values, args: list[str]) -> int:
requirement_set = resolver.resolve(
reqs, check_supported_wheels=not options.target_dir
)
+ preparer.finalize_linked_requirements(
+ requirement_set.requirements.values(),
+ require_dist_files=not options.dry_run,
+ )
if options.json_report_file:
report = InstallationReport(requirement_set.requirements_to_install)
diff --git a/src/pip/_internal/commands/wheel.py b/src/pip/_internal/commands/wheel.py
index 61be254912f..9519a940fc9 100644
--- a/src/pip/_internal/commands/wheel.py
+++ b/src/pip/_internal/commands/wheel.py
@@ -144,6 +144,9 @@ def run(self, options: Values, args: list[str]) -> int:
self.trace_basic_info(finder)
requirement_set = resolver.resolve(reqs, check_supported_wheels=True)
+ preparer.finalize_linked_requirements(
+ requirement_set.requirements.values(), require_dist_files=True
+ )
reqs_to_build: list[InstallRequirement] = []
for req in requirement_set.requirements.values():
@@ -152,8 +155,6 @@ def run(self, options: Values, args: list[str]) -> int:
else:
reqs_to_build.append(req)
- preparer.prepare_linked_requirements_more(requirement_set.requirements.values())
-
# build wheels
build_successes, build_failures = build(
reqs_to_build,
diff --git a/src/pip/_internal/distributions/__init__.py b/src/pip/_internal/distributions/__init__.py
index 9a89a838b9a..a870b227d1e 100644
--- a/src/pip/_internal/distributions/__init__.py
+++ b/src/pip/_internal/distributions/__init__.py
@@ -1,4 +1,5 @@
from pip._internal.distributions.base import AbstractDistribution
+from pip._internal.distributions.installed import InstalledDistribution
from pip._internal.distributions.sdist import SourceDistribution
from pip._internal.distributions.wheel import WheelDistribution
from pip._internal.req.req_install import InstallRequirement
@@ -7,7 +8,18 @@
def make_distribution_for_install_requirement(
install_req: InstallRequirement,
) -> AbstractDistribution:
- """Returns a Distribution for the given InstallRequirement"""
+ """Returns an AbstractDistribution for the given InstallRequirement.
+
+ As AbstractDistribution only covers installable artifacts, this method may only be
+ invoked at the conclusion of a resolve, when the RequirementPreparer has downloaded
+ the file corresponding to the resolved dist. Commands which intend to consume
+ metadata-only resolves without downloading should not call this method or
+ consume AbstractDistribution objects.
+ """
+ # Only pre-installed requirements will have a .satisfied_by dist.
+ if install_req.satisfied_by:
+ return InstalledDistribution(install_req)
+
# Editable requirements will always be source distributions. They use the
# legacy logic until we create a modern standard for them.
if install_req.editable:
diff --git a/src/pip/_internal/distributions/base.py b/src/pip/_internal/distributions/base.py
index ea61f3501e7..3ccb5bd0288 100644
--- a/src/pip/_internal/distributions/base.py
+++ b/src/pip/_internal/distributions/base.py
@@ -39,11 +39,17 @@ def build_tracker_id(self) -> str | None:
If None, then this dist has no work to do in the build tracker, and
``.prepare_distribution_metadata()`` will not be called."""
- raise NotImplementedError()
+ ...
@abc.abstractmethod
def get_metadata_distribution(self) -> BaseDistribution:
- raise NotImplementedError()
+ """Generate a concrete ``BaseDistribution`` instance for this artifact.
+
+ The implementation should also cache the result with
+ ``self.req.cache_concrete_dist()`` so the distribution is available to other
+ users of the ``InstallRequirement``. This method is not called within the build
+ tracker context, so it should not identify any new setup requirements."""
+ ...
@abc.abstractmethod
def prepare_distribution_metadata(
@@ -52,4 +58,11 @@ def prepare_distribution_metadata(
build_isolation: bool,
check_build_deps: bool,
) -> None:
- raise NotImplementedError()
+ """Generate the information necessary to extract metadata from the artifact.
+
+ This method will be executed within the context of ``BuildTracker#track()``, so
+ it needs to fully identify any setup requirements so they can be added to the
+ same active set of tracked builds. ``.get_metadata_distribution()`` then takes
+ care of generating and caching the ``BaseDistribution`` exposed to the rest of
+ the resolve."""
+ ...
diff --git a/src/pip/_internal/distributions/installed.py b/src/pip/_internal/distributions/installed.py
index b6a67df24f4..5e6ee336866 100644
--- a/src/pip/_internal/distributions/installed.py
+++ b/src/pip/_internal/distributions/installed.py
@@ -21,8 +21,10 @@ def build_tracker_id(self) -> str | None:
return None
def get_metadata_distribution(self) -> BaseDistribution:
- assert self.req.satisfied_by is not None, "not actually installed"
- return self.req.satisfied_by
+ dist = self.req.satisfied_by
+ assert dist is not None, "not actually installed"
+ self.req.cache_concrete_dist(dist)
+ return dist
def prepare_distribution_metadata(
self,
diff --git a/src/pip/_internal/distributions/sdist.py b/src/pip/_internal/distributions/sdist.py
index e2821f89e00..c3b91cef621 100644
--- a/src/pip/_internal/distributions/sdist.py
+++ b/src/pip/_internal/distributions/sdist.py
@@ -7,7 +7,7 @@
from pip._internal.build_env import BuildEnvironment
from pip._internal.distributions.base import AbstractDistribution
from pip._internal.exceptions import InstallationError
-from pip._internal.metadata import BaseDistribution
+from pip._internal.metadata import BaseDistribution, get_directory_distribution
from pip._internal.utils.subprocess import runner_with_spinner_message
if TYPE_CHECKING:
@@ -24,13 +24,19 @@ class SourceDistribution(AbstractDistribution):
"""
@property
- def build_tracker_id(self) -> str | None:
+ def build_tracker_id(self) -> str:
"""Identify this requirement uniquely by its link."""
assert self.req.link
return self.req.link.url_without_fragment
def get_metadata_distribution(self) -> BaseDistribution:
- return self.req.get_dist()
+ assert (
+ self.req.metadata_directory
+ ), "Set as part of .prepare_distribution_metadata()"
+ dist = get_directory_distribution(self.req.metadata_directory)
+ self.req.cache_concrete_dist(dist)
+ self.req.validate_sdist_metadata()
+ return dist
def prepare_distribution_metadata(
self,
@@ -69,7 +75,11 @@ def prepare_distribution_metadata(
self._raise_conflicts("the backend dependencies", conflicting)
if missing:
self._raise_missing_reqs(missing)
- self.req.prepare_metadata()
+
+ # NB: .cache_concrete_dist() and .validate_sdist_metadata() still need to be
+ # called (in .get_metadata_distribution()) before the InstallRequirement is
+ # updated with the metadata from this directory!
+ self.req.prepare_metadata_directory()
def _prepare_build_backend(
self, build_env_installer: BuildEnvironmentInstaller
diff --git a/src/pip/_internal/distributions/wheel.py b/src/pip/_internal/distributions/wheel.py
index ee12bfadc2e..61898712d42 100644
--- a/src/pip/_internal/distributions/wheel.py
+++ b/src/pip/_internal/distributions/wheel.py
@@ -33,7 +33,9 @@ def get_metadata_distribution(self) -> BaseDistribution:
assert self.req.local_file_path, "Set as part of preparation during download"
assert self.req.name, "Wheels are never unnamed"
wheel = FilesystemWheel(self.req.local_file_path)
- return get_wheel_distribution(wheel, canonicalize_name(self.req.name))
+ dist = get_wheel_distribution(wheel, canonicalize_name(self.req.name))
+ self.req.cache_concrete_dist(dist)
+ return dist
def prepare_distribution_metadata(
self,
diff --git a/src/pip/_internal/exceptions.py b/src/pip/_internal/exceptions.py
index 98f95494c62..4d90f6877fe 100644
--- a/src/pip/_internal/exceptions.py
+++ b/src/pip/_internal/exceptions.py
@@ -252,6 +252,25 @@ def __str__(self) -> str:
return f"None {self.metadata_name} metadata found for distribution: {self.dist}"
+class CacheMetadataError(PipError):
+ """Raised when de/serializing a requirement into the metadata cache."""
+
+ def __init__(
+ self,
+ req: InstallRequirement,
+ reason: str,
+ ) -> None:
+ """
+ :param req: The requirement we attempted to cache.
+ :param reason: Context about the precise error that occurred.
+ """
+ self.req = req
+ self.reason = reason
+
+ def __str__(self) -> str:
+ return f"{self.reason} for {self.req} from {self.req.link}"
+
+
class UserInstallationInvalid(InstallationError):
"""A --user install is requested on an environment without user site."""
diff --git a/src/pip/_internal/index/collector.py b/src/pip/_internal/index/collector.py
index 00d66daa3bf..fbbeaf5a23e 100644
--- a/src/pip/_internal/index/collector.py
+++ b/src/pip/_internal/index/collector.py
@@ -87,7 +87,9 @@ class _NotHTTP(Exception):
pass
-def _ensure_api_response(url: str, session: PipSession) -> None:
+def _ensure_api_response(
+ url: str, session: PipSession, headers: dict[str, str] | None = None
+) -> None:
"""
Send a HEAD request to the URL, and ensure the response contains a simple
API Response.
@@ -99,13 +101,15 @@ def _ensure_api_response(url: str, session: PipSession) -> None:
if scheme not in {"http", "https"}:
raise _NotHTTP()
- resp = session.head(url, allow_redirects=True)
+ resp = session.head(url, allow_redirects=True, headers=headers)
raise_for_status(resp)
_ensure_api_header(resp)
-def _get_simple_response(url: str, session: PipSession) -> Response:
+def _get_simple_response(
+ url: str, session: PipSession, headers: dict[str, str] | None = None
+) -> Response:
"""Access an Simple API response with GET, and return the response.
This consists of three parts:
@@ -119,10 +123,13 @@ def _get_simple_response(url: str, session: PipSession) -> Response:
and raise `_NotAPIContent` otherwise.
"""
if is_archive_file(Link(url).filename):
- _ensure_api_response(url, session=session)
+ _ensure_api_response(url, session=session, headers=headers)
logger.debug("Getting page %s", redact_auth_from_url(url))
+ logger.debug("headers: %s", str(headers))
+ if headers is None:
+ headers = {}
resp = session.get(
url,
headers={
@@ -147,6 +154,7 @@ def _get_simple_response(url: str, session: PipSession) -> Response:
# once per 10 minutes.
# For more information, please see pypa/pip#5670.
"Cache-Control": "max-age=0",
+ **headers,
},
)
raise_for_status(resp)
@@ -225,7 +233,7 @@ def parse_links(page: IndexContent) -> Iterable[Link]:
if content_type_l.startswith("application/vnd.pypi.simple.v1+json"):
data = json.loads(page.content)
for file in data.get("files", []):
- link = Link.from_json(file, page.url)
+ link = Link.from_json(file, page.url, page_content=page)
if link is None:
continue
yield link
@@ -238,7 +246,9 @@ def parse_links(page: IndexContent) -> Iterable[Link]:
url = page.url
base_url = parser.base_url or url
for anchor in parser.anchors:
- link = Link.from_element(anchor, page_url=url, base_url=base_url)
+ link = Link.from_element(
+ anchor, page_url=url, base_url=base_url, page_content=page
+ )
if link is None:
continue
yield link
@@ -253,6 +263,8 @@ class IndexContent:
:param cache_link_parsing: whether links parsed from this page's url
should be cached. PyPI index urls should
have this set to False, for example.
+ :param etag: The ``ETag`` header from an HTTP request against ``url``.
+ :param date: The ``Date`` header from an HTTP request against ``url``.
"""
content: bytes
@@ -260,6 +272,8 @@ class IndexContent:
encoding: str | None
url: str
cache_link_parsing: bool = True
+ etag: str | None = None
+ date: str | None = None
def __str__(self) -> str:
return redact_auth_from_url(self.url)
@@ -304,7 +318,8 @@ def _handle_get_simple_fail(
def _make_index_content(
- response: Response, cache_link_parsing: bool = True
+ response: Response,
+ cache_link_parsing: bool = True,
) -> IndexContent:
encoding = _get_encoding_from_headers(response.headers)
return IndexContent(
@@ -313,11 +328,15 @@ def _make_index_content(
encoding=encoding,
url=response.url,
cache_link_parsing=cache_link_parsing,
+ etag=response.headers.get("ETag", None),
+ date=response.headers.get("Date", None),
)
-def _get_index_content(link: Link, *, session: PipSession) -> IndexContent | None:
- url = link.url.split("#", 1)[0]
+def _get_index_content(
+ link: Link, *, session: PipSession, headers: dict[str, str] | None = None
+) -> IndexContent | None:
+ url = link.url_without_fragment
# Check for VCS schemes that do not support lookup as web pages.
vcs_scheme = _match_vcs_scheme(url)
@@ -344,7 +363,7 @@ def _get_index_content(link: Link, *, session: PipSession) -> IndexContent | Non
logger.debug(" file: URL is directory, getting %s", url)
try:
- resp = _get_simple_response(url, session=session)
+ resp = _get_simple_response(url, session=session, headers=headers)
except _NotHTTP:
logger.warning(
"Skipping page %s because it looks like an archive, and cannot "
@@ -360,9 +379,7 @@ def _get_index_content(link: Link, *, session: PipSession) -> IndexContent | Non
exc.request_desc,
exc.content_type,
)
- except NetworkConnectionError as exc:
- _handle_get_simple_fail(link, exc)
- except RetryError as exc:
+ except (NetworkConnectionError, RetryError) as exc:
_handle_get_simple_fail(link, exc)
except SSLError as exc:
reason = "There was a problem confirming the ssl certificate: "
@@ -436,11 +453,14 @@ def create(
def find_links(self) -> list[str]:
return self.search_scope.find_links
- def fetch_response(self, location: Link) -> IndexContent | None:
+ def fetch_response(
+ self, location: Link, headers: dict[str, str] | None = None
+ ) -> IndexContent | None:
"""
Fetch an HTML page containing package links.
"""
- return _get_index_content(location, session=self.session)
+ logger.debug("headers: %s", str(headers))
+ return _get_index_content(location, session=self.session, headers=headers)
def collect_sources(
self,
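
The optional ``headers`` parameter threaded through ``fetch_response()`` and ``_get_index_content()`` above exists to carry HTTP validators (``If-None-Match`` / ``If-Modified-Since``). A generic sketch of that conditional-request pattern using only the standard library (the helper name is illustrative, not pip's API):

    from __future__ import annotations

    import urllib.error
    import urllib.request

    def fetch_if_changed(url: str, etag: str | None, date: str | None) -> bytes | None:
        request = urllib.request.Request(url)
        if etag is not None:
            request.add_header("If-None-Match", etag)
        if date is not None:
            request.add_header("If-Modified-Since", date)
        try:
            with urllib.request.urlopen(request) as resp:
                return resp.read()  # page changed (or no validator matched)
        except urllib.error.HTTPError as e:
            if e.code == 304:  # Not Modified: the cached copy is still valid
                return None
            raise
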
diff --git a/src/pip/_internal/index/package_finder.py b/src/pip/_internal/index/package_finder.py
index bc523cd42d8..633f1eb855e 100644
--- a/src/pip/_internal/index/package_finder.py
+++ b/src/pip/_internal/index/package_finder.py
@@ -2,17 +2,21 @@
from __future__ import annotations
+import binascii
+import datetime
import enum
import functools
import itertools
import logging
+import os
import re
+import time
from collections.abc import Iterable
from dataclasses import dataclass
+from hashlib import sha256
+from pathlib import Path
from typing import (
TYPE_CHECKING,
- Optional,
- Union,
)
from pip._vendor.packaging import specifiers
@@ -21,13 +25,14 @@
from pip._vendor.packaging.version import InvalidVersion, _BaseVersion
from pip._vendor.packaging.version import parse as parse_version
+from pip._internal.cache import FetchResolveCache
from pip._internal.exceptions import (
BestVersionAlreadyInstalled,
DistributionNotFound,
InvalidWheelFilename,
UnsupportedWheel,
)
-from pip._internal.index.collector import LinkCollector, parse_links
+from pip._internal.index.collector import IndexContent, LinkCollector, parse_links
from pip._internal.models.candidate import InstallationCandidate
from pip._internal.models.format_control import FormatControl
from pip._internal.models.link import Link
@@ -47,14 +52,14 @@
if TYPE_CHECKING:
from typing_extensions import TypeGuard
+ BuildTag = tuple[()] | tuple[int, str]
+ CandidateSortingKey = tuple[int, int, int, _BaseVersion, int | None, BuildTag]
+
__all__ = ["FormatControl", "BestCandidateResult", "PackageFinder"]
logger = getLogger(__name__)
-BuildTag = Union[tuple[()], tuple[int, str]]
-CandidateSortingKey = tuple[int, int, int, _BaseVersion, Optional[int], BuildTag]
-
def _check_link_requires_python(
link: Link,
@@ -593,6 +598,7 @@ def __init__(
format_control: FormatControl | None = None,
candidate_prefs: CandidatePreferences | None = None,
ignore_requires_python: bool | None = None,
+ fetch_resolve_cache: FetchResolveCache | None = None,
) -> None:
"""
This constructor is primarily meant to be used by the create() class
@@ -627,6 +633,8 @@ def __init__(
BestCandidateResult,
] = {}
+ self._fetch_resolve_cache = fetch_resolve_cache
+
# Don't include an allow_yanked default value to make sure each call
# site considers whether yanked releases are allowed. This also causes
# that decision to be made explicit in the calling code, which helps
@@ -637,6 +645,7 @@ def create(
link_collector: LinkCollector,
selection_prefs: SelectionPreferences,
target_python: TargetPython | None = None,
+ fetch_resolve_cache: FetchResolveCache | None = None,
) -> PackageFinder:
"""Create a PackageFinder.
@@ -661,6 +670,7 @@ def create(
allow_yanked=selection_prefs.allow_yanked,
format_control=selection_prefs.format_control,
ignore_requires_python=selection_prefs.ignore_requires_python,
+ fetch_resolve_cache=fetch_resolve_cache,
)
@property
@@ -800,18 +810,235 @@ def evaluate_links(
return candidates
- def process_project_url(
+ _HTTP_DATE_FORMAT = "%a, %d %b %Y %H:%M:%S %Z"
+
+ @classmethod
+ def _try_load_http_cache_headers(
+ cls,
+ etag_path: Path,
+ date_path: Path,
+ checksum_path: Path,
+ project_url: Link,
+ headers: dict[str, str],
+ ) -> tuple[str | None, datetime.datetime | None, bytes | None]:
+ etag: str | None = None
+ try:
+ etag = etag_path.read_text()
+ etag = f'"{etag}"'
+ logger.debug(
+ "found cached etag for url %s at %s: %s",
+ project_url,
+ etag_path,
+ etag,
+ )
+ headers["If-None-Match"] = etag
+ except OSError as e:
+ logger.debug("no etag found for url %s (%s)", project_url, str(e))
+
+ date: datetime.datetime | None = None
+ try:
+ date_bytes = date_path.read_bytes()
+ date_int = int.from_bytes(date_bytes, byteorder="big", signed=False)
+ date = datetime.datetime.fromtimestamp(date_int, tz=datetime.timezone.utc)
+ logger.debug(
+ "found cached date for url %s at %s: '%s'",
+ project_url,
+ date_path,
+ date,
+ )
+ headers["If-Modified-Since"] = date.strftime(cls._HTTP_DATE_FORMAT)
+ except OSError as e:
+ logger.debug("no date found for url %s (%s)", project_url, str(e))
+
+ checksum: bytes | None = None
+ try:
+ checksum = checksum_path.read_bytes()
+ logger.debug(
+ "found checksum for url %s at %s: '%s'",
+ project_url,
+ checksum_path,
+ binascii.b2a_base64(checksum, newline=False).decode("ascii"),
+ )
+ except OSError as e:
+ logger.debug("no checksum found for url %s (%s)", project_url, str(e))
+
+ return (etag, date, checksum)
+
+ _quoted_value = re.compile(r'^"([^"]*)"$')
+
+ @classmethod
+ def _strip_quoted_value(cls, value: str) -> str:
+ return cls._quoted_value.sub(r"\1", value)
+
+ _now_local = datetime.datetime.now().astimezone()
+ _local_tz = _now_local.tzinfo
+ assert _local_tz is not None
+ _local_tz_name = _local_tz.tzname(_now_local)
+
+ @classmethod
+ def _write_http_cache_info(
+ cls,
+ etag_path: Path,
+ date_path: Path,
+ checksum_path: Path,
+ project_url: Link,
+ index_response: IndexContent,
+ prev_etag: str | None,
+ prev_checksum: bytes | None,
+ ) -> tuple[str | None, datetime.datetime | None, bytes, bool]:
+ hasher = sha256()
+ hasher.update(index_response.content)
+ new_checksum = hasher.digest()
+ checksum_path.write_bytes(new_checksum)
+ page_unmodified = new_checksum == prev_checksum
+
+ new_etag: str | None = index_response.etag
+ if new_etag is None:
+ logger.debug("no etag returned from fetch for url %s", project_url.url)
+ try:
+ etag_path.unlink()
+ except OSError:
+ pass
+ else:
+ new_etag = cls._strip_quoted_value(new_etag)
+ if new_etag != prev_etag:
+ logger.debug(
+ "etag for url %s updated from %s -> %s",
+ project_url.url,
+ prev_etag,
+ new_etag,
+ )
+ etag_path.write_text(new_etag)
+ else:
+ logger.debug(
+ "etag was unmodified for url %s (%s)", project_url.url, prev_etag
+ )
+ assert page_unmodified
+
+ new_date: datetime.datetime | None = None
+ date_str: str | None = index_response.date
+ if date_str is None:
+ logger.debug(
+ "no date header was provided in response for url %s", project_url
+ )
+ else:
+ date_str = date_str.strip()
+ new_time = time.strptime(date_str, cls._HTTP_DATE_FORMAT)
+ new_date = datetime.datetime.strptime(date_str, cls._HTTP_DATE_FORMAT)
+ # strptime() doesn't set the timezone according to the parsed %Z arg, which
+ # may be any of "UTC", "GMT", or any element of `time.tzname`.
+ if new_time.tm_zone in ["UTC", "GMT"]:
+ logger.debug(
+ "a UTC timezone was found in response for url %s", project_url
+ )
+ new_date = new_date.replace(tzinfo=datetime.timezone.utc)
+ else:
+ assert new_time.tm_zone in time.tzname, new_time
+ logger.debug(
+ "a local timezone %s was found in response for url %s",
+ new_time.tm_zone,
+ project_url,
+ )
+ if new_time.tm_zone == cls._local_tz_name:
+ new_date = new_date.replace(tzinfo=cls._local_tz)
+ else:
+ logger.debug(
+ "a local timezone %s had to be discarded in response %s",
+ new_time.tm_zone,
+ project_url,
+ )
+ new_date = None
+
+ if new_date is not None:
+ timestamp = new_date.timestamp()
+ # The timestamp will only have second resolution according to the parse
+ # format string _HTTP_DATE_FORMAT.
+ assert not (timestamp % 1), (new_date, timestamp)
+ epoch = int(timestamp)
+ assert epoch >= 0, (new_date, timestamp, epoch)
+ date_bytes = epoch.to_bytes(length=4, byteorder="big", signed=False)
+ date_path.write_bytes(date_bytes)
+
+ logger.debug('date "%s" written for url %s', new_date, project_url)
+ if new_date is None:
+ try:
+ date_path.unlink()
+ except OSError:
+ pass
+
+ return (new_etag, new_date, new_checksum, page_unmodified)
+
+ def _process_project_url_uncached(
self, project_url: Link, link_evaluator: LinkEvaluator
) -> list[InstallationCandidate]:
logger.debug(
"Fetching project page and analyzing links: %s",
project_url,
)
+
index_response = self._link_collector.fetch_response(project_url)
if index_response is None:
return []
- page_links = list(parse_links(index_response))
+ page_links = parse_links(index_response)
+
+ with indent_log():
+ package_links = self.evaluate_links(link_evaluator, links=page_links)
+ return package_links
+
+ def process_project_url(
+ self, project_url: Link, link_evaluator: LinkEvaluator
+ ) -> list[InstallationCandidate]:
+ if self._fetch_resolve_cache is None:
+ return self._process_project_url_uncached(project_url, link_evaluator)
+
+ cached_path = self._fetch_resolve_cache.cache_path(project_url)
+ os.makedirs(str(cached_path), exist_ok=True)
+
+ etag_path = cached_path / "etag"
+ date_path = cached_path / "modified-since-date"
+ checksum_path = cached_path / "checksum"
+
+ headers: dict[str, str] = {
+ # Wipe any other Cache-Control headers away; we are explicitly managing the
+ # caching here.
+ "Cache-Control": "",
+ }
+ # NB: mutates headers!
+ prev_etag, _prev_date, prev_checksum = self._try_load_http_cache_headers(
+ etag_path, date_path, checksum_path, project_url, headers
+ )
+
+ logger.debug(
+ "Fetching project page and analyzing links: %s",
+ project_url,
+ )
+
+ # A 304 Not Modified is implicitly converted into a reused cached response by
+ # the CacheControl library, so we won't explicitly check for a 304.
+ index_response = self._link_collector.fetch_response(
+ project_url,
+ headers=headers,
+ )
+ if index_response is None:
+ return []
+
+ (
+ _new_etag,
+ _new_date,
+ _new_checksum,
+ page_unmodified,
+ ) = self._write_http_cache_info(
+ etag_path,
+ date_path,
+ checksum_path,
+ project_url,
+ index_response,
+ prev_etag=prev_etag,
+ prev_checksum=prev_checksum,
+ )
+
+ page_links = parse_links(index_response)
with indent_log():
package_links = self.evaluate_links(
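
A short demonstration of the timezone quirk ``_write_http_cache_info()`` works around: ``strptime()`` accepts a ``%Z`` directive but leaves the resulting datetime naive, which is why the code inspects ``struct_time.tm_zone`` before attaching a tzinfo. The date string here is illustrative:

    import datetime
    import time

    date_str = "Wed, 21 Oct 2015 07:28:00 GMT"
    fmt = "%a, %d %b %Y %H:%M:%S %Z"

    parsed = datetime.datetime.strptime(date_str, fmt)
    assert parsed.tzinfo is None  # naive, despite the %Z directive
    assert time.strptime(date_str, fmt).tm_zone == "GMT"

    aware = parsed.replace(tzinfo=datetime.timezone.utc)
    # The cache then stores this instant as a 4-byte big-endian epoch.
    epoch = int(aware.timestamp())
    date_bytes = epoch.to_bytes(length=4, byteorder="big", signed=False)
    assert int.from_bytes(date_bytes, byteorder="big", signed=False) == epoch
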
diff --git a/src/pip/_internal/metadata/__init__.py b/src/pip/_internal/metadata/__init__.py
index 927e375cad0..368950f9946 100644
--- a/src/pip/_internal/metadata/__init__.py
+++ b/src/pip/_internal/metadata/__init__.py
@@ -9,7 +9,14 @@
from pip._internal.utils.deprecation import deprecated
from pip._internal.utils.misc import strtobool
-from .base import BaseDistribution, BaseEnvironment, FilesystemWheel, MemoryWheel, Wheel
+from .base import (
+ BaseDistribution,
+ BaseEnvironment,
+ FilesystemWheel,
+ MemoryWheel,
+ Wheel,
+ serialize_metadata,
+)
__all__ = [
"BaseDistribution",
@@ -21,6 +28,7 @@
"get_environment",
"get_wheel_distribution",
"select_backend",
+ "serialize_metadata",
]
diff --git a/src/pip/_internal/metadata/base.py b/src/pip/_internal/metadata/base.py
index 230e11473c6..4e0d23625cd 100644
--- a/src/pip/_internal/metadata/base.py
+++ b/src/pip/_internal/metadata/base.py
@@ -1,7 +1,10 @@
from __future__ import annotations
import csv
+import email.generator
import email.message
+import email.policy
+import email.utils
import functools
import json
import logging
@@ -11,10 +14,10 @@
from collections.abc import Collection, Container, Iterable, Iterator
from typing import (
IO,
+ TYPE_CHECKING,
Any,
NamedTuple,
Protocol,
- Union,
)
from pip._vendor.packaging.requirements import Requirement
@@ -36,7 +39,8 @@
from ._json import msg_to_json
-InfoPath = Union[str, pathlib.PurePath]
+if TYPE_CHECKING:
+ InfoPath = str | pathlib.PurePath
logger = logging.getLogger(__name__)
@@ -85,6 +89,20 @@ def _convert_installed_files_path(
return str(pathlib.Path(*info, *entry))
+def serialize_metadata(msg: email.message.Message) -> str:
+ """Write a dist's metadata to a string.
+
+ Calling ``str(dist.metadata)`` may raise an error by misinterpreting RST directives
+ as email headers. This function uses the more robust ``email.policy.EmailPolicy`` to
+ avoid those parsing errors."""
+ # Packages such as google_pasta-0.2.0 trigger a particular encoding behavior that
+ # required an upstream fix (https://github.com/python/cpython/pull/117369):
+ #
+ # > email.errors.HeaderWriteError: folded header
+ # > contains newline: 'Description: UNKNOWN\n\n\n'
+ return msg.as_string(policy=email.policy.HTTP.clone(refold_source="all"))
+
+
class RequiresEntry(NamedTuple):
requirement: str
extra: str
@@ -92,6 +110,15 @@ class RequiresEntry(NamedTuple):
class BaseDistribution(Protocol):
+ @property
+ def is_concrete(self) -> bool:
+ """Whether the distribution really exists somewhere on disk.
+
+ If this is false, it has been synthesized from metadata, e.g. via
+ ``.from_metadata_file_contents()``, or ``.from_wheel()`` against
+ a ``MemoryWheel``."""
+ raise NotImplementedError()
+
@classmethod
def from_directory(cls, directory: str) -> BaseDistribution:
"""Load the distribution from a metadata directory.
@@ -664,6 +691,10 @@ def iter_installed_distributions(
class Wheel(Protocol):
location: str
+ @property
+ def is_concrete(self) -> bool:
+ raise NotImplementedError()
+
def as_zipfile(self) -> zipfile.ZipFile:
raise NotImplementedError()
@@ -672,6 +703,10 @@ class FilesystemWheel(Wheel):
def __init__(self, location: str) -> None:
self.location = location
+ @property
+ def is_concrete(self) -> bool:
+ return True
+
def as_zipfile(self) -> zipfile.ZipFile:
return zipfile.ZipFile(self.location, allowZip64=True)
@@ -681,5 +716,9 @@ def __init__(self, location: str, stream: IO[bytes]) -> None:
self.location = location
self.stream = stream
+ @property
+ def is_concrete(self) -> bool:
+ return False
+
def as_zipfile(self) -> zipfile.ZipFile:
return zipfile.ZipFile(self.stream, allowZip64=True)
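
A small sketch of the serialization pattern ``serialize_metadata()`` codifies: rendering a ``Message`` through an ``EmailPolicy`` that refolds all source headers, which avoids the ``HeaderWriteError`` quoted in its comment. The message fields here are made up:

    import email.message
    import email.policy

    msg = email.message.Message()
    msg["Metadata-Version"] = "2.1"
    msg["Name"] = "example"
    msg["Version"] = "1.0"
    msg.set_payload("A long description that plain str() might mis-fold.\n")

    # Equivalent to serialize_metadata(msg) above.
    text = msg.as_string(policy=email.policy.HTTP.clone(refold_source="all"))
    print(text)
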
diff --git a/src/pip/_internal/metadata/importlib/_dists.py b/src/pip/_internal/metadata/importlib/_dists.py
index 97363af9a55..9bd3e0766fb 100644
--- a/src/pip/_internal/metadata/importlib/_dists.py
+++ b/src/pip/_internal/metadata/importlib/_dists.py
@@ -7,6 +7,7 @@
from collections.abc import Collection, Iterable, Iterator, Mapping, Sequence
from os import PathLike
from typing import (
+ TYPE_CHECKING,
cast,
)
@@ -19,7 +20,6 @@
from pip._internal.metadata.base import (
BaseDistribution,
BaseEntryPoint,
- InfoPath,
Wheel,
)
from pip._internal.utils.misc import normalize_path
@@ -33,6 +33,9 @@
parse_name_and_version_from_info_directory,
)
+if TYPE_CHECKING:
+ from pip._internal.metadata.base import InfoPath
+
class WheelDistribution(importlib.metadata.Distribution):
"""An ``importlib.metadata.Distribution`` read from a wheel.
@@ -104,16 +107,22 @@ def __init__(
dist: importlib.metadata.Distribution,
info_location: BasePath | None,
installed_location: BasePath | None,
+ concrete: bool,
) -> None:
self._dist = dist
self._info_location = info_location
self._installed_location = installed_location
+ self._concrete = concrete
+
+ @property
+ def is_concrete(self) -> bool:
+ return self._concrete
@classmethod
def from_directory(cls, directory: str) -> BaseDistribution:
info_location = pathlib.Path(directory)
dist = importlib.metadata.Distribution.at(info_location)
- return cls(dist, info_location, info_location.parent)
+ return cls(dist, info_location, info_location.parent, concrete=True)
@classmethod
def from_metadata_file_contents(
@@ -130,7 +139,7 @@ def from_metadata_file_contents(
metadata_path.write_bytes(metadata_contents)
# Construct dist pointing to the newly created directory.
dist = importlib.metadata.Distribution.at(metadata_path.parent)
- return cls(dist, metadata_path.parent, None)
+ return cls(dist, metadata_path.parent, None, concrete=False)
@classmethod
def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
@@ -139,7 +148,14 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
dist = WheelDistribution.from_zipfile(zf, name, wheel.location)
except zipfile.BadZipFile as e:
raise InvalidWheel(wheel.location, name) from e
- return cls(dist, dist.info_location, pathlib.PurePosixPath(wheel.location))
+ except UnsupportedWheel as e:
+ raise UnsupportedWheel(f"{name} has an invalid wheel, {e}")
+ return cls(
+ dist,
+ dist.info_location,
+ pathlib.PurePosixPath(wheel.location),
+ concrete=wheel.is_concrete,
+ )
@property
def location(self) -> str | None:
diff --git a/src/pip/_internal/metadata/importlib/_envs.py b/src/pip/_internal/metadata/importlib/_envs.py
index 71a73b7311f..0aeade7857d 100644
--- a/src/pip/_internal/metadata/importlib/_envs.py
+++ b/src/pip/_internal/metadata/importlib/_envs.py
@@ -86,7 +86,7 @@ def find(self, location: str) -> Iterator[BaseDistribution]:
installed_location: BasePath | None = None
else:
installed_location = info_location.parent
- yield Distribution(dist, info_location, installed_location)
+ yield Distribution(dist, info_location, installed_location, concrete=True)
def find_legacy_editables(self, location: str) -> Iterator[BaseDistribution]:
"""Read location in egg-link files and return distributions in there.
@@ -110,7 +110,7 @@ def find_legacy_editables(self, location: str) -> Iterator[BaseDistribution]:
continue
target_location = str(path.joinpath(target_rel))
for dist, info_location in self._find_impl(target_location):
- yield Distribution(dist, info_location, path)
+ yield Distribution(dist, info_location, path, concrete=True)
class Environment(BaseEnvironment):
diff --git a/src/pip/_internal/metadata/pkg_resources.py b/src/pip/_internal/metadata/pkg_resources.py
index 89fce8b6e5d..0158972cd3d 100644
--- a/src/pip/_internal/metadata/pkg_resources.py
+++ b/src/pip/_internal/metadata/pkg_resources.py
@@ -7,6 +7,7 @@
import zipfile
from collections.abc import Collection, Iterable, Iterator, Mapping
from typing import (
+ TYPE_CHECKING,
NamedTuple,
)
@@ -25,10 +26,12 @@
BaseDistribution,
BaseEntryPoint,
BaseEnvironment,
- InfoPath,
Wheel,
)
+if TYPE_CHECKING:
+ from .base import InfoPath
+
__all__ = ["NAME", "Distribution", "Environment"]
logger = logging.getLogger(__name__)
@@ -78,8 +81,9 @@ def run_script(self, script_name: str, namespace: str) -> None:
class Distribution(BaseDistribution):
- def __init__(self, dist: pkg_resources.Distribution) -> None:
+ def __init__(self, dist: pkg_resources.Distribution, concrete: bool) -> None:
self._dist = dist
+ self._concrete = concrete
# This is populated lazily, to avoid loading metadata for all possible
# distributions eagerly.
self.__extra_mapping: Mapping[NormalizedName, str] | None = None
@@ -93,6 +97,10 @@ def _extra_mapping(self) -> Mapping[NormalizedName, str]:
return self.__extra_mapping
+ @property
+ def is_concrete(self) -> bool:
+ return self._concrete
+
@classmethod
def from_directory(cls, directory: str) -> BaseDistribution:
dist_dir = directory.rstrip(os.sep)
@@ -111,7 +119,7 @@ def from_directory(cls, directory: str) -> BaseDistribution:
dist_name = os.path.splitext(dist_dir_name)[0].split("-")[0]
dist = dist_cls(base_dir, project_name=dist_name, metadata=metadata)
- return cls(dist)
+ return cls(dist, concrete=True)
@classmethod
def from_metadata_file_contents(
@@ -128,7 +136,7 @@ def from_metadata_file_contents(
metadata=InMemoryMetadata(metadata_dict, filename),
project_name=project_name,
)
- return cls(dist)
+ return cls(dist, concrete=False)
@classmethod
def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
@@ -149,7 +157,7 @@ def from_wheel(cls, wheel: Wheel, name: str) -> BaseDistribution:
metadata=InMemoryMetadata(metadata_dict, wheel.location),
project_name=name,
)
- return cls(dist)
+ return cls(dist, concrete=wheel.is_concrete)
@property
def location(self) -> str | None:
@@ -261,7 +269,7 @@ def from_paths(cls, paths: list[str] | None) -> BaseEnvironment:
def _iter_distributions(self) -> Iterator[BaseDistribution]:
for dist in self._ws:
- yield Distribution(dist)
+ yield Distribution(dist, concrete=True)
def _search_distribution(self, name: str) -> BaseDistribution | None:
"""Find a distribution matching the ``name`` in the environment.
diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py
index 2e2c0f836ac..42c948cd955 100644
--- a/src/pip/_internal/models/link.py
+++ b/src/pip/_internal/models/link.py
@@ -281,6 +281,7 @@ def from_json(
cls,
file_data: dict[str, Any],
page_url: str,
+ page_content: IndexContent | None = None,
) -> Link | None:
"""
Convert a PyPI JSON document from a simple repository page into a Link.
@@ -320,7 +321,7 @@ def from_json(
return cls(
url,
- comes_from=page_url,
+ comes_from=page_content or page_url,
requires_python=pyrequire,
yanked_reason=yanked_reason,
hashes=hashes,
@@ -333,6 +334,7 @@ def from_element(
anchor_attribs: dict[str, str | None],
page_url: str,
base_url: str,
+ page_content: IndexContent | None = None,
) -> Link | None:
"""
Convert an anchor element's attributes in a simple repository page to a Link.
@@ -373,7 +375,7 @@ def from_element(
return cls(
url,
- comes_from=page_url,
+ comes_from=page_content or page_url,
requires_python=pyrequire,
yanked_reason=yanked_reason,
metadata_file_data=metadata_file_data,
diff --git a/src/pip/_internal/operations/check.py b/src/pip/_internal/operations/check.py
index 2d71fa5fff5..1ceca5c4f57 100644
--- a/src/pip/_internal/operations/check.py
+++ b/src/pip/_internal/operations/check.py
@@ -17,7 +17,6 @@
from pip._vendor.packaging.utils import NormalizedName, canonicalize_name
from pip._vendor.packaging.version import Version
-from pip._internal.distributions import make_distribution_for_install_requirement
from pip._internal.metadata import get_default_environment
from pip._internal.metadata.base import BaseDistribution
from pip._internal.req.req_install import InstallRequirement
@@ -148,8 +147,8 @@ def _simulate_installation_of(
# Modify it as installing requirement_set would (assuming no errors)
for inst_req in to_install:
- abstract_dist = make_distribution_for_install_requirement(inst_req)
- dist = abstract_dist.get_metadata_distribution()
+ assert inst_req.is_concrete
+ dist = inst_req.get_dist()
name = dist.canonical_name
package_set[name] = PackageDetails(dist.version, list(dist.iter_dependencies()))
diff --git a/src/pip/_internal/operations/prepare.py b/src/pip/_internal/operations/prepare.py
index 00b1a33a030..c037ba242b5 100644
--- a/src/pip/_internal/operations/prepare.py
+++ b/src/pip/_internal/operations/prepare.py
@@ -4,6 +4,8 @@
# mypy: strict-optional=False
from __future__ import annotations
+import bz2
+import json
import mimetypes
import os
import shutil
@@ -13,11 +15,13 @@
from typing import TYPE_CHECKING
from pip._vendor.packaging.utils import canonicalize_name
+from pip._vendor.requests.exceptions import InvalidSchema
from pip._internal.build_env import BuildEnvironmentInstaller
+from pip._internal.cache import LinkMetadataCache, should_cache
from pip._internal.distributions import make_distribution_for_install_requirement
-from pip._internal.distributions.installed import InstalledDistribution
from pip._internal.exceptions import (
+ CacheMetadataError,
DirectoryUrlHashUnsupported,
HashMismatch,
HashUnpinned,
@@ -27,7 +31,11 @@
VcsHashUnsupported,
)
from pip._internal.index.package_finder import PackageFinder
-from pip._internal.metadata import BaseDistribution, get_metadata_distribution
+from pip._internal.metadata import (
+ BaseDistribution,
+ get_metadata_distribution,
+ serialize_metadata,
+)
from pip._internal.models.direct_url import ArchiveInfo
from pip._internal.models.link import Link
from pip._internal.models.wheel import Wheel
@@ -68,16 +76,31 @@ def _get_prepared_distribution(
build_env_installer: BuildEnvironmentInstaller,
build_isolation: bool,
check_build_deps: bool,
-) -> BaseDistribution:
- """Prepare a distribution for installation."""
+) -> tuple[bool, BaseDistribution]:
+ """Prepare a distribution for installation.
+
+ This function is only called by the preparer at the end of the resolve, and only
+ for commands which need installable artifacts (not just resolved metadata). If the
+ dist was previously metadata-only, the preparer must have downloaded the file
+ corresponding to the dist and set ``req.local_file_path``.
+
+ This function executes ``req.cache_concrete_dist()``, so that after invocation
+ ``req.is_concrete`` is True and ``req.get_dist()`` returns a concrete
+ ``Distribution``.
+
+ :returns: a 2-tuple:
+ - (bool): whether the metadata had to be constructed (e.g. from an sdist build),
+ - (BaseDistribution): the concrete distribution which is ready to be installed.
+ """
abstract_dist = make_distribution_for_install_requirement(req)
tracker_id = abstract_dist.build_tracker_id
- if tracker_id is not None:
+ builds_metadata = tracker_id is not None
+ if builds_metadata:
with build_tracker.track(req, tracker_id):
abstract_dist.prepare_distribution_metadata(
build_env_installer, build_isolation, check_build_deps
)
- return abstract_dist.get_metadata_distribution()
+ return (builds_metadata, abstract_dist.get_metadata_distribution())
def unpack_vcs_link(link: Link, location: str, verbosity: int) -> None:
@@ -200,6 +223,8 @@ def _check_download_dir(
) -> str | None:
"""Check download_dir for previously downloaded file with correct hash
If a correct file is found return its path else None
+
+ If a file is found at the given path, but with an invalid hash, the file is deleted.
"""
download_path = os.path.join(download_dir, link.filename)
@@ -222,6 +247,45 @@ def _check_download_dir(
return download_path
+@dataclass(frozen=True)
+class CacheableDist:
+ metadata: str
+ filename: Path
+ canonical_name: str
+
+ @classmethod
+ def from_dist(cls, link: Link, dist: BaseDistribution) -> CacheableDist:
+ """Extract the serializable data necessary to generate a metadata-only dist."""
+ return cls(
+ metadata=serialize_metadata(dist.metadata),
+ filename=Path(link.filename),
+ canonical_name=dist.canonical_name,
+ )
+
+ def to_dist(self) -> BaseDistribution:
+ """Return a metadata-only dist from the deserialized cache entry."""
+ return get_metadata_distribution(
+ metadata_contents=self.metadata.encode("utf-8"),
+ filename=str(self.filename),
+ canonical_name=self.canonical_name,
+ )
+
+ def to_json(self) -> dict[str, str]:
+ return {
+ "metadata": self.metadata,
+ "filename": str(self.filename),
+ "canonical_name": self.canonical_name,
+ }
+
+ @classmethod
+ def from_json(cls, args: dict[str, str]) -> CacheableDist:
+ return cls(
+ metadata=args["metadata"],
+ filename=Path(args["filename"]),
+ canonical_name=args["canonical_name"],
+ )
+
+
class RequirementPreparer:
"""Prepares a Requirement"""
@@ -244,6 +308,7 @@ def __init__( # noqa: PLR0913 (too many parameters)
verbosity: int,
legacy_resolver: bool,
resume_retries: int,
+ metadata_cache: LinkMetadataCache | None = None,
) -> None:
super().__init__()
@@ -286,6 +351,8 @@ def __init__( # noqa: PLR0913 (too many parameters)
# Previous "header" printed for a link-based InstallRequirement
self._previous_requirement_header = ("", "")
+ self._metadata_cache = metadata_cache
+
def _log_preparing_link(self, req: InstallRequirement) -> None:
"""Provide context for the requirement being prepared."""
if req.link.is_file and not req.is_wheel_from_cache:
@@ -317,11 +384,7 @@ def _ensure_link_req_src_dir(
self, req: InstallRequirement, parallel_builds: bool
) -> None:
"""Ensure source_dir of a linked InstallRequirement."""
- # Since source_dir is only set for editable requirements.
- if req.link.is_wheel:
- # We don't need to unpack wheels, so no need for a source
- # directory.
- return
+ assert not req.link.is_wheel
assert req.source_dir is None
if req.link.is_existing_dir():
# build local directories in-tree
@@ -368,6 +431,47 @@ def _get_linked_req_hashes(self, req: InstallRequirement) -> Hashes:
# showing the user what the hash should be.
return req.hashes(trust_internet=False) or MissingHashes()
+ def _rewrite_link_and_hashes_for_cached_wheel(
+ self, req: InstallRequirement, hashes: Hashes
+ ) -> Hashes | None:
+ """Check the hash of the requirement's source eagerly and rewrite its link.
+
+ ``req.link`` is unconditionally rewritten to the cached wheel's original source
+ link, so the requirement reflects where its artifact was actually downloaded
+ from rather than the local cache entry.
+
+ :returns: None if the source hash validated successfully, otherwise the
+ ``Hashes`` to verify against a re-downloaded source artifact.
+ """
+ assert hashes
+ assert req.is_wheel_from_cache
+ assert req.download_info is not None
+ assert req.link.is_wheel
+ assert req.link.is_file
+
+ # TODO: is it possible to avoid providing a "wrong" req.link in the first place
+ # in the resolver, instead of having to patch it up afterwards?
+ req.link = req.cached_wheel_source_link
+
+ # We need to verify hashes, and we have found the requirement in the cache
+ # of locally built wheels.
+ if (
+ isinstance(req.download_info.info, ArchiveInfo)
+ and req.download_info.info.hashes
+ and hashes.has_one_of(req.download_info.info.hashes)
+ ):
+ # At this point we know the requirement was built from a hashable source
+ # artifact, and we verified that the cache entry's hash of the original
+ # artifact matches one of the hashes we expect. We don't verify hashes
+ # against the cached wheel, because the wheel is not the original.
+ return None
+
+ logger.warning(
+ "The hashes of the source archive found in cache entry "
+ "don't match, ignoring cached built wheel "
+ "and re-downloading source."
+ )
+ return hashes
+
def _fetch_metadata_only(
self,
req: InstallRequirement,
@@ -378,14 +482,96 @@ def _fetch_metadata_only(
)
return None
if self.require_hashes:
+ # Hash checking also means hashes are provided for all reqs, so no resolve
+ # is necessary and metadata-only fetching provides no speedup.
logger.debug(
"Metadata-only fetching is not used as hash checking is required",
)
return None
- # Try PEP 658 metadata first, then fall back to lazy wheel if unavailable.
- return self._fetch_metadata_using_link_data_attr(
- req
- ) or self._fetch_metadata_using_lazy_wheel(req.link)
+
+ if cached_dist := self._fetch_cached_metadata(req):
+ return cached_dist
+ # If we've used the lazy wheel approach, then PEP 658 metadata is not available.
+ # If the wheel is very large (>1GB), then retrieving it from the CacheControl
+ # HTTP cache may take multiple seconds, even on a fast computer, and the
+ # preparer will unnecessarily copy the cached response to disk before deleting
+ # it at the end of the run. Caching the dist metadata in LinkMetadataCache means
+ # later pip executions can retrieve metadata within milliseconds and avoid
+ # thrashing the disk.
+ # Even if we do employ PEP 658 metadata, we would still have to ping PyPI to
+ # ensure the .metadata file hasn't changed if we relied on CacheControl, even
+ # though PEP 658 metadata is guaranteed to be immutable. We optimize for this
+ # case by referring to our local cache.
+ if cached_dist := (
+ self._fetch_metadata_using_link_data_attr(req)
+ or self._fetch_metadata_using_lazy_wheel(req)
+ ):
+ self._cache_metadata(req, cached_dist)
+ return cached_dist
+ return None
+
+ def _locate_metadata_cache_entry(self, link: Link) -> Path | None:
+ """If the metadata cache is active, generate a filesystem path from the hash of
+ the given Link."""
+ if self._metadata_cache is None:
+ return None
+
+ return self._metadata_cache.cache_path(link)
+
+ def _fetch_cached_metadata(
+ self, req: InstallRequirement
+ ) -> BaseDistribution | None:
+ cached_path = self._locate_metadata_cache_entry(req.link)
+ if cached_path is None:
+ return None
+
+ # Quietly continue if the cache entry does not exist.
+ if not os.path.isfile(cached_path):
+ logger.debug(
+ "no cached metadata for link %s at %s",
+ req.link,
+ cached_path,
+ )
+ return None
+
+ try:
+ with bz2.open(cached_path, mode="rt", encoding="utf-8") as f:
+ logger.debug(
+ "found cached metadata for link %s at %s", req.link, cached_path
+ )
+ args = json.load(f)
+ cached_dist = CacheableDist.from_json(args)
+ return cached_dist.to_dist()
+ except Exception as e:
+ raise CacheMetadataError(req, "error reading cached metadata") from e
+
+ def _cache_metadata(
+ self,
+ req: InstallRequirement,
+ metadata_dist: BaseDistribution,
+ ) -> None:
+ cached_path = self._locate_metadata_cache_entry(req.link)
+ if cached_path is None:
+ return
+
+ # The cache file exists already, so we have nothing to do.
+ if os.path.isfile(cached_path):
+ logger.debug(
+ "metadata for link %s is already cached at %s", req.link, cached_path
+ )
+ return
+
+ # The metadata cache is split across several subdirectories, so ensure the
+ # containing directory for the cache file exists before writing.
+ os.makedirs(str(cached_path.parent), exist_ok=True)
+ try:
+ cacheable_dist = CacheableDist.from_dist(req.link, metadata_dist)
+ args = cacheable_dist.to_json()
+ logger.debug("caching metadata for link %s at %s", req.link, cached_path)
+ with bz2.open(cached_path, mode="wt", encoding="utf-8") as f:
+ json.dump(args, f)
+ except Exception as e:
+ raise CacheMetadataError(req, "failed to serialize metadata") from e
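
Cache entries are bz2-compressed JSON; a self-contained sketch of the round-trip used above (CacheableDist is a pip internal, so the payload here is a stand-in dict):

    import bz2
    import json
    import tempfile
    from pathlib import Path

    def write_entry(path: Path, payload: dict) -> None:
        # Mirror os.makedirs(..., exist_ok=True) above: the cache fans out into
        # hash-derived subdirectories that may not exist yet.
        path.parent.mkdir(parents=True, exist_ok=True)
        with bz2.open(path, mode="wt", encoding="utf-8") as f:
            json.dump(payload, f)

    def read_entry(path: Path) -> dict:
        with bz2.open(path, mode="rt", encoding="utf-8") as f:
            return json.load(f)

    entry = Path(tempfile.mkdtemp()) / "ab" / "cd" / "entry"
    write_entry(entry, {"name": "simple", "metadata": "Name: simple\nVersion: 1.0"})
    assert read_entry(entry)["name"] == "simple"
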
def _fetch_metadata_using_link_data_attr(
self,
@@ -403,6 +589,9 @@ def _fetch_metadata_using_link_data_attr(
metadata_link,
)
# (2) Download the contents of the METADATA file, separate from the dist itself.
+ # NB: this request will hit the CacheControl HTTP cache, which will be very
+ # quick since the METADATA file is very small. Therefore, we can rely on
+ # HTTP caching instead of LinkMetadataCache.
metadata_file = get_http_url(
metadata_link,
self._download,
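
Per PEP 658, the metadata file lives next to the dist at the same URL with `.metadata` appended; a sketch of deriving that location (pip's Link encapsulates this, the standalone helper is illustrative):

    def metadata_url(dist_url: str) -> str:
        base = dist_url.split("#", 1)[0]  # drop any fragment such as #sha256=...
        return base + ".metadata"

    assert (
        metadata_url("https://example.com/simple-1.0-py3-none-any.whl#sha256=ab")
        == "https://example.com/simple-1.0-py3-none-any.whl.metadata"
    )
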
@@ -430,36 +619,38 @@ def _fetch_metadata_using_link_data_attr(
def _fetch_metadata_using_lazy_wheel(
self,
- link: Link,
+ req: InstallRequirement,
) -> BaseDistribution | None:
"""Fetch metadata using lazy wheel, if possible."""
# --use-feature=fast-deps must be provided.
if not self.use_lazy_wheel:
return None
- if link.is_file or not link.is_wheel:
+ if req.link.is_file or not req.link.is_wheel:
logger.debug(
"Lazy wheel is not used as %r does not point to a remote wheel",
- link,
+ req.link,
)
return None
- wheel = Wheel(link.filename)
+ wheel = Wheel(req.link.filename)
name = canonicalize_name(wheel.name)
logger.info(
"Obtaining dependency information from %s %s",
name,
wheel.version,
)
- url = link.url.split("#", 1)[0]
+
try:
- return dist_from_wheel_url(name, url, self._session)
+ return dist_from_wheel_url(
+ name, req.link.url_without_fragment, self._session
+ )
except HTTPRangeRequestUnsupported:
- logger.debug("%s does not support range requests", url)
+ logger.debug("%s does not support range requests", req.link)
return None
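
fast-deps relies on HTTP Range requests to read just the wheel's central directory and METADATA without downloading the whole file. dist_from_wheel_url discovers a lack of support lazily by raising HTTPRangeRequestUnsupported; a hypothetical up-front probe would look like:

    import requests

    def supports_range_requests(url: str, session: requests.Session) -> bool:
        resp = session.head(url, allow_redirects=True)
        resp.raise_for_status()
        # Servers advertise byte-range support via the Accept-Ranges header.
        return resp.headers.get("Accept-Ranges", "none").lower() == "bytes"
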
def _complete_partial_requirements(
self,
- partially_downloaded_reqs: Iterable[InstallRequirement],
+ metadata_only_reqs: list[InstallRequirement],
parallel_builds: bool = False,
) -> None:
"""Download any requirements which were only fetched by metadata."""
@@ -471,9 +662,23 @@ def _complete_partial_requirements(
# `req.local_file_path` on the appropriate requirement after passing
# all the links at once into BatchDownloader.
links_to_fully_download: dict[Link, InstallRequirement] = {}
- for req in partially_downloaded_reqs:
+ for req in metadata_only_reqs:
assert req.link
- links_to_fully_download[req.link] = req
+
+ # (1) File URLs don't need to be downloaded, so skip them.
+ if req.link.scheme == "file":
+ continue
+ # (2) If this is e.g. a git url, we don't know how to handle that in the
+ # BatchDownloader, so leave it for self._prepare_linked_requirement() at
+ # the end of this method, which knows how to handle any URL.
+ can_simply_download = True
+ try:
+ # This will raise InvalidSchema if our Session can't download it.
+ self._session.get_adapter(req.link.url)
+ except InvalidSchema:
+ can_simply_download = False
+ if can_simply_download:
+ links_to_fully_download[req.link] = req
batch_download = self._download.batch(links_to_fully_download.keys(), temp_dir)
for link, (filepath, _) in batch_download:
@@ -495,9 +700,33 @@ def _complete_partial_requirements(
# This step is necessary to ensure all lazy wheels are processed
# successfully by the 'download', 'wheel', and 'install' commands.
- for req in partially_downloaded_reqs:
+ for req in metadata_only_reqs:
self._prepare_linked_requirement(req, parallel_builds)
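
The adapter probe above leans on requests' public behavior: Session.get_adapter() raises InvalidSchema when no mounted adapter matches the URL prefix. A standalone demonstration (the URLs are examples):

    import requests
    from requests.exceptions import InvalidSchema

    session = requests.Session()  # mounts adapters for http:// and https://
    for url in ("https://example.com/pkg.whl", "git+https://example.com/repo.git"):
        try:
            session.get_adapter(url)
            print(url, "-> batch downloadable")
        except InvalidSchema:
            print(url, "-> left for _prepare_linked_requirement()")
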
+ def _check_download_path(self, req: InstallRequirement) -> None:
+ """Check if the relevant file is already available in the download directory.
+
+ If so, check its hash, and delete the file if the hash doesn't match."""
+ if self.download_dir is None:
+ return
+ if not req.link.is_wheel:
+ return
+
+ hashes = self._get_linked_req_hashes(req)
+ if file_path := _check_download_dir(
+ req.link,
+ self.download_dir,
+ hashes,
+ # When a locally built wheel has been found in cache, we don't warn
+ # about re-downloading when the already downloaded wheel hash does
+ # not match. This is because the hash must be checked against the
+ # original link, not the cached link. In that case the already
+ # downloaded file will be removed and re-fetched from cache (which
+ # implies a hash check against the cache entry's origin.json).
+ warn_on_hash_mismatch=not req.is_wheel_from_cache,
+ ):
+ self._downloaded[req.link.url] = file_path
+
def prepare_linked_requirement(
self, req: InstallRequirement, parallel_builds: bool = False
) -> BaseDistribution:
@@ -505,108 +734,109 @@ def prepare_linked_requirement(
assert req.link
self._log_preparing_link(req)
with indent_log():
- # Check if the relevant file is already available
- # in the download directory
- file_path = None
- if self.download_dir is not None and req.link.is_wheel:
- hashes = self._get_linked_req_hashes(req)
- file_path = _check_download_dir(
- req.link,
- self.download_dir,
- hashes,
- # When a locally built wheel has been found in cache, we don't warn
- # about re-downloading when the already downloaded wheel hash does
- # not match. This is because the hash must be checked against the
- # original link, not the cached link. It that case the already
- # downloaded file will be removed and re-fetched from cache (which
- # implies a hash check against the cache entry's origin.json).
- warn_on_hash_mismatch=not req.is_wheel_from_cache,
- )
+ # See if the file is already downloaded, and check its hash if so.
+ self._check_download_path(req)
- if file_path is not None:
- # The file is already available, so mark it as downloaded
- self._downloaded[req.link.url] = file_path
- else:
- # The file is not available, attempt to fetch only metadata
- metadata_dist = self._fetch_metadata_only(req)
- if metadata_dist is not None:
- req.needs_more_preparation = True
- return metadata_dist
+ # First try to fetch only metadata.
+ if metadata_dist := self._fetch_metadata_only(req):
+ # These reqs now have the dependency information from the downloaded
+ # metadata, without having downloaded the actual dist at all.
+ req.cache_virtual_metadata_only_dist(metadata_dist)
+ return metadata_dist
# None of the optimizations worked, fully prepare the requirement
return self._prepare_linked_requirement(req, parallel_builds)
- def prepare_linked_requirements_more(
- self, reqs: Iterable[InstallRequirement], parallel_builds: bool = False
- ) -> None:
- """Prepare linked requirements more, if needed."""
- reqs = [req for req in reqs if req.needs_more_preparation]
+ def _extract_download_info(self, reqs: Iterable[InstallRequirement]) -> None:
+ """
+ `pip install --report` extracts the download info from each requirement for its
+ JSON output, so every requirement must have it before the resolve finishes.
+ By the time this method is called, .download_info has only been populated for
+ requirements already found in the wheel cache, so we synthesize it for any
+ uncached results. Luckily, a DirectUrl can be parsed directly from a url
+ without any other context. However, the synthesized download info will only
+ contain a hash if the link itself declares one.
+ """
for req in reqs:
- # Determine if any of these requirements were already downloaded.
- if self.download_dir is not None and req.link.is_wheel:
- hashes = self._get_linked_req_hashes(req)
- file_path = _check_download_dir(req.link, self.download_dir, hashes)
- if file_path is not None:
- self._downloaded[req.link.url] = file_path
- req.needs_more_preparation = False
-
- # Prepare requirements we found were already downloaded for some
- # reason. The other downloads will be completed separately.
- partially_downloaded_reqs: list[InstallRequirement] = []
+ if req.download_info is None:
+ self._ensure_download_info(req)
+
+ def _force_fully_prepared(
+ self, reqs: Iterable[InstallRequirement], assert_has_dist_files: bool
+ ) -> None:
+ """
+ The legacy resolver prepares requirements differently, in a way that can
+ leave them half-done in certain code paths. Its exact behavior is hard to
+ pin down, so explicitly mark every requirement as fully prepared here.
+ """
for req in reqs:
- if req.needs_more_preparation:
- partially_downloaded_reqs.append(req)
- else:
- self._prepare_linked_requirement(req, parallel_builds)
+ req.prepared = True
+ if assert_has_dist_files:
+ assert req.is_concrete
- # TODO: separate this part out from RequirementPreparer when the v1
- # resolver can be removed!
+ def _ensure_dist_files(
+ self, reqs: Iterable[InstallRequirement], parallel_builds: bool = False
+ ) -> None:
+ """Download any metadata-only linked requirements."""
+ metadata_only_reqs = [req for req in reqs if not req.is_concrete]
self._complete_partial_requirements(
- partially_downloaded_reqs,
+ metadata_only_reqs,
parallel_builds=parallel_builds,
)
- def _prepare_linked_requirement(
- self, req: InstallRequirement, parallel_builds: bool
- ) -> BaseDistribution:
- assert req.link
- link = req.link
-
- hashes = self._get_linked_req_hashes(req)
+ def finalize_linked_requirements(
+ self,
+ reqs: Iterable[InstallRequirement],
+ require_dist_files: bool,
+ parallel_builds: bool = False,
+ ) -> None:
+ """Prepare linked requirements more, if needed.
+
+ Neighboring .metadata files as per PEP 658, or lazy wheels via fast-deps, are
+ preferred for extracting metadata from any linked requirement (one that has
+ been mapped to a Link) without downloading the underlying wheel or sdist.
+ When ``pip
+ install --dry-run`` is called, we want to avoid ever downloading the underlying
+ dist, but we still need to provide all of the results that pip commands expect
+ from the typical resolve process.
+
+ Those expectations vary, but one distinction lies in whether the command needs
+ an actual physical dist somewhere on the filesystem, or just the metadata about
+ it from the resolver (as in ``pip install --report``). If the command requires
+ actual physical filesystem locations for the resolved dists, it must call this
+ method with ``require_dist_files=True`` to fully download anything
+ that remains.
+ """
+ if require_dist_files:
+ self._ensure_dist_files(reqs, parallel_builds=parallel_builds)
+ else:
+ self._extract_download_info(reqs)
+ self._force_fully_prepared(reqs, assert_has_dist_files=require_dist_files)
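
A toy model of the two finalization modes, with stand-in types (illustrative only; the real method operates on InstallRequirement and the preparer's download machinery):

    from dataclasses import dataclass

    @dataclass
    class FakeReq:
        name: str
        is_concrete: bool = False
        download_info: str | None = None

    def finalize(reqs: list[FakeReq], require_dist_files: bool) -> None:
        if require_dist_files:
            for r in reqs:
                r.is_concrete = True  # stands in for the batch download
        else:
            for r in reqs:
                if r.download_info is None:
                    r.download_info = f"direct_url({r.name})"  # for --report
        assert not require_dist_files or all(r.is_concrete for r in reqs)

    reqs = [FakeReq("simple"), FakeReq("simple2", is_concrete=True)]
    finalize(reqs, require_dist_files=False)  # --dry-run: nothing downloaded
    assert reqs[0].download_info is not None and not reqs[0].is_concrete
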
- if hashes and req.is_wheel_from_cache:
- assert req.download_info is not None
- assert link.is_wheel
- assert link.is_file
- # We need to verify hashes, and we have found the requirement in the cache
- # of locally built wheels.
- if (
- isinstance(req.download_info.info, ArchiveInfo)
- and req.download_info.info.hashes
- and hashes.has_one_of(req.download_info.info.hashes)
- ):
- # At this point we know the requirement was built from a hashable source
- # artifact, and we verified that the cache entry's hash of the original
- # artifact matches one of the hashes we expect. We don't verify hashes
- # against the cached wheel, because the wheel is not the original.
- hashes = None
- else:
- logger.warning(
- "The hashes of the source archive found in cache entry "
- "don't match, ignoring cached built wheel "
- "and re-downloading source."
- )
- req.link = req.cached_wheel_source_link
- link = req.link
+ def _ensure_local_file_path(
+ self, req: InstallRequirement, hashes: Hashes | None
+ ) -> None:
+ """Ensure that ``req.link`` is downloaded locally, matches the expected hash,
+ and that ``req.local_file_path`` is set to the download location."""
+ if req.link.is_existing_dir():
+ return
- self._ensure_link_req_src_dir(req, parallel_builds)
+ # NB: req.local_file_path may be set already, if it was:
+ # (1) sourced from a local file (such as a local .tar.gz path),
+ # (2) also in the wheel cache (e.g. built from an sdist).
+ # We will overwrite it if so, since the local file path will still point to the
+ # .tar.gz source instead of the wheel cache entry.
- if link.is_existing_dir():
- local_file = None
- elif link.url not in self._downloaded:
+ local_file: File | None = None
+ # The file may have already been downloaded in batch if it was
+ # a metadata-only requirement, or if it was already in the download directory.
+ if file_path := self._downloaded.get(req.link.url, None):
+ if hashes:
+ hashes.check_against_path(file_path)
+ local_file = File(file_path, content_type=None)
+ else:
try:
local_file = unpack_url(
- link,
+ req.link,
req.source_dir,
self._download,
self.verbosity,
@@ -616,51 +846,82 @@ def _prepare_linked_requirement(
except NetworkConnectionError as exc:
raise InstallationError(
f"Could not install requirement {req} because of HTTP "
- f"error {exc} for URL {link}"
- )
- else:
- file_path = self._downloaded[link.url]
- if hashes:
- hashes.check_against_path(file_path)
- local_file = File(file_path, content_type=None)
+ f"error {exc} for URL {req.link}"
+ ) from exc
- # If download_info is set, we got it from the wheel cache.
- if req.download_info is None:
- # Editables don't go through this function (see
- # prepare_editable_requirement).
- assert not req.editable
- req.download_info = direct_url_from_link(link, req.source_dir)
- # Make sure we have a hash in download_info. If we got it as part of the
- # URL, it will have been verified and we can rely on it. Otherwise we
- # compute it from the downloaded file.
- # FIXME: https://github.com/pypa/pip/issues/11943
- if (
- isinstance(req.download_info.info, ArchiveInfo)
- and not req.download_info.info.hashes
- and local_file
- ):
- hash = hash_file(local_file.path)[0].hexdigest()
- # We populate info.hash for backward compatibility.
- # This will automatically populate info.hashes.
- req.download_info.info.hash = f"sha256={hash}"
-
- # For use in later processing,
- # preserve the file path on the requirement.
- if local_file:
+ if local_file is not None:
req.local_file_path = local_file.path
- dist = _get_prepared_distribution(
+ def _prepare_and_finalize_dist(self, req: InstallRequirement) -> BaseDistribution:
+ (builds_metadata, dist) = _get_prepared_distribution(
req,
self.build_tracker,
self.build_env_installer,
self.build_isolation,
self.check_build_deps,
)
+ assert dist.is_concrete
+ assert req.is_concrete
+ assert req.get_dist() is dist
+
+ if builds_metadata and should_cache(req):
+ self._cache_metadata(req, dist)
+
return dist
+ def _prepare_linked_requirement(
+ self, req: InstallRequirement, parallel_builds: bool
+ ) -> BaseDistribution:
+ """Ensure the dist pointing to the fully-resolved requirement is downloaded and
+ installable."""
+ assert req.link, "this requirement must have a download link to fully prepare"
+
+ hashes: Hashes | None = self._get_linked_req_hashes(req)
+
+ if hashes and req.is_wheel_from_cache:
+ hashes = self._rewrite_link_and_hashes_for_cached_wheel(req, hashes)
+
+ # req.source_dir is only set for editable requirements. We don't need to unpack
+ # wheels, so no need for a source directory.
+ if not req.link.is_wheel:
+ self._ensure_link_req_src_dir(req, parallel_builds)
+
+ # Ensure the dist is downloaded, check its hash, and unpack it into the source
+ # directory (if applicable).
+ self._ensure_local_file_path(req, hashes)
+
+ # Set req.download_info for --report output.
+ if req.download_info is None:
+ # If download_info is set, we got it from the wheel cache.
+ self._ensure_download_info(req)
+
+ # Build (if necessary) and prepare the distribution for installation.
+ return self._prepare_and_finalize_dist(req)
+
+ def _ensure_download_info(self, req: InstallRequirement) -> None:
+ """Ensure that ``req.download_info`` is set, for uses such as --report."""
+ assert req.download_info is None
+ # Editables don't go through this function (see prepare_editable_requirement).
+ assert not req.editable
+ req.download_info = direct_url_from_link(req.link, req.source_dir)
+ # Make sure we have a hash in download_info. If we got it as part of the
+ # URL, it will have been verified and we can rely on it. Otherwise we
+ # compute it from the downloaded file.
+ # FIXME: https://github.com/pypa/pip/issues/11943
+ if (
+ isinstance(req.download_info.info, ArchiveInfo)
+ and not req.download_info.info.hashes
+ and req.local_file_path
+ ):
+ hash = hash_file(req.local_file_path)[0].hexdigest()
+ # We populate info.hash for backward compatibility.
+ # This will automatically populate info.hashes.
+ req.download_info.info.hash = f"sha256={hash}"
+
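
The fallback digest computation above, as a standalone snippet (hash_file is a pip helper; this version uses hashlib directly):

    import hashlib

    def sha256_of(path: str) -> str:
        h = hashlib.sha256()
        with open(path, "rb") as f:
            for chunk in iter(lambda: f.read(8192), b""):
                h.update(chunk)
        return h.hexdigest()

    # download_info.info.hash would then be set to f"sha256={sha256_of(path)}".
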
def save_linked_requirement(self, req: InstallRequirement) -> None:
assert self.download_dir is not None
assert req.link is not None
+ assert req.is_concrete
link = req.link
if link.is_vcs or (link.is_existing_dir() and req.editable):
# Make a .zip of the source_dir we already created.
@@ -704,18 +965,8 @@ def prepare_editable_requirement(
req.update_editable()
assert req.source_dir
req.download_info = direct_url_for_editable(req.unpacked_source_directory)
-
- dist = _get_prepared_distribution(
- req,
- self.build_tracker,
- self.build_env_installer,
- self.build_isolation,
- self.check_build_deps,
- )
-
req.check_if_exists(self.use_user_site)
-
- return dist
+ return self._prepare_and_finalize_dist(req)
def prepare_installed_requirement(
self,
@@ -739,4 +990,4 @@ def prepare_installed_requirement(
"completely repeatable environment, install into an "
"empty virtualenv."
)
- return InstalledDistribution(req).get_metadata_distribution()
+ return self._prepare_and_finalize_dist(req)
diff --git a/src/pip/_internal/req/constructors.py b/src/pip/_internal/req/constructors.py
index 056e7e3a7f1..121b75f0df2 100644
--- a/src/pip/_internal/req/constructors.py
+++ b/src/pip/_internal/req/constructors.py
@@ -554,7 +554,7 @@ def install_req_extend_extras(
"""
result = copy.copy(ireq)
result.extras = {*ireq.extras, *extras}
- result.req = (
+ result._req = (
_set_requirement_extras(ireq.req, result.extras)
if ireq.req is not None
else None
diff --git a/src/pip/_internal/req/req_install.py b/src/pip/_internal/req/req_install.py
index c9f6bff17e8..378fd63f33b 100644
--- a/src/pip/_internal/req/req_install.py
+++ b/src/pip/_internal/req/req_install.py
@@ -10,7 +10,7 @@
from collections.abc import Collection, Iterable, Sequence
from optparse import Values
from pathlib import Path
-from typing import Any
+from typing import TYPE_CHECKING
from pip._vendor.packaging.markers import Marker
from pip._vendor.packaging.requirements import Requirement
@@ -26,10 +26,7 @@
from pip._internal.metadata import (
BaseDistribution,
get_default_environment,
- get_directory_distribution,
- get_wheel_distribution,
)
-from pip._internal.metadata.base import FilesystemWheel
from pip._internal.models.direct_url import DirectUrl
from pip._internal.models.link import Link
from pip._internal.operations.build.metadata import generate_metadata
@@ -62,6 +59,9 @@
from pip._internal.utils.virtualenv import running_under_virtualenv
from pip._internal.vcs import vcs
+if TYPE_CHECKING:
+ import email.message
+
logger = logging.getLogger(__name__)
@@ -91,7 +91,7 @@ def __init__(
permit_editable_wheels: bool = False,
) -> None:
assert req is None or isinstance(req, Requirement), req
- self.req = req
+ self._req = req
self.comes_from = comes_from
self.constraint = constraint
self.editable = editable
@@ -153,6 +153,7 @@ def __init__(
self.hash_options = hash_options if hash_options else {}
self.config_settings = config_settings
# Set to True after successful preparation of this requirement
+ # TODO: this is only used in the legacy resolver: remove this!
self.prepared = False
# User supplied requirement are explicitly requested for installation
# by the user via CLI arguments or requirements files, as opposed to,
@@ -194,12 +195,32 @@ def __init__(
)
self.use_pep517 = True
- # This requirement needs more preparation before it can be built
- self.needs_more_preparation = False
+ # When a dist is computed for this requirement, cache it here so it's visible
+ # everywhere within pip and isn't computed more than once. This may be
+ # a "virtual" dist without a physical location on the filesystem, or
+ # a "concrete" dist which has been fully downloaded.
+ self._dist: BaseDistribution | None = None
# This requirement needs to be unpacked before it can be installed.
self._archive_source: Path | None = None
+ @property
+ def req(self) -> Requirement | None:
+ """Calculate a requirement from the cached dist, if populated.
+
+ The cached dist can be populated by either
+ ``self.cache_virtual_metadata_only_dist()`` or
+ ``self.cache_concrete_dist()`` and can also be retrieved with
+ ``self.get_dist()``."""
+ if self._req is not None:
+ return self._req
+ if self._dist is not None:
+ name = self._dist.canonical_name
+ version = str(self._dist.version)
+ self._req = Requirement(f"{name}=={version}")
+ return self._req
+ return None
+
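
The pin derived above follows packaging's normal forms; the same derivation with the vendored library's public API (a sketch, input name chosen to match the tests below):

    from pip._vendor.packaging.requirements import Requirement
    from pip._vendor.packaging.utils import canonicalize_name

    name = canonicalize_name("Requires_Simple.Extra")  # -> "requires-simple-extra"
    req = Requirement(f"{name}==0.1")
    assert str(req) == "requires-simple-extra==0.1"
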
def __str__(self) -> str:
if self.req:
s = redact_auth_from_requirement(self.req)
@@ -389,7 +410,7 @@ def ensure_build_location(
def _set_requirement(self) -> None:
"""Set requirement after generating metadata."""
- assert self.req is None
+ assert self._req is None
assert self.metadata is not None
assert self.source_dir is not None
@@ -399,7 +420,7 @@ def _set_requirement(self) -> None:
else:
op = "==="
- self.req = get_requirement(
+ self._req = get_requirement(
"".join(
[
self.metadata["Name"],
@@ -425,7 +446,7 @@ def warn_on_mismatching_name(self) -> None:
metadata_name,
self.name,
)
- self.req = get_requirement(metadata_name)
+ self._req = get_requirement(metadata_name)
def check_if_exists(self, use_user_site: bool) -> None:
"""Find an installed distribution that satisfies or conflicts
@@ -553,11 +574,11 @@ def isolated_editable_sanity_check(self) -> None:
f"Consider using a build backend that supports PEP 660."
)
- def prepare_metadata(self) -> None:
+ def prepare_metadata_directory(self) -> None:
"""Ensure that project metadata is available.
- Under PEP 517 and PEP 660, call the backend hook to prepare the metadata.
- Under legacy processing, call setup.py egg-info.
+ Under PEP 517 and PEP 660, call the backend hook to prepare the metadata
+ directory. Under legacy processing, call setup.py egg-info.
"""
assert self.source_dir, f"No source dir for {self}"
details = self.name or f"from {self.link}"
@@ -589,6 +610,8 @@ def prepare_metadata(self) -> None:
details=details,
)
+ def validate_sdist_metadata(self) -> None:
+ """Ensure that we have a dist, and ensure it corresponds to expectations."""
# Act on the newly generated metadata, based on the name and version.
if not self.name:
self._set_requirement()
@@ -598,25 +621,51 @@ def prepare_metadata(self) -> None:
self.assert_source_matches_version()
@property
- def metadata(self) -> Any:
- if not hasattr(self, "_metadata"):
- self._metadata = self.get_dist().metadata
-
- return self._metadata
+ def metadata(self) -> email.message.Message:
+ return self.get_dist().metadata
def get_dist(self) -> BaseDistribution:
- if self.metadata_directory:
- return get_directory_distribution(self.metadata_directory)
- elif self.local_file_path and self.is_wheel:
- assert self.req is not None
- return get_wheel_distribution(
- FilesystemWheel(self.local_file_path),
- canonicalize_name(self.req.name),
- )
- raise AssertionError(
- f"InstallRequirement {self} has no metadata directory and no wheel: "
- f"can't make a distribution."
- )
+ """Retrieve the dist resolved from this requirement.
+
+ :raises AssertionError: if the resolver has not yet been executed.
+ """
+ if self._dist is None:
+ raise AssertionError(f"{self!r} has no dist associated.")
+ return self._dist
+
+ def cache_virtual_metadata_only_dist(self, dist: BaseDistribution) -> None:
+ """Associate a "virtual" metadata-only dist to this requirement.
+
+ This dist cannot be installed, but it can be used to complete the resolve
+ process.
+
+ :raises AssertionError: if a dist has already been associated.
+ :raises AssertionError: if the provided dist is "concrete", i.e. exists
+ somewhere on the filesystem.
+ """
+ assert self._dist is None, self
+ assert not dist.is_concrete, dist
+ self._dist = dist
+
+ def cache_concrete_dist(self, dist: BaseDistribution) -> None:
+ """Associate a "concrete" dist to this requirement.
+
+ A concrete dist exists somewhere on the filesystem and can be installed.
+
+ :raises AssertionError: if a concrete dist has already been associated.
+ :raises AssertionError: if the provided dist is not concrete.
+ """
+ if self._dist is not None:
+ # If we set a dist twice for the same requirement, we must be hydrating
+ # a concrete dist for what was previously virtual. This will occur in the
+ # case of `install --dry-run` when PEP 658 metadata is available.
+ assert not self._dist.is_concrete
+ assert dist.is_concrete
+ self._dist = dist
+
+ @property
+ def is_concrete(self) -> bool:
+ return self._dist is not None and self._dist.is_concrete
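
The virtual-to-concrete hydration invariant above, modeled on stand-in types (a sketch, not the real classes):

    class FakeDist:
        def __init__(self, concrete: bool) -> None:
            self.is_concrete = concrete

    class FakeReq:
        def __init__(self) -> None:
            self._dist: FakeDist | None = None

        def cache_virtual(self, dist: FakeDist) -> None:
            assert self._dist is None and not dist.is_concrete
            self._dist = dist

        def cache_concrete(self, dist: FakeDist) -> None:
            if self._dist is not None:
                # Only ever hydrate a previously virtual dist.
                assert not self._dist.is_concrete
            assert dist.is_concrete
            self._dist = dist

    r = FakeReq()
    r.cache_virtual(FakeDist(False))   # resolve phase: metadata only
    r.cache_concrete(FakeDist(True))   # prepare phase: dist file downloaded
    assert r._dist.is_concrete
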
def assert_source_matches_version(self) -> None:
assert self.source_dir, f"No source dir for {self}"
diff --git a/src/pip/_internal/req/req_set.py b/src/pip/_internal/req/req_set.py
index 3451b24f27b..8db9b89d85f 100644
--- a/src/pip/_internal/req/req_set.py
+++ b/src/pip/_internal/req/req_set.py
@@ -42,7 +42,7 @@ def add_unnamed_requirement(self, install_req: InstallRequirement) -> None:
self.unnamed_requirements.append(install_req)
def add_named_requirement(self, install_req: InstallRequirement) -> None:
- assert install_req.name
+ assert install_req.name, install_req
project_name = canonicalize_name(install_req.name)
self.requirements[project_name] = install_req
diff --git a/src/pip/_internal/resolution/resolvelib/resolver.py b/src/pip/_internal/resolution/resolvelib/resolver.py
index 1ba70c2b39e..c4fd4e28f93 100644
--- a/src/pip/_internal/resolution/resolvelib/resolver.py
+++ b/src/pip/_internal/resolution/resolvelib/resolver.py
@@ -180,11 +180,6 @@ def resolve(
req_set.add_named_requirement(ireq)
- reqs = req_set.all_requirements
- self.factory.preparer.prepare_linked_requirements_more(reqs)
- for req in reqs:
- req.prepared = True
- req.needs_more_preparation = False
return req_set
def get_installation_order(
diff --git a/src/pip/_internal/wheel_builder.py b/src/pip/_internal/wheel_builder.py
index beed02f00c5..5ced2679e33 100644
--- a/src/pip/_internal/wheel_builder.py
+++ b/src/pip/_internal/wheel_builder.py
@@ -4,7 +4,6 @@
import logging
import os.path
-import re
import shutil
from collections.abc import Iterable
@@ -26,23 +25,12 @@
from pip._internal.utils.subprocess import call_subprocess
from pip._internal.utils.temp_dir import TempDirectory
from pip._internal.utils.urls import path_to_url
-from pip._internal.vcs import vcs
logger = logging.getLogger(__name__)
-_egg_info_re = re.compile(r"([a-z0-9_.]+)-([a-z0-9_.!+-]+)", re.IGNORECASE)
-
BuildResult = tuple[list[InstallRequirement], list[InstallRequirement]]
-def _contains_egg_info(s: str) -> bool:
- """Determine whether the string looks like an egg_info.
-
- :param s: The string to parse. E.g. foo-2.1
- """
- return bool(_egg_info_re.search(s))
-
-
def _should_build(
req: InstallRequirement,
) -> bool:
@@ -67,54 +55,6 @@ def should_build_for_install_command(
return _should_build(req)
-def _should_cache(
- req: InstallRequirement,
-) -> bool | None:
- """
- Return whether a built InstallRequirement can be stored in the persistent
- wheel cache, assuming the wheel cache is available, and _should_build()
- has determined a wheel needs to be built.
- """
- if req.editable or not req.source_dir:
- # never cache editable requirements
- return False
-
- if req.link and req.link.is_vcs:
- # VCS checkout. Do not cache
- # unless it points to an immutable commit hash.
- assert not req.editable
- assert req.source_dir
- vcs_backend = vcs.get_backend_for_scheme(req.link.scheme)
- assert vcs_backend
- if vcs_backend.is_immutable_rev_checkout(req.link.url, req.source_dir):
- return True
- return False
-
- assert req.link
- base, ext = req.link.splitext()
- if _contains_egg_info(base):
- return True
-
- # Otherwise, do not cache.
- return False
-
-
-def _get_cache_dir(
- req: InstallRequirement,
- wheel_cache: WheelCache,
-) -> str:
- """Return the persistent or temporary cache directory where the built
- wheel need to be stored.
- """
- cache_available = bool(wheel_cache.cache_dir)
- assert req.link
- if cache_available and _should_cache(req):
- cache_dir = wheel_cache.get_path_for_link(req.link)
- else:
- cache_dir = wheel_cache.get_ephem_path_for_link(req.link)
- return cache_dir
-
-
def _verify_one(req: InstallRequirement, wheel_path: str) -> None:
canonical_name = canonicalize_name(req.name or "")
w = Wheel(os.path.basename(wheel_path))
@@ -295,7 +235,7 @@ def build(
build_successes, build_failures = [], []
for req in requirements:
assert req.name
- cache_dir = _get_cache_dir(req, wheel_cache)
+ cache_dir = wheel_cache.resolve_cache_dir(req)
wheel_file = _build_one(
req,
cache_dir,
diff --git a/tests/conftest.py b/tests/conftest.py
index c98b871170f..80d5ba8bb3f 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -753,6 +753,9 @@ class FakePackage:
requires_dist: tuple[str, ...] = ()
# This will override the Name specified in the actual dist's METADATA.
metadata_name: str | None = None
+ # Whether to delete the file this points to, which causes any attempt to fetch this
+ # package to fail unless it is processed as a metadata-only dist.
+ delete_linked_file: bool = False
def metadata_filename(self) -> str:
"""This is specified by PEP 658."""
@@ -842,6 +845,27 @@ def fake_packages() -> dict[str, list[FakePackage]]:
("simple==1.0",),
),
],
+ "complex-dist": [
+ FakePackage(
+ "complex-dist",
+ "0.1",
+ "complex_dist-0.1-py2.py3-none-any.whl",
+ MetadataKind.Unhashed,
+ # Validate that the wheel isn't fetched if metadata is available and
+ # --dry-run is on, when the metadata presents no hash itself.
+ delete_linked_file=True,
+ ),
+ ],
+ "corruptwheel": [
+ FakePackage(
+ "corruptwheel",
+ "1.0",
+ "corruptwheel-1.0-py2.py3-none-any.whl",
+ # Validate that the wheel isn't fetched if metadata is available and
+ # --dry-run is on, when the metadata *does* present a hash.
+ MetadataKind.Sha256,
+ ),
+ ],
"has-script": [
# Ensure we check PEP 658 metadata hashing errors for wheel files.
FakePackage(
@@ -927,10 +951,10 @@ def html_index_for_packages(
f'    <a href="{package_link.filename}" {package_link.generate_additional_tag()}>{package_link.filename}</a><br/>\n'  # noqa: E501
)
# (3.2) Copy over the corresponding file in `shared_data.packages`.
- shutil.copy(
- shared_data.packages / package_link.filename,
- pkg_subdir / package_link.filename,
- )
+ cached_file = shared_data.packages / package_link.filename
+ new_file = pkg_subdir / package_link.filename
+ if not package_link.delete_linked_file:
+ shutil.copy(cached_file, new_file)
# (3.3) Write a metadata file, if applicable.
if package_link.metadata != MetadataKind.NoFile:
with open(pkg_subdir / package_link.metadata_filename(), "wb") as f:
@@ -985,7 +1009,8 @@ def html_index_with_onetime_server(
"""Serve files from a generated pypi index, erroring if a file is downloaded more
than once.
- Provide `-i http://localhost:8000` to pip invocations to point them at this server.
+ Provide `-i http://localhost:<port>` to pip invocations to point them at
+ this server.
"""
class InDirectoryServer(http.server.ThreadingHTTPServer):
@@ -1000,7 +1025,7 @@ def finish_request(self: Self, request: Any, client_address: Any) -> None:
class Handler(OneTimeDownloadHandler):
_seen_paths: ClassVar[set[str]] = set()
- with InDirectoryServer(("", 8000), Handler) as httpd:
+ with InDirectoryServer(("", 0), Handler) as httpd:
server_thread = threading.Thread(target=httpd.serve_forever)
server_thread.start()
diff --git a/tests/functional/test_check.py b/tests/functional/test_check.py
index 06ed1b08eac..f3c309b0f45 100644
--- a/tests/functional/test_check.py
+++ b/tests/functional/test_check.py
@@ -123,10 +123,7 @@ def test_check_complicated_name_missing(script: PipTestEnvironment) -> None:
# Without dependency
result = script.pip("install", "--no-index", package_a_path, "--no-deps")
- assert (
- "Successfully installed package_A-1.0" in result.stdout
- or "Successfully installed package-A-1.0" in result.stdout
- ), str(result)
+ assert "Successfully installed package-a-1.0" in result.stdout, str(result)
result = script.pip("check", expect_error=True)
expected_lines = ("package-a 1.0 requires dependency-b, which is not installed.",)
@@ -149,10 +146,7 @@ def test_check_complicated_name_broken(script: PipTestEnvironment) -> None:
# With broken dependency
result = script.pip("install", "--no-index", package_a_path, "--no-deps")
- assert (
- "Successfully installed package_A-1.0" in result.stdout
- or "Successfully installed package-A-1.0" in result.stdout
- ), str(result)
+ assert "Successfully installed package-a-1.0" in result.stdout, str(result)
result = script.pip(
"install",
@@ -185,10 +179,7 @@ def test_check_complicated_name_clean(script: PipTestEnvironment) -> None:
)
result = script.pip("install", "--no-index", package_a_path, "--no-deps")
- assert (
- "Successfully installed package_A-1.0" in result.stdout
- or "Successfully installed package-A-1.0" in result.stdout
- ), str(result)
+ assert "Successfully installed package-a-1.0" in result.stdout, str(result)
result = script.pip(
"install",
@@ -216,10 +207,7 @@ def test_check_considers_conditional_reqs(script: PipTestEnvironment) -> None:
)
result = script.pip("install", "--no-index", package_a_path, "--no-deps")
- assert (
- "Successfully installed package_A-1.0" in result.stdout
- or "Successfully installed package-A-1.0" in result.stdout
- ), str(result)
+ assert "Successfully installed package-a-1.0" in result.stdout, str(result)
result = script.pip("check", expect_error=True)
expected_lines = ("package-a 1.0 requires dependency-b, which is not installed.",)
diff --git a/tests/functional/test_download.py b/tests/functional/test_download.py
index c5887aa1bf0..555822a903e 100644
--- a/tests/functional/test_download.py
+++ b/tests/functional/test_download.py
@@ -1276,6 +1276,7 @@ def download_server_html_index(
) -> Callable[..., tuple[TestPipResult, Path]]:
"""Execute `pip download` against a generated PyPI index."""
download_dir = tmpdir / "download_dir"
+ server_port = html_index_with_onetime_server.server_address[1]
def run_for_generated_index(
args: list[str],
@@ -1290,7 +1291,7 @@ def run_for_generated_index(
"-d",
str(download_dir),
"-i",
- "http://localhost:8000",
+ f"http://localhost:{server_port}",
*args,
]
result = script.pip(*pip_args, allow_error=allow_error)
diff --git a/tests/functional/test_install.py b/tests/functional/test_install.py
index a1bd81d31d0..5092c28a218 100644
--- a/tests/functional/test_install.py
+++ b/tests/functional/test_install.py
@@ -2128,7 +2128,7 @@ def test_install_conflict_results_in_warning(
# Install pkgA without its dependency
result1 = script.pip("install", "--no-index", pkgA_path, "--no-deps")
- assert "Successfully installed pkgA-1.0" in result1.stdout, str(result1)
+ assert "Successfully installed pkga-1.0" in result1.stdout, str(result1)
# Then install an incorrect version of the dependency
result2 = script.pip(
@@ -2138,7 +2138,7 @@ def test_install_conflict_results_in_warning(
allow_stderr_error=True,
)
assert "pkga 1.0 requires pkgb==1.0" in result2.stderr, str(result2)
- assert "Successfully installed pkgB-2.0" in result2.stdout, str(result2)
+ assert "Successfully installed pkgb-2.0" in result2.stdout, str(result2)
def test_install_conflict_warning_can_be_suppressed(
@@ -2158,11 +2158,11 @@ def test_install_conflict_warning_can_be_suppressed(
# Install pkgA without its dependency
result1 = script.pip("install", "--no-index", pkgA_path, "--no-deps")
- assert "Successfully installed pkgA-1.0" in result1.stdout, str(result1)
+ assert "Successfully installed pkga-1.0" in result1.stdout, str(result1)
# Then install an incorrect version of the dependency; suppressing warning
result2 = script.pip("install", "--no-index", pkgB_path, "--no-warn-conflicts")
- assert "Successfully installed pkgB-2.0" in result2.stdout, str(result2)
+ assert "Successfully installed pkgb-2.0" in result2.stdout, str(result2)
def test_target_install_ignores_distutils_config_install_prefix(
diff --git a/tests/functional/test_install_check.py b/tests/functional/test_install_check.py
index a0598fe2703..7f40a711d73 100644
--- a/tests/functional/test_install_check.py
+++ b/tests/functional/test_install_check.py
@@ -28,7 +28,7 @@ def test_check_install_canonicalization(script: PipTestEnvironment) -> None:
# Let's install pkgA without its dependency
result = script.pip("install", "--no-index", pkga_path, "--no-deps")
- assert "Successfully installed pkgA-1.0" in result.stdout, str(result)
+ assert "Successfully installed pkga-1.0" in result.stdout, str(result)
# Install the first missing dependency. Only an error for the
# second dependency should remain.
diff --git a/tests/functional/test_install_metadata.py b/tests/functional/test_install_metadata.py
new file mode 100644
index 00000000000..8a5e28b0b1b
--- /dev/null
+++ b/tests/functional/test_install_metadata.py
@@ -0,0 +1,242 @@
+import json
+import re
+from collections.abc import Iterator
+from pathlib import Path
+from typing import Any, Callable
+
+import pytest
+
+from pip._vendor.packaging.requirements import Requirement
+
+from pip._internal.models.direct_url import DirectUrl
+from pip._internal.utils.urls import path_to_url
+
+from tests.lib import (
+ PipTestEnvironment,
+ TestPipResult,
+)
+
+
+@pytest.fixture
+def install_with_generated_html_index(
+ script: PipTestEnvironment,
+ html_index_for_packages: Path,
+ tmpdir: Path,
+) -> Callable[..., tuple[TestPipResult, dict[str, Any]]]:
+ """Execute `pip download` against a generated PyPI index."""
+ output_file = tmpdir / "output_file.json"
+
+ def run_for_generated_index(
+ args: list[str],
+ *,
+ dry_run: bool = True,
+ allow_error: bool = False,
+ ) -> tuple[TestPipResult, dict[str, Any]]:
+ """
+ Produce a PyPI directory structure pointing to the specified packages, then
+ execute `pip install --report ... -i ...` pointing to our generated index.
+ """
+ pip_args = [
+ "install",
+ *(("--dry-run",) if dry_run else ()),
+ "--ignore-installed",
+ "--report",
+ str(output_file),
+ "-i",
+ path_to_url(str(html_index_for_packages)),
+ *args,
+ ]
+ result = script.pip(*pip_args, allow_error=allow_error)
+ try:
+ with open(output_file, "rb") as f:
+ report = json.load(f)
+ except FileNotFoundError:
+ if allow_error:
+ report = {}
+ else:
+ raise
+ return (result, report)
+
+ return run_for_generated_index
+
+
+def iter_dists(report: dict[str, Any]) -> Iterator[tuple[Requirement, DirectUrl]]:
+ """Parse a (req,url) tuple from each installed dist in the --report json."""
+ for inst in report["install"]:
+ metadata = inst["metadata"]
+ name = metadata["name"]
+ version = metadata["version"]
+ req = Requirement(f"{name}=={version}")
+ direct_url = DirectUrl.from_dict(inst["download_info"])
+ yield (req, direct_url)
+
+
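
A minimal `--report` payload that iter_dists() above accepts (field values are illustrative; the shape follows pip's installation-report JSON):

    report = {
        "install": [
            {
                "metadata": {"name": "simple", "version": "1.0"},
                "download_info": {
                    "url": "https://example.com/simple-1.0-py3-none-any.whl",
                    "archive_info": {"hashes": {"sha256": "ab" * 32}},
                },
            }
        ]
    }
    assert [str(r) for r, _ in iter_dists(report)] == ["simple==1.0"]
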
+@pytest.mark.parametrize(
+ "requirement_to_install, expected_outputs",
+ [
+ ("simple2==1.0", ["simple2==1.0", "simple==1.0"]),
+ ("simple==2.0", ["simple==2.0"]),
+ (
+ "colander",
+ ["colander==0.9.9", "translationstring==1.1"],
+ ),
+ (
+ "compilewheel",
+ ["compilewheel==1.0", "simple==1.0"],
+ ),
+ ],
+)
+def test_install_with_metadata(
+ install_with_generated_html_index: Callable[
+ ..., tuple[TestPipResult, dict[str, Any]]
+ ],
+ requirement_to_install: str,
+ expected_outputs: list[str],
+) -> None:
+ """Verify that if a data-dist-info-metadata attribute is present, then it is used
+ instead of the actual dist's METADATA."""
+ _, report = install_with_generated_html_index(
+ [requirement_to_install],
+ )
+ installed = sorted(str(r) for r, _ in iter_dists(report))
+ assert installed == expected_outputs
+
+
+@pytest.mark.parametrize(
+ "requirement_to_install, real_hash",
+ [
+ (
+ "simple==3.0",
+ "95e0f200b6302989bcf2cead9465cf229168295ea330ca30d1ffeab5c0fed996",
+ ),
+ (
+ "has-script",
+ "16ba92d7f6f992f6de5ecb7d58c914675cf21f57f8e674fb29dcb4f4c9507e5b",
+ ),
+ ],
+)
+def test_incorrect_metadata_hash(
+ install_with_generated_html_index: Callable[
+ ..., tuple[TestPipResult, dict[str, Any]]
+ ],
+ requirement_to_install: str,
+ real_hash: str,
+) -> None:
+ """Verify that if a hash for data-dist-info-metadata is provided, it must match the
+ actual hash of the metadata file."""
+ result, _ = install_with_generated_html_index(
+ [requirement_to_install],
+ allow_error=True,
+ )
+ assert result.returncode != 0
+ expected_msg = f"""\
+ Expected sha256 wrong-hash
+ Got {real_hash}"""
+ assert expected_msg in result.stderr
+
+
+@pytest.mark.parametrize(
+ "requirement_to_install, expected_url",
+ [
+ ("simple2==2.0", "simple2-2.0.tar.gz.metadata"),
+ ("priority", "priority-1.0-py2.py3-none-any.whl.metadata"),
+ ],
+)
+def test_metadata_not_found(
+ install_with_generated_html_index: Callable[
+ ..., tuple[TestPipResult, dict[str, Any]]
+ ],
+ requirement_to_install: str,
+ expected_url: str,
+) -> None:
+ """Verify that if a data-dist-info-metadata attribute is provided, that pip will
+ fetch the .metadata file at the location specified by PEP 658, and error
+ if unavailable."""
+ result, _ = install_with_generated_html_index(
+ [requirement_to_install],
+ allow_error=True,
+ )
+ assert result.returncode != 0
+ expected_re = re.escape(expected_url)
+ pattern = re.compile(
+ f"ERROR: 404 Client Error: FileNotFoundError for url:.*{expected_re}"
+ )
+ assert pattern.search(result.stderr), (pattern, result.stderr)
+
+
+def test_produces_error_for_mismatched_package_name_in_metadata(
+ install_with_generated_html_index: Callable[
+ ..., tuple[TestPipResult, dict[str, Any]]
+ ],
+) -> None:
+ """Verify that the package name from the metadata matches the requested package."""
+ result, _ = install_with_generated_html_index(
+ ["simple2==3.0"],
+ allow_error=True,
+ )
+ assert result.returncode != 0
+ assert (
+ "simple2-3.0.tar.gz has inconsistent Name: expected 'simple2', but metadata "
+ "has 'not-simple2'"
+ ) in result.stdout
+
+
+@pytest.mark.parametrize(
+ "requirement",
+ [
+ "requires-simple-extra==0.1",
+ "REQUIRES_SIMPLE-EXTRA==0.1",
+ "REQUIRES....simple-_-EXTRA==0.1",
+ ],
+)
+def test_canonicalizes_package_name_before_verifying_metadata(
+ install_with_generated_html_index: Callable[
+ ..., tuple[TestPipResult, dict[str, Any]]
+ ],
+ requirement: str,
+) -> None:
+ """Verify that the package name from the command line and the package's
+ METADATA are both canonicalized before comparison, while the name from the METADATA
+ is always used verbatim to represent the installed candidate in --report.
+
+ Regression test for https://github.com/pypa/pip/issues/12038
+ """
+ _, report = install_with_generated_html_index(
+ [requirement],
+ )
+ reqs = [str(r) for r, _ in iter_dists(report)]
+ assert reqs == ["Requires_Simple.Extra==0.1"]
+
+
+@pytest.mark.parametrize(
+ "requirement,err_string",
+ [
+ # It's important that we verify pip won't even attempt to fetch the file, so we
+ # construct an input that will cause it to error if it tries at all.
+ (
+ "complex-dist==0.1",
+ "Could not install packages due to an OSError: [Errno 2] No such file or directory", # noqa: E501
+ ),
+ ("corruptwheel==1.0", ".whl is invalid."),
+ ],
+)
+def test_dry_run_avoids_downloading_metadata_only_dists(
+ install_with_generated_html_index: Callable[
+ ..., tuple[TestPipResult, dict[str, Any]]
+ ],
+ requirement: str,
+ err_string: str,
+) -> None:
+ """Verify that the underlying dist files are not downloaded at all when
+ `install --dry-run` is used to resolve dists with PEP 658 metadata."""
+ _, report = install_with_generated_html_index(
+ [requirement],
+ )
+ assert [requirement] == [str(r) for r, _ in iter_dists(report)]
+ result, _ = install_with_generated_html_index(
+ [requirement],
+ dry_run=False,
+ allow_error=True,
+ )
+ assert result.returncode != 0
+ assert err_string in result.stderr
diff --git a/tests/functional/test_install_reqs.py b/tests/functional/test_install_reqs.py
index 3c5b6db4a68..66497c9eb70 100644
--- a/tests/functional/test_install_reqs.py
+++ b/tests/functional/test_install_reqs.py
@@ -728,7 +728,7 @@ def test_install_distribution_full_union(
result = script.pip_install_local(
to_install, f"{to_install}[bar]", f"{to_install}[baz]"
)
- assert "Building wheel for LocalExtras" in result.stdout
+ assert "Building wheel for localextras" in result.stdout
result.did_create(script.site_packages / "simple")
result.did_create(script.site_packages / "singlemodule.py")
diff --git a/tests/functional/test_wheel.py b/tests/functional/test_wheel.py
index e1ede880496..3496522c4b0 100644
--- a/tests/functional/test_wheel.py
+++ b/tests/functional/test_wheel.py
@@ -322,7 +322,7 @@ def test_wheel_package_with_latin1_setup(
pkg_to_wheel = data.packages.joinpath("SetupPyLatin1")
result = script.pip("wheel", pkg_to_wheel)
- assert "Successfully built SetupPyUTF8" in result.stdout
+ assert "Successfully built setuppyutf8" in result.stdout
def test_pip_wheel_with_pep518_build_reqs(
diff --git a/tests/unit/metadata/test_metadata_pkg_resources.py b/tests/unit/metadata/test_metadata_pkg_resources.py
index ccb0b7dcf0f..64433803893 100644
--- a/tests/unit/metadata/test_metadata_pkg_resources.py
+++ b/tests/unit/metadata/test_metadata_pkg_resources.py
@@ -105,6 +105,7 @@ def test_wheel_metadata_works() -> None:
metadata=InMemoryMetadata({"METADATA": metadata.as_bytes()}, ""),
project_name=name,
),
+ concrete=False,
)
assert name == dist.canonical_name == dist.raw_name
diff --git a/tests/unit/test_cache.py b/tests/unit/test_cache.py
index 30cdb6ebece..d7dc91bfcbb 100644
--- a/tests/unit/test_cache.py
+++ b/tests/unit/test_cache.py
@@ -1,13 +1,33 @@
import os
from pathlib import Path
+import pytest
+
from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version
-from pip._internal.cache import WheelCache, _hash_dict
+from pip._internal.cache import WheelCache, _contains_egg_info, _hash_dict
from pip._internal.models.link import Link
from pip._internal.utils.misc import ensure_dir
+@pytest.mark.parametrize(
+ "s, expected",
+ [
+ # Trivial.
+ ("pip-18.0", True),
+ # Ambiguous.
+ ("foo-2-2", True),
+ ("im-valid", True),
+ # Invalid.
+ ("invalid", False),
+ ("im_invalid", False),
+ ],
+)
+def test_contains_egg_info(s: str, expected: bool) -> None:
+ result = _contains_egg_info(s)
+ assert result == expected
+
+
def test_falsey_path_none() -> None:
wc = WheelCache("")
assert wc.cache_dir is None
diff --git a/tests/unit/test_collector.py b/tests/unit/test_collector.py
index fa688f8e42f..56446bb7aa9 100644
--- a/tests/unit/test_collector.py
+++ b/tests/unit/test_collector.py
@@ -98,7 +98,7 @@ def test_get_simple_response_archive_to_http_scheme(
session.assert_has_calls(
[
- mock.call.head(url, allow_redirects=True),
+ mock.call.head(url, allow_redirects=True, headers=None),
]
)
mock_raise_for_status.assert_called_once_with(session.head.return_value)
@@ -160,7 +160,7 @@ def test_get_simple_response_archive_to_http_scheme_is_html(
assert resp is not None
assert session.mock_calls == [
- mock.call.head(url, allow_redirects=True),
+ mock.call.head(url, allow_redirects=True, headers=None),
mock.call.get(
url,
headers={
@@ -248,7 +248,7 @@ def test_get_simple_response_dont_log_clear_text_password(
assert resp is not None
mock_raise_for_status.assert_called_once_with(resp)
- assert len(caplog.records) == 2
+ assert len(caplog.records) == 3
record = caplog.records[0]
assert record.levelname == "DEBUG"
assert record.message.splitlines() == [
@@ -256,6 +256,9 @@ def test_get_simple_response_dont_log_clear_text_password(
]
record = caplog.records[1]
assert record.levelname == "DEBUG"
+ assert record.message.splitlines() == ["headers: None"]
+ record = caplog.records[2]
+ assert record.levelname == "DEBUG"
assert record.message.splitlines() == [
"Fetched page https://user:****@example.com/simple/ as text/html",
]
@@ -849,7 +852,7 @@ def test_get_index_content_directory_append_index(tmpdir: Path) -> None:
mock_func.return_value = fake_response
actual = _get_index_content(Link(dir_url), session=session)
assert mock_func.mock_calls == [
- mock.call(expected_url, session=session),
+ mock.call(expected_url, headers=None, session=session),
], f"actual calls: {mock_func.mock_calls}"
assert actual is not None
@@ -965,6 +968,7 @@ def test_fetch_response(self, mock_get_simple_response: mock.Mock) -> None:
# _get_simple_response().
mock_get_simple_response.assert_called_once_with(
url,
+ headers=None,
session=link_collector.session,
)
diff --git a/tests/unit/test_req.py b/tests/unit/test_req.py
index a2c4cf243ca..f3a79351221 100644
--- a/tests/unit/test_req.py
+++ b/tests/unit/test_req.py
@@ -28,6 +28,7 @@
PreviousBuildDirError,
)
from pip._internal.index.package_finder import PackageFinder
+from pip._internal.metadata import get_metadata_distribution
from pip._internal.models.direct_url import ArchiveInfo, DirectUrl, DirInfo, VcsInfo
from pip._internal.models.link import Link
from pip._internal.network.session import PipSession
@@ -154,7 +155,11 @@ def test_no_reuse_existing_build_dir(self, data: TestData) -> None:
):
resolver.resolve(reqset.all_requirements, True)
- def test_environment_marker_extras(self, data: TestData) -> None:
+ def test_environment_marker_extras(
+ self,
+ data: TestData,
+ monkeypatch: pytest.MonkeyPatch,
+ ) -> None:
"""
Test that the environment marker extras are used with
non-wheel installs.
@@ -164,6 +169,13 @@ def test_environment_marker_extras(self, data: TestData) -> None:
os.fspath(data.packages.joinpath("LocalEnvironMarker")),
)
req.user_supplied = True
+
+ def cache_concrete_dist(self, dist): # type: ignore[no-untyped-def]
+ self._dist = dist
+
+ monkeypatch.setattr(
+ req, "cache_concrete_dist", partial(cache_concrete_dist, req)
+ )
reqset.add_unnamed_requirement(req)
finder = make_test_finder(find_links=[data.find_links])
with self._basic_resolver(finder) as resolver:
@@ -505,12 +517,23 @@ def test_download_info_local_dir(self, data: TestData) -> None:
assert req.download_info.url.startswith("file://")
assert isinstance(req.download_info.info, DirInfo)
- def test_download_info_local_editable_dir(self, data: TestData) -> None:
+ def test_download_info_local_editable_dir(
+ self,
+ data: TestData,
+ monkeypatch: pytest.MonkeyPatch,
+ ) -> None:
"""Test that download_info is set for requirements from a local editable dir."""
finder = make_test_finder()
with self._basic_resolver(finder) as resolver:
ireq_url = data.packages.joinpath("FSPkg").as_uri()
ireq = get_processed_req_from_line(f"-e {ireq_url}#egg=FSPkg")
+
+ def cache_concrete_dist(self, dist): # type: ignore[no-untyped-def]
+ self._dist = dist
+
+ monkeypatch.setattr(
+ ireq, "cache_concrete_dist", partial(cache_concrete_dist, ireq)
+ )
reqset = resolver.resolve([ireq], True)
assert len(reqset.all_requirements) == 1
req = reqset.all_requirements[0]
@@ -915,7 +938,9 @@ def test_mismatched_versions(caplog: pytest.LogCaptureFixture) -> None:
metadata = email.message.Message()
metadata["name"] = "simplewheel"
metadata["version"] = "1.0"
- req._metadata = metadata
+ req._dist = get_metadata_distribution(
+ bytes(metadata), "simplewheel-1.0.whl", "simplewheel"
+ )
req.assert_source_matches_version()
assert caplog.records[-1].message == (
diff --git a/tests/unit/test_wheel_builder.py b/tests/unit/test_wheel_builder.py
index 0547ac818bc..b2ed3a89b28 100644
--- a/tests/unit/test_wheel_builder.py
+++ b/tests/unit/test_wheel_builder.py
@@ -8,7 +8,8 @@
import pytest
-from pip._internal import wheel_builder
+from pip._internal import cache, wheel_builder
+from pip._internal.cache import _contains_egg_info
from pip._internal.models.link import Link
from pip._internal.operations.build.wheel_legacy import format_command_result
from pip._internal.req.req_install import InstallRequirement
@@ -31,7 +32,7 @@
],
)
def test_contains_egg_info(s: str, expected: bool) -> None:
- result = wheel_builder._contains_egg_info(s)
+ result = _contains_egg_info(s)
assert result == expected
@@ -96,7 +97,7 @@ def test_should_build_for_install_command(req: ReqMock, expected: bool) -> None:
],
)
def test_should_cache(req: ReqMock, expected: bool) -> None:
- assert wheel_builder._should_cache(cast(InstallRequirement, req)) is expected
+ assert cache.should_cache(cast(InstallRequirement, req)) is expected
def test_should_cache_git_sha(tmpdir: Path) -> None:
@@ -106,12 +107,12 @@ def test_should_cache_git_sha(tmpdir: Path) -> None:
# a link referencing a sha should be cached
url = "git+https://g.c/o/r@" + commit + "#egg=mypkg"
req = ReqMock(link=Link(url), source_dir=repo_path)
- assert wheel_builder._should_cache(cast(InstallRequirement, req))
+ assert cache.should_cache(cast(InstallRequirement, req))
# a link not referencing a sha should not be cached
url = "git+https://g.c/o/r@master#egg=mypkg"
req = ReqMock(link=Link(url), source_dir=repo_path)
- assert not wheel_builder._should_cache(cast(InstallRequirement, req))
+ assert not cache.should_cache(cast(InstallRequirement, req))
def test_format_command_result__INFO(caplog: pytest.LogCaptureFixture) -> None: