From a08a27888465f3f071a2e97ef5c16e4385984e78 Mon Sep 17 00:00:00 2001 From: Clark Boylan Date: Thu, 20 Oct 2022 09:14:45 -0700 Subject: [PATCH 1/3] Restore pre 22.3 wheel cache pathing Prior to bad03ef931d9b3ff4f9e75f35f9c41f45839e2a1 wheel cache paths incorporated source material hashes in their paths. This commit which ended up in 22.3 stopped including that information. This is problematic for two reasons. First our cache is no longer encoding data integrity information that was previously intentionally included. Second it means anyone upgrading from < 22.3 to 22.3 will have orphaned wheel cache entries. The fix here is to update the Link object to set Link.link_hash in the Link.from_json method. Otherwise the hash information is simply missing. This will cause anyone upgrading from 22.3 to newer to have orphaned wheels, but that seems worthwhile considering 22.3 hasn't been around as long as the previous implementation and we get stronger data integrity controls out of it. This fixes https://github.com/pypa/pip/issues/11527 --- news/11527.bugfix.rst | 2 ++ src/pip/_internal/models/link.py | 6 +++++ tests/unit/test_cache.py | 44 +++++++++++++++++++++++++++++++- 3 files changed, 51 insertions(+), 1 deletion(-) create mode 100644 news/11527.bugfix.rst diff --git a/news/11527.bugfix.rst b/news/11527.bugfix.rst new file mode 100644 index 00000000000..98446247ecf --- /dev/null +++ b/news/11527.bugfix.rst @@ -0,0 +1,2 @@ +This change restores 22.2.x wheel cache behavior to pip allowing the +cache to find existing entries. 
diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index c792d128bcf..52c883204c3 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -248,6 +248,11 @@ def from_json( yanked_reason = file_data.get("yanked") dist_info_metadata = file_data.get("dist-info-metadata") hashes = file_data.get("hashes", {}) + link_hash = None + if hashes: + for hash_name in _SUPPORTED_HASHES: + if hash_name in hashes: + link_hash = LinkHash(name=hash_name, value=hashes[hash_name]) # The Link.yanked_reason expects an empty string instead of a boolean. if yanked_reason and not isinstance(yanked_reason, str): @@ -262,6 +267,7 @@ def from_json( requires_python=pyrequire, yanked_reason=yanked_reason, hashes=hashes, + link_hash=link_hash, dist_info_metadata=dist_info_metadata, ) diff --git a/tests/unit/test_cache.py b/tests/unit/test_cache.py index f1f0141c708..f27daa266c8 100644 --- a/tests/unit/test_cache.py +++ b/tests/unit/test_cache.py @@ -1,7 +1,7 @@ import os from pathlib import Path -from pip._vendor.packaging.tags import Tag +from pip._vendor.packaging.tags import Tag, interpreter_name, interpreter_version from pip._internal.cache import WheelCache, _hash_dict from pip._internal.models.format_control import FormatControl @@ -52,6 +52,48 @@ def test_cache_hash() -> None: assert h == "f83b32dfa27a426dec08c21bf006065dd003d0aac78e7fc493d9014d" +def test_link_to_cache(tmpdir: Path) -> None: + """ + Test that Link.from_json() produces Links with consistent cache + locations + """ + wc = WheelCache(os.fspath(tmpdir), FormatControl()) + # Define our expectations for stable cache path. 
+ i_name = interpreter_name() + i_version = interpreter_version() + key_parts = { + "url": "https://files.pythonhosted.org/packages/a6/91/" + "86a6eac449ddfae239e93ffc1918cf33fd9bab35c04d1e963b311e347a73/" + "netifaces-0.11.0.tar.gz", + "sha256": "043a79146eb2907edf439899f262b3dfe41717d34124298ed281139a8b93ca32", + "interpreter_name": i_name, + "interpreter_version": i_version, + } + expected_hash = _hash_dict(key_parts) + parts = [ + expected_hash[:2], + expected_hash[2:4], + expected_hash[4:6], + expected_hash[6:], + ] + pathed_hash = os.path.join(*parts) + # Check working from a Link produces the same result. + file_data = { + "filename": "netifaces-0.11.0.tar.gz", + "hashes": { + "sha256": key_parts["sha256"], + }, + "requires-python": "", + "url": key_parts["url"], + "yanked": False, + } + page_url = "https://pypi.org/simple/netifaces/" + link = Link.from_json(file_data=file_data, page_url=page_url) + assert link + path = wc.get_path_for_link(link) + assert pathed_hash in path + + def test_get_cache_entry(tmpdir: Path) -> None: wc = WheelCache(os.fspath(tmpdir), FormatControl()) persi_link = Link("https://g.c/o/r/persi") From f924c004920946a13ac40744c018a1a36b017a70 Mon Sep 17 00:00:00 2001 From: Clark Boylan Date: Thu, 3 Nov 2022 11:10:33 -0700 Subject: [PATCH 2/3] Add a missing loop break statement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was suggested in code review. Co-authored-by: Stéphane Bidoul --- src/pip/_internal/models/link.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pip/_internal/models/link.py b/src/pip/_internal/models/link.py index 52c883204c3..ffac2fecad8 100644 --- a/src/pip/_internal/models/link.py +++ b/src/pip/_internal/models/link.py @@ -253,6 +253,7 @@ def from_json( for hash_name in _SUPPORTED_HASHES: if hash_name in hashes: link_hash = LinkHash(name=hash_name, value=hashes[hash_name]) + break # The Link.yanked_reason expects an empty string instead of a boolean. 
if yanked_reason and not isinstance(yanked_reason, str): From 46f7e5cfd49ac7eb6dc471c7f0d6e638c57293a3 Mon Sep 17 00:00:00 2001 From: Clark Boylan Date: Thu, 3 Nov 2022 16:23:47 -0700 Subject: [PATCH 3/3] Fix news entry This came up in code review. It was pointed out that the news entry appears in the changelog so needs to be written for that audience. Co-authored-by: Tzu-ping Chung --- news/11527.bugfix.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/news/11527.bugfix.rst b/news/11527.bugfix.rst index 98446247ecf..0185a804ff7 100644 --- a/news/11527.bugfix.rst +++ b/news/11527.bugfix.rst @@ -1,2 +1,2 @@ -This change restores 22.2.x wheel cache behavior to pip allowing the +Wheel cache behavior is restored to match previous versions, allowing the cache to find existing entries.