
Commit 13a8ae1

feat(toxgen): Cache release data (#4835)
### Description

We're fetching the same release data from PyPI every time toxgen is run. Cache it instead to avoid unnecessary network requests. This makes toxgen about twice as fast.

I'm sorting the cache file to hopefully avoid merge conflicts (at least it should make them less likely than if we only appended to the file). Let's see how this works; if the cache makes it harder to merge stuff because of merge conflicts, we can get rid of it. Ease of use is more important than runtime.

#### Issues

Closes #4817

#### Reminders

- Please add tests to validate your changes, and lint your code using `tox -e linters`.
- Add GH Issue ID & Linear ID (if applicable).
- PR title should use [conventional commit](https://develop.sentry.dev/engineering-practices/commit-messages/#type) style (`feat:`, `fix:`, `ref:`, `meta:`).
- For external contributors: [CONTRIBUTING.md](https://github.com/getsentry/sentry-python/blob/master/CONTRIBUTING.md), [Sentry SDK development docs](https://develop.sentry.dev/sdk/), [Discord community](https://discord.gg/Ww9hbqr).
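For a sense of what the new cache holds: each line of `releases.jsonl` is one release, trimmed down by `_normalize_release` (see the diff below) to just the fields toxgen needs. A minimal sketch of a single entry, using a made-up package and version:

```python
import json

# Hypothetical entry; the real file is produced by _save_to_cache(), which
# writes json.dumps(_normalize_release(release)) as one line per release.
entry = {
    "info": {
        "classifiers": ["Programming Language :: Python :: 3.12"],
        "name": "some-package",  # made-up package for illustration
        "requires_python": ">=3.8",
        "version": "1.2.3",
        "yanked": False,
    },
}
print(json.dumps(entry))  # one JSON object per line of releases.jsonl
```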
1 parent 808c180 commit 13a8ae1

File tree

3 files changed, +323 -16 lines changed

scripts/populate_tox/populate_tox.py

Lines changed: 76 additions & 3 deletions
@@ -4,6 +4,7 @@
 
 import functools
 import hashlib
+import json
 import os
 import sys
 import time
@@ -34,18 +35,20 @@
 # CUTOFF = datetime.now(tz=timezone.utc) - timedelta(days=365 * 5)
 
 TOX_FILE = Path(__file__).resolve().parent.parent.parent / "tox.ini"
+RELEASES_CACHE_FILE = Path(__file__).resolve().parent / "releases.jsonl"
 ENV = Environment(
     loader=FileSystemLoader(Path(__file__).resolve().parent),
     trim_blocks=True,
     lstrip_blocks=True,
 )
 
-PYPI_COOLDOWN = 0.1  # seconds to wait between requests to PyPI
+PYPI_COOLDOWN = 0.05  # seconds to wait between requests to PyPI
 
 PYPI_PROJECT_URL = "https://pypi.python.org/pypi/{project}/json"
 PYPI_VERSION_URL = "https://pypi.python.org/pypi/{project}/{version}/json"
 CLASSIFIER_PREFIX = "Programming Language :: Python :: "
 
+CACHE = defaultdict(dict)
 
 IGNORE = {
     # Do not try auto-generating the tox entries for these. They will be
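The `CACHE` constant added above is a two-level mapping: normalized package name, then version string, to the release dict. A tiny sketch of its shape, with hypothetical contents:

```python
from collections import defaultdict

# Same two-level structure as the CACHE constant in the diff above:
# normalized package name -> version string -> release dict.
CACHE = defaultdict(dict)

# Hypothetical contents after main() has loaded releases.jsonl:
CACHE["some_package"]["1.2.3"] = {
    "info": {"name": "some-package", "version": "1.2.3"},
    "_accessed": False,  # flipped to True when the entry is used in a run
}
print("1.2.3" in CACHE["some_package"])  # True
```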
@@ -94,9 +97,32 @@ def fetch_package(package: str) -> Optional[dict]:
 
 @functools.cache
 def fetch_release(package: str, version: Version) -> Optional[dict]:
-    """Fetch release metadata from PyPI."""
+    """Fetch release metadata from cache or, failing that, PyPI."""
+    release = _fetch_from_cache(package, version)
+    if release is not None:
+        return release
+
     url = PYPI_VERSION_URL.format(project=package, version=version)
-    return fetch_url(url)
+    release = fetch_url(url)
+    if release is not None:
+        _save_to_cache(package, version, release)
+    return release
+
+
+def _fetch_from_cache(package: str, version: Version) -> Optional[dict]:
+    package = _normalize_name(package)
+    if package in CACHE and str(version) in CACHE[package]:
+        CACHE[package][str(version)]["_accessed"] = True
+        return CACHE[package][str(version)]
+
+    return None
+
+
+def _save_to_cache(package: str, version: Version, release: Optional[dict]) -> None:
+    with open(RELEASES_CACHE_FILE, "a") as releases_cache:
+        releases_cache.write(json.dumps(_normalize_release(release)) + "\n")
+
+    CACHE[_normalize_name(package)][str(version)] = release
 
 
 def _prefilter_releases(
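To trace the flow above: the first lookup of a (package, version) pair misses `CACHE`, falls through to PyPI, and appends the normalized release to `releases.jsonl`; later runs preload that file and answer the same lookup from memory. A minimal usage sketch, assuming it runs in `populate_tox.py`'s scope (so `fetch_release` is defined) and that `Version` comes from the `packaging` library the script annotates with:

```python
from packaging.version import Version  # assumption: the Version type used in the signatures

# Hypothetical walk-through of the cache path added in this commit.
release = fetch_release("some-package", Version("1.2.3"))
# First run:  _fetch_from_cache() misses -> fetch_url() hits PyPI
#             -> _save_to_cache() appends the normalized JSON to releases.jsonl.
# Later runs: main() preloads releases.jsonl into CACHE, so
#             _fetch_from_cache() returns the entry and marks it "_accessed".
if release is not None:
    print(release["info"]["requires_python"])
```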
@@ -600,6 +626,24 @@ def get_last_updated() -> Optional[datetime]:
     return timestamp
 
 
+def _normalize_name(package: str) -> str:
+    return package.lower().replace("-", "_")
+
+
+def _normalize_release(release: dict) -> dict:
+    """Filter out unneeded parts of the release JSON."""
+    normalized = {
+        "info": {
+            "classifiers": release["info"]["classifiers"],
+            "name": release["info"]["name"],
+            "requires_python": release["info"]["requires_python"],
+            "version": release["info"]["version"],
+            "yanked": release["info"]["yanked"],
+        },
+    }
+    return normalized
+
+
 def main(fail_on_changes: bool = False) -> None:
     """
     Generate tox.ini from the tox.jinja template.
@@ -636,6 +680,20 @@ def main(fail_on_changes: bool = False) -> None:
             f"The SDK supports Python versions {MIN_PYTHON_VERSION} - {MAX_PYTHON_VERSION}."
         )
 
+    # Load file cache
+    global CACHE
+
+    with open(RELEASES_CACHE_FILE) as releases_cache:
+        for line in releases_cache:
+            release = json.loads(line)
+            name = _normalize_name(release["info"]["name"])
+            version = release["info"]["version"]
+            CACHE[name][version] = release
+            CACHE[name][version][
+                "_accessed"
+            ] = False  # for cleaning up unused cache entries
+
+    # Process packages
     packages = defaultdict(list)
 
     for group, integrations in GROUPS.items():
@@ -701,6 +759,21 @@
         packages, update_timestamp=not fail_on_changes, last_updated=last_updated
     )
 
+    # Sort the release cache file
+    releases = []
+    with open(RELEASES_CACHE_FILE) as releases_cache:
+        releases = [json.loads(line) for line in releases_cache]
+    releases.sort(key=lambda r: (r["info"]["name"], r["info"]["version"]))
+    with open(RELEASES_CACHE_FILE, "w") as releases_cache:
+        for release in releases:
+            if (
+                CACHE[_normalize_name(release["info"]["name"])][
+                    release["info"]["version"]
+                ]["_accessed"]
+                is True
+            ):
+                releases_cache.write(json.dumps(release) + "\n")
+
     if fail_on_changes:
         new_file_hash = get_file_hash()
         if old_file_hash != new_file_hash:
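Net effect of this final block: after each run, `releases.jsonl` is rewritten sorted by `(name, version)` compared as plain strings, and entries never marked `_accessed` during the run are dropped. A small sketch of that sort order, with made-up entries:

```python
import json

# Made-up entries showing the deterministic ordering applied to releases.jsonl.
entries = [
    {"info": {"name": "flask", "version": "3.1.0"}},
    {"info": {"name": "celery", "version": "5.4.0"}},
    {"info": {"name": "flask", "version": "2.3.3"}},
]
entries.sort(key=lambda r: (r["info"]["name"], r["info"]["version"]))
print("\n".join(json.dumps(e) for e in entries))
# celery 5.4.0 first, then flask 2.3.3, then flask 3.1.0. The comparison is
# lexicographic on the version string; that's fine here, since the goal is a
# stable, diff-friendly ordering rather than a semantic version order.
```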
