Skip to content

Commit fb1bec8

Browse files
committed
Implement PEP 708
fixes: #998
1 parent 46e3c60 commit fb1bec8

File tree

12 files changed

+525
-38
lines changed

12 files changed

+525
-38
lines changed

CHANGES/998.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Implemented PEP 708 support and added a new `ProjectMetadataContent` model to track a package's project-level metadata at the repository level.
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# Generated by Django 4.2.26 on 2025-11-13 21:52
2+
3+
import django.contrib.postgres.fields
4+
from django.db import migrations, models
5+
import django.db.models.deletion
6+
import pulpcore.app.util
7+
8+
9+
class Migration(migrations.Migration):
    """Schema changes backing PEP 708 project metadata.

    Adds the boolean ``project_metadata`` field to ``pythonremote`` and
    creates the ``ProjectMetadataContent`` model.
    """

    dependencies = [
        ("core", "0145_domainize_import_export"),
        ("python", "0017_pythonpackagecontent_size"),
    ]

    operations = [
        # New flag on the remote model; defaults to False.
        migrations.AddField(
            model_name="pythonremote",
            name="project_metadata",
            field=models.BooleanField(default=False),
        ),
        # Multi-table child of core.content holding per-project metadata.
        migrations.CreateModel(
            name="ProjectMetadataContent",
            fields=[
                (
                    "content_ptr",
                    models.OneToOneField(
                        to="core.content",
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        auto_created=True,
                        serialize=False,
                    ),
                ),
                ("project_name", models.TextField()),
                (
                    "tracks",
                    django.contrib.postgres.fields.ArrayField(
                        base_field=models.TextField(),
                        default=list,
                        size=None,
                    ),
                ),
                (
                    "alternate_locations",
                    django.contrib.postgres.fields.ArrayField(
                        base_field=models.TextField(),
                        default=list,
                        size=None,
                    ),
                ),
                ("sha256", models.CharField(max_length=64)),
                (
                    "_pulp_domain",
                    models.ForeignKey(
                        to="core.domain",
                        on_delete=django.db.models.deletion.PROTECT,
                        default=pulpcore.app.util.get_domain_pk,
                    ),
                ),
            ],
            options={
                "default_related_name": "%(app_label)s_%(model_name)s",
                # One row per digest within a domain.
                "unique_together": {("sha256", "_pulp_domain")},
            },
            bases=("core.content",),
        ),
    ]

pulp_python/app/models.py

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
1+
import hashlib
2+
import json
13
from logging import getLogger
24

35
from aiohttp.web import json_response
46
from django.contrib.postgres.fields import ArrayField
57
from django.core.exceptions import ObjectDoesNotExist
68
from django.db import models
79
from django.conf import settings
10+
from django_lifecycle import hook, BEFORE_SAVE
811
from pulpcore.plugin.models import (
912
AutoAddObjPermsMixin,
1013
Content,
@@ -235,6 +238,69 @@ class Meta:
235238
]
236239

237240

241+
class ProjectMetadataContent(Content):
    """
    A Content Type representing metadata at the project level.

    Currently used to implement PEP 708.

    TODO: Implement PEP 792

    Fields:
        project_name (models.TextField): The name of the project (expected to be normalized;
            the model itself does not normalize it)
        tracks (models.ArrayField): Array of urls from the PEP 708 "tracks" metadata
        alternate_locations (models.ArrayField): Array of urls from the PEP 708
            "alternate locations" metadata

        sha256 (models.CharField): Hex SHA-256 digest computed over the three fields above
    """

    TYPE = "project_metadata"
    repo_key_fields = ("project_name",)

    project_name = models.TextField()
    tracks = ArrayField(models.TextField(), default=list)
    alternate_locations = ArrayField(models.TextField(), default=list)

    sha256 = models.CharField(max_length=64, null=False)
    _pulp_domain = models.ForeignKey("core.Domain", default=get_domain_pk, on_delete=models.PROTECT)

    @classmethod
    def from_simple_page(cls, page):
        """
        Builds an unsaved ProjectMetadataContent from a pypi_simple.ProjectPage.

        Only truthy ``alternate_locations`` / ``tracks`` values are copied from the page; falsy
        ones are left out so the model defaults apply. The sha256 is computed before returning.
        """
        field_values = {}
        for field_name in ("alternate_locations", "tracks"):
            page_value = getattr(page, field_name)
            if page_value:
                field_values[field_name] = page_value

        instance = cls(project_name=page.project, **field_values)
        instance.calculate_sha256()
        return instance

    def to_metadata(self):
        """Returns a dict with the ``tracks`` and ``alternate_locations`` values (always both)."""
        return {name: getattr(self, name) for name in ("tracks", "alternate_locations")}

    @hook(BEFORE_SAVE)
    def calculate_sha256(self):
        """
        Sets ``self.sha256`` to the digest of the other metadata fields.

        Registered as a BEFORE_SAVE hook and also callable directly.
        """
        # sort_keys makes the serialized form independent of the dict's key order.
        payload = json.dumps(
            {
                "alternate_locations": self.alternate_locations,
                "project_name": self.project_name,
                "tracks": self.tracks,
            },
            sort_keys=True,
        )
        self.sha256 = hashlib.sha256(payload.encode("utf-8")).hexdigest()

    class Meta:
        default_related_name = "%(app_label)s_%(model_name)s"
        unique_together = ("sha256", "_pulp_domain")
302+
303+
238304
class PythonPublication(Publication, AutoAddObjPermsMixin):
239305
"""
240306
A Publication for PythonContent.
@@ -270,6 +336,7 @@ class PythonRemote(Remote, AutoAddObjPermsMixin):
270336
exclude_platforms = ArrayField(
271337
models.CharField(max_length=10, blank=True), choices=PLATFORMS, default=list
272338
)
339+
project_metadata = models.BooleanField(default=False)
273340

274341
def get_remote_artifact_url(self, relative_path=None, request=None):
275342
"""Get url for remote_artifact"""
@@ -295,7 +362,7 @@ class PythonRepository(Repository, AutoAddObjPermsMixin):
295362
"""
296363

297364
TYPE = "python"
298-
CONTENT_TYPES = [PythonPackageContent]
365+
CONTENT_TYPES = [PythonPackageContent, ProjectMetadataContent]
299366
REMOTE_TYPES = [PythonRemote]
300367
PULL_THROUGH_SUPPORTED = True
301368

pulp_python/app/pypi/views.py

Lines changed: 36 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
1-
import json
21
import logging
32

4-
from aiohttp.client_exceptions import ClientError
53
from rest_framework.viewsets import ViewSet
64
from rest_framework.renderers import BrowsableAPIRenderer, JSONRenderer, TemplateHTMLRenderer
75
from rest_framework.response import Response
@@ -27,13 +25,12 @@
2725
from packaging.utils import canonicalize_name
2826
from urllib.parse import urljoin, urlparse, urlunsplit
2927
from pathlib import PurePath
30-
from pypi_simple import ACCEPT_JSON_PREFERRED, ProjectPage
3128

3229
from pulpcore.plugin.viewsets import OperationPostponedResponse
3330
from pulpcore.plugin.tasking import dispatch
3431
from pulpcore.plugin.util import get_domain, get_url
35-
from pulpcore.plugin.exceptions import TimeoutException
3632
from pulp_python.app.models import (
33+
ProjectMetadataContent,
3734
PythonDistribution,
3835
PythonPackageContent,
3936
PythonPublication,
@@ -53,6 +50,7 @@
5350
PYPI_LAST_SERIAL,
5451
PYPI_SERIAL_CONSTANT,
5552
get_remote_package_filter,
53+
get_remote_simple_page,
5654
)
5755

5856
from pulp_python.app import tasks
@@ -120,6 +118,11 @@ def get_content(repository_version):
120118
"""Returns queryset of the content in this repository version."""
121119
return PythonPackageContent.objects.filter(pk__in=repository_version.content)
122120

121+
@staticmethod
def get_projects_metadata(repository_version):
    """Returns queryset of the ProjectMetadataContent whose pk is in this version's content."""
    version_content = repository_version.content
    return ProjectMetadataContent.objects.filter(pk__in=version_content)
125+
123126
def should_redirect(self, repo_version=None):
124127
"""Checks if there is a publication the content app can serve."""
125128
if self.distribution.publication:
@@ -136,6 +139,12 @@ def get_rvc(self):
136139
content = self.get_content(repo_ver)
137140
return repo_ver, content
138141

142+
def get_rvcm(self):
    """
    Returns the (repository_version, content, project_metadata) triple.

    The first two items come from ``get_rvc()``. The project metadata is looked up only when a
    repository version was found; otherwise it is None.
    """
    repo_ver, content = self.get_rvc()
    if repo_ver:
        project_metadata = self.get_projects_metadata(repo_ver)
    else:
        project_metadata = None
    return repo_ver, content, project_metadata
147+
139148
def initial(self, request, *args, **kwargs):
140149
"""Perform common initialization tasks for PyPI endpoints."""
141150
super().initial(request, *args, **kwargs)
@@ -312,42 +321,37 @@ def parse_package(release_package):
312321

313322
rfilter = get_remote_package_filter(remote)
314323
if not rfilter.filter_project(package):
315-
return {}
324+
return {}, {}
316325

317-
url = remote.get_remote_artifact_url(f"simple/{package}/")
318-
remote.headers = remote.headers or []
319-
remote.headers.append({"Accept": ACCEPT_JSON_PREFERRED})
320-
downloader = remote.get_downloader(url=url, max_retries=1)
321-
try:
322-
d = downloader.fetch()
323-
except (ClientError, TimeoutException):
326+
page = get_remote_simple_page(package, remote)
327+
if not page:
324328
log.info(f"Failed to fetch {package} simple page from {remote.url}")
325-
return {}
329+
return {}, {}
326330

327-
if d.headers["content-type"] == PYPI_SIMPLE_V1_JSON:
328-
page = ProjectPage.from_json_data(json.load(open(d.path, "rb")), base_url=url)
329-
else:
330-
page = ProjectPage.from_html(package, open(d.path, "rb").read(), base_url=url)
331-
return {
331+
releases = {
332332
p.filename: parse_package(p)
333333
for p in page.packages
334334
if rfilter.filter_release(package, p.version)
335335
}
336+
return releases, ProjectMetadataContent.from_simple_page(page).to_metadata()
336337

337338
@extend_schema(operation_id="pypi_simple_package_read", summary="Get package simple page")
338339
def retrieve(self, request, path, package):
339340
"""Retrieves the simple api html/json page for a package."""
340341
media_type = request.accepted_renderer.media_type
341342

342-
repo_ver, content = self.get_rvc()
343+
repo_ver, content, metadatas = self.get_rvcm()
343344
# Should I redirect if the normalized name is different?
344345
normalized = canonicalize_name(package)
345346
releases = {}
347+
project_metadata = {}
346348
if self.distribution.remote:
347-
releases = self.pull_through_package_simple(normalized, path, self.distribution.remote)
349+
releases, project_metadata = self.pull_through_package_simple(
350+
normalized, path, self.distribution.remote
351+
)
348352
elif self.should_redirect(repo_version=repo_ver):
349353
return redirect(urljoin(self.base_content_url, f"{path}/simple/{normalized}/"))
350-
if content:
354+
if content is not None:
351355
packages = content.filter(name__normalize=normalized).values(
352356
"filename",
353357
"sha256",
@@ -366,17 +370,25 @@ def retrieve(self, request, path, package):
366370
for p in packages
367371
}
368372
releases.update(local_releases)
369-
if not releases:
373+
if metadatas is not None:
374+
local_project_metadata = (
375+
metadatas.filter(project_name=normalized)
376+
.values("tracks", "alternate_locations")
377+
.first()
378+
)
379+
if local_project_metadata:
380+
project_metadata.update(local_project_metadata)
381+
if not (releases or project_metadata):
370382
return HttpResponseNotFound(f"{normalized} does not exist.")
371383

372384
media_type = request.accepted_renderer.media_type
373385
headers = {"X-PyPI-Last-Serial": str(PYPI_SERIAL_CONSTANT)}
374386

375387
if media_type == PYPI_SIMPLE_V1_JSON:
376-
detail_data = write_simple_detail_json(normalized, releases.values())
388+
detail_data = write_simple_detail_json(normalized, releases.values(), project_metadata)
377389
return Response(detail_data, headers=headers)
378390
else:
379-
detail_data = write_simple_detail(normalized, releases.values())
391+
detail_data = write_simple_detail(normalized, releases.values(), project_metadata)
380392
kwargs = {"content_type": media_type, "headers": headers}
381393
return HttpResponse(detail_data, **kwargs)
382394

pulp_python/app/serializers.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
artifact_to_python_content_data,
1717
get_project_metadata_from_file,
1818
parse_project_metadata,
19+
canonicalize_name,
1920
)
2021

2122

@@ -464,6 +465,52 @@ class Meta:
464465
model = python_models.PythonPackageContent
465466

466467

468+
class ProjectMetadataContentSerializer(core_serializers.NoArtifactContentSerializer):
    """
    Serializer for ProjectMetadataContent units.
    """

    project_name = serializers.CharField(
        help_text=_("The name of the python project."),
        required=True,
    )
    tracks = serializers.ListField(
        child=serializers.CharField(allow_blank=False),
        allow_empty=True,
        required=False,
    )
    alternate_locations = serializers.ListField(
        child=serializers.CharField(allow_blank=False),
        allow_empty=True,
        required=False,
    )
    sha256 = serializers.CharField(
        help_text=_("The SHA256 digest of the project metadata."),
        read_only=True,
    )

    def validate_project_name(self, value):
        """Returns the submitted project name passed through canonicalize_name."""
        normalized_name = canonicalize_name(value)
        return normalized_name

    def retrieve(self, validated_data):
        """
        Looks up existing project metadata that matches the validated data.

        An unsaved model is built only to compute the sha256. The result is the first stored
        unit with that digest in the domain given by get_domain(), or None when none matches.
        """
        candidate = python_models.ProjectMetadataContent(**validated_data)
        candidate.calculate_sha256()
        matches = python_models.ProjectMetadataContent.objects.filter(
            sha256=candidate.sha256, _pulp_domain=get_domain()
        )
        return matches.first()

    class Meta:
        model = python_models.ProjectMetadataContent
        fields = core_serializers.NoArtifactContentSerializer.Meta.fields + (
            "project_name",
            "tracks",
            "alternate_locations",
            "sha256",
        )
512+
513+
467514
class MultipleChoiceArrayField(serializers.MultipleChoiceField):
468515
"""
469516
A wrapper to make sure this DRF serializer works properly with ArrayFields.

0 commit comments

Comments
 (0)