Skip to content

Commit 04ed021

Browse files
committed
Implement PEP 708
fixes: #998
1 parent 46e3c60 commit 04ed021

File tree

12 files changed

+525
-34
lines changed

12 files changed

+525
-34
lines changed

CHANGES/998.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Implemented PEP 708 support and added a new ProjectMetadataContent model to track a package's project-level metadata at the repository level.
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# Generated by Django 4.2.26 on 2025-11-13 21:52
2+
3+
import django.contrib.postgres.fields
4+
from django.db import migrations, models
5+
import django.db.models.deletion
6+
import pulpcore.app.util
7+
8+
9+
class Migration(migrations.Migration):
    """
    Schema changes for project-level metadata (PEP 708) in the python plugin.

    Adds the boolean ``project_metadata`` field to ``PythonRemote`` and creates the
    ``ProjectMetadataContent`` model, a child of ``core.Content``.
    """

    dependencies = [
        ("core", "0145_domainize_import_export"),
        ("python", "0017_pythonpackagecontent_size"),
    ]

    operations = [
        # New flag on the remote; its default is False.
        migrations.AddField(
            model_name="pythonremote",
            name="project_metadata",
            field=models.BooleanField(default=False),
        ),
        migrations.CreateModel(
            name="ProjectMetadataContent",
            fields=[
                # Multi-table inheritance link to the parent core.Content row.
                (
                    "content_ptr",
                    models.OneToOneField(
                        auto_created=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        serialize=False,
                        to="core.content",
                    ),
                ),
                ("project_name", models.TextField()),
                (
                    "tracks",
                    django.contrib.postgres.fields.ArrayField(
                        base_field=models.TextField(), default=list, size=None
                    ),
                ),
                (
                    "alternate_locations",
                    django.contrib.postgres.fields.ArrayField(
                        base_field=models.TextField(), default=list, size=None
                    ),
                ),
                ("sha256", models.CharField(max_length=64)),
                (
                    "_pulp_domain",
                    models.ForeignKey(
                        default=pulpcore.app.util.get_domain_pk,
                        on_delete=django.db.models.deletion.PROTECT,
                        to="core.domain",
                    ),
                ),
            ],
            options={
                "default_related_name": "%(app_label)s_%(model_name)s",
                # One row per digest within a domain.
                "unique_together": {("sha256", "_pulp_domain")},
            },
            bases=("core.content",),
        ),
    ]

pulp_python/app/models.py

Lines changed: 68 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
1+
import hashlib
2+
import json
13
from logging import getLogger
24

35
from aiohttp.web import json_response
46
from django.contrib.postgres.fields import ArrayField
57
from django.core.exceptions import ObjectDoesNotExist
68
from django.db import models
79
from django.conf import settings
10+
from django_lifecycle import hook, BEFORE_SAVE
811
from pulpcore.plugin.models import (
912
AutoAddObjPermsMixin,
1013
Content,
@@ -235,6 +238,69 @@ class Meta:
235238
]
236239

237240

241+
class ProjectMetadataContent(Content):
    """
    A Content Type representing metadata at the project level.

    Currently used to implement PEP 708.

    Fields:
        project_name (models.TextField): The name of the project (normalized)
        tracks (models.ArrayField): Array of external repository urls that extend the project's
            available files (PEP 708)
        alternate_locations (models.ArrayField): Array of external repository urls that extend the
            project's namespace (PEP 708)

        sha256 (models.CharField): Digest of all the fields above
    """

    # TODO: Implement PEP 792

    TYPE = "project_metadata"
    repo_key_fields = ("project_name",)

    project_name = models.TextField()
    tracks = ArrayField(models.TextField(), default=list)
    alternate_locations = ArrayField(models.TextField(), default=list)

    sha256 = models.CharField(max_length=64, null=False)
    _pulp_domain = models.ForeignKey("core.Domain", default=get_domain_pk, on_delete=models.PROTECT)

    @classmethod
    def from_simple_page(cls, page):
        """
        Creates a ProjectMetadataContent from a pypi_simple.ProjectPage.

        Only the non-empty ``alternate_locations`` / ``tracks`` attributes of ``page`` are
        copied; missing ones fall back to the model defaults. The project name is stored in
        its normalized (PEP 503) form so it matches the documented ``project_name`` contract.
        The returned instance is unsaved and already has ``sha256`` populated.
        """
        import re  # local import: only needed for name normalization

        present_fields = {}
        for field_name in ("alternate_locations", "tracks"):
            value = getattr(page, field_name)
            if value:
                present_fields[field_name] = value

        metadata = cls(
            # PEP 503 normalization: collapse runs of "-", "_", "." into "-" and lowercase.
            project_name=re.sub(r"[-_.]+", "-", page.project).lower(),
            **present_fields,
        )
        metadata.calculate_sha256()
        return metadata

    def to_metadata(self):
        """Returns a dict holding this model's ``tracks`` and ``alternate_locations`` lists."""
        return {
            "tracks": self.tracks,
            "alternate_locations": self.alternate_locations,
        }

    @hook(BEFORE_SAVE)
    def calculate_sha256(self):
        """
        Calculates the sha256 from the other metadata fields.

        The digest is taken over the UTF-8 encoded JSON dump (keys sorted) of
        ``project_name``, ``tracks`` and ``alternate_locations`` and stored on ``self.sha256``.
        Registered as a BEFORE_SAVE hook, so it is also recomputed before every save.
        """
        data = {
            "project_name": self.project_name,
            "tracks": self.tracks,
            "alternate_locations": self.alternate_locations,
        }

        # sort_keys keeps the serialization (and therefore the digest) stable.
        metadata_json = json.dumps(data, sort_keys=True).encode("utf-8")
        self.sha256 = hashlib.sha256(metadata_json).hexdigest()

    class Meta:
        default_related_name = "%(app_label)s_%(model_name)s"
        unique_together = ("sha256", "_pulp_domain")
302+
303+
238304
class PythonPublication(Publication, AutoAddObjPermsMixin):
239305
"""
240306
A Publication for PythonContent.
@@ -270,6 +336,7 @@ class PythonRemote(Remote, AutoAddObjPermsMixin):
270336
exclude_platforms = ArrayField(
271337
models.CharField(max_length=10, blank=True), choices=PLATFORMS, default=list
272338
)
339+
project_metadata = models.BooleanField(default=False)
273340

274341
def get_remote_artifact_url(self, relative_path=None, request=None):
275342
"""Get url for remote_artifact"""
@@ -295,7 +362,7 @@ class PythonRepository(Repository, AutoAddObjPermsMixin):
295362
"""
296363

297364
TYPE = "python"
298-
CONTENT_TYPES = [PythonPackageContent]
365+
CONTENT_TYPES = [PythonPackageContent, ProjectMetadataContent]
299366
REMOTE_TYPES = [PythonRemote]
300367
PULL_THROUGH_SUPPORTED = True
301368

pulp_python/app/pypi/views.py

Lines changed: 36 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
from pulpcore.plugin.util import get_domain, get_url
3535
from pulpcore.plugin.exceptions import TimeoutException
3636
from pulp_python.app.models import (
37+
ProjectMetadataContent,
3738
PythonDistribution,
3839
PythonPackageContent,
3940
PythonPublication,
@@ -53,6 +54,7 @@
5354
PYPI_LAST_SERIAL,
5455
PYPI_SERIAL_CONSTANT,
5556
get_remote_package_filter,
57+
get_remote_simple_page,
5658
)
5759

5860
from pulp_python.app import tasks
@@ -120,6 +122,11 @@ def get_content(repository_version):
120122
"""Returns queryset of the content in this repository version."""
121123
return PythonPackageContent.objects.filter(pk__in=repository_version.content)
122124

125+
@staticmethod
def get_projects_metadata(repository_version):
    """
    Returns queryset of the project metadata in this repository version.

    Args:
        repository_version: object whose ``content`` is used for the ``pk__in`` filter.

    Returns:
        A ``ProjectMetadataContent`` queryset restricted to that content.
    """
    return ProjectMetadataContent.objects.filter(pk__in=repository_version.content)
129+
123130
def should_redirect(self, repo_version=None):
124131
"""Checks if there is a publication the content app can serve."""
125132
if self.distribution.publication:
@@ -136,6 +143,12 @@ def get_rvc(self):
136143
content = self.get_content(repo_ver)
137144
return repo_ver, content
138145

146+
def get_rvcm(self):
    """
    Takes the base_path and returns the repository_version, content, and project metadata.

    Builds on ``get_rvc()``; the project metadata is looked up through
    ``get_projects_metadata`` only when a repository version was found.

    Returns:
        A ``(repo_ver, content, project_metadata)`` tuple. ``project_metadata`` is ``None``
        when ``repo_ver`` is falsy.
    """
    repo_ver, content = self.get_rvc()
    # No repository version means there is nothing to query metadata from.
    project_metadata = self.get_projects_metadata(repo_ver) if repo_ver else None
    return repo_ver, content, project_metadata
151+
139152
def initial(self, request, *args, **kwargs):
140153
"""Perform common initialization tasks for PyPI endpoints."""
141154
super().initial(request, *args, **kwargs)
@@ -312,42 +325,37 @@ def parse_package(release_package):
312325

313326
rfilter = get_remote_package_filter(remote)
314327
if not rfilter.filter_project(package):
315-
return {}
328+
return {}, {}
316329

317-
url = remote.get_remote_artifact_url(f"simple/{package}/")
318-
remote.headers = remote.headers or []
319-
remote.headers.append({"Accept": ACCEPT_JSON_PREFERRED})
320-
downloader = remote.get_downloader(url=url, max_retries=1)
321-
try:
322-
d = downloader.fetch()
323-
except (ClientError, TimeoutException):
330+
page = get_remote_simple_page(package, remote)
331+
if not page:
324332
log.info(f"Failed to fetch {package} simple page from {remote.url}")
325-
return {}
333+
return {}, {}
326334

327-
if d.headers["content-type"] == PYPI_SIMPLE_V1_JSON:
328-
page = ProjectPage.from_json_data(json.load(open(d.path, "rb")), base_url=url)
329-
else:
330-
page = ProjectPage.from_html(package, open(d.path, "rb").read(), base_url=url)
331-
return {
335+
releases = {
332336
p.filename: parse_package(p)
333337
for p in page.packages
334338
if rfilter.filter_release(package, p.version)
335339
}
340+
return releases, ProjectMetadataContent.from_simple_page(page).to_metadata()
336341

337342
@extend_schema(operation_id="pypi_simple_package_read", summary="Get package simple page")
338343
def retrieve(self, request, path, package):
339344
"""Retrieves the simple api html/json page for a package."""
340345
media_type = request.accepted_renderer.media_type
341346

342-
repo_ver, content = self.get_rvc()
347+
repo_ver, content, metadatas = self.get_rvcm()
343348
# Should I redirect if the normalized name is different?
344349
normalized = canonicalize_name(package)
345350
releases = {}
351+
project_metadata = {}
346352
if self.distribution.remote:
347-
releases = self.pull_through_package_simple(normalized, path, self.distribution.remote)
353+
releases, project_metadata = self.pull_through_package_simple(
354+
normalized, path, self.distribution.remote
355+
)
348356
elif self.should_redirect(repo_version=repo_ver):
349357
return redirect(urljoin(self.base_content_url, f"{path}/simple/{normalized}/"))
350-
if content:
358+
if content is not None:
351359
packages = content.filter(name__normalize=normalized).values(
352360
"filename",
353361
"sha256",
@@ -366,17 +374,25 @@ def retrieve(self, request, path, package):
366374
for p in packages
367375
}
368376
releases.update(local_releases)
369-
if not releases:
377+
if metadatas is not None:
378+
local_project_metadata = (
379+
metadatas.filter(project_name=normalized)
380+
.values("tracks", "alternate_locations")
381+
.first()
382+
)
383+
if local_project_metadata:
384+
project_metadata.update(local_project_metadata)
385+
if not (releases or project_metadata):
370386
return HttpResponseNotFound(f"{normalized} does not exist.")
371387

372388
media_type = request.accepted_renderer.media_type
373389
headers = {"X-PyPI-Last-Serial": str(PYPI_SERIAL_CONSTANT)}
374390

375391
if media_type == PYPI_SIMPLE_V1_JSON:
376-
detail_data = write_simple_detail_json(normalized, releases.values())
392+
detail_data = write_simple_detail_json(normalized, releases.values(), project_metadata)
377393
return Response(detail_data, headers=headers)
378394
else:
379-
detail_data = write_simple_detail(normalized, releases.values())
395+
detail_data = write_simple_detail(normalized, releases.values(), project_metadata)
380396
kwargs = {"content_type": media_type, "headers": headers}
381397
return HttpResponse(detail_data, **kwargs)
382398

pulp_python/app/serializers.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
artifact_to_python_content_data,
1717
get_project_metadata_from_file,
1818
parse_project_metadata,
19+
canonicalize_name,
1920
)
2021

2122

@@ -464,6 +465,52 @@ class Meta:
464465
model = python_models.PythonPackageContent
465466

466467

468+
class ProjectMetadataContentSerializer(core_serializers.NoArtifactContentSerializer):
    """
    A Serializer for ProjectMetadataContent.

    ``project_name`` is required and normalized on validation; ``tracks`` and
    ``alternate_locations`` are optional lists of non-blank strings; ``sha256`` is read-only.
    """

    project_name = serializers.CharField(
        required=True,
        help_text=_("The name of the python project."),
    )
    tracks = serializers.ListField(
        child=serializers.CharField(allow_blank=False),
        required=False,
        allow_empty=True,
        help_text=_("URLs of projects on other repositories that this project tracks (PEP 708)."),
    )
    alternate_locations = serializers.ListField(
        child=serializers.CharField(allow_blank=False),
        required=False,
        allow_empty=True,
        help_text=_("URLs of alternate locations where this project may be found (PEP 708)."),
    )
    sha256 = serializers.CharField(
        read_only=True,
        help_text=_("The SHA256 digest of the project metadata."),
    )

    def validate_project_name(self, value):
        """Ensures name is normalized."""
        return canonicalize_name(value)

    def retrieve(self, validated_data):
        """
        Retrieves the project metadata for a project.

        Builds an unsaved model from ``validated_data`` only to compute its digest, then
        looks up a stored ProjectMetadataContent with that ``sha256`` in the current domain.

        Returns:
            The first matching ProjectMetadataContent, or ``None`` when there is no match.
        """
        candidate = python_models.ProjectMetadataContent(**validated_data)
        candidate.calculate_sha256()
        return python_models.ProjectMetadataContent.objects.filter(
            sha256=candidate.sha256, _pulp_domain=get_domain()
        ).first()

    class Meta:
        fields = core_serializers.NoArtifactContentSerializer.Meta.fields + (
            "project_name",
            "tracks",
            "alternate_locations",
            "sha256",
        )
        model = python_models.ProjectMetadataContent
512+
513+
467514
class MultipleChoiceArrayField(serializers.MultipleChoiceField):
468515
"""
469516
A wrapper to make sure this DRF serializer works properly with ArrayFields.

0 commit comments

Comments
 (0)