Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGES/998.feature
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Implemented PEP 708 support and added a new ProjectMetadataContent model to track a package's project-level metadata at the repository level.
65 changes: 65 additions & 0 deletions pulp_python/app/migrations/0018_project_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Generated by Django 4.2.26 on 2025-11-13 21:52

import django.contrib.postgres.fields
from django.db import migrations, models
import django.db.models.deletion
import pulpcore.app.util


class Migration(migrations.Migration):
    # Schema migration for project-level metadata:
    #   * adds the boolean ``project_metadata`` field to ``pythonremote``
    #   * creates the ``ProjectMetadataContent`` content model

    # Must run after these migrations of the ``core`` and ``python`` apps.
    dependencies = [
        ("core", "0145_domainize_import_export"),
        ("python", "0017_pythonpackagecontent_size"),
    ]

    operations = [
        # New flag on the remote; rows that already exist receive ``False``.
        migrations.AddField(
            model_name="pythonremote",
            name="project_metadata",
            field=models.BooleanField(default=False),
        ),
        migrations.CreateModel(
            name="ProjectMetadataContent",
            fields=[
                # Multi-table inheritance link: the parent ``core.content`` row doubles as
                # this model's primary key.
                (
                    "content_ptr",
                    models.OneToOneField(
                        auto_created=True,
                        on_delete=django.db.models.deletion.CASCADE,
                        parent_link=True,
                        primary_key=True,
                        serialize=False,
                        to="core.content",
                    ),
                ),
                ("project_name", models.TextField()),
                # Both array fields hold text values and default to an empty list.
                (
                    "tracks",
                    django.contrib.postgres.fields.ArrayField(
                        base_field=models.TextField(), default=list, size=None
                    ),
                ),
                (
                    "alternate_locations",
                    django.contrib.postgres.fields.ArrayField(
                        base_field=models.TextField(), default=list, size=None
                    ),
                ),
                # 64 characters: the length of a hex-encoded SHA-256 digest.
                ("sha256", models.CharField(max_length=64)),
                # PROTECT: a domain cannot be deleted while rows still reference it.
                (
                    "_pulp_domain",
                    models.ForeignKey(
                        default=pulpcore.app.util.get_domain_pk,
                        on_delete=django.db.models.deletion.PROTECT,
                        to="core.domain",
                    ),
                ),
            ],
            options={
                "default_related_name": "%(app_label)s_%(model_name)s",
                # A given digest may appear at most once per domain.
                "unique_together": {("sha256", "_pulp_domain")},
            },
            bases=("core.content",),
        ),
    ]
69 changes: 68 additions & 1 deletion pulp_python/app/models.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import hashlib
import json
from logging import getLogger

from aiohttp.web import json_response
from django.contrib.postgres.fields import ArrayField
from django.core.exceptions import ObjectDoesNotExist
from django.db import models
from django.conf import settings
from django_lifecycle import hook, BEFORE_SAVE
from pulpcore.plugin.models import (
AutoAddObjPermsMixin,
Content,
Expand Down Expand Up @@ -235,6 +238,69 @@ class Meta:
]


class ProjectMetadataContent(Content):
    """
    Project-level metadata stored as a Content unit.

    At present this backs the PEP 708 implementation.

    Fields:
        project_name (models.TextField): Name of the project (expected to be normalized)
        tracks (models.ArrayField): External repository urls that extend the project's
            available files (PEP 708)
        alternate_locations (models.ArrayField): External repository urls that extend the
            project's namespace (PEP 708)

        sha256 (models.CharField): Digest computed over all the fields listed above
    """

    # TODO: Implement PEP 792

    TYPE = "project_metadata"
    repo_key_fields = ("project_name",)

    project_name = models.TextField()
    tracks = ArrayField(models.TextField(), default=list)
    alternate_locations = ArrayField(models.TextField(), default=list)

    sha256 = models.CharField(max_length=64, null=False)
    _pulp_domain = models.ForeignKey("core.Domain", default=get_domain_pk, on_delete=models.PROTECT)

    @classmethod
    def from_simple_page(cls, page):
        """
        Build an unsaved instance from a pypi_simple.ProjectPage.

        Only the metadata attributes that are truthy on the page are forwarded to the
        constructor; the rest fall back to the model defaults. The digest is computed before
        the instance is returned.
        """
        populated = {}
        for field_name in ("alternate_locations", "tracks"):
            value = getattr(page, field_name)
            if value:
                populated[field_name] = value
        instance = cls(project_name=page.project, **populated)
        instance.calculate_sha256()
        return instance

    def to_metadata(self):
        """Return the ``tracks`` and ``alternate_locations`` values as a dict."""
        return {name: getattr(self, name) for name in ("tracks", "alternate_locations")}

    @hook(BEFORE_SAVE)
    def calculate_sha256(self):
        """Recompute ``sha256`` from the name, tracks and alternate locations."""
        # Keys are sorted so the serialized form is the same regardless of the order in
        # which the dict below is written.
        payload = json.dumps(
            {
                "project_name": self.project_name,
                "tracks": self.tracks,
                "alternate_locations": self.alternate_locations,
            },
            sort_keys=True,
        )
        self.sha256 = hashlib.sha256(payload.encode("utf-8")).hexdigest()

    class Meta:
        default_related_name = "%(app_label)s_%(model_name)s"
        unique_together = ("sha256", "_pulp_domain")


class PythonPublication(Publication, AutoAddObjPermsMixin):
"""
A Publication for PythonContent.
Expand Down Expand Up @@ -270,6 +336,7 @@ class PythonRemote(Remote, AutoAddObjPermsMixin):
exclude_platforms = ArrayField(
models.CharField(max_length=10, blank=True), choices=PLATFORMS, default=list
)
project_metadata = models.BooleanField(default=False)

def get_remote_artifact_url(self, relative_path=None, request=None):
"""Get url for remote_artifact"""
Expand All @@ -295,7 +362,7 @@ class PythonRepository(Repository, AutoAddObjPermsMixin):
"""

TYPE = "python"
CONTENT_TYPES = [PythonPackageContent]
CONTENT_TYPES = [PythonPackageContent, ProjectMetadataContent]
REMOTE_TYPES = [PythonRemote]
PULL_THROUGH_SUPPORTED = True

Expand Down
60 changes: 36 additions & 24 deletions pulp_python/app/pypi/views.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import json
import logging

from aiohttp.client_exceptions import ClientError
from rest_framework.viewsets import ViewSet
from rest_framework.renderers import BrowsableAPIRenderer, JSONRenderer, TemplateHTMLRenderer
from rest_framework.response import Response
Expand All @@ -27,13 +25,12 @@
from packaging.utils import canonicalize_name
from urllib.parse import urljoin, urlparse, urlunsplit
from pathlib import PurePath
from pypi_simple import ACCEPT_JSON_PREFERRED, ProjectPage

from pulpcore.plugin.viewsets import OperationPostponedResponse
from pulpcore.plugin.tasking import dispatch
from pulpcore.plugin.util import get_domain, get_url
from pulpcore.plugin.exceptions import TimeoutException
from pulp_python.app.models import (
ProjectMetadataContent,
PythonDistribution,
PythonPackageContent,
PythonPublication,
Expand All @@ -53,6 +50,7 @@
PYPI_LAST_SERIAL,
PYPI_SERIAL_CONSTANT,
get_remote_package_filter,
get_remote_simple_page,
)

from pulp_python.app import tasks
Expand Down Expand Up @@ -120,6 +118,11 @@ def get_content(repository_version):
"""Returns queryset of the content in this repository version."""
return PythonPackageContent.objects.filter(pk__in=repository_version.content)

@staticmethod
def get_projects_metadata(repository_version):
    """Return the ProjectMetadataContent queryset limited to this repository version."""
    version_content = repository_version.content
    return ProjectMetadataContent.objects.filter(pk__in=version_content)

def should_redirect(self, repo_version=None):
"""Checks if there is a publication the content app can serve."""
if self.distribution.publication:
Expand All @@ -136,6 +139,12 @@ def get_rvc(self):
content = self.get_content(repo_ver)
return repo_ver, content

def get_rvcm(self):
    """
    Return the repository version, its content, and its project metadata.

    The first two items come straight from ``get_rvc()``. The third is the result of
    ``get_projects_metadata()`` for that version, or ``None`` when the version is falsy.
    """
    version, packages = self.get_rvc()
    if version:
        metadata = self.get_projects_metadata(version)
    else:
        metadata = None
    return version, packages, metadata

def initial(self, request, *args, **kwargs):
"""Perform common initialization tasks for PyPI endpoints."""
super().initial(request, *args, **kwargs)
Expand Down Expand Up @@ -312,42 +321,37 @@ def parse_package(release_package):

rfilter = get_remote_package_filter(remote)
if not rfilter.filter_project(package):
return {}
return {}, {}

url = remote.get_remote_artifact_url(f"simple/{package}/")
remote.headers = remote.headers or []
remote.headers.append({"Accept": ACCEPT_JSON_PREFERRED})
downloader = remote.get_downloader(url=url, max_retries=1)
try:
d = downloader.fetch()
except (ClientError, TimeoutException):
page = get_remote_simple_page(package, remote)
if not page:
log.info(f"Failed to fetch {package} simple page from {remote.url}")
return {}
return {}, {}

if d.headers["content-type"] == PYPI_SIMPLE_V1_JSON:
page = ProjectPage.from_json_data(json.load(open(d.path, "rb")), base_url=url)
else:
page = ProjectPage.from_html(package, open(d.path, "rb").read(), base_url=url)
return {
releases = {
p.filename: parse_package(p)
for p in page.packages
if rfilter.filter_release(package, p.version)
}
return releases, ProjectMetadataContent.from_simple_page(page).to_metadata()

@extend_schema(operation_id="pypi_simple_package_read", summary="Get package simple page")
def retrieve(self, request, path, package):
"""Retrieves the simple api html/json page for a package."""
media_type = request.accepted_renderer.media_type

repo_ver, content = self.get_rvc()
repo_ver, content, metadatas = self.get_rvcm()
# Should I redirect if the normalized name is different?
normalized = canonicalize_name(package)
releases = {}
project_metadata = {}
if self.distribution.remote:
releases = self.pull_through_package_simple(normalized, path, self.distribution.remote)
releases, project_metadata = self.pull_through_package_simple(
normalized, path, self.distribution.remote
)
elif self.should_redirect(repo_version=repo_ver):
return redirect(urljoin(self.base_content_url, f"{path}/simple/{normalized}/"))
if content:
if content is not None:
packages = content.filter(name__normalize=normalized).values(
"filename",
"sha256",
Expand All @@ -366,17 +370,25 @@ def retrieve(self, request, path, package):
for p in packages
}
releases.update(local_releases)
if not releases:
if metadatas is not None:
local_project_metadata = (
metadatas.filter(project_name=normalized)
.values("tracks", "alternate_locations")
.first()
)
if local_project_metadata:
project_metadata.update(local_project_metadata)
if not (releases or project_metadata):
return HttpResponseNotFound(f"{normalized} does not exist.")

media_type = request.accepted_renderer.media_type
headers = {"X-PyPI-Last-Serial": str(PYPI_SERIAL_CONSTANT)}

if media_type == PYPI_SIMPLE_V1_JSON:
detail_data = write_simple_detail_json(normalized, releases.values())
detail_data = write_simple_detail_json(normalized, releases.values(), project_metadata)
return Response(detail_data, headers=headers)
else:
detail_data = write_simple_detail(normalized, releases.values())
detail_data = write_simple_detail(normalized, releases.values(), project_metadata)
kwargs = {"content_type": media_type, "headers": headers}
return HttpResponse(detail_data, **kwargs)

Expand Down
47 changes: 47 additions & 0 deletions pulp_python/app/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
artifact_to_python_content_data,
get_project_metadata_from_file,
parse_project_metadata,
canonicalize_name,
)


Expand Down Expand Up @@ -464,6 +465,52 @@ class Meta:
model = python_models.PythonPackageContent


class ProjectMetadataContentSerializer(core_serializers.NoArtifactContentSerializer):
    """
    A Serializer for ProjectMetadataContent.

    ``project_name`` is canonicalized during validation, ``tracks`` and
    ``alternate_locations`` are optional lists of non-blank strings, and ``sha256`` is
    read-only.
    """

    project_name = serializers.CharField(
        required=True,
        help_text=_("The name of the python project."),
    )
    tracks = serializers.ListField(
        child=serializers.CharField(allow_blank=False),
        required=False,
        allow_empty=True,
        help_text=_(
            "A list of URLs of the projects in other repositories that this project tracks "
            "(PEP 708)."
        ),
    )
    alternate_locations = serializers.ListField(
        child=serializers.CharField(allow_blank=False),
        required=False,
        allow_empty=True,
        help_text=_("A list of URLs of alternate repository locations for this project (PEP 708)."),
    )
    sha256 = serializers.CharField(
        read_only=True,
        help_text=_("The SHA256 digest of the project metadata."),
    )

    def validate_project_name(self, value):
        """Return the canonicalized form of the submitted project name."""
        return canonicalize_name(value)

    def retrieve(self, validated_data):
        """
        Look up an existing ProjectMetadataContent equivalent to the validated data.

        A throwaway, unsaved instance is built only to compute the digest; the lookup then
        matches on that digest within the current domain.

        Returns:
            The first matching ProjectMetadataContent, or None when there is no match.
        """
        md = python_models.ProjectMetadataContent(**validated_data)
        md.calculate_sha256()
        return python_models.ProjectMetadataContent.objects.filter(
            sha256=md.sha256, _pulp_domain=get_domain()
        ).first()

    class Meta:
        fields = core_serializers.NoArtifactContentSerializer.Meta.fields + (
            "project_name",
            "tracks",
            "alternate_locations",
            "sha256",
        )
        model = python_models.ProjectMetadataContent


class MultipleChoiceArrayField(serializers.MultipleChoiceField):
"""
A wrapper to make sure this DRF serializer works properly with ArrayFields.
Expand Down
Loading
Loading