
Commit d8683b3

Optimize the legacy JSON endpoints (#11920)
* Add Release.is_prerelease to denormalize prerelease data
* Migrate existing data and switch to using Release.is_prerelease
* Switch legacy JSON endpoints to a better performing query
* Move migration revision forward
* Move classifier ordering into the database
* Update trove-classifiers
* Use the new methods in trove-classifiers
* Fix tests
* Reformat
* More test fixes
* Run each migration in its own transaction
* Run 100,000 updates per chunk
* Reformat
* Reformat
1 parent c30a60b commit d8683b3
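
The first two bullets denormalize the prerelease flag onto each release row so the legacy JSON views no longer recompute it per request, with the backfill run in chunks so each chunk commits in its own transaction. A minimal sketch of that idea, assuming a simplified model (the column name comes from the commit message; the type, default, and backfill helper here are illustrative, not the code in this commit):

import sqlalchemy as sa
from sqlalchemy.orm import declarative_base
from packaging.version import InvalidVersion, Version

Base = declarative_base()


class Release(Base):
    # Hypothetical, pared-down model for illustration only.
    __tablename__ = "releases"
    id = sa.Column(sa.Integer, primary_key=True)
    version = sa.Column(sa.Text, nullable=False)
    # Denormalized: written once when the release is stored, instead of being
    # derived from `version` on every JSON request.
    is_prerelease = sa.Column(sa.Boolean, nullable=False, default=False)


def compute_is_prerelease(version: str) -> bool:
    # Roughly what a chunked data migration would backfill for existing rows
    # (e.g. 100,000 rows per UPDATE, one transaction per chunk).
    try:
        return Version(version).is_prerelease
    except InvalidVersion:
        return False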

File tree: 18 files changed, +399 −153 lines


Makefile

Lines changed: 1 addition & 0 deletions
@@ -91,6 +91,7 @@ initdb: .state/docker-build-web
 	docker-compose run --rm web psql -h db -d warehouse -U postgres -c "UPDATE users SET name='Ee Durbin' WHERE username='ewdurbin'"
 	docker-compose run --rm web python -m warehouse db upgrade head
 	docker-compose run --rm web python -m warehouse sponsors populate-db
+	docker-compose run --rm web python -m warehouse classifiers sync
 	$(MAKE) reindex

 reindex: .state/docker-build-web

bin/release

Lines changed: 3 additions & 0 deletions
@@ -8,3 +8,6 @@ python -m warehouse db upgrade head

 # Insert/upgrade malware checks.
 python -m warehouse malware sync-checks
+
+# Insert/upgrade classifiers.
+python -m warehouse classifiers sync

requirements/main.txt

Lines changed: 3 additions & 3 deletions
@@ -1253,9 +1253,9 @@ translationstring==1.4 \
     --hash=sha256:5f4dc4d939573db851c8d840551e1a0fb27b946afe3b95aafc22577eed2d6262 \
     --hash=sha256:bf947538d76e69ba12ab17283b10355a9ecfbc078e6123443f43f2107f6376f3
     # via pyramid
-trove-classifiers==2022.6.26 \
-    --hash=sha256:361d6e85bcea11b90be8b4c3ab4f23ddea0c6ee566ca4a82f5f2e4318d08c1b8 \
-    --hash=sha256:97be455919ba5d0f715147e7f4e17f64c8d46645d9f1dbac92a68201ca2373d7
+trove-classifiers==2022.7.22 \
+    --hash=sha256:0545d0e62af12c722578c9c99f3391b9ce81fa4fe1fd608d6c029b6a55da6456 \
+    --hash=sha256:b5d23dbbaf3969c1b8d14e2f56cda4c3b6427c0f9917f367f179f9db393b8721
     # via -r requirements/main.in
 typeguard==2.13.3 \
     --hash=sha256:00edaa8da3a133674796cf5ea87d9f4b4c367d77476e185e80251cc13dfbb8c4 \
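
The pin moves trove-classifiers forward because, per the commit message, the new CLI relies on helpers from that package. A small illustration of how an upstream sorted list can drive a stored ordering, assuming the package exposes sorted_classifiers (the attribute the new test below monkeypatches on warehouse.cli.classifiers):

from trove_classifiers import sorted_classifiers  # assumed export

# Persisting the index of each classifier lets the database order results
# without re-sorting the strings on every query.
ordering = {name: index for index, name in enumerate(sorted_classifiers)}
print(len(ordering), next(iter(sorted_classifiers)))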

tests/common/db/packaging.py

Lines changed: 3 additions & 0 deletions
@@ -43,6 +43,9 @@ class Meta:

     id = factory.Faker("uuid4", cast_to=None)
     name = factory.Faker("pystr", max_chars=12)
+    normalized_name = factory.LazyAttribute(
+        lambda o: packaging.utils.canonicalize_name(o.name)
+    )


 class ProjectEventFactory(WarehouseFactory):
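
The factory now derives normalized_name with the same PEP 503 normalization the views use to look projects up from the URL. For reference, packaging.utils.canonicalize_name lowercases the name and collapses runs of ".", "-", and "_" into a single "-":

from packaging.utils import canonicalize_name

# PEP 503 normalization, as used by the factory above.
assert canonicalize_name("Foo.Bar_baz") == "foo-bar-baz"
assert canonicalize_name("requests") == "requests"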

tests/unit/cli/test_classifiers.py

Lines changed: 64 additions & 0 deletions
@@ -0,0 +1,64 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import pretend
+
+from warehouse import db
+from warehouse.classifiers.models import Classifier
+from warehouse.cli import classifiers
+
+
+def test_classifiers_update(db_request, monkeypatch, cli):
+    engine = pretend.stub()
+    config = pretend.stub(registry={"sqlalchemy.engine": engine})
+    session_cls = pretend.call_recorder(lambda bind: db_request.db)
+    monkeypatch.setattr(db, "Session", session_cls)
+
+    cs = [
+        c.classifier
+        for c in db_request.db.query(Classifier).order_by(Classifier.ordering).all()
+    ]
+
+    monkeypatch.setattr(classifiers, "sorted_classifiers", ["C :: D", "A :: B"] + cs)
+
+    db_request.db.add(Classifier(classifier="A :: B", ordering=0))
+    assert db_request.db.query(Classifier).filter_by(classifier="C :: D").count() == 0
+    cli.invoke(classifiers.sync, obj=config)
+
+    c = db_request.db.query(Classifier).filter_by(classifier="C :: D").one()
+
+    assert c.classifier == "C :: D"
+    assert c.ordering == 0
+
+    c = db_request.db.query(Classifier).filter_by(classifier="A :: B").one()
+
+    assert c.classifier == "A :: B"
+    assert c.ordering == 1
+
+
+def test_classifiers_no_update(db_request, monkeypatch, cli):
+    engine = pretend.stub()
+    config = pretend.stub(registry={"sqlalchemy.engine": engine})
+    session_cls = pretend.call_recorder(lambda bind: db_request.db)
+    monkeypatch.setattr(db, "Session", session_cls)
+
+    original = db_request.db.query(Classifier).order_by(Classifier.ordering).all()
+
+    monkeypatch.setattr(
+        classifiers, "sorted_classifiers", [c.classifier for c in original]
+    )
+
+    cli.invoke(classifiers.sync, obj=config)
+
+    after = db_request.db.query(Classifier).order_by(Classifier.ordering).all()
+
+    assert original == after
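
These tests pin down the behavior expected of the new `warehouse classifiers sync` command: classifiers missing from the database are inserted, every row's ordering is aligned with the upstream sorted list, and a run with nothing to change leaves the rows as they were. A minimal sketch of a command that would satisfy them, inferred from the tests rather than copied from the commit (the parent click group and the trove_classifiers import are assumptions):

# Hypothetical sketch of warehouse/cli/classifiers.py.
import click
from trove_classifiers import sorted_classifiers  # assumed source of the ordering

from warehouse import db
from warehouse.cli import warehouse  # assumed parent click group
from warehouse.classifiers.models import Classifier


@warehouse.group()
def classifiers():
    """Manage trove classifiers."""


@classifiers.command()
@click.pass_obj
def sync(config):
    """Insert missing classifiers and align their ordering with upstream."""
    session = db.Session(bind=config.registry["sqlalchemy.engine"])
    existing = {c.classifier: c for c in session.query(Classifier).all()}
    for ordering, name in enumerate(sorted_classifiers):
        if name in existing:
            existing[name].ordering = ordering
        else:
            session.add(Classifier(classifier=name, ordering=ordering))
    session.commit()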

tests/unit/forklift/test_legacy.py

Lines changed: 0 additions & 71 deletions
@@ -3134,77 +3134,6 @@ def test_upload_succeeds_creates_release(
             ),
         ]

-    def test_upload_succeeds_creates_classifier(
-        self, pyramid_config, db_request, metrics, monkeypatch
-    ):
-        pyramid_config.testing_securitypolicy(userid=1)
-
-        user = UserFactory.create()
-        EmailFactory.create(user=user)
-        project = ProjectFactory.create()
-        RoleFactory.create(user=user, project=project)
-
-        monkeypatch.setattr(legacy, "classifiers", {"AA :: BB", "CC :: DD"})
-
-        db_request.db.add(Classifier(classifier="AA :: BB"))
-
-        filename = "{}-{}.tar.gz".format(project.name, "1.0")
-
-        db_request.user = user
-        db_request.user_agent = "warehouse-tests/6.6.6"
-        db_request.POST = MultiDict(
-            {
-                "metadata_version": "1.2",
-                "name": project.name,
-                "version": "1.0",
-                "summary": "This is my summary!",
-                "filetype": "sdist",
-                "md5_digest": _TAR_GZ_PKG_MD5,
-                "content": pretend.stub(
-                    filename=filename,
-                    file=io.BytesIO(_TAR_GZ_PKG_TESTDATA),
-                    type="application/tar",
-                ),
-            }
-        )
-        db_request.POST.extend(
-            [
-                ("classifiers", "AA :: BB"),
-                ("classifiers", "CC :: DD"),
-                ("requires_dist", "foo"),
-                ("requires_dist", "bar (>1.0)"),
-                ("project_urls", "Test, https://example.com/"),
-                ("requires_external", "Cheese (>1.0)"),
-                ("provides", "testing"),
-            ]
-        )
-
-        storage_service = pretend.stub(store=lambda path, filepath, meta: None)
-        db_request.find_service = lambda svc, name=None, context=None: {
-            IFileStorage: storage_service,
-            IMetricsService: metrics,
-        }.get(svc)
-
-        resp = legacy.file_upload(db_request)
-
-        assert resp.status_code == 200
-
-        # Ensure that a new Classifier has been created
-        classifier = (
-            db_request.db.query(Classifier)
-            .filter(Classifier.classifier == "CC :: DD")
-            .one()
-        )
-        assert classifier.classifier == "CC :: DD"
-
-        # Ensure that the Release has the new classifier
-        release = (
-            db_request.db.query(Release)
-            .filter((Release.project == project) & (Release.version == "1.0"))
-            .one()
-        )
-        assert release.classifiers == ["AA :: BB", "CC :: DD"]
-
     def test_all_valid_classifiers_can_be_created(self, db_request):
         for classifier in classifiers:
             db_request.db.add(Classifier(classifier=classifier))

tests/unit/legacy/api/test_json.py

Lines changed: 44 additions & 16 deletions
@@ -42,6 +42,7 @@ def _assert_has_cors_headers(headers):
 class TestJSONProject:
     def test_normalizing_redirects(self, db_request):
         project = ProjectFactory.create()
+        ReleaseFactory.create(project=project, version="1.0")

         name = project.name.lower()
         if name == project.normalized_name:
@@ -52,7 +53,7 @@ def test_normalizing_redirects(self, db_request):
             lambda name: "/project/the-redirect/"
         )

-        resp = json.json_project(project, db_request)
+        resp = json.json_project(db_request)

         assert isinstance(resp, HTTPMovedPermanently)
         assert resp.headers["Location"] == "/project/the-redirect/"
@@ -63,7 +64,8 @@ def test_normalizing_redirects(self, db_request):

     def test_missing_release(self, db_request):
         project = ProjectFactory.create()
-        resp = json.json_project(project, db_request)
+        db_request.matchdict = {"name": project.normalized_name}
+        resp = json.json_project(db_request)
         assert isinstance(resp, HTTPNotFound)
         _assert_has_cors_headers(resp.headers)

@@ -81,8 +83,9 @@ def test_with_prereleases(self, monkeypatch, db_request):
             lambda request, project, release, *, all_releases: data
         )
         monkeypatch.setattr(json, "_json_data", json_data)
+        db_request.matchdict = {"name": project.normalized_name}

-        rvalue = json.json_project(project, db_request)
+        rvalue = json.json_project(db_request)

         assert rvalue is data
         assert json_data.calls == [
@@ -102,8 +105,9 @@ def test_only_prereleases(self, monkeypatch, db_request):
             lambda request, project, release, *, all_releases: data
         )
         monkeypatch.setattr(json, "_json_data", json_data)
+        db_request.matchdict = {"name": project.normalized_name}

-        rvalue = json.json_project(project, db_request)
+        rvalue = json.json_project(db_request)

         assert rvalue is data
         assert json_data.calls == [
@@ -129,8 +133,9 @@ def test_all_releases_yanked(self, monkeypatch, db_request):
             lambda request, project, release, *, all_releases: data
         )
         monkeypatch.setattr(json, "_json_data", json_data)
+        db_request.matchdict = {"name": project.normalized_name}

-        rvalue = json.json_project(project, db_request)
+        rvalue = json.json_project(db_request)

         assert rvalue is data
         assert json_data.calls == [
@@ -156,8 +161,9 @@ def test_latest_release_yanked(self, monkeypatch, db_request):
             lambda request, project, release, *, all_releases: data
         )
         monkeypatch.setattr(json, "_json_data", json_data)
+        db_request.matchdict = {"name": project.normalized_name}

-        rvalue = json.json_project(project, db_request)
+        rvalue = json.json_project(db_request)

         assert rvalue is data
         assert json_data.calls == [
@@ -184,8 +190,9 @@ def test_all_non_prereleases_yanked(self, monkeypatch, db_request):
             lambda request, project, release, *, all_releases: data
         )
         monkeypatch.setattr(json, "_json_data", json_data)
+        db_request.matchdict = {"name": project.normalized_name}

-        rvalue = json.json_project(project, db_request)
+        rvalue = json.json_project(db_request)

         assert rvalue is data
         assert json_data.calls == [
@@ -254,8 +261,9 @@ def test_renders(self, pyramid_config, db_request, db_session):
         je = JournalEntryFactory.create(name=project.name, submitted_by=user)

         db_request.route_url = pretend.call_recorder(lambda *args, **kw: url)
+        db_request.matchdict = {"name": project.normalized_name}

-        result = json.json_project(project, db_request)
+        result = json.json_project(db_request)

         assert set(db_request.route_url.calls) == {
             pretend.call("packaging.file", path=files[0].path),
@@ -405,6 +413,7 @@ def test_renders(self, pyramid_config, db_request, db_session):
 class TestJSONProjectSlash:
     def test_normalizing_redirects(self, db_request):
         project = ProjectFactory.create()
+        ReleaseFactory.create(project=project, version="1.0")

         name = project.name.lower()
         if name == project.normalized_name:
@@ -415,7 +424,7 @@ def test_normalizing_redirects(self, db_request):
             lambda name: "/project/the-redirect/"
         )

-        resp = json.json_project_slash(project, db_request)
+        resp = json.json_project_slash(db_request)

         assert isinstance(resp, HTTPMovedPermanently)
         assert resp.headers["Location"] == "/project/the-redirect/"
@@ -434,12 +443,12 @@ def test_normalizing_redirects(self, db_request):
         if name == release.project.normalized_name:
             name = release.project.name.upper()

-        db_request.matchdict = {"name": name}
+        db_request.matchdict = {"name": name, "version": "3.0"}
         db_request.current_route_path = pretend.call_recorder(
             lambda name: "/project/the-redirect/3.0/"
         )

-        resp = json.json_release(release, db_request)
+        resp = json.json_release(db_request)

         assert isinstance(resp, HTTPMovedPermanently)
         assert resp.headers["Location"] == "/project/the-redirect/3.0/"
@@ -448,6 +457,13 @@ def test_normalizing_redirects(self, db_request):
             pretend.call(name=release.project.normalized_name)
         ]

+    def test_missing_release(self, db_request):
+        project = ProjectFactory.create()
+        db_request.matchdict = {"name": project.normalized_name, "version": "3.0"}
+        resp = json.json_release(db_request)
+        assert isinstance(resp, HTTPNotFound)
+        _assert_has_cors_headers(resp.headers)
+
     def test_detail_renders(self, pyramid_config, db_request, db_session):
         project = ProjectFactory.create(has_docs=True)
         description_content_type = "text/x-rst"
@@ -510,8 +526,12 @@ def test_detail_renders(self, pyramid_config, db_request, db_session):
         je = JournalEntryFactory.create(name=project.name, submitted_by=user)

         db_request.route_url = pretend.call_recorder(lambda *args, **kw: url)
+        db_request.matchdict = {
+            "name": project.normalized_name,
+            "version": releases[3].canonical_version,
+        }

-        result = json.json_release(releases[3], db_request)
+        result = json.json_release(db_request)

         assert set(db_request.route_url.calls) == {
             pretend.call("packaging.file", path=files[2].path),
@@ -597,8 +617,12 @@ def test_minimal_renders(self, pyramid_config, db_request):

         url = "/the/fake/url/"
         db_request.route_url = pretend.call_recorder(lambda *args, **kw: url)
+        db_request.matchdict = {
+            "name": project.normalized_name,
+            "version": release.canonical_version,
+        }

-        result = json.json_release(release, db_request)
+        result = json.json_release(db_request)

         assert set(db_request.route_url.calls) == {
             pretend.call("packaging.file", path=file.path),
@@ -679,8 +703,12 @@ def test_vulnerabilities_renders(self, pyramid_config, db_request):

         url = "/the/fake/url/"
         db_request.route_url = pretend.call_recorder(lambda *args, **kw: url)
+        db_request.matchdict = {
+            "name": project.normalized_name,
+            "version": release.canonical_version,
+        }

-        result = json.json_release(release, db_request)
+        result = json.json_release(db_request)

         assert result["vulnerabilities"] == [
             {
@@ -704,12 +732,12 @@ def test_normalizing_redirects(self, db_request):
         if name == release.project.normalized_name:
             name = release.project.name.upper()

-        db_request.matchdict = {"name": name}
+        db_request.matchdict = {"name": name, "version": "3.0"}
         db_request.current_route_path = pretend.call_recorder(
             lambda name: "/project/the-redirect/3.0/"
         )

-        resp = json.json_release_slash(release, db_request)
+        resp = json.json_release_slash(db_request)

         assert isinstance(resp, HTTPMovedPermanently)
         assert resp.headers["Location"] == "/project/the-redirect/3.0/"
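
Throughout this file the views are now called with only the request, and the project name and release version travel in request.matchdict; a missing release returns a CORS-decorated 404 instead of relying on a traversal context. A sketch of the view shape these tests imply, inferred from the tests rather than copied from the commit (the underscore-prefixed helpers below are stand-ins, not the commit's actual query):

# Hypothetical shape of the reworked legacy JSON project view.
from pyramid.httpexceptions import HTTPMovedPermanently, HTTPNotFound


def _cors_headers():
    # Stand-in for the CORS headers the tests assert on via _assert_has_cors_headers.
    return {"Access-Control-Allow-Origin": "*"}


def _release_for_project(db, name):
    # Stand-in for the commit's single, better performing query that joins Project
    # and Release and prefers non-yanked, non-prerelease versions; returns
    # (project, release_or_None).
    raise NotImplementedError


def _json_data(request, project, release, *, all_releases):
    # Stand-in matching the signature the tests monkeypatch.
    raise NotImplementedError


def json_project(request):
    name = request.matchdict["name"]
    project, release = _release_for_project(request.db, name)
    if release is None:
        return HTTPNotFound(headers=_cors_headers())
    if project.normalized_name != name:
        return HTTPMovedPermanently(
            request.current_route_path(name=project.normalized_name),
            headers=_cors_headers(),
        )
    return _json_data(request, project, release, all_releases=True)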
