Skip to content

Commit f06d3c0

Browse files
committed
Fix requires_python metadata + add repair metadata command
fixes: #773
1 parent 85b3398 commit f06d3c0

File tree

8 files changed

+159
-35
lines changed

8 files changed

+159
-35
lines changed

CHANGES/773.bugfix

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
Fixed `requires_python` field not being properly set on package upload.
2+
3+
Run the new `pulpcore-manager repair-python-metadata` command with repositories containing affected
4+
packages to repair their metadata.

pulp_python/app/management/__init__.py

Whitespace-only changes.

pulp_python/app/management/commands/__init__.py

Whitespace-only changes.
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
import re
2+
from django.core.management import BaseCommand, CommandError
3+
from gettext import gettext as _
4+
5+
from django.conf import settings
6+
7+
from pulpcore.plugin.util import extract_pk
8+
from pulp_python.app.models import PythonPackageContent, PythonRepository
9+
from pulp_python.app.utils import artifact_to_python_content_data
10+
11+
12+
def repair_metadata(content):
13+
"""
14+
Repairs the metadata for the passed in content queryset.
15+
:param content: The PythonPackageContent queryset.
16+
Return: number of content units that were repaired
17+
"""
18+
# TODO: Add on_demand content repair?
19+
20+
content = content.select_related("pulp_domain")
21+
immediate_content = content.filter(contentartifact__artifact__isnull=False)
22+
batch = []
23+
set_of_update_fields = set()
24+
total_repaired = 0
25+
for package in immediate_content.prefetch_related('_artifacts').iterator(chunk_size=1000):
26+
new_data = artifact_to_python_content_data(
27+
package.filename, package._artifacts.get(), package.pulp_domain
28+
)
29+
changed = False
30+
for field, value in new_data.items():
31+
if getattr(package, field) != value:
32+
setattr(package, field, value)
33+
set_of_update_fields.add(field)
34+
changed = True
35+
if changed:
36+
batch.append(package)
37+
if len(batch) == 1000:
38+
total_repaired += len(batch)
39+
PythonPackageContent.objects.bulk_update(batch, set_of_update_fields)
40+
batch = []
41+
set_of_update_fields.clear()
42+
43+
if len(batch) > 0:
44+
total_repaired += len(batch)
45+
PythonPackageContent.objects.bulk_update(batch, set_of_update_fields)
46+
47+
return total_repaired
48+
49+
50+
def href_prn_list_handler(value):
51+
"""Common list parsing for a string of hrefs/prns."""
52+
h = rf"(?:{settings.API_ROOT}(?:[-_a-zA-Z0-9]+/)?api/v3/repositories/python/python/[-a-f0-9]+/)"
53+
p = r"(?:prn:python\.pythonrepository:[-a-f0-9]+)"
54+
r = rf"{h}|{p}"
55+
return re.findall(r, value)
56+
57+
58+
class Command(BaseCommand):
59+
"""
60+
Management command to repair metadata of PythonPackageContent.
61+
"""
62+
63+
help = _("Repair the metadata of PythonPackageContent stored in PythonRepositories")
64+
65+
def add_arguments(self, parser):
66+
"""Set up arguments."""
67+
parser.add_argument(
68+
"--repositories",
69+
type=href_prn_list_handler,
70+
required=False,
71+
help=_(
72+
"List of PythonRepository hrefs/prns whose content's metadata will be repaired. "
73+
"Leave blank to include all repositories in all domains. Mutually exclusive "
74+
"with domain."
75+
),
76+
)
77+
parser.add_argument(
78+
"--domain",
79+
default=None,
80+
required=False,
81+
help=_(
82+
"The pulp domain to gather the repositories from if specified. Mutually"
83+
" exclusive with repositories."
84+
),
85+
)
86+
87+
def handle(self, *args, **options):
88+
"""Implement the command."""
89+
domain = options.get("domain")
90+
repository_hrefs = options.get("repositories")
91+
if domain and repository_hrefs:
92+
raise CommandError(_("--domain and --repositories are mutually exclusive"))
93+
94+
repositories = PythonRepository.objects.all()
95+
if repository_hrefs:
96+
repos_ids = [extract_pk(r) for r in repository_hrefs]
97+
repositories = repositories.filter(pk__in=repos_ids)
98+
elif domain:
99+
repositories = repositories.filter(pulp_domain__name=domain)
100+
101+
content_set = set()
102+
for repository in repositories:
103+
content_set.update(repository.latest_version().content.values_list("pk", flat=True))
104+
content = PythonPackageContent.objects.filter(pk__in=content_set)
105+
num_repaired = repair_metadata(content)
106+
print(f"{len(content_set)} packages processed, {num_repaired} package metadata repaired.")

pulp_python/app/models.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,8 @@
1717

1818
from pathlib import PurePath
1919
from .utils import (
20+
artifact_to_python_content_data,
2021
canonicalize_name,
21-
get_project_metadata_from_artifact,
22-
parse_project_metadata,
2322
python_content_to_json,
2423
PYPI_LAST_SERIAL,
2524
PYPI_SERIAL_CONSTANT,
@@ -189,14 +188,7 @@ class PythonPackageContent(Content):
189188
def init_from_artifact_and_relative_path(artifact, relative_path):
190189
"""Used when downloading package from pull-through cache."""
191190
path = PurePath(relative_path)
192-
metadata = get_project_metadata_from_artifact(path.name, artifact)
193-
data = parse_project_metadata(vars(metadata))
194-
data["packagetype"] = metadata.packagetype
195-
data["version"] = metadata.version
196-
data["filename"] = path.name
197-
data["sha256"] = artifact.sha256
198-
data["pulp_domain_id"] = artifact.pulp_domain_id
199-
data["_pulp_domain_id"] = artifact.pulp_domain_id
191+
data = artifact_to_python_content_data(path.name, artifact, domain=get_domain())
200192
return PythonPackageContent(**data)
201193

202194
def __str__(self):

pulp_python/app/serializers.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
from pulp_python.app import models as python_models
1111
from pulp_python.app import fields
12-
from pulp_python.app.utils import get_project_metadata_from_artifact, parse_project_metadata
12+
from pulp_python.app.utils import artifact_to_python_content_data
1313

1414

1515
class PythonRepositorySerializer(core_serializers.RepositorySerializer):
@@ -217,7 +217,7 @@ def deferred_validate(self, data):
217217

218218
artifact = data["artifact"]
219219
try:
220-
metadata = get_project_metadata_from_artifact(filename, artifact)
220+
_data = artifact_to_python_content_data(filename, artifact, domain=get_domain())
221221
except ValueError:
222222
raise serializers.ValidationError(_(
223223
"Extension on {} is not a valid python extension "
@@ -231,14 +231,6 @@ def deferred_validate(self, data):
231231
)}
232232
)
233233

234-
_data = parse_project_metadata(vars(metadata))
235-
_data['packagetype'] = metadata.packagetype
236-
_data['version'] = metadata.version
237-
_data['filename'] = filename
238-
_data['sha256'] = artifact.sha256
239-
data["pulp_domain_id"] = artifact.pulp_domain_id
240-
data["_pulp_domain_id"] = artifact.pulp_domain_id
241-
242234
data.update(_data)
243235

244236
return data

pulp_python/app/tasks/upload.py

Lines changed: 2 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from pulpcore.plugin.util import get_domain
88

99
from pulp_python.app.models import PythonPackageContent, PythonRepository
10-
from pulp_python.app.utils import get_project_metadata_from_artifact, parse_project_metadata
10+
from pulp_python.app.utils import artifact_to_python_content_data
1111

1212

1313
def upload(artifact_sha256, filename, repository_pk=None):
@@ -76,15 +76,7 @@ def create_content(artifact_sha256, filename, domain):
7676
queryset of the new created content
7777
"""
7878
artifact = Artifact.objects.get(sha256=artifact_sha256, pulp_domain=domain)
79-
metadata = get_project_metadata_from_artifact(filename, artifact)
80-
81-
data = parse_project_metadata(vars(metadata))
82-
data['packagetype'] = metadata.packagetype
83-
data['version'] = metadata.version
84-
data['filename'] = filename
85-
data['sha256'] = artifact.sha256
86-
data['pulp_domain'] = domain
87-
data['_pulp_domain'] = domain
79+
data = artifact_to_python_content_data(filename, artifact, domain)
8880

8981
@transaction.atomic()
9082
def create():

pulp_python/app/utils.py

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import pkginfo
2+
import re
23
import shutil
34
import tempfile
45
import json
@@ -51,6 +52,20 @@
5152
".zip": "sdist",
5253
}
5354

55+
DIST_REGEXES = {
56+
# regex from https://github.com/pypa/pip/blob/18.0/src/pip/_internal/wheel.py#L569
57+
".whl": re.compile(
58+
r"""^(?P<name>.+?)-(?P<version>.*?)
59+
((-(?P<build>\d[^-]*?))?-(?P<pyver>.+?)-(?P<abi>.+?)-(?P<plat>.+?)
60+
\.whl|\.dist-info)$""",
61+
re.VERBOSE
62+
),
63+
# regex based on https://setuptools.pypa.io/en/latest/deprecated/python_eggs.html#filename-embedded-metadata # noqa: E501
64+
".egg": re.compile(r"^(?P<name>.+?)-(?P<version>.*?)(-(?P<pyver>.+?(-(?P<plat>.+?))?))?\.egg|\.egg-info$"), # noqa: E501
65+
# regex based on https://github.com/python/cpython/blob/v3.7.0/Lib/distutils/command/bdist_wininst.py#L292 # noqa: E501
66+
".exe": re.compile(r"^(?P<name>.+?)-(?P<version>.*?)\.(?P<plat>.+?)(-(?P<pyver>.+?))?\.exe$"),
67+
}
68+
5469
DIST_TYPES = {
5570
"bdist_wheel": pkginfo.Wheel,
5671
"bdist_wininst": pkginfo.Distribution,
@@ -72,6 +87,8 @@ def parse_project_metadata(project):
7287
"""
7388
package = {}
7489
package['name'] = project.get('name') or ""
90+
package['version'] = project.get('version') or ""
91+
package['packagetype'] = project.get('packagetype') or ""
7592
package['metadata_version'] = project.get('metadata_version') or ""
7693
package['summary'] = project.get('summary') or ""
7794
package['description'] = project.get('description') or ""
@@ -86,13 +103,15 @@ def parse_project_metadata(project):
86103
package['project_url'] = project.get('project_url') or ""
87104
package['platform'] = project.get('platform') or ""
88105
package['supported_platform'] = project.get('supported_platform') or ""
106+
package['requires_python'] = project.get('requires_python') or ""
89107
package['requires_dist'] = json.dumps(project.get('requires_dist', []))
90108
package['provides_dist'] = json.dumps(project.get('provides_dist', []))
91109
package['obsoletes_dist'] = json.dumps(project.get('obsoletes_dist', []))
92110
package['requires_external'] = json.dumps(project.get('requires_external', []))
93111
package['classifiers'] = json.dumps(project.get('classifiers', []))
94112
package['project_urls'] = json.dumps(project.get('project_urls', {}))
95113
package['description_content_type'] = project.get('description_content_type') or ""
114+
package['python_version'] = project.get('python_version') or ""
96115

97116
return package
98117

@@ -113,17 +132,15 @@ def parse_metadata(project, version, distribution):
113132
dictionary: of useful python metadata
114133
115134
"""
116-
package = {}
135+
package = parse_project_metadata(project)
117136

118137
package['filename'] = distribution.get('filename') or ""
119138
package['packagetype'] = distribution.get('packagetype') or ""
120139
package['version'] = version
121140
package['url'] = distribution.get('url') or ""
122141
package['sha256'] = distribution.get('digests', {}).get('sha256') or ""
123-
package['python_version'] = distribution.get('python_version') or ""
124-
package['requires_python'] = distribution.get('requires_python') or ""
125-
126-
package.update(parse_project_metadata(project))
142+
package['python_version'] = distribution.get('python_version') or package.get('python_version')
143+
package['requires_python'] = distribution.get('requires_python') or package.get('requires_python') # noqa: E501
127144

128145
return package
129146

@@ -147,9 +164,30 @@ def get_project_metadata_from_artifact(filename, artifact):
147164
temp_file.flush()
148165
metadata = DIST_TYPES[packagetype](temp_file.name)
149166
metadata.packagetype = packagetype
167+
if packagetype == "sdist":
168+
metadata.python_version = "source"
169+
else:
170+
pyver = ""
171+
regex = DIST_REGEXES[extensions[pkg_type_index]]
172+
if bdist_name := regex.match(filename):
173+
pyver = bdist_name.group("pyver") or ""
174+
metadata.python_version = pyver
150175
return metadata
151176

152177

178+
def artifact_to_python_content_data(filename, artifact, domain=None):
179+
"""
180+
Takes the artifact/filename and returns the metadata needed to create a PythonPackageContent.
181+
"""
182+
metadata = get_project_metadata_from_artifact(filename, artifact)
183+
data = parse_project_metadata(vars(metadata))
184+
data['sha256'] = artifact.sha256
185+
data['filename'] = filename
186+
data['pulp_domain'] = domain or artifact.pulp_domain
187+
data['_pulp_domain'] = data['pulp_domain']
188+
return data
189+
190+
153191
def python_content_to_json(base_path, content_query, version=None, domain=None):
154192
"""
155193
Converts a QuerySet of PythonPackageContent into the PyPi JSON format

0 commit comments

Comments
 (0)