Skip to content

Commit 983d1e6

Browse files
authored
Move wheel record checking to standalone module (#19135)
1 parent c5d3338 commit 983d1e6

File tree

2 files changed

+85
-34
lines changed

2 files changed

+85
-34
lines changed

warehouse/forklift/legacy.py

Lines changed: 10 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
# SPDX-License-Identifier: Apache-2.0
2-
import csv
32
import hashlib
43
import hmac
54
import os.path
@@ -65,6 +64,11 @@
6564
from warehouse.rate_limiting.interfaces import RateLimiterException
6665
from warehouse.utils import readme, zipfiles
6766
from warehouse.utils.release import strip_keywords
67+
from warehouse.utils.wheel import (
68+
InvalidWheelRecordError,
69+
MissingWheelRecordError,
70+
validate_record,
71+
)
6872

6973
PATH_HASHER = "blake2_256"
7074

@@ -455,11 +459,6 @@ def _sort_releases(request: Request, project: Project):
455459
r._pypi_ordering = i
456460

457461

458-
def _zip_filename_is_dir(filename: str) -> bool:
459-
"""Return True if this ZIP archive member is a directory."""
460-
return filename.endswith(("/", "\\"))
461-
462-
463462
@view_config(
464463
route_name="forklift.legacy.file_upload",
465464
uses_session=True,
@@ -1417,30 +1416,9 @@ def file_upload(request):
14171416
f"distribution file {filename} at {license_filename}",
14181417
)
14191418

1420-
"""
1421-
Extract RECORD file from a wheel and check the ZIP archive contents
1422-
against the files listed in the RECORD. Mismatches are reported via email.
1423-
"""
1424-
record_filename = f"{name}-{version}.dist-info/RECORD"
1425-
# Files that must be missing from 'RECORD',
1426-
# so we ignore them when cross-checking.
1427-
record_exemptions = {
1428-
f"{name}-{version}.dist-info/RECORD.jws",
1429-
f"{name}-{version}.dist-info/RECORD.p7s",
1430-
}
14311419
try:
1432-
with zipfile.ZipFile(temporary_filename) as zfp:
1433-
wheel_record_contents = zfp.read(record_filename).decode()
1434-
record_entries = {
1435-
fn.replace("\\", "/") # Normalize Windows path separators.
1436-
for fn, *_ in csv.reader(wheel_record_contents.splitlines())
1437-
}
1438-
zip_entries = {
1439-
fn
1440-
for fn in zfp.namelist()
1441-
if not _zip_filename_is_dir(fn) and fn not in record_exemptions
1442-
}
1443-
except (UnicodeError, KeyError, csv.Error) as e:
1420+
validate_record(temporary_filename)
1421+
except MissingWheelRecordError:
14441422
request.metrics.increment(
14451423
"warehouse.upload.failed",
14461424
tags=[
@@ -1451,13 +1429,12 @@ def file_upload(request):
14511429
raise _exc_with_message(
14521430
HTTPBadRequest,
14531431
"Wheel '{filename}' does not contain the required "
1454-
"RECORD file: {record_filename} {e}".format(
1432+
"RECORD file: {record_filename}".format(
14551433
filename=filename,
1456-
record_filename=record_filename,
1457-
e=str(type(e)) + repr(e),
1434+
record_filename=f"{name}-{version}.dist-info/RECORD",
14581435
),
14591436
)
1460-
if record_entries != zip_entries:
1437+
except InvalidWheelRecordError:
14611438
send_wheel_record_mismatch_email(
14621439
request,
14631440
set(project.users),

warehouse/utils/wheel.py

Lines changed: 75 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,22 @@
11
# SPDX-License-Identifier: Apache-2.0
22

3+
import csv
4+
import os
35
import re
6+
import sys
7+
import zipfile
48

59
import packaging.tags
610
import packaging.utils
711

8-
# import sentry_sdk
12+
13+
class MissingWheelRecordError(Exception):
    """
    Raised by validate_record() when the wheel's RECORD file cannot be
    read from the archive, decoded as text, or parsed as CSV.
    """
15+
16+
17+
class InvalidWheelRecordError(Exception):
    """
    Raised by validate_record() when the files listed in the wheel's RECORD
    do not match the files present in the archive. The exception message
    describes which side is missing which entries.
    """
19+
920

1021
_PLATFORMS = [
1122
(re.compile(r"^win_(.*?)$"), lambda m: f"Windows {_normalize_arch(m.group(1))}"),
@@ -153,3 +164,66 @@ def tags_to_filters(tags: set[packaging.tags.Tag]) -> dict[str, list[str]]:
153164
"abis": sorted(abis),
154165
"platforms": sorted(platforms),
155166
}
167+
168+
169+
def _zip_filename_is_dir(filename: str) -> bool:
170+
"""Return True if this ZIP archive member is a directory."""
171+
return filename.endswith(("/", "\\"))
172+
173+
174+
def validate_record(wheel_filepath: str) -> bool:
    """
    Check a wheel's RECORD file against the contents of its ZIP archive.

    The project name and version are taken from the first two
    dash-separated components of the file's basename and are used to
    locate ``{name}-{version}.dist-info/RECORD`` inside the archive.

    Returns True when the set of paths listed in RECORD equals the set of
    non-directory archive members (``RECORD.jws`` and ``RECORD.p7s`` are
    ignored on the archive side).

    Raises MissingWheelRecordError if RECORD is not in the archive, cannot
    be decoded, or cannot be parsed as CSV; the underlying error is chained
    as ``__cause__``.
    Raises InvalidWheelRecordError, with a message listing the differences,
    if RECORD and the archive disagree. Reporting the mismatch is left to
    the caller.
    Raises ValueError if the basename has fewer than three dash-separated
    components.
    """
    filename = os.path.basename(wheel_filepath)
    name, version, _ = filename.split("-", 2)
    dist_info = f"{name}-{version}.dist-info"
    record_filename = f"{dist_info}/RECORD"
    # Files that must be missing from 'RECORD',
    # so we ignore them when cross-checking.
    record_exemptions = {
        f"{dist_info}/RECORD.jws",
        f"{dist_info}/RECORD.p7s",
    }
    try:
        with zipfile.ZipFile(wheel_filepath) as zfp:
            wheel_record_contents = zfp.read(record_filename).decode()
            record_entries = {
                row[0].replace("\\", "/")  # Normalize Windows path separators.
                for row in csv.reader(wheel_record_contents.splitlines())
                # A blank line parses to an empty row; skip it rather than
                # failing with an unrelated unpacking error.
                if row
            }
            wheel_entries = {
                fn
                for fn in zfp.namelist()
                if not _zip_filename_is_dir(fn) and fn not in record_exemptions
            }
    except (UnicodeError, KeyError, csv.Error) as error:
        raise MissingWheelRecordError() from error

    if record_entries != wheel_entries:
        record_is_missing = wheel_entries - record_entries
        wheel_is_missing = record_entries - wheel_entries
        problems = []
        if record_is_missing:
            problems.append(f"Record is missing {record_is_missing}")
        if wheel_is_missing:
            problems.append(f"Wheel is missing {wheel_is_missing}")
        raise InvalidWheelRecordError("; ".join(problems))
    return True
211+
212+
213+
def main(argv) -> int:  # pragma: no cover
    """
    Command-line entry point: validate the RECORD of a single wheel.

    `argv` must hold exactly one item, the path to the wheel. Prints a
    one-line result prefixed with the wheel's basename and returns 0 when
    validation succeeds, or 1 on a usage error or any validation failure.
    """
    if len(argv) != 1:
        print("Usage: python -m warehouse.utils.wheel <wheel path>")
        return 1

    (target,) = argv
    label = os.path.basename(target)
    try:
        validate_record(target)
        print(f"{label}: OK")
    except Exception as exc:
        print(f"{label}: {exc!r}")
        return 1
    return 0
226+
227+
228+
# Script entry point: pass the command-line arguments (minus the program
# name) to main() and use its return value as the process exit status.
if __name__ == "__main__": # pragma: no cover
    sys.exit(main(sys.argv[1:]))

0 commit comments

Comments
 (0)