diff --git a/warehouse/forklift/legacy.py b/warehouse/forklift/legacy.py
index 41b9e64ca124..d25ed2b1694d 100644
--- a/warehouse/forklift/legacy.py
+++ b/warehouse/forklift/legacy.py
@@ -1,5 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
-import csv
 import hashlib
 import hmac
 import os.path
@@ -65,6 +64,11 @@
 from warehouse.rate_limiting.interfaces import RateLimiterException
 from warehouse.utils import readme, zipfiles
 from warehouse.utils.release import strip_keywords
+from warehouse.utils.wheel import (
+    InvalidWheelRecordError,
+    MissingWheelRecordError,
+    validate_record,
+)
 
 PATH_HASHER = "blake2_256"
 
@@ -455,11 +459,6 @@ def _sort_releases(request: Request, project: Project):
         r._pypi_ordering = i
 
 
-def _zip_filename_is_dir(filename: str) -> bool:
-    """Return True if this ZIP archive member is a directory."""
-    return filename.endswith(("/", "\\"))
-
-
 @view_config(
     route_name="forklift.legacy.file_upload",
     uses_session=True,
@@ -1417,30 +1416,9 @@ def file_upload(request):
                         f"distribution file {filename} at {license_filename}",
                     )
 
-            """
-            Extract RECORD file from a wheel and check the ZIP archive contents
-            against the files listed in the RECORD. Mismatches are reported via email.
-            """
-            record_filename = f"{name}-{version}.dist-info/RECORD"
-            # Files that must be missing from 'RECORD',
-            # so we ignore them when cross-checking.
-            record_exemptions = {
-                f"{name}-{version}.dist-info/RECORD.jws",
-                f"{name}-{version}.dist-info/RECORD.p7s",
-            }
             try:
-                with zipfile.ZipFile(temporary_filename) as zfp:
-                    wheel_record_contents = zfp.read(record_filename).decode()
-                    record_entries = {
-                        fn.replace("\\", "/")  # Normalize Windows path separators.
-                        for fn, *_ in csv.reader(wheel_record_contents.splitlines())
-                    }
-                    zip_entries = {
-                        fn
-                        for fn in zfp.namelist()
-                        if not _zip_filename_is_dir(fn) and fn not in record_exemptions
-                    }
-            except (UnicodeError, KeyError, csv.Error) as e:
+                validate_record(temporary_filename)
+            except MissingWheelRecordError:
                 request.metrics.increment(
                     "warehouse.upload.failed",
                     tags=[
@@ -1451,13 +1429,12 @@ def file_upload(request):
                 raise _exc_with_message(
                     HTTPBadRequest,
                     "Wheel '{filename}' does not contain the required "
-                    "RECORD file: {record_filename} {e}".format(
+                    "RECORD file: {record_filename}".format(
                         filename=filename,
-                        record_filename=record_filename,
-                        e=str(type(e)) + repr(e),
+                        record_filename=f"{name}-{version}.dist-info/RECORD",
                     ),
                 )
-            if record_entries != zip_entries:
+            except InvalidWheelRecordError:
                 send_wheel_record_mismatch_email(
                     request,
                     set(project.users),
diff --git a/warehouse/utils/wheel.py b/warehouse/utils/wheel.py
index db8065ab9a17..460017f99123 100644
--- a/warehouse/utils/wheel.py
+++ b/warehouse/utils/wheel.py
@@ -1,11 +1,22 @@
 # SPDX-License-Identifier: Apache-2.0
 
+import csv
+import os
 import re
+import sys
+import zipfile
 
 import packaging.tags
 import packaging.utils
 
-# import sentry_sdk
+
+class MissingWheelRecordError(Exception):
+    """Raised when a wheel's RECORD file cannot be read, decoded or parsed."""
+
+
+class InvalidWheelRecordError(Exception):
+    """Raised when a wheel's RECORD entries differ from its ZIP file members."""
+
 
 _PLATFORMS = [
     (re.compile(r"^win_(.*?)$"), lambda m: f"Windows {_normalize_arch(m.group(1))}"),
@@ -153,3 +164,68 @@ def tags_to_filters(tags: set[packaging.tags.Tag]) -> dict[str, list[str]]:
         "abis": sorted(abis),
         "platforms": sorted(platforms),
     }
+
+
+def _zip_filename_is_dir(filename: str) -> bool:
+    """Return True if this ZIP archive member is a directory."""
+    return filename.endswith(("/", "\\"))
+
+
+def validate_record(wheel_filepath: str) -> bool:
+    """
+    Check that the files listed in a wheel's RECORD match its ZIP file members.
+
+    Raise MissingWheelRecordError if RECORD cannot be read, decoded or parsed,
+    and InvalidWheelRecordError on a mismatch; return True when they match.
+    """
+    filename = os.path.basename(wheel_filepath)
+    name, version, _ = filename.split("-", 2)
+    record_filename = f"{name}-{version}.dist-info/RECORD"
+    # Files that must be missing from 'RECORD',
+    # so we ignore them when cross-checking.
+    record_exemptions = {
+        f"{name}-{version}.dist-info/RECORD.jws",
+        f"{name}-{version}.dist-info/RECORD.p7s",
+    }
+    try:
+        with zipfile.ZipFile(wheel_filepath) as zfp:
+            wheel_record_contents = zfp.read(record_filename).decode()
+            record_entries = {
+                fn.replace("\\", "/")  # Normalize Windows path separators.
+                for fn, *_ in csv.reader(wheel_record_contents.splitlines())
+            }
+            wheel_entries = {
+                fn
+                for fn in zfp.namelist()
+                if not _zip_filename_is_dir(fn) and fn not in record_exemptions
+            }
+    except (UnicodeError, KeyError, csv.Error) as error:
+        raise MissingWheelRecordError() from error
+    if record_entries != wheel_entries:
+        record_is_missing = wheel_entries - record_entries
+        wheel_is_missing = record_entries - wheel_entries
+        raise InvalidWheelRecordError(
+            (f"Record is missing {record_is_missing}" if record_is_missing else "")
+            + ("; " if record_is_missing and wheel_is_missing else "")
+            + (f"Wheel is missing {wheel_is_missing}" if wheel_is_missing else "")
+        )
+    return True
+
+
+def main(argv) -> int:  # pragma: no cover
+    if len(argv) != 1:
+        print("Usage: python -m warehouse.utils.wheel <wheel-file>")
+        return 1
+    wheel_filepath = argv[0]
+    wheel_filename = os.path.basename(wheel_filepath)
+    try:
+        validate_record(wheel_filepath)
+        print(f"{wheel_filename}: OK")
+        return 0
+    except Exception as error:
+        print(f"{wheel_filename}: {error!r}")
+        return 1
+
+
+if __name__ == "__main__":  # pragma: no cover
+    sys.exit(main(sys.argv[1:]))