|
1 | 1 | # SPDX-License-Identifier: Apache-2.0 |
2 | 2 |
|
| 3 | +import csv |
| 4 | +import os |
3 | 5 | import re |
| 6 | +import sys |
| 7 | +import zipfile |
4 | 8 |
|
5 | 9 | import packaging.tags |
6 | 10 | import packaging.utils |
7 | 11 |
|
8 | | -# import sentry_sdk |
| 12 | + |
class MissingWheelRecordError(Exception):
    """
    Module-internal error: the wheel's RECORD could not be obtained.

    Raised by validate_record() when the RECORD member is absent from the
    archive, cannot be decoded as text, or cannot be parsed as CSV.
    """
| 15 | + |
| 16 | + |
class InvalidWheelRecordError(Exception):
    """
    Module-internal error: the wheel's RECORD disagrees with the archive.

    Raised by validate_record() when the set of files listed in RECORD is
    not equal to the set of files stored in the wheel.
    """
| 19 | + |
9 | 20 |
|
10 | 21 | _PLATFORMS = [ |
11 | 22 | (re.compile(r"^win_(.*?)$"), lambda m: f"Windows {_normalize_arch(m.group(1))}"), |
@@ -153,3 +164,66 @@ def tags_to_filters(tags: set[packaging.tags.Tag]) -> dict[str, list[str]]: |
153 | 164 | "abis": sorted(abis), |
154 | 165 | "platforms": sorted(platforms), |
155 | 166 | } |
| 167 | + |
| 168 | + |
| 169 | +def _zip_filename_is_dir(filename: str) -> bool: |
| 170 | + """Return True if this ZIP archive member is a directory.""" |
| 171 | + return filename.endswith(("/", "\\")) |
| 172 | + |
| 173 | + |
def validate_record(wheel_filepath: str) -> bool:
    """
    Cross-check a wheel's RECORD file against the ZIP archive contents.

    The distribution name and version are taken from the first two
    "-"-separated fields of the wheel's file name and used to locate
    ``{name}-{version}.dist-info/RECORD`` inside the archive. The set of
    paths listed in RECORD is then compared with the set of non-directory
    members of the archive.

    :param wheel_filepath: Path to the wheel file on disk.
    :returns: True when RECORD and the archive list exactly the same files.
    :raises MissingWheelRecordError: RECORD is absent from the archive,
        cannot be decoded, or cannot be parsed as CSV.
    :raises InvalidWheelRecordError: RECORD and the archive disagree; the
        message names the files missing on each side.
    """
    filename = os.path.basename(wheel_filepath)
    name, version, _ = filename.split("-", 2)
    dist_info = f"{name}-{version}.dist-info"
    record_filename = f"{dist_info}/RECORD"
    # Files that must be missing from 'RECORD',
    # so we ignore them when cross-checking.
    record_exemptions = {
        f"{dist_info}/RECORD.jws",
        f"{dist_info}/RECORD.p7s",
    }
    try:
        with zipfile.ZipFile(wheel_filepath) as zfp:
            wheel_record_contents = zfp.read(record_filename).decode()
            record_entries = {
                row[0].replace("\\", "/")  # Normalize Windows path separators.
                for row in csv.reader(wheel_record_contents.splitlines())
                # csv.reader yields an empty list for a blank line; skip it
                # instead of failing while extracting the first column.
                if row
            }
            wheel_entries = {
                fn
                for fn in zfp.namelist()
                if not _zip_filename_is_dir(fn) and fn not in record_exemptions
            }
    except (UnicodeError, KeyError, csv.Error) as error:
        # KeyError: no RECORD member; UnicodeError: undecodable bytes;
        # csv.Error: malformed CSV. Keep the cause for debugging.
        raise MissingWheelRecordError() from error
    if record_entries != wheel_entries:
        record_is_missing = wheel_entries - record_entries
        wheel_is_missing = record_entries - wheel_entries
        problems = []
        if record_is_missing:
            problems.append(f"Record is missing {record_is_missing}")
        if wheel_is_missing:
            problems.append(f"Wheel is missing {wheel_is_missing}")
        raise InvalidWheelRecordError("; ".join(problems))
    return True
| 211 | + |
| 212 | + |
def main(argv) -> int:  # pragma: no cover
    """
    Command-line entry point: validate the RECORD of a single wheel.

    Expects exactly one argument, the path to a wheel. Prints
    ``<file name>: OK`` and returns 0 when validation succeeds; prints the
    usage text or the repr of the raised exception and returns 1 otherwise.
    """
    if len(argv) != 1:
        print("Usage: python -m warehouse.utils.wheel <wheel path>")
        return 1

    (path,) = argv
    label = os.path.split(path)[1]
    try:
        validate_record(path)
        print(f"{label}: OK")
    except Exception as exc:
        # Top-level boundary: report any failure instead of a traceback.
        print(f"{label}: {exc!r}")
        return 1
    return 0
| 226 | + |
| 227 | + |
# Script entry point: forward the command-line arguments (without the
# program name) to main() and use its return value as the exit status.
if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(main(sys.argv[1:]))
0 commit comments