diff --git a/.github/workflows/build-wheels-defined.yml b/.github/workflows/build-wheels-defined.yml index 3d55e76..2b94aeb 100644 --- a/.github/workflows/build-wheels-defined.yml +++ b/.github/workflows/build-wheels-defined.yml @@ -254,10 +254,13 @@ jobs: -w /work \ -e GH_TOKEN="${GH_TOKEN}" \ -e PIP_NO_CACHE_DIR=1 \ + -e PIP_INDEX_URL=https://www.piwheels.org/simple \ + -e PIP_EXTRA_INDEX_URL=https://pypi.org/simple \ python:${{ matrix.python-version }}-bookworm \ bash -c " set -e python --version + source os_dependencies/linux_armv7_docker_prepare.sh # Install pip packages without cache to reduce memory usage python -m pip install --no-cache-dir --upgrade pip python -m pip install --no-cache-dir -r build_requirements.txt @@ -339,10 +342,13 @@ jobs: -w /work \ -e GH_TOKEN="${GH_TOKEN}" \ -e PIP_NO_CACHE_DIR=1 \ + -e PIP_INDEX_URL=https://www.piwheels.org/simple \ + -e PIP_EXTRA_INDEX_URL=https://pypi.org/simple \ python:${{ matrix.python-version }}-bullseye \ bash -c " set -e python --version + source os_dependencies/linux_armv7_docker_prepare.sh # Install pip packages without cache to reduce memory usage python -m pip install --no-cache-dir --upgrade pip python -m pip install --no-cache-dir -r build_requirements.txt diff --git a/.github/workflows/build-wheels-platforms.yml b/.github/workflows/build-wheels-platforms.yml index 50965b1..defd19a 100644 --- a/.github/workflows/build-wheels-platforms.yml +++ b/.github/workflows/build-wheels-platforms.yml @@ -127,10 +127,13 @@ jobs: -e MIN_IDF_MINOR_VERSION=${{ needs.get-supported-versions.outputs.min_idf_minor_version }} \ -e GH_TOKEN="${GH_TOKEN}" \ -e PIP_NO_CACHE_DIR=1 \ + -e PIP_INDEX_URL=https://www.piwheels.org/simple \ + -e PIP_EXTRA_INDEX_URL=https://pypi.org/simple \ python:${{ matrix.python-version }}-bookworm \ bash -c " set -e python --version + source os_dependencies/linux_armv7_docker_prepare.sh # Install pip packages without cache to reduce memory usage python -m pip install --no-cache-dir --upgrade pip 
python -m pip install --no-cache-dir -r build_requirements.txt @@ -152,10 +155,13 @@ jobs: -e MIN_IDF_MINOR_VERSION=${{ needs.get-supported-versions.outputs.min_idf_minor_version }} \ -e GH_TOKEN="${GH_TOKEN}" \ -e PIP_NO_CACHE_DIR=1 \ + -e PIP_INDEX_URL=https://www.piwheels.org/simple \ + -e PIP_EXTRA_INDEX_URL=https://pypi.org/simple \ python:${{ matrix.python-version }}-bullseye \ bash -c " set -e python --version + source os_dependencies/linux_armv7_docker_prepare.sh # Install pip packages without cache to reduce memory usage python -m pip install --no-cache-dir --upgrade pip python -m pip install --no-cache-dir -r build_requirements.txt diff --git a/.github/workflows/build-wheels-python-dependent.yml b/.github/workflows/build-wheels-python-dependent.yml index 89e0973..275e28e 100644 --- a/.github/workflows/build-wheels-python-dependent.yml +++ b/.github/workflows/build-wheels-python-dependent.yml @@ -140,10 +140,13 @@ jobs: -e PYO3_USE_ABI3_FORWARD_COMPATIBILITY=1 \ -e GH_TOKEN="${GH_TOKEN}" \ -e PIP_NO_CACHE_DIR=1 \ + -e PIP_INDEX_URL=https://www.piwheels.org/simple \ + -e PIP_EXTRA_INDEX_URL=https://pypi.org/simple \ python:${{ matrix.python-version }}-bookworm \ bash -c " set -e python --version + source os_dependencies/linux_armv7_docker_prepare.sh # Install pip packages without cache to reduce memory usage python -m pip install --no-cache-dir --upgrade pip python -m pip install --no-cache-dir -r build_requirements.txt @@ -163,10 +166,13 @@ jobs: -e PYO3_USE_ABI3_FORWARD_COMPATIBILITY=1 \ -e GH_TOKEN="${GH_TOKEN}" \ -e PIP_NO_CACHE_DIR=1 \ + -e PIP_INDEX_URL=https://www.piwheels.org/simple \ + -e PIP_EXTRA_INDEX_URL=https://pypi.org/simple \ python:${{ matrix.python-version }}-bullseye \ bash -c " set -e python --version + source os_dependencies/linux_armv7_docker_prepare.sh # Install pip packages without cache to reduce memory usage python -m pip install --no-cache-dir --upgrade pip python -m pip install --no-cache-dir -r build_requirements.txt diff --git 
a/.github/workflows/test-wheels-install.yml b/.github/workflows/test-wheels-install.yml index 7a854ef..9e69d44 100644 --- a/.github/workflows/test-wheels-install.yml +++ b/.github/workflows/test-wheels-install.yml @@ -93,7 +93,9 @@ jobs: -w /work \ python:${{ matrix.python-version }}-bookworm \ bash -c " + set -e python --version + source os_dependencies/linux_armv7_docker_prepare.sh python -m pip install --upgrade pip pip install -r build_requirements.txt python test_wheels_install.py @@ -107,12 +109,16 @@ jobs: -w /work \ python:${{ matrix.python-version }}-bullseye \ bash -c " + set -e python --version + source os_dependencies/linux_armv7_docker_prepare.sh python -m pip install --upgrade pip pip install -r build_requirements.txt python test_wheels_install.py " + # After test_wheels_install.py, ./downloaded_wheels contains only wheels for this + # matrix Python + platform (see prune step in test_wheels_install.main). - name: Upload tested wheels uses: actions/upload-artifact@v4 with: diff --git a/.github/workflows/wheels-repair.yml b/.github/workflows/wheels-repair.yml index cf936f7..cf8f9d0 100644 --- a/.github/workflows/wheels-repair.yml +++ b/.github/workflows/wheels-repair.yml @@ -161,6 +161,9 @@ jobs: run: | docker run --rm \ --platform ${{ matrix.docker_platform }} \ + -e AUDITWHEEL_PLAT=manylinux_2_36_armv7l \ + -e AUDITWHEEL_ONLY_PLAT=1 \ + -e AUDITWHEEL_ALLOW_LINUX_TAG=1 \ -v $(pwd):/work \ -w /work \ ${{ matrix.docker_image }} \ @@ -177,6 +180,9 @@ jobs: run: | docker run --rm \ --platform ${{ matrix.docker_platform }} \ + -e AUDITWHEEL_PLAT=manylinux_2_31_armv7l \ + -e AUDITWHEEL_ONLY_PLAT=1 \ + -e AUDITWHEEL_ALLOW_LINUX_TAG=1 \ -v $(pwd):/work \ -w /work \ ${{ matrix.docker_image }} \ @@ -201,12 +207,26 @@ jobs: needs: repair-wheels runs-on: ubuntu-latest steps: - - name: Download all repaired wheels + - name: Checkout repository + uses: actions/checkout@v4 + + # Download each wheels-repaired-* artifact into its own subdirectory so + # same-named 
wheels from ARMv7 vs ARMv7 Legacy are not silently overwritten + # before collision detection or S3 upload (see README: ARMv7 wheel collisions). + - name: Download all repaired wheels (per-artifact subdirectories) uses: actions/download-artifact@v4 with: pattern: wheels-repaired-* - path: ./all_wheels - merge-multiple: true + path: ./all_wheels_staging + merge-multiple: false + + - name: Check for duplicate wheel basenames across lineages + run: python3 check_wheel_collisions.py ./all_wheels_staging + + - name: Flatten merged wheels directory + run: | + mkdir -p ./all_wheels + find ./all_wheels_staging -type f -name '*.whl' -exec cp -f {} ./all_wheels/ \; - name: List merged wheels run: | diff --git a/README.md b/README.md index 8fc3678..385e1da 100644 --- a/README.md +++ b/README.md @@ -146,6 +146,18 @@ The repair tools are used after build to link and bundle all the needed librarie This logic is done by the [repair workflow](./.github/workflows/wheels-repair.yml) and the [`repair_wheels.py` script](./repair_wheels.py) +### ARMv7 vs ARMv7 Legacy: same wheel filename, different binaries + +`Linux ARMv7` and `Linux ARMv7 Legacy` can both produce a wheel whose **filename is identical** (same PEP 425 tags) while the **ELF contents differ** (different glibc/OpenSSL/Rust toolchain lineage). **Note:** `wheels-download-directory-*` CI artifacts are the **pre-repair** build outputs; comparing those can still show identical names until the [repair workflow](./.github/workflows/wheels-repair.yml) runs. Two bad outcomes follow if that is not handled after repair/merge: + +1. **Artifact merge / local flatten** — downloading multiple `wheels-repaired-*` artifacts into one directory with `merge-multiple: true` can make the second file **silently overwrite** the first on disk before any upload runs. +2. **S3 upload** — [`upload_wheels.py`](./upload_wheels.py) publishes to `pypi/<normalized-package-name>/<wheel-filename>`. 
Uploading a second wheel with the **same key** replaces the object; clients then see whichever build ran last, which can surface as import crashes or segfaults. + +Mitigations in this repo: + +- Repair sets **`AUDITWHEEL_PLAT`** and **`AUDITWHEEL_ONLY_PLAT`** per lineage (`manylinux_2_36_armv7l` vs `manylinux_2_31_armv7l`) so [`repair_wheels.py`](./repair_wheels.py) runs `auditwheel repair --plat ... --only-plat` and emitted wheels get **distinct single-tag filenames** when auditwheel supports it. If **`AUDITWHEEL_PLAT` is set**, ARMv7 “libc detection failed” outcomes are **not** treated as non-fatal skips (that would leave identical filenames across lineages). +- The repair workflow merges repaired artifacts using **per-artifact subdirectories**, then runs [`check_wheel_collisions.py`](./check_wheel_collisions.py) to **fail CI** if the same `*.whl` basename appears with **different contents** across lineages, before flattening for tests/upload. + ## Activity Diagram The main file is `build-wheels-platforms.yml` which is scheduled to run periodically to build Python wheels for any requirement of all [ESP-IDF]-supported versions. @@ -167,4 +179,17 @@ Docker files are in its own repository where there are build and published from. - For older ARMv7 operating systems - For packages requiring glibc 2.31 +[!NOTE] +### ARMv7: prefer piwheels for resolution + +For ARMv7 (and ARMv7 Legacy) environments, you may want to prefer [piwheels](https://www.piwheels.org/) as the primary index and use Espressif's index as a secondary source: + +```bash +python -m pip install --index-url https://www.piwheels.org/simple --extra-index-url https://dl.espressif.com/pypi/ <package> +``` + +This repository's ARMv7 CI workflows likewise set `PIP_INDEX_URL` to piwheels inside the ARMv7 Docker builds, but point `PIP_EXTRA_INDEX_URL` at PyPI (`https://pypi.org/simple`) rather than at Espressif's index. + +**Warning:** piwheels wheels may rely on system-provided shared libraries (i.e. may not bundle `.libs/`). 
If a target OS is missing those libraries or has an incompatible version, imports may fail at runtime. + [ESP-IDF]: https://github.com/espressif/esp-idf diff --git a/_helper_functions.py b/_helper_functions.py index 789453e..40cc791 100644 --- a/_helper_functions.py +++ b/_helper_functions.py @@ -37,19 +37,10 @@ from packaging.version import Version from packaging.version import parse as parse_version -# Packages that should be built from source on Linux to ensure correct library linking -# These packages often have pre-built wheels on PyPI that link against different library versions -# NOTE: This only applies to Linux (especially ARM) - Windows and macOS pre-built wheels work fine -# NOTE: Do NOT add packages with Rust components (cryptography, pynacl, bcrypt) here -# as they have complex build requirements and may not support all Python versions -FORCE_SOURCE_BUILD_PACKAGES_LINUX = [ - "cffi", - "pillow", - "pyyaml", - "brotli", - "greenlet", - "bitarray", -] +# Linux ``--no-binary`` names: one per line in ``force_no_binary_linux.txt`` (also ``PIP_NO_BINARY`` in ARMv7 Docker). 
+ +_REPO_ROOT = Path(__file__).resolve().parent +FORCE_NO_BINARY_LINUX_FILE = "force_no_binary_linux.txt" EXCLUDE_LIST_PATH = "exclude_list.yaml" @@ -255,6 +246,20 @@ def exclude_entry_applies_to_platform(entry: dict, current_platform: str) -> boo return False +def load_force_no_binary_linux_names(repo_root: Path | None = None) -> list[str]: + """Package names for Linux ``--no-binary`` / ``PIP_NO_BINARY`` (``force_no_binary_linux.txt``).""" + root = repo_root if repo_root is not None else _REPO_ROOT + path = root / FORCE_NO_BINARY_LINUX_FILE + out: list[str] = [] + for line in path.read_text(encoding="utf-8").splitlines(): + line = line.split("#", 1)[0].strip() + if line: + out.append(line) + if not out: + raise ValueError(f"{path}: need at least one non-comment package name") + return out + + def get_no_binary_args(requirement_name: str) -> list: """Get --no-binary arguments if this package should be built from source. @@ -277,7 +282,7 @@ def get_no_binary_args(requirement_name: str) -> list: return [] pkg_name = match.group(1).lower().replace("-", "_") - for pkg in FORCE_SOURCE_BUILD_PACKAGES_LINUX: + for pkg in load_force_no_binary_linux_names(_REPO_ROOT): if pkg.lower().replace("-", "_") == pkg_name: return ["--no-binary", match.group(1)] return [] diff --git a/check_wheel_collisions.py b/check_wheel_collisions.py new file mode 100644 index 0000000..7246bce --- /dev/null +++ b/check_wheel_collisions.py @@ -0,0 +1,78 @@ +# +# SPDX-FileCopyrightText: 2026 Espressif Systems (Shanghai) CO LTD +# +# SPDX-License-Identifier: Apache-2.0 +# +"""Detect duplicate *.whl basenames with different file contents under a tree. + +Used after downloading per-arch ``wheels-repaired-*`` artifacts into separate +subdirectories (``merge-multiple: false``) so a filesystem flatten step cannot +hide ARMv7 vs ARMv7 Legacy collisions before upload to S3. 
+""" + +from __future__ import annotations + +import hashlib +import sys + +from collections import defaultdict +from pathlib import Path + + +def _sha256_file(path: Path, chunk_size: int = 1024 * 1024) -> str: + h = hashlib.sha256() + with path.open("rb") as f: + while True: + b = f.read(chunk_size) + if not b: + break + h.update(b) + return h.hexdigest() + + +def collect_collision_errors(root: Path) -> list[str]: + """Return human-readable error lines; empty if OK.""" + wheels: list[Path] = [] + for p in sorted(root.rglob("*.whl")): + if p.is_file(): + wheels.append(p) + + by_name: defaultdict[str, list[Path]] = defaultdict(list) + for p in wheels: + by_name[p.name].append(p) + + errors: list[str] = [] + for name, paths in sorted(by_name.items()): + if len(paths) < 2: + continue + by_digest: defaultdict[str, list[Path]] = defaultdict(list) + for p in paths: + by_digest[_sha256_file(p)].append(p) + if len(by_digest) == 1: + # Identical content in multiple artifact trees — unusual but safe. 
+ continue + lines = [f"Duplicate wheel basename with different contents: {name}"] + for p in paths: + lines.append(f" - {p} sha256={_sha256_file(p)}") + errors.append("\n".join(lines)) + return errors + + +def main(argv: list[str]) -> int: + root = Path(argv[1] if len(argv) > 1 else ".").resolve() + if not root.is_dir(): + print(f"Error: not a directory: {root}", file=sys.stderr) + return 2 + + errors = collect_collision_errors(root) + if errors: + print("Wheel basename collision check failed:\n", file=sys.stderr) + for block in errors: + print(block, file=sys.stderr) + print(file=sys.stderr) + return 1 + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv)) diff --git a/force_no_binary_linux.txt b/force_no_binary_linux.txt new file mode 100644 index 0000000..989ea4a --- /dev/null +++ b/force_no_binary_linux.txt @@ -0,0 +1,9 @@ +# Linux: pip PIP_NO_BINARY / per-wheel --no-binary (ARMv7 Docker + build_wheels on Linux) +cffi +# PyPI maturin manylinux wheels need newer glibc than Debian bookworm/bullseye armhf; sdist uses setuptools-rust. +maturin +pillow +pyyaml +brotli +greenlet +bitarray diff --git a/include_list.yaml b/include_list.yaml index 484f4c4..e6073c8 100644 --- a/include_list.yaml +++ b/include_list.yaml @@ -1,5 +1,7 @@ # List of Python packages to additionally include to the automatically assembled requirements -#"For assembled
additionally include with on for version". +# "For assembled
additionally include with on for version". +# +# Linux packages forced to build from source (pip --no-binary): see force_no_binary_linux.txt # include_list template #- package_name: '' diff --git a/os_dependencies/linux_arm.sh b/os_dependencies/linux_arm.sh index cbf79a6..8f1b406 100755 --- a/os_dependencies/linux_arm.sh +++ b/os_dependencies/linux_arm.sh @@ -85,6 +85,15 @@ if [ "$arch" == "armv7l" ]; then apt remove --auto-remove --purge rust-gdb rustc libstd-rust-dev libstd-rust-1.48 2>/dev/null || true # install Rust dependencies apt-get install -y libssl-dev libffi-dev gcc musl-dev + # Match linux_armv7_docker_prepare.sh: Bookworm armhf has libffi.so.8 only; binary cffi wheels + # may still dlopen libffi.so.7 (pip build isolation, piwheels/PyPI manylinux tags). + for _ffi_libdir in /usr/lib/arm-linux-gnueabihf /usr/lib/arm-linux-gnueabi; do + if [ -d "$_ffi_libdir" ] && [ -f "$_ffi_libdir/libffi.so.8" ] && [ ! -e "$_ffi_libdir/libffi.so.7" ]; then + ln -sfn libffi.so.8 "$_ffi_libdir/libffi.so.7" + ldconfig 2>/dev/null || true + break + fi + done # install Rust curl --proto '=https' --tlsv1.3 -sSf https://sh.rustup.rs | bash -s -- -y . $HOME/.cargo/env diff --git a/os_dependencies/linux_armv7_docker_prepare.sh b/os_dependencies/linux_armv7_docker_prepare.sh new file mode 100644 index 0000000..c53cfe7 --- /dev/null +++ b/os_dependencies/linux_armv7_docker_prepare.sh @@ -0,0 +1,46 @@ +#!/usr/bin/env bash +# Minimal OS setup for ARMv7 Docker builds *before* pip installs (build_requirements / wheels). +# Expects to run as root (official python:*-bookworm / *-bullseye images). +# Must be sourced (not subprocess bash) so PIP_NO_BINARY persists for later pip / PEP 517 builds. + +set -e + +export DEBIAN_FRONTEND=noninteractive + +# Explicit libffi runtime matches the dev headers (bullseye: libffi7, bookworm: libffi8). 
+# Piwheels manylinux cffi wheels can link against a newer libffi than the image ships; +# pairing dev + runtime keeps installs predictable; PIP_NO_BINARY (force_no_binary_linux.txt) +# forces source builds for those packages before build_requirements / wheels. +. /etc/os-release +case "${VERSION_CODENAME:-}" in + bullseye) LIBFFI_RUNTIME=libffi7 ;; + bookworm) LIBFFI_RUNTIME=libffi8 ;; + *) LIBFFI_RUNTIME= ;; +esac + +apt-get update -qq +apt-get install -y --no-install-recommends \ + ca-certificates \ + libffi-dev \ + libssl-dev +if [ -n "$LIBFFI_RUNTIME" ]; then + apt-get install -y --no-install-recommends "$LIBFFI_RUNTIME" +fi + +export PIP_NO_BINARY="$( + grep -vE '^[[:space:]]*#|^[[:space:]]*$' force_no_binary_linux.txt | tr '\n' ',' | sed 's/,$//' +)" + +# Manylinux/piwheels cffi wheels on armhf still reference libffi.so.7. Debian Bookworm only +# ships libffi.so.8, so "import _cffi_backend" fails inside pip's isolated build env +# (e.g. argon2-cffi-bindings metadata). Bullseye typically already provides .so.7 via libffi7. +arch=$(uname -m) +if [ "$arch" = "armv7l" ]; then + for libdir in /usr/lib/arm-linux-gnueabihf /usr/lib/arm-linux-gnueabi; do + if [ -d "$libdir" ] && [ -f "$libdir/libffi.so.8" ] && [ ! -e "$libdir/libffi.so.7" ]; then + ln -sfn libffi.so.8 "$libdir/libffi.so.7" + ldconfig 2>/dev/null || true + break + fi + done +fi diff --git a/repair_wheels.py b/repair_wheels.py index 6ab90bd..fdb5863 100644 --- a/repair_wheels.py +++ b/repair_wheels.py @@ -12,6 +12,7 @@ - Linux: auditwheel (bundles SOs) """ +import os import platform import subprocess @@ -94,6 +95,46 @@ def get_wheel_arch(wheel_name: str) -> Union[str, None]: return None +def _only_plat_env_enabled() -> bool: + return os.environ.get("AUDITWHEEL_ONLY_PLAT", "").strip().lower() in ("1", "true", "yes") + + +def _allow_linux_tag_env_enabled() -> bool: + """When true, allow keeping linux-tag wheels on ARMv7 even if --plat is set. 
+ + This is useful when resolution prefers piwheels, which may provide wheels + tagged as ``linux_armv7l`` that are not repairable to the desired manylinux + tag in our repair containers. + """ + return os.environ.get("AUDITWHEEL_ALLOW_LINUX_TAG", "").strip().lower() in ("1", "true", "yes") + + +def _is_linux_tag_wheel(wheel_name: str) -> bool: + wn = wheel_name.lower() + return "-linux_" in wn and "manylinux" not in wn and "musllinux" not in wn + + +def _armv7_forced_plat_filename_ok(wheel_name: str, plat: str) -> bool: + """True if ``wheel_name`` matches ``AUDITWHEEL_PLAT`` for ARMv7 / ARMv7 Legacy splits. + + When ``AUDITWHEEL_ONLY_PLAT`` is set, legacy wheels must not carry a ``manylinux_2_36`` + tag (auditwheel dual-tag would collide with the standard lineage again). + """ + plat_l = plat.lower() + wn = wheel_name.lower() + if _allow_linux_tag_env_enabled() and _is_linux_tag_wheel(wn): + return True + if "manylinux_2_36" in plat_l: + return "manylinux_2_36" in wn + if "manylinux_2_31" in plat_l and "manylinux_2_36" not in plat_l: + if "manylinux_2_31" not in wn: + return False + if _only_plat_env_enabled() and "manylinux_2_36" in wn: + return False + return True + return True + + def repair_wheel_windows(wheel_path: Path, temp_dir: Path) -> subprocess.CompletedProcess[str]: """Repair Windows wheel using delvewheel.""" result = subprocess.run( @@ -157,12 +198,28 @@ def repair_wheel_linux(wheel_path: Path, temp_dir: Path) -> subprocess.Completed Uses --strip option to strip debugging symbols which can help with ELF alignment issues on ARM (fixes "ELF load command address/offset not properly aligned" errors). + + If ``AUDITWHEEL_PLAT`` is set (e.g. in CI for ARMv7 vs ARMv7 Legacy), it is passed as + ``auditwheel repair --plat ...`` so repaired wheels get distinct PEP 425 platform tags + when build lineages would otherwise emit the same filename. 
""" - result = subprocess.run( - ["auditwheel", "repair", str(wheel_path), "-w", str(temp_dir), "--strip"], - capture_output=True, - text=True, - ) + plat = os.environ.get("AUDITWHEEL_PLAT", "").strip() + only_plat = os.environ.get("AUDITWHEEL_ONLY_PLAT", "").strip().lower() in ("1", "true", "yes") + + cmd = ["auditwheel", "repair", str(wheel_path), "-w", str(temp_dir), "--strip"] + if plat: + cmd.extend(["--plat", plat]) + if only_plat: + cmd.append("--only-plat") + + result = subprocess.run(cmd, capture_output=True, text=True) + + # Older auditwheel versions may not support --only-plat. If requested, retry once without it. + combined_err = (result.stderr or "") + (result.stdout or "") + if only_plat and result.returncode != 0 and "unrecognized arguments: --only-plat" in combined_err: + cmd_no_only = [c for c in cmd if c != "--only-plat"] + result = subprocess.run(cmd_no_only, capture_output=True, text=True) + return result @@ -250,7 +307,8 @@ def main() -> None: print_color(f" {result.stderr.strip()}", Fore.RED) # Check for errors - error_msg = result.stderr.strip() if result.stderr else "" + # auditwheel may log failures on stdout or stderr depending on version / logging. 
+ error_msg = ((result.stderr or "") + "\n" + (result.stdout or "")).strip() # Corrupt zip / bad central directory (delocate opens the wheel as a zip) if _stderr_indicates_bad_zip(error_msg): @@ -289,7 +347,7 @@ def main() -> None: # Update wheel reference and error message for subsequent checks wheel = Path(renamed_wheel) - error_msg = result.stderr.strip() if result.stderr else "" + error_msg = ((result.stderr or "") + "\n" + (result.stdout or "")).strip() # Special handling forLinux ARMv7 broken wheels if ( @@ -302,16 +360,33 @@ def main() -> None: deleted_count += 1 continue + plat_env = os.environ.get("AUDITWHEEL_PLAT", "").strip() + allow_linux_tag = _allow_linux_tag_env_enabled() + is_linux_tag = _is_linux_tag_wheel(wheel.name) + # Check for non-critical errors (keep original wheel) is_noncritical = ( "too-recent versioned symbols" in error_msg # manylinux wheel can't find its libraries # it means it was already properly repaired - or ("manylinux" in wheel.name and "could not be located" in error_msg) + or (("manylinux" in wheel.name and "could not be located" in error_msg) and not plat_env) + # When allowing linux-tag wheels (piwheels), treat missing graft libs as non-fatal + # and keep the original linux-tag wheel rather than failing the whole repair job. + or ( + plat_env + and allow_linux_tag + and is_linux_tag + and ( + "Cannot repair wheel, because required library" in error_msg or "could not be located" in error_msg + ) + ) # ARMv7 CI runs under QEMU; auditwheel may fail libc detection on abi3/native .so + # When AUDITWHEEL_PLAT is set (ARMv7 vs ARMv7 Legacy), skipping repair would keep + # identical wheel filenames across lineages — do not treat libc detection as non-critical. 
or ( current_platform == "Linux" and current_arch == "armv7l" + and not plat_env and ("InvalidLibc" in error_msg or "couldn't detect libc" in error_msg) ) ) @@ -335,15 +410,37 @@ def main() -> None: print_color(" -> Keeping original wheel (build issue: needs older toolchain)", Fore.YELLOW) elif "manylinux" in wheel.name and "could not be located" in error_msg: print_color(" -> Keeping original wheel (already bundled from PyPI)", Fore.GREEN) + elif plat_env and allow_linux_tag and is_linux_tag: + print_color( + " -> Keeping original wheel (linux-tag wheel; not forcing manylinux under current policy)", + Fore.YELLOW, + ) elif ( current_platform == "Linux" and current_arch == "armv7l" + and not plat_env and ("InvalidLibc" in error_msg or "couldn't detect libc" in error_msg) ): print_color( " -> Keeping original wheel (auditwheel libc detection failed on ARMv7 runner; often QEMU)", Fore.YELLOW, ) + if ( + plat_env + and current_platform == "Linux" + and current_arch == "armv7l" + and not _armv7_forced_plat_filename_ok(wheel.name, plat_env) + and not (allow_linux_tag and is_linux_tag) + ): + msg = ( + f"Wheel filename does not match forced AUDITWHEEL_PLAT={plat_env!r} " + f"after non-fatal repair path: {wheel.name}" + ) + print_color(f" -> ERROR: {msg}", Fore.RED) + errors.append(f"{wheel.name}: {msg}") + wheel.unlink(missing_ok=True) + error_count += 1 + continue skipped_count += 1 elif has_error: # Actual error occurred (even if a wheel was created, it may be broken) @@ -374,10 +471,40 @@ def main() -> None: print_color(" -> Deleting repaired output (not a valid / readable zip archive)", Fore.RED) final_path.unlink(missing_ok=True) deleted_count += 1 + elif ( + plat_env + and current_platform == "Linux" + and current_arch == "armv7l" + and not _armv7_forced_plat_filename_ok(final_path.name, plat_env) + and not (allow_linux_tag and _is_linux_tag_wheel(final_path.name)) + ): + msg = ( + f"Repaired wheel filename does not match forced AUDITWHEEL_PLAT={plat_env!r}: 
{final_path.name}" + ) + print_color(f" -> ERROR: {msg}", Fore.RED) + errors.append(f"{final_path.name}: {msg}") + final_path.unlink(missing_ok=True) + error_count += 1 else: repaired_count += 1 elif result.returncode == 0: # No repaired wheel created, but command succeeded (already compatible) + if ( + plat_env + and current_platform == "Linux" + and current_arch == "armv7l" + and not _armv7_forced_plat_filename_ok(wheel.name, plat_env) + and not (allow_linux_tag and is_linux_tag) + ): + msg = ( + "auditwheel reported success but left the wheel unchanged with a filename " + f"that does not match AUDITWHEEL_PLAT={plat_env!r}: {wheel.name}" + ) + print_color(f" -> ERROR: {msg}", Fore.RED) + errors.append(f"{wheel.name}: {msg}") + wheel.unlink(missing_ok=True) + error_count += 1 + continue print_color(" -> Keeping original wheel (already compatible)", Fore.GREEN) skipped_count += 1 else: diff --git a/test_build_wheels.py b/test_build_wheels.py index ba6af76..1f6ed4f 100644 --- a/test_build_wheels.py +++ b/test_build_wheels.py @@ -222,6 +222,28 @@ def test_abi3_wheel(self): self.assertTrue(self.is_wheel_compatible(f"cryptography-41.0.0-cp39-abi3-{tag}.whl", "39")) +class TestPruneWheelsForArtifact(unittest.TestCase): + """``prune_wheels_not_for_current_python`` keeps per-matrix wheels for CI artifacts.""" + + def test_prune_removes_other_python_same_platform(self): + import tempfile + + from test_wheels_install import prune_wheels_not_for_current_python + + tag = _current_platform_wheel_tag() + with tempfile.TemporaryDirectory() as tmp: + d = Path(tmp) + (d / f"drop-1.0-cp310-cp310-{tag}.whl").write_bytes(b"a") + (d / f"keep-1.0-cp311-cp311-{tag}.whl").write_bytes(b"b") + (d / "universal-1.0-py3-none-any.whl").write_bytes(b"c") + + removed = prune_wheels_not_for_current_python("311", d) + self.assertEqual(removed, 1) + self.assertFalse((d / f"drop-1.0-cp310-cp310-{tag}.whl").exists()) + self.assertTrue((d / f"keep-1.0-cp311-cp311-{tag}.whl").exists()) + 
self.assertTrue((d / "universal-1.0-py3-none-any.whl").exists()) + + class TestParseWheelName(unittest.TestCase): """Test the parse_wheel_name function from _helper_functions.py.""" diff --git a/test_check_wheel_collisions.py b/test_check_wheel_collisions.py new file mode 100644 index 0000000..ad08152 --- /dev/null +++ b/test_check_wheel_collisions.py @@ -0,0 +1,60 @@ +# +# SPDX-FileCopyrightText: 2026 Espressif Systems (Shanghai) CO LTD +# +# SPDX-License-Identifier: Apache-2.0 +# + +import tempfile +import unittest + +from pathlib import Path + +import check_wheel_collisions as cwc + + +class TestCheckWheelCollisions(unittest.TestCase): + def test_no_collision_unique_basenames(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + (root / "a").mkdir() + (root / "b").mkdir() + (root / "a" / "foo-1.0-py3-none-any.whl").write_bytes(b"a") + (root / "b" / "bar-1.0-py3-none-any.whl").write_bytes(b"b") + self.assertEqual(cwc.collect_collision_errors(root), []) + + def test_collision_same_basename_different_bytes(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + (root / "linux-armv7").mkdir() + (root / "linux-armv7legacy").mkdir() + name = "pkg-1.0-cp39-cp39-linux_armv7l.whl" + (root / "linux-armv7" / name).write_bytes(b"v1") + (root / "linux-armv7legacy" / name).write_bytes(b"v2-different") + errs = cwc.collect_collision_errors(root) + self.assertEqual(len(errs), 1) + self.assertIn(name, errs[0]) + + def test_same_basename_identical_bytes_allowed(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + (root / "x").mkdir() + (root / "y").mkdir() + name = "same-1.0-py3-none-any.whl" + payload = b"identical" + (root / "x" / name).write_bytes(payload) + (root / "y" / name).write_bytes(payload) + self.assertEqual(cwc.collect_collision_errors(root), []) + + def test_main_returns_one_on_collision(self) -> None: + with tempfile.TemporaryDirectory() as tmp: + root = Path(tmp) + (root / "a").mkdir() 
+ (root / "b").mkdir() + name = "dup-1.0-py3-none-any.whl" + (root / "a" / name).write_bytes(b"1") + (root / "b" / name).write_bytes(b"2") + self.assertEqual(cwc.main(["_", str(root)]), 1) + + +if __name__ == "__main__": + unittest.main() diff --git a/test_wheels_install.py b/test_wheels_install.py index 5ac182b..bfc0533 100644 --- a/test_wheels_install.py +++ b/test_wheels_install.py @@ -9,6 +9,10 @@ verifying that wheel files are valid and platform-compatible. It also checks wheels against exclude_list.yaml and removes incompatible ones. +After a successful run, wheels that do not match this job's Python version and host +platform are deleted from ``downloaded_wheels`` so CI ``wheels-tested-*`` artifacts +do not carry the full multi-Python merge (only ``wheels-repaired-all`` is merged). + Wheels are ZIP archives (PEP 427). pip opens them with the zipfile module; a BadZipFile / "Bad magic number" error means the bytes on disk are not a valid ZIP (truncated, corrupted, or not a wheel), not that ".whl" was mistaken for ".zip". @@ -109,6 +113,29 @@ def find_compatible_wheels(python_version: str) -> list[Path]: return sorted(wheels) +def prune_wheels_not_for_current_python( + python_version_tag: str, + wheels_dir: Path | None = None, +) -> int: + """Remove ``*.whl`` files that are not compatible with this Python + platform. + + CI downloads the full merged ``wheels-repaired-all`` tree into ``downloaded_wheels``, + then tests only compatible wheels. Without pruning, the subsequent + ``wheels-tested--`` artifact would still contain every cp/py tag from the + merge, which is misleading and huge. ``wheels_dir`` defaults to ``WHEELS_DIR`` for + production; tests may pass a temporary directory. 
+ """ + base = wheels_dir if wheels_dir is not None else WHEELS_DIR + if not base.exists(): + return 0 + removed = 0 + for wheel_path in base.glob("*.whl"): + if not is_wheel_compatible(wheel_path.name, python_version_tag): + wheel_path.unlink(missing_ok=True) + removed += 1 + return removed + + def install_wheel(wheel_path: Path) -> tuple[bool, str]: """ Install a wheel with --no-deps to verify wheel validity. @@ -277,6 +304,14 @@ def main() -> int: print(f" - {wheel_name}") return 1 + pruned = prune_wheels_not_for_current_python(python_version_tag) + if pruned: + print_color( + f"Pruned {pruned} wheel(s) not for this matrix (Python {python_version} / " + f"current platform) before artifact upload", + Fore.YELLOW, + ) + print_color("\nAll compatible wheels processed successfully!", Fore.GREEN) return 0 diff --git a/upload_wheels.py b/upload_wheels.py index 270bf3f..ca94459 100644 --- a/upload_wheels.py +++ b/upload_wheels.py @@ -7,12 +7,16 @@ - argument S3 bucket """ +import hashlib import os import re import sys +from typing import Optional + import boto3 +from botocore.exceptions import ClientError from colorama import Fore from _helper_functions import print_color @@ -32,6 +36,44 @@ def normalize(name): return re.sub(r"[-_.]+", "-", name).lower() +def _file_md5_hex(path: str, chunk_size: int = 1024 * 1024) -> str: + h = hashlib.md5() + with open(path, "rb") as f: + while True: + chunk = f.read(chunk_size) + if not chunk: + break + h.update(chunk) + return h.hexdigest() + + +def _overwrite_would_hide_different_wheel(s3_key: str, local_path: str) -> Optional[str]: + """Return an error message if an existing object differs from local_path; else None.""" + obj = BUCKET.Object(s3_key) + try: + obj.load() + except ClientError as e: + code = e.response.get("Error", {}).get("Code", "") + if code in ("404", "NoSuchKey"): + return None + raise + remote_size = obj.content_length + local_size = os.path.getsize(local_path) + if remote_size != local_size: + return f"Refusing 
def _normalize_pkg_dir(name: str) -> str:
    """Return ``name`` with separator runs collapsed to one dash, lowercased.

    Any run of ``-``, ``_`` or ``.`` becomes a single ``-``. Historically this
    repo used both underscore and dash package directories on S3 (e.g.
    ``flask_compress`` vs ``flask-compress``), and such directories can
    legitimately hold the same wheel basenames; that situation is treated as a
    warning rather than a violation.
    """
    collapsed = re.sub(r"[-_.]+", "-", name)
    return collapsed.lower()


def _canonical_pkg_dirs_from_wheel_filename(wheel_name: str) -> set[str]:
    """Derive the normalized package directory candidates for a wheel filename.

    ``parse_wheel_name`` is tried first. When it yields nothing, the project
    name is taken as the text before the first ``-`` that is followed by a
    digit, mirroring the wheel-name parsing used by ``upload_wheels.py``.

    Returns:
        A one-element set holding the normalized directory name, or an empty
        set when no project name can be extracted.
    """
    fields = parse_wheel_name(wheel_name)
    if fields:
        project = fields[0]
    else:
        fallback = re.compile(r"^(.+?)-(\d+)").search(wheel_name)
        if fallback is None:
            return set()
        project = fallback.group(1)
    return {_normalize_pkg_dir(project)}
Those are historical and are not treated as + # violations (we cannot infer which object is authoritative without comparing bytes). + pkg_dirs = [] + for k in keys_for_name: + parts = k.split("/") + pkg_dirs.append(parts[1] if len(parts) >= 3 else "") + normalized = {_normalize_pkg_dir(d) for d in pkg_dirs if d} + reason_dup = "Duplicate wheel basename across multiple S3 keys: " + ", ".join(sorted(keys_for_name)) + canonical = _canonical_pkg_dirs_from_wheel_filename(wheel) + if len(normalized) <= 1 or (canonical and canonical.issubset(normalized)): + print_color(f"-- {wheel}", Fore.YELLOW) + print(f" {reason_dup}") + if len(normalized) <= 1: + print(f" Note: directories normalize to {next(iter(normalized), '')!r}; treated as warning") + else: + print( + " Note: at least one prefix matches the wheel's canonical project dir " + f"{sorted(canonical)!r}; extra keys are likely stale/wrong-path duplicates; treated as warning" + ) + else: + violations.append((wheel, reason_dup)) + print_color(f"-- {wheel}", Fore.RED) + print(f" {reason_dup}") + # Check against exclude_list (actual violations) should_exclude, reason = should_exclude_wheel_s3( wheel, exclude_requirements, supported_python_versions=supported_python_versions @@ -138,7 +194,7 @@ def main(): # Statistics print_color("---------- STATISTICS ----------") - print(f"Checked: {len(wheels)} wheels") + print(f"Checked: {wheels_on_s3_count} wheel objects ({len(wheel_names)} unique filenames)") if old_python_wheels: print_color(f"Old Python wheels: {len(old_python_wheels)} (warning only)", Fore.YELLOW) if violations: diff --git a/yaml_list_adapter.py b/yaml_list_adapter.py index b043442..ac72ee4 100644 --- a/yaml_list_adapter.py +++ b/yaml_list_adapter.py @@ -84,11 +84,21 @@ class YAMLListAdapter: requirements: set = set() def __init__(self, yaml_file: str, exclude: bool = False, current_platform: Optional[str] = None) -> None: + self._yaml_list: list = [] try: with open(yaml_file, "r") as f: - self._yaml_list = 
yaml.load(f, yaml.Loader) + raw = yaml.load(f, yaml.Loader) except FileNotFoundError: print_color(f"File not found, please check the file: {yaml_file}", Fore.RED) + raw = None + if isinstance(raw, dict): + # Optional mapping root (e.g. includes under ``includes``); list root is still supported. + self._yaml_list = raw.get("includes") or raw.get("include_packages") or [] + elif isinstance(raw, list): + self._yaml_list = raw + elif raw is not None: + print_color(f"Unexpected YAML root type in {yaml_file}: {type(raw).__name__}", Fore.RED) + self._yaml_list = [] self.exclude = exclude # When building wheels: only exclude entries that apply to this platform