diff --git a/.github/workflows/release-validation.yml b/.github/workflows/release-validation.yml index ba1a6c5a4..0e0a64343 100644 --- a/.github/workflows/release-validation.yml +++ b/.github/workflows/release-validation.yml @@ -35,6 +35,9 @@ on: - 'v*.*.*-incubating-RC*' pull_request: types: [opened, synchronize, reopened] + schedule: + # Weekly run against main: catches dependency breakage between releases. + - cron: '0 9 * * 1' workflow_dispatch: concurrency: @@ -113,8 +116,13 @@ jobs: if: steps.cache-rat.outputs.cache-hit != 'true' run: | mkdir -p ~/.cache/apache-rat - curl -fL -o ~/.cache/apache-rat/apache-rat-0.18.jar \ + JAR="$HOME/.cache/apache-rat/apache-rat-0.18.jar" + curl -fL -o "$JAR" \ https://repo1.maven.org/maven2/org/apache/rat/apache-rat/0.18/apache-rat-0.18.jar + # Verify integrity: SHA256 computed from the official Maven Central download + # and cross-checked against Maven Central's published SHA1. + echo "fe513ddd10cdc07e965ba430f2c093d8745ff24a0fb54efe0933653752c53301 $JAR" \ + | sha256sum --check - name: Extract version id: version @@ -194,13 +202,116 @@ jobs: retention-days: 7 if-no-files-found: ignore + # Installs the wheel without any optional extras ([learn], etc.) and imports + # core symbols. Catches accidental leakage of optional dependencies into core + # code — a bare `pip install apache-burr` user would hit an ImportError that + # the [learn] smoke test would never see. + bare-install: + name: "Release Validation / bare-install" + needs: [check-paths, build-artifacts] + if: needs.check-paths.outputs.should_run == 'true' + runs-on: ubuntu-latest + timeout-minutes: 10 + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v4 + with: + python-version: '3.12' + + - name: Download release artifacts + uses: actions/download-artifact@v4 + with: + name: release-artifacts + path: dist + + - name: Install wheel without optional extras + env: + BURR_VERSION: ${{ needs.build-artifacts.outputs.version }} + run: | + pip install "dist/apache_burr-${BURR_VERSION}-py3-none-any.whl" + + - name: Verify core imports succeed without optional dependencies + run: | + python -c " + import burr + from burr.core import ApplicationBuilder, State + from burr.core.action import action + print('Core imports OK') + " + + # Extracts the sdist tarball, rebuilds the wheel from it (including the + # frontend npm build), then compares the resulting wheel's file contents + # against the release wheel using content hashes. Catches cases where the + # sdist is missing files that the direct wheel build includes. + sdist-wheel-equivalence: + name: "Release Validation / sdist-wheel-equivalence" + needs: [check-paths, build-artifacts] + if: needs.check-paths.outputs.should_run == 'true' + runs-on: ubuntu-latest + timeout-minutes: 25 + steps: + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v4 + with: + python-version: '3.12' + cache: pip + + - uses: actions/setup-node@v4 + with: + node-version: '20' + cache: npm + cache-dependency-path: telemetry/ui/package-lock.json + + - uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: '17' + + - name: Install system deps + run: sudo apt-get install -y --no-install-recommends graphviz + + - name: Install Python build deps + run: pip install flit twine jinja2 + + - name: Download release artifacts + uses: actions/download-artifact@v4 + with: + name: release-artifacts + path: dist + + - name: Extract sdist and build wheel from it + env: + BURR_VERSION: ${{ needs.build-artifacts.outputs.version }} + run: | + mkdir -p /tmp/sdist-extract /tmp/sdist-wheel + tar -xzf "dist/apache-burr-${BURR_VERSION}-incubating-sdist.tar.gz" \ + -C /tmp/sdist-extract + # Find the single top-level directory the tarball extracted into + SDIST_ROOT=$(find /tmp/sdist-extract -maxdepth 1 -mindepth 1 -type d | head -1) + cd "$SDIST_ROOT" + # Build wheel from within the extracted sdist. The sdist contains the + # React frontend source (telemetry/ui/) but not the compiled output, + # so the full npm build runs here — same as the original build. + python scripts/apache_release.py wheel "$BURR_VERSION" 0 \ + --skip-signing --output-dir /tmp/sdist-wheel + + - name: Compare sdist-built wheel against release wheel + env: + BURR_VERSION: ${{ needs.build-artifacts.outputs.version }} + run: | + python scripts/verify_apache_artifacts.py compare-wheels \ + "dist/apache_burr-${BURR_VERSION}-py3-none-any.whl" \ + "/tmp/sdist-wheel/apache_burr-${BURR_VERSION}-py3-none-any.whl" + # Single stable required-check name. Always runs (if: always()) so it produces # a definite SUCCESS or FAILURE — never SKIPPED. Branch protection in # .asf.yaml requires this context, not the underlying jobs, so path-filtered # docs/website PRs (where the upstream jobs are skipped) still go green here. summary: name: "Release Validation / summary" - needs: [check-paths, build-artifacts, install-and-smoke] + needs: [check-paths, build-artifacts, install-and-smoke, bare-install, sdist-wheel-equivalence] if: always() runs-on: ubuntu-latest timeout-minutes: 2 @@ -210,13 +321,17 @@ jobs: CHECK_PATHS: ${{ needs.check-paths.result }} BUILD_ARTIFACTS: ${{ needs.build-artifacts.result }} INSTALL_AND_SMOKE: ${{ needs.install-and-smoke.result }} + BARE_INSTALL: ${{ needs.bare-install.result }} + SDIST_WHEEL_EQUIV: ${{ needs.sdist-wheel-equivalence.result }} run: | - echo "check-paths: $CHECK_PATHS" - echo "build-artifacts: $BUILD_ARTIFACTS" - echo "install-and-smoke: $INSTALL_AND_SMOKE" + echo "check-paths: $CHECK_PATHS" + echo "build-artifacts: $BUILD_ARTIFACTS" + echo "install-and-smoke: $INSTALL_AND_SMOKE" + echo "bare-install: $BARE_INSTALL" + echo "sdist-wheel-equivalence: $SDIST_WHEEL_EQUIV" # Pass if every needed job is success or skipped; fail if any # failed or was cancelled. - for r in "$CHECK_PATHS" "$BUILD_ARTIFACTS" "$INSTALL_AND_SMOKE"; do + for r in "$CHECK_PATHS" "$BUILD_ARTIFACTS" "$INSTALL_AND_SMOKE" "$BARE_INSTALL" "$SDIST_WHEEL_EQUIV"; do case "$r" in success|skipped) ;; *) echo "::error::Release Validation failed (one or more jobs not success/skipped)"; exit 1 ;; diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 472ccd441..e6c86f3b9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -78,3 +78,12 @@ repos: entry: npx --prefix telemetry/ui lint-staged pass_filenames: false always_run: true + - id: check-asf-headers + name: Check ASF license headers + language: python + entry: python scripts/check_asf_headers.py + # Run on Python, YAML, and shell files — the source types that must + # carry the Apache 2.0 header. Exclusions are read from .rat-excludes + # at runtime so known third-party files are automatically respected. + types_or: [python, yaml, shell] + pass_filenames: true diff --git a/.rat-excludes b/.rat-excludes index 5ef43f57f..5609dd080 100644 --- a/.rat-excludes +++ b/.rat-excludes @@ -44,15 +44,12 @@ # Third-party MIT-licensed files (attributed in LICENSE). # Most names are unique within the repo so basename matching is safe. -# Known collisions: -# - utils.py: also matches our own ASF code in burr/tracking/, etc. -# (4 other utils.py files; all currently have ASF headers) +# Known collision: # - button.tsx: also matches telemetry/ui/src/components/common/button.tsx # (our own ASF code with header) -# A future regression in any of those collision targets would silently pass -# RAT. Tracked as a follow-up to rename or restructure. +# A future regression in that collision target would silently pass RAT. **/prompts.py -**/utils.py +**/deep_researcher_utils.py **/animated-beam.tsx **/animated-shiny-text.tsx **/blur-fade.tsx diff --git a/examples/deep-researcher/application.py b/examples/deep-researcher/application.py index 9547998bd..f64afe174 100644 --- a/examples/deep-researcher/application.py +++ b/examples/deep-researcher/application.py @@ -32,9 +32,9 @@ import prompts try: - utils = importlib.import_module("burr.examples.deep-researcher.utils") + utils = importlib.import_module("burr.examples.deep-researcher.deep_researcher_utils") except ModuleNotFoundError: - import utils + import deep_researcher_utils as utils @functools.lru_cache diff --git a/examples/deep-researcher/utils.py b/examples/deep-researcher/deep_researcher_utils.py similarity index 100% rename from examples/deep-researcher/utils.py rename to examples/deep-researcher/deep_researcher_utils.py diff --git a/scripts/apache_release.py b/scripts/apache_release.py index 1d823fe1c..4fd43bfd0 100644 --- a/scripts/apache_release.py +++ b/scripts/apache_release.py @@ -1385,6 +1385,7 @@ def cmd_verify(args) -> bool: """Handle 'verify' subcommand.""" _print_section(f"Verifying Artifacts - v{args.version}-RC{args.rc_num}") + skip_signing = getattr(args, "skip_signing", False) artifacts = _collect_all_artifacts(args.version, args.artifacts_dir) if not artifacts: @@ -1395,7 +1396,7 @@ def cmd_verify(args) -> bool: for artifact in artifacts: if artifact.endswith((".asc", ".sha512")): continue # Skip signature/checksum files - if not _verify_artifact_complete(artifact): + if not _verify_artifact_complete(artifact, skip_signing=skip_signing): all_valid = False if all_valid: @@ -1594,6 +1595,11 @@ def _build_parser() -> argparse.ArgumentParser: verify_parser.add_argument("version", help="Version") verify_parser.add_argument("rc_num", help="RC number") verify_parser.add_argument("--artifacts-dir", default="dist") + verify_parser.add_argument( + "--skip-signing", + action="store_true", + help="Skip GPG signature verification (for builds produced with --skip-signing).", + ) # vote-email subcommand vote_email_parser = subparsers.add_parser("vote-email", help="Generate release vote email") diff --git a/scripts/check_asf_headers.py b/scripts/check_asf_headers.py new file mode 100644 index 000000000..15d44a2e5 --- /dev/null +++ b/scripts/check_asf_headers.py @@ -0,0 +1,134 @@ +#!/usr/bin/env python3 +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +""" +Check that Python, YAML, and shell files carry the ASF license header. + +Called by pre-commit with the list of staged files. Reads .rat-excludes at +runtime so known third-party files are automatically respected without any +duplication of the exclusion list. + +Usage (pre-commit invokes this automatically): + python scripts/check_asf_headers.py file1.py file2.yml ... +""" + +import sys +from fnmatch import fnmatch +from pathlib import Path +from typing import Optional + +# Extensions whose source files must carry an ASF header. +CHECKED_EXTENSIONS = {".py", ".yml", ".yaml", ".sh"} + +# Only search this many lines from the top of each file. +# Headers are always at the start; searching the whole file would be slow +# and would risk false positives from files that quote the license in prose. +HEADER_SEARCH_LINES = 30 + +# The one string that appears in every valid ASF license header regardless +# of comment style (# for Python/YAML/shell, // for Java, /* for C, etc.). +ASF_HEADER_MARKER = "Licensed to the Apache Software Foundation (ASF)" + + +def _find_repo_root(start: Path) -> Path: + """Walk upward from start until we find .rat-excludes or pyproject.toml.""" + for candidate in [start.resolve(), *start.resolve().parents]: + if (candidate / ".rat-excludes").exists() or (candidate / "pyproject.toml").exists(): + return candidate + return start.resolve() + + +def _load_rat_exclude_patterns(repo_root: Path) -> list: + """Return non-comment, non-blank lines from .rat-excludes as glob patterns.""" + path = repo_root / ".rat-excludes" + if not path.exists(): + return [] + return [ + line.strip() + for line in path.read_text(encoding="utf-8").splitlines() + if line.strip() and not line.strip().startswith("#") + ] + + +def _is_excluded(file_path: Path, repo_root: Path, patterns: list) -> bool: + """Return True if file_path matches any pattern from .rat-excludes. + + Patterns use RAT's **/ syntax. We handle this by checking the + file's basename against patterns that start with **/, and also checking + the full relative path against each pattern directly. + """ + try: + rel = str(file_path.resolve().relative_to(repo_root.resolve())) + except ValueError: + rel = str(file_path) + name = file_path.name + for pattern in patterns: + if pattern.startswith("**/"): + # Strip the **/ prefix and match against the bare filename. + if fnmatch(name, pattern[3:]): + return True + if fnmatch(rel, pattern): + return True + return False + + +def _has_asf_header(file_path: Path) -> bool: + """Return True if the ASF header marker appears within the first HEADER_SEARCH_LINES.""" + try: + with file_path.open(encoding="utf-8", errors="replace") as fh: + for i, line in enumerate(fh): + if i >= HEADER_SEARCH_LINES: + break + if ASF_HEADER_MARKER in line: + return True + except OSError: + pass + return False + + +def main(argv: Optional[list] = None) -> int: + files = [Path(p) for p in (argv if argv is not None else sys.argv[1:])] + if not files: + return 0 + + repo_root = _find_repo_root(files[0].parent) + patterns = _load_rat_exclude_patterns(repo_root) + + violations = [] + for f in files: + if f.suffix not in CHECKED_EXTENSIONS: + continue + if _is_excluded(f, repo_root, patterns): + continue + if not _has_asf_header(f): + violations.append(f) + + if violations: + print("Missing ASF license header in the following file(s):") + for v in violations: + print(f" {v}") + print() + print("Add the standard Apache 2.0 header block to each file.") + print("See any existing .py file in scripts/ for the correct format.") + return 1 + + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/scripts/ci_smoke_server.py b/scripts/ci_smoke_server.py index ba51d6fa4..870cc80bb 100644 --- a/scripts/ci_smoke_server.py +++ b/scripts/ci_smoke_server.py @@ -36,6 +36,7 @@ import argparse import json import os +import shutil import signal import socket import subprocess @@ -45,6 +46,7 @@ import urllib.error import urllib.request from pathlib import Path +from typing import Optional def _free_port() -> int: @@ -79,7 +81,49 @@ def _poll_url(url: str, timeout_s: int = 30, server_proc: "subprocess.Popen | No return False -def main() -> None: +def _poll_projects( + base_url: str, + project_name: str, + timeout_s: int = 30, + server_proc: "subprocess.Popen | None" = None, +) -> bool: + """Poll /api/v0/projects until project_name appears or timeout. + + The Burr server discovers tracking data from the filesystem on demand, so + there is a short lag between a tracked app writing its data and the server + reporting the project over the API. Polling is more reliable than a fixed + sleep because it succeeds as soon as the data is visible and bails early + if the server process has already died. + """ + deadline = time.time() + timeout_s + while time.time() < deadline: + if server_proc is not None and server_proc.poll() is not None: + return False + try: + with urllib.request.urlopen(f"{base_url}/api/v0/projects", timeout=2) as resp: + if resp.status == 200: + data = json.loads(resp.read().decode("utf-8")) + if project_name in [p.get("name") for p in data]: + return True + except (urllib.error.URLError, ConnectionResetError, TimeoutError): + pass + time.sleep(1) + return False + + +def _should_cleanup(explicit: Optional[bool]) -> bool: + """Return True if the work directory should be removed after the run. + + Priority: explicit flag > GITHUB_ACTIONS env var > default (clean locally). + In GitHub Actions the workspace is preserved so the upload-artifact step + can capture it on failure; locally it is cleaned up by default. + """ + if explicit is not None: + return explicit + return os.environ.get("GITHUB_ACTIONS") != "true" + + +def _build_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser(description=__doc__) parser.add_argument("--wheel", required=True, help="Path to the wheel to smoke-test") parser.add_argument( @@ -99,13 +143,28 @@ def main() -> None: default=45, help="Seconds to wait for the server to become ready", ) - args = parser.parse_args() + parser.add_argument( + "--cleanup", + action=argparse.BooleanOptionalAction, + default=None, + help=( + "Remove work directory after run. " + "Defaults to True locally and False in GitHub Actions " + "(so CI can upload the workspace as a debug artifact)." + ), + ) + return parser + + +def main() -> None: + args = _build_parser().parse_args() wheel_path = Path(args.wheel).resolve() if not wheel_path.is_file(): _fail(f"Wheel not found: {wheel_path}") port = args.port if args.port else _free_port() + should_cleanup = _should_cleanup(args.cleanup) # Fresh working dirs, outside of any source tree work_dir = Path(tempfile.mkdtemp(prefix="burr-smoke-")) @@ -118,6 +177,7 @@ def main() -> None: _log(f"Workspace: {work_dir}") _log(f"Python: {args.python}") _log(f"Wheel: {wheel_path}") + _log(f"Cleanup after run: {should_cleanup}") server_proc = None try: @@ -149,6 +209,9 @@ def main() -> None: ) # 4. Start server from outside the source tree so CWD can't shadow the install. + # start_new_session=True puts the server and all its children (uvicorn) into a + # dedicated process group. This lets us send SIGTERM to the entire group on + # teardown, preventing orphaned uvicorn processes from holding the port. _log(f"Starting burr server on port {port}...") env = os.environ.copy() env["burr_path"] = str(burr_data_dir) @@ -160,6 +223,7 @@ def main() -> None: env=env, stdout=log_fh, stderr=subprocess.STDOUT, + start_new_session=True, ) base_url = f"http://127.0.0.1:{port}" @@ -173,7 +237,15 @@ def main() -> None: _fail("Server did not become ready") _log("Server is up") - # 5. Run a tracked Burr app as a separate process using the venv. + # 5. Verify the UI is served at the web root. If the frontend build is + # missing from the wheel, GET / returns 404 even though the API works. + _log("Checking UI is served at GET /...") + with urllib.request.urlopen(f"{base_url}/", timeout=5) as resp: + if resp.status != 200: + _fail(f"GET / returned HTTP {resp.status}, expected 200 — UI may be missing from wheel") + _log("UI served correctly") + + # 6. Run a tracked Burr app as a separate process using the venv. _log("Running tracked Burr app...") app_script.write_text( f"""\ @@ -207,27 +279,42 @@ def inc(state: State) -> State: ) subprocess.run([str(venv_py), str(app_script)], check=True, cwd=str(work_dir), env=env) - # 6. Verify the server sees the project. - _log("Verifying server sees project 'ci-smoke-test'...") - time.sleep(2) # give the server a moment to pick up the filesystem change - with urllib.request.urlopen(f"{base_url}/api/v0/projects", timeout=5) as resp: - data = json.loads(resp.read().decode("utf-8")) - names = [p.get("name") for p in data] - if "ci-smoke-test" not in names: - _fail(f"Project 'ci-smoke-test' not found. Projects seen: {names}") - _log(f"Projects: {names}") + # 7. Poll until the server reports the project. The server discovers tracking + # data from the filesystem on demand, so there is a short lag after the app + # writes its data. Polling is preferable to a fixed sleep: it succeeds as soon + # as the data appears and gives a clear failure message on timeout. + _log("Waiting for server to report project 'ci-smoke-test'...") + if not _poll_projects( + base_url, "ci-smoke-test", timeout_s=30, server_proc=server_proc + ): + if server_proc.poll() is not None: + _log(f"Server process exited with code {server_proc.returncode}") + _log("--- server log ---") + print(server_log.read_text(), flush=True) + _log("--- end server log ---") + _fail("Project 'ci-smoke-test' never appeared in /api/v0/projects") _log("SUCCESS") finally: if server_proc is not None and server_proc.poll() is None: - _log("Stopping server...") - server_proc.send_signal(signal.SIGTERM) + _log("Stopping server (sending SIGTERM to process group)...") + try: + # Kill the entire process group so uvicorn (a child of burr) is also + # terminated. Without this, uvicorn becomes an orphan that holds the + # port and consumes resources after the script exits. + os.killpg(os.getpgid(server_proc.pid), signal.SIGTERM) + except ProcessLookupError: + pass # process group already gone try: server_proc.wait(timeout=10) except subprocess.TimeoutExpired: server_proc.kill() - # Leave work_dir intact in CI (uploadable as artifact); also leave it locally - # on failure for easier debugging. Caller can rm -rf /tmp/burr-smoke-* to clean up. + + if should_cleanup: + _log(f"Cleaning up workspace {work_dir} ...") + shutil.rmtree(work_dir, ignore_errors=True) + else: + _log(f"Workspace preserved at {work_dir} (upload as CI artifact if needed)") if __name__ == "__main__": diff --git a/scripts/verify_apache_artifacts.py b/scripts/verify_apache_artifacts.py index 583991e48..03d60ab84 100755 --- a/scripts/verify_apache_artifacts.py +++ b/scripts/verify_apache_artifacts.py @@ -194,6 +194,46 @@ def _wheel_file_bytes(artifact_path: str) -> dict[str, bytes]: return {name: wheel.read(name) for name in wheel.namelist() if not name.endswith("/")} +def _wheel_content_hashes(wheel_path: str) -> dict[str, str]: + """Return {member_path: sha256_hex} for all non-directory members of a wheel. + + RECORD is excluded because it is a manifest that lists other files' hashes. + Two wheels built from identical source at different times will produce + different RECORD files, but their other content will be the same. + """ + result: dict[str, str] = {} + with zipfile.ZipFile(wheel_path, "r") as zf: + for name in zf.namelist(): + if name.endswith("/"): + continue # directory entry — no content to hash + if PurePosixPath(name).name == "RECORD": + continue # manifest of other files' hashes — legitimately differs + result[name] = hashlib.sha256(zf.read(name)).hexdigest() + return result + + +def _compare_wheel_contents(wheel_a: str, wheel_b: str) -> tuple[bool, list[str]]: + """Compare two wheels by file content hash, ignoring zip metadata and RECORD. + + Returns (all_match, list_of_difference_descriptions). Uses content hashes + rather than whole-file SHA because zip timestamps make binary comparison + fail for wheels built from the same source at different times. + """ + hashes_a = _wheel_content_hashes(wheel_a) + hashes_b = _wheel_content_hashes(wheel_b) + name_a = os.path.basename(wheel_a) + name_b = os.path.basename(wheel_b) + diffs: list[str] = [] + for key in sorted(set(hashes_a) | set(hashes_b)): + if key not in hashes_b: + diffs.append(f"only in {name_a}: {key}") + elif key not in hashes_a: + diffs.append(f"only in {name_b}: {key}") + elif hashes_a[key] != hashes_b[key]: + diffs.append(f"content differs: {key}") + return len(diffs) == 0, diffs + + def _find_files_by_basename(file_bytes: dict[str, bytes], basename: str) -> list[str]: matches = [] for path in file_bytes: @@ -558,8 +598,12 @@ def _check_licenses_with_rat( print(" Extracting archive...") try: - with tarfile.open(artifact_path, "r:gz") as tar: - _safe_extract_tar(tar, extract_dir) + if artifact_path.endswith(".whl"): + with zipfile.ZipFile(artifact_path, "r") as whl: + whl.extractall(extract_dir) + else: + with tarfile.open(artifact_path, "r:gz") as tar: + _safe_extract_tar(tar, extract_dir) print(" ✓ Extracted to temp directory") except Exception as exc: print(f" ✗ Error extracting archive: {exc}") @@ -703,15 +747,17 @@ def verify_licenses( _fail("Java not found. Required for Apache RAT.") tar_artifacts = [name for name in _artifact_files(artifacts_dir) if name.endswith(".tar.gz")] - if not tar_artifacts: - print(f"⚠️ No tar.gz artifacts found in {artifacts_dir}") - summary.fail("Apache RAT", "no tar.gz artifacts found") + wheel_artifacts = [name for name in _artifact_files(artifacts_dir) if name.endswith(".whl")] + rat_artifacts = tar_artifacts + wheel_artifacts + if not rat_artifacts: + print(f"⚠️ No tar.gz or .whl artifacts found in {artifacts_dir}") + summary.fail("Apache RAT", "no tar.gz or .whl artifacts found") return False - print(f"Found {len(tar_artifacts)} tar.gz artifact(s) to check:\n") + print(f"Found {len(rat_artifacts)} artifact(s) to check ({len(tar_artifacts)} tarball(s), {len(wheel_artifacts)} wheel(s)):\n") all_valid = True - for artifact_name in tar_artifacts: + for artifact_name in rat_artifacts: artifact_path = os.path.join(artifacts_dir, artifact_name) report_name = artifact_name.replace(".tar.gz", "").replace(".", "-") if not _check_licenses_with_rat( @@ -881,15 +927,19 @@ def _compare_rebuilt_artifacts( all_valid = False continue rebuilt_wheel = matching_wheels[0] - if _sha512_for_file(release_wheel) == _sha512_for_file(rebuilt_wheel): - summary.pass_(f"Rebuilt wheel checksum: {release_name}") + match, diffs = _compare_wheel_contents(release_wheel, rebuilt_wheel) + if match: + summary.pass_(f"Rebuilt wheel contents: {release_name}") else: + for diff in diffs[:5]: + print(f" {diff}") summary.fail( - f"Rebuilt wheel checksum: {release_name}", "rebuilt wheel differs from release" + f"Rebuilt wheel contents: {release_name}", + f"{len(diffs)} file(s) differ between release and rebuilt wheel", ) all_valid = False else: - summary.skip("Rebuilt wheel checksum", "no release wheel found") + summary.skip("Rebuilt wheel contents", "no release wheel found") return all_valid @@ -1116,6 +1166,32 @@ def cmd_all(args: argparse.Namespace) -> bool: return summary.ok +def cmd_compare_wheels(args: argparse.Namespace) -> bool: + """Handle 'compare-wheels' subcommand. + + Compares two wheel files by their file content hashes, ignoring zip + metadata (timestamps) and the RECORD manifest. Exits non-zero on any + difference so it can be used as a CI gate. + """ + _print_section("Comparing Wheel Contents") + for path in [args.wheel_a, args.wheel_b]: + if not os.path.isfile(path): + _fail(f"Wheel not found: {path}") + + print(f" Wheel A: {os.path.basename(args.wheel_a)}") + print(f" Wheel B: {os.path.basename(args.wheel_b)}") + + match, diffs = _compare_wheel_contents(args.wheel_a, args.wheel_b) + if match: + print("\n✅ Wheel contents are equivalent (same files, same content)") + return True + + print(f"\n❌ Wheel contents differ ({len(diffs)} difference(s)):") + for diff in diffs: + print(f" {diff}") + return False + + def cmd_list_contents(args: argparse.Namespace) -> None: list_contents(args.artifact) @@ -1239,6 +1315,13 @@ def main() -> None: "--artifacts-dir", default="dist", help="Directory containing artifacts (default: dist)" ) + compare_wheels_parser = subparsers.add_parser( + "compare-wheels", + help="Compare two wheel files by content hash (ignores zip metadata and RECORD)", + ) + compare_wheels_parser.add_argument("wheel_a", help="Path to first wheel") + compare_wheels_parser.add_argument("wheel_b", help="Path to second wheel") + args = parser.parse_args() success = False @@ -1258,6 +1341,8 @@ def main() -> None: success = cmd_all(args) elif args.command == "twine-check": success = cmd_twine_check(args) + elif args.command == "compare-wheels": + success = cmd_compare_wheels(args) else: _fail(f"Unknown command: {args.command}") except KeyboardInterrupt: diff --git a/tests/test_apache_release.py b/tests/test_apache_release.py index ac0e48245..f43867d75 100644 --- a/tests/test_apache_release.py +++ b/tests/test_apache_release.py @@ -15,6 +15,7 @@ # specific language governing permissions and limitations # under the License. +import hashlib import importlib.util import sys from argparse import Namespace @@ -163,9 +164,30 @@ def fake_promote(source_url, target_url, message, apache_id, dry_run=False): assert release.cmd_promote(args) is True # only the RC is checked out; the release tree is never downloaded assert len(calls["checkout"]) == 1 + assert calls["checkout"][0][0].endswith("/0.42.0-incubating-RC1") source_url, target_url, message, apache_id, dry_run = calls["promote"] assert source_url.endswith("/0.42.0-incubating-RC1") assert target_url == "https://dist.apache.org/repos/dist/release/incubator/burr/0.42.0" assert apache_id == "hari" assert dry_run is True + + +def test_verify_parser_accepts_skip_signing(): + parser = release._build_parser() + args = parser.parse_args(["verify", "0.42.0", "0", "--skip-signing"]) + assert args.skip_signing is True + + +def test_cmd_verify_skip_signing_succeeds_without_asc_files(tmp_path): + version = "0.42.0" + content = b"fake artifact content" + sha = hashlib.sha512(content).hexdigest() + + artifact_name = f"apache-burr-{version}-incubating-src.tar.gz" + (tmp_path / artifact_name).write_bytes(content) + (tmp_path / f"{artifact_name}.sha512").write_text(f"{sha} {artifact_name}\n") + # No .asc file — simulates a --skip-signing build + + args = Namespace(version=version, rc_num="0", artifacts_dir=str(tmp_path), skip_signing=True) + assert release.cmd_verify(args) is True diff --git a/tests/test_check_asf_headers.py b/tests/test_check_asf_headers.py new file mode 100644 index 000000000..cad6fa49d --- /dev/null +++ b/tests/test_check_asf_headers.py @@ -0,0 +1,179 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import importlib.util +import sys +from pathlib import Path + +import pytest + + +def _load_module(): + module_path = Path(__file__).resolve().parent.parent / "scripts" / "check_asf_headers.py" + spec = importlib.util.spec_from_file_location("check_asf_headers", module_path) + module = importlib.util.module_from_spec(spec) + assert spec.loader is not None + sys.modules[spec.name] = module + spec.loader.exec_module(module) + return module + + +chk = _load_module() + +ASF_HEADER = "# Licensed to the Apache Software Foundation (ASF) under one\n" + + +# --------------------------------------------------------------------------- +# _has_asf_header +# --------------------------------------------------------------------------- + + +def test_has_asf_header_returns_true_when_marker_present(tmp_path): + """A file whose first line contains the ASF marker is accepted.""" + f = tmp_path / "good.py" + f.write_text(ASF_HEADER + "print('hello')\n") + assert chk._has_asf_header(f) is True + + +def test_has_asf_header_returns_false_when_marker_absent(tmp_path): + """A file with no mention of the ASF is rejected.""" + f = tmp_path / "bad.py" + f.write_text("print('hello')\n") + assert chk._has_asf_header(f) is False + + +def test_has_asf_header_only_searches_first_n_lines(tmp_path): + """The marker appearing after HEADER_SEARCH_LINES is not found.""" + padding = "# padding\n" * chk.HEADER_SEARCH_LINES + f = tmp_path / "late.py" + f.write_text(padding + ASF_HEADER) + assert chk._has_asf_header(f) is False + + +def test_has_asf_header_accepts_marker_anywhere_within_search_window(tmp_path): + """A shebang line before the header is fine — still within search window.""" + f = tmp_path / "script.sh" + f.write_text("#!/usr/bin/env bash\n" + ASF_HEADER) + assert chk._has_asf_header(f) is True + + +# --------------------------------------------------------------------------- +# _load_rat_exclude_patterns +# --------------------------------------------------------------------------- + + +def test_load_rat_exclude_patterns_strips_comments_and_blanks(tmp_path): + """Comments (#) and blank lines are stripped; only glob patterns remain.""" + rat = tmp_path / ".rat-excludes" + rat.write_text( + "# This is a comment\n" + "\n" + "**/prompts.py\n" + " # indented comment\n" + "**/deep_researcher_utils.py\n" + ) + patterns = chk._load_rat_exclude_patterns(tmp_path) + assert patterns == ["**/prompts.py", "**/deep_researcher_utils.py"] + + +def test_load_rat_exclude_patterns_returns_empty_when_file_missing(tmp_path): + """Returns an empty list when .rat-excludes does not exist.""" + assert chk._load_rat_exclude_patterns(tmp_path) == [] + + +# --------------------------------------------------------------------------- +# _is_excluded +# --------------------------------------------------------------------------- + + +def test_is_excluded_matches_basename_glob(tmp_path): + """A file matching **/name.py is excluded regardless of directory depth.""" + f = tmp_path / "examples" / "deep-researcher" / "prompts.py" + f.parent.mkdir(parents=True) + f.touch() + assert chk._is_excluded(f, tmp_path, ["**/prompts.py"]) is True + + +def test_is_excluded_returns_false_for_non_matching_file(tmp_path): + """An ordinary source file that matches no pattern is not excluded.""" + f = tmp_path / "burr" / "core.py" + f.parent.mkdir(parents=True) + f.touch() + assert chk._is_excluded(f, tmp_path, ["**/prompts.py"]) is False + + +def test_is_excluded_matches_extension_glob(tmp_path): + """A **/*.json pattern excludes all JSON files.""" + f = tmp_path / "some" / "config.json" + f.parent.mkdir(parents=True) + f.touch() + assert chk._is_excluded(f, tmp_path, ["**/*.json"]) is True + + +# --------------------------------------------------------------------------- +# main +# --------------------------------------------------------------------------- + + +def test_main_returns_0_with_no_files(): + """Invoked with no arguments, main returns 0 (nothing to check).""" + assert chk.main([]) == 0 + + +def test_main_returns_0_when_all_files_have_headers(tmp_path): + """Clean files: exits 0.""" + f = tmp_path / "good.py" + f.write_text(ASF_HEADER + "x = 1\n") + assert chk.main([str(f)]) == 0 + + +def test_main_returns_1_when_file_is_missing_header(tmp_path): + """A staged Python file without the header causes exit 1.""" + f = tmp_path / "bad.py" + f.write_text("x = 1\n") + assert chk.main([str(f)]) == 1 + + +def test_main_skips_unchecked_extensions(tmp_path): + """File types that don't need headers (e.g. .json) are silently skipped.""" + f = tmp_path / "config.json" + f.write_text("{}\n") + assert chk.main([str(f)]) == 0 + + +def test_main_skips_rat_excluded_files(tmp_path): + """A file that matches a .rat-excludes pattern is not checked.""" + # Write a .rat-excludes that excludes prompts.py + (tmp_path / ".rat-excludes").write_text("**/prompts.py\n") + # Write a prompts.py with no header — would normally fail + f = tmp_path / "examples" / "prompts.py" + f.parent.mkdir() + f.write_text("SYSTEM_PROMPT = 'hello'\n") + assert chk.main([str(f)]) == 0 + + +def test_main_reports_all_violations(tmp_path, capsys): + """When multiple files are missing headers, all are reported.""" + a = tmp_path / "a.py" + b = tmp_path / "b.yml" + a.write_text("x = 1\n") + b.write_text("key: value\n") + result = chk.main([str(a), str(b)]) + out = capsys.readouterr().out + assert result == 1 + assert "a.py" in out + assert "b.yml" in out diff --git a/tests/test_ci_smoke_server.py b/tests/test_ci_smoke_server.py new file mode 100644 index 000000000..5910b6be4 --- /dev/null +++ b/tests/test_ci_smoke_server.py @@ -0,0 +1,204 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import importlib.util +import sys +from pathlib import Path + +import pytest + + +def _load_smoke_module(): + module_path = Path(__file__).resolve().parent.parent / "scripts" / "ci_smoke_server.py" + spec = importlib.util.spec_from_file_location("ci_smoke_server", module_path) + module = importlib.util.module_from_spec(spec) + assert spec.loader is not None + sys.modules[spec.name] = module + spec.loader.exec_module(module) + return module + + +smoke = _load_smoke_module() + + +# --------------------------------------------------------------------------- +# _should_cleanup: pure function mapping (explicit flag, env) → bool +# --------------------------------------------------------------------------- + + +def test_should_cleanup_defaults_true_outside_ci(monkeypatch): + """When GITHUB_ACTIONS is not set, default is to clean up (saves disk space).""" + monkeypatch.delenv("GITHUB_ACTIONS", raising=False) + assert smoke._should_cleanup(explicit=None) is True + + +def test_should_cleanup_defaults_false_in_ci(monkeypatch): + """When GITHUB_ACTIONS=true, default is to preserve workspace for artifact upload.""" + monkeypatch.setenv("GITHUB_ACTIONS", "true") + assert smoke._should_cleanup(explicit=None) is False + + +def test_should_cleanup_explicit_true_overrides_ci(monkeypatch): + """--cleanup flag forces cleanup even inside GitHub Actions.""" + monkeypatch.setenv("GITHUB_ACTIONS", "true") + assert smoke._should_cleanup(explicit=True) is True + + +def test_should_cleanup_explicit_false_overrides_local(monkeypatch): + """--no-cleanup flag preserves workspace even outside CI.""" + monkeypatch.delenv("GITHUB_ACTIONS", raising=False) + assert smoke._should_cleanup(explicit=False) is False + + +# --------------------------------------------------------------------------- +# _build_parser: argument parsing for --cleanup / --no-cleanup +# --------------------------------------------------------------------------- + + +def test_parser_cleanup_flag_sets_true(): + """--cleanup sets args.cleanup to True.""" + parser = smoke._build_parser() + args = parser.parse_args(["--wheel", "fake.whl", "--cleanup"]) + assert args.cleanup is True + + +def test_parser_no_cleanup_flag_sets_false(): + """--no-cleanup sets args.cleanup to False.""" + parser = smoke._build_parser() + args = parser.parse_args(["--wheel", "fake.whl", "--no-cleanup"]) + assert args.cleanup is False + + +def test_parser_cleanup_defaults_to_none(): + """Without either flag, args.cleanup is None (deferred to _should_cleanup).""" + parser = smoke._build_parser() + args = parser.parse_args(["--wheel", "fake.whl"]) + assert args.cleanup is None + + +# --------------------------------------------------------------------------- +# _poll_projects: polls /api/v0/projects until named project appears +# --------------------------------------------------------------------------- + + +def test_poll_projects_returns_true_when_project_found(monkeypatch): + """Returns True immediately once the target project name appears in the response.""" + calls = [] + + def fake_urlopen(url, timeout=None): + calls.append(url) + + class FakeResp: + status = 200 + + def read(self): + import json + return json.dumps([{"name": "ci-smoke-test"}, {"name": "other"}]).encode() + + def __enter__(self): + return self + + def __exit__(self, *_): + pass + + return FakeResp() + + monkeypatch.setattr(smoke.urllib.request, "urlopen", fake_urlopen) + result = smoke._poll_projects("http://127.0.0.1:9999", "ci-smoke-test", timeout_s=5) + assert result is True + assert len(calls) == 1 + + +def test_poll_projects_returns_false_on_timeout(monkeypatch): + """Returns False when the project never appears before the deadline.""" + + def fake_urlopen(url, timeout=None): + raise smoke.urllib.error.URLError("connection refused") + + monkeypatch.setattr(smoke.urllib.request, "urlopen", fake_urlopen) + monkeypatch.setattr(smoke.time, "sleep", lambda _: None) + monkeypatch.setattr(smoke.time, "time", _make_deadline_clock(budget=0.0)) + + result = smoke._poll_projects("http://127.0.0.1:9999", "ci-smoke-test", timeout_s=1) + assert result is False + + +def test_poll_projects_returns_false_when_server_proc_exits(monkeypatch): + """Returns False immediately if the server process has already exited.""" + + class FakeProc: + def poll(self): + return 1 # non-None → process is dead + + monkeypatch.setattr(smoke.urllib.request, "urlopen", lambda *a, **kw: (_ for _ in ()).throw(AssertionError("should not reach urlopen"))) + + result = smoke._poll_projects( + "http://127.0.0.1:9999", "ci-smoke-test", timeout_s=5, server_proc=FakeProc() + ) + assert result is False + + +def test_poll_projects_keeps_trying_until_project_appears(monkeypatch): + """Retries when project is absent, then succeeds once it appears.""" + import json + responses = [ + json.dumps([]).encode(), + json.dumps([{"name": "other"}]).encode(), + json.dumps([{"name": "ci-smoke-test"}]).encode(), + ] + call_count = [0] + + def fake_urlopen(url, timeout=None): + class FakeResp: + status = 200 + + def read(self): + idx = min(call_count[0], len(responses) - 1) + call_count[0] += 1 + return responses[idx] + + def __enter__(self): + return self + + def __exit__(self, *_): + pass + + return FakeResp() + + monkeypatch.setattr(smoke.urllib.request, "urlopen", fake_urlopen) + monkeypatch.setattr(smoke.time, "sleep", lambda _: None) + + result = smoke._poll_projects("http://127.0.0.1:9999", "ci-smoke-test", timeout_s=30) + assert result is True + assert call_count[0] == 3 + + +# --------------------------------------------------------------------------- +# helpers +# --------------------------------------------------------------------------- + + +def _make_deadline_clock(budget: float): + """Return a fake time.time() that expires after `budget` seconds of calls.""" + start = [0.0] + + def _fake_time(): + val = start[0] + start[0] += budget + 1.0 + return val + + return _fake_time diff --git a/tests/test_verify_apache_artifacts.py b/tests/test_verify_apache_artifacts.py index 795a4a8f6..bd48f6147 100644 --- a/tests/test_verify_apache_artifacts.py +++ b/tests/test_verify_apache_artifacts.py @@ -16,6 +16,7 @@ # under the License. import importlib.util +import os import sys import tarfile import tempfile @@ -157,7 +158,7 @@ def _fake_build(source_artifact: str, output_dir: str): for result in summary.results ) assert any( - result.name == f"Rebuilt wheel checksum: {release_wheel.name}" + result.name == f"Rebuilt wheel contents: {release_wheel.name}" and result.status == verify.PASS for result in summary.results ) @@ -261,3 +262,148 @@ def test_artifact_files_ignores_rat_reports(): artifact_files = verify._artifact_files(str(artifacts_dir)) assert artifact_files == ["apache_burr-0.41.0-py3-none-any.whl"] + + +def test_wheel_content_hashes_returns_sha256_per_file(tmp_path): + """Returns a dict mapping each member path to its SHA256 hex digest.""" + import hashlib + wheel_path = tmp_path / "test-1.0-py3-none-any.whl" + content = b"hello burr" + _write_wheel(wheel_path, {"burr/__init__.py": content}) + + hashes = verify._wheel_content_hashes(str(wheel_path)) + + assert hashes == {"burr/__init__.py": hashlib.sha256(content).hexdigest()} + + +def test_wheel_content_hashes_excludes_record_file(tmp_path): + """RECORD (the manifest) is excluded — it lists other files' hashes and + will legitimately differ between two wheels built from identical source.""" + wheel_path = tmp_path / "test-1.0-py3-none-any.whl" + _write_wheel(wheel_path, { + "burr/__init__.py": b"code", + "burr-1.0.dist-info/RECORD": b"burr/__init__.py,sha256=abc,4\n", + }) + + hashes = verify._wheel_content_hashes(str(wheel_path)) + + assert "burr-1.0.dist-info/RECORD" not in hashes + assert "burr/__init__.py" in hashes + + +def test_wheel_content_hashes_excludes_directory_entries(tmp_path): + """Directory entries (zip members whose name ends with /) have no content.""" + wheel_path = tmp_path / "test-1.0-py3-none-any.whl" + _write_wheel(wheel_path, { + "burr/": b"", + "burr/__init__.py": b"code", + }) + + hashes = verify._wheel_content_hashes(str(wheel_path)) + + assert "burr/" not in hashes + assert "burr/__init__.py" in hashes + + +def test_compare_wheel_contents_returns_true_for_identical_content(tmp_path): + """Two wheels with the same files and byte content compare as equal.""" + files = {"burr/__init__.py": b"code", "burr/core.py": b"more code"} + wheel_a = tmp_path / "a.whl" + wheel_b = tmp_path / "b.whl" + _write_wheel(wheel_a, files) + _write_wheel(wheel_b, files) + + match, diffs = verify._compare_wheel_contents(str(wheel_a), str(wheel_b)) + + assert match is True + assert diffs == [] + + +def test_compare_wheel_contents_ignores_record_differences(tmp_path): + """RECORD files that differ between wheels are not reported as differences.""" + wheel_a = tmp_path / "a.whl" + wheel_b = tmp_path / "b.whl" + _write_wheel(wheel_a, { + "burr/__init__.py": b"code", + "burr-1.0.dist-info/RECORD": b"burr/__init__.py,sha256=aaa,4\n", + }) + _write_wheel(wheel_b, { + "burr/__init__.py": b"code", + "burr-1.0.dist-info/RECORD": b"burr/__init__.py,sha256=bbb,4\n", + }) + + match, diffs = verify._compare_wheel_contents(str(wheel_a), str(wheel_b)) + + assert match is True + assert diffs == [] + + +def test_compare_wheel_contents_detects_content_difference(tmp_path): + """Returns False when a file exists in both wheels but has different bytes.""" + wheel_a = tmp_path / "a.whl" + wheel_b = tmp_path / "b.whl" + _write_wheel(wheel_a, {"burr/__init__.py": b"version = '1'"}) + _write_wheel(wheel_b, {"burr/__init__.py": b"version = '2'"}) + + match, diffs = verify._compare_wheel_contents(str(wheel_a), str(wheel_b)) + + assert match is False + assert any("burr/__init__.py" in d for d in diffs) + + +def test_compare_wheel_contents_detects_file_missing_from_second_wheel(tmp_path): + """Returns False when wheel_a contains a file absent from wheel_b.""" + wheel_a = tmp_path / "a.whl" + wheel_b = tmp_path / "b.whl" + _write_wheel(wheel_a, {"burr/__init__.py": b"code", "burr/extra.py": b"bonus"}) + _write_wheel(wheel_b, {"burr/__init__.py": b"code"}) + + match, diffs = verify._compare_wheel_contents(str(wheel_a), str(wheel_b)) + + assert match is False + assert any("burr/extra.py" in d for d in diffs) + + +def test_compare_wheel_contents_detects_file_missing_from_first_wheel(tmp_path): + """Returns False when wheel_b contains a file absent from wheel_a.""" + wheel_a = tmp_path / "a.whl" + wheel_b = tmp_path / "b.whl" + _write_wheel(wheel_a, {"burr/__init__.py": b"code"}) + _write_wheel(wheel_b, {"burr/__init__.py": b"code", "burr/extra.py": b"bonus"}) + + match, diffs = verify._compare_wheel_contents(str(wheel_a), str(wheel_b)) + + assert match is False + assert any("burr/extra.py" in d for d in diffs) + + +def test_verify_licenses_runs_rat_on_wheel_in_addition_to_tarball(tmp_path, monkeypatch): + """verify_licenses must run Apache RAT on .whl artifacts as well as .tar.gz tarballs.""" + tar_path = tmp_path / "apache-burr-0.42.0-incubating-src.tar.gz" + wheel_path = tmp_path / "apache_burr-0.42.0-py3-none-any.whl" + _write_tar_gz(tar_path, "apache-burr-0.42.0-incubating-src", {"README.md": b"content"}) + _write_wheel(wheel_path, {"burr/__init__.py": b"content"}) + + rat_targets = [] + + def fake_check_licenses(artifact_path, rat_jar, report_name, summary, report_only=False): + rat_targets.append(artifact_path) + summary.pass_(f"RAT: {Path(artifact_path).name}") + return True + + monkeypatch.setattr(verify, "_check_licenses_with_rat", fake_check_licenses) + monkeypatch.setattr(verify.shutil, "which", lambda _: "/usr/bin/java") + + real_exists = os.path.exists + monkeypatch.setattr( + verify.os.path, + "exists", + lambda p: True if p == "/fake/rat.jar" else real_exists(p), + ) + + summary = verify.VerificationSummary() + result = verify.verify_licenses(str(tmp_path), "/fake/rat.jar", summary) + + assert result is True + assert str(tar_path) in rat_targets + assert str(wheel_path) in rat_targets