From 8d177f86052362dc85ce9a002decbe9dd0e931da Mon Sep 17 00:00:00 2001 From: Deon Taljaard Date: Tue, 5 May 2026 12:51:06 +0200 Subject: [PATCH] STAC-24685: pin vexhub-index-check action sha --- .github/workflows/index-check.yml | 23 ++---- tools/README.md | 78 -------------------- tools/build_index.py | 118 ------------------------------ 3 files changed, 5 insertions(+), 214 deletions(-) delete mode 100644 tools/README.md delete mode 100644 tools/build_index.py diff --git a/.github/workflows/index-check.yml b/.github/workflows/index-check.yml index 902c4b3..54f0120 100644 --- a/.github/workflows/index-check.yml +++ b/.github/workflows/index-check.yml @@ -1,14 +1,8 @@ # Asserts that index.json is in sync with the contents of pkg/. # -# index.json is the lookup map Trivy consults via `--vex repo` to find -# OpenVEX statement files for a given PURL. It is generated from the -# pkg/ tree by tools/build_index.py and must never be edited by hand. -# -# This workflow runs `tools/build_index.py --check`, which regenerates -# the index in memory and exits non-zero if the on-disk index.json -# differs. When that happens, the contributor must run -# `python3 tools/build_index.py` locally and commit the regenerated -# file. +# When this check fails, the contributor must regenerate index.json +# locally (see CONTRIBUTING.md) and commit the result. + name: index-check on: @@ -16,7 +10,6 @@ on: paths: - "pkg/**" - "index.json" - - "tools/build_index.py" - ".github/workflows/index-check.yml" push: branches: @@ -24,7 +17,6 @@ on: paths: - "pkg/**" - "index.json" - - "tools/build_index.py" permissions: contents: read @@ -37,10 +29,5 @@ jobs: - name: Checkout uses: actions/checkout@v5 - - name: Set up Python - uses: actions/setup-python@v6 - with: - python-version: "3.14" - - - name: Verify index.json matches pkg/ tree - run: python3 tools/build_index.py --check + - name: Verify index.json matches pkg/ + uses: StackVista/scan-pipeline/.github/actions/vexhub-index-check@fcb8a5f78e1527116ee573ce538ddc884c635987 diff --git a/tools/README.md b/tools/README.md deleted file mode 100644 index eea842e..0000000 --- a/tools/README.md +++ /dev/null @@ -1,78 +0,0 @@ -# vexhub tools - -Two pieces of tooling, both intentionally minimal: - -- **vexctl** (third-party, owned by the OpenVEX project) — authors - OpenVEX statement files. -- **build_index.py** (this repo, stdlib only) — regenerates - `index.json` from the contents of `pkg/`. - -We deliberately do not maintain a per-statement Python script. The -authoring path goes through vexctl; the index is rebuilt from the -tree. - -## Authoring statements with vexctl - -Install vexctl: - -``` -go install github.com/openvex/vexctl@latest -# or download a release binary from https://github.com/openvex/vexctl/releases -``` - -Lane 1 example (package PURL — the default): - -``` -mkdir -p pkg/maven/org.eclipse.jetty/jetty-http -vexctl create \ - --product 'pkg:maven/org.eclipse.jetty/jetty-http@9.4.57.v20241219' \ - --vuln CVE-2026-2332 \ - --status not_affected \ - --justification vulnerable_code_not_in_execute_path \ - --author 'SUSE Observability Security Team' \ - > pkg/maven/org.eclipse.jetty/jetty-http/scan.openvex.json -``` - -vexctl emits a fresh `@id` (URI), `timestamp`, and OpenVEX-conforming -JSON. The `impact_statement` field can be added by editing the -resulting JSON — small file, infrequent edits. - -For Lane 2 (image PURL with subcomponent), see -[vexctl docs](https://github.com/openvex/vexctl) and the OpenVEX spec. -The `--product` flag accepts an OCI PURL; subcomponents can be added -to the resulting JSON. - -If multiple statements apply to the same package, append them to the -same file's `statements` array, or use `vexctl merge` to combine -documents. - -## Regenerating index.json - -After adding, modifying, or removing any `pkg/.../scan.openvex.json` -file, run: - -``` -python3 tools/build_index.py -``` - -The script walks `pkg/`, extracts product PURLs from every statement, -normalises them (drops version and qualifiers), and rewrites -`index.json` from scratch — sorted, deduplicated, and matching the -[Aqua VEX Repository specification](https://github.com/aquasecurity/vex-repo-spec). - -## CI check - -Wire this into the PR pipeline so the index can't drift: - -``` -python3 tools/build_index.py --check -``` - -Exits non-zero (with a hint to run the regenerator) if the on-disk -`index.json` doesn't match what `pkg/` says it should be. - -## Dependencies - -- vexctl — for authoring (or hand-author the JSON; the format is - small and stable). -- Python 3.9+ — stdlib only, no third-party packages. diff --git a/tools/build_index.py b/tools/build_index.py deleted file mode 100644 index 004974e..0000000 --- a/tools/build_index.py +++ /dev/null @@ -1,118 +0,0 @@ -#!/usr/bin/env python3 -"""Regenerate index.json from the contents of pkg/. - -Walks every *.openvex.json file under pkg/, extracts the product PURLs -from each statement, normalises them (drops version and qualifiers), -and writes a fresh index.json conforming to the Aqua VEX Repository -specification. - -Idempotent: produces the same `packages` list for the same on-disk -state. Authors should run this whenever they add, modify, or remove a -VEX statement file. A CI check (`--check`) asserts the on-disk -index.json is in sync with the tree. -""" -from __future__ import annotations - -import argparse -import json -import sys -from datetime import datetime, timezone -from pathlib import Path - - -def now_iso() -> str: - return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") - - -def index_id_for_purl(purl: str) -> str: - """Return the canonical index id for a PURL: version and qualifiers stripped.""" - head = purl - if "?" in head: - head, _ = head.split("?", 1) - if "@" in head: - head, _ = head.split("@", 1) - return head - - -def collect_packages(hub_root: Path) -> list[dict]: - """Walk pkg/ and produce a sorted, deduplicated list of index entries.""" - pkg_root = hub_root / "pkg" - if not pkg_root.exists(): - return [] - entries: dict[str, dict] = {} - for vex_file in sorted(pkg_root.rglob("*.openvex.json")): - try: - with vex_file.open() as f: - doc = json.load(f) - except json.JSONDecodeError as exc: - sys.exit(f"invalid OpenVEX JSON in {vex_file}: {exc}") - rel_location = vex_file.relative_to(hub_root).as_posix() - purls: set[str] = set() - for statement in doc.get("statements", []): - for product in statement.get("products", []): - pid = product.get("@id") - if pid and pid.startswith("pkg:"): - purls.add(pid) - for purl in sorted(purls): - pid = index_id_for_purl(purl) - existing = entries.get(pid) - if existing and existing["location"] != rel_location: - sys.exit( - f"PURL {pid} appears in multiple files: " - f"{existing['location']} and {rel_location}. " - "Each PURL must live in a single VEX file." - ) - entries[pid] = { - "id": pid, - "location": rel_location, - "format": "openvex", - } - return sorted(entries.values(), key=lambda e: e["id"]) - - -def main() -> None: - parser = argparse.ArgumentParser(description="Regenerate index.json from pkg/.") - parser.add_argument( - "--hub-root", - type=Path, - default=Path.cwd(), - help="Path to the vexhub repo root (default: current directory).", - ) - parser.add_argument( - "--check", - action="store_true", - help="Exit non-zero if the on-disk index.json differs from the regenerated one (CI mode).", - ) - args = parser.parse_args() - - packages = collect_packages(args.hub_root) - fresh = { - "updated_at": now_iso(), - "packages": packages, - } - index_path = args.hub_root / "index.json" - - if args.check: - if index_path.exists(): - with index_path.open() as f: - current = json.load(f) - else: - current = {"packages": []} - if current.get("packages") != packages: - print( - "index.json is out of sync with pkg/ contents.\n" - "Run: python3 tools/build_index.py", - file=sys.stderr, - ) - sys.exit(1) - print(f"index.json is in sync ({len(packages)} package(s))") - return - - with index_path.open("w") as f: - json.dump(fresh, f, indent=2) - f.write("\n") - print(f"Regenerated {index_path} ({len(packages)} package(s))") - - -if __name__ == "__main__": - main()