From ef3dc00ffbf60040f9b81ecce0cb86cb6737de30 Mon Sep 17 00:00:00 2001 From: Nelson Spence Date: Wed, 3 Jun 2026 23:59:01 -0500 Subject: [PATCH 1/2] Prepare 0.4 release version parity Signed-off-by: Nelson Spence --- .github/workflows/ci.yml | 29 ++- .github/workflows/fuzz.yml | 4 + .github/workflows/release.yml | 209 ++++++++++++++++----- CHANGELOG.md | 16 ++ CONTRIBUTING.md | 18 +- Cargo.lock | 8 +- Cargo.toml | 2 +- RELEASING.md | 59 +++--- THREAT_MODEL.md | 2 +- fuzz/Cargo.lock | 2 +- ordvec-ffi/Cargo.toml | 3 +- ordvec-go/ordvec.go | 12 ++ ordvec-go/ordvec_test.go | 18 ++ ordvec-manifest/Cargo.toml | 4 +- ordvec-manifest/README.md | 9 +- ordvec-python/Cargo.toml | 2 +- ordvec-python/pyproject.toml | 2 +- ordvec-python/python/ordvec/__init__.py | 10 +- tests/release_publish_invariants.py | 209 +++++++++++++++++++++ tests/release_signed_release_invariants.sh | 95 +++++++--- 20 files changed, 577 insertions(+), 136 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 84357b4..4a27876 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -219,10 +219,11 @@ jobs: # ---------------------------------------------------------------------- # No-system-deps guarantee + packaging check. Fails if any forbidden # numerical/BLAS crate has crept into the dependency tree, then checks the - # publishable crate artifacts before a real release. The lockstep - # ordvec-manifest crate cannot run a registry-backed dry-run for a bumped - # version until ordvec itself is published, so CI verifies the pre-publish - # package artifact that release.yml later byte-compares after the core publish. + # publishable crate artifacts before a real release. A fresh lockstep + # ordvec-manifest version cannot be packaged until the matching ordvec core + # version exists on crates.io, so CI records that exact dependency-resolution + # case as deferred; release.yml performs the real manifest package check + # after publish-crate succeeds. # ---------------------------------------------------------------------- deps: name: deps (no-system-deps + publish dry-run) @@ -261,8 +262,18 @@ jobs: # from the current directory. The Python binding remains publish = false # and ships to PyPI via maturin. run: cargo publish -p ordvec --dry-run --locked - - name: cargo package --no-verify -p ordvec-manifest - run: cargo package -p ordvec-manifest --locked --no-verify + - name: cargo package -p ordvec-manifest when lockstep core exists + run: | + set -euo pipefail + log="${RUNNER_TEMP}/ordvec-manifest-package.log" + if cargo package -p ordvec-manifest --locked 2>&1 | tee "$log"; then + exit 0 + fi + if grep -q 'failed to select a version for the requirement `ordvec = "' "$log"; then + echo "::notice::ordvec-manifest package check is deferred: the lockstep ordvec version is not published yet. release.yml packages ordvec-manifest after publish-crate succeeds." + exit 0 + fi + exit 1 # ---------------------------------------------------------------------- # Pin the release-publish invariants. release.yml is tag-triggered (with the @@ -274,9 +285,9 @@ jobs: # * release_signed_release_invariants.sh — the signed-release / provenance # graph stays intact: release-assets-draft stays draft, the SLSA # generator emits a .intoto.jsonl, registry publishes need the draft - # assets, both Rust crates prove byte-identity vs their attested .crate - # files, and publish-github-release un-drafts ONLY after all registry - # publishes succeed. + # assets, Rust crates prove byte-identity vs their attested .crate files, + # and publish-github-release un-drafts ONLY after all registry publishes + # succeed. # ---------------------------------------------------------------------- release-guard: name: release-publish invariants diff --git a/.github/workflows/fuzz.yml b/.github/workflows/fuzz.yml index f04963e..30298de 100644 --- a/.github/workflows/fuzz.yml +++ b/.github/workflows/fuzz.yml @@ -67,6 +67,8 @@ jobs: toolchain: ${{ env.FUZZ_NIGHTLY }} - name: Install cargo-fuzz (version-pinned) run: cargo "+${CARGO_FUZZ_INSTALL_TOOLCHAIN}" install cargo-fuzz --version "${CARGO_FUZZ_VERSION}" --locked + - name: Check fuzz lockfile is current + run: cargo metadata --manifest-path fuzz/Cargo.toml --locked --format-version 1 >/dev/null - name: Smoke env: TARGET: ${{ matrix.target }} @@ -105,6 +107,8 @@ jobs: toolchain: ${{ env.FUZZ_NIGHTLY }} - name: Install cargo-fuzz (version-pinned) run: cargo "+${CARGO_FUZZ_INSTALL_TOOLCHAIN}" install cargo-fuzz --version "${CARGO_FUZZ_VERSION}" --locked + - name: Check fuzz lockfile is current + run: cargo metadata --manifest-path fuzz/Cargo.toml --locked --format-version 1 >/dev/null - name: Fuzz env: TARGET: ${{ matrix.target }} diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index df30075..63d902f 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -4,11 +4,13 @@ # wheels + sdist) -> canonicalize the Python dist (current build for new # versions, verified PyPI-served bytes if PyPI already owns the immutable # version) -> attest / SLSA-provenance the files this run actually built -> -# stage EVERYTHING on the DRAFT GitHub Release (`release-assets-draft`) -> -# gated registry publishes / verification -> un-draft ONLY after all registry -# jobs succeed (`publish-github-release`). The registry gates (two crates.io -# jobs plus PyPI) are bound to GitHub Environments with Required Reviewers, so -# they pause for a human. +# stage core/Python assets on the DRAFT GitHub Release (`release-assets-draft`) +# -> publish the core crate -> build/attest/stage the lockstep manifest crate +# (now that the core version exists on crates.io) -> gated registry publishes / +# verification -> un-draft ONLY after all registry jobs succeed +# (`publish-github-release`). The registry gates (two crates.io jobs plus PyPI) +# are bound to GitHub Environments with Required Reviewers, so they pause for a +# human. # # The un-draft-after-publish ordering is deliberate: it prevents a public # GitHub Release from existing for a version that crates.io / PyPI later @@ -41,7 +43,7 @@ # * SLSA generator -> `*.intoto.jsonl` on the Release (OpenSSF Scorecard # Signed-Releases provenance probe; older unsigned releases may keep that # score below 10 temporarily; SLSA Build L3). Recovery mode limits SLSA -# subjects to the Rust crates built by this run, because PyPI files are +# subjects to the Rust crate assets built by this run, because PyPI files are # immutable bytes from an earlier Trusted Publishing upload. # * actions/attest-build-provenance -> GitHub attestation store + a # `*.sigstore.json` bundle on the Release (`gh attestation verify`; also the @@ -57,10 +59,11 @@ # * crates.io / PyPI publish/verification via Trusted Publishing gates (OIDC # only when uploading) — NO stored tokens. # -# Fail-closed: `release-assets-draft` and registry publishes `needs:` attest + -# provenance and canonical Python dist selection, so nothing is attached or -# published unless the artifact source is verified; and `publish-github-release` -# `needs:` every registry gate, so the Release stays DRAFT unless all pass. +# Fail-closed: `release-assets-draft`, `release-manifest-assets-draft`, and +# registry publishes `needs:` attest + provenance for their respective asset +# sets, so nothing is published unless the artifact source is verified; and +# `publish-github-release` `needs:` every registry gate, so the Release stays +# DRAFT unless all pass. # The signed-release graph is pinned in # `tests/release_signed_release_invariants.sh` (run by ci.yml's release-guard # on every push/PR) so a future commit can't silently dismantle it. @@ -254,7 +257,7 @@ jobs: build-manifest-crate: name: build ordvec-manifest .crate + SBOM - needs: guard + needs: [guard, publish-crate] if: needs.guard.outputs.ok == 'true' runs-on: ubuntu-latest steps: @@ -269,14 +272,11 @@ jobs: with: toolchain: stable - name: Package the manifest crate - # Emits the SLSA-attested sidecar-verifier .crate artifact. The - # lockstep manifest crate depends on the just-tagged `ordvec` version, - # which is not on crates.io until `publish-crate` succeeds, so this - # pre-publish packaging step must skip Cargo's registry-backed verify. - # `publish-manifest-crate` later runs a normal verified repackage after - # `ordvec` is published and compares those bytes to this artifact - # before minting the manifest crate's OIDC token. - run: cargo package -p ordvec-manifest --locked --no-verify + # The lockstep manifest crate depends on the just-tagged `ordvec` + # version, so package it only after `publish-crate` has published and + # verified the core crate. This is the first point where Cargo can + # resolve the dependency through the registry for a fresh release. + run: cargo package -p ordvec-manifest --locked - name: Generate CycloneDX SBOM for the manifest crate run: | cargo install cargo-cyclonedx --version 0.5.9 --locked @@ -558,7 +558,7 @@ jobs: attest: name: GitHub artifact attestation (+ .sigstore.json bundle) - needs: [guard, build-crate, build-manifest-crate, pypi-canonical-dist] + needs: [guard, build-crate, pypi-canonical-dist] if: needs.guard.outputs.ok == 'true' runs-on: ubuntu-latest permissions: @@ -576,18 +576,13 @@ jobs: with: name: dist-crate path: dist - - name: Collect the manifest crate distributable - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 - with: - name: dist-manifest-crate - path: dist - name: Collect the canonical Python dist uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: name: pypi-canonical-dist path: dist # Fresh release: one consolidated attestation references every subject. - - name: Attest build provenance for crates + canonical wheels + sdist + - name: Attest build provenance for crate + canonical wheels + sdist id: attest_all if: needs.pypi-canonical-dist.outputs.source == 'build' uses: actions/attest-build-provenance@a2bbfa25375fe432b6a289bc6b6cd05ecd0c4c32 # v4.1.0 @@ -598,7 +593,7 @@ jobs: dist/*.tar.gz # Recovery release: PyPI already owns immutable wheel/sdist bytes from a # previous Trusted Publishing upload. Do not claim this run rebuilt them. - - name: Attest build provenance for crates only + - name: Attest build provenance for crate only id: attest_crate if: needs.pypi-canonical-dist.outputs.source == 'pypi' uses: actions/attest-build-provenance@a2bbfa25375fe432b6a289bc6b6cd05ecd0c4c32 # v4.1.0 @@ -617,7 +612,7 @@ jobs: combine-hashes: name: combine artifact digests for SLSA provenance - needs: [guard, build-crate, build-manifest-crate, pypi-canonical-dist] + needs: [guard, build-crate, pypi-canonical-dist] if: needs.guard.outputs.ok == 'true' runs-on: ubuntu-latest outputs: @@ -632,11 +627,6 @@ jobs: with: name: dist-crate path: dist - - name: Collect the manifest crate distributable - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 - with: - name: dist-manifest-crate - path: dist - name: Collect the canonical Python dist if: needs.pypi-canonical-dist.outputs.source == 'build' uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 @@ -656,7 +646,7 @@ jobs: if [ "$PYPI_SOURCE" = "build" ]; then echo "hashes=$(sha256sum ./*.crate ./*.whl ./*.tar.gz | base64 -w0)" >> "$GITHUB_OUTPUT" elif [ "$PYPI_SOURCE" = "pypi" ]; then - echo "::notice::PyPI dist already exists; SLSA subjects are limited to the Rust crates built by this run." + echo "::notice::PyPI dist already exists; SLSA subjects are limited to the Rust crate built by this run." echo "hashes=$(sha256sum ./*.crate | base64 -w0)" >> "$GITHUB_OUTPUT" else echo "::error::unexpected pypi-canonical-dist source: $PYPI_SOURCE" @@ -678,14 +668,14 @@ jobs: uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0 # zizmor: ignore[unpinned-uses] with: base64-subjects: ${{ needs.combine-hashes.outputs.hashes }} - # Produce the signed provenance as a workflow artifact ONLY; - # `release-assets-draft` is the single owner of all Release uploads - # (no concurrent writers). + # Produce the signed provenance as a workflow artifact ONLY; the draft + # asset-staging jobs upload Release assets after collecting the complete + # provenance material for their package lane. upload-assets: false provenance-name: ordvec-${{ needs.guard.outputs.version }}.intoto.jsonl release-assets-draft: - name: stage all assets on the DRAFT Release (does NOT un-draft) + name: stage core/Python assets on the DRAFT Release (does NOT un-draft) needs: [guard, notes, attest, provenance, pypi-canonical-dist, require-ci-green, smoke-linux-aarch64-wheel] if: needs.guard.outputs.ok == 'true' runs-on: ubuntu-latest @@ -701,11 +691,6 @@ jobs: with: name: dist-crate path: dist - - name: Collect the manifest crate distributable - uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 - with: - name: dist-manifest-crate - path: dist - name: Collect the canonical Python dist uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 with: @@ -731,11 +716,12 @@ jobs: fi cp "${provenance[0]}" dist/ - name: Attach distributables, signature and provenance to the draft Release - # SOLE Release-asset writer. SBOMs stay build artifacts (registries don't - # host them); the GitHub-native bundle (.sigstore.json) and the SLSA - # provenance (.intoto.jsonl) ship with the Release. In recovery mode, - # they attest only the Rust crates built by this run; canonical Python - # files are verified against PyPI's immutable hashes instead. + # First Release-asset writer: core crate + canonical Python assets. + # SBOMs stay build artifacts (registries don't host them); the + # GitHub-native bundle (.sigstore.json) and the SLSA provenance + # (.intoto.jsonl) ship with the Release. In recovery mode, they attest + # only the Rust crate built by this run; canonical Python files are + # verified against PyPI's immutable hashes instead. # The Release is left DRAFT — un-drafting happens in # `publish-github-release` only after all registry publishes succeed, # so a partial publish never leaves a "public Release with no @@ -864,9 +850,134 @@ jobs: fi echo "OK: crates.io-served .crate is byte-identical to the SLSA-attested artifact ($A_SHA)." + attest-manifest: + name: GitHub artifact attestation for ordvec-manifest + needs: [guard, build-manifest-crate] + if: needs.guard.outputs.ok == 'true' + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write # mint the Sigstore signing cert (OIDC) + attestations: write # persist the attestation to the store + artifact-metadata: write # create the artifact storage record (GA 2026-01-13) + steps: + - name: Harden the runner + uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 + with: + egress-policy: audit + - name: Collect the manifest crate distributable + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: dist-manifest-crate + path: dist + - name: Attest build provenance for manifest crate + id: attest_manifest + uses: actions/attest-build-provenance@a2bbfa25375fe432b6a289bc6b6cd05ecd0c4c32 # v4.1.0 + with: + subject-path: dist/*.crate + - name: Stage the manifest Sigstore bundle as a release asset + env: + BUNDLE: ${{ steps.attest_manifest.outputs.bundle-path }} + VERSION: ${{ needs.guard.outputs.version }} + run: cp "$BUNDLE" "ordvec-manifest-${VERSION}.sigstore.json" + - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 + with: + name: sigstore-bundle-manifest + path: ordvec-manifest-*.sigstore.json + if-no-files-found: error + + combine-manifest-hash: + name: combine ordvec-manifest digest for SLSA provenance + needs: [guard, build-manifest-crate] + if: needs.guard.outputs.ok == 'true' + runs-on: ubuntu-latest + outputs: + hashes: ${{ steps.hash.outputs.hashes }} + steps: + - name: Harden the runner + uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 + with: + egress-policy: audit + - name: Collect the manifest crate distributable + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: dist-manifest-crate + path: dist + - name: Compute base64 sha256sum over manifest SLSA subject + id: hash + working-directory: dist + run: | + set -euo pipefail + echo "hashes=$(sha256sum ./*.crate | base64 -w0)" >> "$GITHUB_OUTPUT" + + manifest-provenance: + name: SLSA provenance for ordvec-manifest (.intoto.jsonl) + needs: [guard, combine-manifest-hash] + if: needs.guard.outputs.ok == 'true' + permissions: + actions: read + id-token: write + contents: write + uses: slsa-framework/slsa-github-generator/.github/workflows/generator_generic_slsa3.yml@v2.1.0 # zizmor: ignore[unpinned-uses] + with: + base64-subjects: ${{ needs.combine-manifest-hash.outputs.hashes }} + upload-assets: false + provenance-name: ordvec-manifest-${{ needs.guard.outputs.version }}.intoto.jsonl + + release-manifest-assets-draft: + name: stage ordvec-manifest assets on the DRAFT Release (does NOT un-draft) + needs: [guard, build-manifest-crate, attest-manifest, manifest-provenance] + if: needs.guard.outputs.ok == 'true' + runs-on: ubuntu-latest + permissions: + contents: write + steps: + - name: Harden the runner + uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 + with: + egress-policy: audit + - name: Collect the manifest crate distributable + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: dist-manifest-crate + path: dist + - name: Collect the manifest Sigstore bundle + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + name: sigstore-bundle-manifest + path: dist + - name: Collect workflow artifacts for manifest SLSA provenance + uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8.0.1 + with: + path: provenance-artifacts + - name: Copy the manifest SLSA provenance into the release dist + env: + VERSION: ${{ needs.guard.outputs.version }} + run: | + set -euo pipefail + mapfile -t provenance < <(find provenance-artifacts -type f -name "ordvec-manifest-${VERSION}.intoto.jsonl" | sort) + if [ "${#provenance[@]}" -ne 1 ]; then + printf '%s\n' "${provenance[@]}" + echo "::error::expected exactly one ordvec-manifest .intoto.jsonl provenance artifact, found ${#provenance[@]}" + exit 1 + fi + cp "${provenance[0]}" dist/ + - name: Attach manifest distributable, signature and provenance to the draft Release + env: + GH_TOKEN: ${{ github.token }} + GH_REPO: ${{ github.repository }} + TAG_NAME: ${{ github.ref_name }} + run: | + set -euo pipefail + gh release upload "$TAG_NAME" \ + dist/*.crate \ + dist/*.sigstore.json \ + dist/*.intoto.jsonl \ + --clobber + publish-manifest-crate: name: publish ordvec-manifest to crates.io - needs: [guard, release-assets-draft, publish-crate] + needs: [guard, release-manifest-assets-draft, publish-crate] if: needs.guard.outputs.ok == 'true' runs-on: ubuntu-latest environment: crates-io # MANUAL GATE — Required reviewer diff --git a/CHANGELOG.md b/CHANGELOG.md index c84d347..02425a2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,11 +7,16 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +## 0.4.0 - 2026-06-04 + ### Added - Added a `signbitmap_rankquant_twostage` fuzz target and deterministic tests for the SignBitmap candidate generation plus RankQuant subset rerank pipeline used by downstream retrieval systems. +- Added lockstep `ordvec-manifest` crate publishing to the unified release + pipeline, including OIDC trusted publishing, pre/post-publish byte-identity + checks, and release invariants covering both `.crate` artifacts. - Added a verifier-only `VerifiedLoadPlan` helper to `ordvec-manifest` so Rust callers can verify a manifest, retain the typed report, and load from the resolved artifact and sidecar paths without re-resolving manifest strings. @@ -21,11 +26,21 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Documentation +- Documented the `VerifiedLoadPlan` verify-then-load boundary, including the + fact that returned paths are not immutable file handles and must not be + treated as TOCTOU protection on mutable storage. +- Documented duplicate-candidate behavior for `RankQuant` subset reranking and + the repo-local C ABI / Go wrapper. - Added a pre-1.0 compatibility policy covering stable and experimental Rust APIs, Python bindings, the lockstep Manifest crate, repo-local C/Go sidecars, primitive persisted formats, examples/docs, MSRV/feature changes, and release-note review expectations. +### Fixed + +- Hardened Intel SDE setup caching and release-gate handling so transient Intel + CDN failures no longer leave AVX-512 checks dependent on one live download. + ### Security - Added bounded parser/report defaults to `ordvec-manifest` verification for @@ -186,5 +201,6 @@ system dependencies** — no BLAS, no `ndarray`, no `faer`. AVX-512 intrinsics this crate relies on were stabilized. - Dual-licensed under **MIT OR Apache-2.0**. +[0.4.0]: https://github.com/Fieldnote-Echo/ordvec/compare/v0.3.0...v0.4.0 [0.2.0]: https://github.com/Fieldnote-Echo/ordvec/compare/v0.1.0...v0.2.0 [0.1.0]: https://github.com/Fieldnote-Echo/ordvec/releases/tag/v0.1.0 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2263d46..a22929f 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -104,14 +104,18 @@ Changelog and release notes are generated with [git-cliff](https://git-cliff.org) from Conventional Commit history (`cliff.toml`). -- **The whole release is automated except the two registry publishes.** Pushing - a `vMAJOR.MINOR.PATCH` tag triggers `.github/workflows/release.yml`, which - runs git-cliff for the GitHub Release notes, builds the crate + wheels + - sdist, generates SLSA build provenance (`*.intoto.jsonl`) and a Sigstore - bundle (`*.sigstore.json`), attaches everything to the GitHub Release, and - un-drafts it — all without human intervention. The `crates.io` and `pypi` +- **The release build and provenance graph is automated; registry publishes are + gated.** Pushing a `vMAJOR.MINOR.PATCH` tag triggers + `.github/workflows/release.yml`, which runs git-cliff for the GitHub Release + notes, builds the core crate + wheels + sdist, generates SLSA build + provenance (`*.intoto.jsonl`) and Sigstore bundles (`*.sigstore.json`), and + stages assets on the GitHub Release while it remains draft. The lockstep + `ordvec-manifest` crate is built and staged only after the core `ordvec` + crate publishes, because Cargo cannot package a fresh manifest version until + the matching core version exists on crates.io. The `crates.io` and `pypi` publishes wait at GitHub Environments with **Required reviewers** (the - maintainer approves each in the Actions UI). Pre-release tags (e.g. + maintainer approves each in the Actions UI), and the GitHub Release un-drafts + only after all registry publishes succeed. Pre-release tags (e.g. `v0.3.0-rc.1`) do not trigger it. - **`CHANGELOG.md` is curated by hand** — it is not auto-committed, because `main` is branch-protected. Keep adding entries under `[Unreleased]`; at diff --git a/Cargo.lock b/Cargo.lock index ab777b2..b5b42df 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -601,7 +601,7 @@ checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] name = "ordvec" -version = "0.3.0" +version = "0.4.0" dependencies = [ "rand", "rand_chacha", @@ -610,14 +610,14 @@ dependencies = [ [[package]] name = "ordvec-ffi" -version = "0.1.0" +version = "0.4.0" dependencies = [ "ordvec", ] [[package]] name = "ordvec-manifest" -version = "0.3.0" +version = "0.4.0" dependencies = [ "chrono", "clap", @@ -633,7 +633,7 @@ dependencies = [ [[package]] name = "ordvec-python" -version = "0.3.0" +version = "0.4.0" dependencies = [ "numpy", "ordvec", diff --git a/Cargo.toml b/Cargo.toml index 129b827..14a364c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ordvec" -version = "0.3.0" +version = "0.4.0" edition = "2021" rust-version = "1.89" # AVX-512 intrinsics stabilized in 1.89.0; also clears the 1.87 floor from u64::is_multiple_of description = "Training-free ordinal & sign quantization for vector retrieval" diff --git a/RELEASING.md b/RELEASING.md index 73a683d..e402e65 100644 --- a/RELEASING.md +++ b/RELEASING.md @@ -32,17 +32,18 @@ The unified `release.yml`: files, verifies their SHA-256 digests from PyPI JSON, and uses those bytes as the GitHub Release assets; - emits **GitHub SLSA build provenance** (`actions/attest-build-provenance`) - and a **SLSA-generator `*.intoto.jsonl`** attached to the GitHub Release - **before** the gated publishes — a failed attestation fails the release + and **SLSA-generator `*.intoto.jsonl`** assets attached to the GitHub Release + **before** each gated publish — a failed attestation fails the release closed, so nothing ships without provenance recorded. In recovery mode where - PyPI files already exist, the GitHub/SLSA subjects are deliberately limited - to the Rust crates built by the current run; the Python files are verified - immutable PyPI bytes from the earlier Trusted Publishing upload, not falsely - claimed as rebuilt by the recovery run; -- stages the **`.crate` files, canonical wheels, canonical sdist, + PyPI files already exist, the initial GitHub/SLSA subjects are deliberately + limited to the Rust crate built by the current run; the Python files are + verified immutable PyPI bytes from the earlier Trusted Publishing upload, not + falsely claimed as rebuilt by the recovery run; +- stages the core **`.crate` file, canonical wheels, canonical sdist, `*.sigstore.json` bundle, and `*.intoto.jsonl` provenance** on the GitHub Release while it is still **a - DRAFT** (`release-assets-draft` is the sole Release-asset writer — no manual - attach, which is what v0.2.0's manual step missed); + DRAFT** (`release-assets-draft` owns the core/Python Release uploads, and + `release-manifest-assets-draft` later owns the manifest crate uploads; no + manual attach, which is what v0.2.0's manual step missed); - proves **byte-identity** in `publish-crate` and `publish-manifest-crate` on both sides of `cargo publish`: 1. **pre-publish gate** — downloads the SLSA-attested `.crate` artifact, @@ -59,13 +60,13 @@ The unified `release.yml`: `publish-github-release` never un-drafts the Release (the version is then yank-only on crates.io, but the failure is loudly observable); - publishes `ordvec-manifest` only after the lockstep `ordvec` crate has - published and passed its crates.io-served byte-identity readback. The - pre-publish manifest `.crate` artifact is created with `cargo package - --no-verify` because Cargo cannot verify a lockstep dependency version before - `ordvec` exists on crates.io; `publish-manifest-crate` then runs the normal - verified `cargo package -p ordvec-manifest --locked` after `ordvec` is - published and byte-compares that output to the attested artifact before - minting its own OIDC token; + published and passed its crates.io-served byte-identity readback. Cargo cannot + package a fresh lockstep manifest version until the matching core crate exists + on crates.io, so the workflow builds, attests, generates SLSA provenance for, + and stages the manifest `.crate` on the draft GitHub Release after + `publish-crate` succeeds; `publish-manifest-crate` then re-runs + `cargo package -p ordvec-manifest --locked` and byte-compares that output to + the attested artifact before minting its own OIDC token; - **un-drafts the GitHub Release ONLY after `publish-crate`, `publish-manifest-crate`, AND `publish-pypi` succeed** (`publish-github-release` is the sole un-draft point). If any publish fails @@ -141,10 +142,12 @@ filename. Until a record is updated, the corresponding gated publish fails useful for this release; - distinguish `ordvec` primitive API/file compatibility from downstream application database behavior. -3. Bump the version (crate `Cargo.toml`, `ordvec-manifest/Cargo.toml`, and - `ordvec-python` if the wheel changed) and update `CHANGELOG.md` with - migration notes for every intentional compatibility break. Commit on - `main`. +3. Bump the lockstep version (`Cargo.toml`, + `ordvec-manifest/Cargo.toml` including its `ordvec` dependency, + `ordvec-python/Cargo.toml`, `ordvec-python/pyproject.toml`, + `ordvec-python/python/ordvec/__init__.py`, and `ordvec-ffi/Cargo.toml`) and + update `CHANGELOG.md` with migration notes for every intentional + compatibility break. Commit on `main`. 4. Confirm CI is **green for current `main` HEAD**. `require-ci-green` checks `main` HEAD's SHA — which needs a **completed, successful** (not `cancelled`, not in-progress) run of `ci.yml`, `python.yml`, `fuzz.yml`, and @@ -184,13 +187,15 @@ filename. Until a record is updated, the corresponding gated publish fails git push origin vX.Y.Z ``` - `release.yml` triggers automatically. It builds the two `.crate` files, - wheels, and sdist; selects the canonical Python dist (current build for a - new PyPI version, verified PyPI bytes for an existing immutable version); - attests the files this run can honestly attest (GitHub attestation store + - `*.sigstore.json`); generates the SLSA `*.intoto.jsonl`; and stages every - artifact, the attestation bundle, and the provenance on the GitHub Release - — **as a DRAFT**. It then pauses at the registry environment gates. + `release.yml` triggers automatically. It builds the core `.crate`, wheels, + and sdist; selects the canonical Python dist (current build for a new PyPI + version, verified PyPI bytes for an existing immutable version); attests the + files this run can honestly attest (GitHub attestation store + + `*.sigstore.json`); generates SLSA `*.intoto.jsonl`; and stages the core and + Python assets on the GitHub Release — **as a DRAFT**. After `publish-crate` + succeeds, it builds, attests, generates SLSA provenance for, and stages the + lockstep `ordvec-manifest` `.crate`, then pauses at the manifest registry + environment gate. 8. **Approve each publish environment pause** in the Actions UI. There are three registry publish jobs: `publish-crate`, `publish-manifest-crate`, and `publish-pypi`. The two crates.io jobs use the same `crates-io` environment diff --git a/THREAT_MODEL.md b/THREAT_MODEL.md index aefe527..f6574a7 100644 --- a/THREAT_MODEL.md +++ b/THREAT_MODEL.md @@ -1,6 +1,6 @@ # Threat Model — `ordvec` -> **Status:** v0.3.0 (pre-1.0), 2026-05-29. This is the maintained threat model +> **Status:** v0.4.0 (pre-1.0), 2026-06-04. This is the maintained threat model > for the `ordvec` Rust crate, C ABI, Go wrapper, PyO3/maturin Python bindings, > and the `ordvec-manifest` sidecar verifier. It is reviewed when the > attack surface changes (new persistence formats, new `unsafe` kernels, new diff --git a/fuzz/Cargo.lock b/fuzz/Cargo.lock index ca2cc33..0e29679 100644 --- a/fuzz/Cargo.lock +++ b/fuzz/Cargo.lock @@ -231,7 +231,7 @@ checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "ordvec" -version = "0.3.0" +version = "0.4.0" dependencies = [ "rayon", ] diff --git a/ordvec-ffi/Cargo.toml b/ordvec-ffi/Cargo.toml index 67eea44..f698a40 100644 --- a/ordvec-ffi/Cargo.toml +++ b/ordvec-ffi/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ordvec-ffi" -version = "0.1.0" +version = "0.4.0" edition = "2021" publish = false license = "MIT OR Apache-2.0" @@ -11,4 +11,3 @@ crate-type = ["rlib", "cdylib", "staticlib"] [dependencies] ordvec = { path = ".." } - diff --git a/ordvec-go/ordvec.go b/ordvec-go/ordvec.go index 6ca2593..21126fb 100644 --- a/ordvec-go/ordvec.go +++ b/ordvec-go/ordvec.go @@ -88,6 +88,18 @@ func (s Status) String() string { return C.GoString(C.ordvec_status_name(C.ordvec_status_t(s))) } +func ABIVersion() uint32 { + return uint32(C.ordvec_abi_version()) +} + +func Version() string { + ptr := C.ordvec_version_string() + if ptr == nil { + return "" + } + return C.GoString(ptr) +} + type Info struct { Kind Kind FormatVersion uint32 diff --git a/ordvec-go/ordvec_test.go b/ordvec-go/ordvec_test.go index 4cdf2ee..11a7a0e 100644 --- a/ordvec-go/ordvec_test.go +++ b/ordvec-go/ordvec_test.go @@ -7,6 +7,7 @@ import ( "math" "os" "path/filepath" + "regexp" "strings" "sync" "testing" @@ -69,6 +70,23 @@ func query64() []float32 { return q } +func TestVersionAccessors(t *testing.T) { + if ABIVersion() != 1 { + t.Fatalf("unexpected ABI version: %d", ABIVersion()) + } + manifest, err := os.ReadFile(filepath.Join("..", "ordvec-ffi", "Cargo.toml")) + if err != nil { + t.Fatal(err) + } + match := regexp.MustCompile(`(?m)^version = "([^"]+)"$`).FindSubmatch(manifest) + if match == nil { + t.Fatal("ordvec-ffi/Cargo.toml missing package version") + } + if got, want := Version(), string(match[1]); got != want { + t.Fatalf("unexpected library version: got %q want %q", got, want) + } +} + func TestLoadInfoSearchRankQuant(t *testing.T) { idx, err := Load(writeRankQuantFixture(t)) if err != nil { diff --git a/ordvec-manifest/Cargo.toml b/ordvec-manifest/Cargo.toml index ad4f8a4..a85880b 100644 --- a/ordvec-manifest/Cargo.toml +++ b/ordvec-manifest/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ordvec-manifest" -version = "0.3.0" +version = "0.4.0" edition = "2021" rust-version = "1.89" license = "MIT OR Apache-2.0" @@ -24,7 +24,7 @@ path = "src/main.rs" chrono = { version = "0.4.44", default-features = false, features = ["clock", "std"] } clap = { version = "4.6.1", features = ["derive"] } hex = "0.4.3" -ordvec = { version = "0.3.0", path = ".." } +ordvec = { version = "0.4.0", path = ".." } rusqlite = { version = "0.39.0", optional = true } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" diff --git a/ordvec-manifest/README.md b/ordvec-manifest/README.md index 3e8bf9d..3b48ec9 100644 --- a/ordvec-manifest/README.md +++ b/ordvec-manifest/README.md @@ -10,10 +10,9 @@ ordvec index. It does not sign artifacts, manage keys, call networks, mutate index files, decide deployment trust policy, estimate encoder geometry, compute calibration statistics, or change the C ABI. -`ordvec-manifest` is versioned in lockstep with the core `ordvec` crate. Before -the first crates.io release that includes this package, use it from the GitHub -workspace checkout; after that release, install it with `cargo install -ordvec-manifest`. +`ordvec-manifest` is versioned in lockstep with the core `ordvec` crate. From a +workspace checkout, use it with `cargo run -p ordvec-manifest --`; from a +published release, install it with `cargo install ordvec-manifest`. ```sh ordvec-manifest create \ @@ -25,7 +24,7 @@ ordvec-manifest create \ ordvec-manifest verify --manifest path/to/index.manifest.json ``` -From a pre-release workspace checkout, prefix the same commands with +From a workspace checkout, prefix the same commands with `cargo run -p ordvec-manifest --`. The schema version is `ordvec.index_manifest.v1`. Relative paths resolve from diff --git a/ordvec-python/Cargo.toml b/ordvec-python/Cargo.toml index 9ffed2c..4adbe56 100644 --- a/ordvec-python/Cargo.toml +++ b/ordvec-python/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "ordvec-python" -version = "0.3.0" +version = "0.4.0" edition = "2021" rust-version = "1.89" # inherits ordvec's AVX-512 MSRV floor description = "Python bindings for ordvec — training-free ordinal & sign vector quantization" diff --git a/ordvec-python/pyproject.toml b/ordvec-python/pyproject.toml index c3943d7..d9c1e99 100644 --- a/ordvec-python/pyproject.toml +++ b/ordvec-python/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "maturin" [project] name = "ordvec" -version = "0.3.0" +version = "0.4.0" description = "Training-free ordinal & sign quantization for compressed vector retrieval" readme = "README.md" requires-python = ">=3.10" diff --git a/ordvec-python/python/ordvec/__init__.py b/ordvec-python/python/ordvec/__init__.py index a29a655..f97388e 100644 --- a/ordvec-python/python/ordvec/__init__.py +++ b/ordvec-python/python/ordvec/__init__.py @@ -11,9 +11,11 @@ ``rankquant_eval_search``, the byte-LUT scoring helper ``search_asymmetric_byte_lut``, and the loader limit constants (``MAX_DIM``, ``MAX_SIGN_BITMAP_DIM``, ``MAX_VECTORS``). Together with the four classes' -methods this mirrors the Rust crate's public API; the low-level ``rank_io`` -read/write functions are reached through the classes' ``write()`` / ``load()`` -methods rather than exposed as standalone free functions. +methods this mirrors the headline Rust retrieval API. Rust-only metadata +probing and manifest-verification helpers remain available through the Rust +crates and the ``ordvec-manifest`` CLI; the low-level ``rank_io`` read/write +functions are reached through the classes' ``write()`` / ``load()`` methods +rather than exposed as standalone free functions. ``Bitmap`` exposes the constant-weight top-bucket overlap statistic formalized in the companion ``ordvec-formalization`` Lean repo: under explicit finite @@ -103,4 +105,4 @@ "SignBitmapIndex", ] -__version__ = "0.3.0" +__version__ = "0.4.0" diff --git a/tests/release_publish_invariants.py b/tests/release_publish_invariants.py index fc077c1..6a6e3fc 100644 --- a/tests/release_publish_invariants.py +++ b/tests/release_publish_invariants.py @@ -13,6 +13,11 @@ import sys from typing import Any +try: + import tomllib +except ModuleNotFoundError: + tomllib = None # type: ignore[assignment] + WORKFLOW_PATH = os.environ.get("RELEASE_WORKFLOW_PATH", ".github/workflows/release.yml") PYTHON_WORKFLOW_PATH = os.environ.get("PYTHON_WORKFLOW_PATH", ".github/workflows/python.yml") @@ -117,6 +122,209 @@ def read_text(path: str) -> str: fail(f"{path}: could not read workflow: {exc}") +def strip_toml_comment(line: str) -> str: + quote: str | None = None + escaped = False + for index, char in enumerate(line): + if escaped: + escaped = False + continue + if char == "\\" and quote == '"': + escaped = True + continue + if char in {'"', "'"}: + if quote == char: + quote = None + elif quote is None: + quote = char + continue + if char == "#" and quote is None: + return line[:index] + return line + + +def split_inline_table(value: str) -> list[str]: + parts: list[str] = [] + start = 0 + quote: str | None = None + escaped = False + bracket_depth = 0 + for index, char in enumerate(value): + if escaped: + escaped = False + continue + if char == "\\" and quote == '"': + escaped = True + continue + if char in {'"', "'"}: + if quote == char: + quote = None + elif quote is None: + quote = char + continue + if char == "[" and quote is None: + bracket_depth += 1 + continue + if char == "]" and quote is None and bracket_depth > 0: + bracket_depth -= 1 + continue + if char == "," and quote is None and bracket_depth == 0: + parts.append(value[start:index].strip()) + start = index + 1 + parts.append(value[start:].strip()) + return [part for part in parts if part] + + +def parse_toml_value(value: str) -> Any: + value = value.strip() + if len(value) >= 2 and value[0] == value[-1] and value[0] in {'"', "'"}: + return value[1:-1] + if value in {"true", "false"}: + return value == "true" + if re.fullmatch(r"[+-]?\d+", value): + return int(value) + if re.fullmatch(r"[+-]?\d+\.\d+", value): + return float(value) + if value.startswith("[") and value.endswith("]"): + inner = value[1:-1].strip() + return [] if not inner else [parse_toml_value(part) for part in split_inline_table(inner)] + if value.startswith("{") and value.endswith("}"): + parsed: dict[str, Any] = {} + for part in split_inline_table(value[1:-1]): + key, separator, inner = part.partition("=") + if not separator: + raise ValueError(f"unsupported inline table entry {part!r}") + parsed[key.strip()] = parse_toml_value(inner) + return parsed + raise ValueError(f"unsupported TOML value {value!r}") + + +def minimal_load_toml(path: str) -> dict[str, Any]: + data: dict[str, Any] = {} + current: dict[str, Any] = data + in_multiline_array = False + for lineno, raw_line in enumerate(read_text(path).splitlines(), start=1): + line = strip_toml_comment(raw_line).strip() + if not line: + continue + if in_multiline_array: + if line == "]" or line.endswith("]"): + in_multiline_array = False + continue + if line.startswith("[[") and line.endswith("]]"): + current = {} + continue + if line.startswith("[") and line.endswith("]"): + current = data + for part in line[1:-1].split("."): + current = current.setdefault(part.strip(), {}) + if not isinstance(current, dict): + raise ValueError(f"{path}:{lineno}: section conflicts with scalar value") + continue + key, separator, value = line.partition("=") + if not separator: + raise ValueError(f"{path}:{lineno}: unsupported TOML line {line!r}") + if value.strip() == "[": + current[key.strip()] = [] + in_multiline_array = True + continue + current[key.strip()] = parse_toml_value(value) + if in_multiline_array: + raise ValueError(f"{path}: unterminated multiline array") + return data + + +def load_toml(path: str) -> dict[str, Any]: + try: + if tomllib is None: + data = minimal_load_toml(path) + else: + with open(path, "rb") as fh: + data = tomllib.load(fh) + except OSError as exc: + fail(f"{path}: could not read TOML: {exc}") + except (tomllib.TOMLDecodeError if tomllib is not None else ValueError) as exc: + fail(f"{path}: could not parse TOML: {exc}") + if not isinstance(data, dict): + fail(f"{path}: TOML root must be a mapping") + return data + + +def package_version(path: str) -> str: + data = load_toml(path) + package = mapping(data.get("package"), f"{path}: package") + version = package.get("version") + if not isinstance(version, str) or not version: + fail(f"{path}: package.version must be a non-empty string") + return version + + +def project_version(path: str) -> str: + data = load_toml(path) + project = mapping(data.get("project"), f"{path}: project") + version = project.get("version") + if not isinstance(version, str) or not version: + fail(f"{path}: project.version must be a non-empty string") + return version + + +def python_init_version(path: str) -> str: + text = read_text(path) + matches = re.findall(r"^__version__\s*=\s*['\"]([^'\"]+)['\"]\s*$", text, re.MULTILINE) + if len(matches) != 1: + fail(f"{path}: must contain exactly one literal __version__ assignment") + return matches[0] + + +def check_release_version_sync() -> None: + core_version = package_version("Cargo.toml") + expected = { + "ordvec-python/Cargo.toml package.version": package_version("ordvec-python/Cargo.toml"), + "ordvec-python/pyproject.toml project.version": project_version( + "ordvec-python/pyproject.toml" + ), + "ordvec-python/python/ordvec/__init__.py __version__": python_init_version( + "ordvec-python/python/ordvec/__init__.py" + ), + "ordvec-manifest/Cargo.toml package.version": package_version("ordvec-manifest/Cargo.toml"), + "ordvec-ffi/Cargo.toml package.version": package_version("ordvec-ffi/Cargo.toml"), + } + for label, version in expected.items(): + if version != core_version: + fail(f"{label} is {version}, expected lockstep version {core_version}") + + manifest = load_toml("ordvec-manifest/Cargo.toml") + dependencies = mapping(manifest.get("dependencies"), "ordvec-manifest/Cargo.toml: dependencies") + ordvec_dep = mapping( + dependencies.get("ordvec"), "ordvec-manifest/Cargo.toml: dependencies.ordvec" + ) + dep_version = ordvec_dep.get("version") + if dep_version != core_version: + fail( + "ordvec-manifest/Cargo.toml: dependencies.ordvec.version " + f"is {dep_version!r}, expected {core_version!r}" + ) + + changelog = read_text("CHANGELOG.md") + if not re.search(rf"^## \[?{re.escape(core_version)}\]? - \d{{4}}-\d{{2}}-\d{{2}}$", changelog, re.MULTILINE): + fail(f"CHANGELOG.md must contain a dated section for {core_version}") + + threat_model = read_text("THREAT_MODEL.md") + if not re.search( + rf"^\>\s+\*\*Status:\*\*\s+v{re.escape(core_version)}\s+\(pre-1\.0\),", + threat_model, + re.MULTILINE, + ): + fail(f"THREAT_MODEL.md status must mention v{core_version}") + + fuzz_lock = read_text("fuzz/Cargo.lock") + if not re.search( + rf'(?ms)^\[\[package\]\]\nname = "ordvec"\nversion = "{re.escape(core_version)}"\n', + fuzz_lock, + ): + fail(f"fuzz/Cargo.lock must lock the path dependency ordvec at {core_version}") + + def shell_vars(name: str) -> set[str]: return {f"${name}", f"${{{name}}}"} @@ -485,6 +693,7 @@ def check_publish_crates(workflow: dict[str, Any], path: str) -> None: def main() -> None: workflow = load_workflow(WORKFLOW_PATH) + check_release_version_sync() check_hash_requirement_temp_paths([WORKFLOW_PATH, PYTHON_WORKFLOW_PATH]) check_aarch64_smoke_selector(workflow, WORKFLOW_PATH) check_pypi_canonical_dist(workflow, WORKFLOW_PATH) diff --git a/tests/release_signed_release_invariants.sh b/tests/release_signed_release_invariants.sh index b4facd2..415b7be 100755 --- a/tests/release_signed_release_invariants.sh +++ b/tests/release_signed_release_invariants.sh @@ -17,11 +17,15 @@ # | Rust-crates-only when PyPI files already exist) # | # v -# release-assets-draft (uploads .crate/canonical .whl/.tar.gz/.sigstore.json/.intoto.jsonl to DRAFT release) +# release-assets-draft (uploads core .crate/canonical .whl/.tar.gz/.sigstore.json/.intoto.jsonl to DRAFT release) # | -# +--> publish-crate (byte-identity check vs attested .crate, then cargo publish) -# +--> publish-manifest-crate (after publish-crate; same byte-identity proof) -# +--> publish-pypi (Trusted Publishing, or existing-file verification) +# +--> publish-crate (byte-identity check vs attested .crate, then cargo publish) +# +--> publish-pypi (Trusted Publishing, or existing-file verification) +# | +# +-> build/attest/provenance/release-manifest-assets-draft +# | (after publish-crate; uploads manifest .crate/.sigstore.json/.intoto.jsonl) +# | +# +--> publish-manifest-crate (same byte-identity proof after manifest assets stage) # | # v # publish-github-release (un-draft, ONLY after all registry publishes succeed) @@ -54,9 +58,10 @@ job_body() { # Accept both `needs: [a, b, c]` (inline) and `needs:\n - a\n - b` (block) forms. job_needs() { local jobname="$1" needed="$2" - local body + local body escaped body="$(job_body "$jobname")" - printf '%s\n' "$body" | grep -qE "(^[[:space:]]+needs:.*\\b${needed}\\b|^[[:space:]]+-[[:space:]]+${needed}[[:space:]]*$)" + escaped="$(printf '%s' "$needed" | sed 's/[][\\.^$*+?{}|()]/\\&/g')" + printf '%s\n' "$body" | grep -qE "(^[[:space:]]+needs:[[:space:]]*\\[[^]]*(^|[^A-Za-z0-9_-])${escaped}([^A-Za-z0-9_-]|$)|^[[:space:]]+-[[:space:]]+${escaped}[[:space:]]*$)" } job_line() { @@ -111,11 +116,29 @@ printf '%s\n' "$body_draft" | grep -qE 'name:[[:space:]]*pypi-canonical-dist' \ || fail "release-assets-draft must upload canonical Python dist, not raw rebuilt wheel/sdist artifacts" job_downloads_artifact_to_path release-assets-draft dist-crate dist \ || fail "release-assets-draft must download the core dist-crate artifact into dist" -job_downloads_artifact_to_path release-assets-draft dist-manifest-crate dist \ - || fail "release-assets-draft must download the manifest dist-manifest-crate artifact into dist" printf '%s\n' "$body_draft" | grep -qE "$github_repo_env_re" \ || fail "release-assets-draft must set \`GH_REPO: \${{ github.repository }}\` (no checkout, so gh release upload needs explicit repo context)" +body_manifest_draft="$(job_body release-manifest-assets-draft)" +job_needs release-manifest-assets-draft build-manifest-crate \ + || fail "release-manifest-assets-draft must \`needs: build-manifest-crate\`" +job_needs release-manifest-assets-draft attest-manifest \ + || fail "release-manifest-assets-draft must \`needs: attest-manifest\`" +job_needs release-manifest-assets-draft manifest-provenance \ + || fail "release-manifest-assets-draft must \`needs: manifest-provenance\`" +job_downloads_artifact_to_path release-manifest-assets-draft dist-manifest-crate dist \ + || fail "release-manifest-assets-draft must download the manifest dist-manifest-crate artifact into dist" +job_downloads_artifact_to_path release-manifest-assets-draft sigstore-bundle-manifest dist \ + || fail "release-manifest-assets-draft must download the manifest Sigstore bundle into dist" +printf '%s\n' "$body_manifest_draft" | grep -qE 'dist/\*\.crate([^a-zA-Z]|$)' \ + || fail "release-manifest-assets-draft must upload dist/*.crate" +printf '%s\n' "$body_manifest_draft" | grep -qE 'dist/\*\.sigstore\.json([^a-zA-Z]|$)' \ + || fail "release-manifest-assets-draft must upload dist/*.sigstore.json" +printf '%s\n' "$body_manifest_draft" | grep -qE 'dist/\*\.intoto\.jsonl([^a-zA-Z]|$)' \ + || fail "release-manifest-assets-draft must upload dist/*.intoto.jsonl" +printf '%s\n' "$body_manifest_draft" | grep -qE "$github_repo_env_re" \ + || fail "release-manifest-assets-draft must set \`GH_REPO: \${{ github.repository }}\`" + # ---------------------------------------------------------------------- # (3) release-assets-draft must NOT un-draft (the dedicated un-draft job owns # that; un-drafting here would re-introduce the public-release-before- @@ -124,6 +147,9 @@ printf '%s\n' "$body_draft" | grep -qE "$github_repo_env_re" \ if printf '%s\n' "$body_draft" | grep -qE 'gh release edit.*--draft=false'; then fail "release-assets-draft must NOT un-draft the Release (un-drafting belongs in publish-github-release, after all registry publishes succeed)" fi +if printf '%s\n' "$body_manifest_draft" | grep -qE 'gh release edit.*--draft=false'; then + fail "release-manifest-assets-draft must NOT un-draft the Release (un-drafting belongs in publish-github-release, after all registry publishes succeed)" +fi # ---------------------------------------------------------------------- # (4) provenance uses slsa-github-generator pinned to a SEMANTIC VERSION TAG @@ -132,13 +158,18 @@ fi prov="$(job_body provenance)" printf '%s\n' "$prov" | grep -qE 'uses:[[:space:]]*slsa-framework/slsa-github-generator/.+/generator_generic_slsa3\.yml@v[0-9]+\.[0-9]+\.[0-9]+' \ || fail "provenance must \`uses: slsa-framework/slsa-github-generator/.../generator_generic_slsa3.yml@vX.Y.Z\` (tag-pinned per SLSA trust model)" +manifest_prov="$(job_body manifest-provenance)" +printf '%s\n' "$manifest_prov" | grep -qE 'uses:[[:space:]]*slsa-framework/slsa-github-generator/.+/generator_generic_slsa3\.yml@v[0-9]+\.[0-9]+\.[0-9]+' \ + || fail "manifest-provenance must \`uses: slsa-framework/slsa-github-generator/.../generator_generic_slsa3.yml@vX.Y.Z\` (tag-pinned per SLSA trust model)" # ---------------------------------------------------------------------- -# (5) provenance must have `upload-assets: false` — release-assets-draft is -# the sole Release-asset writer; two concurrent writers would race. +# (5) provenance must have `upload-assets: false` — asset-staging jobs, not +# SLSA generator workflows, own Release uploads. # ---------------------------------------------------------------------- printf '%s\n' "$prov" | grep -qE '^[[:space:]]+upload-assets:[[:space:]]*false[[:space:]]*$' \ - || fail "provenance must set \`upload-assets: false\` (single Release-asset writer is release-assets-draft; the .intoto.jsonl flows through the workflow-artifact path)" + || fail "provenance must set \`upload-assets: false\` (release-assets-draft uploads the collected .intoto.jsonl from the workflow-artifact path)" +printf '%s\n' "$manifest_prov" | grep -qE '^[[:space:]]+upload-assets:[[:space:]]*false[[:space:]]*$' \ + || fail "manifest-provenance must set \`upload-assets: false\` (release-manifest-assets-draft uploads the collected .intoto.jsonl from the workflow-artifact path)" # ---------------------------------------------------------------------- # (6) provenance-name MUST end in `.intoto.jsonl` — Scorecard's provenance @@ -146,6 +177,8 @@ printf '%s\n' "$prov" | grep -qE '^[[:space:]]+upload-assets:[[:space:]]*false[[ # ---------------------------------------------------------------------- printf '%s\n' "$prov" | grep -qE '^[[:space:]]+provenance-name:.*\.intoto\.jsonl[[:space:]]*$' \ || fail "provenance must set \`provenance-name: .intoto.jsonl\` (Scorecard Signed-Releases provenance probe matches this suffix only)" +printf '%s\n' "$manifest_prov" | grep -qE '^[[:space:]]+provenance-name:.*ordvec-manifest-.*\.intoto\.jsonl[[:space:]]*$' \ + || fail "manifest-provenance must set \`provenance-name: ordvec-manifest-.intoto.jsonl\`" # ---------------------------------------------------------------------- # (7) attest job grants id-token: write + attestations: write @@ -155,31 +188,49 @@ printf '%s\n' "$att" | grep -qE '^[[:space:]]+id-token:[[:space:]]*write' \ || fail "attest job must grant \`id-token: write\` (Sigstore OIDC signing cert)" printf '%s\n' "$att" | grep -qE '^[[:space:]]+attestations:[[:space:]]*write' \ || fail "attest job must grant \`attestations: write\` (persist to the GitHub attestation store)" -job_needs attest build-manifest-crate \ - || fail "attest must \`needs: build-manifest-crate\` so the manifest .crate is an attestation subject" -job_downloads_artifact_to_path attest dist-manifest-crate dist \ - || fail "attest must download the dist-manifest-crate artifact into dist" +att_manifest="$(job_body attest-manifest)" +printf '%s\n' "$att_manifest" | grep -qE '^[[:space:]]+id-token:[[:space:]]*write' \ + || fail "attest-manifest job must grant \`id-token: write\` (Sigstore OIDC signing cert)" +printf '%s\n' "$att_manifest" | grep -qE '^[[:space:]]+attestations:[[:space:]]*write' \ + || fail "attest-manifest job must grant \`attestations: write\` (persist to the GitHub attestation store)" +job_needs attest-manifest build-manifest-crate \ + || fail "attest-manifest must \`needs: build-manifest-crate\`" +job_downloads_artifact_to_path attest-manifest dist-manifest-crate dist \ + || fail "attest-manifest must download the dist-manifest-crate artifact into dist" comb="$(job_body combine-hashes)" -job_needs combine-hashes build-manifest-crate \ - || fail "combine-hashes must \`needs: build-manifest-crate\` so the manifest .crate is a SLSA subject" -job_downloads_artifact_to_path combine-hashes dist-manifest-crate dist \ - || fail "combine-hashes must download the dist-manifest-crate artifact into dist" +comb_manifest="$(job_body combine-manifest-hash)" +job_needs combine-manifest-hash build-manifest-crate \ + || fail "combine-manifest-hash must \`needs: build-manifest-crate\` so the manifest .crate is a SLSA subject" +job_downloads_artifact_to_path combine-manifest-hash dist-manifest-crate dist \ + || fail "combine-manifest-hash must download the dist-manifest-crate artifact into dist" build_manifest="$(job_body build-manifest-crate)" -printf '%s\n' "$build_manifest" | grep -qE 'cargo[[:space:]]+package[[:space:]]+-p[[:space:]]+ordvec-manifest[[:space:]]+--locked[[:space:]]+--no-verify' \ - || fail "build-manifest-crate must package with --no-verify before the lockstep core crate exists on crates.io" +job_needs build-manifest-crate publish-crate \ + || fail "build-manifest-crate must \`needs: publish-crate\` so lockstep ordvec exists on crates.io" +printf '%s\n' "$build_manifest" | grep -qE 'cargo[[:space:]]+package[[:space:]]+-p[[:space:]]+ordvec-manifest[[:space:]]+--locked([^[:alnum:]_-]|$)' \ + || fail "build-manifest-crate must package ordvec-manifest with Cargo registry verification" +if printf '%s\n' "$build_manifest" | grep -q -- '--no-verify'; then + fail "build-manifest-crate must not use --no-verify after publish-crate" +fi # ---------------------------------------------------------------------- # (8) Registry publish jobs grant id-token: write AND need release-assets-draft. # ---------------------------------------------------------------------- -for pub in publish-crate publish-manifest-crate publish-pypi; do +for pub in publish-crate publish-pypi; do body="$(job_body "$pub")" printf '%s\n' "$body" | grep -qE '^[[:space:]]+id-token:[[:space:]]*write' \ || fail "$pub must grant \`id-token: write\` (Trusted Publishing OIDC)" job_needs "$pub" release-assets-draft \ || fail "$pub must \`needs: release-assets-draft\` (gated by attest + provenance via the draft-assets edge)" done +body="$(job_body publish-manifest-crate)" +printf '%s\n' "$body" | grep -qE '^[[:space:]]+id-token:[[:space:]]*write' \ + || fail "publish-manifest-crate must grant \`id-token: write\` (Trusted Publishing OIDC)" +job_needs publish-manifest-crate release-manifest-assets-draft \ + || fail "publish-manifest-crate must \`needs: release-manifest-assets-draft\`" +job_needs publish-manifest-crate publish-crate \ + || fail "publish-manifest-crate must \`needs: publish-crate\`" # ---------------------------------------------------------------------- # (9) Rust crate publish jobs prove byte-identity vs the attested .crate on BOTH From 1e9e64268f5a35627eac44d579eb7355462c26ba Mon Sep 17 00:00:00 2001 From: Nelson Spence Date: Tue, 9 Jun 2026 09:30:46 -0500 Subject: [PATCH 2/2] Harden manifest package release checks Signed-off-by: Nelson Spence --- .github/workflows/ci.yml | 16 ++++++++- .github/workflows/release.yml | 16 ++++++++- tests/release_publish_invariants.py | 53 +++++++++++++++++++++++++++++ 3 files changed, 83 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 4a27876..fea6163 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -265,14 +265,28 @@ jobs: - name: cargo package -p ordvec-manifest when lockstep core exists run: | set -euo pipefail + core_version="$(cargo metadata --no-deps --format-version 1 | python3 -c 'import json, sys; print(next(pkg["version"] for pkg in json.load(sys.stdin)["packages"] if pkg["name"] == "ordvec"))')" log="${RUNNER_TEMP}/ordvec-manifest-package.log" if cargo package -p ordvec-manifest --locked 2>&1 | tee "$log"; then exit 0 fi - if grep -q 'failed to select a version for the requirement `ordvec = "' "$log"; then + status="$(curl \ + --silent \ + --show-error \ + --location \ + --connect-timeout 10 \ + --max-time 60 \ + --retry 3 \ + --retry-all-errors \ + --user-agent "ordvec-ci/${core_version} (https://github.com/Fieldnote-Echo/ordvec)" \ + --output /dev/null \ + --write-out "%{http_code}" \ + "https://crates.io/api/v1/crates/ordvec/${core_version}" || true)" + if [ "${status}" = "404" ]; then echo "::notice::ordvec-manifest package check is deferred: the lockstep ordvec version is not published yet. release.yml packages ordvec-manifest after publish-crate succeeds." exit 0 fi + echo "::error::ordvec-manifest package failed and ordvec ${core_version} registry status was ${status}; not deferring a real packaging failure." exit 1 # ---------------------------------------------------------------------- diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 63d902f..aa185ad 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -260,6 +260,8 @@ jobs: needs: [guard, publish-crate] if: needs.guard.outputs.ok == 'true' runs-on: ubuntu-latest + env: + VERSION: ${{ needs.guard.outputs.version }} steps: - name: Harden the runner uses: step-security/harden-runner@9af89fc71515a100421586dfdb3dc9c984fbf411 # v2.19.4 @@ -276,7 +278,19 @@ jobs: # version, so package it only after `publish-crate` has published and # verified the core crate. This is the first point where Cargo can # resolve the dependency through the registry for a fresh release. - run: cargo package -p ordvec-manifest --locked + run: | + set -euo pipefail + for i in {1..12}; do + if cargo package -p ordvec-manifest --locked; then + exit 0 + fi + if [ "$i" -eq 12 ]; then + echo "::error::ordvec-manifest packaging did not resolve ordvec ${VERSION} after retries." + exit 1 + fi + echo "waiting for crates.io to resolve ordvec ${VERSION} before manifest packaging (${i}/12)..." + sleep 10 + done - name: Generate CycloneDX SBOM for the manifest crate run: | cargo install cargo-cyclonedx --version 0.5.9 --locked diff --git a/tests/release_publish_invariants.py b/tests/release_publish_invariants.py index 6a6e3fc..731d0cd 100644 --- a/tests/release_publish_invariants.py +++ b/tests/release_publish_invariants.py @@ -21,6 +21,7 @@ WORKFLOW_PATH = os.environ.get("RELEASE_WORKFLOW_PATH", ".github/workflows/release.yml") PYTHON_WORKFLOW_PATH = os.environ.get("PYTHON_WORKFLOW_PATH", ".github/workflows/python.yml") +CI_WORKFLOW_PATH = os.environ.get("CI_WORKFLOW_PATH", ".github/workflows/ci.yml") def fail(message: str) -> None: @@ -379,6 +380,9 @@ def has_cargo_package_arg(words: list[str], package: str) -> bool: def has_cargo_command(run: str, subcommand: str, package: str) -> bool: for line in shell_logical_lines(run): + if line.startswith("if "): + line = line[3:].strip() + line = line.split("; then", 1)[0].strip().rstrip(";") try: words = shlex.split(line) except ValueError: @@ -681,6 +685,27 @@ def check_publish_crates(workflow: dict[str, Any], path: str) -> None: manifest_job = mapping(jobs.get("publish-manifest-crate"), f"{path}: jobs.publish-manifest-crate") if not has_need(manifest_job, "publish-crate"): fail(f"{path}: publish-manifest-crate must need publish-crate so ordvec publishes first") + build_manifest_job = mapping(jobs.get("build-manifest-crate"), f"{path}: jobs.build-manifest-crate") + if not has_need(build_manifest_job, "publish-crate"): + fail(f"{path}: build-manifest-crate must need publish-crate so lockstep ordvec exists") + build_env = mapping(build_manifest_job.get("env"), f"{path}: jobs.build-manifest-crate.env") + if build_env.get("VERSION") != "${{ needs.guard.outputs.version }}": + fail(f"{path}: build-manifest-crate must expose the release VERSION to retry diagnostics") + build_steps = sequence( + build_manifest_job.get("steps"), f"{path}: jobs.build-manifest-crate.steps" + ) + build_manifest_packages = [] + for index, raw_step in enumerate(build_steps): + step = mapping(raw_step, f"{path}: jobs.build-manifest-crate.steps[{index}]") + run = step.get("run") + if isinstance(run, str) and has_cargo_command(run, "package", "ordvec-manifest"): + build_manifest_packages.append(run) + if len(build_manifest_packages) != 1: + fail(f"{path}: build-manifest-crate must package ordvec-manifest after publish-crate") + build_run = build_manifest_packages[0] + for fragment in ("for i in {1..12}", "sleep 10", "ordvec ${VERSION}"): + if fragment not in build_run: + fail(f"{path}: build-manifest-crate package step must retry crates.io propagation") check_publish_crate_job(workflow, path, "publish-crate", "ordvec", "dist-crate") check_publish_crate_job( workflow, @@ -691,6 +716,33 @@ def check_publish_crates(workflow: dict[str, Any], path: str) -> None: ) +def check_ci_manifest_package_defer(workflow: dict[str, Any], path: str) -> None: + jobs = mapping(workflow.get("jobs"), f"{path}: jobs") + deps_job = mapping(jobs.get("deps"), f"{path}: jobs.deps") + steps = sequence(deps_job.get("steps"), f"{path}: jobs.deps.steps") + manifest_package_runs = [] + for index, raw_step in enumerate(steps): + step = mapping(raw_step, f"{path}: jobs.deps.steps[{index}]") + run = step.get("run") + if isinstance(run, str) and has_cargo_command(run, "package", "ordvec-manifest"): + manifest_package_runs.append(run) + if len(manifest_package_runs) != 1: + fail(f"{path}: deps job must run exactly one deferred ordvec-manifest package check") + run = manifest_package_runs[0] + if "grep" in run or "failed to select a version for the requirement" in run: + fail(f"{path}: deferred ordvec-manifest package check must not grep cargo errors") + required_fragments = ( + "cargo metadata --no-deps --format-version 1", + "https://crates.io/api/v1/crates/ordvec/${core_version}", + '--write-out "%{http_code}"', + '[ "${status}" = "404" ]', + "not deferring a real packaging failure", + ) + for fragment in required_fragments: + if fragment not in run: + fail(f"{path}: deferred ordvec-manifest package check must include {fragment!r}") + + def main() -> None: workflow = load_workflow(WORKFLOW_PATH) check_release_version_sync() @@ -698,6 +750,7 @@ def main() -> None: check_aarch64_smoke_selector(workflow, WORKFLOW_PATH) check_pypi_canonical_dist(workflow, WORKFLOW_PATH) check_publish_crates(workflow, WORKFLOW_PATH) + check_ci_manifest_package_defer(load_workflow(CI_WORKFLOW_PATH), CI_WORKFLOW_PATH) check_publish_pypi(workflow, WORKFLOW_PATH)