From c1a7bcf6157c6ff8cf3c01961993e1da8e1b83d7 Mon Sep 17 00:00:00 2001
From: Nelson Spence <nelson@projectnavi.ai>
Date: Tue, 9 Jun 2026 15:36:34 -0500
Subject: [PATCH 1/4] Harden pre-v0.4 release and manifest gates

Signed-off-by: Nelson Spence <nelson@projectnavi.ai>
---
 .github/workflows/release.yml              |  40 +++++-
 RELEASING.md                               |  51 ++++---
 THREAT_MODEL.md                            |  12 +-
 docs/INDEX_PROVENANCE.md                   |   4 +-
 ordvec-manifest/README.md                  |   3 +-
 ordvec-manifest/src/lib.rs                 |  22 ++-
 ordvec-manifest/src/sqlite.rs              |  52 +++++--
 ordvec-manifest/tests/manifest.rs          | 157 ++++++++++++++++++++-
 src/bitmap.rs                              |   1 +
 src/quant.rs                               |  15 +-
 src/sign_bitmap.rs                         |   3 +
 tests/index/finite.rs                      |  26 ++++
 tests/redteam_delta.rs                     |  16 +++
 tests/release_publish_invariants.py        |  83 +++++++++++
 tests/release_signed_release_invariants.sh |   4 +
 15 files changed, 440 insertions(+), 49 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index ea5baee..28a182c 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -169,6 +169,12 @@ jobs:
           SHA: ${{ github.sha }}
         run: |
           set -euo pipefail
+          MAIN_SHA="$(gh api "repos/${REPO}/commits/main" --jq '.sha')"
+          echo "current main sha: ${MAIN_SHA}"
+          if [ "$SHA" != "$MAIN_SHA" ]; then
+            echo "::error::release tag points at ${SHA}, but current main is ${MAIN_SHA}. Move the tag to the current protected main HEAD and re-run."
+            exit 1
+          fi
           # Require a SUCCESSFUL push run for this SHA *on main* for each workflow.
           # Filtering on branch as well as head_sha stops a green run for the same
           # commit on an unrelated branch from satisfying the gate.
@@ -1058,12 +1064,42 @@ jobs:
             exit 1
           fi
           echo "OK: byte-identity verified ($A_SHA)"
+      - name: Check for existing ordvec .crate recovery
+        id: crate_recovery
+        env:
+          VERSION: ${{ needs.guard.outputs.version }}
+        run: |
+          set -euo pipefail
+          ATTESTED="${RUNNER_TEMP}/attested/ordvec-${VERSION}.crate"
+          [ -f "$ATTESTED" ] || { echo "::error::attested .crate missing at $ATTESTED"; exit 1; }
+          A_SHA=$(sha256sum "$ATTESTED" | cut -d' ' -f1)
+          API_URL="https://crates.io/api/v1/crates/ordvec/${VERSION}/download"
+          STATIC_URL="https://static.crates.io/crates/ordvec/ordvec-${VERSION}.crate"
+          CRATES_IO_USER_AGENT="ordvec-release-verify/${VERSION} (https://github.com/Fieldnote-Echo/ordvec)"
+          EXISTING="${RUNNER_TEMP}/existing-ordvec.crate"
+          if curl -fsSL --user-agent "$CRATES_IO_USER_AGENT" "$API_URL" -o "$EXISTING" \
+            || curl -fsSL --user-agent "$CRATES_IO_USER_AGENT" "$STATIC_URL" -o "$EXISTING"; then
+            E_SHA=$(sha256sum "$EXISTING" | cut -d' ' -f1)
+            echo "attested:         $A_SHA"
+            echo "crates.io-served: $E_SHA"
+            if [ "$A_SHA" != "$E_SHA" ]; then
+              echo "::error::crates.io already serves ordvec ${VERSION}, but the served .crate is not byte-identical to the SLSA-attested artifact ($E_SHA != $A_SHA). Refusing recovery."
+              exit 1
+            fi
+            echo "already_published=true" >> "$GITHUB_OUTPUT"
+            echo "::notice::crates.io already serves byte-identical ordvec ${VERSION}; skipping upload and verifying served bytes."
+          else
+            echo "already_published=false" >> "$GITHUB_OUTPUT"
+            echo "No existing ordvec ${VERSION} .crate found on crates.io; proceeding with publish."
+          fi
       # Mint the short-lived crates.io credential immediately before publish so
       # the ephemeral token's exposure window is minimal. No stored secret.
       - name: Mint a short-lived crates.io credential (OIDC)
+        if: steps.crate_recovery.outputs.already_published != 'true'
         id: auth
         uses: rust-lang/crates-io-auth-action@bbd81622f20ce9e2dd9622e3218b975523e45bbe # v1.0.4
       - name: cargo publish
+        if: steps.crate_recovery.outputs.already_published != 'true'
         env:
           CARGO_REGISTRY_TOKEN: ${{ steps.auth.outputs.token }}
         run: cargo publish -p ordvec --locked
@@ -1398,7 +1434,7 @@ jobs:
 
   publish-manifest-pypi:
     name: publish ordvec-manifest to PyPI
-    needs: [guard, pypi-manifest-canonical-dist, release-manifest-assets-draft]
+    needs: [guard, pypi-manifest-canonical-dist, release-manifest-assets-draft, publish-manifest-crate]
     if: needs.guard.outputs.ok == 'true'
     runs-on: ubuntu-latest
     environment:
@@ -1441,7 +1477,7 @@ jobs:
 
   publish-pypi:
     name: publish to PyPI
-    needs: [guard, pypi-canonical-dist, release-assets-draft]
+    needs: [guard, pypi-canonical-dist, release-assets-draft, publish-crate]
     if: needs.guard.outputs.ok == 'true'
     runs-on: ubuntu-latest
     environment:
diff --git a/RELEASING.md b/RELEASING.md
index 9982628..fc9a763 100644
--- a/RELEASING.md
+++ b/RELEASING.md
@@ -8,9 +8,10 @@
 > push.
 
 `ordvec` (the Rust crate), `ordvec-manifest` (the lockstep manifest verifier
-crate), and `ordvec` on PyPI (the PyO3 wheel built from `ordvec-python/`) are
-released by **pushing a `vMAJOR.MINOR.PATCH` tag** to a commit on `main`. The
-release workflow handles build, canonical Python artifact selection,
+crate), `ordvec` on PyPI (the PyO3 wheel built from `ordvec-python/`), and
+`ordvec-manifest` on PyPI (the PyO3 wheel built from
+`ordvec-manifest-python/`) are released by **pushing a `vMAJOR.MINOR.PATCH` tag**
+to current `main` HEAD. The release workflow handles build, canonical Python artifact selection,
 attestation, SLSA provenance, Release-asset attach, and un-draft automatically;
 only the registry environment approvals are manual.
 
@@ -21,13 +22,13 @@ The unified `release.yml`:
 - triggers on **tag push** (`v[0-9]*.[0-9]*.[0-9]*`); a strict-SemVer guard
   step rejects pre-release / leading-zero / non-SemVer tags so they wake the
   workflow but skip every job below the gate;
-- runs a **`require-ci-green`** gate confirming the per-commit CI is green on
-  `main` for the tagged SHA — `ci.yml`, `python.yml`, `fuzz.yml`, `codeql.yml`,
-  `actionlint.yml`, `zizmor.yml`
-  (a *successful* run for that exact SHA on `main`);
+- runs a **`require-ci-green`** gate confirming the tag points at current `main`
+  HEAD and that per-commit CI is green on `main` for that SHA — `ci.yml`,
+  `python.yml`, `fuzz.yml`, `codeql.yml`, `actionlint.yml`, `zizmor.yml` (a
+  *successful* run for that exact SHA on `main`);
 - publishes via **OIDC trusted publishing** (no long-lived crates.io / PyPI
-  tokens in the repo) for both Rust crates and the Python distribution;
-- canonicalizes the Python dist before attestation and release upload: for a
+  tokens in the repo) for both Rust crates and both Python distributions;
+- canonicalizes each Python dist before attestation and release upload: for a
   new PyPI version it uses the current run's wheels/sdist; if PyPI already owns
   that immutable version during recovery, it downloads the exact PyPI-served
   files, verifies their SHA-256 digests from PyPI JSON, and uses those bytes as
@@ -69,7 +70,7 @@ The unified `release.yml`:
   `cargo package -p ordvec-manifest --locked` and byte-compares that output to
   the attested artifact before minting its own OIDC token;
 - **un-drafts the GitHub Release ONLY after `publish-crate`,
-  `publish-manifest-crate`, AND `publish-pypi` succeed**
+  `publish-manifest-crate`, `publish-pypi`, AND `publish-manifest-pypi` succeed**
   (`publish-github-release` is the sole un-draft point). If any publish fails
   or is skipped, the Release stays DRAFT — no public Release ever exists for a
   version the registries refused;
@@ -116,7 +117,10 @@ filename. Until a record is updated, the corresponding gated publish fails
   requires an initial owner bootstrap before a new crate's Trusted Publisher can
   be configured, do that explicit maintainer-approved bootstrap before tagging.
 - **PyPI** → `ordvec` → Publishing → GitHub publisher: `workflow = release.yml`,
-  `environment = pypi`.
+  `environment = pypi`, project URL `https://pypi.org/p/ordvec`.
+- **PyPI** → `ordvec-manifest` → Publishing → GitHub publisher:
+  `workflow = release.yml`, `environment = pypi`, project URL
+  `https://pypi.org/p/ordvec-manifest`.
 
 ### Tag and branch protection
 
@@ -188,7 +192,7 @@ filename. Until a record is updated, the corresponding gated publish fails
    and accept only the stable release tag pattern. Separately verify the
    registry Trusted Publisher records by hand: crates.io must point both
    `ordvec` and `ordvec-manifest` to `release.yml` / `crates-io`, and PyPI must
-   point `ordvec` to `release.yml` / `pypi`.
+   point both `ordvec` and `ordvec-manifest` to `release.yml` / `pypi`.
 6. Get the maintainer's explicit go to publish.
 7. Push the version tag from `main` (signed):
 
@@ -198,18 +202,20 @@ filename. Until a record is updated, the corresponding gated publish fails
    ```
 
    `release.yml` triggers automatically. It builds the core `.crate`, wheels,
-   and sdist; selects the canonical Python dist (current build for a new PyPI
-   version, verified PyPI bytes for an existing immutable version); attests the
-   files this run can honestly attest (GitHub attestation store +
+   and sdist for both Python packages; selects the canonical Python dists
+   (current build for a new PyPI version, verified PyPI bytes for an existing
+   immutable version); attests the files this run can honestly attest (GitHub
+   attestation store +
    `*.sigstore.json`); generates SLSA `*.intoto.jsonl`; and stages the core and
    Python assets on the GitHub Release — **as a DRAFT**. After `publish-crate`
    succeeds, it builds, attests, generates SLSA provenance for, and stages the
    lockstep `ordvec-manifest` `.crate`, then pauses at the manifest registry
    environment gate.
 8. **Approve each publish environment pause** in the Actions UI. There are
-   three registry publish jobs: `publish-crate`, `publish-manifest-crate`, and
-   `publish-pypi`. The two crates.io jobs use the same `crates-io` environment
-   and may require separate approvals; PyPI uses the `pypi` environment.
+   four registry publish jobs: `publish-crate`, `publish-manifest-crate`,
+   `publish-pypi`, and `publish-manifest-pypi`. The two crates.io jobs use the
+   same `crates-io` environment and may require separate approvals; the two PyPI
+   jobs use the `pypi` environment and may also require separate approvals.
    Required-reviewer approval is what authorises each registry push.
    - `publish-crate` and `publish-manifest-crate` first sha256-compare their
      repackaged `.crate` to the SLSA-attested artifact — if either diverges
@@ -219,10 +225,11 @@ filename. Until a record is updated, the corresponding gated publish fails
      un-drafts the GitHub Release automatically. If one gate fails, the Release
      stays DRAFT — investigate and re-run from a fixed workflow rather than
      approving another registry into a partial state.
-   - `publish-pypi` either uploads the fresh canonical dist or, if PyPI already
-     serves that version, skips upload and verifies the existing files. In both
-     modes it compares every PyPI-served wheel/sdist SHA-256 digest against the
-     canonical `dist/` files before the GitHub Release can un-draft.
+   - `publish-pypi` and `publish-manifest-pypi` either upload their fresh
+     canonical dist or, if PyPI already serves that version, skip upload and
+     verify the existing files. In both modes they compare every PyPI-served
+     wheel/sdist SHA-256 digest against the canonical `dist/` files before the
+     GitHub Release can un-draft.
 9. Verify each published artifact and its provenance:
    - crates.io / docs.rs for `ordvec` and `ordvec-manifest`;
    - PyPI (confirm the post-publish hash-verification log, optionally
diff --git a/THREAT_MODEL.md b/THREAT_MODEL.md
index f6574a7..6dcbaf3 100644
--- a/THREAT_MODEL.md
+++ b/THREAT_MODEL.md
@@ -307,11 +307,13 @@ trust model requires be pinned by version *tag*); `persist-credentials: false`
 on every checkout; `permissions: contents: read` default. The **release
 workflow** (`release.yml`) is tag-triggered with a strict-SemVer guard; build,
 GitHub attestation, SLSA provenance, Release-asset attach, and un-draft all
-run automatically, while the two **`crates.io`** publish jobs (`ordvec` first,
-then lockstep `ordvec-manifest`) and the **`pypi`** publish job are gated
-behind GitHub Environments with **Required reviewers** (the only manual step).
-It runs a `require-ci-green` gate against `main`, publishes via **OIDC trusted
-publishing** (no long-lived registry tokens), and emits **SLSA build
+run automatically, while the two **`crates.io`** publish jobs (`publish-crate`
+for `ordvec` first, then `publish-manifest-crate` for lockstep
+`ordvec-manifest`) and the two **`pypi`** publish jobs (`publish-pypi` and
+`publish-manifest-pypi`) are gated behind GitHub Environments with **Required
+reviewers** (the only manual step). It runs a `require-ci-green` gate against
+current `main` HEAD, publishes via **OIDC trusted publishing** (no long-lived
+registry tokens), and emits **SLSA build
 provenance** (`actions/attest-build-provenance` + a `slsa-github-generator`
 `*.intoto.jsonl` attached to the GitHub Release) **before** publish — a failed
 attestation fails the release closed. Each Rust publish job proves pre- and
diff --git a/docs/INDEX_PROVENANCE.md b/docs/INDEX_PROVENANCE.md
index aee9bbe..aaf82c6 100644
--- a/docs/INDEX_PROVENANCE.md
+++ b/docs/INDEX_PROVENANCE.md
@@ -108,7 +108,9 @@ OrdinalDB v0.1 should use `row_id_identity` for the ordvec vector row count and
 declare `ids.bin` as required auxiliary artifact name `ordinaldb.ids`. The
 OrdinalDB `u64` IDs remain caller-owned sidecar bytes. Do not model `ids.bin`
 as JSONL row identity: v1 JSONL row identity is UUID-only, and generic row-map
-ID formats are deferred until there is a separate schema contract for them.
+ID formats are deferred until there is a separate schema contract for them. The
+reserved `row_identity.db` block is rejected in v1 because it is not byte-bound
+or path-checked.
 
 When present, `encoder_distortion` records a scoped encoder geometry profile:
 source metric, embedding metric, lower/upper distortion-style bounds when
diff --git a/ordvec-manifest/README.md b/ordvec-manifest/README.md
index d294653..170c426 100644
--- a/ordvec-manifest/README.md
+++ b/ordvec-manifest/README.md
@@ -150,7 +150,8 @@ file as required auxiliary artifact name `ordinaldb.ids`. That makes the vector
 row count an ordvec invariant while leaving OrdinalDB's `u64` document IDs as a
 caller-owned sidecar. Do not encode `ids.bin` as `RowIdentity::Jsonl`: v1 JSONL
 row identity is UUID-oriented (`id_kind = "uuid"`), and generic row-map ID
-formats are intentionally deferred.
+formats are intentionally deferred. The reserved `row_identity.db` metadata
+block is rejected in v1 because it is not byte-bound or path-checked.
 
 The unified JSON report carries per-sidecar audit fields. A successful
 auxiliary artifact verification includes the manifest path, resolved/canonical
diff --git a/ordvec-manifest/src/lib.rs b/ordvec-manifest/src/lib.rs
index f542748..901deed 100644
--- a/ordvec-manifest/src/lib.rs
+++ b/ordvec-manifest/src/lib.rs
@@ -379,6 +379,7 @@ fn validate_manifest_shape(
         path,
         sha256,
         id_kind,
+        db,
         ..
     } = &manifest.row_identity
     {
@@ -400,6 +401,12 @@ fn validate_manifest_shape(
                 "row_identity.id_kind must be uuid in v1",
             );
         }
+        if db.is_some() {
+            report.error(
+                "row_identity_db_unsupported",
+                "row_identity.db is reserved for a future schema and is not verified in v1",
+            );
+        }
     }
 
     validate_auxiliary_artifact_shape(manifest, limits, report);
@@ -1839,7 +1846,9 @@ fn expected_profile_shape(
         ProfileParameterization::MarginalTopKFrequency => Some(vec![ordinalization.dim()]),
         ProfileParameterization::SignFrequency => Some(vec![ordinalization.dim()]),
         ProfileParameterization::BucketFrequency => match ordinalization {
-            CalibrationOrdinalization::Bucket { dim, bits } => Some(vec![*dim, 1usize << *bits]),
+            CalibrationOrdinalization::Bucket { dim, bits } if matches!(*bits, 1 | 2 | 4) => {
+                Some(vec![*dim, 1usize << *bits])
+            }
             _ => None,
         },
         ProfileParameterization::RankPositionFrequency => {
@@ -3790,7 +3799,9 @@ fn validate_row_id_string(
     limits: &ResourceLimits,
     errors: &mut Vec<ReportIssue>,
 ) {
+    let mut structurally_invalid = false;
     if value.is_empty() {
+        structurally_invalid = true;
         push_report_issue_bounded(
             errors,
             limits,
@@ -3799,6 +3810,7 @@ fn validate_row_id_string(
         );
     }
     if value.contains('\0') {
+        structurally_invalid = true;
         push_report_issue_bounded(
             errors,
             limits,
@@ -3806,6 +3818,14 @@ fn validate_row_id_string(
             format!("line {line_idx} {field} contains NUL"),
         );
     }
+    if !structurally_invalid && Uuid::parse_str(value).is_err() {
+        push_report_issue_bounded(
+            errors,
+            limits,
+            format!("row_identity_{field}_invalid_uuid"),
+            format!("line {line_idx} {field} must be a UUID because row_identity.id_kind is uuid"),
+        );
+    }
 }
 
 fn is_limit_issue_code(code: &str) -> bool {
diff --git a/ordvec-manifest/src/sqlite.rs b/ordvec-manifest/src/sqlite.rs
index 414cd30..a84f156 100644
--- a/ordvec-manifest/src/sqlite.rs
+++ b/ordvec-manifest/src/sqlite.rs
@@ -685,20 +685,44 @@ fn verification_reports_needs_migration(conn: &Connection) -> Result<bool, Manif
         .map_err(sqlite_err)?
         .collect::<Result<Vec<_>, _>>()
         .map_err(sqlite_err)?;
-    Ok(!columns.iter().any(|column| column == "report_id")
-        || !columns.iter().any(|column| column == "manifest_sha256")
-        || !columns
-            .iter()
-            .any(|column| column == "manifest_location_sha256")
-        || !columns
-            .iter()
-            .any(|column| column == "calibration_profile_sha256")
-        || !columns
-            .iter()
-            .any(|column| column == "auxiliary_artifacts_sha256")
-        || !columns
-            .iter()
-            .any(|column| column == "encoder_distortion_profile_sha256"))
+    let current_required = [
+        "report_id",
+        "manifest_id",
+        "manifest_path",
+        "checked_at",
+        "ok",
+        "manifest_location_sha256",
+        "manifest_sha256",
+        "options_sha256",
+        "artifact_sha256",
+        "row_identity_sha256",
+        "calibration_profile_sha256",
+        "auxiliary_artifacts_sha256",
+        "encoder_distortion_profile_sha256",
+        "report_json",
+    ];
+    if current_required
+        .iter()
+        .all(|required| columns.iter().any(|column| column == required))
+    {
+        return Ok(false);
+    }
+
+    let legacy_schema = [
+        "manifest_id",
+        "manifest_path",
+        "checked_at",
+        "ok",
+        "report_json",
+    ];
+    if columns.iter().map(String::as_str).eq(legacy_schema) {
+        return Ok(true);
+    }
+
+    Err(ManifestError::invalid(format!(
+        "unsupported verification_reports schema {:?}; refusing destructive migration",
+        columns
+    )))
 }
 
 fn sqlite_err(err: rusqlite::Error) -> ManifestError {
diff --git a/ordvec-manifest/tests/manifest.rs b/ordvec-manifest/tests/manifest.rs
index a3414d8..1762e47 100644
--- a/ordvec-manifest/tests/manifest.rs
+++ b/ordvec-manifest/tests/manifest.rs
@@ -1495,6 +1495,42 @@ fn calibration_encoder_identity_must_match_embedding() {
     }
 }
 
+#[test]
+fn calibration_invalid_bucket_bits_reports_without_panic() {
+    let temp = tempfile::tempdir().unwrap();
+    let case = tempfile::tempdir_in(temp.path()).unwrap();
+    let index = write_index_kind(case.path(), FixtureKind::RankQuant);
+    let manifest_path = case.path().join("manifest.json");
+    let mut manifest = create_manifest_for_index(
+        &index,
+        CreateRowIdentity::RowIdIdentity,
+        "test-embedding",
+        &manifest_path,
+    )
+    .unwrap();
+    let profile_hash = write_profile(
+        &case.path().join("bucket.f64"),
+        manifest.artifact.dim * std::mem::size_of::<f64>(),
+    );
+    manifest.calibration = Some(weighted_calibration(
+        &manifest,
+        "bucket.f64",
+        profile_hash,
+        CalibrationOrdinalization::Bucket {
+            dim: manifest.artifact.dim,
+            bits: 255, // outside the 1 | 2 | 4 widths the profile-shape check accepts
+        },
+        ProfileParameterization::BucketFrequency,
+        vec![manifest.artifact.dim, 1],
+    ));
+
+    let report = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+        verify_manifest_with_base(manifest, case.path(), VerifyOptions::default())
+    }))
+    .expect("invalid bucket bits must report errors instead of panicking");
+    assert!(error_codes(&report).contains(&"calibration_ordinalization_artifact_mismatch"));
+}
+
 #[test]
 fn calibration_ordinalization_matches_artifact_formats() {
     let temp = tempfile::tempdir().unwrap();
@@ -2359,6 +2395,66 @@ fn jsonl_row_identity_is_strict_and_duplicate_ids_need_opt_in() {
         .any(|issue| issue.code == "row_identity_row_id_mismatch"));
 }
 
+#[test]
+fn jsonl_row_identity_rejects_non_uuid_ids() {
+    let temp = tempfile::tempdir().unwrap();
+    let index = write_rankquant_index(temp.path(), 2);
+    let rows = temp.path().join("rows.jsonl");
+    write_row_map(&rows, &[("doc-a", None), ("doc-b", Some("doc-a"))]); // ids are not UUIDs
+    let row_hash = sha256_file(&rows).unwrap();
+    let manifest_path = temp.path().join("manifest.json");
+    let mut manifest = create_manifest_for_index(
+        &index,
+        CreateRowIdentity::RowIdIdentity,
+        "test-embedding",
+        &manifest_path,
+    )
+    .unwrap();
+    manifest.row_identity = RowIdentity::Jsonl {
+        path: "rows.jsonl".to_string(),
+        sha256: row_hash.sha256,
+        row_count: 2,
+        id_kind: "uuid".to_string(), // the only id_kind v1 accepts
+        db: None,
+    };
+
+    let report = verify_manifest_with_base(manifest, temp.path(), VerifyOptions::default());
+    let codes = error_codes(&report);
+    assert!(codes.contains(&"row_identity_db_id_invalid_uuid")); // from the row's own id
+    assert!(codes.contains(&"row_identity_parent_id_invalid_uuid")); // from the parent ref
+}
+
+#[test]
+fn jsonl_row_identity_rejects_reserved_db_metadata() {
+    let temp = tempfile::tempdir().unwrap();
+    let index = write_rankquant_index(temp.path(), 1);
+    let rows = temp.path().join("rows.jsonl");
+    write_row_map(&rows, &[("00000000-0000-0000-0000-000000000001", None)]); // valid UUID id
+    let row_hash = sha256_file(&rows).unwrap();
+    let manifest_path = temp.path().join("manifest.json");
+    let mut manifest = create_manifest_for_index(
+        &index,
+        CreateRowIdentity::RowIdIdentity,
+        "test-embedding",
+        &manifest_path,
+    )
+    .unwrap();
+    manifest.row_identity = RowIdentity::Jsonl {
+        path: "rows.jsonl".to_string(),
+        sha256: row_hash.sha256,
+        row_count: 1,
+        id_kind: "uuid".to_string(),
+        db: Some(ordvec_manifest::RowIdentityDb {
+            path: Some("/etc/passwd".to_string()), // v1 rejects any populated db block
+            table: Some("documents".to_string()),
+            id_column: Some("id".to_string()),
+        }),
+    };
+
+    let report = verify_manifest_with_base(manifest, temp.path(), VerifyOptions::default());
+    assert!(error_codes(&report).contains(&"row_identity_db_unsupported"));
+}
+
 #[test]
 fn auxiliary_artifacts_verify_and_report_deterministically() {
     let root = tempfile::tempdir().unwrap();
@@ -2927,6 +3023,56 @@ fn verify_index_manifest_uses_explicit_index_override() {
     assert!(report.ok, "{:?}", report.errors);
 }
 
+#[cfg(feature = "sqlite")]
+#[test]
+fn sqlite_refuses_to_migrate_unknown_verification_reports_table() {
+    use rusqlite::Connection;
+
+    let temp = tempfile::tempdir().unwrap();
+    let index = write_index(temp.path());
+    let manifest_path = temp.path().join("manifest.json");
+    let manifest = create_manifest_for_index(
+        &index,
+        CreateRowIdentity::RowIdIdentity,
+        "test-embedding",
+        &manifest_path,
+    )
+    .unwrap();
+    fs::write(
+        &manifest_path,
+        serde_json::to_string_pretty(&manifest).unwrap(),
+    )
+    .unwrap();
+    let document = load_manifest_file(&manifest_path).unwrap();
+    let db = temp.path().join("foreign.sqlite");
+    let conn = Connection::open(&db).unwrap();
+    conn.execute("CREATE TABLE verification_reports(id INTEGER)", [])
+        .unwrap(); // foreign table: neither the current nor the legacy column set
+    drop(conn);
+
+    let err = ordvec_manifest::sqlite::verify_with_registry(
+        &db,
+        &document,
+        &manifest_path,
+        VerifyOptions::default(),
+        true,
+    )
+    .unwrap_err();
+    assert!(err
+        .to_string()
+        .contains("unsupported verification_reports schema"));
+
+    let conn = Connection::open(&db).unwrap();
+    let columns = conn
+        .prepare("PRAGMA table_info(verification_reports)")
+        .unwrap()
+        .query_map([], |row| row.get::<_, String>(1))
+        .unwrap()
+        .collect::<Result<Vec<_>, _>>()
+        .unwrap();
+    assert_eq!(columns, vec!["id"]); // table untouched: no destructive migration ran
+}
+
 #[cfg(feature = "sqlite")]
 #[test]
 fn sqlite_cache_is_explicit_and_activation_reverifies_by_default() {
@@ -3076,7 +3222,16 @@ fn sqlite_combined_verified_load_matrix_respects_limits_paths_and_cache() {
 
     let index = write_index_kind(&assets, FixtureKind::RankQuant);
     let row_map_path = assets.join("rows.jsonl");
-    write_row_map(&row_map_path, &[("doc-a", None), ("doc-b", Some("doc-a"))]);
+    write_row_map(
+        &row_map_path,
+        &[
+            ("00000000-0000-0000-0000-000000000001", None),
+            (
+                "00000000-0000-0000-0000-000000000002",
+                Some("00000000-0000-0000-0000-000000000001"),
+            ),
+        ],
+    );
     let required_path = assets.join("required-sidecar.json");
     fs::write(&required_path, b"{\"required\":true}\n").unwrap();
     let required_hash = sha256_file(&required_path).unwrap();
diff --git a/src/bitmap.rs b/src/bitmap.rs
index 3e7c4f4..35ca962 100644
--- a/src/bitmap.rs
+++ b/src/bitmap.rs
@@ -234,6 +234,7 @@ impl Bitmap {
     /// cheap relative to the cost it saves at M ≥ 1000.
     #[must_use = "this scans the corpus to generate candidates; dropping the result discards that work"]
     pub fn top_m_candidates(&self, q: &[f32], m: usize) -> Vec<u32> {
+        assert_eq!(q.len(), self.dim);
         assert_all_finite(q);
         let m_eff = m.min(self.n_vectors);
         if m_eff == 0 {
diff --git a/src/quant.rs b/src/quant.rs
index ccd65bd..2c66403 100644
--- a/src/quant.rs
+++ b/src/quant.rs
@@ -596,7 +596,9 @@ impl RankQuant {
     /// `candidates` may contain duplicate global row IDs. Each candidate entry
     /// is scored independently, so duplicate IDs may produce duplicate returned
     /// global IDs. Callers that require unique hits should deduplicate the
-    /// candidate list before calling this method.
+    /// candidate list before calling this method. Even with duplicates, the
+    /// candidate list may not be longer than `n_vectors` (the call panics
+    /// otherwise), so scratch space never exceeds that of a full-corpus scan.
     ///
     /// Uses the same AVX-512 → AVX2 → scalar dispatch as
     /// [`Self::search_asymmetric`] and the same centre-drop math, just
@@ -621,6 +623,12 @@ impl RankQuant {
     ) -> (Vec<f32>, Vec<i64>) {
         assert_eq!(query.len(), self.dim);
         assert_all_finite(query);
+        assert!(
+            candidates.len() <= self.n_vectors,
+            "search_asymmetric_subset: candidate list length {} exceeds n_vectors {}; deduplicate repeated ids before calling",
+            candidates.len(),
+            self.n_vectors,
+        );
         // Bounds-check candidate ids before the gather below indexes
         // `self.packed[src..src + bpv]` with `src = di * bpv`. An OOB id
         // otherwise surfaces as a cryptic slice-range panic; fail fast
@@ -657,7 +665,10 @@ impl RankQuant {
         // Pack the candidate docs' bytes into a contiguous buffer so
         // the SIMD kernels can scan them as if they were a small dense
         // sub-index. Cost: m * bpv copy (small for typical m).
-        let mut sub_packed = vec![0u8; m * bpv];
+        let sub_packed_len = m
+            .checked_mul(bpv)
+            .expect("search_asymmetric_subset: candidate scratch length overflows usize");
+        let mut sub_packed = vec![0u8; sub_packed_len];
         for (i, &di) in candidates.iter().enumerate() {
             let src = (di as usize) * bpv;
             sub_packed[i * bpv..(i + 1) * bpv].copy_from_slice(&self.packed[src..src + bpv]);
diff --git a/src/sign_bitmap.rs b/src/sign_bitmap.rs
index 8cae9a1..2e3473c 100644
--- a/src/sign_bitmap.rs
+++ b/src/sign_bitmap.rs
@@ -158,6 +158,8 @@ impl SignBitmap {
     /// [`crate::Bitmap::top_m_candidates`].
     #[must_use = "this scans the corpus to generate candidates; dropping the result discards that work"]
     pub fn top_m_candidates(&self, q: &[f32], m: usize) -> Vec<u32> {
+        assert_eq!(q.len(), self.dim);
+        crate::util::assert_all_finite(q);
         let m_eff = m.min(self.n_vectors);
         if m_eff == 0 {
             return Vec::new();
@@ -194,6 +196,7 @@ impl SignBitmap {
         let dim = self.dim;
         let batch = queries.len() / dim;
         assert_eq!(queries.len(), batch * dim);
+        crate::util::assert_all_finite(queries);
         let m_eff = m.min(self.n_vectors);
         if batch == 0 || m_eff == 0 {
             return vec![Vec::new(); batch];
diff --git a/tests/index/finite.rs b/tests/index/finite.rs
index f2e3710..a2dfbef 100644
--- a/tests/index/finite.rs
+++ b/tests/index/finite.rs
@@ -36,6 +36,14 @@ fn bitmap_top_m_candidates_rejects_nan() {
     let _ = idx.top_m_candidates(&q, 16);
 }
 
+#[test]
+#[should_panic]
+fn bitmap_top_m_candidates_zero_m_validates_query_len() {
+    let idx = Bitmap::new(D, D / 4);
+    let q = vec![0.1f32; D - 1]; // one element short; presumably the index dim is D
+    let _ = idx.top_m_candidates(&q, 0); // m == 0 must not skip the length check
+}
+
 #[test]
 #[should_panic(expected = "non-finite")]
 fn sign_bitmap_build_query_rejects_neg_inf() {
@@ -74,3 +82,21 @@ fn bitmap_build_query_bitmap_fp32_rejects_nan() {
     q[0] = f32::NAN;
     let _ = idx.build_query_bitmap_fp32(&q);
 }
+
+#[test]
+#[should_panic(expected = "non-finite")]
+fn sign_bitmap_top_m_candidates_zero_m_rejects_nan() {
+    let idx = SignBitmap::new(D);
+    let mut q = vec![0.1f32; D];
+    q[0] = f32::NAN;
+    let _ = idx.top_m_candidates(&q, 0); // m == 0 must not skip the finiteness check
+}
+
+#[test]
+#[should_panic(expected = "non-finite")]
+fn sign_bitmap_batched_zero_m_rejects_nan() {
+    let idx = SignBitmap::new(D);
+    let mut queries = vec![0.1f32; D * 2];
+    queries[D] = f32::NAN; // index D: presumably the first element of the second query
+    let _ = idx.top_m_candidates_batched(&queries, 0); // m == 0 must still validate input
+}
diff --git a/tests/redteam_delta.rs b/tests/redteam_delta.rs
index 95b6da2..f891ad6 100644
--- a/tests/redteam_delta.rs
+++ b/tests/redteam_delta.rs
@@ -539,6 +539,22 @@ fn delta_c4_subset_dup_plus_oob_still_rejected() {
     let _ = idx.search_asymmetric_subset(&query, &[5, 999, 5], 3);
 }
 
+/// DELTA-C5: repeating an id is allowed, yet the candidate list as a whole may
+/// not be longer than the corpus. That bounds the scratch gather for
+/// adversarial duplicate-heavy lists without banning duplicates outright.
+#[test]
+#[should_panic(expected = "candidate list length")]
+fn delta_c5_subset_duplicate_overrun_list_rejected() {
+    let (dim, n) = (64, 2);
+    let corpus = make_corpus(8451, n, dim);
+    let mut index = RankQuant::new(dim, 2);
+    index.add(&corpus);
+    let query = make_corpus(8452, 1, dim);
+
+    // Three candidate ids against a corpus built with n = 2 vectors.
+    let _ = index.search_asymmetric_subset(&query, &[0, 0, 0], 3);
+}
+
 // =====================================================================
 // DELTA-D — empty-index / empty-input search paths.
 // =====================================================================
diff --git a/tests/release_publish_invariants.py b/tests/release_publish_invariants.py
index d91d181..33bc4cd 100644
--- a/tests/release_publish_invariants.py
+++ b/tests/release_publish_invariants.py
@@ -547,6 +547,26 @@ def check_python_package_metadata() -> None:
         fail(".github/dependabot.yml must keep the Python NumPy floor comment at >=2.2")
 
 
+def check_release_docs_include_manifest_pypi_lane() -> None:
+    """Fail unless the release docs still contain the ordvec-manifest PyPI phrases."""
+    required_by_doc = {
+        "RELEASING.md": (
+            "`ordvec-manifest` on PyPI",
+            "`publish-manifest-pypi`",
+            "four registry publish jobs",
+            "PyPI must point both `ordvec` and `ordvec-manifest`",
+            "https://pypi.org/p/ordvec-manifest",
+        ),
+        "THREAT_MODEL.md": ("`publish-manifest-pypi`", "two **`pypi`** publish jobs"),
+    }
+    for doc_path, phrases in required_by_doc.items():
+        # Collapse whitespace on both sides so line wrapping cannot hide a phrase.
+        flattened = " ".join(read_text(doc_path).split())
+        for phrase in phrases:
+            if " ".join(phrase.split()) not in flattened:
+                fail(f"{doc_path} must mention {phrase!r}")
+
+
 def check_strict_release_tag_patterns(workflow: dict[str, Any], path: str) -> None:
     try:
         tag_pattern = read_toml_string_in_section("cliff.toml", "git", "tag_pattern")
@@ -925,6 +945,12 @@ def check_release_security_gates(workflow: dict[str, Any], path: str) -> None:
         )
     if found_gate_run is None or "event=push" not in found_gate_run or '.event == "push"' not in found_gate_run:
         fail(f"{path}: require-ci-green must require successful push workflow runs")
+    if (
+        found_gate_run is None
+        or "repos/${REPO}/commits/main" not in found_gate_run
+        or "MAIN_SHA" not in found_gate_run
+    ):
+        fail(f"{path}: require-ci-green must verify the release tag points at current main")
 
     allowed_id_token_jobs = {
         "attest",
@@ -1090,6 +1116,7 @@ def check_publish_pypi(
     canonical_job: str = "pypi-canonical-dist",
     canonical_artifact_name: str = "pypi-canonical-dist",
     project: str | None = None,
+    crate_publish_job: str = "publish-crate",
 ) -> None:
     jobs = mapping(workflow.get("jobs"), f"{path}: jobs")
     job = mapping(jobs.get(job_name), f"{path}: jobs.{job_name}")
@@ -1097,6 +1124,8 @@ def check_publish_pypi(
 
     if not has_need(job, canonical_job):
         fail(f"{path}: {job_name} must need {canonical_job}")
+    if not has_need(job, crate_publish_job):
+        fail(f"{path}: {job_name} must need {crate_publish_job} to avoid a partial PyPI-first release")
 
     publish_steps: list[tuple[int, dict[str, Any]]] = []
     canonical_downloads: list[tuple[int, dict[str, Any], dict[str, Any]]] = []
@@ -1174,6 +1203,7 @@ def check_publish_crate_job(
     publish_runs: list[tuple[int, str]] = []
     publish_dry_runs: list[tuple[int, str]] = []
     auth_steps: list[int] = []
+    recovery_steps: list[tuple[int, dict[str, Any]]] = []
 
     for index, raw_step in enumerate(steps):
         step = mapping(raw_step, f"{path}: jobs.{job_name}.steps[{index}]")
@@ -1188,6 +1218,8 @@ def check_publish_crate_job(
                     publish_runs.append((index, run))
         if action_name(step) == "rust-lang/crates-io-auth-action":
             auth_steps.append(index)
+        if step.get("name") == f"Check for existing {package} .crate recovery":
+            recovery_steps.append((index, step))
         if action_name(step) == "actions/download-artifact":
             with_block = step.get("with", {})
             with_map = mapping(with_block, f"{path}: {step_label(index, step)} with")
@@ -1232,6 +1264,55 @@ def check_publish_crate_job(
     if found_names != verify_step_names:
         fail(f"{path}: {job_name} must have both attested .crate verification steps")
 
+    recovery_id = "crate_recovery" if package == "ordvec" else "manifest_crate_recovery"
+    if len(recovery_steps) != 1:
+        fail(f"{path}: {job_name} must have exactly one first-publish recovery check")
+    recovery_index, recovery_step = recovery_steps[0]
+    if recovery_step.get("id") != recovery_id:
+        fail(f"{path}: {job_name} recovery step must have id {recovery_id}")
+    recovery_run = recovery_step.get("run")
+    if not isinstance(recovery_run, str):
+        fail(f"{path}: {job_name} recovery step must be a run step")
+    for required in (
+        "already_published=true",
+        "already_published=false",
+        "Refusing recovery",
+        f"crates.io already serves byte-identical {package}",
+    ):
+        if required not in recovery_run:
+            fail(f"{path}: {job_name} recovery step must contain {required!r}")
+    for url_var in ("API_URL", "STATIC_URL"):
+        if not any(
+            has_shell_arg(words, shell_vars(url_var))
+            and has_shell_option_value(
+                words, {"--user-agent", "-A"}, shell_vars("CRATES_IO_USER_AGENT")
+            )
+            and has_shell_option_value(words, {"--output", "-o"}, shell_vars("EXISTING"))
+            for words in shell_curl_commands(recovery_run)
+        ):
+            fail(
+                f"{path}: {job_name} recovery step must curl ${url_var} "
+                "with CRATES_IO_USER_AGENT into $EXISTING"
+            )
+
+    protected_step_names = {
+        "Mint a short-lived crates.io credential (OIDC)",
+        "cargo publish",
+    }
+    if require_publish_dry_run:
+        protected_step_names.add("Validate manifest publish dry-run")
+    for index, raw_step in enumerate(steps):
+        step = mapping(raw_step, f"{path}: jobs.{job_name}.steps[{index}]")
+        name = step.get("name")
+        if name in protected_step_names:
+            if index < recovery_index:
+                fail(f"{path}: {name} must run after the {package} crate recovery check")
+            if step.get("if") != f"steps.{recovery_id}.outputs.already_published != 'true'":
+                fail(
+                    f"{path}: {name} must be skipped when {package} crate recovery found "
+                    "byte-identical existing bytes"
+                )
+
     if require_publish_dry_run:
         dry_run_index = publish_dry_runs[0][0]
         byte_identity_index = verify_step_indices["Verify byte-identity vs the attested .crate"]
@@ -1557,6 +1638,7 @@ def main() -> None:
     check_release_compatibility_sync()
     check_publication_model()
     check_python_package_metadata()
+    check_release_docs_include_manifest_pypi_lane()
     check_strict_release_tag_patterns(workflow, WORKFLOW_PATH)
     check_package_contents()
     check_ci_package_guards(ci_workflow, CI_WORKFLOW_PATH)
@@ -1587,6 +1669,7 @@ def main() -> None:
         canonical_job="pypi-manifest-canonical-dist",
         canonical_artifact_name="pypi-manifest-canonical-dist",
         project="ordvec-manifest",
+        crate_publish_job="publish-manifest-crate",
     )
     check_sde_cache_invariants()
 
diff --git a/tests/release_signed_release_invariants.sh b/tests/release_signed_release_invariants.sh
index e709f1a..7d3df87 100755
--- a/tests/release_signed_release_invariants.sh
+++ b/tests/release_signed_release_invariants.sh
@@ -257,6 +257,10 @@ printf '%s\n' "$body" | grep -qE '^[[:space:]]+id-token:[[:space:]]*write' \
   || fail "publish-manifest-pypi must grant \`id-token: write\` (Trusted Publishing OIDC)"
 job_needs publish-manifest-pypi release-manifest-assets-draft \
   || fail "publish-manifest-pypi must \`needs: release-manifest-assets-draft\`"
+job_needs publish-manifest-pypi publish-manifest-crate \
+  || fail "publish-manifest-pypi must \`needs: publish-manifest-crate\` (manifest crate publishes before manifest PyPI)"
+job_needs publish-pypi publish-crate \
+  || fail "publish-pypi must \`needs: publish-crate\` (core crate publishes before core PyPI)"
 
 # ----------------------------------------------------------------------
 # (9) Rust crate publish jobs prove byte-identity vs the attested .crate on BOTH

From cdd6163cd55547c634456c51f5f2a9369341b293 Mon Sep 17 00:00:00 2001
From: Nelson Spence <nelson@projectnavi.ai>
Date: Tue, 9 Jun 2026 15:44:02 -0500
Subject: [PATCH 2/4] Harden PyPI canonical dist coverage

Signed-off-by: Nelson Spence <nelson@projectnavi.ai>
---
 .github/workflows/release.yml              |  32 +++++-
 tests/release_publish_invariants.py        |  14 +++
 tests/release_pypi_canonical_dist.py       | 123 +++++++++++++++++++--
 tests/release_pypi_canonical_dist_tests.py | 114 +++++++++++++++++++
 4 files changed, 270 insertions(+), 13 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 28a182c..73a4bca 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -704,7 +704,13 @@ jobs:
           python3 tests/release_pypi_canonical_dist.py canonicalize \
             --version "$VERSION" \
             --built-dir built-dist \
-            --out-dir canonical-dist
+            --out-dir canonical-dist \
+            --expected-wheels 4 \
+            --expected-sdists 1 \
+            --required-wheel-tag x86_64 \
+            --required-wheel-tag aarch64 \
+            --required-wheel-tag macosx \
+            --required-wheel-tag win_amd64
       - name: Upload the canonical Python dist
         uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
         with:
@@ -751,7 +757,13 @@ jobs:
             --project ordvec-manifest \
             --version "$VERSION" \
             --built-dir built-dist \
-            --out-dir canonical-dist
+            --out-dir canonical-dist \
+            --expected-wheels 4 \
+            --expected-sdists 1 \
+            --required-wheel-tag x86_64 \
+            --required-wheel-tag aarch64 \
+            --required-wheel-tag macosx \
+            --required-wheel-tag win_amd64
       - name: Upload the canonical ordvec-manifest Python dist
         uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
         with:
@@ -1473,7 +1485,13 @@ jobs:
           python3 tests/release_pypi_canonical_dist.py verify \
             --project ordvec-manifest \
             --version "$VERSION" \
-            --dist-dir dist
+            --dist-dir dist \
+            --expected-wheels 4 \
+            --expected-sdists 1 \
+            --required-wheel-tag x86_64 \
+            --required-wheel-tag aarch64 \
+            --required-wheel-tag macosx \
+            --required-wheel-tag win_amd64
 
   publish-pypi:
     name: publish to PyPI
@@ -1515,7 +1533,13 @@ jobs:
           set -euo pipefail
           python3 tests/release_pypi_canonical_dist.py verify \
             --version "$VERSION" \
-            --dist-dir dist
+            --dist-dir dist \
+            --expected-wheels 4 \
+            --expected-sdists 1 \
+            --required-wheel-tag x86_64 \
+            --required-wheel-tag aarch64 \
+            --required-wheel-tag macosx \
+            --required-wheel-tag win_amd64
 
   publish-github-release:
     name: un-draft the GitHub Release (only after all registry publishes succeed)
diff --git a/tests/release_publish_invariants.py b/tests/release_publish_invariants.py
index 33bc4cd..bf98c38 100644
--- a/tests/release_publish_invariants.py
+++ b/tests/release_publish_invariants.py
@@ -28,6 +28,14 @@
 SDE_ACTION_PATH = os.environ.get(
     "SDE_ACTION_PATH", ".github/actions/setup-intel-sde/action.yml"
 )
+PYPI_CANONICAL_EXPECTED_ARGS = (
+    "--expected-wheels 4",
+    "--expected-sdists 1",
+    "--required-wheel-tag x86_64",
+    "--required-wheel-tag aarch64",
+    "--required-wheel-tag macosx",
+    "--required-wheel-tag win_amd64",
+)
 
 
 def fail(message: str) -> None:
@@ -1089,6 +1097,9 @@ def check_pypi_canonical_dist(
                 fail(f"{path}: {job_name} canonicalize step must read built-dist and write canonical-dist")
             if project is not None and f"--project {project}" not in run:
                 fail(f"{path}: {job_name} canonicalize step must pass --project {project}")
+            for required_arg in PYPI_CANONICAL_EXPECTED_ARGS:
+                if required_arg not in run:
+                    fail(f"{path}: {job_name} canonicalize step must pass {required_arg}")
 
     if len(wheels_downloads) != 1:
         fail(f"{path}: {job_name} must download exactly one {wheel_artifact_pattern} artifact set")
@@ -1151,6 +1162,9 @@ def check_publish_pypi(
                 fail(f"{path}: {job_name} PyPI verify step must verify dist")
             if project is not None and f"--project {project}" not in run:
                 fail(f"{path}: {job_name} PyPI verify step must pass --project {project}")
+            for required_arg in PYPI_CANONICAL_EXPECTED_ARGS:
+                if required_arg not in run:
+                    fail(f"{path}: {job_name} PyPI verify step must pass {required_arg}")
 
     if len(publish_steps) != 1:
         fail(f"{path}: {job_name} must have exactly one pypa/gh-action-pypi-publish step")
diff --git a/tests/release_pypi_canonical_dist.py b/tests/release_pypi_canonical_dist.py
index b0be14f..86de8e8 100644
--- a/tests/release_pypi_canonical_dist.py
+++ b/tests/release_pypi_canonical_dist.py
@@ -65,6 +65,29 @@ def dist_files(directory: Path) -> dict[str, Path]:
     return files
 
 
+def validate_expected_dist(
+    files: dict[str, Any],
+    *,
+    expected_wheels: int | None = None,
+    expected_sdists: int | None = None,
+    required_wheel_tags: tuple[str, ...] = (),
+) -> None:
+    """Check wheel/sdist counts and required wheel-name substrings; fail on a mismatch."""
+    wheels = sorted(filename for filename in files if filename.endswith(".whl"))
+    sdists = sorted(filename for filename in files if filename.endswith(".tar.gz"))
+    if expected_wheels is not None and expected_wheels != len(wheels):
+        fail(f"expected {expected_wheels} wheel files, found {len(wheels)}: {wheels!r}")
+    if expected_sdists is not None and expected_sdists != len(sdists):
+        fail(f"expected {expected_sdists} sdist files, found {len(sdists)}: {sdists!r}")
+    # A tag counts as present when it is a substring of any wheel filename.
+    missing_tags = [tag for tag in required_wheel_tags if all(tag not in w for w in wheels)]
+    if missing_tags:
+        fail(
+            "wheel dist is missing required platform tag substrings: "
+            f"missing={missing_tags!r} wheels={wheels!r}"
+        )
+
+
 def fetch_pypi_payload(project: str, version: str) -> dict[str, Any] | None:
     url = f"https://pypi.org/pypi/{project}/{version}/json"
     try:
@@ -130,8 +153,23 @@ def ensure_same_filenames(local: dict[str, Path], remote: dict[str, dict[str, st
         )
 
 
-def canonicalize(project: str, version: str, built_dir: Path, out_dir: Path) -> None:
+def canonicalize(
+    project: str,
+    version: str,
+    built_dir: Path,
+    out_dir: Path,
+    *,
+    expected_wheels: int | None = None,
+    expected_sdists: int | None = None,
+    required_wheel_tags: tuple[str, ...] = (),
+) -> None:
     built = dist_files(built_dir)
+    validate_expected_dist(
+        built,
+        expected_wheels=expected_wheels,
+        expected_sdists=expected_sdists,
+        required_wheel_tags=required_wheel_tags,
+    )
     prepare_empty_dir(out_dir)
     try:
         payload = fetch_pypi_payload(project, version)
@@ -151,6 +189,12 @@ def canonicalize(project: str, version: str, built_dir: Path, out_dir: Path) ->
     except PyPIReadError as exc:
         fail(str(exc))
     ensure_same_filenames(built, remote)
+    validate_expected_dist(
+        remote,
+        expected_wheels=expected_wheels,
+        expected_sdists=expected_sdists,
+        required_wheel_tags=required_wheel_tags,
+    )
 
     mismatched: list[str] = []
     for filename, path in built.items():
@@ -180,12 +224,40 @@ def remote_hashes(project: str, version: str) -> dict[str, str] | None:
     return {name: item["sha256"] for name, item in pypi_dist_map(payload).items()}
 
 
-def local_hashes(dist_dir: Path) -> dict[str, str]:
-    return {name: sha256_file(path) for name, path in dist_files(dist_dir).items()}
-
-
-def verify(project: str, version: str, dist_dir: Path, attempts: int, sleep_seconds: float) -> None:
-    local = local_hashes(dist_dir)
+def local_hashes(
+    dist_dir: Path,
+    *,
+    expected_wheels: int | None = None,
+    expected_sdists: int | None = None,
+    required_wheel_tags: tuple[str, ...] = (),
+) -> dict[str, str]:
+    """Validate the files in `dist_dir`, then map each name to its `sha256_file` digest."""
+    found = dist_files(dist_dir)
+    expectations = {"expected_wheels": expected_wheels, "expected_sdists": expected_sdists}
+    validate_expected_dist(found, required_wheel_tags=required_wheel_tags, **expectations)
+    digests: dict[str, str] = {}
+    for filename, path in found.items():
+        digests[filename] = sha256_file(path)
+    return digests
+
+
+def verify(
+    project: str,
+    version: str,
+    dist_dir: Path,
+    attempts: int,
+    sleep_seconds: float,
+    *,
+    expected_wheels: int | None = None,
+    expected_sdists: int | None = None,
+    required_wheel_tags: tuple[str, ...] = (),
+) -> None:
+    local = local_hashes(
+        dist_dir,
+        expected_wheels=expected_wheels,
+        expected_sdists=expected_sdists,
+        required_wheel_tags=required_wheel_tags,
+    )
     url = f"https://pypi.org/pypi/{project}/{version}/json"
     last_error = "not checked"
     for attempt in range(1, attempts + 1):
@@ -212,6 +284,14 @@ def parse_args() -> argparse.Namespace:
     canonical.add_argument("--version", required=True)
     canonical.add_argument("--built-dir", required=True, type=Path)
     canonical.add_argument("--out-dir", required=True, type=Path)
+    canonical.add_argument("--expected-wheels", type=int)
+    canonical.add_argument("--expected-sdists", type=int)
+    canonical.add_argument(
+        "--required-wheel-tag",
+        action="append",
+        default=[],
+        help="Require at least one wheel filename containing this substring; may be repeated.",
+    )
 
     verify_parser = subparsers.add_parser("verify")
     verify_parser.add_argument("--project", default=DEFAULT_PROJECT)
@@ -219,6 +299,14 @@ def parse_args() -> argparse.Namespace:
     verify_parser.add_argument("--dist-dir", required=True, type=Path)
     verify_parser.add_argument("--attempts", default=24, type=int)
     verify_parser.add_argument("--sleep-seconds", default=5.0, type=float)
+    verify_parser.add_argument("--expected-wheels", type=int)
+    verify_parser.add_argument("--expected-sdists", type=int)
+    verify_parser.add_argument(
+        "--required-wheel-tag",
+        action="append",
+        default=[],
+        help="Require at least one wheel filename containing this substring; may be repeated.",
+    )
 
     return parser.parse_args()
 
@@ -226,10 +314,27 @@ def parse_args() -> argparse.Namespace:
 def main() -> None:
     args = parse_args()
     if args.command == "canonicalize":
-        canonicalize(args.project, args.version, args.built_dir, args.out_dir)
+        canonicalize(
+            args.project,
+            args.version,
+            args.built_dir,
+            args.out_dir,
+            expected_wheels=args.expected_wheels,
+            expected_sdists=args.expected_sdists,
+            required_wheel_tags=tuple(args.required_wheel_tag),
+        )
         return
     if args.command == "verify":
-        verify(args.project, args.version, args.dist_dir, args.attempts, args.sleep_seconds)
+        verify(
+            args.project,
+            args.version,
+            args.dist_dir,
+            args.attempts,
+            args.sleep_seconds,
+            expected_wheels=args.expected_wheels,
+            expected_sdists=args.expected_sdists,
+            required_wheel_tags=tuple(args.required_wheel_tag),
+        )
         return
     raise AssertionError(f"unknown command: {args.command}")
 
diff --git a/tests/release_pypi_canonical_dist_tests.py b/tests/release_pypi_canonical_dist_tests.py
index 0022cb1..63c76b3 100644
--- a/tests/release_pypi_canonical_dist_tests.py
+++ b/tests/release_pypi_canonical_dist_tests.py
@@ -24,6 +24,17 @@ def write(path: Path, data: bytes) -> str:
     return hashlib.sha256(data).hexdigest()
 
 
+def write_complete_release_dist(directory: Path, project: str = "ordvec") -> dict[str, str]:
+    files = {
+        f"{project}-0.3.0.tar.gz": b"sdist",
+        f"{project}-0.3.0-cp310-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl": b"linux x86_64",
+        f"{project}-0.3.0-cp310-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl": b"linux aarch64",
+        f"{project}-0.3.0-cp310-abi3-macosx_11_0_arm64.whl": b"macos arm64",
+        f"{project}-0.3.0-cp310-abi3-win_amd64.whl": b"windows amd64",
+    }
+    return {name: write(directory / name, data) for name, data in files.items()}
+
+
 class CanonicalPyPIDistTests(unittest.TestCase):
     def test_missing_pypi_release_uses_current_build(self) -> None:
         with tempfile.TemporaryDirectory() as tmp:
@@ -45,6 +56,86 @@ def test_missing_pypi_release_uses_current_build(self) -> None:
             self.assertEqual((out / "ordvec-0.3.0.tar.gz").read_bytes(), b"fresh sdist")
             self.assertEqual((out / "ordvec-0.3.0-cp310-abi3-win_amd64.whl").read_bytes(), b"fresh wheel")
 
+    def test_missing_pypi_release_accepts_complete_expected_release_dist(self) -> None:
+        # Four wheels, one sdist, and one tag substring per required platform.
+        expectations = {
+            "expected_wheels": 4,
+            "expected_sdists": 1,
+            "required_wheel_tags": ("x86_64", "aarch64", "macosx", "win_amd64"),
+        }
+        with tempfile.TemporaryDirectory() as tmp:
+            built_dir = Path(tmp) / "built"
+            out_dir = Path(tmp) / "out"
+            built_dir.mkdir()
+            write_complete_release_dist(built_dir)
+
+            # Stub the PyPI lookup to return None; restore the real fetcher afterwards.
+            saved_fetch = canonical.fetch_pypi_payload
+            canonical.fetch_pypi_payload = lambda project, version: None
+            try:
+                with redirect_stdout(io.StringIO()):
+                    canonical.canonicalize("ordvec", "0.3.0", built_dir, out_dir, **expectations)
+            finally:
+                canonical.fetch_pypi_payload = saved_fetch
+
+            wheel_count = sum(1 for _ in out_dir.glob("*.whl"))
+            sdist_count = sum(1 for _ in out_dir.glob("*.tar.gz"))
+            self.assertEqual(wheel_count, 4)
+            self.assertEqual(sdist_count, 1)
+
+    def test_canonicalize_rejects_incomplete_expected_wheel_set(self) -> None:
+        def forbidden_fetch(project: str, version: str) -> None:
+            self.fail("unexpected PyPI fetch")
+
+        expectations = {
+            "expected_wheels": 4,
+            "expected_sdists": 1,
+            "required_wheel_tags": ("x86_64", "aarch64", "macosx", "win_amd64"),
+        }
+        with tempfile.TemporaryDirectory() as tmp:
+            built_dir = Path(tmp) / "built"
+            out_dir = Path(tmp) / "out"
+            built_dir.mkdir()
+            # One sdist but only a single wheel, although four wheels are demanded.
+            write(built_dir / "ordvec-0.3.0.tar.gz", b"fresh sdist")
+            write(built_dir / "ordvec-0.3.0-cp310-abi3-win_amd64.whl", b"fresh wheel")
+
+            saved_fetch = canonical.fetch_pypi_payload
+            canonical.fetch_pypi_payload = forbidden_fetch
+            try:
+                with redirect_stderr(io.StringIO()), self.assertRaises(SystemExit):
+                    canonical.canonicalize("ordvec", "0.3.0", built_dir, out_dir, **expectations)
+            finally:
+                canonical.fetch_pypi_payload = saved_fetch
+
+    def test_canonicalize_rejects_missing_required_platform_tag(self) -> None:
+        # Four wheels as demanded, but no filename contains "win_amd64".
+        wheel_payloads = {
+            "manylinux_2_17_x86_64": b"linux x86_64",
+            "manylinux_2_17_aarch64": b"linux aarch64",
+            "macosx_11_0_arm64": b"macos arm64",
+            "macosx_12_0_universal2": b"extra macos",
+        }
+        expectations = {
+            "expected_wheels": 4,
+            "expected_sdists": 1,
+            "required_wheel_tags": ("x86_64", "aarch64", "macosx", "win_amd64"),
+        }
+        with tempfile.TemporaryDirectory() as tmp:
+            built_dir, out_dir = Path(tmp) / "built", Path(tmp) / "out"
+            built_dir.mkdir()
+            write(built_dir / "ordvec-0.3.0.tar.gz", b"fresh sdist")
+            for platform_tag, payload in wheel_payloads.items():
+                write(built_dir / f"ordvec-0.3.0-cp310-abi3-{platform_tag}.whl", payload)
+
+            saved_fetch = canonical.fetch_pypi_payload
+            canonical.fetch_pypi_payload = lambda project, version: self.fail("unexpected PyPI fetch")
+            try:
+                with redirect_stderr(io.StringIO()), self.assertRaises(SystemExit):
+                    canonical.canonicalize("ordvec", "0.3.0", built_dir, out_dir, **expectations)
+            finally:
+                canonical.fetch_pypi_payload = saved_fetch
+
     def test_existing_pypi_release_uses_verified_remote_bytes(self) -> None:
         with tempfile.TemporaryDirectory() as tmp:
             root = Path(tmp)
@@ -175,6 +266,29 @@ def test_verify_retries_after_empty_pypi_dist_payload(self) -> None:
 
             self.assertEqual(sleeps, [0.5])
 
+    def test_verify_rejects_incomplete_local_dist_before_remote_check(self) -> None:
+        expectations = {
+            "expected_wheels": 4,
+            "expected_sdists": 1,
+            "required_wheel_tags": ("x86_64", "aarch64", "macosx", "win_amd64"),
+        }
+        with tempfile.TemporaryDirectory() as tmp:
+            dist_dir = Path(tmp)
+            # Only one wheel is on disk, although four are demanded.
+            write(dist_dir / "ordvec-0.3.0.tar.gz", b"canonical sdist")
+            write(dist_dir / "ordvec-0.3.0-cp310-abi3-win_amd64.whl", b"canonical wheel")
+
+            # The stub fails the test if `fetch_pypi_payload` is called at all.
+            saved_fetch = canonical.fetch_pypi_payload
+            canonical.fetch_pypi_payload = lambda project, version: self.fail("unexpected PyPI fetch")
+            try:
+                with redirect_stderr(io.StringIO()), self.assertRaises(SystemExit):
+                    canonical.verify(
+                        "ordvec", "0.3.0", dist_dir, attempts=1, sleep_seconds=0.0, **expectations
+                    )
+            finally:
+                canonical.fetch_pypi_payload = saved_fetch
+
     def test_canonicalize_reports_pypi_read_error(self) -> None:
         with tempfile.TemporaryDirectory() as tmp:
             root = Path(tmp)

From 41d8cc2839b42860aa8b7209aeb596bfe0e60aa6 Mon Sep 17 00:00:00 2001
From: Nelson Spence <nelson@projectnavi.ai>
Date: Tue, 9 Jun 2026 16:13:21 -0500
Subject: [PATCH 3/4] Address PR review and fuzz smoke findings

Signed-off-by: Nelson Spence <nelson@projectnavi.ai>
---
 .../signbitmap_rankquant_twostage.rs          | 20 ++--
 ordvec-manifest/src/lib.rs                    |  2 +-
 ordvec-manifest/src/sqlite.rs                 | 15 +--
 ordvec-manifest/tests/manifest.rs             | 97 +++++++++++++++++++
 4 files changed, 119 insertions(+), 15 deletions(-)

diff --git a/fuzz/fuzz_targets/signbitmap_rankquant_twostage.rs b/fuzz/fuzz_targets/signbitmap_rankquant_twostage.rs
index 1e35582..22bd60e 100644
--- a/fuzz/fuzz_targets/signbitmap_rankquant_twostage.rs
+++ b/fuzz/fuzz_targets/signbitmap_rankquant_twostage.rs
@@ -6,14 +6,15 @@
 //!
 //! The fuzzer builds both indexes over one generated finite corpus, derives a
 //! bounded structured shape for `(dim, bits, n_vectors, m, k)`, and feeds
-//! duplicate candidate IDs into the subset path. When sign candidate generation
-//! returns the full corpus (`m >= n`), the target also checks that subset
-//! reranking agrees with a full RankQuant search.
+//! duplicate candidate IDs into the subset path while preserving the public
+//! subset API's corpus-sized candidate-budget contract. When sign candidate
+//! generation returns the full corpus (`m >= n`), the target also checks that
+//! subset reranking agrees with a full RankQuant search.
 //!
-//! Contract: no panic, abort, or out-of-bounds access on any in-range candidate
-//! input, subset reranking must preserve score-descending/doc-ID-ascending
-//! ordering, and full-corpus candidate reranking must match full RankQuant
-//! search.
+//! Contract: no panic, abort, or out-of-bounds access on any bounded in-range
+//! candidate input, subset reranking must preserve score-descending/doc-ID-
+//! ascending ordering, and full-corpus candidate reranking must match full
+//! RankQuant search.
 #![no_main]
 
 use libfuzzer_sys::{
@@ -116,12 +117,15 @@ fuzz_target!(|input: TwoStageInput| {
             0 => subset_candidates.clear(),
             1 => {
                 let id = subset_candidates.first().copied().unwrap_or(0);
-                subset_candidates.push(id);
+                if subset_candidates.len() < input.n_vectors {
+                    subset_candidates.push(id);
+                }
             }
             2 if subset_candidates.is_empty() => subset_candidates.push(0),
             _ => {}
         }
     }
+    assert!(subset_candidates.len() <= input.n_vectors);
 
     let (scores, ids) =
         rankquant.search_asymmetric_subset(query, &subset_candidates, input.k);
diff --git a/ordvec-manifest/src/lib.rs b/ordvec-manifest/src/lib.rs
index 901deed..acdfc5a 100644
--- a/ordvec-manifest/src/lib.rs
+++ b/ordvec-manifest/src/lib.rs
@@ -3823,7 +3823,7 @@ fn validate_row_id_string(
             errors,
             limits,
             format!("row_identity_{field}_invalid_uuid"),
-            format!("line {line_idx} {field} must be a UUID because row_identity.id_kind is uuid"),
+            format!("line {line_idx} {field} must be a UUID in v1"),
         );
     }
 }
diff --git a/ordvec-manifest/src/sqlite.rs b/ordvec-manifest/src/sqlite.rs
index a84f156..bd6694c 100644
--- a/ordvec-manifest/src/sqlite.rs
+++ b/ordvec-manifest/src/sqlite.rs
@@ -701,21 +701,18 @@ fn verification_reports_needs_migration(conn: &Connection) -> Result<bool, Manif
         "encoder_distortion_profile_sha256",
         "report_json",
     ];
-    if current_required
-        .iter()
-        .all(|required| columns.iter().any(|column| column == required))
-    {
+    if has_required_columns(&columns, &current_required) {
         return Ok(false);
     }
 
-    let legacy_schema = [
+    let legacy_required = [
         "manifest_id",
         "manifest_path",
         "checked_at",
         "ok",
         "report_json",
     ];
-    if columns.iter().map(String::as_str).eq(legacy_schema) {
+    if has_required_columns(&columns, &legacy_required) {
         return Ok(true);
     }
 
@@ -725,6 +722,12 @@ fn verification_reports_needs_migration(conn: &Connection) -> Result<bool, Manif
     )))
 }
 
+/// True when every name in `required` appears in `columns`; extra columns are allowed.
+fn has_required_columns(columns: &[String], required: &[&str]) -> bool {
+    let present = |name: &&str| columns.iter().any(|column| column == name);
+    required.iter().all(present)
+}
+
 fn sqlite_err(err: rusqlite::Error) -> ManifestError {
     ManifestError::invalid(format!("sqlite error: {err}"))
 }
diff --git a/ordvec-manifest/tests/manifest.rs b/ordvec-manifest/tests/manifest.rs
index 1762e47..7c9ff43 100644
--- a/ordvec-manifest/tests/manifest.rs
+++ b/ordvec-manifest/tests/manifest.rs
@@ -2424,6 +2424,43 @@ fn jsonl_row_identity_rejects_non_uuid_ids() {
     assert!(codes.contains(&"row_identity_parent_id_invalid_uuid"));
 }
 
+#[test]
+fn jsonl_row_identity_uuid_error_message_is_v1_scoped() {
+    // Fixture: an index written with size 1 and a row map built from ("doc-a", None).
+    let workdir = tempfile::tempdir().unwrap();
+    let base = workdir.path();
+    let index_path = write_rankquant_index(base, 1);
+    let rows_path = base.join("rows.jsonl");
+    write_row_map(&rows_path, &[("doc-a", None)]);
+    let rows_digest = sha256_file(&rows_path).unwrap();
+    let manifest_path = base.join("manifest.json");
+    let mut manifest = create_manifest_for_index(
+        &index_path,
+        CreateRowIdentity::RowIdIdentity,
+        "test-embedding",
+        &manifest_path,
+    )
+    .unwrap();
+    // Point the manifest at the JSONL row map while declaring id_kind "u64".
+    manifest.row_identity = RowIdentity::Jsonl {
+        path: "rows.jsonl".to_string(),
+        sha256: rows_digest.sha256,
+        row_count: 1,
+        id_kind: "u64".to_string(),
+        db: None,
+    };
+
+    let report = verify_manifest_with_base(manifest, base, VerifyOptions::default());
+    let reported = error_codes(&report);
+    assert!(reported.contains(&"row_identity_id_kind_unsupported"));
+    let wanted = "row_identity_db_id_invalid_uuid";
+    let hit = report.errors.iter().find(|issue| issue.code == wanted);
+    let issue = hit.expect("non-UUID db_id should still report v1 UUID validation");
+    assert!(issue.message.contains("must be a UUID in v1"));
+    let stale = "because row_identity.id_kind is uuid";
+    assert!(!issue.message.contains(stale));
+}
+
 #[test]
 fn jsonl_row_identity_rejects_reserved_db_metadata() {
     let temp = tempfile::tempdir().unwrap();
@@ -3073,6 +3110,66 @@ fn sqlite_refuses_to_migrate_unknown_verification_reports_table() {
     assert_eq!(columns, vec!["id"]);
 }
 
+#[cfg(feature = "sqlite")]
+#[test]
+fn sqlite_migrates_legacy_verification_reports_by_required_column_names() {
+    use rusqlite::Connection;
+
+    // Persist a manifest for a fresh index, then load it back as a document.
+    let workdir = tempfile::tempdir().unwrap();
+    let base = workdir.path();
+    let index_path = write_index(base);
+    let manifest_path = base.join("manifest.json");
+    let manifest = create_manifest_for_index(
+        &index_path,
+        CreateRowIdentity::RowIdIdentity,
+        "test-embedding",
+        &manifest_path,
+    )
+    .unwrap();
+    let pretty = serde_json::to_string_pretty(&manifest).unwrap();
+    fs::write(&manifest_path, pretty).unwrap();
+    let document = load_manifest_file(&manifest_path).unwrap();
+
+    // Seed the registry file with a table that has the five legacy columns in
+    // an arbitrary order, plus one unrelated column named `extra`.
+    let db_path = base.join("legacy.sqlite");
+    let legacy_schema = "CREATE TABLE verification_reports(
+            report_json TEXT,
+            checked_at TEXT,
+            extra TEXT,
+            ok INTEGER,
+            manifest_path TEXT,
+            manifest_id TEXT
+        )";
+    let seed = Connection::open(&db_path).unwrap();
+    seed.execute(legacy_schema, []).unwrap();
+    drop(seed);
+
+    // Verification through the registry is expected to succeed.
+    let report = ordvec_manifest::sqlite::verify_with_registry(
+        &db_path,
+        &document,
+        &manifest_path,
+        VerifyOptions::default(),
+        true,
+    )
+    .unwrap();
+    assert!(report.ok, "{:?}", report.errors);
+
+    // Afterwards the table must expose the current columns and no longer
+    // carry `extra`.
+    let reopened = Connection::open(&db_path).unwrap();
+    let pragma = "PRAGMA table_info(verification_reports)";
+    let mut stmt = reopened.prepare(pragma).unwrap();
+    let names = stmt.query_map([], |row| row.get::<_, String>(1)).unwrap();
+    let columns = names.collect::<Result<Vec<_>, _>>().unwrap();
+    for expected in ["report_id", "manifest_sha256"] {
+        assert!(columns.contains(&expected.to_string()));
+    }
+    assert!(!columns.contains(&"extra".to_string()));
+}
+
 #[cfg(feature = "sqlite")]
 #[test]
 fn sqlite_cache_is_explicit_and_activation_reverifies_by_default() {

From 2d15584c77e1ed4b65a44fc406a57a23f5fc5701 Mon Sep 17 00:00:00 2001
From: Nelson Spence <nelson@projectnavi.ai>
Date: Tue, 9 Jun 2026 16:35:31 -0500
Subject: [PATCH 4/4] Fail closed on crates.io recovery errors

Signed-off-by: Nelson Spence <nelson@projectnavi.ai>
---
 .github/workflows/release.yml       | 120 ++++++++++++++++++++++++++--
 tests/release_publish_invariants.py |  54 +++++++++----
 2 files changed, 154 insertions(+), 20 deletions(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 73a4bca..24af2ba 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -1089,8 +1089,62 @@ jobs:
           STATIC_URL="https://static.crates.io/crates/ordvec/ordvec-${VERSION}.crate"
           CRATES_IO_USER_AGENT="ordvec-release-verify/${VERSION} (https://github.com/Fieldnote-Echo/ordvec)"
           EXISTING="${RUNNER_TEMP}/existing-ordvec.crate"
-          if curl -fsSL --user-agent "$CRATES_IO_USER_AGENT" "$API_URL" -o "$EXISTING" \
-            || curl -fsSL --user-agent "$CRATES_IO_USER_AGENT" "$STATIC_URL" -o "$EXISTING"; then
+          API_STATUS_FILE="${RUNNER_TEMP}/existing-ordvec-api-status.txt"
+          STATIC_STATUS_FILE="${RUNNER_TEMP}/existing-ordvec-static-status.txt"
+          already_present=false
+
+          rm -f "$EXISTING" "$API_STATUS_FILE" "$STATIC_STATUS_FILE"
+          API_CURL_EXIT=0
+          curl -sSL --retry 3 --retry-delay 2 --retry-all-errors --connect-timeout 10 --max-time 60 \
+            --user-agent "$CRATES_IO_USER_AGENT" \
+            --write-out "%{http_code}" \
+            --output "$EXISTING" \
+            "$API_URL" > "$API_STATUS_FILE" || API_CURL_EXIT=$?
+          API_STATUS="$(cat "$API_STATUS_FILE")"
+          if [ "$API_CURL_EXIT" -ne 0 ]; then
+            echo "::error::could not determine crates.io status while checking ordvec ${VERSION} at $API_URL (curl exit ${API_CURL_EXIT}). Refusing recovery."
+            exit 1
+          fi
+          case "$API_STATUS" in
+            200)
+              already_present=true
+              ;;
+            404)
+              rm -f "$EXISTING"
+              ;;
+            *)
+              echo "::error::unexpected crates.io status ${API_STATUS} while checking ordvec ${VERSION} at $API_URL. Refusing recovery."
+              exit 1
+              ;;
+          esac
+
+          if [ "$already_present" != true ]; then
+            STATIC_CURL_EXIT=0
+            curl -sSL --retry 3 --retry-delay 2 --retry-all-errors --connect-timeout 10 --max-time 60 \
+              --user-agent "$CRATES_IO_USER_AGENT" \
+              --write-out "%{http_code}" \
+              --output "$EXISTING" \
+              "$STATIC_URL" > "$STATIC_STATUS_FILE" || STATIC_CURL_EXIT=$?
+            STATIC_STATUS="$(cat "$STATIC_STATUS_FILE")"
+            if [ "$STATIC_CURL_EXIT" -ne 0 ]; then
+              echo "::error::could not determine crates.io status while checking ordvec ${VERSION} at $STATIC_URL (curl exit ${STATIC_CURL_EXIT}). Refusing recovery."
+              exit 1
+            fi
+            case "$STATIC_STATUS" in
+              200)
+                already_present=true
+                ;;
+              404)
+                rm -f "$EXISTING"
+                ;;
+              *)
+                echo "::error::unexpected crates.io status ${STATIC_STATUS} while checking ordvec ${VERSION} at $STATIC_URL. Refusing recovery."
+                exit 1
+                ;;
+            esac
+          fi
+
+          if [ "$already_present" = true ]; then
             E_SHA=$(sha256sum "$EXISTING" | cut -d' ' -f1)
             echo "attested:         $A_SHA"
             echo "crates.io-served: $E_SHA"
@@ -1102,7 +1156,7 @@ jobs:
             echo "::notice::crates.io already serves byte-identical ordvec ${VERSION}; skipping upload and verifying served bytes."
           else
             echo "already_published=false" >> "$GITHUB_OUTPUT"
-            echo "No existing ordvec ${VERSION} .crate found on crates.io; proceeding with publish."
+            echo "Both crates.io recovery endpoints returned 404 for ordvec ${VERSION}; proceeding with publish."
           fi
       # Mint the short-lived crates.io credential immediately before publish so
       # the ephemeral token's exposure window is minimal. No stored secret.
@@ -1386,8 +1440,62 @@ jobs:
           STATIC_URL="https://static.crates.io/crates/ordvec-manifest/ordvec-manifest-${VERSION}.crate"
           CRATES_IO_USER_AGENT="ordvec-release-verify/${VERSION} (https://github.com/Fieldnote-Echo/ordvec)"
           EXISTING="${RUNNER_TEMP}/existing-ordvec-manifest.crate"
-          if curl -fsSL --user-agent "$CRATES_IO_USER_AGENT" "$API_URL" -o "$EXISTING" \
-            || curl -fsSL --user-agent "$CRATES_IO_USER_AGENT" "$STATIC_URL" -o "$EXISTING"; then
+          API_STATUS_FILE="${RUNNER_TEMP}/existing-ordvec-manifest-api-status.txt"
+          STATIC_STATUS_FILE="${RUNNER_TEMP}/existing-ordvec-manifest-static-status.txt"
+          already_present=false
+
+          rm -f "$EXISTING" "$API_STATUS_FILE" "$STATIC_STATUS_FILE"
+          API_CURL_EXIT=0
+          curl -sSL --retry 3 --retry-delay 2 --retry-all-errors --connect-timeout 10 --max-time 60 \
+            --user-agent "$CRATES_IO_USER_AGENT" \
+            --write-out "%{http_code}" \
+            --output "$EXISTING" \
+            "$API_URL" > "$API_STATUS_FILE" || API_CURL_EXIT=$?
+          API_STATUS="$(cat "$API_STATUS_FILE")"
+          if [ "$API_CURL_EXIT" -ne 0 ]; then
+            echo "::error::could not determine crates.io status while checking ordvec-manifest ${VERSION} at $API_URL (curl exit ${API_CURL_EXIT}). Refusing recovery."
+            exit 1
+          fi
+          case "$API_STATUS" in
+            200)
+              already_present=true
+              ;;
+            404)
+              rm -f "$EXISTING"
+              ;;
+            *)
+              echo "::error::unexpected crates.io status ${API_STATUS} while checking ordvec-manifest ${VERSION} at $API_URL. Refusing recovery."
+              exit 1
+              ;;
+          esac
+
+          if [ "$already_present" != true ]; then
+            STATIC_CURL_EXIT=0
+            curl -sSL --retry 3 --retry-delay 2 --retry-all-errors --connect-timeout 10 --max-time 60 \
+              --user-agent "$CRATES_IO_USER_AGENT" \
+              --write-out "%{http_code}" \
+              --output "$EXISTING" \
+              "$STATIC_URL" > "$STATIC_STATUS_FILE" || STATIC_CURL_EXIT=$?
+            STATIC_STATUS="$(cat "$STATIC_STATUS_FILE")"
+            if [ "$STATIC_CURL_EXIT" -ne 0 ]; then
+              echo "::error::could not determine crates.io status while checking ordvec-manifest ${VERSION} at $STATIC_URL (curl exit ${STATIC_CURL_EXIT}). Refusing recovery."
+              exit 1
+            fi
+            case "$STATIC_STATUS" in
+              200)
+                already_present=true
+                ;;
+              404)
+                rm -f "$EXISTING"
+                ;;
+              *)
+                echo "::error::unexpected crates.io status ${STATIC_STATUS} while checking ordvec-manifest ${VERSION} at $STATIC_URL. Refusing recovery."
+                exit 1
+                ;;
+            esac
+          fi
+
+          if [ "$already_present" = true ]; then
             E_SHA=$(sha256sum "$EXISTING" | cut -d' ' -f1)
             echo "attested:         $A_SHA"
             echo "crates.io-served: $E_SHA"
@@ -1399,7 +1507,7 @@ jobs:
             echo "::notice::crates.io already serves byte-identical ordvec-manifest ${VERSION}; skipping upload and verifying served bytes."
           else
             echo "already_published=false" >> "$GITHUB_OUTPUT"
-            echo "No existing ordvec-manifest ${VERSION} .crate found on crates.io; proceeding with publish."
+            echo "Both crates.io recovery endpoints returned 404 for ordvec-manifest ${VERSION}; proceeding with publish."
           fi
       - name: Validate manifest publish dry-run
         if: steps.manifest_crate_recovery.outputs.already_published != 'true'
diff --git a/tests/release_publish_invariants.py b/tests/release_publish_invariants.py
index bf98c38..4ba80e2 100644
--- a/tests/release_publish_invariants.py
+++ b/tests/release_publish_invariants.py
@@ -896,6 +896,38 @@ def readback_curl_uses(words: list[str], url_var: str) -> bool:
     )
 
 
+def recovery_curl_uses(words: list[str], url_var: str) -> bool:
+    """Tell whether a curl word list probes ``url_var`` with UA, output, status, retry."""
+    if not has_shell_arg(words, shell_vars(url_var)):
+        return False
+    bound = (({"--user-agent", "-A"}, "CRATES_IO_USER_AGENT"), ({"--output", "-o"}, "EXISTING"))
+    if not all(has_shell_option_value(words, opts, shell_vars(var)) for opts, var in bound):
+        return False
+    captures_status = has_shell_option_value(words, {"--write-out", "-w"}, {"%{http_code}"})
+    return captures_status and {"--retry", "--retry-all-errors"} <= set(words)
+
+
+def check_crate_recovery_status_handling(
+    recovery_run: str, path: str, job_name: str, package: str
+) -> None:
+    """Fail for each fail-closed status-handling marker absent from ``recovery_run``."""
+    markers = [
+        "API_CURL_EXIT=0",
+        'if [ "$API_CURL_EXIT" -ne 0 ]; then',
+        "STATIC_CURL_EXIT=0",
+        'if [ "$STATIC_CURL_EXIT" -ne 0 ]; then',
+        'case "$API_STATUS" in',
+        'case "$STATIC_STATUS" in',
+        "200)",
+        "404)",
+        "could not determine crates.io status",
+        "unexpected crates.io status",
+        f"Both crates.io recovery endpoints returned 404 for {package}",
+    ]
+    for absent in (marker for marker in markers if marker not in recovery_run):
+        fail(f"{path}: {job_name} recovery step must contain {absent!r}")
+
+
 def check_hash_requirement_temp_paths(paths: list[str]) -> None:
     for path in paths:
         workflow_text = read_text(path)
@@ -1295,18 +1327,14 @@ def check_publish_crate_job(
     ):
         if required not in recovery_run:
             fail(f"{path}: {job_name} recovery step must contain {required!r}")
+    check_crate_recovery_status_handling(recovery_run, path, job_name, package)
     for url_var in ("API_URL", "STATIC_URL"):
         if not any(
-            has_shell_arg(words, shell_vars(url_var))
-            and has_shell_option_value(
-                words, {"--user-agent", "-A"}, shell_vars("CRATES_IO_USER_AGENT")
-            )
-            and has_shell_option_value(words, {"--output", "-o"}, shell_vars("EXISTING"))
-            for words in shell_curl_commands(recovery_run)
+            recovery_curl_uses(words, url_var) for words in shell_curl_commands(recovery_run)
         ):
             fail(
                 f"{path}: {job_name} recovery step must curl ${url_var} "
-                "with CRATES_IO_USER_AGENT into $EXISTING"
+                "with CRATES_IO_USER_AGENT into $EXISTING, capture HTTP status, and retry"
             )
 
     protected_step_names = {
@@ -1424,18 +1452,16 @@ def check_publish_crates(workflow: dict[str, Any], path: str) -> None:
     ):
         if required not in recovery_run:
             fail(f"{path}: manifest crate recovery step must contain {required!r}")
+    check_crate_recovery_status_handling(
+        recovery_run, path, "publish-manifest-crate", "ordvec-manifest"
+    )
     for url_var in ("API_URL", "STATIC_URL"):
         if not any(
-            has_shell_arg(words, shell_vars(url_var))
-            and has_shell_option_value(
-                words, {"--user-agent", "-A"}, shell_vars("CRATES_IO_USER_AGENT")
-            )
-            and has_shell_option_value(words, {"--output", "-o"}, shell_vars("EXISTING"))
-            for words in shell_curl_commands(recovery_run)
+            recovery_curl_uses(words, url_var) for words in shell_curl_commands(recovery_run)
         ):
             fail(
                 f"{path}: manifest crate recovery step must curl ${url_var} "
-                "with CRATES_IO_USER_AGENT into $EXISTING"
+                "with CRATES_IO_USER_AGENT into $EXISTING, capture HTTP status, and retry"
             )
     for index, raw_step in enumerate(manifest_steps):
         step = mapping(raw_step, f"{path}: jobs.publish-manifest-crate.steps[{index}]")