diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 2aa6b8e..c4b5031 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -19,8 +19,8 @@ updates: patterns: - "*" - # Cargo workspace root: core `ordvec`, C ABI, Python binding, and repo-local - # manifest verifier (single workspace Cargo.lock). Group minor+patch into one + # Cargo workspace root: core `ordvec`, C ABI, Python binding, and manifest + # verifier crate (single workspace Cargo.lock). Group minor+patch into one # PR; majors stay separate for manual review. - package-ecosystem: "cargo" directory: "/" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index be03b53..406a9d3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -95,9 +95,9 @@ jobs: run: cargo clippy -p ordvec-ffi --all-targets -- -D warnings # ---------------------------------------------------------------------- - # Repo-local manifest verifier. Kept out of default-members so the core - # crate's default no-system-deps contract stays unchanged; this explicit lane - # covers its no-default-features surface, optional SQLite support, and clippy. + # Manifest verifier crate. Kept out of default-members so the core crate's + # default no-system-deps contract stays unchanged; this explicit lane covers + # its no-default-features surface, optional SQLite support, and clippy. # ---------------------------------------------------------------------- manifest: name: manifest verifier @@ -219,7 +219,7 @@ jobs: # ---------------------------------------------------------------------- # No-system-deps guarantee + packaging check. Fails if any forbidden # numerical/BLAS crate has crept into the dependency tree, then dry-runs - # the publish to catch packaging problems before a real release. + # the publishable crates to catch packaging problems before a real release. 
# ---------------------------------------------------------------------- deps: name: deps (no-system-deps + publish dry-run) @@ -255,9 +255,11 @@ jobs: echo "OK: no forbidden dependencies" - name: cargo publish --dry-run # Explicit -p ordvec: in a workspace, publish should never infer the package - # from the current directory. Only the core crate goes to crates.io - # (ordvec-python is publish = false and ships to PyPI via maturin). + # from the current directory. The Python binding remains publish = false + # and ships to PyPI via maturin. run: cargo publish -p ordvec --dry-run --locked + - name: cargo publish --dry-run -p ordvec-manifest + run: cargo publish -p ordvec-manifest --dry-run --locked # ---------------------------------------------------------------------- # Pin the release-publish invariants. release.yml is tag-triggered (with the diff --git a/CHANGELOG.md b/CHANGELOG.md index a9f4f1f..c84d347 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,9 +22,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Documentation - Added a pre-1.0 compatibility policy covering stable and experimental Rust - APIs, Python bindings, repo-local C/Go/Manifest sidecars, primitive persisted - formats, examples/docs, MSRV/feature changes, and release-note review - expectations. + APIs, Python bindings, the lockstep Manifest crate, repo-local C/Go sidecars, + primitive persisted formats, examples/docs, MSRV/feature changes, and + release-note review expectations. ### Security @@ -40,10 +40,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Added `probe_index_metadata` to inspect persisted `Rank`, `RankQuant`, `Bitmap`, and `SignBitmap` headers without allocating payloads. 
-- Added the repo-local, publish=false `ordvec-manifest` crate with a strict v1 - JSON schema, artifact and row-identity verification, attestation shape - checks, a CLI, and optional SQLite cache/audit support with one active - manifest pointer. +- Added the lockstep `ordvec-manifest` crate with a strict v1 JSON schema, + artifact and row-identity verification, attestation shape checks, a CLI, and + optional SQLite cache/audit support with one active manifest pointer. - Added optional typed calibration profile references to the v1 manifest schema, with path/hash/identity/compatibility verification but no statistical computation. diff --git a/Cargo.lock b/Cargo.lock index 01043e2..ab777b2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -617,7 +617,7 @@ dependencies = [ [[package]] name = "ordvec-manifest" -version = "0.0.0" +version = "0.3.0" dependencies = [ "chrono", "clap", diff --git a/Cargo.toml b/Cargo.toml index 3b77332..129b827 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,10 +79,10 @@ opt-level = 3 # Workspace: the `ordvec-python` member holds the PyO3/maturin bindings shipped to # PyPI as `ordvec`; `ordvec-ffi` holds the C ABI; and `ordvec-manifest` is a -# repo-local, publish=false sidecar verifier. `default-members = ["."]` keeps -# bare `cargo build/test/clippy` scoped to the core crate, so the existing CI -# gates are unaffected; non-core members get explicit CI lanes. The single -# workspace `Cargo.lock` carries their transitive dependencies. +# lockstep manifest verifier crate. `default-members = ["."]` keeps bare +# `cargo build/test/clippy` scoped to the core crate, so the existing CI gates +# are unaffected; non-core members get explicit CI lanes. The single workspace +# `Cargo.lock` carries their transitive dependencies. 
[workspace] resolver = "2" members = ["ordvec-python", "ordvec-ffi", "ordvec-manifest"] diff --git a/README.md b/README.md index b85e2bd..0a58c45 100644 --- a/README.md +++ b/README.md @@ -173,10 +173,10 @@ candidate slices passed to `Search` until the call returns. [`docs/INDEX_PROVENANCE.md`](https://github.com/Fieldnote-Echo/ordvec/blob/main/docs/INDEX_PROVENANCE.md), [`docs/determinism.md`](https://github.com/Fieldnote-Echo/ordvec/blob/main/docs/determinism.md), [`THREAT_MODEL.md`](https://github.com/Fieldnote-Echo/ordvec/blob/main/THREAT_MODEL.md) -- **Repo-local manifest verifier, C ABI, and Go wrapper:** - available from the full GitHub checkout. These sidecars are not part of the - published core `.crate`; use the GitHub checkout for `ordvec-manifest/`, - `ordvec-ffi/`, `ordvec-go/`, and +- **Manifest verifier, C ABI, and Go wrapper:** + `ordvec-manifest` is versioned in lockstep with the core crate and is + package-gated separately; use the GitHub checkout for `ordvec-ffi/`, + `ordvec-go/`, and [`docs/c-api.md`](https://github.com/Fieldnote-Echo/ordvec/blob/main/docs/c-api.md). - **Pre-1.0 compatibility policy:** [`docs/compatibility-policy.md`](docs/compatibility-policy.md) defines the @@ -186,7 +186,7 @@ candidate slices passed to `Search` until the call returns. including its [`proof-spine`](https://github.com/Fieldnote-Echo/ordvec-formalization/blob/main/docs/proof-spine.md), [`theorem-map`](https://github.com/Fieldnote-Echo/ordvec-formalization/blob/main/docs/theorem-map.md), and [`reviewer brief`](https://github.com/Fieldnote-Echo/ordvec-formalization/blob/main/docs/reviewer-brief.md). -- **API docs:** <https://docs.rs/ordvec> +- **API docs:** <https://docs.rs/ordvec>, <https://docs.rs/ordvec-manifest> - **Paper (OrdVec / RankQuant):** _link TBD — see [Research collaboration](#research-collaboration)._ @@ -285,12 +285,11 @@ checksum, MAC, or signature — by design.** The loaders validate *structure* (magic, version, bounds, exact-length payload) but not *origin*: a structurally valid file can still be untrusted.
If an index file crosses a trust boundary (network transfer, shared storage), verify it before loading. -The full GitHub checkout includes a publish=false sidecar CLI, -`ordvec-manifest`, that binds an index file to a JSON manifest by SHA-256, -header metadata, row identity, named auxiliary sidecars, and attestation shape -checks. It does not sign artifacts, manage keys, or decide deployment trust -policy. No in-format crypto is shipped because it would add key management the -library can't own. See +`ordvec-manifest` binds an index file to a JSON manifest by SHA-256, header +metadata, row identity, named auxiliary sidecars, and attestation shape checks. +It does not sign artifacts, manage keys, or decide deployment trust policy. No +in-format crypto is shipped because it would add key management the library +can't own. See [`docs/PERSISTED_FORMAT.md`](https://github.com/Fieldnote-Echo/ordvec/blob/main/docs/PERSISTED_FORMAT.md), [`docs/INDEX_PROVENANCE.md`](https://github.com/Fieldnote-Echo/ordvec/blob/main/docs/INDEX_PROVENANCE.md), and [`THREAT_MODEL.md`](https://github.com/Fieldnote-Echo/ordvec/blob/main/THREAT_MODEL.md) diff --git a/RELEASING.md b/RELEASING.md index da4a1c8..d614be2 100644 --- a/RELEASING.md +++ b/RELEASING.md @@ -126,9 +126,10 @@ filename. Until either is updated, the corresponding gated publish fails useful for this release; - distinguish `ordvec` primitive API/file compatibility from downstream application database behavior. -3. Bump the version (crate `Cargo.toml`, and `ordvec-python` if the wheel - changed) and update `CHANGELOG.md` with migration notes for every - intentional compatibility break. Commit on `main`. +3. Bump the version (crate `Cargo.toml`, `ordvec-manifest/Cargo.toml` and its + `ordvec` dependency requirement, and `ordvec-python` if the wheel changed) + and update `CHANGELOG.md` with migration notes for every intentional + compatibility break. Commit on `main`. 4. Confirm CI is **green for current `main` HEAD**.
`require-ci-green` checks `main` HEAD's SHA — which needs a **completed, successful** (not `cancelled`, not in-progress) run of `ci.yml`, `python.yml`, `fuzz.yml`, and diff --git a/THREAT_MODEL.md b/THREAT_MODEL.md index 2d7a58d..aac32c0 100644 --- a/THREAT_MODEL.md +++ b/THREAT_MODEL.md @@ -2,7 +2,7 @@ > **Status:** v0.3.0 (pre-1.0), 2026-05-29. This is the maintained threat model > for the `ordvec` Rust crate, C ABI, Go wrapper, PyO3/maturin Python bindings, -> and the repo-local `ordvec-manifest` sidecar verifier. It is reviewed when the +> and the `ordvec-manifest` sidecar verifier. It is reviewed when the > attack surface changes (new persistence formats, new `unsafe` kernels, new > FFI surface, or release-pipeline changes). > @@ -67,7 +67,7 @@ absence of a second maintainer is itself a tracked supply-chain residual | Layer | Components | Trust boundary | |---|---|---| | **Deserialization** | `rank_io.rs` — `.tvr` / `.tvrq` / `.tvbm` / `.tvsb` loaders | Untrusted filesystem / network byte stream | -| **Manifest verification** | `ordvec-manifest` — publish=false JSON sidecar verifier | Manifest + index + optional row-map files before load | +| **Manifest verification** | `ordvec-manifest` — JSON sidecar verifier | Manifest + index + optional row-map files before load | | **Compute kernels** | `fastscan.rs`, `quant_kernels.rs`, `bitmap.rs`, `sign_bitmap.rs` | Trust established after format validation | | **Index API** | `rank.rs`, `quant.rs`, `bitmap.rs`, `sign_bitmap.rs` | Caller-controlled query embeddings | | **C ABI** | `ordvec-ffi` (`include/ordvec.h`) | C caller ↔ Rust boundary; raw pointers and opaque handles | @@ -146,7 +146,7 @@ problem, not a parser problem. *Mitigation (no format change):* validates structure, not origin, and lists verification options (checksum manifest, artifact-store integrity, Sigstore / GitHub artifact attestation) for deployments where index files cross trust boundaries. 
The repo now includes -`ordvec-manifest`, a publish=false sidecar verifier that binds an index file to +`ordvec-manifest`, a sidecar verifier that binds an index file to JSON manifest metadata by SHA-256, allocation-resistant header probing, strict row identity checks, and attestation shape checks. It deliberately does **not** sign, manage keys, call networks, mutate index files, change the C ABI, or diff --git a/docs/INDEX_PROVENANCE.md b/docs/INDEX_PROVENANCE.md index a9b2cc4..3725fee 100644 --- a/docs/INDEX_PROVENANCE.md +++ b/docs/INDEX_PROVENANCE.md @@ -43,7 +43,7 @@ The loaders validate **structure, not origin or truth**: If you load index files that were produced elsewhere, transferred over a network, or stored on shared/mutable infrastructure, verify them **before** -loading. The repo-local `ordvec-manifest` crate provides a sidecar verifier for +loading. The lockstep `ordvec-manifest` crate provides a sidecar verifier for that pre-load step: ```sh diff --git a/docs/compatibility-policy.md b/docs/compatibility-policy.md index 2c2b033..3d6c660 100644 --- a/docs/compatibility-policy.md +++ b/docs/compatibility-policy.md @@ -5,10 +5,10 @@ changes. The project nevertheless treats downstream embedders as real users: patch releases should be safe for stable surfaces, and any intentional break must be visible in release notes before users discover it at build or load time. -This policy covers the published Rust crate, the PyPI bindings, repo-local -sidecars (C ABI, Go, and Manifest), the primitive persisted index formats, and -project examples and documentation. It does not promise a database or -application-store lifecycle outside `ordvec` itself. +This policy covers the published Rust crates, the PyPI bindings, repo-local +sidecars (C ABI and Go), the primitive persisted index formats, and project +examples and documentation. It does not promise a database or application-store +lifecycle outside `ordvec` itself. 
## Versioning Rules @@ -93,11 +93,11 @@ Python, NumPy, or wheel-platform floor changes are minor-release changes unless a supported upstream version has reached end-of-life or a security issue makes the old floor unsafe. Such changes require release-note migration text. -### Repo-Local Sidecars (C ABI, Go, and Manifest) +### Sidecars (C ABI, Go, and Manifest) -`ordvec-ffi`, `ordvec-go`, and `ordvec-manifest` are repo-local sidecars, not -part of the published core `.crate`. They are still consumed by embedders from -the GitHub checkout, so their compatibility must be reviewed before releases. +`ordvec-manifest` is a lockstep crate for manifest verification. `ordvec-ffi` +and `ordvec-go` remain repo-local sidecars consumed by embedders from the +GitHub checkout. All three surfaces must be reviewed before releases. The C ABI is versioned by `ORDVEC_ABI_VERSION`. ABI v1 currently supports loading persisted `RankQuant` and `Bitmap` files, metadata inspection, and @@ -110,10 +110,11 @@ a new ABI version or clear migration notes. The Go wrapper follows the C ABI. Source-breaking Go API changes require the same compatibility classification in release notes. -The `ordvec-manifest` CLI and its v1 JSON schema are also treated as stable -repo-local surfaces. Patch releases should not introduce breaking changes to -the CLI arguments, emitted error codes, or JSON schema structure. Minor -releases may introduce schema or CLI updates with documented migration steps. +The `ordvec-manifest` CLI, library API, and v1 JSON schema are treated as +stable release surfaces. Patch releases should not introduce breaking changes +to the CLI arguments, emitted error codes, library report shapes, or JSON +schema structure. Minor releases may introduce schema, CLI, or library updates +with documented migration steps. 
### Primitive Persisted Formats diff --git a/ordvec-manifest/Cargo.toml b/ordvec-manifest/Cargo.toml index 12d52ca..bc883aa 100644 --- a/ordvec-manifest/Cargo.toml +++ b/ordvec-manifest/Cargo.toml @@ -1,11 +1,19 @@ [package] name = "ordvec-manifest" -version = "0.0.0" +version = "0.3.0" edition = "2021" rust-version = "1.89" -publish = false license = "MIT OR Apache-2.0" -description = "Repo-local ordvec index manifest verifier" +description = "Manifest verifier for ordvec index provenance and sidecar artifacts" +repository = "https://github.com/Fieldnote-Echo/ordvec" +homepage = "https://github.com/Fieldnote-Echo/ordvec" +documentation = "https://docs.rs/ordvec-manifest" +readme = "README.md" +keywords = ["vector-search", "manifest", "provenance", "verification", "quantization"] +categories = ["algorithms", "command-line-utilities", "data-structures"] + +[package.metadata.docs.rs] +all-features = false [lib] name = "ordvec_manifest" @@ -19,7 +27,7 @@ path = "src/main.rs" chrono = { version = "0.4.44", default-features = false, features = ["clock", "std"] } clap = { version = "4.6.1", features = ["derive"] } hex = "0.4.3" -ordvec = { path = ".." } +ordvec = { version = "0.3.0", path = ".." } rusqlite = { version = "0.39.0", optional = true } serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" diff --git a/ordvec-manifest/README.md b/ordvec-manifest/README.md index b29af46..b640f28 100644 --- a/ordvec-manifest/README.md +++ b/ordvec-manifest/README.md @@ -1,6 +1,7 @@ # ordvec-manifest -Repo-local, publish=false sidecar verifier for ordvec index manifests. +Manifest verifier for ordvec index provenance and caller-owned sidecar +artifacts. It verifies index bytes, probed header metadata, row identity, named auxiliary artifacts, optional encoder distortion profile references, optional @@ -9,16 +10,24 @@ ordvec index. 
It does not sign artifacts, manage keys, call networks, mutate index files, decide deployment trust policy, estimate encoder geometry, compute calibration statistics, or change the C ABI. +`ordvec-manifest` is versioned in lockstep with the core `ordvec` crate. Before +the first crates.io release that includes this package, use it from the GitHub +workspace checkout; after that release, install it with `cargo install +ordvec-manifest`. + ```sh -cargo run -p ordvec-manifest -- create \ +ordvec-manifest create \ --index path/to/index.tvrq \ --row-id-is-identity \ --embedding-model bge-small-en-v1.5 \ --out path/to/index.manifest.json -cargo run -p ordvec-manifest -- verify --manifest path/to/index.manifest.json +ordvec-manifest verify --manifest path/to/index.manifest.json ``` +From a pre-release workspace checkout, prefix the same commands with +`cargo run -p ordvec-manifest --`. + The schema version is `ordvec.index_manifest.v1`. Relative paths resolve from the manifest file's directory, absolute paths are rejected by default, and relative paths may not escape the manifest directory unless explicitly allowed. diff --git a/ordvec-manifest/src/lib.rs b/ordvec-manifest/src/lib.rs index d0214a5..72457b6 100644 --- a/ordvec-manifest/src/lib.rs +++ b/ordvec-manifest/src/lib.rs @@ -1,3 +1,19 @@ +//! Manifest verification for ordvec index artifacts. +//! +//! This crate verifies JSON manifests that bind an ordvec index file to +//! SHA-256 digests, probed loader metadata, row identity, caller-owned +//! auxiliary artifacts, optional encoder-distortion profiles, optional +//! calibration profiles, and attestation-shape metadata. It is intentionally a +//! verifier, not a trust oracle: it does not sign artifacts, manage keys, call +//! networks, mutate index files, estimate model geometry, or decide deployment +//! policy. +//! +//! Library callers can use [`load_manifest_file_with_options`] and +//! 
[`verify_document_for_load`], or use [`verify_for_load`] when they need a +//! verified snapshot of the canonical artifact path and related load metadata. +//! The `ordvec-manifest` binary exposes the same bounded verification surfaces +//! for command-line use. + use chrono::{DateTime, SecondsFormat, Utc}; use ordvec::{ probe_index_metadata, IndexKind as CoreIndexKind, IndexMetadata as CoreIndexMetadata,