Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 8 additions & 5 deletions ordvec-manifest/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2747,12 +2747,15 @@ impl ManifestIndexParams {
}
}

/// Successful verification data for a caller that will load an ordvec index.
/// Verified paths and metadata for a caller-managed load.
///
/// A plan intentionally contains paths and report data, not file descriptors or
/// byte buffers. Treat it as proof of the verification just performed, then load
/// from controlled storage immediately or re-verify if another actor may have
/// changed the files.
/// A `VerifiedLoadPlan` means the manifest, primary artifact, row-identity
/// file, and declared auxiliary artifacts verified at the time verification
/// ran. It is not a durable capability over mutable storage: the plan does not
/// pin file descriptors, hold locks, buffer bytes, or guarantee that bytes at
/// the returned paths remain unchanged after verification. Treat it as proof of
/// the verification just performed, then load from controlled storage
/// immediately or re-verify if another actor may have changed the files.
#[derive(Clone, Debug)]
pub struct VerifiedLoadPlan {
manifest_path: Option<PathBuf>,
Expand Down
70 changes: 58 additions & 12 deletions ordvec-manifest/src/sqlite.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use chrono::{SecondsFormat, Utc};
use rusqlite::{params, Connection, OptionalExtension};
use serde::Serialize;
use sha2::{Digest, Sha256};
use std::fs;
use std::path::{Path, PathBuf};

pub fn verify_with_registry(
Expand Down Expand Up @@ -110,6 +111,7 @@ fn init(conn: &Connection) -> Result<(), ManifestError> {
manifest_path TEXT NOT NULL,
checked_at TEXT NOT NULL,
ok INTEGER NOT NULL,
manifest_location_sha256 TEXT,
manifest_sha256 TEXT,
options_sha256 TEXT,
artifact_sha256 TEXT,
Expand All @@ -135,6 +137,7 @@ fn init(conn: &Connection) -> Result<(), ManifestError> {
manifest_path TEXT NOT NULL,
checked_at TEXT NOT NULL,
ok INTEGER NOT NULL,
manifest_location_sha256 TEXT,
manifest_sha256 TEXT,
options_sha256 TEXT,
artifact_sha256 TEXT,
Expand All @@ -147,6 +150,7 @@ fn init(conn: &Connection) -> Result<(), ManifestError> {
CREATE INDEX IF NOT EXISTS verification_reports_cache_idx
ON verification_reports(
manifest_id,
manifest_location_sha256,
manifest_sha256,
options_sha256,
artifact_sha256,
Expand Down Expand Up @@ -194,6 +198,7 @@ fn store_report(
manifest_path,
checked_at,
ok,
manifest_location_sha256,
manifest_sha256,
options_sha256,
artifact_sha256,
Expand All @@ -202,12 +207,13 @@ fn store_report(
auxiliary_artifacts_sha256,
encoder_distortion_profile_sha256,
report_json
) VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
) VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)",
params![
document.manifest.manifest_id,
manifest_path.display().to_string(),
report.checked_at,
i64::from(report.ok),
cache_key.map(|key| key.manifest_location_sha256.as_str()),
cache_key.map(|key| key.manifest_sha256.as_str()),
cache_key.map(|key| key.options_sha256.as_str()),
cache_key.map(|key| key.artifact_sha256.as_str()),
Expand All @@ -234,29 +240,31 @@ fn load_cached_report(
"SELECT report_id, length(CAST(report_json AS BLOB))
FROM verification_reports
WHERE manifest_id = ?1
AND manifest_sha256 = ?2
AND options_sha256 = ?3
AND artifact_sha256 = ?4
AND manifest_location_sha256 = ?2
AND manifest_sha256 = ?3
AND options_sha256 = ?4
AND artifact_sha256 = ?5
AND (
(row_identity_sha256 IS NULL AND ?5 IS NULL)
OR row_identity_sha256 = ?5
(row_identity_sha256 IS NULL AND ?6 IS NULL)
OR row_identity_sha256 = ?6
)
AND (
(calibration_profile_sha256 IS NULL AND ?6 IS NULL)
OR calibration_profile_sha256 = ?6
(calibration_profile_sha256 IS NULL AND ?7 IS NULL)
OR calibration_profile_sha256 = ?7
)
AND (
(auxiliary_artifacts_sha256 IS NULL AND ?7 IS NULL)
OR auxiliary_artifacts_sha256 = ?7
(auxiliary_artifacts_sha256 IS NULL AND ?8 IS NULL)
OR auxiliary_artifacts_sha256 = ?8
)
AND (
(encoder_distortion_profile_sha256 IS NULL AND ?8 IS NULL)
OR encoder_distortion_profile_sha256 = ?8
(encoder_distortion_profile_sha256 IS NULL AND ?9 IS NULL)
OR encoder_distortion_profile_sha256 = ?9
)
ORDER BY report_id DESC
LIMIT 1",
params![
manifest_id,
cache_key.manifest_location_sha256.as_str(),
cache_key.manifest_sha256.as_str(),
cache_key.options_sha256.as_str(),
cache_key.artifact_sha256.as_str(),
Expand Down Expand Up @@ -296,6 +304,7 @@ fn load_cached_report(

#[derive(Clone, Debug)]
struct CacheKey {
manifest_location_sha256: String,
manifest_sha256: String,
options_sha256: String,
artifact_sha256: String,
Expand All @@ -314,6 +323,12 @@ struct CacheableVerifyOptions {
limits: ResourceLimits,
}

/// Serializable record of where a manifest lives on disk.
///
/// `manifest_location_sha256` builds one of these, serializes it to JSON, and
/// hashes the bytes; the digest becomes the `manifest_location_sha256`
/// component of the cache key.
///
/// NOTE(review): the serialized form presumably depends on the field names and
/// their declaration order, so renaming or reordering fields would change the
/// resulting hashes — confirm before touching.
#[derive(Serialize)]
struct CacheableManifestLocation {
// Hex encoding of the canonicalized manifest path's OS-encoded bytes.
manifest_path: String,
// Hex encoding of the canonicalized manifest base directory's OS-encoded bytes.
base_dir: String,
}

impl CacheableVerifyOptions {
fn from_options(options: &VerifyOptions) -> Self {
Self {
Expand All @@ -329,6 +344,26 @@ impl CacheableVerifyOptions {
}
}

/// Computes a SHA-256 digest that identifies where a manifest lives on disk.
///
/// Both `manifest_path` and `document.base_dir` are canonicalized, the
/// OS-encoded bytes of each canonical path are hex-encoded, and the pair is
/// serialized to JSON as a `CacheableManifestLocation` before hashing.
///
/// Returns `Ok(None)` when either path cannot be canonicalized, which callers
/// treat as "no cache key available" rather than as an error.
///
/// # Errors
///
/// Propagates a failure to serialize the location material to JSON.
fn manifest_location_sha256(
    manifest_path: &Path,
    document: &ManifestDocument,
) -> Result<Option<String>, ManifestError> {
    /// Hex-encodes the platform-specific byte representation of `path`.
    fn hex_path(path: &Path) -> String {
        hex::encode(path.as_os_str().as_encoded_bytes())
    }

    // A path that cannot be resolved yields no location hash at all.
    let Ok(canonical_manifest) = fs::canonicalize(manifest_path) else {
        return Ok(None);
    };
    let Ok(canonical_base) = fs::canonicalize(&document.base_dir) else {
        return Ok(None);
    };

    let location = CacheableManifestLocation {
        manifest_path: hex_path(&canonical_manifest),
        base_dir: hex_path(&canonical_base),
    };
    let encoded = serde_json::to_vec(&location)?;
    Ok(Some(sha256_bytes(&encoded)))
}

fn current_cache_key(
document: &ManifestDocument,
manifest_path: &Path,
Expand All @@ -343,6 +378,9 @@ fn current_cache_key(
Ok(hash) => hash.sha256,
Err(_) => return Ok(None),
};
let Some(manifest_location_sha256) = manifest_location_sha256(manifest_path, document)? else {
return Ok(None);
};
let options_json = serde_json::to_vec(&CacheableVerifyOptions::from_options(options))?;
let options_sha256 = sha256_bytes(&options_json);

Expand Down Expand Up @@ -407,6 +445,7 @@ fn current_cache_key(
current_encoder_distortion_profile_sha256(document, options)?;

Ok(Some(CacheKey {
manifest_location_sha256,
manifest_sha256,
options_sha256,
artifact_sha256,
Expand All @@ -432,6 +471,9 @@ fn cache_key_from_report(
Ok(hash) => hash.sha256,
Err(_) => return Ok(None),
};
let Some(manifest_location_sha256) = manifest_location_sha256(manifest_path, document)? else {
return Ok(None);
};
let options_json = serde_json::to_vec(&CacheableVerifyOptions::from_options(options))?;
let options_sha256 = sha256_bytes(&options_json);
let Some(artifact_sha256) = report.artifact.sha256.clone() else {
Expand Down Expand Up @@ -476,6 +518,7 @@ fn cache_key_from_report(
None
};
Ok(Some(CacheKey {
manifest_location_sha256,
manifest_sha256,
options_sha256,
artifact_sha256,
Expand Down Expand Up @@ -644,6 +687,9 @@ fn verification_reports_needs_migration(conn: &Connection) -> Result<bool, Manif
.map_err(sqlite_err)?;
Ok(!columns.iter().any(|column| column == "report_id")
|| !columns.iter().any(|column| column == "manifest_sha256")
|| !columns
.iter()
.any(|column| column == "manifest_location_sha256")
|| !columns
.iter()
.any(|column| column == "calibration_profile_sha256")
Expand Down
146 changes: 146 additions & 0 deletions ordvec-manifest/tests/manifest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3196,6 +3196,152 @@ fn sqlite_cache_key_includes_calibration_profile_bytes() {
assert!(error_codes(&cached).contains(&"calibration_profile_sha256_mismatch"));
}

// Verifying byte-identical manifest/index copies that live in two different
// directories must produce two stored reports: a cached report carries
// canonical paths, so it may not be reused for a manifest at another location.
#[cfg(feature = "sqlite")]
#[test]
fn sqlite_cache_key_is_scoped_to_manifest_location() {
    use rusqlite::Connection;

    // Two sibling directories under one temporary root.
    let root = tempfile::tempdir().unwrap();
    let dir_a = root.path().join("case-a");
    let dir_b = root.path().join("case-b");
    for dir in [&dir_a, &dir_b] {
        fs::create_dir(dir).unwrap();
    }

    // Build the index and its manifest in the first directory.
    let index_a = write_index(&dir_a);
    let manifest_path_a = dir_a.join("manifest.json");
    let manifest_json = serde_json::to_string_pretty(
        &create_manifest_for_index(
            &index_a,
            CreateRowIdentity::RowIdIdentity,
            "test-embedding",
            &manifest_path_a,
        )
        .unwrap(),
    )
    .unwrap();
    fs::write(&manifest_path_a, manifest_json).unwrap();

    // Copy both files verbatim into the second directory.
    let index_b = dir_b.join("index.tvrq");
    let manifest_path_b = dir_b.join("manifest.json");
    fs::copy(&index_a, &index_b).unwrap();
    fs::copy(&manifest_path_a, &manifest_path_b).unwrap();

    let doc_a = load_manifest_file(&manifest_path_a).unwrap();
    let doc_b = load_manifest_file(&manifest_path_b).unwrap();
    let registry = root.path().join("registry.sqlite");

    // First location: verified fresh, report points at the first index.
    let report_a = ordvec_manifest::sqlite::verify_with_registry(
        &registry,
        &doc_a,
        &manifest_path_a,
        VerifyOptions::default(),
        true,
    )
    .unwrap();
    assert!(report_a.ok, "{:?}", report_a.errors);
    let expected_a = fs::canonicalize(&index_a).unwrap();
    assert_eq!(
        report_a.artifact.canonical_path.as_deref(),
        Some(expected_a.to_str().unwrap())
    );

    // Second location: the report must point at the second index, not the first.
    let report_b = ordvec_manifest::sqlite::verify_with_registry(
        &registry,
        &doc_b,
        &manifest_path_b,
        VerifyOptions::default(),
        true,
    )
    .unwrap();
    assert!(report_b.ok, "{:?}", report_b.errors);
    let expected_b = fs::canonicalize(&index_b).unwrap();
    assert_eq!(
        report_b.artifact.canonical_path.as_deref(),
        Some(expected_b.to_str().unwrap())
    );

    // Each location must have produced its own stored report.
    let conn = Connection::open(&registry).unwrap();
    let stored_reports: i64 = conn
        .query_row("SELECT COUNT(*) FROM verification_reports", [], |row| {
            row.get(0)
        })
        .unwrap();
    assert_eq!(
        stored_reports, 2,
        "copied manifests at distinct locations must not reuse canonical-path reports"
    );
}

// Rewriting the JSONL row-identity file after a successful verification must
// invalidate the cached report: the second run has to verify afresh, fail with
// a row-identity hash mismatch, and store its own report.
#[cfg(feature = "sqlite")]
#[test]
fn sqlite_cache_key_includes_jsonl_row_identity_bytes() {
    use rusqlite::Connection;

    let workdir = tempfile::tempdir().unwrap();
    let index_path = write_index(workdir.path());

    // Initial row map that the manifest is created against.
    let row_map_path = workdir.path().join("rows.jsonl");
    write_row_map(
        &row_map_path,
        &[
            ("00000000-0000-0000-0000-000000000001", None),
            ("00000000-0000-0000-0000-000000000002", None),
        ],
    );

    let manifest_path = workdir.path().join("manifest.json");
    let manifest_json = serde_json::to_string_pretty(
        &create_manifest_for_index(
            &index_path,
            CreateRowIdentity::Jsonl(row_map_path.clone()),
            "test-embedding",
            &manifest_path,
        )
        .unwrap(),
    )
    .unwrap();
    fs::write(&manifest_path, manifest_json).unwrap();

    let document = load_manifest_file(&manifest_path).unwrap();
    let registry = workdir.path().join("registry.sqlite");

    // Baseline: everything matches, so verification succeeds and is stored.
    let first_report = ordvec_manifest::sqlite::verify_with_registry(
        &registry,
        &document,
        &manifest_path,
        VerifyOptions::default(),
        true,
    )
    .unwrap();
    assert!(first_report.ok, "{:?}", first_report.errors);

    // Replace the row map with different identifiers behind the manifest's back.
    write_row_map(
        &row_map_path,
        &[
            ("00000000-0000-0000-0000-000000000011", None),
            ("00000000-0000-0000-0000-000000000012", None),
        ],
    );

    let second_report = ordvec_manifest::sqlite::verify_with_registry(
        &registry,
        &document,
        &manifest_path,
        VerifyOptions::default(),
        true,
    )
    .unwrap();
    assert!(
        !second_report.ok,
        "row identity drift must force fresh verification"
    );
    assert!(error_codes(&second_report).contains(&"row_identity_sha256_mismatch"));

    // Both runs must have written their own report row.
    let conn = Connection::open(&registry).unwrap();
    let stored_reports: i64 = conn
        .query_row("SELECT COUNT(*) FROM verification_reports", [], |row| {
            row.get(0)
        })
        .unwrap();
    assert_eq!(stored_reports, 2, "row-map drift must store a fresh report");
}

#[cfg(feature = "sqlite")]
#[test]
fn sqlite_cache_key_includes_auxiliary_artifact_bytes() {
Expand Down
Loading