From ecdd1741dcb76a30db6d2fb8b99dbfdf1b148cec Mon Sep 17 00:00:00 2001 From: Nelson Spence Date: Thu, 4 Jun 2026 00:59:47 -0500 Subject: [PATCH] Scope manifest sqlite cache to location Signed-off-by: Nelson Spence --- ordvec-manifest/src/lib.rs | 21 +++++ ordvec-manifest/src/sqlite.rs | 70 +++++++++++--- ordvec-manifest/tests/manifest.rs | 146 ++++++++++++++++++++++++++++++ 3 files changed, 225 insertions(+), 12 deletions(-) diff --git a/ordvec-manifest/src/lib.rs b/ordvec-manifest/src/lib.rs index a90c745..39bfd2c 100644 --- a/ordvec-manifest/src/lib.rs +++ b/ordvec-manifest/src/lib.rs @@ -192,6 +192,15 @@ pub fn verify_index_manifest( Ok(verify_manifest(&document, options)) } +/// Verify a manifest file and return a point-in-time load plan. +/// +/// This helper fails closed when verification reports any errors. On success it +/// returns canonical artifact and sidecar paths plus the verification report so +/// callers can load without parsing report text. +/// +/// This is not TOCTOU protection: it does not open, pin, lock, or make the +/// backing files immutable. Load from the returned paths immediately on storage +/// you control, or re-verify if files may change between verification and load. pub fn verify_for_load( manifest_path: impl AsRef, options: VerifyOptions, @@ -200,6 +209,11 @@ pub fn verify_for_load( verify_document_for_load(&document, options) } +/// Verify an already-loaded manifest document and return a point-in-time load plan. +/// +/// This has the same fail-closed and TOCTOU boundary as [`verify_for_load`], but +/// uses the document's existing `base_dir` and `source_path` instead of reading +/// the manifest JSON again. pub fn verify_document_for_load( document: &ManifestDocument, options: VerifyOptions, @@ -2733,6 +2747,13 @@ impl ManifestIndexParams { } } +/// Verified paths and metadata for a caller-managed load. 
+/// +/// A `VerifiedLoadPlan` means the manifest, primary artifact, row-identity +/// file, and declared auxiliary artifacts verified at the time verification +/// ran. It is not a durable capability over mutable storage: the plan does not +/// pin file descriptors, hold locks, or guarantee that bytes at the returned +/// paths remain unchanged after verification. #[derive(Clone, Debug)] pub struct VerifiedLoadPlan { manifest_path: Option, diff --git a/ordvec-manifest/src/sqlite.rs b/ordvec-manifest/src/sqlite.rs index a554206..414cd30 100644 --- a/ordvec-manifest/src/sqlite.rs +++ b/ordvec-manifest/src/sqlite.rs @@ -8,6 +8,7 @@ use chrono::{SecondsFormat, Utc}; use rusqlite::{params, Connection, OptionalExtension}; use serde::Serialize; use sha2::{Digest, Sha256}; +use std::fs; use std::path::{Path, PathBuf}; pub fn verify_with_registry( @@ -110,6 +111,7 @@ fn init(conn: &Connection) -> Result<(), ManifestError> { manifest_path TEXT NOT NULL, checked_at TEXT NOT NULL, ok INTEGER NOT NULL, + manifest_location_sha256 TEXT, manifest_sha256 TEXT, options_sha256 TEXT, artifact_sha256 TEXT, @@ -135,6 +137,7 @@ fn init(conn: &Connection) -> Result<(), ManifestError> { manifest_path TEXT NOT NULL, checked_at TEXT NOT NULL, ok INTEGER NOT NULL, + manifest_location_sha256 TEXT, manifest_sha256 TEXT, options_sha256 TEXT, artifact_sha256 TEXT, @@ -147,6 +150,7 @@ fn init(conn: &Connection) -> Result<(), ManifestError> { CREATE INDEX IF NOT EXISTS verification_reports_cache_idx ON verification_reports( manifest_id, + manifest_location_sha256, manifest_sha256, options_sha256, artifact_sha256, @@ -194,6 +198,7 @@ fn store_report( manifest_path, checked_at, ok, + manifest_location_sha256, manifest_sha256, options_sha256, artifact_sha256, @@ -202,12 +207,13 @@ fn store_report( auxiliary_artifacts_sha256, encoder_distortion_profile_sha256, report_json - ) VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)", + ) VALUES(?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, 
?13)", params![ document.manifest.manifest_id, manifest_path.display().to_string(), report.checked_at, i64::from(report.ok), + cache_key.map(|key| key.manifest_location_sha256.as_str()), cache_key.map(|key| key.manifest_sha256.as_str()), cache_key.map(|key| key.options_sha256.as_str()), cache_key.map(|key| key.artifact_sha256.as_str()), @@ -234,29 +240,31 @@ fn load_cached_report( "SELECT report_id, length(CAST(report_json AS BLOB)) FROM verification_reports WHERE manifest_id = ?1 - AND manifest_sha256 = ?2 - AND options_sha256 = ?3 - AND artifact_sha256 = ?4 + AND manifest_location_sha256 = ?2 + AND manifest_sha256 = ?3 + AND options_sha256 = ?4 + AND artifact_sha256 = ?5 AND ( - (row_identity_sha256 IS NULL AND ?5 IS NULL) - OR row_identity_sha256 = ?5 + (row_identity_sha256 IS NULL AND ?6 IS NULL) + OR row_identity_sha256 = ?6 ) AND ( - (calibration_profile_sha256 IS NULL AND ?6 IS NULL) - OR calibration_profile_sha256 = ?6 + (calibration_profile_sha256 IS NULL AND ?7 IS NULL) + OR calibration_profile_sha256 = ?7 ) AND ( - (auxiliary_artifacts_sha256 IS NULL AND ?7 IS NULL) - OR auxiliary_artifacts_sha256 = ?7 + (auxiliary_artifacts_sha256 IS NULL AND ?8 IS NULL) + OR auxiliary_artifacts_sha256 = ?8 ) AND ( - (encoder_distortion_profile_sha256 IS NULL AND ?8 IS NULL) - OR encoder_distortion_profile_sha256 = ?8 + (encoder_distortion_profile_sha256 IS NULL AND ?9 IS NULL) + OR encoder_distortion_profile_sha256 = ?9 ) ORDER BY report_id DESC LIMIT 1", params![ manifest_id, + cache_key.manifest_location_sha256.as_str(), cache_key.manifest_sha256.as_str(), cache_key.options_sha256.as_str(), cache_key.artifact_sha256.as_str(), @@ -296,6 +304,7 @@ fn load_cached_report( #[derive(Clone, Debug)] struct CacheKey { + manifest_location_sha256: String, manifest_sha256: String, options_sha256: String, artifact_sha256: String, @@ -314,6 +323,12 @@ struct CacheableVerifyOptions { limits: ResourceLimits, } +#[derive(Serialize)] +struct CacheableManifestLocation { + 
manifest_path: String, + base_dir: String, +} + impl CacheableVerifyOptions { fn from_options(options: &VerifyOptions) -> Self { Self { @@ -329,6 +344,26 @@ impl CacheableVerifyOptions { } } +fn manifest_location_sha256( + manifest_path: &Path, + document: &ManifestDocument, +) -> Result<Option<String>, ManifestError> { + let manifest_path = match fs::canonicalize(manifest_path) { + Ok(path) => path, + Err(_) => return Ok(None), + }; + let base_dir = match fs::canonicalize(&document.base_dir) { + Ok(path) => path, + Err(_) => return Ok(None), + }; + let material = CacheableManifestLocation { + manifest_path: hex::encode(manifest_path.as_os_str().as_encoded_bytes()), + base_dir: hex::encode(base_dir.as_os_str().as_encoded_bytes()), + }; + let json = serde_json::to_vec(&material)?; + Ok(Some(sha256_bytes(&json))) +} + fn current_cache_key( document: &ManifestDocument, manifest_path: &Path, @@ -343,6 +378,9 @@ fn current_cache_key( Ok(hash) => hash.sha256, Err(_) => return Ok(None), }; + let Some(manifest_location_sha256) = manifest_location_sha256(manifest_path, document)? else { + return Ok(None); + }; let options_json = serde_json::to_vec(&CacheableVerifyOptions::from_options(options))?; let options_sha256 = sha256_bytes(&options_json); @@ -407,6 +445,7 @@ fn current_cache_key( current_encoder_distortion_profile_sha256(document, options)?; Ok(Some(CacheKey { + manifest_location_sha256, manifest_sha256, options_sha256, artifact_sha256, @@ -432,6 +471,9 @@ fn cache_key_from_report( Ok(hash) => hash.sha256, Err(_) => return Ok(None), }; + let Some(manifest_location_sha256) = manifest_location_sha256(manifest_path, document)? 
else { + return Ok(None); + }; let options_json = serde_json::to_vec(&CacheableVerifyOptions::from_options(options))?; let options_sha256 = sha256_bytes(&options_json); let Some(artifact_sha256) = report.artifact.sha256.clone() else { @@ -476,6 +518,7 @@ fn cache_key_from_report( None }; Ok(Some(CacheKey { + manifest_location_sha256, manifest_sha256, options_sha256, artifact_sha256, @@ -644,6 +687,9 @@ fn verification_reports_needs_migration(conn: &Connection) -> Result