77//! (issue #33 S2). Returns a list of `FindingHit`s the caller can render
88//! as a table or JSON.
99//!
10- //! ## Supported forms (S3 initial)
10+ //! ## Supported forms
1111//!
1212//! ```text
1313//! (category UnsafeCode)
1616//! (repo <name-substring>)
1717//! (file <path-substring>)
1818//! (pr-state open | pr-filed | pr-merged | pr-closed | dismissed | nil)
19+ //! (since 2026-04-12) ; or (since "2026-04-12T00:00:00Z")
20+ //! (crosslang :from FFI :to ProofDrift)
21+ //! (crosslang FFI ProofDrift) ; positional shorthand
1922//! (and <expr> <expr> ...)
2023//! (or <expr> <expr> ...)
2124//! (not <expr>)
2225//! ```
2326//!
24- //! ## Deferred to S3 follow-ups
27+ //! ## Semantic notes
2528//!
26- //! - `(crosslang :from FFI :to ProofDrift)` — relational chain over the
27- //! kanren cross-language fact base. Needs an integration with
28- //! `src/kanren/crosslang.rs` that runs *after* the persistence layer
29- //! is settled in S1/S2/S3 initial.
30- //! - `(diff :since 2026-04-12 :category PA022)` — temporal slicing by
31- //! run id. Requires an explicit "since" cursor in the hexad store
32- //! beyond `created_at` (e.g. a "baseline run id" marker).
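//! - `(since ...)` keeps findings whose timestamp is `>=` the cutoff under
//!   plain string comparison. The timestamp is the finding's
//!   `first_seen_run` when that field is non-empty and contains a `T`
//!   (a heuristic for "looks like an ISO-8601 date-time"); otherwise it is
//!   the finding hexad's `created_at`. RFC-3339 / ISO-8601 strings sort
//!   chronologically as strings only when both sides share the same layout
//!   and UTC offset (e.g. all `...Z`), so cutoffs should be written that way.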
3333//!
34- //! The initial form is enough to express the operational queries the
35- //! estate-sweep campaign actually needs day-to-day: "all PA001 of
36- //! Critical severity that don't have an open PR yet", "all dismissed
37- //! findings in repo foo", etc.
34+ //! - `(crosslang :from X :to Y)` is a **co-occurrence proxy** for
35+ //! FFI/cross-language reachability: it matches a `Y`-category finding
36+ //! in a repo that also has at least one `X`-category finding. This is
37+ //! the operationally useful case for the estate sweep — most
38+ //! FFI-driven proof drift surfaces in the same repository. A future
39+ //! slice will persist `kanren::crosslang` derived facts as hexads
40+ //! and tighten this to true reachability over the FFI boundary graph.
41+ //!
42+ //! ## Deferred to later follow-ups
43+ //!
44+ //! - True kanren-derived `(crosslang ...)` evaluation backed by
45+ //! persisted FFI-boundary facts (rather than the current
46+ //! co-occurrence proxy).
3847
3948use crate :: storage:: {
4049 load_campaign_hexads, load_finding_hexads, CampaignSemantic , FindingSemantic ,
@@ -63,6 +72,20 @@ pub enum Query {
6372 File ( String ) ,
6473 /// Match by campaign state. `None` means "no campaign hexad yet".
6574 PrState ( Option < String > ) ,
75+ /// `(crosslang :from FROM_CAT :to TO_CAT)` — match a `TO_CAT` finding
76+ /// in a repo that also has at least one `FROM_CAT` finding.
77+ ///
78+ /// Co-occurrence proxy: until kanren-derived cross-language facts are
79+ /// persisted as hexads (S3b follow-up), "the FROM finding is reachable
80+ /// from the TO finding" is approximated by "they live in the same
81+ /// repository", which is the operationally-useful case for the estate
82+ /// sweep — most FFI-driven proof drift surfaces in the same repo.
83+ Crosslang { from : String , to : String } ,
84+ /// Match by ISO-8601 / RFC-3339 first-seen timestamp ≥ `since`.
85+ /// Filed under the `(since ...)` keyword for compactness; combined
86+ /// with `(and (category ...) (since ...))` gives the "what's new
87+ /// since DATE" diff query the issue calls out.
88+ Since ( String ) ,
6689 /// Conjunction.
6790 And ( Vec < Query > ) ,
6891 /// Disjunction.
@@ -259,6 +282,52 @@ fn parse_form(tokens: &[Token], cursor: &mut usize) -> Result<Query> {
259282 close_paren ( tokens, cursor) ?;
260283 Ok ( Query :: Not ( Box :: new ( child) ) )
261284 }
285+ "since" => {
286+ let v = parse_value ( tokens, cursor) ?;
287+ close_paren ( tokens, cursor) ?;
288+ Ok ( Query :: Since ( v) )
289+ }
290+ "crosslang" => {
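            // Accepted shapes:
            //   (crosslang FROM TO)             — positional
            //   (crosslang :from FROM :to TO)   — keyword
            // Each argument is classified on its own: an atom starting with
            // `:` is a keyword followed by its value; anything else fills
            // `from`, then `to`. The two shapes can therefore be mixed, and a
            // repeated keyword overwrites the earlier value.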
295+ let mut from: Option < String > = None ;
296+ let mut to: Option < String > = None ;
297+ loop {
298+ match tokens. get ( * cursor) {
299+ Some ( Token :: RParen ) => {
300+ * cursor += 1 ;
301+ break ;
302+ }
303+ Some ( Token :: Atom ( a) ) if a. starts_with ( ':' ) => {
304+ let kw = a[ 1 ..] . to_ascii_lowercase ( ) ;
305+ * cursor += 1 ;
306+ let v = parse_value ( tokens, cursor) ?;
307+ match kw. as_str ( ) {
308+ "from" => from = Some ( v) ,
309+ "to" => to = Some ( v) ,
310+ other => bail ! ( "unknown crosslang keyword: :{}" , other) ,
311+ }
312+ }
313+ Some ( _) => {
314+ // Positional fallback — `from` first, then `to`.
315+ let v = parse_value ( tokens, cursor) ?;
316+ if from. is_none ( ) {
317+ from = Some ( v) ;
318+ } else if to. is_none ( ) {
319+ to = Some ( v) ;
320+ } else {
321+ bail ! ( "too many positional args to crosslang" ) ;
322+ }
323+ }
324+ None => bail ! ( "missing ')' in crosslang" ) ,
325+ }
326+ }
327+ let from = from. ok_or_else ( || anyhow ! ( "crosslang missing :from" ) ) ?;
328+ let to = to. ok_or_else ( || anyhow ! ( "crosslang missing :to" ) ) ?;
329+ Ok ( Query :: Crosslang { from, to } )
330+ }
262331 other => bail ! ( "unknown query head: {}" , other) ,
263332 }
264333}
@@ -321,14 +390,24 @@ fn close_paren(tokens: &[Token], cursor: &mut usize) -> Result<()> {
/// One finding from the hexad store, paired with the campaign record looked
/// up for it and the timestamp of the hexad it was read from.
struct FindingRow {
    /// Finding payload taken from a finding hexad's `semantic.finding`.
    finding: FindingSemantic,
    /// Campaign record found for this finding's `finding_id`, or `None`
    /// when no campaign record was found for it.
    campaign: Option<CampaignSemantic>,
    /// `created_at` of the finding hexad — used by `(since ...)`.
    created_at: String,
}
396+
/// Index from lower-cased repo name → set of lower-cased category names
/// present in that repo. Used by `(crosslang ...)` to check co-occurrence;
/// lookups must lower-case both the repo name and the category the same way.
type RepoCategoryIndex = HashMap<String, std::collections::HashSet<String>>;
400+
/// Data a query is evaluated against: the finding rows plus the per-repo
/// category index that `(crosslang ...)` consults.
struct EvalContext {
    /// Every finding row loaded from the store; `run` iterates these.
    rows: Vec<FindingRow>,
    /// Which categories occur in which repo (see `RepoCategoryIndex`).
    repo_categories: RepoCategoryIndex,
}
325405
326- fn load_rows ( base_dir : & Path ) -> Result < Vec < FindingRow > > {
406+ fn load_context ( base_dir : & Path ) -> Result < EvalContext > {
327407 let finding_hexads = load_finding_hexads ( base_dir) ?;
328408 let mut campaign_hexads = load_campaign_hexads ( base_dir) ?;
329409 campaign_hexads. sort_by ( |a, b| a. created_at . cmp ( & b. created_at ) ) ;
330410
331- // Latest campaign event wins per finding_id.
332411 let mut latest: HashMap < String , CampaignSemantic > = HashMap :: new ( ) ;
333412 for h in campaign_hexads {
334413 if let Some ( c) = h. semantic . campaign {
@@ -337,19 +416,29 @@ fn load_rows(base_dir: &Path) -> Result<Vec<FindingRow>> {
337416 }
338417
339418 let mut rows = Vec :: new ( ) ;
419+ let mut repo_categories: RepoCategoryIndex = HashMap :: new ( ) ;
340420 for h in finding_hexads {
421+ let created_at = h. created_at . clone ( ) ;
341422 if let Some ( f) = h. semantic . finding {
423+ repo_categories
424+ . entry ( f. repo_name . to_ascii_lowercase ( ) )
425+ . or_default ( )
426+ . insert ( f. category . to_ascii_lowercase ( ) ) ;
342427 let campaign = latest. get ( & f. finding_id ) . cloned ( ) ;
343428 rows. push ( FindingRow {
344429 finding : f,
345430 campaign,
431+ created_at,
346432 } ) ;
347433 }
348434 }
349- Ok ( rows)
435+ Ok ( EvalContext {
436+ rows,
437+ repo_categories,
438+ } )
350439}
351440
352- fn matches ( query : & Query , row : & FindingRow ) -> bool {
441+ fn matches ( query : & Query , row : & FindingRow , ctx : & EvalContext ) -> bool {
353442 match query {
354443 Query :: Category ( target) => row. finding . category . eq_ignore_ascii_case ( target) ,
355444 Query :: RuleId ( target) => row. finding . rule_id . eq_ignore_ascii_case ( target) ,
@@ -369,19 +458,46 @@ fn matches(query: &Query, row: &FindingRow) -> bool {
369458 ( Some ( want) , Some ( c) ) => c. state . eq_ignore_ascii_case ( want) ,
370459 _ => false ,
371460 } ,
372- Query :: And ( children) => children. iter ( ) . all ( |c| matches ( c, row) ) ,
373- Query :: Or ( children) => children. iter ( ) . any ( |c| matches ( c, row) ) ,
374- Query :: Not ( inner) => !matches ( inner, row) ,
461+ Query :: Since ( since) => {
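            // Compared as plain strings with `>=`; ISO-8601 / RFC-3339
            // timestamps sort chronologically that way when they share one
            // layout and UTC offset. `first_seen_run` is used when it is
            // non-empty and contains a 'T'; otherwise the finding hexad's
            // `created_at` is used.
            // NOTE(review): the 'T' check is only a heuristic — a run id that
            // happens to contain an uppercase 'T' would be treated as a
            // timestamp. Consider validating the shape more strictly.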
466+ let candidate = if !row. finding . first_seen_run . is_empty ( )
467+ && row. finding . first_seen_run . contains ( 'T' )
468+ {
469+ row. finding . first_seen_run . as_str ( )
470+ } else {
471+ row. created_at . as_str ( )
472+ } ;
473+ candidate >= since. as_str ( )
474+ }
475+ Query :: Crosslang { from, to } => {
476+ // `to`-matching finding in a repo that also has at least one
477+ // `from`-category finding. The current finding must be the
478+ // `to` side (so callers can wrap with `and`/`or`).
479+ if !row. finding . category . eq_ignore_ascii_case ( to) {
480+ return false ;
481+ }
482+ let from_lower = from. to_ascii_lowercase ( ) ;
483+ ctx. repo_categories
484+ . get ( & row. finding . repo_name . to_ascii_lowercase ( ) )
485+ . map ( |cats| cats. contains ( & from_lower) )
486+ . unwrap_or ( false )
487+ }
488+ Query :: And ( children) => children. iter ( ) . all ( |c| matches ( c, row, ctx) ) ,
489+ Query :: Or ( children) => children. iter ( ) . any ( |c| matches ( c, row, ctx) ) ,
490+ Query :: Not ( inner) => !matches ( inner, row, ctx) ,
375491 }
376492}
377493
378494/// Execute a query against the persisted hexad store and return all
379495/// matching findings.
380496pub fn run ( query : & Query , base_dir : & Path ) -> Result < Vec < FindingHit > > {
381- let rows = load_rows ( base_dir) ?;
497+ let ctx = load_context ( base_dir) ?;
382498 let mut hits = Vec :: new ( ) ;
383- for row in rows {
384- if matches ( query, & row) {
499+ for row in & ctx . rows {
500+ if matches ( query, row, & ctx ) {
385501 hits. push ( FindingHit {
386502 finding_id : row. finding . finding_id . clone ( ) ,
387503 repo_name : row. finding . repo_name . clone ( ) ,
@@ -689,6 +805,91 @@ mod tests {
689805 ) ;
690806 }
691807
/// A bare (unquoted) date atom is carried into `Query::Since` verbatim.
#[test]
fn parse_since_atom() {
    let expected = Query::Since(String::from("2026-04-12"));
    let parsed = parse("(since 2026-04-12)").unwrap();
    assert_eq!(parsed, expected);
}
813+
/// A quoted RFC-3339 timestamp is accepted and stored without its quotes.
#[test]
fn parse_since_quoted() {
    let source = "(since \"2026-04-12T00:00:00Z\")";
    let expected = Query::Since(String::from("2026-04-12T00:00:00Z"));
    let parsed = parse(source).unwrap();
    assert_eq!(parsed, expected);
}
819+
/// The `:from` / `:to` keyword shape populates both sides of `Crosslang`.
#[test]
fn parse_crosslang_keyword_form() {
    let expected = Query::Crosslang {
        from: String::from("UnsafeFFI"),
        to: String::from("ProofDrift"),
    };
    let parsed = parse("(crosslang :from UnsafeFFI :to ProofDrift)").unwrap();
    assert_eq!(parsed, expected);
}
831+
/// The positional shorthand maps the first argument to `from` and the
/// second to `to`.
#[test]
fn parse_crosslang_positional_form() {
    let expected = Query::Crosslang {
        from: String::from("UnsafeFFI"),
        to: String::from("ProofDrift"),
    };
    let parsed = parse("(crosslang UnsafeFFI ProofDrift)").unwrap();
    assert_eq!(parsed, expected);
}
843+
/// Supplying only one of `:from` / `:to` must be rejected by the parser.
#[test]
fn parse_crosslang_missing_keyword_errors() {
    let only_from = parse("(crosslang :from UnsafeFFI)");
    let only_to = parse("(crosslang :to ProofDrift)");
    assert!(only_from.is_err());
    assert!(only_to.is_err());
}
849+
/// A keyword other than `:from` / `:to` is a parse error.
#[test]
fn parse_crosslang_unknown_keyword_errors() {
    let outcome = parse("(crosslang :bogus UnsafeFFI :to ProofDrift)");
    assert!(outcome.is_err());
}
854+
/// `(since ...)` with a far-future cutoff matches nothing, and with a
/// far-past cutoff matches every fixture finding.
#[test]
fn run_since_filters_old_findings() {
    let store = tempdir().unwrap();
    write_test_findings(store.path());

    // Relies on the fixtures' `first_seen_run` not looking like an ISO
    // timestamp, so the comparison falls back to the hexad's `created_at`
    // (stamped at write time) — confirm against `write_test_findings`.
    let future_cutoff = parse("(since 2099-01-01)").unwrap();
    let future_hits = run(&future_cutoff, store.path()).unwrap();
    assert!(future_hits.is_empty());

    // A cutoff well in the past keeps all three fixture findings.
    let past_cutoff = parse("(since 2000-01-01)").unwrap();
    let past_hits = run(&past_cutoff, store.path()).unwrap();
    assert_eq!(past_hits.len(), 3);
}
868+
/// `(crosslang ...)` returns the `to`-category finding of a repo that also
/// holds a `from`-category finding.
#[test]
fn run_crosslang_matches_co_occurrence() {
    let store = tempdir().unwrap();
    write_test_findings(store.path());

    // Expected fixture layout (see `write_test_findings`): repo "alpha"
    // holds both an UnsafeCode and a CryptoMisuse finding, so exactly the
    // CryptoMisuse one in alpha should come back.
    let query = parse("(crosslang :from UnsafeCode :to CryptoMisuse)").unwrap();
    let hits = run(&query, store.path()).unwrap();

    assert_eq!(hits.len(), 1);
    let only = &hits[0];
    assert_eq!(only.repo_name, "alpha");
    assert_eq!(only.category, "CryptoMisuse");
}
882+
/// `(crosslang ...)` yields nothing when no finding of the `from` category
/// exists in any repo.
#[test]
fn run_crosslang_excludes_missing_source() {
    let store = tempdir().unwrap();
    write_test_findings(store.path());

    // The fixtures are expected to contain no PanicPath finding at all
    // (see `write_test_findings`), so the `from` side can never be met.
    let query = parse("(crosslang :from PanicPath :to UnsafeCode)").unwrap();
    let hits = run(&query, store.path()).unwrap();
    assert!(hits.is_empty());
}
892+
692893 #[ test]
693894 fn render_table_empty ( ) {
694895 let s = render_table ( & [ ] ) ;
0 commit comments