3131//! lexicographically. RFC-3339 / ISO-8601 strings sort correctly under
3232//! string comparison, which is what we use.
3333//!
34- //! - `(crosslang :from X :to Y)` is a **co-occurrence proxy** for
35- //! FFI/cross-language reachability: it matches a `Y`-category finding
36- //! in a repo that also has at least one `X`-category finding. This is
37- //! the operationally useful case for the estate sweep — most
38- //! FFI-driven proof drift surfaces in the same repository. A future
39- //! slice will persist `kanren::crosslang` derived facts as hexads
40- //! and tighten this to true reachability over the FFI boundary graph.
//! - `(crosslang :from X :to Y)` is evaluated in two modes:
//!   * **Facts-backed** (`<dir>/hexads/crosslang/` is non-empty):
//!     matches a `Y`-category finding when there exists a persisted
//!     kanren-derived `CrossLangInteraction` in the same repo where one
//!     endpoint of the interaction is the file of an `X`-category
//!     finding. This is the "real" FFI/cross-language reachability
//!     semantics.
//!   * **Co-occurrence proxy** (fallback when no crosslang hexads are
//!     on disk): matches a `Y`-category finding in any repo that also
//!     has ≥ 1 `X`-category finding. Preserves the historical
//!     co-occurrence behaviour for users who haven't enabled crosslang
//!     persistence yet (`PANIC_ATTACK_STORE_CROSSLANG_HEXADS=1`).
4146//!
42- //! ## Deferred to later follow-ups
43- //!
44- //! - True kanren-derived `(crosslang ...)` evaluation backed by
45- //! persisted FFI-boundary facts (rather than the current
46- //! co-occurrence proxy ).
//! Most FFI-driven proof drift surfaces in the same repo, so both
//! modes converge on the operationally common case, but the
//! facts-backed mode prunes same-repo false-positive co-occurrences
//! (e.g. an `UnsafeFFI`-bearing repo that contains an unrelated
//! `ProofDrift` finding in a non-FFI module).
4752
4853use crate :: storage:: {
49- load_campaign_hexads, load_finding_hexads, CampaignSemantic , FindingSemantic ,
54+ load_campaign_hexads, load_crosslang_hexads, load_finding_hexads, CampaignSemantic ,
55+ FindingSemantic ,
5056} ;
5157use anyhow:: { anyhow, bail, Result } ;
5258use serde:: Serialize ;
53- use std:: collections:: HashMap ;
59+ use std:: collections:: { HashMap , HashSet } ;
5460use std:: path:: Path ;
5561
5662// ===========================================================================
@@ -73,13 +79,13 @@ pub enum Query {
7379 /// Match by campaign state. `None` means "no campaign hexad yet".
7480 PrState ( Option < String > ) ,
7581 /// `(crosslang :from FROM_CAT :to TO_CAT)` — match a `TO_CAT` finding
76- /// in a repo that also has at least one `FROM_CAT` finding.
82+ /// reachable from a `FROM_CAT` finding via an FFI boundary .
7783 ///
78- /// Co-occurrence proxy: until kanren-derived cross-language facts are
79- /// persisted as hexads (S3b follow-up), "the FROM finding is reachable
80- /// from the TO finding" is approximated by "they live in the same
81- /// repository", which is the operationally-useful case for the estate
82- /// sweep — most FFI-driven proof drift surfaces in the same repo .
84+ /// Evaluated in two modes depending on whether crosslang hexads have
85+ /// been persisted (see the module-level doc for the full semantics):
86+ /// facts-backed FFI-endpoint reachability when
87+ /// `<dir>/hexads/crosslang/` is populated, same-repo co-occurrence
88+ /// proxy otherwise .
8389 Crosslang { from : String , to : String } ,
8490 /// Match by ISO-8601 / RFC-3339 first-seen timestamp ≥ `since`.
8591 /// Filed under the `(since ...)` keyword for compactness; combined
@@ -395,12 +401,31 @@ struct FindingRow {
395401}
396402
397403/// Index from repo name → set of category Debug-names present in that
398- /// repo. Used by `(crosslang ...)` to check co-occurrence.
399- type RepoCategoryIndex = HashMap < String , std:: collections:: HashSet < String > > ;
404+ /// repo. Used by the co-occurrence proxy path of `(crosslang ...)`.
405+ type RepoCategoryIndex = HashMap < String , HashSet < String > > ;
406+
/// Index from `(repo_name_lower, category_lower)` → set of files in that
/// repo that carry a finding of that category. Used by the facts-backed
/// `(crosslang ...)` path so we can check whether a candidate
/// `from`-category finding's file is an endpoint of any persisted
/// `CrossLangInteraction` in the same repo.
///
/// Only the key components are lowercased; the file paths in the set are
/// stored exactly as they appear on the finding, so endpoint comparison
/// against them is an exact string match.
type RepoCategoryFileIndex = HashMap<(String, String), HashSet<String>>;
413+
/// Index from repo name (lowercased) → list of `(source_file,
/// target_file)` pairs derived from persisted crosslang hexads. Used by
/// the facts-backed `(crosslang ...)` path.
///
/// Both endpoint strings are stored exactly as persisted (no case
/// folding or path normalisation is applied when the index is built).
type RepoInteractionIndex = HashMap<String, Vec<(String, String)>>;
400418
/// Pre-loaded data a query is evaluated against. Built by `load_context`
/// and passed by reference to `matches` / `crosslang_facts_match`.
struct EvalContext {
    /// One row per finding hexad that carries a `finding` semantic.
    rows: Vec<FindingRow>,
    /// Lowercased repo name → lowercased category names present in that
    /// repo. Consulted by the co-occurrence proxy path of `(crosslang ...)`.
    repo_categories: RepoCategoryIndex,
    /// Per-repo, per-category file index. Populated unconditionally; only
    /// consulted by the facts-backed crosslang path.
    repo_category_files: RepoCategoryFileIndex,
    /// Per-repo crosslang interaction endpoints. `None` when
    /// `<dir>/hexads/crosslang/` is empty (signal to the evaluator that
    /// it should fall back to the co-occurrence proxy). `Some` as soon as
    /// at least one crosslang hexad was loaded, even if none of the loaded
    /// hexads carries a `crosslang` semantic (the map is then empty).
    crosslang_interactions: Option<RepoInteractionIndex>,
}
405430
406431fn load_context ( base_dir : & Path ) -> Result < EvalContext > {
@@ -417,13 +442,20 @@ fn load_context(base_dir: &Path) -> Result<EvalContext> {
417442
418443 let mut rows = Vec :: new ( ) ;
419444 let mut repo_categories: RepoCategoryIndex = HashMap :: new ( ) ;
445+ let mut repo_category_files: RepoCategoryFileIndex = HashMap :: new ( ) ;
420446 for h in finding_hexads {
421447 let created_at = h. created_at . clone ( ) ;
422448 if let Some ( f) = h. semantic . finding {
449+ let repo_lower = f. repo_name . to_ascii_lowercase ( ) ;
450+ let cat_lower = f. category . to_ascii_lowercase ( ) ;
423451 repo_categories
424- . entry ( f . repo_name . to_ascii_lowercase ( ) )
452+ . entry ( repo_lower . clone ( ) )
425453 . or_default ( )
426- . insert ( f. category . to_ascii_lowercase ( ) ) ;
454+ . insert ( cat_lower. clone ( ) ) ;
455+ repo_category_files
456+ . entry ( ( repo_lower, cat_lower) )
457+ . or_default ( )
458+ . insert ( f. file . clone ( ) ) ;
427459 let campaign = latest. get ( & f. finding_id ) . cloned ( ) ;
428460 rows. push ( FindingRow {
429461 finding : f,
@@ -432,12 +464,55 @@ fn load_context(base_dir: &Path) -> Result<EvalContext> {
432464 } ) ;
433465 }
434466 }
467+
468+ // Crosslang facts: load hexads; treat empty dir as "fall back to
469+ // co-occurrence proxy" by leaving `crosslang_interactions = None`.
470+ let crosslang_hexads = load_crosslang_hexads ( base_dir) ?;
471+ let crosslang_interactions = if crosslang_hexads. is_empty ( ) {
472+ None
473+ } else {
474+ let mut idx: RepoInteractionIndex = HashMap :: new ( ) ;
475+ for h in crosslang_hexads {
476+ let Some ( cl) = h. semantic . crosslang else {
477+ continue ;
478+ } ;
479+ idx. entry ( cl. repo_name . to_ascii_lowercase ( ) )
480+ . or_default ( )
481+ . push ( ( cl. source_file . clone ( ) , cl. target_file . clone ( ) ) ) ;
482+ }
483+ Some ( idx)
484+ } ;
485+
435486 Ok ( EvalContext {
436487 rows,
437488 repo_categories,
489+ repo_category_files,
490+ crosslang_interactions,
438491 } )
439492}
440493
494+ /// Facts-backed `(crosslang :from F :to T)` check for one candidate row.
495+ ///
496+ /// Pre-condition: `row.finding.category` already matches `to`. Returns
497+ /// `true` when a persisted `CrossLangInteraction` in the same repo has
498+ /// one endpoint equal to a file carrying an `F`-category finding.
499+ fn crosslang_facts_match ( row : & FindingRow , from : & str , ctx : & EvalContext ) -> bool {
500+ let Some ( by_repo) = ctx. crosslang_interactions . as_ref ( ) else {
501+ return false ;
502+ } ;
503+ let repo_lower = row. finding . repo_name . to_ascii_lowercase ( ) ;
504+ let from_lower = from. to_ascii_lowercase ( ) ;
505+ let Some ( pairs) = by_repo. get ( & repo_lower) else {
506+ return false ;
507+ } ;
508+ let Some ( from_files) = ctx. repo_category_files . get ( & ( repo_lower, from_lower) ) else {
509+ return false ;
510+ } ;
511+ pairs
512+ . iter ( )
513+ . any ( |( src, tgt) | from_files. contains ( src) || from_files. contains ( tgt) )
514+ }
515+
441516fn matches ( query : & Query , row : & FindingRow , ctx : & EvalContext ) -> bool {
442517 match query {
443518 Query :: Category ( target) => row. finding . category . eq_ignore_ascii_case ( target) ,
@@ -473,12 +548,22 @@ fn matches(query: &Query, row: &FindingRow, ctx: &EvalContext) -> bool {
473548 candidate >= since. as_str ( )
474549 }
475550 Query :: Crosslang { from, to } => {
476- // `to`-matching finding in a repo that also has at least one
477- // `from`-category finding. The current finding must be the
478- // `to` side (so callers can wrap with `and`/`or`).
551+ // The current finding must be the `to` side (so callers can
552+ // wrap with `and`/`or`).
479553 if !row. finding . category . eq_ignore_ascii_case ( to) {
480554 return false ;
481555 }
556+ // Mode 1 — facts-backed: `<dir>/hexads/crosslang/` has hexads.
557+ // Match when there is a persisted `CrossLangInteraction` in
558+ // the same repo whose source or target file is the location of
559+ // an `F`-category finding. This is true FFI reachability.
560+ if ctx. crosslang_interactions . is_some ( ) {
561+ return crosslang_facts_match ( row, from, ctx) ;
562+ }
563+ // Mode 2 — co-occurrence proxy fallback (no crosslang hexads
564+ // on disk yet): same-repo co-occurrence of categories.
565+ // Preserves S3b semantics for users who haven't enabled
566+ // `PANIC_ATTACK_STORE_CROSSLANG_HEXADS`.
482567 let from_lower = from. to_ascii_lowercase ( ) ;
483568 ctx. repo_categories
484569 . get ( & row. finding . repo_name . to_ascii_lowercase ( ) )
@@ -890,6 +975,117 @@ mod tests {
890975 assert ! ( run( & q, dir. path( ) ) . unwrap( ) . is_empty( ) ) ;
891976 }
892977
978+ // ----- Issue #33 kanren-crosslang: facts-backed crosslang tests ---
979+
980+ /// Write a synthetic crosslang hexad into
981+ /// `<dir>/hexads/crosslang/`. Tests use this to simulate persisted
982+ /// `CrossLangInteraction` facts without driving the full kanren
983+ /// pipeline.
984+ fn write_synthetic_crosslang_hexad (
985+ dir : & std:: path:: Path ,
986+ idx : usize ,
987+ repo : & str ,
988+ source_file : & str ,
989+ target_file : & str ,
990+ ) {
991+ use crate :: storage:: { CrosslangSemantic , HexadProvenance , HexadSemantic , PanicAttackHexad } ;
992+ let h = PanicAttackHexad {
993+ schema : "verisimdb.hexad.v1" . to_string ( ) ,
994+ id : format ! ( "pa-crosslang-test-{}" , idx) ,
995+ created_at : "2026-05-26T00:00:00Z" . to_string ( ) ,
996+ provenance : HexadProvenance {
997+ tool : "panic-attack" . to_string ( ) ,
998+ version : env ! ( "CARGO_PKG_VERSION" ) . to_string ( ) ,
999+ program_path : format ! ( "/tmp/{}" , repo) ,
1000+ language : "Rust" . to_string ( ) ,
1001+ attestation_hash : None ,
1002+ } ,
1003+ semantic : HexadSemantic {
1004+ total_weak_points : 0 ,
1005+ critical_count : 0 ,
1006+ high_count : 0 ,
1007+ total_crashes : 0 ,
1008+ robustness_score : 0.85 ,
1009+ categories : Vec :: new ( ) ,
1010+ migration : None ,
1011+ finding : None ,
1012+ campaign : None ,
1013+ crosslang : Some ( CrosslangSemantic {
1014+ interaction_id : format ! (
1015+ "crosslang:{}:{}:Rust:{}:Unknown:CFfi" ,
1016+ repo, source_file, target_file
1017+ ) ,
1018+ source_lang : "Rust" . to_string ( ) ,
1019+ target_lang : "Unknown" . to_string ( ) ,
1020+ mechanism : "CFfi" . to_string ( ) ,
1021+ source_file : source_file. to_string ( ) ,
1022+ source_line : None ,
1023+ target_file : target_file. to_string ( ) ,
1024+ target_line : None ,
1025+ repo_name : repo. to_string ( ) ,
1026+ } ) ,
1027+ } ,
1028+ document : serde_json:: Value :: Null ,
1029+ } ;
1030+ let cl_dir = dir. join ( "hexads" ) . join ( "crosslang" ) ;
1031+ std:: fs:: create_dir_all ( & cl_dir) . unwrap ( ) ;
1032+ std:: fs:: write (
1033+ cl_dir. join ( format ! ( "h-{}.json" , idx) ) ,
1034+ serde_json:: to_string_pretty ( & h) . unwrap ( ) ,
1035+ )
1036+ . unwrap ( ) ;
1037+ }
1038+
#[test]
fn run_crosslang_facts_backed_matches() {
    let tmp = tempdir().unwrap();
    let base = tmp.path();
    write_test_findings(base);
    // Fixture recap: repo `alpha` carries an UnsafeCode finding at
    // src/a.rs:1 and a CryptoMisuse finding at src/a.rs:7. We persist one
    // crosslang interaction in `alpha` whose source endpoint is src/a.rs,
    // i.e. the file of the UnsafeCode finding. With facts on disk the
    // evaluator takes the facts-backed path, and the CryptoMisuse finding
    // must satisfy `(crosslang :from UnsafeCode :to CryptoMisuse)`.
    write_synthetic_crosslang_hexad(base, 0, "alpha", "src/a.rs", "foreign");
    let query = parse("(crosslang :from UnsafeCode :to CryptoMisuse)").unwrap();
    let results = run(&query, base).unwrap();
    assert_eq!(results.len(), 1);
    let hit = &results[0];
    assert_eq!(hit.repo_name, "alpha");
    assert_eq!(hit.category, "CryptoMisuse");
}
1056+
#[test]
fn run_crosslang_falls_back_to_co_occurrence_when_no_facts() {
    // With nothing under hexads/crosslang/ the evaluator has to use the
    // legacy co-occurrence proxy. Repo `alpha` holds both an UnsafeCode
    // and a CryptoMisuse finding, so its CryptoMisuse finding matches.
    let tmp = tempdir().unwrap();
    let base = tmp.path();
    write_test_findings(base);
    let query = parse("(crosslang :from UnsafeCode :to CryptoMisuse)").unwrap();
    let results = run(&query, base).unwrap();
    assert_eq!(results.len(), 1);
    assert_eq!(results[0].repo_name, "alpha");
}
1069+
#[test]
fn run_crosslang_facts_backed_no_match_when_endpoint_misses() {
    // Crosslang hexads exist, so evaluation is facts-backed, but the only
    // interaction recorded for `alpha` touches a file that carries no
    // UnsafeCode finding. Facts-backed mode strictly requires an FFI
    // endpoint at a `from`-finding's file, so the CryptoMisuse finding
    // must NOT match. This is exactly the pruning the co-occurrence
    // proxy cannot perform.
    let tmp = tempdir().unwrap();
    let base = tmp.path();
    write_test_findings(base);
    write_synthetic_crosslang_hexad(base, 0, "alpha", "src/unrelated.rs", "foreign");
    let query = parse("(crosslang :from UnsafeCode :to CryptoMisuse)").unwrap();
    let results = run(&query, base).unwrap();
    let no_hits = results.is_empty();
    assert!(
        no_hits,
        "facts-backed mode must reject co-occurrences without a real FFI endpoint"
    );
}
1088+
8931089 #[ test]
8941090 fn render_table_empty ( ) {
8951091 let s = render_table ( & [ ] ) ;
0 commit comments