From dbf3a6f5587bd2de77e4c3db7d84de386fc41fbd Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 8 Apr 2026 23:10:28 -0600 Subject: [PATCH 1/9] fix(native): strip pre-release suffix in semverCompare `semverCompare('3.9.3-dev.6', '3.9.1')` returned -1 (less than) because `Number('3-dev')` is NaN, which the `|| 0` fallback turned into 0, making the comparison `0 < 1`. This caused `shouldSkipNativeOrchestrator` to flag all pre-release builds as "buggy", disabling the native orchestrator fast path introduced in #897. Strip `-` before splitting on `.` so the numeric comparison sees `3.9.3` vs `3.9.1` correctly. --- src/infrastructure/update-check.ts | 6 +++--- tests/unit/update-check.test.ts | 9 +++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/src/infrastructure/update-check.ts b/src/infrastructure/update-check.ts index b0199892..d8438088 100644 --- a/src/infrastructure/update-check.ts +++ b/src/infrastructure/update-check.ts @@ -18,11 +18,11 @@ interface UpdateCache { /** * Minimal semver comparison. Returns -1, 0, or 1. - * Only handles numeric x.y.z (no pre-release tags). + * Strips pre-release suffixes (e.g. "3.9.3-dev.6" → "3.9.3") before comparing. 
*/ export function semverCompare(a: string, b: string): -1 | 0 | 1 { - const pa = a.split('.').map(Number); - const pb = b.split('.').map(Number); + const pa = a.replace(/-.*$/, '').split('.').map(Number); + const pb = b.replace(/-.*$/, '').split('.').map(Number); for (let i = 0; i < 3; i++) { const na = pa[i] || 0; const nb = pb[i] || 0; diff --git a/tests/unit/update-check.test.ts b/tests/unit/update-check.test.ts index 46621d8b..abb35aad 100644 --- a/tests/unit/update-check.test.ts +++ b/tests/unit/update-check.test.ts @@ -51,6 +51,15 @@ describe('semverCompare', () => { it('major takes priority over minor and patch', () => { expect(semverCompare('1.9.9', '2.0.0')).toBe(-1); }); + + it('strips pre-release suffixes before comparing', () => { + // 3.9.3-dev.6 should be treated as 3.9.3, which is > 3.9.1 + expect(semverCompare('3.9.3-dev.6', '3.9.1')).toBe(1); + expect(semverCompare('3.9.3-dev.6', '3.9.3')).toBe(0); + expect(semverCompare('3.9.3-dev.6', '3.9.4')).toBe(-1); + // Both sides with pre-release + expect(semverCompare('2.0.0-beta.1', '1.9.9-alpha.3')).toBe(1); + }); }); // ─── checkForUpdates ──────────────────────────────────────────────── From d99013f9d1229f0e825ff5f15fd39c8a1d42a8b5 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Thu, 9 Apr 2026 00:50:13 -0600 Subject: [PATCH 2/9] perf(query): short-circuit diffImpact when no functions affected MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Skip co-change, ownership, and boundary lookups when findAffectedFunctions returns empty — all callers return early on this case anyway. Also pass the already-loaded config to checkBoundaryViolations to avoid a redundant loadConfig call. Saves ~2-3ms of fixed overhead per diffImpact invocation when the diff touches no function bodies (the common case for comment/import/type-only changes and the benchmark probe). 
Closes #904 --- src/domain/analysis/diff-impact.ts | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/src/domain/analysis/diff-impact.ts b/src/domain/analysis/diff-impact.ts index f1e2fc16..e92121f8 100644 --- a/src/domain/analysis/diff-impact.ts +++ b/src/domain/analysis/diff-impact.ts @@ -307,6 +307,32 @@ export function diffImpactData( } const affectedFunctions = findAffectedFunctions(db, changedRanges, noTests); + + // Short-circuit: when no function-level changes detected, skip expensive + // lookups (BFS, co-change, ownership, boundary checks). All callers + // (CLI, MCP, benchmark) return early on empty affectedFunctions. + if (affectedFunctions.length === 0) { + const base = { + changedFiles: changedRanges.size, + newFiles: [...newFiles], + affectedFunctions: [] as unknown[], + affectedFiles: [] as string[], + historicallyCoupled: [] as unknown[], + ownership: null, + boundaryViolations: [] as unknown[], + boundaryViolationCount: 0, + summary: { + functionsChanged: 0, + callersAffected: 0, + filesAffected: 0, + historicallyCoupledCount: 0, + ownersAffected: 0, + boundaryViolationCount: 0, + }, + }; + return paginateResult(base, 'affectedFunctions', { limit: opts.limit, offset: opts.offset }); + } + const includeImplementors = opts.includeImplementors !== false; const { functionResults, allAffected } = buildFunctionImpactResults( db, @@ -325,7 +351,7 @@ export function diffImpactData( db, changedRanges, noTests, - opts, + { ...opts, config }, repoRoot, ); From 14bf7c47bb53c93a02582941f26272c1140b5389 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Thu, 9 Apr 2026 01:28:35 -0600 Subject: [PATCH 3/9] fix(query): preserve boundary checks in diffImpact short-circuit (#905) The short-circuit path was hardcoding boundaryViolations: [] when no functions were affected. 
Since boundary checks are file-scoped (not function-scoped), import or type-alias changes can still produce real violations. Preserve the check and align the return shape (summary: null) with the two existing early-exit paths. --- src/domain/analysis/diff-impact.ts | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/domain/analysis/diff-impact.ts b/src/domain/analysis/diff-impact.ts index e92121f8..2d263edb 100644 --- a/src/domain/analysis/diff-impact.ts +++ b/src/domain/analysis/diff-impact.ts @@ -309,9 +309,17 @@ export function diffImpactData( const affectedFunctions = findAffectedFunctions(db, changedRanges, noTests); // Short-circuit: when no function-level changes detected, skip expensive - // lookups (BFS, co-change, ownership, boundary checks). All callers - // (CLI, MCP, benchmark) return early on empty affectedFunctions. + // lookups (BFS, co-change, ownership). Boundary checks are preserved + // because they are file-scoped and can surface real violations even when + // no function bodies were touched (e.g. import or type-alias changes). 
if (affectedFunctions.length === 0) { + const { boundaryViolations, boundaryViolationCount } = checkBoundaryViolations( + db, + changedRanges, + noTests, + { ...opts, config }, + repoRoot, + ); const base = { changedFiles: changedRanges.size, newFiles: [...newFiles], @@ -319,16 +327,9 @@ export function diffImpactData( affectedFiles: [] as string[], historicallyCoupled: [] as unknown[], ownership: null, - boundaryViolations: [] as unknown[], - boundaryViolationCount: 0, - summary: { - functionsChanged: 0, - callersAffected: 0, - filesAffected: 0, - historicallyCoupledCount: 0, - ownersAffected: 0, - boundaryViolationCount: 0, - }, + boundaryViolations, + boundaryViolationCount, + summary: null as null, }; return paginateResult(base, 'affectedFunctions', { limit: opts.limit, offset: opts.offset }); } From 8c281a5a34e4de451fe911e9df28d675cfba4c54 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 10 Apr 2026 00:02:25 -0600 Subject: [PATCH 4/9] perf(native): move analysis persistence into Rust orchestrator Add AST, complexity, CFG, and dataflow write stages to the Rust build pipeline (build_pipeline.rs), eliminating the JS runPostNativeAnalysis step and its WASM re-parse overhead. The orchestrator now writes all analysis data directly to DB from the parsed FileSymbols, using the same single rusqlite connection. New pipeline stages (8b) after structure/roles: - AST nodes: reuses ast_db::do_insert_ast_nodes with parent resolution - Complexity: writes metrics from Definition.complexity to function_complexity - CFG: writes blocks/edges from Definition.cfg to cfg_blocks/cfg_edges - Dataflow: resolves function names to node IDs and writes to dataflow table Also removes the native-first pipeline (JS-orchestrated with native backend) since the Rust orchestrator now handles everything end-to-end. Removes CODEGRAPH_FORCE_JS_PIPELINE env var, runPostNativeAnalysis, and the third benchmark variant. 
Includes prior fast-path fixes from this branch: - allNativeDataComplete() fast path in ast-analysis engine - Fix AST tryNativeBulkInsert bail on native-parsed files - Fix complexity collectNativeBulkRows bail on unsupported languages - parseFilesFull napi export for single-pass extraction --- crates/codegraph-core/src/build_pipeline.rs | 431 ++++++++++++++++++++ crates/codegraph-core/src/lib.rs | 14 + crates/codegraph-core/src/parallel.rs | 31 ++ scripts/benchmark.ts | 49 +-- src/ast-analysis/engine.ts | 83 ++++ src/domain/graph/builder/pipeline.ts | 153 +------ src/domain/parser.ts | 8 +- src/features/ast.ts | 4 +- src/features/complexity.ts | 7 + src/types.ts | 1 + 10 files changed, 616 insertions(+), 165 deletions(-) diff --git a/crates/codegraph-core/src/build_pipeline.rs b/crates/codegraph-core/src/build_pipeline.rs index ad174c48..66927f2d 100644 --- a/crates/codegraph-core/src/build_pipeline.rs +++ b/crates/codegraph-core/src/build_pipeline.rs @@ -24,6 +24,7 @@ use crate::import_edges::{self, ImportEdgeContext}; use crate::import_resolution; use crate::journal; use crate::parallel; +use crate::ast_db::{self, AstInsertNode, FileAstBatch}; use crate::roles_db; use crate::structure; use crate::types::{FileSymbols, ImportResolutionInput}; @@ -46,6 +47,10 @@ pub struct PipelineTiming { pub edges_ms: f64, pub structure_ms: f64, pub roles_ms: f64, + pub ast_ms: f64, + pub complexity_ms: f64, + pub cfg_ms: f64, + pub dataflow_ms: f64, pub finalize_ms: f64, } @@ -77,6 +82,9 @@ pub struct BuildPipelineResult { /// ran (≤5 changed files, >20 existing files). When false, the JS caller /// must run its own structure phase as a post-processing step. pub structure_handled: bool, + /// Whether the Rust pipeline wrote AST/complexity/CFG/dataflow to the DB. + /// When true, the JS caller can skip `runPostNativeAnalysis` entirely. + pub analysis_complete: bool, } /// Normalize path to forward slashes. 
@@ -175,6 +183,7 @@ pub fn run_pipeline( is_full_build: false, structure_scope: Some(vec![]), structure_handled: true, + analysis_complete: true, }); } @@ -391,6 +400,52 @@ pub fn run_pipeline( } timing.roles_ms = t0.elapsed().as_secs_f64() * 1000.0; + // ── Stage 8b: Analysis persistence (AST, complexity, CFG, dataflow) ── + // Write analysis data from parsed file_symbols directly to DB tables, + // eliminating the JS runPostNativeAnalysis step and its WASM re-parse. + let include_cfg = opts.cfg.unwrap_or(true); + let do_analysis = include_ast || include_dataflow || include_cfg; + + if do_analysis { + // Determine which files to analyze (excludes reverse-dep files) + let analysis_file_set: HashSet<&str> = match &analysis_scope { + Some(files) => files.iter().map(|s| s.as_str()).collect(), + None => file_symbols.keys().map(|s| s.as_str()).collect(), + }; + + // Build node ID lookup: (file, name, line) -> node_id + let node_id_map = build_analysis_node_map(conn, &analysis_file_set); + + // AST nodes + if include_ast { + let t0 = Instant::now(); + let ast_batches = build_ast_batches(&file_symbols, &analysis_file_set); + let _ = ast_db::do_insert_ast_nodes(conn, &ast_batches); + timing.ast_ms = t0.elapsed().as_secs_f64() * 1000.0; + } + + // Complexity metrics + { + let t0 = Instant::now(); + write_complexity(conn, &file_symbols, &analysis_file_set, &node_id_map); + timing.complexity_ms = t0.elapsed().as_secs_f64() * 1000.0; + } + + // CFG blocks + edges + if include_cfg { + let t0 = Instant::now(); + write_cfg(conn, &file_symbols, &analysis_file_set, &node_id_map); + timing.cfg_ms = t0.elapsed().as_secs_f64() * 1000.0; + } + + // Dataflow edges + if include_dataflow { + let t0 = Instant::now(); + write_dataflow(conn, &file_symbols, &analysis_file_set); + timing.dataflow_ms = t0.elapsed().as_secs_f64() * 1000.0; + } + } + // ── Stage 9: Finalize ────────────────────────────────────────────── let t0 = Instant::now(); let (node_count, edge_count) = finalize_build(conn, 
root_dir); @@ -406,6 +461,10 @@ pub fn run_pipeline( + timing.edges_ms + timing.structure_ms + timing.roles_ms + + timing.ast_ms + + timing.complexity_ms + + timing.cfg_ms + + timing.dataflow_ms + timing.finalize_ms; let overhead = total_start.elapsed().as_secs_f64() * 1000.0 - stage_sum; timing.setup_ms += overhead.max(0.0); @@ -422,6 +481,7 @@ pub fn run_pipeline( is_full_build: change_result.is_full_build, structure_scope: changed_file_list.clone(), structure_handled: use_fast_path, + analysis_complete: do_analysis, }) } @@ -937,6 +997,377 @@ fn build_and_insert_call_edges( } } +// ── Analysis persistence helpers ───────────────────────────────────────── + +/// Build a lookup map from (file, name, line) to node_id for analysis writes. +fn build_analysis_node_map( + conn: &Connection, + files: &HashSet<&str>, +) -> HashMap<(String, String, u32), i64> { + let mut map = HashMap::new(); + let mut stmt = match conn.prepare( + "SELECT id, file, name, line FROM nodes WHERE file = ?1 AND kind != 'file'", + ) { + Ok(s) => s, + Err(_) => return map, + }; + for file in files { + if let Ok(rows) = stmt.query_map(rusqlite::params![file], |row| { + Ok(( + row.get::<_, i64>(0)?, + row.get::<_, String>(1)?, + row.get::<_, String>(2)?, + row.get::<_, u32>(3)?, + )) + }) { + for row in rows.flatten() { + let (id, file, name, line) = row; + map.insert((file, name, line), id); + } + } + } + map +} + +/// Convert FileSymbols AST nodes to FileAstBatch format for `ast_db::do_insert_ast_nodes`. 
+fn build_ast_batches( + file_symbols: &HashMap, + analysis_files: &HashSet<&str>, +) -> Vec { + let mut batches = Vec::new(); + for (file, symbols) in file_symbols { + if !analysis_files.contains(file.as_str()) || symbols.ast_nodes.is_empty() { + continue; + } + batches.push(FileAstBatch { + file: file.clone(), + nodes: symbols + .ast_nodes + .iter() + .map(|n| AstInsertNode { + line: n.line, + kind: n.kind.clone(), + name: n.name.clone(), + text: n.text.clone(), + receiver: n.receiver.clone(), + }) + .collect(), + }); + } + batches +} + +/// Write complexity metrics from parsed definitions to the `function_complexity` table. +fn write_complexity( + conn: &Connection, + file_symbols: &HashMap, + analysis_files: &HashSet<&str>, + node_id_map: &HashMap<(String, String, u32), i64>, +) { + let tx = match conn.unchecked_transaction() { + Ok(tx) => tx, + Err(_) => return, + }; + + let mut stmt = match tx.prepare( + "INSERT OR REPLACE INTO function_complexity \ + (node_id, cognitive, cyclomatic, max_nesting, \ + loc, sloc, comment_lines, \ + halstead_n1, halstead_n2, halstead_big_n1, halstead_big_n2, \ + halstead_vocabulary, halstead_length, halstead_volume, \ + halstead_difficulty, halstead_effort, halstead_bugs, \ + maintainability_index) \ + VALUES (?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18)", + ) { + Ok(s) => s, + Err(_) => return, + }; + + fn insert_def_complexity( + stmt: &mut rusqlite::Statement, + file: &str, + def: &crate::types::Definition, + node_id_map: &HashMap<(String, String, u32), i64>, + ) { + if let Some(ref cm) = def.complexity { + let key = (file.to_string(), def.name.clone(), def.line); + if let Some(&node_id) = node_id_map.get(&key) { + let h = cm.halstead.as_ref(); + let loc = cm.loc.as_ref(); + let _ = stmt.execute(rusqlite::params![ + node_id, + cm.cognitive, + cm.cyclomatic, + cm.max_nesting, + loc.map(|l| l.loc).unwrap_or(0), + loc.map(|l| l.sloc).unwrap_or(0), + loc.map(|l| l.comment_lines).unwrap_or(0), + h.map(|h| 
h.n1).unwrap_or(0), + h.map(|h| h.n2).unwrap_or(0), + h.map(|h| h.big_n1).unwrap_or(0), + h.map(|h| h.big_n2).unwrap_or(0), + h.map(|h| h.vocabulary).unwrap_or(0), + h.map(|h| h.length).unwrap_or(0), + h.map(|h| h.volume).unwrap_or(0.0), + h.map(|h| h.difficulty).unwrap_or(0.0), + h.map(|h| h.effort).unwrap_or(0.0), + h.map(|h| h.bugs).unwrap_or(0.0), + cm.maintainability_index.unwrap_or(0.0), + ]); + } + } + } + + for (file, symbols) in file_symbols { + if !analysis_files.contains(file.as_str()) { + continue; + } + for def in &symbols.definitions { + insert_def_complexity(&mut stmt, file, def, node_id_map); + if let Some(ref children) = def.children { + for child in children { + insert_def_complexity(&mut stmt, file, child, node_id_map); + } + } + } + } + + let _ = tx.commit(); +} + +/// Write CFG blocks and edges from parsed definitions to DB tables. +fn write_cfg( + conn: &Connection, + file_symbols: &HashMap, + analysis_files: &HashSet<&str>, + node_id_map: &HashMap<(String, String, u32), i64>, +) { + let tx = match conn.unchecked_transaction() { + Ok(tx) => tx, + Err(_) => return, + }; + + let mut block_stmt = match tx.prepare( + "INSERT INTO cfg_blocks \ + (function_node_id, block_index, block_type, start_line, end_line, label) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6)", + ) { + Ok(s) => s, + Err(_) => return, + }; + + let mut edge_stmt = match tx.prepare( + "INSERT INTO cfg_edges \ + (function_node_id, source_block_id, target_block_id, kind) \ + VALUES (?1, ?2, ?3, ?4)", + ) { + Ok(s) => s, + Err(_) => return, + }; + + for (file, symbols) in file_symbols { + if !analysis_files.contains(file.as_str()) { + continue; + } + for def in &symbols.definitions { + write_def_cfg( + &tx, &mut block_stmt, &mut edge_stmt, + file, def, node_id_map, + ); + if let Some(ref children) = def.children { + for child in children { + write_def_cfg( + &tx, &mut block_stmt, &mut edge_stmt, + file, child, node_id_map, + ); + } + } + } + } + + let _ = tx.commit(); +} + +/// Write CFG data 
for a single definition. +fn write_def_cfg( + tx: &rusqlite::Transaction, + block_stmt: &mut rusqlite::Statement, + edge_stmt: &mut rusqlite::Statement, + file: &str, + def: &crate::types::Definition, + node_id_map: &HashMap<(String, String, u32), i64>, +) { + let cfg = match &def.cfg { + Some(c) if !c.blocks.is_empty() => c, + _ => return, + }; + let key = (file.to_string(), def.name.clone(), def.line); + let node_id = match node_id_map.get(&key) { + Some(&id) => id, + None => return, + }; + + // Insert blocks and track DB IDs for edge resolution + let mut block_db_ids: HashMap = HashMap::new(); + for block in &cfg.blocks { + if block_stmt + .execute(rusqlite::params![ + node_id, + block.index, + &block.block_type, + block.start_line, + block.end_line, + &block.label, + ]) + .is_ok() + { + block_db_ids.insert(block.index, tx.last_insert_rowid()); + } + } + + // Insert edges using resolved block DB IDs + for edge in &cfg.edges { + if let (Some(&src), Some(&tgt)) = ( + block_db_ids.get(&edge.source_index), + block_db_ids.get(&edge.target_index), + ) { + let _ = edge_stmt.execute(rusqlite::params![node_id, src, tgt, &edge.kind]); + } + } +} + +/// Write dataflow edges from parsed FileSymbols to the `dataflow` table. +/// Resolves function names to node IDs using the DB, mirroring the JS +/// `makeNodeResolver` logic (prefer same-file match, fall back to global). 
+fn write_dataflow( + conn: &Connection, + file_symbols: &HashMap, + analysis_files: &HashSet<&str>, +) { + let tx = match conn.unchecked_transaction() { + Ok(tx) => tx, + Err(_) => return, + }; + + let mut insert_stmt = match tx.prepare( + "INSERT INTO dataflow \ + (source_id, target_id, kind, param_index, expression, line, confidence) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", + ) { + Ok(s) => s, + Err(_) => return, + }; + + let mut local_stmt = match tx.prepare( + "SELECT id FROM nodes WHERE name = ?1 AND file = ?2 \ + AND kind IN ('function','method') LIMIT 1", + ) { + Ok(s) => s, + Err(_) => return, + }; + + let mut global_stmt = match tx.prepare( + "SELECT id FROM nodes WHERE name = ?1 \ + AND kind IN ('function','method') \ + ORDER BY file, line LIMIT 1", + ) { + Ok(s) => s, + Err(_) => return, + }; + + for (file, symbols) in file_symbols { + if !analysis_files.contains(file.as_str()) { + continue; + } + let data = match &symbols.dataflow { + Some(d) => d, + None => continue, + }; + + // argFlows → flows_to edges + for flow in &data.arg_flows { + let caller = match &flow.caller_func { + Some(name) => name.as_str(), + None => continue, + }; + let src = resolve_dataflow_node(&mut local_stmt, &mut global_stmt, caller, file); + let tgt = resolve_dataflow_node(&mut local_stmt, &mut global_stmt, &flow.callee_name, file); + if let (Some(src), Some(tgt)) = (src, tgt) { + let _ = insert_stmt.execute(rusqlite::params![ + src, + tgt, + "flows_to", + flow.arg_index, + &flow.expression, + flow.line, + flow.confidence, + ]); + } + } + + // assignments → returns edges + for assignment in &data.assignments { + let consumer = match &assignment.caller_func { + Some(name) => name.as_str(), + None => continue, + }; + let producer = resolve_dataflow_node(&mut local_stmt, &mut global_stmt, &assignment.source_call_name, file); + let consumer_id = resolve_dataflow_node(&mut local_stmt, &mut global_stmt, consumer, file); + if let (Some(producer), Some(consumer_id)) = (producer, 
consumer_id) { + let _ = insert_stmt.execute(rusqlite::params![ + producer, + consumer_id, + "returns", + Option::::None, + &assignment.expression, + assignment.line, + 1.0_f64, + ]); + } + } + + // mutations → mutates edges (only for param bindings) + for mutation in &data.mutations { + if mutation.binding_type.as_deref() != Some("param") { + continue; + } + let func = match &mutation.func_name { + Some(name) => name.as_str(), + None => continue, + }; + if let Some(node_id) = resolve_dataflow_node(&mut local_stmt, &mut global_stmt, func, file) { + let _ = insert_stmt.execute(rusqlite::params![ + node_id, + node_id, + "mutates", + Option::::None, + &mutation.mutating_expr, + mutation.line, + 1.0_f64, + ]); + } + } + } + + let _ = tx.commit(); +} + +/// Resolve a function name to a node ID, trying same-file first then global. +/// Mirrors the JS `makeNodeResolver` logic from `features/dataflow.ts`. +fn resolve_dataflow_node( + local_stmt: &mut rusqlite::Statement, + global_stmt: &mut rusqlite::Statement, + name: &str, + file: &str, +) -> Option { + if let Ok(id) = local_stmt.query_row(rusqlite::params![name, file], |r| r.get::<_, i64>(0)) { + return Some(id); + } + global_stmt + .query_row(rusqlite::params![name], |r| r.get::<_, i64>(0)) + .ok() +} + /// Current time in milliseconds since epoch. fn now_ms() -> f64 { std::time::SystemTime::now() diff --git a/crates/codegraph-core/src/lib.rs b/crates/codegraph-core/src/lib.rs index 5fbe317d..52f9ae41 100644 --- a/crates/codegraph-core/src/lib.rs +++ b/crates/codegraph-core/src/lib.rs @@ -67,6 +67,20 @@ pub fn parse_files( ) } +/// Parse multiple files in parallel with ALL analysis data extracted in a single pass. +/// Always includes: symbols, AST nodes, complexity, CFG, and dataflow. +/// Eliminates the need for any downstream re-parse (WASM or native standalone). 
+#[napi] +pub fn parse_files_full( + file_paths: Vec, + root_dir: String, +) -> Vec { + parallel::parse_files_parallel_full( + &file_paths, + &root_dir, + ) +} + /// Resolve a single import path. #[napi] pub fn resolve_import( diff --git a/crates/codegraph-core/src/parallel.rs b/crates/codegraph-core/src/parallel.rs index 3a8bcba7..65c472b1 100644 --- a/crates/codegraph-core/src/parallel.rs +++ b/crates/codegraph-core/src/parallel.rs @@ -10,6 +10,8 @@ use crate::types::FileSymbols; /// Parse multiple files in parallel using rayon. /// Each thread creates its own Parser (cheap; Language objects are Send+Sync). /// Failed files are silently skipped (matches WASM behavior). +/// All analysis data (symbols, AST nodes, complexity, CFG, dataflow) is always +/// extracted in a single parse pass — no separate re-parse needed downstream. /// When `include_dataflow` is false, dataflow extraction is skipped for performance. /// When `include_ast_nodes` is false, AST node walking is skipped for performance. pub fn parse_files_parallel( @@ -40,6 +42,35 @@ pub fn parse_files_parallel( .collect() } +/// Parse multiple files in parallel, always extracting ALL analysis data: +/// symbols, AST nodes, complexity, CFG, and dataflow in a single parse pass. +/// This eliminates the need for any downstream re-parse (WASM or native standalone). 
+pub fn parse_files_parallel_full( + file_paths: &[String], + _root_dir: &str, +) -> Vec { + file_paths + .par_iter() + .filter_map(|file_path| { + let lang = LanguageKind::from_extension(file_path)?; + let source = fs::read(file_path).ok()?; + let line_count = source.iter().filter(|&&b| b == b'\n').count() as u32 + 1; + + let mut parser = Parser::new(); + parser.set_language(&lang.tree_sitter_language()).ok()?; + + let tree = parser.parse(&source, None)?; + // Always include AST nodes + let mut symbols = + extract_symbols_with_opts(lang, &tree, &source, file_path, true); + // Always extract dataflow + symbols.dataflow = extract_dataflow(&tree, &source, lang.lang_id_str()); + symbols.line_count = Some(line_count); + Some(symbols) + }) + .collect() +} + /// Parse a single file and return its symbols. /// When `include_dataflow` is false, dataflow extraction is skipped for performance. /// When `include_ast_nodes` is false, AST node walking is skipped for performance. diff --git a/scripts/benchmark.ts b/scripts/benchmark.ts index 5b2de8a8..554e89fa 100644 --- a/scripts/benchmark.ts +++ b/scripts/benchmark.ts @@ -37,40 +37,29 @@ if (!isWorker()) { process.exit(1); } + function formatEngineResult(data) { + if (!data) return null; + return { + buildTimeMs: data.buildTimeMs, + queryTimeMs: data.queryTimeMs, + nodes: data.nodes, + edges: data.edges, + dbSizeBytes: data.dbSizeBytes, + perFile: data.perFile, + noopRebuildMs: data.noopRebuildMs, + oneFileRebuildMs: data.oneFileRebuildMs, + oneFilePhases: data.oneFilePhases, + queries: data.queries, + phases: data.phases, + }; + } + const result = { version, date: new Date().toISOString().slice(0, 10), files: primary.files, - wasm: wasm - ? 
{ - buildTimeMs: wasm.buildTimeMs, - queryTimeMs: wasm.queryTimeMs, - nodes: wasm.nodes, - edges: wasm.edges, - dbSizeBytes: wasm.dbSizeBytes, - perFile: wasm.perFile, - noopRebuildMs: wasm.noopRebuildMs, - oneFileRebuildMs: wasm.oneFileRebuildMs, - oneFilePhases: wasm.oneFilePhases, - queries: wasm.queries, - phases: wasm.phases, - } - : null, - native: native - ? { - buildTimeMs: native.buildTimeMs, - queryTimeMs: native.queryTimeMs, - nodes: native.nodes, - edges: native.edges, - dbSizeBytes: native.dbSizeBytes, - perFile: native.perFile, - noopRebuildMs: native.noopRebuildMs, - oneFileRebuildMs: native.oneFileRebuildMs, - oneFilePhases: native.oneFilePhases, - queries: native.queries, - phases: native.phases, - } - : null, + wasm: formatEngineResult(wasm), + native: formatEngineResult(native), }; console.log(JSON.stringify(result, null, 2)); diff --git a/src/ast-analysis/engine.ts b/src/ast-analysis/engine.ts index 5c5c69a3..f92af4a9 100644 --- a/src/ast-analysis/engine.ts +++ b/src/ast-analysis/engine.ts @@ -666,6 +666,79 @@ async function delegateToBuildFunctions( } } +// ─── Native full-analysis fast path ──────────────────────────────────── + +/** + * Check whether all files already have complete analysis data from the native + * parse pass (parseFilesFull). When true, no WASM re-parse or JS visitor walk + * is needed — the engine can skip directly to DB persistence. 
+ */ +function allNativeDataComplete( + fileSymbols: Map, + opts: AnalysisOpts, +): boolean { + const doAst = opts.ast !== false; + const doComplexity = opts.complexity !== false; + const doCfg = opts.cfg !== false; + const doDataflow = opts.dataflow !== false; + + for (const [relPath, symbols] of fileSymbols) { + // If any file has a WASM tree, it was parsed by WASM — not native full + if (symbols._tree) return false; + + const ext = path.extname(relPath).toLowerCase(); + const langId = symbols._langId || ''; + + // AST nodes: native must have produced them + if ( + doAst && + !Array.isArray(symbols.astNodes) && + (WALK_EXTENSIONS.has(ext) || AST_TYPE_MAPS.has(langId)) + ) { + debug(`allNativeDataComplete: ${relPath} missing astNodes`); + return false; + } + + // Dataflow: native must have produced it + if ( + doDataflow && + !symbols.dataflow && + (DATAFLOW_EXTENSIONS.has(ext) || DATAFLOW_RULES.has(langId)) + ) { + debug(`allNativeDataComplete: ${relPath} missing dataflow`); + return false; + } + + const defs = symbols.definitions || []; + for (const def of defs) { + if (!hasFuncBody(def)) continue; + + // Complexity: every function must already have it + if ( + doComplexity && + !def.complexity && + (COMPLEXITY_EXTENSIONS.has(ext) || COMPLEXITY_RULES.has(langId)) + ) { + debug(`allNativeDataComplete: ${relPath}:${def.name} missing complexity`); + return false; + } + + // CFG: every function must already have blocks + if ( + doCfg && + def.cfg !== null && + !Array.isArray(def.cfg?.blocks) && + (CFG_EXTENSIONS.has(ext) || CFG_RULES.has(langId)) + ) { + debug(`allNativeDataComplete: ${relPath}:${def.name} missing cfg blocks`); + return false; + } + } + } + + return fileSymbols.size > 0; +} + // ─── Public API ────────────────────────────────────────────────────────── export async function runAnalyses( @@ -686,6 +759,16 @@ export async function runAnalyses( const extToLang = buildExtToLangMap(); + // Fast path: when all files were parsed by the native engine with 
full analysis + // (parseFilesFull), all data is already present — skip WASM re-parse and JS + // visitor walks entirely, go straight to DB persistence. + if (allNativeDataComplete(fileSymbols, opts)) { + debug('native full-analysis fast path: all data present, skipping WASM/visitor passes'); + if (doComplexity && doCfg) reconcileCfgCyclomatic(fileSymbols); + await delegateToBuildFunctions(db, fileSymbols, rootDir, opts, engineOpts, timing); + return timing; + } + // Native analysis pass: try Rust standalone functions before WASM fallback. // This fills in complexity/CFG/dataflow for files that the native parse pipeline // missed, avoiding the need to parse with WASM + run JS visitors. diff --git a/src/domain/graph/builder/pipeline.ts b/src/domain/graph/builder/pipeline.ts index 65cc17c9..56569904 100644 --- a/src/domain/graph/builder/pipeline.ts +++ b/src/domain/graph/builder/pipeline.ts @@ -120,15 +120,11 @@ function setupPipeline(ctx: PipelineContext): void { const native = enginePref !== 'wasm' ? loadNative() : null; ctx.nativeAvailable = !!native?.NativeDatabase; - // Native-first: use only rusqlite for the entire pipeline (no better-sqlite3). - // This eliminates the dual-connection WAL corruption problem and enables all - // native fast-paths (bulkInsertNodes, classifyRolesFull, etc.). - // Fallback: if native is unavailable or FORCE_JS is set, use better-sqlite3. - if ( - ctx.nativeAvailable && - native?.NativeDatabase && - process.env.CODEGRAPH_FORCE_JS_PIPELINE !== '1' - ) { + // When native is available, use a NativeDbProxy backed by a single rusqlite + // connection. This eliminates the dual-connection WAL corruption problem. + // The Rust orchestrator handles the full pipeline; the proxy is used for any + // JS post-processing (e.g. structure fallback on large builds). 
+ if (ctx.nativeAvailable && native?.NativeDatabase) { try { const dir = path.dirname(ctx.dbPath); if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); @@ -264,13 +260,14 @@ interface NativeOrchestratorResult { structureScope?: string[]; /** Whether the Rust pipeline handled the structure phase (small-incremental fast path). */ structureHandled?: boolean; + /** Whether the Rust pipeline wrote AST/complexity/CFG/dataflow to DB. */ + analysisComplete?: boolean; } // ── Native orchestrator helpers ─────────────────────────────────────── /** Determine whether the native orchestrator should be skipped. Returns a reason string, or null if it should run. */ function shouldSkipNativeOrchestrator(ctx: PipelineContext): string | null { - if (process.env.CODEGRAPH_FORCE_JS_PIPELINE === '1') return 'CODEGRAPH_FORCE_JS_PIPELINE=1'; if (ctx.forceFullRebuild) return 'forceFullRebuild'; // v3.9.0 addon had buggy incremental purge (wrong SQL on analysis tables, // scoped removal over-detection). Fixed in v3.9.1 by PR #865. Gate on @@ -452,78 +449,6 @@ async function runPostNativeStructure( return performance.now() - structureStart; } -/** Run AST/complexity/CFG/dataflow analysis after native orchestrator. */ -async function runPostNativeAnalysis( - ctx: PipelineContext, - allFileSymbols: Map, - changedFiles: string[] | undefined, -): Promise<{ astMs: number; complexityMs: number; cfgMs: number; dataflowMs: number }> { - const timing = { astMs: 0, complexityMs: 0, cfgMs: 0, dataflowMs: 0 }; - - // Scope analysis fileSymbols to changed files only - let analysisFileSymbols: Map; - if (changedFiles && changedFiles.length > 0) { - analysisFileSymbols = new Map(); - for (const f of changedFiles) { - const entry = allFileSymbols.get(f); - if (entry) analysisFileSymbols.set(f, entry); - } - } else { - analysisFileSymbols = allFileSymbols; - } - - // In native-first mode, nativeDb is already open — no reopen needed. 
- if (!ctx.nativeFirstProxy) { - const native = loadNative(); - if (native?.NativeDatabase) { - try { - ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath); - if (ctx.engineOpts) ctx.engineOpts.nativeDb = ctx.nativeDb; - } catch { - ctx.nativeDb = undefined; - if (ctx.engineOpts) ctx.engineOpts.nativeDb = undefined; - } - } - } else if (ctx.engineOpts) { - ctx.engineOpts.nativeDb = ctx.nativeDb; - } - - try { - const { runAnalyses: runAnalysesFn } = await import('../../../ast-analysis/engine.js'); - const result = await runAnalysesFn( - ctx.db, - analysisFileSymbols, - ctx.rootDir, - ctx.opts, - ctx.engineOpts, - ); - timing.astMs = result.astMs ?? 0; - timing.complexityMs = result.complexityMs ?? 0; - timing.cfgMs = result.cfgMs ?? 0; - timing.dataflowMs = result.dataflowMs ?? 0; - } catch (err) { - warn(`Analysis phases failed after native build: ${toErrorMessage(err)}`); - } - - // Close nativeDb after analyses (skip in native-first — single connection stays open) - if (ctx.nativeDb && !ctx.nativeFirstProxy) { - try { - ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)'); - } catch { - /* ignore checkpoint errors */ - } - try { - ctx.nativeDb.close(); - } catch { - /* ignore close errors */ - } - ctx.nativeDb = undefined; - if (ctx.engineOpts) ctx.engineOpts.nativeDb = undefined; - } - - return timing; -} - /** Format timing result from native orchestrator phases + JS post-processing. */ function formatNativeTimingResult( p: Record, @@ -620,43 +545,32 @@ async function tryNativeOrchestrator( ); // ── Post-native structure + analysis ────────────────────────────── - let analysisTiming = { astMs: 0, complexityMs: 0, cfgMs: 0, dataflowMs: 0 }; + const analysisTiming = { + astMs: +(p.astMs ?? 0), + complexityMs: +(p.complexityMs ?? 0), + cfgMs: +(p.cfgMs ?? 0), + dataflowMs: +(p.dataflowMs ?? 
0), + }; let structurePatchMs = 0; - const needsAnalysis = - ctx.opts.ast !== false || - ctx.opts.complexity !== false || - ctx.opts.cfg !== false || - ctx.opts.dataflow !== false; // Skip JS structure when the Rust pipeline's small-incremental fast path // already handled it. For full builds and large incrementals where Rust // skipped structure, we must run the JS fallback. const needsStructure = !result.structureHandled; - if (needsAnalysis || needsStructure) { + if (needsStructure) { // In native-first mode the proxy is already wired — no WAL handoff needed. if (!ctx.nativeFirstProxy && !handoffWalAfterNativeBuild(ctx)) { // DB reopen failed — return partial result return formatNativeTimingResult(p, 0, analysisTiming); } - // When structure was handled by Rust, we only need changed files for - // analysis — no need to load the entire graph from DB. When structure - // was NOT handled, we need all files to build the complete directory tree. - const scopeFiles = needsStructure ? undefined : result.changedFiles; - const fileSymbols = reconstructFileSymbolsFromDb(ctx, scopeFiles); - - if (needsStructure) { - structurePatchMs = await runPostNativeStructure( - ctx, - fileSymbols, - !!result.isFullBuild, - result.structureScope ?? result.changedFiles, - ); - } - - if (needsAnalysis) { - analysisTiming = await runPostNativeAnalysis(ctx, fileSymbols, result.changedFiles); - } + const fileSymbols = reconstructFileSymbolsFromDb(ctx); + structurePatchMs = await runPostNativeStructure( + ctx, + fileSymbols, + !!result.isFullBuild, + result.structureScope ?? 
result.changedFiles, + ); } closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb }); @@ -666,30 +580,7 @@ async function tryNativeOrchestrator( // ── Pipeline stages execution ─────────────────────────────────────────── async function runPipelineStages(ctx: PipelineContext): Promise { - // ── Native-first mode ──────────────────────────────────────────────── - // When ctx.nativeFirstProxy is true, ctx.db is a NativeDbProxy backed by - // the single rusqlite connection (ctx.nativeDb). No dual-connection WAL - // dance is needed — every stage uses the same connection transparently. - if (ctx.nativeFirstProxy) { - // Ensure engineOpts.nativeDb is set so stages can use dedicated native methods. - if (ctx.engineOpts) { - ctx.engineOpts.nativeDb = ctx.nativeDb; - } - - await collectFiles(ctx); - await detectChanges(ctx); - if (ctx.earlyExit) return; - await parseFiles(ctx); - await insertNodes(ctx); - await resolveImports(ctx); - await buildEdges(ctx); - await buildStructure(ctx); - await runAnalyses(ctx); - await finalize(ctx); - return; - } - - // ── Legacy dual-connection mode (WASM / fallback) ──────────────────── + // ── WASM / fallback dual-connection mode ───────────────────────────── // NativeDatabase is deferred — not opened during setup. collectFiles and // detectChanges only need better-sqlite3. If no files changed, we exit // early without ever opening the native connection, saving ~5ms. 
diff --git a/src/domain/parser.ts b/src/domain/parser.ts index 97272262..57bd116e 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -780,7 +780,7 @@ export async function parseFileAuto( const { native } = resolveEngine(opts); if (native) { - const result = native.parseFile(filePath, source, !!opts.dataflow, opts.ast !== false); + const result = native.parseFile(filePath, source, true, true); if (!result) return null; const patched = patchNativeResult(result); // Always backfill typeMap for TS/TSX from WASM — native parser's type @@ -878,7 +878,11 @@ export async function parseFilesAuto( if (!native) return parseFilesWasm(filePaths, rootDir); const result = new Map(); - const nativeResults = native.parseFiles(filePaths, rootDir, !!opts.dataflow, opts.ast !== false); + // Always extract all analysis data (dataflow + AST nodes) during native parse. + // This eliminates the need for any downstream WASM re-parse or native standalone calls. + const nativeResults = native.parseFilesFull + ? native.parseFilesFull(filePaths, rootDir) + : native.parseFiles(filePaths, rootDir, true, true); const needsTypeMap: { filePath: string; relPath: string }[] = []; for (const r of nativeResults) { if (!r) continue; diff --git a/src/features/ast.ts b/src/features/ast.ts index 56119380..c31b6690 100644 --- a/src/features/ast.ts +++ b/src/features/ast.ts @@ -115,8 +115,8 @@ function tryNativeBulkInsert( receiver: n.receiver ?? 
'', })), }); - } else if (symbols.calls || symbols._tree) { - return false; // needs JS fallback + } else if (symbols._tree) { + return false; // has WASM tree not yet processed — needs JS fallback } } diff --git a/src/features/complexity.ts b/src/features/complexity.ts index 9d481057..509d0347 100644 --- a/src/features/complexity.ts +++ b/src/features/complexity.ts @@ -545,6 +545,10 @@ function collectNativeBulkRows( const rows: Array> = []; for (const [relPath, symbols] of fileSymbols) { + const ext = path.extname(relPath).toLowerCase(); + const langId = symbols._langId || ''; + const langSupported = COMPLEXITY_EXTENSIONS.has(ext) || COMPLEXITY_RULES.has(langId); + for (const def of symbols.definitions) { if (def.kind !== 'function' && def.kind !== 'method') continue; if (!def.line) continue; @@ -554,6 +558,9 @@ function collectNativeBulkRows( // of the native bulk-insert path for every TypeScript codebase (#846). if (!def.complexity) { if (def.name.includes('.') || !def.endLine || def.endLine <= def.line) continue; + // Languages without complexity rules will never have data — skip them + // rather than bailing out of the entire native bulk path. 
+ if (!langSupported) continue; return null; // genuine function body missing complexity — needs JS fallback } const nodeId = getFunctionNodeId(db, def.name, relPath, def.line); diff --git a/src/types.ts b/src/types.ts index 8c6fc7fc..d8d7b00c 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1874,6 +1874,7 @@ export type StmtCache = WeakMap, From 95d0142db658ba3ce4c5f996b94952a780efcc79 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 10 Apr 2026 00:58:26 -0600 Subject: [PATCH 5/9] chore: update package-lock.json --- package-lock.json | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/package-lock.json b/package-lock.json index 0ba321d1..bc40f9b2 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1314,9 +1314,6 @@ "cpu": [ "arm64" ], - "libc": [ - "glibc" - ], "license": "Apache-2.0", "optional": true, "os": [ @@ -1330,9 +1327,6 @@ "cpu": [ "x64" ], - "libc": [ - "glibc" - ], "license": "Apache-2.0", "optional": true, "os": [ @@ -1346,9 +1340,6 @@ "cpu": [ "x64" ], - "libc": [ - "musl" - ], "license": "Apache-2.0", "optional": true, "os": [ @@ -7292,6 +7283,7 @@ "resolved": "git+ssh://git@github.com/gleam-lang/tree-sitter-gleam.git#0153f8b875cd02034b553f3a84a2f5ee67a80364", "integrity": "sha512-BEC6Ti8xkVezSjitXVg6y+Hzin9VaoG+lcVGy73QCGB66wZBb2UCWbxvrdaBpioNyruYvDyxSPk/NECzT7QWKw==", "dev": true, + "hasInstallScript": true, "license": "Apache-2.0", "dependencies": { "nan": "^2.18.0" From 7c5830f381d533bb2c2e6527e8d2b65aaa94604b Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 10 Apr 2026 01:07:07 -0600 Subject: [PATCH 6/9] fix(native): track analysis write success and respect complexity opt (#907) - write_complexity/write_cfg/write_dataflow now return bool reflecting whether the transaction committed successfully. 
analysis_complete is only true when all enabled stages actually succeeded, preventing silent data loss on incremental builds with no fallback. - Add complexity field to BuildOpts so write_complexity respects the opts.complexity flag, matching JS pipeline behavior. - Batch build_analysis_node_map into a single temp-table join query instead of N per-file prepared-statement executions. --- crates/codegraph-core/src/build_pipeline.rs | 92 ++++++++++++++------- crates/codegraph-core/src/config.rs | 4 + 2 files changed, 65 insertions(+), 31 deletions(-) diff --git a/crates/codegraph-core/src/build_pipeline.rs b/crates/codegraph-core/src/build_pipeline.rs index 66927f2d..d76860ff 100644 --- a/crates/codegraph-core/src/build_pipeline.rs +++ b/crates/codegraph-core/src/build_pipeline.rs @@ -403,9 +403,11 @@ pub fn run_pipeline( // ── Stage 8b: Analysis persistence (AST, complexity, CFG, dataflow) ── // Write analysis data from parsed file_symbols directly to DB tables, // eliminating the JS runPostNativeAnalysis step and its WASM re-parse. 
+ let include_complexity = opts.complexity.unwrap_or(true); let include_cfg = opts.cfg.unwrap_or(true); - let do_analysis = include_ast || include_dataflow || include_cfg; + let do_analysis = include_ast || include_dataflow || include_cfg || include_complexity; + let mut analysis_ok = true; if do_analysis { // Determine which files to analyze (excludes reverse-dep files) let analysis_file_set: HashSet<&str> = match &analysis_scope { @@ -420,28 +422,36 @@ pub fn run_pipeline( if include_ast { let t0 = Instant::now(); let ast_batches = build_ast_batches(&file_symbols, &analysis_file_set); - let _ = ast_db::do_insert_ast_nodes(conn, &ast_batches); + if ast_db::do_insert_ast_nodes(conn, &ast_batches).is_err() { + analysis_ok = false; + } timing.ast_ms = t0.elapsed().as_secs_f64() * 1000.0; } // Complexity metrics - { + if include_complexity { let t0 = Instant::now(); - write_complexity(conn, &file_symbols, &analysis_file_set, &node_id_map); + if !write_complexity(conn, &file_symbols, &analysis_file_set, &node_id_map) { + analysis_ok = false; + } timing.complexity_ms = t0.elapsed().as_secs_f64() * 1000.0; } // CFG blocks + edges if include_cfg { let t0 = Instant::now(); - write_cfg(conn, &file_symbols, &analysis_file_set, &node_id_map); + if !write_cfg(conn, &file_symbols, &analysis_file_set, &node_id_map) { + analysis_ok = false; + } timing.cfg_ms = t0.elapsed().as_secs_f64() * 1000.0; } // Dataflow edges if include_dataflow { let t0 = Instant::now(); - write_dataflow(conn, &file_symbols, &analysis_file_set); + if !write_dataflow(conn, &file_symbols, &analysis_file_set) { + analysis_ok = false; + } timing.dataflow_ms = t0.elapsed().as_secs_f64() * 1000.0; } } @@ -481,7 +491,7 @@ pub fn run_pipeline( is_full_build: change_result.is_full_build, structure_scope: changed_file_list.clone(), structure_handled: use_fast_path, - analysis_complete: do_analysis, + analysis_complete: do_analysis && analysis_ok, }) } @@ -1005,27 +1015,47 @@ fn build_analysis_node_map( files: 
&HashSet<&str>, ) -> HashMap<(String, String, u32), i64> { let mut map = HashMap::new(); + if files.is_empty() { + return map; + } + + // Use a temp table to batch all file lookups into a single join query, + // avoiding N per-file round-trips through prepared-statement execution. + let _ = conn.execute_batch( + "CREATE TEMP TABLE IF NOT EXISTS _analysis_files (file TEXT NOT NULL)", + ); + let _ = conn.execute("DELETE FROM temp._analysis_files", []); + + if let Ok(mut ins) = conn.prepare("INSERT INTO temp._analysis_files (file) VALUES (?1)") { + for file in files { + let _ = ins.execute(rusqlite::params![file]); + } + } + let mut stmt = match conn.prepare( - "SELECT id, file, name, line FROM nodes WHERE file = ?1 AND kind != 'file'", + "SELECT n.id, n.file, n.name, n.line FROM nodes n \ + INNER JOIN temp._analysis_files af ON n.file = af.file \ + WHERE n.kind != 'file'", ) { Ok(s) => s, Err(_) => return map, }; - for file in files { - if let Ok(rows) = stmt.query_map(rusqlite::params![file], |row| { - Ok(( - row.get::<_, i64>(0)?, - row.get::<_, String>(1)?, - row.get::<_, String>(2)?, - row.get::<_, u32>(3)?, - )) - }) { - for row in rows.flatten() { - let (id, file, name, line) = row; - map.insert((file, name, line), id); - } + + if let Ok(rows) = stmt.query_map([], |row| { + Ok(( + row.get::<_, i64>(0)?, + row.get::<_, String>(1)?, + row.get::<_, String>(2)?, + row.get::<_, u32>(3)?, + )) + }) { + for row in rows.flatten() { + let (id, file, name, line) = row; + map.insert((file, name, line), id); } } + + let _ = conn.execute("DROP TABLE IF EXISTS temp._analysis_files", []); map } @@ -1063,10 +1093,10 @@ fn write_complexity( file_symbols: &HashMap, analysis_files: &HashSet<&str>, node_id_map: &HashMap<(String, String, u32), i64>, -) { +) -> bool { let tx = match conn.unchecked_transaction() { Ok(tx) => tx, - Err(_) => return, + Err(_) => return false, }; let mut stmt = match tx.prepare( @@ -1080,7 +1110,7 @@ fn write_complexity( VALUES 
(?1,?2,?3,?4,?5,?6,?7,?8,?9,?10,?11,?12,?13,?14,?15,?16,?17,?18)", ) { Ok(s) => s, - Err(_) => return, + Err(_) => return false, }; fn insert_def_complexity( @@ -1132,7 +1162,7 @@ fn write_complexity( } } - let _ = tx.commit(); + tx.commit().is_ok() } /// Write CFG blocks and edges from parsed definitions to DB tables. @@ -1141,10 +1171,10 @@ fn write_cfg( file_symbols: &HashMap, analysis_files: &HashSet<&str>, node_id_map: &HashMap<(String, String, u32), i64>, -) { +) -> bool { let tx = match conn.unchecked_transaction() { Ok(tx) => tx, - Err(_) => return, + Err(_) => return false, }; let mut block_stmt = match tx.prepare( @@ -1185,7 +1215,7 @@ fn write_cfg( } } - let _ = tx.commit(); + tx.commit().is_ok() } /// Write CFG data for a single definition. @@ -1243,10 +1273,10 @@ fn write_dataflow( conn: &Connection, file_symbols: &HashMap, analysis_files: &HashSet<&str>, -) { +) -> bool { let tx = match conn.unchecked_transaction() { Ok(tx) => tx, - Err(_) => return, + Err(_) => return false, }; let mut insert_stmt = match tx.prepare( @@ -1349,7 +1379,7 @@ fn write_dataflow( } } - let _ = tx.commit(); + tx.commit().is_ok() } /// Resolve a function name to a node ID, trying same-file first then global. diff --git a/crates/codegraph-core/src/config.rs b/crates/codegraph-core/src/config.rs index eba0df07..af2805bc 100644 --- a/crates/codegraph-core/src/config.rs +++ b/crates/codegraph-core/src/config.rs @@ -63,6 +63,10 @@ pub struct BuildOpts { #[serde(default)] pub ast: Option, + /// Whether to include complexity metrics. + #[serde(default)] + pub complexity: Option, + /// Whether to include CFG analysis. 
#[serde(default)] pub cfg: Option, From 4161f5f818a82b191024a9487322b111cf9dec20 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 10 Apr 2026 01:18:12 -0600 Subject: [PATCH 7/9] fix(native): fix Rust compile errors in write function return types (#907) - Change bare `return` to `return false` in write_cfg and write_dataflow since they now return bool - Drop prepared statements before tx.commit() to release borrows on the transaction, fixing E0505 move-out-of-borrowed errors --- crates/codegraph-core/src/build_pipeline.rs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/crates/codegraph-core/src/build_pipeline.rs b/crates/codegraph-core/src/build_pipeline.rs index d76860ff..53a58f14 100644 --- a/crates/codegraph-core/src/build_pipeline.rs +++ b/crates/codegraph-core/src/build_pipeline.rs @@ -1162,6 +1162,7 @@ fn write_complexity( } } + drop(stmt); // release borrow on tx before commit tx.commit().is_ok() } @@ -1183,7 +1184,7 @@ fn write_cfg( VALUES (?1, ?2, ?3, ?4, ?5, ?6)", ) { Ok(s) => s, - Err(_) => return, + Err(_) => return false, }; let mut edge_stmt = match tx.prepare( @@ -1192,7 +1193,7 @@ fn write_cfg( VALUES (?1, ?2, ?3, ?4)", ) { Ok(s) => s, - Err(_) => return, + Err(_) => return false, }; for (file, symbols) in file_symbols { @@ -1215,6 +1216,8 @@ fn write_cfg( } } + drop(block_stmt); + drop(edge_stmt); tx.commit().is_ok() } @@ -1285,7 +1288,7 @@ fn write_dataflow( VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7)", ) { Ok(s) => s, - Err(_) => return, + Err(_) => return false, }; let mut local_stmt = match tx.prepare( @@ -1293,7 +1296,7 @@ fn write_dataflow( AND kind IN ('function','method') LIMIT 1", ) { Ok(s) => s, - Err(_) => return, + Err(_) => return false, }; let mut global_stmt = match tx.prepare( @@ -1302,7 +1305,7 @@ fn write_dataflow( ORDER BY file, line LIMIT 1", ) { Ok(s) => s, - Err(_) => return, + Err(_) => return false, }; for (file, symbols) in file_symbols { @@ 
-1379,6 +1382,9 @@ fn write_dataflow( } } + drop(insert_stmt); + drop(local_stmt); + drop(global_stmt); tx.commit().is_ok() } From 0eaa9a605ae5515674da18e18b3de27604689073 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Fri, 10 Apr 2026 11:06:02 -0600 Subject: [PATCH 8/9] fix(native): fix NativeDbProxy lifecycle on forceFullRebuild and add analysis fallback (#907) Two fixes for the native-first pipeline mode: 1. When the native orchestrator is skipped (e.g., forceFullRebuild from version/engine/schema mismatch), suspendNativeDb closes the backing NativeDatabase but ctx.db remains a NativeDbProxy wrapping the closed connection. The JS pipeline stages then fail with "NativeDatabase is closed". Fix: swap the proxy for a real better-sqlite3 connection after suspending. 2. The PR removed runPostNativeAnalysis but the Rust addon may not include analysis persistence (older addon or analysis failure). When result.analysisComplete is not true, fall back to JS-side analysis via the restored runPostNativeAnalysis function. This ensures complexity/CFG/dataflow data is written regardless of which addon version is installed. 
--- src/domain/graph/builder/pipeline.ts | 158 +++++++++++++++++++++++++-- 1 file changed, 146 insertions(+), 12 deletions(-) diff --git a/src/domain/graph/builder/pipeline.ts b/src/domain/graph/builder/pipeline.ts index 56569904..668576fb 100644 --- a/src/domain/graph/builder/pipeline.ts +++ b/src/domain/graph/builder/pipeline.ts @@ -23,7 +23,13 @@ import { loadNative } from '../../../infrastructure/native.js'; import { semverCompare } from '../../../infrastructure/update-check.js'; import { toErrorMessage } from '../../../shared/errors.js'; import { CODEGRAPH_VERSION } from '../../../shared/version.js'; -import type { BuildGraphOpts, BuildResult, Definition, ExtractorOutput } from '../../../types.js'; +import type { + BetterSqlite3Database, + BuildGraphOpts, + BuildResult, + Definition, + ExtractorOutput, +} from '../../../types.js'; import { getActiveEngine } from '../../parser.js'; import { setWorkspaces } from '../resolve.js'; import { PipelineContext } from './context.js'; @@ -449,6 +455,106 @@ async function runPostNativeStructure( return performance.now() - structureStart; } +/** + * JS fallback for AST/complexity/CFG/dataflow analysis after native orchestrator. + * Used when the Rust addon doesn't include analysis persistence (older addon + * version) or when analysis failed on the Rust side. 
+ */ +async function runPostNativeAnalysis( + ctx: PipelineContext, + allFileSymbols: Map, + changedFiles: string[] | undefined, +): Promise<{ astMs: number; complexityMs: number; cfgMs: number; dataflowMs: number }> { + const timing = { astMs: 0, complexityMs: 0, cfgMs: 0, dataflowMs: 0 }; + + // Scope analysis fileSymbols to changed files only + let analysisFileSymbols: Map; + if (changedFiles && changedFiles.length > 0) { + analysisFileSymbols = new Map(); + for (const f of changedFiles) { + const entry = allFileSymbols.get(f); + if (entry) analysisFileSymbols.set(f, entry); + } + } else { + analysisFileSymbols = allFileSymbols; + } + + // Reopen nativeDb for analysis features (suspend/resume WAL pattern). + const native = loadNative(); + if (native?.NativeDatabase) { + try { + ctx.nativeDb = native.NativeDatabase.openReadWrite(ctx.dbPath); + if (ctx.engineOpts) ctx.engineOpts.nativeDb = ctx.nativeDb; + } catch { + ctx.nativeDb = undefined; + if (ctx.engineOpts) ctx.engineOpts.nativeDb = undefined; + } + } + + // Wire up WAL checkpoint callbacks for the analysis engine + if (ctx.nativeDb && ctx.engineOpts) { + ctx.engineOpts.suspendJsDb = () => { + ctx.db.pragma('wal_checkpoint(TRUNCATE)'); + }; + ctx.engineOpts.resumeJsDb = () => { + try { + ctx.nativeDb?.exec('PRAGMA wal_checkpoint(TRUNCATE)'); + } catch (e) { + debug( + `resumeJsDb: WAL checkpoint failed (nativeDb may already be closed): ${toErrorMessage(e)}`, + ); + } + }; + } + + try { + const { runAnalyses: runAnalysesFn } = (await import('../../../ast-analysis/engine.js')) as { + runAnalyses: ( + db: BetterSqlite3Database, + fileSymbols: Map, + rootDir: string, + opts: Record, + engineOpts?: Record, + ) => Promise<{ astMs?: number; complexityMs?: number; cfgMs?: number; dataflowMs?: number }>; + }; + const result = await runAnalysesFn( + ctx.db, + analysisFileSymbols, + ctx.rootDir, + ctx.opts as Record, + ctx.engineOpts as Record | undefined, + ); + timing.astMs = result.astMs ?? 
0; + timing.complexityMs = result.complexityMs ?? 0; + timing.cfgMs = result.cfgMs ?? 0; + timing.dataflowMs = result.dataflowMs ?? 0; + } catch (err) { + warn(`Analysis phases failed after native build: ${toErrorMessage(err)}`); + } + + // Close nativeDb after analyses + if (ctx.nativeDb) { + try { + ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)'); + } catch { + /* ignore checkpoint errors */ + } + try { + ctx.nativeDb.close(); + } catch { + /* ignore close errors */ + } + ctx.nativeDb = undefined; + if (ctx.engineOpts) { + ctx.engineOpts.nativeDb = undefined; + ctx.engineOpts.suspendJsDb = undefined; + ctx.engineOpts.resumeJsDb = undefined; + } + } + + return timing; +} + /** Format timing result from native orchestrator phases + JS post-processing. */ function formatNativeTimingResult( p: Record, @@ -545,7 +651,7 @@ async function tryNativeOrchestrator( ); // ── Post-native structure + analysis ────────────────────────────── - const analysisTiming = { + let analysisTiming = { astMs: +(p.astMs ?? 0), complexityMs: +(p.complexityMs ?? 0), cfgMs: +(p.cfgMs ?? 0), @@ -556,21 +662,42 @@ async function tryNativeOrchestrator( // already handled it. For full builds and large incrementals where Rust // skipped structure, we must run the JS fallback. const needsStructure = !result.structureHandled; - - if (needsStructure) { - // In native-first mode the proxy is already wired — no WAL handoff needed. - if (!ctx.nativeFirstProxy && !handoffWalAfterNativeBuild(ctx)) { + // When the Rust addon doesn't include analysis persistence (older addon + // version or analysis failed), fall back to JS-side analysis. 
+ const needsAnalysisFallback = + !result.analysisComplete && + (ctx.opts.ast !== false || + ctx.opts.complexity !== false || + ctx.opts.cfg !== false || + ctx.opts.dataflow !== false); + + if (needsStructure || needsAnalysisFallback) { + // When analysis fallback is needed, handoff to better-sqlite3 — the + // analysis engine uses the suspend/resume WAL pattern that requires a + // real better-sqlite3 connection, not the NativeDbProxy. + if (needsAnalysisFallback && ctx.nativeFirstProxy) { + closeNativeDb(ctx, 'pre-analysis-fallback'); + ctx.db = openDb(ctx.dbPath); + ctx.nativeFirstProxy = false; + } else if (!ctx.nativeFirstProxy && !handoffWalAfterNativeBuild(ctx)) { // DB reopen failed — return partial result return formatNativeTimingResult(p, 0, analysisTiming); } const fileSymbols = reconstructFileSymbolsFromDb(ctx); - structurePatchMs = await runPostNativeStructure( - ctx, - fileSymbols, - !!result.isFullBuild, - result.structureScope ?? result.changedFiles, - ); + + if (needsStructure) { + structurePatchMs = await runPostNativeStructure( + ctx, + fileSymbols, + !!result.isFullBuild, + result.structureScope ?? result.changedFiles, + ); + } + + if (needsAnalysisFallback) { + analysisTiming = await runPostNativeAnalysis(ctx, fileSymbols, result.changedFiles); + } } closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb }); @@ -588,6 +715,13 @@ async function runPipelineStages(ctx: PipelineContext): Promise { // suspend it now to avoid dual-connection WAL corruption during stages. if (ctx.db && ctx.nativeDb) { suspendNativeDb(ctx, 'pre-collect'); + // When nativeFirstProxy is true, ctx.db is a NativeDbProxy wrapping the + // now-closed NativeDatabase. Replace it with a real better-sqlite3 + // connection so the JS pipeline stages can operate normally. 
+    if (ctx.nativeFirstProxy) {
+      ctx.db = openDb(ctx.dbPath);
+      ctx.nativeFirstProxy = false;
+    }
   }
 
   await collectFiles(ctx);

From 9c11ef8e9d3dbf71545afddd452a06dad4eb76a3 Mon Sep 17 00:00:00 2001
From: carlos-alm <127798846+carlos-alm@users.noreply.github.com>
Date: Fri, 10 Apr 2026 19:11:17 -0600
Subject: [PATCH 9/9] fix(native): cast EngineOpts through unknown for Record
 conversion (#907)

TypeScript rejects direct cast from EngineOpts to Record<string, unknown>
because EngineOpts contains function properties (suspendJsDb, resumeJsDb)
and NativeDatabase that don't overlap with Record<string, unknown>. Cast
through unknown first to satisfy the type checker.
---
 src/domain/graph/builder/pipeline.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/domain/graph/builder/pipeline.ts b/src/domain/graph/builder/pipeline.ts
index 668576fb..ea67bf4a 100644
--- a/src/domain/graph/builder/pipeline.ts
+++ b/src/domain/graph/builder/pipeline.ts
@@ -522,7 +522,7 @@ async function runPostNativeAnalysis(
     analysisFileSymbols,
     ctx.rootDir,
     ctx.opts as Record<string, unknown>,
-    ctx.engineOpts as Record<string, unknown> | undefined,
+    ctx.engineOpts as unknown as Record<string, unknown> | undefined,
   );
   timing.astMs = result.astMs ?? 0;
   timing.complexityMs = result.complexityMs ?? 0;