Skip to content
467 changes: 467 additions & 0 deletions crates/codegraph-core/src/build_pipeline.rs

Large diffs are not rendered by default.

4 changes: 4 additions & 0 deletions crates/codegraph-core/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ pub struct BuildOpts {
#[serde(default)]
pub ast: Option<bool>,

/// Whether to include complexity metrics.
#[serde(default)]
pub complexity: Option<bool>,

/// Whether to include CFG analysis.
#[serde(default)]
pub cfg: Option<bool>,
Expand Down
14 changes: 14 additions & 0 deletions crates/codegraph-core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,20 @@ pub fn parse_files(
)
}

/// Node-facing entry point for the "full analysis" parse.
///
/// Forwards `file_paths` and `root_dir` (as borrowed slices) to
/// [`parallel::parse_files_parallel_full`] and returns whatever it produces:
/// one [`FileSymbols`] per file that could be parsed. That function is
/// documented as extracting symbols, AST nodes, complexity, CFG, and dataflow
/// in a single pass, so callers should not need a follow-up re-parse
/// (WASM or native standalone).
#[napi]
pub fn parse_files_full(file_paths: Vec<String>, root_dir: String) -> Vec<FileSymbols> {
    // Borrow the owned N-API arguments; the parallel layer only needs views.
    let paths: &[String] = file_paths.as_slice();
    let root: &str = root_dir.as_str();
    parallel::parse_files_parallel_full(paths, root)
}

/// Resolve a single import path.
#[napi]
pub fn resolve_import(
Expand Down
31 changes: 31 additions & 0 deletions crates/codegraph-core/src/parallel.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ use crate::types::FileSymbols;
/// Parse multiple files in parallel using rayon.
/// Each thread creates its own Parser (cheap; Language objects are Send+Sync).
/// Failed files are silently skipped (matches WASM behavior).
/// Symbols, complexity, and CFG are always extracted in a single parse pass; AST
/// nodes and dataflow are extracted in that same pass when enabled (see below),
/// so no separate re-parse is needed downstream for the data that was requested.
/// When `include_dataflow` is false, dataflow extraction is skipped for performance.
/// When `include_ast_nodes` is false, AST node walking is skipped for performance.
pub fn parse_files_parallel(
Expand Down Expand Up @@ -40,6 +42,35 @@ pub fn parse_files_parallel(
.collect()
}

/// Full-analysis variant of the parallel parser.
///
/// Every path in `file_paths` is processed on the rayon pool. For each file
/// the source is read once, parsed once, and the resulting tree is used to
/// build a [`FileSymbols`] with AST nodes requested, dataflow attached, and
/// `line_count` filled in. Complexity and CFG are expected to come out of
/// `extract_symbols_with_opts` itself, so no downstream re-parse (WASM or
/// native standalone) should be required.
///
/// A file is silently dropped from the result when its extension maps to no
/// known language, it cannot be read, the grammar cannot be installed on the
/// parser, or the parser yields no tree.
///
/// `_root_dir` is accepted for signature parity but is not used here.
pub fn parse_files_parallel_full(
    file_paths: &[String],
    _root_dir: &str,
) -> Vec<FileSymbols> {
    file_paths
        .par_iter()
        .filter_map(|path| {
            let kind = LanguageKind::from_extension(path)?;
            let bytes = fs::read(path).ok()?;

            // One parser per task; it is configured for this file's grammar only.
            let mut ts_parser = Parser::new();
            ts_parser.set_language(&kind.tree_sitter_language()).ok()?;
            let syntax_tree = ts_parser.parse(&bytes, None)?;

            // Line count = number of '\n' bytes + 1.
            let newline_total = bytes.iter().copied().filter(|byte| *byte == b'\n').count();

            // `true` => always walk and collect AST nodes.
            let mut file_symbols =
                extract_symbols_with_opts(kind, &syntax_tree, &bytes, path, true);
            // Dataflow is unconditional in the "full" variant.
            file_symbols.dataflow = extract_dataflow(&syntax_tree, &bytes, kind.lang_id_str());
            file_symbols.line_count = Some(newline_total as u32 + 1);

            Some(file_symbols)
        })
        .collect()
}

/// Parse a single file and return its symbols.
/// When `include_dataflow` is false, dataflow extraction is skipped for performance.
/// When `include_ast_nodes` is false, AST node walking is skipped for performance.
Expand Down
10 changes: 1 addition & 9 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

49 changes: 19 additions & 30 deletions scripts/benchmark.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,40 +37,29 @@ if (!isWorker()) {
process.exit(1);
}

/**
 * Project one engine's raw benchmark record down to the fields that are
 * published in the report.
 *
 * @param data Raw record for an engine, or a falsy value when that engine
 *   was not benchmarked.
 * @returns `null` for a falsy `data`; otherwise a new object holding exactly
 *   the eleven reported fields, in a fixed order. Any other properties on
 *   `data` are dropped, and fields absent from `data` come through as
 *   `undefined`.
 */
function formatEngineResult(data) {
  if (data) {
    const {
      buildTimeMs,
      queryTimeMs,
      nodes,
      edges,
      dbSizeBytes,
      perFile,
      noopRebuildMs,
      oneFileRebuildMs,
      oneFilePhases,
      queries,
      phases,
    } = data;

    // Shorthand properties keep the same key order as the destructuring above.
    return {
      buildTimeMs,
      queryTimeMs,
      nodes,
      edges,
      dbSizeBytes,
      perFile,
      noopRebuildMs,
      oneFileRebuildMs,
      oneFilePhases,
      queries,
      phases,
    };
  }

  return null;
}

// Final benchmark report that gets serialized to stdout.
// `date` is the current UTC date as YYYY-MM-DD (first 10 chars of the ISO string).
// Each engine section is built by formatEngineResult, which yields `null` when
// that engine produced no data. The `wasm` and `native` keys are defined exactly
// once: duplicate property names in a single object literal are rejected by the
// TypeScript compiler, and the earlier inline copies were dead anyway because
// the later property wins.
const result = {
  version,
  date: new Date().toISOString().slice(0, 10),
  files: primary.files,
  wasm: formatEngineResult(wasm),
  native: formatEngineResult(native),
};

console.log(JSON.stringify(result, null, 2));
Expand Down
83 changes: 83 additions & 0 deletions src/ast-analysis/engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -666,6 +666,79 @@ async function delegateToBuildFunctions(
}
}

// ─── Native full-analysis fast path ────────────────────────────────────

/**
 * Decide whether the native parse pass already delivered every analysis
 * artifact that `opts` asks for, for every file in `fileSymbols`.
 *
 * An analysis counts as requested unless its flag in `opts` is exactly
 * `false`. The function answers `false` as soon as it meets a file that
 *   - carries a `_tree` (it went through the WASM parser), or
 *   - lacks a requested artifact that its extension / language id is
 *     eligible for (AST nodes, dataflow, per-function complexity, or
 *     per-function CFG blocks; a function whose `cfg` is `null` is accepted).
 * An empty map also yields `false`.
 *
 * When the answer is `true` the caller can skip the WASM re-parse and the JS
 * visitor walks and go straight to DB persistence.
 */
function allNativeDataComplete(
  fileSymbols: Map<string, ExtractorOutput>,
  opts: AnalysisOpts,
): boolean {
  const wantAst = opts.ast !== false;
  const wantComplexity = opts.complexity !== false;
  const wantCfg = opts.cfg !== false;
  const wantDataflow = opts.dataflow !== false;

  // With no files there is nothing the fast path could persist.
  if (fileSymbols.size === 0) return false;

  for (const entry of fileSymbols) {
    const relPath = entry[0];
    const symbols = entry[1];

    // A retained WASM tree means this file did not come from the native full parse.
    if (symbols._tree) return false;

    const extension = path.extname(relPath).toLowerCase();
    const languageId = symbols._langId || '';

    // AST nodes must already be an array for eligible files.
    if (wantAst && !Array.isArray(symbols.astNodes)) {
      if (WALK_EXTENSIONS.has(extension) || AST_TYPE_MAPS.has(languageId)) {
        debug(`allNativeDataComplete: ${relPath} missing astNodes`);
        return false;
      }
    }

    // Dataflow must already be attached for eligible files.
    if (wantDataflow && !symbols.dataflow) {
      if (DATAFLOW_EXTENSIONS.has(extension) || DATAFLOW_RULES.has(languageId)) {
        debug(`allNativeDataComplete: ${relPath} missing dataflow`);
        return false;
      }
    }

    // Per-function artifacts are only checked on definitions that have a body.
    for (const def of symbols.definitions || []) {
      if (hasFuncBody(def)) {
        if (wantComplexity && !def.complexity) {
          if (COMPLEXITY_EXTENSIONS.has(extension) || COMPLEXITY_RULES.has(languageId)) {
            debug(`allNativeDataComplete: ${relPath}:${def.name} missing complexity`);
            return false;
          }
        }

        // `cfg === null` is accepted as-is; anything else must expose a `blocks` array.
        if (wantCfg && !(def.cfg === null || Array.isArray(def.cfg?.blocks))) {
          if (CFG_EXTENSIONS.has(extension) || CFG_RULES.has(languageId)) {
            debug(`allNativeDataComplete: ${relPath}:${def.name} missing cfg blocks`);
            return false;
          }
        }
      }
    }
  }

  return true;
}

// ─── Public API ──────────────────────────────────────────────────────────

export async function runAnalyses(
Expand All @@ -686,6 +759,16 @@ export async function runAnalyses(

const extToLang = buildExtToLangMap();

// Fast path: when all files were parsed by the native engine with full analysis
// (parseFilesFull), all data is already present — skip WASM re-parse and JS
// visitor walks entirely, go straight to DB persistence.
if (allNativeDataComplete(fileSymbols, opts)) {
debug('native full-analysis fast path: all data present, skipping WASM/visitor passes');
if (doComplexity && doCfg) reconcileCfgCyclomatic(fileSymbols);
await delegateToBuildFunctions(db, fileSymbols, rootDir, opts, engineOpts, timing);
return timing;
}

// Native analysis pass: try Rust standalone functions before WASM fallback.
// This fills in complexity/CFG/dataflow for files that the native parse pipeline
// missed, avoiding the need to parse with WASM + run JS visitors.
Expand Down
Loading
Loading