optave · carlos-alm · Apr 9, 2026 · Apr 9, 2026 · Apr 9, 2026 · Apr 10, 2026
diff --git a/crates/codegraph-core/src/build_pipeline.rs b/crates/codegraph-core/src/build_pipeline.rs
diff --git a/crates/codegraph-core/src/config.rs b/crates/codegraph-core/src/config.rs
@@ -63,6 +63,10 @@ pub struct BuildOpts {
     #[serde(default)]
     pub ast: Option<bool>,
 
+    /// Whether to include complexity metrics.
+    #[serde(default)]
+    pub complexity: Option<bool>,
+
     /// Whether to include CFG analysis.
     #[serde(default)]
     pub cfg: Option<bool>,

diff --git a/crates/codegraph-core/src/lib.rs b/crates/codegraph-core/src/lib.rs
@@ -67,6 +67,20 @@ pub fn parse_files(
     )
 }
 
+/// N-API entry point: parse `file_paths` in parallel, extracting all analysis data in one pass.
+/// Forwards both arguments to `parallel::parse_files_parallel_full` and returns its result as-is
+/// (symbols, AST nodes, complexity, CFG, and dataflow), so callers need no downstream re-parse.
+#[napi]
+pub fn parse_files_full(
+    file_paths: Vec<String>,
+    root_dir: String,
+) -> Vec<FileSymbols> {
+    parallel::parse_files_parallel_full(
+        &file_paths,
+        &root_dir,
+    )
+}
+
 /// Resolve a single import path.
 #[napi]
 pub fn resolve_import(

diff --git a/crates/codegraph-core/src/parallel.rs b/crates/codegraph-core/src/parallel.rs
@@ -10,6 +10,8 @@ use crate::types::FileSymbols;
 /// Parse multiple files in parallel using rayon.
 /// Each thread creates its own Parser (cheap; Language objects are Send+Sync).
 /// Failed files are silently skipped (matches WASM behavior).
+/// Symbols, complexity, and CFG are always extracted in a single parse pass; AST nodes
+/// and dataflow are extracted in that same pass unless disabled by the flags below.
 /// When `include_dataflow` is false, dataflow extraction is skipped for performance.
 /// When `include_ast_nodes` is false, AST node walking is skipped for performance.
 pub fn parse_files_parallel(
@@ -40,6 +42,35 @@ pub fn parse_files_parallel(
         .collect()
 }
 
+/// Parse `file_paths` in parallel, always requesting AST nodes and dataflow; complexity and CFG
+/// presumably come from `extract_symbols_with_opts` (TODO confirm). A file is omitted when its
+/// language lookup, read, parser setup, or parse fails. `_root_dir` is currently unused.
+pub fn parse_files_parallel_full(
+    file_paths: &[String],
+    _root_dir: &str,
+) -> Vec<FileSymbols> {
+    file_paths
+        .par_iter()
+        .filter_map(|file_path| {
+            let lang = LanguageKind::from_extension(file_path)?;
+            let source = fs::read(file_path).ok()?;
+            let line_count = source.iter().filter(|&&b| b == b'\n').count() as u32 + 1;
+
+            let mut parser = Parser::new();
+            parser.set_language(&lang.tree_sitter_language()).ok()?;
+
+            let tree = parser.parse(&source, None)?;
+            // Final `true` argument: always include AST nodes
+            let mut symbols =
+                extract_symbols_with_opts(lang, &tree, &source, file_path, true);
+            // Always extract dataflow (this variant has no opt-out flag)
+            symbols.dataflow = extract_dataflow(&tree, &source, lang.lang_id_str());
+            symbols.line_count = Some(line_count); // newline count + 1, so an empty file reports 1
+            Some(symbols)
+        })
+        .collect()
+}
+
 /// Parse a single file and return its symbols.
 /// When `include_dataflow` is false, dataflow extraction is skipped for performance.
 /// When `include_ast_nodes` is false, AST node walking is skipped for performance.

diff --git a/package-lock.json b/package-lock.json
diff --git a/scripts/benchmark.ts b/scripts/benchmark.ts
@@ -37,40 +37,29 @@ if (!isWorker()) {
 		process.exit(1);
 	}
 
+	function formatEngineResult(data) { // copy the reported fields from one engine's benchmark data
+		if (!data) return null; // no data for this engine: keep an explicit null in the output
+		return {
+			buildTimeMs: data.buildTimeMs,
+			queryTimeMs: data.queryTimeMs,
+			nodes: data.nodes,
+			edges: data.edges,
+			dbSizeBytes: data.dbSizeBytes,
+			perFile: data.perFile,
+			noopRebuildMs: data.noopRebuildMs,
+			oneFileRebuildMs: data.oneFileRebuildMs,
+			oneFilePhases: data.oneFilePhases,
+			queries: data.queries,
+			phases: data.phases,
+		};
+	}
+
 	const result = {
 		version,
 		date: new Date().toISOString().slice(0, 10),
 		files: primary.files,
-		wasm: wasm
-			? {
-					buildTimeMs: wasm.buildTimeMs,
-					queryTimeMs: wasm.queryTimeMs,
-					nodes: wasm.nodes,
-					edges: wasm.edges,
-					dbSizeBytes: wasm.dbSizeBytes,
-					perFile: wasm.perFile,
-					noopRebuildMs: wasm.noopRebuildMs,
-					oneFileRebuildMs: wasm.oneFileRebuildMs,
-					oneFilePhases: wasm.oneFilePhases,
-					queries: wasm.queries,
-					phases: wasm.phases,
-				}
-			: null,
-		native: native
-			? {
-					buildTimeMs: native.buildTimeMs,
-					queryTimeMs: native.queryTimeMs,
-					nodes: native.nodes,
-					edges: native.edges,
-					dbSizeBytes: native.dbSizeBytes,
-					perFile: native.perFile,
-					noopRebuildMs: native.noopRebuildMs,
-					oneFileRebuildMs: native.oneFileRebuildMs,
-					oneFilePhases: native.oneFilePhases,
-					queries: native.queries,
-					phases: native.phases,
-				}
-			: null,
+		wasm: formatEngineResult(wasm),
+		native: formatEngineResult(native),
 	};
 
 	console.log(JSON.stringify(result, null, 2));

diff --git a/src/ast-analysis/engine.ts b/src/ast-analysis/engine.ts
@@ -666,6 +666,79 @@ async function delegateToBuildFunctions(
   }
 }
 
+// ─── Native full-analysis fast path ────────────────────────────────────
+
+/**
+ * Check whether every file already has all analysis data enabled in `opts` (anything not set
+ * to `false`) from the native parse pass (parseFilesFull), so no WASM re-parse or JS visitor
+ * walk is needed. Returns false for an empty map or when any file carries a WASM `_tree`.
+ */
+function allNativeDataComplete(
+  fileSymbols: Map<string, ExtractorOutput>,
+  opts: AnalysisOpts,
+): boolean {
+  const doAst = opts.ast !== false;
+  const doComplexity = opts.complexity !== false;
+  const doCfg = opts.cfg !== false;
+  const doDataflow = opts.dataflow !== false;
+
+  for (const [relPath, symbols] of fileSymbols) {
+    // If any file has a WASM tree, it was parsed by WASM — not native full
+    if (symbols._tree) return false;
+
+    const ext = path.extname(relPath).toLowerCase();
+    const langId = symbols._langId || '';
+
+    // AST nodes: must be an array (may be empty) if ext is in WALK_EXTENSIONS or langId in AST_TYPE_MAPS
+    if (
+      doAst &&
+      !Array.isArray(symbols.astNodes) &&
+      (WALK_EXTENSIONS.has(ext) || AST_TYPE_MAPS.has(langId))
+    ) {
+      debug(`allNativeDataComplete: ${relPath} missing astNodes`);
+      return false;
+    }
+
+    // Dataflow: must be truthy if ext is in DATAFLOW_EXTENSIONS or langId in DATAFLOW_RULES
+    if (
+      doDataflow &&
+      !symbols.dataflow &&
+      (DATAFLOW_EXTENSIONS.has(ext) || DATAFLOW_RULES.has(langId))
+    ) {
+      debug(`allNativeDataComplete: ${relPath} missing dataflow`);
+      return false;
+    }
+
+    const defs = symbols.definitions || [];
+    for (const def of defs) {
+      if (!hasFuncBody(def)) continue; // only definitions accepted by hasFuncBody are checked
+
+      // Complexity: must be truthy if ext is in COMPLEXITY_EXTENSIONS or langId in COMPLEXITY_RULES
+      if (
+        doComplexity &&
+        !def.complexity &&
+        (COMPLEXITY_EXTENSIONS.has(ext) || COMPLEXITY_RULES.has(langId))
+      ) {
+        debug(`allNativeDataComplete: ${relPath}:${def.name} missing complexity`);
+        return false;
+      }
+
+      // CFG: explicit `cfg: null` passes (presumably "no CFG" — confirm); else `blocks` must be an array
+      if (
+        doCfg &&
+        def.cfg !== null &&
+        !Array.isArray(def.cfg?.blocks) &&
+        (CFG_EXTENSIONS.has(ext) || CFG_RULES.has(langId))
+      ) {
+        debug(`allNativeDataComplete: ${relPath}:${def.name} missing cfg blocks`);
+        return false;
+      }
+    }
+  }
+
+  return fileSymbols.size > 0; // an empty map never qualifies for the fast path
+}
+
 // ─── Public API ──────────────────────────────────────────────────────────
 
 export async function runAnalyses(
@@ -686,6 +759,16 @@ export async function runAnalyses(
 
   const extToLang = buildExtToLangMap();
 
+  // Fast path: when all files were parsed by the native engine with full analysis
+  // (parseFilesFull), all data is already present — skip WASM re-parse and JS
+  // visitor walks entirely, go straight to DB persistence.
+  if (allNativeDataComplete(fileSymbols, opts)) {
+    debug('native full-analysis fast path: all data present, skipping WASM/visitor passes');
+    if (doComplexity && doCfg) reconcileCfgCyclomatic(fileSymbols);
+    await delegateToBuildFunctions(db, fileSymbols, rootDir, opts, engineOpts, timing);
+    return timing;
+  }
+
   // Native analysis pass: try Rust standalone functions before WASM fallback.
   // This fills in complexity/CFG/dataflow for files that the native parse pipeline
   // missed, avoiding the need to parse with WASM + run JS visitors.