optave
diff --git a/‎CLAUDE.md‎
Lines changed: 18 additions & 1 deletion b/‎CLAUDE.md‎
Lines changed: 18 additions & 1 deletion
diff --git a/‎README.md‎
Lines changed: 14 additions & 6 deletions b/‎README.md‎
Lines changed: 14 additions & 6 deletions
diff --git a/‎docs/dogfooding-guide.md‎
Lines changed: 102 additions & 0 deletions b/‎docs/dogfooding-guide.md‎
Lines changed: 102 additions & 0 deletions
diff --git a/‎docs/recommended-practices.md‎
Lines changed: 8 additions & 2 deletions b/‎docs/recommended-practices.md‎
Lines changed: 8 additions & 2 deletions
diff --git a/‎src/builder.js‎
Lines changed: 44 additions & 7 deletions b/‎src/builder.js‎
Lines changed: 44 additions & 7 deletions
@@ -45,7 +45,7 @@ JS source is plain JavaScript (ES modules) in `src/`. No transpilation step. The
 | `queries.js` | Query functions: symbol search, file deps, impact analysis, diff-impact; `SYMBOL_KINDS` constant defines all node kinds |
 | `embedder.js` | Semantic search with `@huggingface/transformers`; multi-query RRF ranking |
 | `db.js` | SQLite schema and operations (`better-sqlite3`) |
-| `mcp.js` | MCP server exposing graph queries to AI agents |
+| `mcp.js` | MCP server exposing graph queries to AI agents; single-repo by default, `--multi-repo` to enable cross-repo access |
 | `cycles.js` | Circular dependency detection |
 | `export.js` | DOT/Mermaid/JSON graph export |
 | `watcher.js` | Watch mode for incremental rebuilds |
@@ -66,6 +66,7 @@ JS source is plain JavaScript (ES modules) in `src/`. No transpilation step. The
 - Non-required parsers (all except JS/TS/TSX) fail gracefully if their WASM grammar is unavailable
 - Import resolution uses a 6-level priority system with confidence scoring (import-aware → same-file → directory → parent → global → method hierarchy)
 - Incremental builds track file hashes in the DB to skip unchanged files
+- **MCP single-repo isolation:** `startMCPServer` defaults to single-repo mode — tools have no `repo` property and `list_repos` is not exposed. Passing `--multi-repo` or `--repos` to the CLI (or `options.multiRepo` / `options.allowedRepos` programmatically) enables multi-repo access. `buildToolList(multiRepo)` builds the tool list dynamically; the backward-compatible `TOOLS` export equals `buildToolList(true)`
 - **Credential resolution:** `loadConfig` pipeline is `mergeConfig → applyEnvOverrides → resolveSecrets`. The `apiKeyCommand` config field shells out to an external secret manager via `execFileSync` (no shell). Priority: command output > env var > file config > defaults. On failure, warns and falls back gracefully
 
 **Database:** SQLite at `.codegraph/graph.db` with tables: `nodes`, `edges`, `metadata`, `embeddings`
@@ -94,9 +95,25 @@ Releases are triggered via the `publish.yml` workflow (`workflow_dispatch`). By
 
 The workflow can be overridden with a specific version via the `version-override` input. Locally, `npm run release:dry-run` previews the bump and changelog.
 
+## Dogfooding — codegraph on itself
+
+Codegraph is **our own tool**. Use it to analyze this repository before making changes:
+
+```bash
+node src/cli.js build .              # Build/update the graph
+node src/cli.js cycles               # Check for circular dependencies
+node src/cli.js map --limit 20       # Module overview & coupling hotspots
+node src/cli.js diff-impact main     # See impact of current branch changes
+node src/cli.js fn <name>            # Trace function-level dependency chains
+node src/cli.js deps src/<file>.js   # See what imports/depends on a file
+```
+
+If codegraph reports an error, crashes, or produces wrong results when analyzing itself, **fix the bug in the codebase** — don't just work around it. This is the best way to find and resolve real issues.
+
 ## Git Conventions
 
 - Never add AI co-authorship lines (`Co-Authored-By` or similar) to commit messages.
+- Never add "Built with Claude Code", "Generated with Claude Code", or any variation referencing Claude Code or Anthropic to commit messages, PR descriptions, code comments, or any other output.
 
 ## Node Version
 
 
@@ -128,7 +128,7 @@ codegraph deps src/index.ts  # file-level import/export map
 | 📤 | **Export** | DOT (Graphviz), Mermaid, and JSON graph export |
 | 🧠 | **Semantic search** | Embeddings-powered natural language search with multi-query RRF ranking |
 | 👀 | **Watch mode** | Incrementally update the graph as files change |
-| 🤖 | **MCP server** | 12-tool MCP server with multi-repo support for AI assistants |
+| 🤖 | **MCP server** | 13-tool MCP server for AI assistants; single-repo by default, opt-in multi-repo |
 | 🔒 | **Fully local** | No network calls, no data exfiltration, SQLite-backed |
 
 ## 📦 Commands
@@ -215,7 +215,7 @@ The model used during `embed` is stored in the database, so `search` auto-detect
 
 ### Multi-Repo Registry
 
-Manage a global registry of codegraph-enabled projects. AI agents can query any registered repo from a single MCP session using the `repo` parameter.
+Manage a global registry of codegraph-enabled projects. The registry stores paths to your built graphs so the MCP server can query them when multi-repo mode is enabled.
 
 ```bash
 codegraph registry list        # List all registered repos
@@ -230,9 +230,13 @@ codegraph registry remove <name>  # Unregister
 ### AI Integration
 
 ```bash
-codegraph mcp                  # Start MCP server for AI assistants
+codegraph mcp                  # Start MCP server (single-repo, current project only)
+codegraph mcp --multi-repo     # Enable access to all registered repos
+codegraph mcp --repos a,b      # Restrict to specific repos (implies --multi-repo)
 ```
 
+By default, the MCP server only exposes the local project's graph. AI agents cannot access other repositories unless you explicitly opt in with `--multi-repo` or `--repos`.
+
 ### Common Flags
 
 | Flag | Description |
@@ -324,13 +328,17 @@ Benchmarked on a ~3,200-file TypeScript project:
 
 ### MCP Server
 
-Codegraph includes a built-in [Model Context Protocol](https://modelcontextprotocol.io/) server with 12 tools, so AI assistants can query your dependency graph directly:
+Codegraph includes a built-in [Model Context Protocol](https://modelcontextprotocol.io/) server with 13 tools, so AI assistants can query your dependency graph directly:
 
 ```bash
-codegraph mcp
+codegraph mcp                  # Single-repo mode (default) — only local project
+codegraph mcp --multi-repo     # Multi-repo — all registered repos accessible
+codegraph mcp --repos a,b      # Multi-repo with allowlist
 ```
 
-All MCP tools accept an optional `repo` parameter to target any registered repository. Use `list_repos` to see available repos. When `repo` is omitted, the local `.codegraph/graph.db` is used (backwards compatible).
+**Single-repo mode (default):** Tools operate only on the local `.codegraph/graph.db`. The `repo` parameter and `list_repos` tool are not exposed to the AI agent.
+
+**Multi-repo mode (`--multi-repo`):** All tools gain an optional `repo` parameter to target any registered repository, and `list_repos` becomes available. Use `--repos` to restrict which repos the agent can access.
 
 ### CLAUDE.md / Agent Instructions
 
 
@@ -0,0 +1,102 @@
+# Codegraph Dogfooding Guide
+
+This guide covers codegraph analyzing its own codebase. It documents findings from self-analysis and lists improvements — both automated fixes already applied and items requiring human judgment.
+
+## Running the Self-Analysis
+
+```bash
+# Build the graph (from repo root)
+node src/cli.js build .
+
+# Core analysis commands
+node src/cli.js cycles                    # Circular dependency check
+node src/cli.js cycles --functions        # Function-level cycles
+node src/cli.js map --limit 20 --json     # Module coupling overview
+node src/cli.js diff-impact main --json   # Impact of current branch
+node src/cli.js deps src/<file>.js        # File dependency inspection
+node src/cli.js fn <name>                 # Function call chain trace
+node src/cli.js fn-impact <name>          # What breaks if function changes
+```
+
+## Action Items
+
+These findings require human judgment to address properly:
+
+### HIGH PRIORITY
+
+#### 1. parser.js is a 2200+ line monolith (47 function definitions)
+**Found by:** `codegraph deps src/parser.js` and `codegraph map`
+
+`parser.js` has the highest fan-in (14 files import it) and contains extractors for **all 11 languages** in a single file. Each language extractor (Python, Go, Rust, Java, C#, PHP, Ruby, HCL) has its own `walk()` function, creating duplicate names that confuse function-level analysis.
+
+**Recommendation:** Split per-language extractors into separate files under `src/extractors/`:
+```
+src/extractors/
+  javascript.js    # JS/TS/TSX extractor (currently inline)
+  python.js        # extractPythonSymbols + findPythonParentClass + walk
+  go.js            # extractGoSymbols + walk
+  rust.js          # extractRustSymbols + extractRustUsePath + walk
+  java.js          # extractJavaSymbols + findJavaParentClass + walk
+  csharp.js        # extractCSharpSymbols + extractCSharpBaseTypes + walk
+  ruby.js          # extractRubySymbols + findRubyParentClass + walk
+  php.js           # extractPHPSymbols + findPHPParentClass + walk
+  hcl.js           # extractHCLSymbols + walk
+```
+**Impact:** Would improve codegraph's own function-level analysis (no more ambiguous `walk` matches), make each extractor independently testable, and reduce the cognitive load of the file.
+
+**Note:** The Rust native engine already has this structure (`crates/codegraph-core/src/extractors/`), so aligning the WASM extractors with it would bring the two into parity.
+
+
+### MEDIUM PRIORITY
+
+#### 3. builder.js has the highest fan-out (7 dependencies)
+**Found by:** `codegraph map`
+
+`builder.js` imports from 7 modules: config, constants, db, logger, parser, resolve, and structure. As the build orchestrator this is somewhat expected, but it also means a change to any of those modules can ripple into builder.js.
+
+**Recommendation:** Consider whether the lazy-loading pattern already used for the `structure.js` integration (a dynamic import) could also apply to other optional post-build steps.
+
+#### 4. watcher.js fan-out vs fan-in imbalance (5 out, 2 in)
+**Found by:** `codegraph map`
+
+The watcher depends on 5 modules but only 2 modules reference it. This suggests it might be pulling in more than it needs.
+
+**Recommendation:** Review whether watcher.js can use more targeted imports or lazy-load some dependencies.
+
+#### 5. diff-impact runs git in temp directories (test fragility)
+**Found by:** Integration test output showing `git diff --no-index` errors in temp directories
+
+The `diff-impact` command runs `git diff`, which fails in the non-git temp directories used by tests. The error output is noisy but doesn't fail the test.
+
+**Recommendation:** Guard the git call or skip gracefully when not in a git repo.
+
+### LOW PRIORITY
+
+#### 6. Consider adding a `codegraph stats` command
+There's no single command that shows a quick overview of graph health: node/edge counts, cycle count, top coupling hotspots, fan-out outliers. Currently you need to run `map`, `cycles`, and read the build output separately.
+
+#### 7. Embed and search the codebase itself
+Running `codegraph embed .` and then `codegraph search "build dependency graph"` on the codegraph repo would exercise the embedding pipeline and could surface naming/discoverability issues in the API.
+
+## Known Environment Issue
+
+On this workstation, changes to files not already tracked as modified on the current git branch (`docs/architecture-audit`) get reverted by an external process (likely a VS Code extension). If you're applying the parser.js cycle fix, do it from a fresh branch or commit immediately.
+
+## Periodic Self-Check Routine
+
+Run this after significant changes:
+
+```bash
+# 1. Rebuild the graph
+node src/cli.js build .
+
+# 2. Check for regressions
+node src/cli.js cycles            # Should be 0 file-level cycles
+node src/cli.js map --limit 10    # Verify no new coupling hotspots
+
+# 3. Check impact of your changes
+node src/cli.js diff-impact main
+
+# 4. Run tests
+npm test
+```
@@ -132,10 +132,16 @@ Speed up CI by caching `.codegraph/`:
 Start the MCP server so AI assistants can query your graph:
 
 ```bash
-codegraph mcp
+codegraph mcp                  # Single-repo mode (default) — only local project
+codegraph mcp --multi-repo     # Multi-repo — all registered repos accessible
+codegraph mcp --repos a,b      # Multi-repo with allowlist
 ```
 
-The server exposes tools for `query_function`, `file_deps`, `impact_analysis`, `find_cycles`, and `module_map`.
+By default, the MCP server runs in **single-repo mode** — the AI agent can only query the current project's graph. The `repo` parameter and `list_repos` tool are not exposed, preventing agents from silently accessing other codebases.
+
+Enable `--multi-repo` to let the agent query any registered repository, or use `--repos` to restrict access to a specific set of repos.
+
+The server exposes tools for `query_function`, `file_deps`, `impact_analysis`, `find_cycles`, `module_map`, `fn_deps`, `fn_impact`, `diff_impact`, `semantic_search`, `export_graph`, `list_functions`, `structure`, and `hotspots`.
 
 ### CLAUDE.md for your project
 
 
@@ -10,18 +10,20 @@ import { computeConfidence, resolveImportPath, resolveImportsBatch } from './res
 
 export { resolveImportPath } from './resolve.js';
 
-export function collectFiles(dir, files = [], config = {}) {
+export function collectFiles(dir, files = [], config = {}, directories = null) {
+  const trackDirs = directories !== null;
   let entries;
   try {
     entries = fs.readdirSync(dir, { withFileTypes: true });
   } catch (err) {
     warn(`Cannot read directory ${dir}: ${err.message}`);
-    return files;
+    return trackDirs ? { files, directories } : files;
   }
 
   // Merge config ignoreDirs with defaults
   const extraIgnore = config.ignoreDirs ? new Set(config.ignoreDirs) : null;
 
+  let hasFiles = false;
   for (const entry of entries) {
     if (entry.name.startsWith('.') && entry.name !== '.') {
       if (IGNORE_DIRS.has(entry.name)) continue;
@@ -32,12 +34,16 @@ export function collectFiles(dir, files = [], config = {}) {
 
     const full = path.join(dir, entry.name);
     if (entry.isDirectory()) {
-      collectFiles(full, files, config);
+      collectFiles(full, files, config, directories);
     } else if (EXTENSIONS.has(path.extname(entry.name))) {
       files.push(full);
+      hasFiles = true;
     }
   }
-  return files;
+  if (trackDirs && hasFiles) {
+    directories.add(dir);
+  }
+  return trackDirs ? { files, directories } : files;
 }
 
 export function loadPathAliases(rootDir) {
@@ -163,7 +169,9 @@ export async function buildGraph(rootDir, opts = {}) {
     );
   }
 
-  const files = collectFiles(rootDir, [], config);
+  const collected = collectFiles(rootDir, [], config, new Set());
+  const files = collected.files;
+  const discoveredDirs = collected.directories;
   console.log(`Found ${files.length} files to parse`);
 
   // Check for incremental build
@@ -179,23 +187,28 @@ export async function buildGraph(rootDir, opts = {}) {
 
   if (isFullBuild) {
     db.exec(
-      'PRAGMA foreign_keys = OFF; DELETE FROM edges; DELETE FROM nodes; PRAGMA foreign_keys = ON;',
+      'PRAGMA foreign_keys = OFF; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM nodes; PRAGMA foreign_keys = ON;',
     );
   } else {
     console.log(`Incremental: ${changed.length} changed, ${removed.length} removed`);
-    // Remove nodes/edges for changed and removed files
+    // Remove metrics/edges/nodes for changed and removed files
     const deleteNodesForFile = db.prepare('DELETE FROM nodes WHERE file = ?');
     const deleteEdgesForFile = db.prepare(`
       DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = @f)
       OR target_id IN (SELECT id FROM nodes WHERE file = @f)
     `);
+    const deleteMetricsForFile = db.prepare(
+      'DELETE FROM node_metrics WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)',
+    );
     for (const relPath of removed) {
       deleteEdgesForFile.run({ f: relPath });
+      deleteMetricsForFile.run(relPath);
       deleteNodesForFile.run(relPath);
     }
     for (const item of changed) {
       const relPath = item.relPath || normalizePath(path.relative(rootDir, item.file));
       deleteEdgesForFile.run({ f: relPath });
+      deleteMetricsForFile.run(relPath);
       deleteNodesForFile.run(relPath);
     }
   }
@@ -539,6 +552,30 @@ export async function buildGraph(rootDir, opts = {}) {
   });
   buildEdges();
 
+  // Build line count map for structure metrics
+  const lineCountMap = new Map();
+  for (const [relPath] of fileSymbols) {
+    const absPath = path.join(rootDir, relPath);
+    try {
+      const content = fs.readFileSync(absPath, 'utf-8');
+      lineCountMap.set(relPath, content.split('\n').length);
+    } catch {
+      lineCountMap.set(relPath, 0);
+    }
+  }
+
+  // Build directory structure, containment edges, and metrics
+  const relDirs = new Set();
+  for (const absDir of discoveredDirs) {
+    relDirs.add(normalizePath(path.relative(rootDir, absDir)));
+  }
+  try {
+    const { buildStructure } = await import('./structure.js');
+    buildStructure(db, fileSymbols, rootDir, lineCountMap, relDirs);
+  } catch (err) {
+    debug(`Structure analysis failed: ${err.message}`);
+  }
+
   const nodeCount = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c;
   console.log(`Graph built: ${nodeCount} nodes, ${edgeCount} edges`);
   console.log(`Stored in ${dbPath}`);