Skip to content

Commit a413ea7

Browse files
feat: harden multi-repo registry and add structural analysis
Registry hardening (3 of 4 audit concerns):
- Add pruneRegistry() to remove stale entries where the repo directory no longer exists
- Add a --repos allowlist on the MCP server for repo-level access control
- Auto-suffix name collisions in registerRepo (api → api-2) when no explicit name is given

Structural analysis (new):
- Add src/structure.js with directory nodes, containment edges, and metrics (symbol density, avg fan-out, cohesion scores)
- Add structure/hotspots CLI commands
- Extend DOT/Mermaid export with directory clusters
- Add 'directory' and 'contains' kinds to the DB schema

CLI additions:
- codegraph registry prune
- codegraph mcp --repos <names>
- codegraph structure [dir]
- codegraph hotspots
1 parent 43a79ee commit a413ea7

File tree

13 files changed

+1633
-19
lines changed

13 files changed

+1633
-19
lines changed

src/builder.js

Lines changed: 44 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -10,18 +10,20 @@ import { computeConfidence, resolveImportPath, resolveImportsBatch } from './res
1010

1111
export { resolveImportPath } from './resolve.js';
1212

13-
export function collectFiles(dir, files = [], config = {}) {
13+
export function collectFiles(dir, files = [], config = {}, directories = null) {
14+
const trackDirs = directories !== null;
1415
let entries;
1516
try {
1617
entries = fs.readdirSync(dir, { withFileTypes: true });
1718
} catch (err) {
1819
warn(`Cannot read directory ${dir}: ${err.message}`);
19-
return files;
20+
return trackDirs ? { files, directories } : files;
2021
}
2122

2223
// Merge config ignoreDirs with defaults
2324
const extraIgnore = config.ignoreDirs ? new Set(config.ignoreDirs) : null;
2425

26+
let hasFiles = false;
2527
for (const entry of entries) {
2628
if (entry.name.startsWith('.') && entry.name !== '.') {
2729
if (IGNORE_DIRS.has(entry.name)) continue;
@@ -32,12 +34,16 @@ export function collectFiles(dir, files = [], config = {}) {
3234

3335
const full = path.join(dir, entry.name);
3436
if (entry.isDirectory()) {
35-
collectFiles(full, files, config);
37+
collectFiles(full, files, config, directories);
3638
} else if (EXTENSIONS.has(path.extname(entry.name))) {
3739
files.push(full);
40+
hasFiles = true;
3841
}
3942
}
40-
return files;
43+
if (trackDirs && hasFiles) {
44+
directories.add(dir);
45+
}
46+
return trackDirs ? { files, directories } : files;
4147
}
4248

4349
export function loadPathAliases(rootDir) {
@@ -163,7 +169,9 @@ export async function buildGraph(rootDir, opts = {}) {
163169
);
164170
}
165171

166-
const files = collectFiles(rootDir, [], config);
172+
const collected = collectFiles(rootDir, [], config, new Set());
173+
const files = collected.files;
174+
const discoveredDirs = collected.directories;
167175
console.log(`Found ${files.length} files to parse`);
168176

169177
// Check for incremental build
@@ -179,23 +187,28 @@ export async function buildGraph(rootDir, opts = {}) {
179187

180188
if (isFullBuild) {
181189
db.exec(
182-
'PRAGMA foreign_keys = OFF; DELETE FROM edges; DELETE FROM nodes; PRAGMA foreign_keys = ON;',
190+
'PRAGMA foreign_keys = OFF; DELETE FROM node_metrics; DELETE FROM edges; DELETE FROM nodes; PRAGMA foreign_keys = ON;',
183191
);
184192
} else {
185193
console.log(`Incremental: ${changed.length} changed, ${removed.length} removed`);
186-
// Remove nodes/edges for changed and removed files
194+
// Remove metrics/edges/nodes for changed and removed files
187195
const deleteNodesForFile = db.prepare('DELETE FROM nodes WHERE file = ?');
188196
const deleteEdgesForFile = db.prepare(`
189197
DELETE FROM edges WHERE source_id IN (SELECT id FROM nodes WHERE file = @f)
190198
OR target_id IN (SELECT id FROM nodes WHERE file = @f)
191199
`);
200+
const deleteMetricsForFile = db.prepare(
201+
'DELETE FROM node_metrics WHERE node_id IN (SELECT id FROM nodes WHERE file = ?)',
202+
);
192203
for (const relPath of removed) {
193204
deleteEdgesForFile.run({ f: relPath });
205+
deleteMetricsForFile.run(relPath);
194206
deleteNodesForFile.run(relPath);
195207
}
196208
for (const item of changed) {
197209
const relPath = item.relPath || normalizePath(path.relative(rootDir, item.file));
198210
deleteEdgesForFile.run({ f: relPath });
211+
deleteMetricsForFile.run(relPath);
199212
deleteNodesForFile.run(relPath);
200213
}
201214
}
@@ -539,6 +552,30 @@ export async function buildGraph(rootDir, opts = {}) {
539552
});
540553
buildEdges();
541554

555+
// Build line count map for structure metrics
556+
const lineCountMap = new Map();
557+
for (const [relPath] of fileSymbols) {
558+
const absPath = path.join(rootDir, relPath);
559+
try {
560+
const content = fs.readFileSync(absPath, 'utf-8');
561+
lineCountMap.set(relPath, content.split('\n').length);
562+
} catch {
563+
lineCountMap.set(relPath, 0);
564+
}
565+
}
566+
567+
// Build directory structure, containment edges, and metrics
568+
const relDirs = new Set();
569+
for (const absDir of discoveredDirs) {
570+
relDirs.add(normalizePath(path.relative(rootDir, absDir)));
571+
}
572+
try {
573+
const { buildStructure } = await import('./structure.js');
574+
buildStructure(db, fileSymbols, rootDir, lineCountMap, relDirs);
575+
} catch (err) {
576+
debug(`Structure analysis failed: ${err.message}`);
577+
}
578+
542579
const nodeCount = db.prepare('SELECT COUNT(*) as c FROM nodes').get().c;
543580
console.log(`Graph built: ${nodeCount} nodes, ${edgeCount} edges`);
544581
console.log(`Stored in ${dbPath}`);

src/cli.js

Lines changed: 75 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,13 @@ import {
1919
moduleMap,
2020
queryName,
2121
} from './queries.js';
22-
import { listRepos, REGISTRY_PATH, registerRepo, unregisterRepo } from './registry.js';
22+
import {
23+
listRepos,
24+
pruneRegistry,
25+
REGISTRY_PATH,
26+
registerRepo,
27+
unregisterRepo,
28+
} from './registry.js';
2329
import { watchProject } from './watcher.js';
2430

2531
const program = new Command();
@@ -187,9 +193,14 @@ program
187193
.command('mcp')
188194
.description('Start MCP (Model Context Protocol) server for AI assistant integration')
189195
.option('-d, --db <path>', 'Path to graph.db')
196+
.option('--repos <names>', 'Comma-separated list of allowed repo names (restricts access)')
190197
.action(async (opts) => {
191198
const { startMCPServer } = await import('./mcp.js');
192-
await startMCPServer(opts.db);
199+
const mcpOpts = {};
200+
if (opts.repos) {
201+
mcpOpts.allowedRepos = opts.repos.split(',').map((s) => s.trim());
202+
}
203+
await startMCPServer(opts.db, mcpOpts);
193204
});
194205

195206
// ─── Registry commands ──────────────────────────────────────────────────
@@ -242,6 +253,21 @@ registry
242253
}
243254
});
244255

256+
registry
257+
.command('prune')
258+
.description('Remove registry entries whose directories no longer exist')
259+
.action(() => {
260+
const pruned = pruneRegistry();
261+
if (pruned.length === 0) {
262+
console.log('No stale entries found.');
263+
} else {
264+
for (const entry of pruned) {
265+
console.log(`Pruned "${entry.name}" (${entry.path})`);
266+
}
267+
console.log(`\nRemoved ${pruned.length} stale ${pruned.length === 1 ? 'entry' : 'entries'}.`);
268+
}
269+
});
270+
245271
// ─── Embedding commands ─────────────────────────────────────────────────
246272

247273
program
@@ -295,6 +321,53 @@ program
295321
});
296322
});
297323

324+
program
325+
.command('structure [dir]')
326+
.description(
327+
'Show project directory structure with hierarchy, cohesion scores, and per-file metrics',
328+
)
329+
.option('-d, --db <path>', 'Path to graph.db')
330+
.option('--depth <n>', 'Max directory depth')
331+
.option('--sort <metric>', 'Sort by: cohesion | fan-in | fan-out | density | files', 'files')
332+
.option('-j, --json', 'Output as JSON')
333+
.action(async (dir, opts) => {
334+
const { structureData, formatStructure } = await import('./structure.js');
335+
const data = structureData(opts.db, {
336+
directory: dir,
337+
depth: opts.depth ? parseInt(opts.depth, 10) : undefined,
338+
sort: opts.sort,
339+
});
340+
if (opts.json) {
341+
console.log(JSON.stringify(data, null, 2));
342+
} else {
343+
console.log(formatStructure(data));
344+
}
345+
});
346+
347+
program
348+
.command('hotspots')
349+
.description(
350+
'Find structural hotspots: files or directories with extreme fan-in, fan-out, or symbol density',
351+
)
352+
.option('-d, --db <path>', 'Path to graph.db')
353+
.option('-n, --limit <number>', 'Number of results', '10')
354+
.option('--metric <metric>', 'fan-in | fan-out | density | coupling', 'fan-in')
355+
.option('--level <level>', 'file | directory', 'file')
356+
.option('-j, --json', 'Output as JSON')
357+
.action(async (opts) => {
358+
const { hotspotsData, formatHotspots } = await import('./structure.js');
359+
const data = hotspotsData(opts.db, {
360+
metric: opts.metric,
361+
level: opts.level,
362+
limit: parseInt(opts.limit, 10),
363+
});
364+
if (opts.json) {
365+
console.log(JSON.stringify(data, null, 2));
366+
} else {
367+
console.log(formatHotspots(data));
368+
}
369+
});
370+
298371
program
299372
.command('watch [dir]')
300373
.description('Watch project for file changes and incrementally update the graph')

src/db.js

Lines changed: 13 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,19 @@ export const MIGRATIONS = [
3333
CREATE INDEX IF NOT EXISTS idx_edges_source ON edges(source_id);
3434
CREATE INDEX IF NOT EXISTS idx_edges_target ON edges(target_id);
3535
CREATE INDEX IF NOT EXISTS idx_edges_kind ON edges(kind);
36+
CREATE TABLE IF NOT EXISTS node_metrics (
37+
node_id INTEGER PRIMARY KEY,
38+
line_count INTEGER,
39+
symbol_count INTEGER,
40+
import_count INTEGER,
41+
export_count INTEGER,
42+
fan_in INTEGER,
43+
fan_out INTEGER,
44+
cohesion REAL,
45+
file_count INTEGER,
46+
FOREIGN KEY(node_id) REFERENCES nodes(id)
47+
);
48+
CREATE INDEX IF NOT EXISTS idx_node_metrics_node ON node_metrics(node_id);
3649
`,
3750
},
3851
{

src/export.js

Lines changed: 42 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -24,25 +24,60 @@ export function exportDOT(db, opts = {}) {
2424
`)
2525
.all();
2626

27+
// Try to use directory nodes from DB (built by structure analysis)
28+
const hasDirectoryNodes =
29+
db.prepare("SELECT COUNT(*) as c FROM nodes WHERE kind = 'directory'").get().c > 0;
30+
2731
const dirs = new Map();
2832
const allFiles = new Set();
2933
for (const { source, target } of edges) {
3034
allFiles.add(source);
3135
allFiles.add(target);
3236
}
33-
for (const file of allFiles) {
34-
const dir = path.dirname(file) || '.';
35-
if (!dirs.has(dir)) dirs.set(dir, []);
36-
dirs.get(dir).push(file);
37+
38+
if (hasDirectoryNodes) {
39+
// Use DB directory structure with cohesion labels
40+
const dbDirs = db
41+
.prepare(`
42+
SELECT n.id, n.name, nm.cohesion
43+
FROM nodes n
44+
LEFT JOIN node_metrics nm ON n.id = nm.node_id
45+
WHERE n.kind = 'directory'
46+
`)
47+
.all();
48+
49+
for (const d of dbDirs) {
50+
const containedFiles = db
51+
.prepare(`
52+
SELECT n.name FROM edges e
53+
JOIN nodes n ON e.target_id = n.id
54+
WHERE e.source_id = ? AND e.kind = 'contains' AND n.kind = 'file'
55+
`)
56+
.all(d.id)
57+
.map((r) => r.name)
58+
.filter((f) => allFiles.has(f));
59+
60+
if (containedFiles.length > 0) {
61+
dirs.set(d.name, { files: containedFiles, cohesion: d.cohesion });
62+
}
63+
}
64+
} else {
65+
// Fallback: reconstruct from path.dirname()
66+
for (const file of allFiles) {
67+
const dir = path.dirname(file) || '.';
68+
if (!dirs.has(dir)) dirs.set(dir, { files: [], cohesion: null });
69+
dirs.get(dir).files.push(file);
70+
}
3771
}
3872

3973
let clusterIdx = 0;
40-
for (const [dir, files] of [...dirs].sort()) {
74+
for (const [dir, info] of [...dirs].sort((a, b) => a[0].localeCompare(b[0]))) {
4175
lines.push(` subgraph cluster_${clusterIdx++} {`);
42-
lines.push(` label="${dir}";`);
76+
const cohLabel = info.cohesion !== null ? ` (cohesion: ${info.cohesion.toFixed(2)})` : '';
77+
lines.push(` label="${dir}${cohLabel}";`);
4378
lines.push(` style=dashed;`);
4479
lines.push(` color="#999999";`);
45-
for (const f of files) {
80+
for (const f of info.files) {
4681
const label = path.basename(f);
4782
lines.push(` "${f}" [label="${label}"];`);
4883
}

src/index.js

Lines changed: 11 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -50,11 +50,22 @@ export {
5050
export {
5151
listRepos,
5252
loadRegistry,
53+
pruneRegistry,
5354
REGISTRY_PATH,
5455
registerRepo,
5556
resolveRepoDbPath,
5657
saveRegistry,
5758
unregisterRepo,
5859
} from './registry.js';
60+
// Structure analysis
61+
export {
62+
buildStructure,
63+
formatHotspots,
64+
formatModuleBoundaries,
65+
formatStructure,
66+
hotspotsData,
67+
moduleBoundariesData,
68+
structureData,
69+
} from './structure.js';
5970
// Watch mode
6071
export { watchProject } from './watcher.js';

0 commit comments

Comments (0)