diff --git a/package-lock.json b/package-lock.json index 36c592b1..d82b716a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,15 +1,16 @@ { "name": "@colbymchenry/codegraph", - "version": "0.9.3", + "version": "0.9.4", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@colbymchenry/codegraph", - "version": "0.9.3", + "version": "0.9.4", "license": "MIT", "dependencies": { "@clack/prompts": "^1.3.0", + "chokidar": "^4.0.3", "commander": "^14.0.2", "fast-string-width": "^3.0.2", "fast-wrap-ansi": "^0.2.0", @@ -1004,6 +1005,21 @@ "node": ">= 16" } }, + "node_modules/chokidar": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-4.0.3.tgz", + "integrity": "sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA==", + "license": "MIT", + "dependencies": { + "readdirp": "^4.0.1" + }, + "engines": { + "node": ">= 14.16.0" + }, + "funding": { + "url": "https://paulmillr.com/funding/" + } + }, "node_modules/commander": { "version": "14.0.3", "resolved": "https://registry.npmjs.org/commander/-/commander-14.0.3.tgz", @@ -1269,6 +1285,19 @@ "node": "^10 || ^12 || >=14" } }, + "node_modules/readdirp": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-4.1.2.tgz", + "integrity": "sha512-GDhwkLfywWL2s6vEjyhri+eXmfH6j1L7JE27WhqLeYzoh/A3DBaYGEj2H/HFZCn/kMfim73FXxEJTw06WtxQwg==", + "license": "MIT", + "engines": { + "node": ">= 14.18.0" + }, + "funding": { + "type": "individual", + "url": "https://paulmillr.com/funding/" + } + }, "node_modules/rollup": { "version": "4.57.1", "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.57.1.tgz", @@ -1431,7 +1460,6 @@ "integrity": "sha512-o5a9xKjbtuhY6Bi5S3+HvbRERmouabWbyUcpXXUA1u+GNUKoROi9byOJ8M0nHbHYHkYICiMlqxkg1KkYmm25Sw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "esbuild": "^0.21.3", "postcss": "^8.4.43", diff --git a/package.json b/package.json index 5455ced9..beb25d9a 
100644 --- a/package.json +++ b/package.json @@ -33,6 +33,7 @@ "license": "MIT", "dependencies": { "@clack/prompts": "^1.3.0", + "chokidar": "^4.0.3", "commander": "^14.0.2", "fast-string-width": "^3.0.2", "fast-wrap-ansi": "^0.2.0", diff --git a/src/sync/watcher.ts b/src/sync/watcher.ts index 68e60fff..aea311a8 100644 --- a/src/sync/watcher.ts +++ b/src/sync/watcher.ts @@ -4,11 +4,16 @@ * Watches the project directory for file changes and triggers * debounced sync operations to keep the code graph up-to-date. * - * Uses Node.js native fs.watch with recursive mode (macOS FSEvents, - * Windows ReadDirectoryChangesW, Linux inotify on Node 19+). + * Uses chokidar under the hood, which provides cross-platform file + * watching with built-in filtering to avoid registering unnecessary + * inotify watches (fixes #276: fs.watch recursive exhausts kernel + * watch budget on large repos). */ import * as fs from 'fs'; +import * as path from 'path'; +import chokidar, { FSWatcher } from 'chokidar'; +import ignore, { Ignore } from 'ignore'; import { isSourceFile } from '../extraction'; import { logDebug, logWarn } from '../errors'; import { normalizePath } from '../utils'; @@ -36,22 +41,64 @@ export interface WatchOptions { onSyncError?: (error: Error) => void; } +/** + * Represents a .gitignore file loaded from a specific directory. + * Rules in a .gitignore are relative to that directory, mirroring + * how git applies .gitignore files at every level. + */ +interface ScopedIgnore { + dir: string; + ig: Ignore; +} + +/** + * Load .gitignore files from projectRoot upward through parent + * directories. Returns a list ordered from the outermost ancestor + * down to projectRoot, so ancestor rules are checked before nested ones. + */ +function loadGitignoreChain(projectRoot: string): ScopedIgnore[] { + const matchers: ScopedIgnore[] = []; + let dir = projectRoot; + + // Determine the filesystem root (e.g. 
'/' on Linux) + const root = path.parse(dir).root; + + while (dir !== root) { + const giPath = path.join(dir, '.gitignore'); + try { + if (fs.existsSync(giPath)) { + matchers.unshift({ + dir, + ig: ignore().add(fs.readFileSync(giPath, 'utf-8')), + }); + } + } catch { + // Unreadable .gitignore — treat as absent + } + dir = path.dirname(dir); + } + + return matchers; +} + /** * FileWatcher monitors a project directory for changes and triggers * debounced sync operations via a provided callback. * * Design goals: - * - Minimal resource usage (native OS file events, no polling) + * - Minimal resource usage (chokidar with .gitignore-aware filtering + * avoids registering inotify watches on excluded directories) * - Debounced to avoid thrashing on rapid saves * - Filters to supported source files by extension * - Ignores .codegraph/ directory changes */ export class FileWatcher { - private watcher: fs.FSWatcher | null = null; + private watcher: FSWatcher | null = null; private debounceTimer: ReturnType | null = null; private hasChanges = false; private syncing = false; private stopped = false; + private gitignoreMatchers: ScopedIgnore[] = []; private readonly projectRoot: string; private readonly debounceMs: number; @@ -79,57 +126,103 @@ export class FileWatcher { if (this.watcher) return true; // Already watching this.stopped = false; - // Some environments make recursive fs.watch unusable — most notably WSL2 - // /mnt/ drives, where setup blocks long enough to break MCP startup - // handshakes (issue #199). Skip watching there; callers fall back to - // manual `codegraph sync` or the git sync hooks. + // Some environments make filesystem watching unusable — most notably + // WSL2 /mnt/ drives, where the underlying fs.watch calls block long + // enough to break MCP startup handshakes (issue #199). Skip watching + // there; callers fall back to manual `codegraph sync` or git sync hooks. 
const disabledReason = watchDisabledReason(this.projectRoot); if (disabledReason) { logDebug('File watcher disabled', { reason: disabledReason, projectRoot: this.projectRoot }); return false; } - try { - this.watcher = fs.watch( - this.projectRoot, - { recursive: true }, - (_eventType, filename) => { - if (!filename || this.stopped) return; + // Load .gitignore rules from project root upward. + // These drive chokidar's `ignored` callback so we never register + // inotify watches on excluded directories (like node_modules/, .git/, + // dist/, .next/, etc.), avoiding kernel watch-budget exhaustion (#276). + this.gitignoreMatchers = loadGitignoreChain(this.projectRoot); - // Normalize path separators - const normalized = normalizePath(filename); + try { + this.watcher = chokidar.watch(this.projectRoot, { + // Core fix for #276: filter directories BEFORE they are watched. + // chokidar calls this for every file and directory it encounters, + // and only registers an underlying fs.watch on those that pass. + // This drops per-instance inotify watch count from hundreds of + // thousands (on a monorepo) to hundreds — only the directories + // that actually contain tracked source code. + ignored: (testPath: string) => { + const rel = normalizePath(path.relative(this.projectRoot, testPath)); - // Ignore .codegraph/ directory changes (our own DB writes) + // Always ignore .codegraph/ (our own DB writes) and .git/ if ( - normalized === '.codegraph' || - normalized.startsWith('.codegraph/') || - normalized.startsWith('.codegraph\\') + rel === '.codegraph' || + rel.startsWith('.codegraph/') || + rel === '.git' || + rel.startsWith('.git/') ) { - return; + return true; } - // Only sync changes to files we can actually parse. 
- if (!isSourceFile(normalized)) { - return; + // Check .gitignore rules + for (const { dir, ig } of this.gitignoreMatchers) { + let matcherRel = normalizePath(path.relative(dir, testPath)); + if (!matcherRel || matcherRel.startsWith('..')) continue; + + // For directory-only .gitignore rules (e.g. "build/"), + // append a trailing slash so the ignore package matches them. + try { + const stat = fs.statSync(testPath); + if (stat.isDirectory()) matcherRel += '/'; + } catch { + // If we can't stat, assume it's a file — don't append '/' + } + + if (ig.ignores(matcherRel)) return true; } - logDebug('File change detected', { file: normalized }); - this.hasChanges = true; - this.scheduleSync(); + return false; + }, + }); + + // Wire up the file-change handler. chokidar emits 'all' for every + // event type; we only care about files that were actually changed. + this.watcher.on('all', (_event: string, filePath: string) => { + if (this.stopped) return; + + const normalized = normalizePath(path.relative(this.projectRoot, filePath)); + + // Defense in depth: filter again even though `ignored` should + // have prevented watches on these directories. Events can still + // arrive during watcher setup or from symlink traversal. + if ( + normalized === '.codegraph' || + normalized.startsWith('.codegraph/') || + normalized === '.git' || + normalized.startsWith('.git/') + ) { + return; + } + + // Only sync changes to files we can actually parse. 
+ if (!isSourceFile(normalized)) { + return; } - ); + + logDebug('File change detected', { file: normalized }); + this.hasChanges = true; + this.scheduleSync(); + }); // Handle watcher errors gracefully - this.watcher.on('error', (err) => { + this.watcher.on('error', (err: unknown) => { logWarn('File watcher error', { error: String(err) }); - // Don't crash — watcher may recover or user can restart }); logDebug('File watcher started', { projectRoot: this.projectRoot, debounceMs: this.debounceMs }); return true; } catch (err) { - // Recursive watch not supported (e.g., Linux < Node 19) - logWarn('Could not start file watcher — recursive fs.watch not supported on this platform', { error: String(err) }); + // Watcher setup failed (e.g., permission denied, missing directory) + logWarn('Could not start file watcher', { error: String(err) }); return false; } } @@ -151,6 +244,7 @@ export class FileWatcher { } this.hasChanges = false; + this.gitignoreMatchers = []; logDebug('File watcher stopped'); }