From bf14c0f48702f7d1504a88f19a1516cbab921800 Mon Sep 17 00:00:00 2001 From: LorenzoFeng Date: Sat, 23 May 2026 16:23:56 +0800 Subject: [PATCH] fix(watcher): use chokidar with .gitignore filtering to prevent inotify exhaustion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace raw fs.watch({recursive:true}) with chokidar, which supports an 'ignored' callback that filters directories BEFORE registering inotify watches. Previously, fs.watch registered a watch on every directory under the project root (including node_modules/, .git/, dist/, .next/, etc.) and only filtered in the callback — wasting kernel watch budget on directories whose events were discarded. The chokidar ignored callback loads .gitignore rules from the project root upward (using the existing 'ignore' package dependency) and also hardcodes exclusion of .codegraph/ and .git/. This drops per-instance watch count from hundreds of thousands to hundreds on monorepos. Closes #276 --- package-lock.json | 34 +++++++++- package.json | 1 + src/sync/watcher.ts | 158 +++++++++++++++++++++++++++++++++++--------- 3 files changed, 158 insertions(+), 35 deletions(-) diff --git a/package-lock.json b/package-lock.json index 36c592b1..d82b716a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,15 +1,16 @@ { "name": "@colbymchenry/codegraph", - "version": "0.9.3", + "version": "0.9.4", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@colbymchenry/codegraph", - "version": "0.9.3", + "version": "0.9.4", "license": "MIT", "dependencies": { "@clack/prompts": "^1.3.0", + "chokidar": "^4.0.3", "commander": "^14.0.2", "fast-string-width": "^3.0.2", "fast-wrap-ansi": "^0.2.0", @@ -1004,6 +1005,21 @@ "node": ">= 16" } }, + "node_modules/chokidar": { + "version": "4.0.3", + "resolved": "https://registry.npmjs.org/chokidar/-/chokidar-4.0.3.tgz", + "integrity": 
"sha512-Qgzu8kfBvo+cA4962jnP1KkS6Dop5NS6g7R5LFYJr4b8Ub94PPQXUksCw9PvXoeXPRRddRNC5C1JQUR2SMGtnA==", + "license": "MIT", + "dependencies": { + "readdirp": "^4.0.1" + }, + "engines": { + "node": ">= 14.16.0" + }, + "funding": { + "url": "https://paulmillr.com/funding/" + } + }, "node_modules/commander": { "version": "14.0.3", "resolved": "https://registry.npmjs.org/commander/-/commander-14.0.3.tgz", @@ -1269,6 +1285,19 @@ "node": "^10 || ^12 || >=14" } }, + "node_modules/readdirp": { + "version": "4.1.2", + "resolved": "https://registry.npmjs.org/readdirp/-/readdirp-4.1.2.tgz", + "integrity": "sha512-GDhwkLfywWL2s6vEjyhri+eXmfH6j1L7JE27WhqLeYzoh/A3DBaYGEj2H/HFZCn/kMfim73FXxEJTw06WtxQwg==", + "license": "MIT", + "engines": { + "node": ">= 14.18.0" + }, + "funding": { + "type": "individual", + "url": "https://paulmillr.com/funding/" + } + }, "node_modules/rollup": { "version": "4.57.1", "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.57.1.tgz", @@ -1431,7 +1460,6 @@ "integrity": "sha512-o5a9xKjbtuhY6Bi5S3+HvbRERmouabWbyUcpXXUA1u+GNUKoROi9byOJ8M0nHbHYHkYICiMlqxkg1KkYmm25Sw==", "dev": true, "license": "MIT", - "peer": true, "dependencies": { "esbuild": "^0.21.3", "postcss": "^8.4.43", diff --git a/package.json b/package.json index 5455ced9..beb25d9a 100644 --- a/package.json +++ b/package.json @@ -33,6 +33,7 @@ "license": "MIT", "dependencies": { "@clack/prompts": "^1.3.0", + "chokidar": "^4.0.3", "commander": "^14.0.2", "fast-string-width": "^3.0.2", "fast-wrap-ansi": "^0.2.0", diff --git a/src/sync/watcher.ts b/src/sync/watcher.ts index 68e60fff..aea311a8 100644 --- a/src/sync/watcher.ts +++ b/src/sync/watcher.ts @@ -4,11 +4,16 @@ * Watches the project directory for file changes and triggers * debounced sync operations to keep the code graph up-to-date. * - * Uses Node.js native fs.watch with recursive mode (macOS FSEvents, - * Windows ReadDirectoryChangesW, Linux inotify on Node 19+). 
+ * Uses chokidar under the hood, which provides cross-platform file + * watching with built-in filtering to avoid registering unnecessary + * inotify watches (fixes #276: fs.watch recursive exhausts kernel + * watch budget on large repos). */ import * as fs from 'fs'; +import * as path from 'path'; +import chokidar, { FSWatcher } from 'chokidar'; +import ignore, { Ignore } from 'ignore'; import { isSourceFile } from '../extraction'; import { logDebug, logWarn } from '../errors'; import { normalizePath } from '../utils'; @@ -36,22 +41,64 @@ export interface WatchOptions { onSyncError?: (error: Error) => void; } +/** + * Represents a .gitignore file loaded from a specific directory. + * Rules in a .gitignore are relative to that directory, mirroring + * how git applies .gitignore files at every level. + */ +interface ScopedIgnore { + dir: string; + ig: Ignore; +} + +/** + * Load .gitignore files from projectRoot upward through parent + * directories. Returns a list ordered from the outermost ancestor down + * to projectRoot, so ancestor rules are checked before the project's own. + */ +function loadGitignoreChain(projectRoot: string): ScopedIgnore[] { + const matchers: ScopedIgnore[] = []; + let dir = projectRoot; + + // Determine the filesystem root (e.g. '/' on Linux) + const root = path.parse(dir).root; + + while (dir !== root) { + const giPath = path.join(dir, '.gitignore'); + try { + if (fs.existsSync(giPath)) { + matchers.unshift({ + dir, + ig: ignore().add(fs.readFileSync(giPath, 'utf-8')), + }); + } + } catch { + // Unreadable .gitignore — treat as absent + } + dir = path.dirname(dir); + } + + return matchers; +} + /** * FileWatcher monitors a project directory for changes and triggers * debounced sync operations via a provided callback.
* * Design goals: - * - Minimal resource usage (native OS file events, no polling) + * - Minimal resource usage (chokidar with .gitignore-aware filtering + * avoids registering inotify watches on excluded directories) * - Debounced to avoid thrashing on rapid saves * - Filters to supported source files by extension * - Ignores .codegraph/ directory changes */ export class FileWatcher { - private watcher: fs.FSWatcher | null = null; + private watcher: FSWatcher | null = null; private debounceTimer: ReturnType | null = null; private hasChanges = false; private syncing = false; private stopped = false; + private gitignoreMatchers: ScopedIgnore[] = []; private readonly projectRoot: string; private readonly debounceMs: number; @@ -79,57 +126,103 @@ export class FileWatcher { if (this.watcher) return true; // Already watching this.stopped = false; - // Some environments make recursive fs.watch unusable — most notably WSL2 - // /mnt/ drives, where setup blocks long enough to break MCP startup - // handshakes (issue #199). Skip watching there; callers fall back to - // manual `codegraph sync` or the git sync hooks. + // Some environments make filesystem watching unusable — most notably + // WSL2 /mnt/ drives, where the underlying fs.watch calls block long + // enough to break MCP startup handshakes (issue #199). Skip watching + // there; callers fall back to manual `codegraph sync` or git sync hooks. const disabledReason = watchDisabledReason(this.projectRoot); if (disabledReason) { logDebug('File watcher disabled', { reason: disabledReason, projectRoot: this.projectRoot }); return false; } - try { - this.watcher = fs.watch( - this.projectRoot, - { recursive: true }, - (_eventType, filename) => { - if (!filename || this.stopped) return; + // Load .gitignore rules from project root upward. 
+ // These drive chokidar's `ignored` callback so we never register + // inotify watches on excluded directories (like node_modules/, .git/, + // dist/, .next/, etc.), avoiding kernel watch-budget exhaustion (#276). + this.gitignoreMatchers = loadGitignoreChain(this.projectRoot); - // Normalize path separators - const normalized = normalizePath(filename); + try { + this.watcher = chokidar.watch(this.projectRoot, { + // Core fix for #276: filter directories BEFORE they are watched. + // chokidar calls this for every file and directory it encounters, + // and only registers an underlying fs.watch on those that pass. + // This drops per-instance inotify watch count from hundreds of + // thousands (on a monorepo) to hundreds — only the directories + // that are not excluded by the rules below. + ignored: (testPath: string) => { + const rel = normalizePath(path.relative(this.projectRoot, testPath)); - // Ignore .codegraph/ directory changes (our own DB writes) + // Always ignore .codegraph/ (our own DB writes) and .git/ if ( - normalized === '.codegraph' || - normalized.startsWith('.codegraph/') || - normalized.startsWith('.codegraph\\') + rel === '.codegraph' || + rel.startsWith('.codegraph/') || + rel === '.git' || + rel.startsWith('.git/') ) { - return; + return true; } - // Only sync changes to files we can actually parse. - if (!isSourceFile(normalized)) { - return; + // Check .gitignore rules + for (const { dir, ig } of this.gitignoreMatchers) { + let matcherRel = normalizePath(path.relative(dir, testPath)); + if (!matcherRel || matcherRel.startsWith('..')) continue; + + // For directory-only .gitignore rules (e.g. "build/"), + // append a trailing slash so the ignore package matches them.
+ try { + const stat = fs.statSync(testPath); + if (stat.isDirectory()) matcherRel += '/'; + } catch { + // If we can't stat, assume it's a file — don't append '/' + } + + if (ig.ignores(matcherRel)) return true; } - logDebug('File change detected', { file: normalized }); - this.hasChanges = true; - this.scheduleSync(); + return false; + }, + }); + + // Wire up the file-change handler. chokidar emits 'all' for every + // event type; we only care about files that were actually changed. + this.watcher.on('all', (_event: string, filePath: string) => { + if (this.stopped) return; + + const normalized = normalizePath(path.relative(this.projectRoot, filePath)); + + // Defense in depth: filter again even though `ignored` should + // have prevented watches on these directories. Events can still + // arrive during watcher setup or from symlink traversal. + if ( + normalized === '.codegraph' || + normalized.startsWith('.codegraph/') || + normalized === '.git' || + normalized.startsWith('.git/') + ) { + return; + } + + // Only sync changes to files we can actually parse. 
+ if (!isSourceFile(normalized)) { + return; } - ); + + logDebug('File change detected', { file: normalized }); + this.hasChanges = true; + this.scheduleSync(); + }); // Handle watcher errors gracefully - this.watcher.on('error', (err) => { + this.watcher.on('error', (err: unknown) => { logWarn('File watcher error', { error: String(err) }); - // Don't crash — watcher may recover or user can restart }); logDebug('File watcher started', { projectRoot: this.projectRoot, debounceMs: this.debounceMs }); return true; } catch (err) { - // Recursive watch not supported (e.g., Linux < Node 19) - logWarn('Could not start file watcher — recursive fs.watch not supported on this platform', { error: String(err) }); + // Watcher setup failed (e.g., permission denied, missing directory) + logWarn('Could not start file watcher', { error: String(err) }); return false; } } @@ -151,6 +244,7 @@ export class FileWatcher { } this.hasChanges = false; + this.gitignoreMatchers = []; logDebug('File watcher stopped'); }