From 61f9f74140515bdab8d8673da32cbee5ab7d1114 Mon Sep 17 00:00:00 2001 From: Chintan Gohil Date: Fri, 3 Apr 2026 00:12:45 +0530 Subject: [PATCH] fix: extend default ignore patterns and fix nested path matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `_should_ignore` function only used `fnmatch.fnmatch()` which matches from the start of the path. This means nested dependency directories like `packages/app/node_modules/react/index.js` were NOT ignored — only top-level `node_modules/` was caught. This is a problem in monorepos, workspaces, and any project with nested dependency directories. The graph would parse thousands of third-party files, inflating build time and polluting blast radius queries. Fix: check if any path segment matches the pattern prefix (e.g., if "node_modules" appears anywhere in the path parts). Also extends DEFAULT_IGNORE_PATTERNS to cover common frameworks: - PHP/Laravel: vendor/, storage/, bootstrap/cache/, public/build/ - Ruby/Rails: vendor/bundle/, .bundle/ - Java/Kotlin: .gradle/, *.jar - .NET/C#: bin/, obj/, packages/ - Dart/Flutter: .dart_tool/, .pub-cache/ - General: coverage/, .cache/, tmp/, .nuxt/ --- code_review_graph/incremental.py | 58 +++++++++++++++++++++++++++++--- tests/test_incremental.py | 33 ++++++++++++++++++ 2 files changed, 86 insertions(+), 5 deletions(-) diff --git a/code_review_graph/incremental.py b/code_review_graph/incremental.py index af72d29..673d73b 100644 --- a/code_review_graph/incremental.py +++ b/code_review_graph/incremental.py @@ -24,26 +24,56 @@ # Default ignore patterns (in addition to .gitignore) DEFAULT_IGNORE_PATTERNS = [ ".code-review-graph/**", - "node_modules/**", ".git/**", + # JavaScript / TypeScript / Node + "node_modules/**", + ".next/**", + ".nuxt/**", + # Python "__pycache__/**", "*.pyc", ".venv/**", "venv/**", + # PHP / Laravel / Composer + "vendor/**", + "storage/**", + "bootstrap/cache/**", + "public/build/**", + # Ruby / Rails + "vendor/bundle/**", + ".bundle/**", + # Java / Kotlin / Gradle / Maven + ".gradle/**", + "*.jar", + # .NET / C# + "bin/**", + "obj/**", + "packages/**", + # Rust + "target/**", + # Dart / Flutter + ".dart_tool/**", + ".pub-cache/**", + # Build outputs "dist/**", "build/**", - ".next/**", - "target/**", + "coverage/**", + # Minified / generated "*.min.js", "*.min.css", "*.map", "*.lock", "package-lock.json", "yarn.lock", + # Database files "*.db", "*.sqlite", "*.db-journal", "*.db-wal", + # Misc + ".cache/**", + ".tmp/**", + "tmp/**", ] @@ -116,8 +146,26 @@ def _load_ignore_patterns(repo_root: Path) -> list[str]: def _should_ignore(path: str, patterns: list[str]) -> bool: - """Check if a path matches any ignore pattern.""" - return any(fnmatch.fnmatch(path, p) for p in patterns) + """Check if a path matches any ignore pattern. + + For ** patterns like 'node_modules/**', matches any path segment — not just + the root. This ensures nested dependency directories (e.g., + 'packages/app/node_modules/react/index.js') are correctly ignored in + monorepos and workspaces. + """ + from pathlib import PurePosixPath + + pp = PurePosixPath(path) + for p in patterns: + if "**" in p: + prefix = p.split("/**")[0] + if any(part == prefix for part in pp.parts) or fnmatch.fnmatch(path, p): + return True + elif fnmatch.fnmatch(path, p) or any( + fnmatch.fnmatch(part, p) for part in pp.parts + ): + return True + return False def _is_binary(path: Path) -> bool: diff --git a/tests/test_incremental.py b/tests/test_incremental.py index ec3184f..e0113f0 100644 --- a/tests/test_incremental.py +++ b/tests/test_incremental.py @@ -101,6 +101,39 @@ def test_should_ignore_matches(self): assert _should_ignore(".git/HEAD", patterns) assert not _should_ignore("src/main.py", patterns) + def test_should_ignore_nested_paths(self): + """Nested dependency dirs (monorepos/workspaces) must be ignored.""" + patterns = ["node_modules/**", "vendor/**", "storage/**"] + # Nested node_modules (monorepo workspace) + assert _should_ignore("packages/app/node_modules/react/index.js", patterns) + # Nested vendor (PHP monorepo) + assert _should_ignore("services/api/vendor/guzzlehttp/Client.php", patterns) + # Nested storage + assert _should_ignore("app/storage/logs/laravel.log", patterns) + # Source files must not be affected + assert not _should_ignore("packages/app/src/main.ts", patterns) + assert not _should_ignore("src/vendors/custom.php", patterns) + + def test_should_ignore_framework_patterns(self): + """Framework-specific dirs from DEFAULT_IGNORE_PATTERNS.""" + from code_review_graph.incremental import DEFAULT_IGNORE_PATTERNS + + patterns = DEFAULT_IGNORE_PATTERNS + # PHP / Laravel + assert _should_ignore("vendor/laravel/framework/src/Collection.php", patterns) + assert _should_ignore("storage/logs/laravel.log", patterns) + assert _should_ignore("bootstrap/cache/packages.php", patterns) + # .NET + assert _should_ignore("bin/Debug/net8.0/app.dll", patterns) + assert _should_ignore("obj/Release/app.assets.cache", patterns) + # Dart / Flutter + assert _should_ignore(".dart_tool/package_config.json", patterns) + # Java / Gradle + assert _should_ignore(".gradle/caches/transforms-3/file.jar", patterns) + # Source files untouched + assert not _should_ignore("src/app/page.tsx", patterns) + assert not _should_ignore("app/Http/Controllers/UserController.php", patterns) + class TestIsBinary: def test_text_file_is_not_binary(self, tmp_path):