Skip to content

Commit a1ebb31

Browse files
committed
refactor: consolidate file discovery helpers, fix test skipping, use stderr for progress (PR #1232)
- Extract shared utilities into analysis/file_discovery.py
- Update complexity, debt, and dependency modules to import from file_discovery
- Expand _should_skip_file() to handle test_*.py prefix and directory parts
- Use print(..., file=sys.stderr) for progress logs to avoid mixing with JSON output
1 parent 5c99f0d commit a1ebb31

File tree

6 files changed

+195
-259
lines changed

6 files changed

+195
-259
lines changed

analysis/complexity_analysis.py

Lines changed: 1 addition & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,10 @@
55

66
import pathlib
77
import re
8-
import subprocess
98
from dataclasses import dataclass
109
from typing import Dict, List, Optional
1110

12-
import pathspec
11+
from .file_discovery import get_tracked_source_files
1312

1413
# Radon is optional - graceful fallback if not available
1514
try:
@@ -66,83 +65,6 @@ def to_dict(self) -> dict:
6665
}
6766

6867

69-
def load_gitignore(repo_root: pathlib.Path) -> Optional[pathspec.PathSpec]:
70-
"""Load .gitignore patterns."""
71-
gitignore_path = repo_root / ".gitignore"
72-
if not gitignore_path.exists():
73-
return None
74-
75-
with open(gitignore_path, "r", encoding="utf-8") as f:
76-
patterns = f.read().splitlines()
77-
78-
return pathspec.PathSpec.from_lines("gitwildmatch", patterns)
79-
80-
81-
def should_analyze_file(
82-
filepath: pathlib.Path,
83-
repo_root: pathlib.Path,
84-
gitignore: Optional[pathspec.PathSpec],
85-
) -> bool:
86-
"""Check if a file should be analyzed."""
87-
rel_path = filepath.relative_to(repo_root).as_posix()
88-
89-
# Skip common non-source directories
90-
skip_dirs = {
91-
"node_modules",
92-
"dist",
93-
"out",
94-
".venv",
95-
"__pycache__",
96-
".git",
97-
".vscode-test",
98-
}
99-
# Use relative path parts to avoid matching directories in repo root path
100-
rel_parts = filepath.relative_to(repo_root).parts
101-
for part in rel_parts:
102-
if part in skip_dirs:
103-
return False
104-
105-
# Skip if matched by gitignore
106-
if gitignore and gitignore.match_file(rel_path):
107-
return False
108-
109-
return True
110-
111-
112-
def get_tracked_source_files(
113-
repo_root: pathlib.Path, extensions: List[str]
114-
) -> List[pathlib.Path]:
115-
"""Get source files tracked by git (respects .gitignore automatically)."""
116-
try:
117-
result = subprocess.run(
118-
["git", "ls-files", "--cached", "--others", "--exclude-standard"],
119-
cwd=repo_root,
120-
capture_output=True,
121-
text=True,
122-
timeout=30,
123-
)
124-
if result.returncode != 0:
125-
return []
126-
127-
files = []
128-
for line in result.stdout.splitlines():
129-
line = line.strip()
130-
if line and any(line.endswith(ext) for ext in extensions):
131-
filepath = repo_root / line
132-
if filepath.exists():
133-
files.append(filepath)
134-
return files
135-
except (subprocess.TimeoutExpired, FileNotFoundError):
136-
# Fall back to rglob if git is not available
137-
gitignore = load_gitignore(repo_root)
138-
files = []
139-
for ext in extensions:
140-
for filepath in repo_root.rglob(f"*{ext}"):
141-
if should_analyze_file(filepath, repo_root, gitignore):
142-
files.append(filepath)
143-
return files
144-
145-
14668
def analyze_python_file(
14769
filepath: pathlib.Path, repo_root: pathlib.Path
14870
) -> Optional[FileComplexity]:

analysis/debt_indicators.py

Lines changed: 1 addition & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,10 @@
99

1010
import pathlib
1111
import re
12-
import subprocess
1312
from dataclasses import dataclass
1413
from typing import Dict, List, Optional
1514

16-
import pathspec
15+
from .file_discovery import get_tracked_source_files
1716

1817

1918
@dataclass
@@ -86,83 +85,6 @@ def to_dict(self) -> dict:
8685
LONG_FUNCTION_THRESHOLD = 50 # lines
8786

8887

89-
def load_gitignore(repo_root: pathlib.Path) -> Optional[pathspec.PathSpec]:
90-
"""Load .gitignore patterns."""
91-
gitignore_path = repo_root / ".gitignore"
92-
if not gitignore_path.exists():
93-
return None
94-
95-
with open(gitignore_path, "r", encoding="utf-8") as f:
96-
patterns = f.read().splitlines()
97-
98-
return pathspec.PathSpec.from_lines("gitwildmatch", patterns)
99-
100-
101-
def should_analyze_file(
102-
filepath: pathlib.Path,
103-
repo_root: pathlib.Path,
104-
gitignore: Optional[pathspec.PathSpec],
105-
) -> bool:
106-
"""Check if a file should be analyzed."""
107-
rel_path = filepath.relative_to(repo_root).as_posix()
108-
109-
# Skip common non-source directories
110-
skip_dirs = {
111-
"node_modules",
112-
"dist",
113-
"out",
114-
".venv",
115-
"__pycache__",
116-
".git",
117-
".vscode-test",
118-
}
119-
# Use relative path parts to avoid matching directories in repo root path
120-
rel_parts = filepath.relative_to(repo_root).parts
121-
for part in rel_parts:
122-
if part in skip_dirs:
123-
return False
124-
125-
# Skip if matched by gitignore
126-
if gitignore and gitignore.match_file(rel_path):
127-
return False
128-
129-
return True
130-
131-
132-
def get_tracked_source_files(
133-
repo_root: pathlib.Path, extensions: List[str]
134-
) -> List[pathlib.Path]:
135-
"""Get source files tracked by git (respects .gitignore automatically)."""
136-
try:
137-
result = subprocess.run(
138-
["git", "ls-files", "--cached", "--others", "--exclude-standard"],
139-
cwd=repo_root,
140-
capture_output=True,
141-
text=True,
142-
timeout=30,
143-
)
144-
if result.returncode != 0:
145-
return []
146-
147-
files = []
148-
for line in result.stdout.splitlines():
149-
line = line.strip()
150-
if line and any(line.endswith(ext) for ext in extensions):
151-
filepath = repo_root / line
152-
if filepath.exists():
153-
files.append(filepath)
154-
return files
155-
except (subprocess.TimeoutExpired, FileNotFoundError):
156-
# Fall back to rglob if git is not available
157-
gitignore = load_gitignore(repo_root)
158-
files = []
159-
for ext in extensions:
160-
for filepath in repo_root.rglob(f"*{ext}"):
161-
if should_analyze_file(filepath, repo_root, gitignore):
162-
files.append(filepath)
163-
return files
164-
165-
16688
def find_debt_markers(
16789
filepath: pathlib.Path, repo_root: pathlib.Path
16890
) -> List[DebtMarker]:

analysis/dependency_analysis.py

Lines changed: 3 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,10 @@
1212

1313
import pathlib
1414
import re
15-
import subprocess
1615
from dataclasses import dataclass, field
1716
from typing import Dict, List, Optional, Set
1817

19-
import pathspec
18+
from .file_discovery import get_tracked_source_files
2019

2120

2221
@dataclass
@@ -60,87 +59,6 @@ def to_dict(self) -> dict:
6059
}
6160

6261

63-
def load_gitignore(repo_root: pathlib.Path) -> Optional[pathspec.PathSpec]:
64-
"""Load .gitignore patterns."""
65-
gitignore_path = repo_root / ".gitignore"
66-
if not gitignore_path.exists():
67-
return None
68-
69-
with open(gitignore_path, "r", encoding="utf-8") as f:
70-
patterns = f.read().splitlines()
71-
72-
return pathspec.PathSpec.from_lines("gitwildmatch", patterns)
73-
74-
75-
def should_analyze_file(
76-
filepath: pathlib.Path,
77-
repo_root: pathlib.Path,
78-
gitignore: Optional[pathspec.PathSpec],
79-
) -> bool:
80-
"""Check if a file should be analyzed."""
81-
rel_path = filepath.relative_to(repo_root).as_posix()
82-
83-
# Skip common non-source directories
84-
skip_dirs = {
85-
"node_modules",
86-
"dist",
87-
"out",
88-
".venv",
89-
"__pycache__",
90-
".git",
91-
".vscode-test",
92-
}
93-
# Use relative path parts to avoid matching directories in repo root path
94-
rel_parts = filepath.relative_to(repo_root).parts
95-
for part in rel_parts:
96-
if part in skip_dirs:
97-
return False
98-
99-
# Skip if matched by gitignore
100-
if gitignore and gitignore.match_file(rel_path):
101-
return False
102-
103-
# Skip test and mock files for dependency analysis
104-
if any(x in rel_path for x in ["/test/", "/mocks/", ".test.", ".spec."]):
105-
return False
106-
107-
return True
108-
109-
110-
def get_tracked_source_files(
111-
repo_root: pathlib.Path, extensions: List[str]
112-
) -> List[pathlib.Path]:
113-
"""Get source files tracked by git (respects .gitignore automatically)."""
114-
try:
115-
result = subprocess.run(
116-
["git", "ls-files", "--cached", "--others", "--exclude-standard"],
117-
cwd=repo_root,
118-
capture_output=True,
119-
text=True,
120-
timeout=30,
121-
)
122-
if result.returncode != 0:
123-
return []
124-
125-
files = []
126-
for line in result.stdout.splitlines():
127-
line = line.strip()
128-
if line and any(line.endswith(ext) for ext in extensions):
129-
filepath = repo_root / line
130-
if filepath.exists():
131-
files.append(filepath)
132-
return files
133-
except (subprocess.TimeoutExpired, FileNotFoundError):
134-
# Fall back to rglob if git is not available
135-
gitignore = load_gitignore(repo_root)
136-
files = []
137-
for ext in extensions:
138-
for filepath in repo_root.rglob(f"*{ext}"):
139-
if should_analyze_file(filepath, repo_root, gitignore):
140-
files.append(filepath)
141-
return files
142-
143-
14462
def extract_imports_typescript(
14563
filepath: pathlib.Path, repo_root: pathlib.Path
14664
) -> Set[str]:
@@ -216,8 +134,9 @@ def build_dependency_graph(repo_root: pathlib.Path) -> Dict[str, ModuleInfo]:
216134
modules: Dict[str, ModuleInfo] = {}
217135

218136
# Find all TypeScript/JavaScript files using git ls-files (respects .gitignore)
137+
# Skip test files for dependency analysis
219138
extensions = [".ts", ".tsx", ".js", ".jsx"]
220-
source_files = get_tracked_source_files(repo_root, extensions)
139+
source_files = get_tracked_source_files(repo_root, extensions, skip_tests=True)
221140

222141
# First pass: extract imports
223142
for filepath in source_files:

0 commit comments

Comments
 (0)