|
| 1 | +#!/usr/bin/env python3 |
| 2 | +"""Analyze Python files for size metrics and modularization candidates.""" |
| 3 | + |
import ast
import json
import os
import re
from pathlib import Path
from typing import Dict, List, Optional, Tuple
| 9 | + |
| 10 | + |
class FileAnalyzer(ast.NodeVisitor):
    """AST visitor that tallies functions, classes, and branching statements.

    After ``visit(tree)`` the instance exposes:

    * ``function_count`` -- number of ``def`` and ``async def`` nodes,
      including nested functions and methods.
    * ``class_count`` -- number of ``class`` nodes, including nested ones.
    * ``cyclomatic_complexity`` -- number of ``if``/``for``/``async for``/
      ``while``/``try`` statements located inside functions. Statements at
      module or class level (outside any function) are not counted.
    """

    # Statement types that add one to the complexity tally.
    _BRANCH_NODES = (ast.If, ast.For, ast.AsyncFor, ast.While, ast.Try)

    def __init__(self):
        self.function_count = 0
        self.class_count = 0
        self.cyclomatic_complexity = 0
        # How many function definitions enclose the node being visited.
        self._function_depth = 0

    def _visit_function(self, node):
        """Count one function and, for outermost functions, its branches."""
        self.function_count += 1
        # ast.walk() already descends into nested functions, so only the
        # outermost function scans its subtree; scanning again for every
        # nested definition would count the same branches repeatedly.
        if self._function_depth == 0:
            self.cyclomatic_complexity += sum(
                isinstance(child, self._BRANCH_NODES)
                for child in ast.walk(node)
            )
        self._function_depth += 1
        try:
            # Keep descending so nested functions and classes are counted.
            self.generic_visit(node)
        finally:
            self._function_depth -= 1

    def visit_FunctionDef(self, node):
        """Handle a regular ``def``."""
        self._visit_function(node)

    def visit_AsyncFunctionDef(self, node):
        """Handle an ``async def`` exactly like a regular function."""
        self._visit_function(node)

    def visit_ClassDef(self, node):
        """Count the class, then descend to reach its methods."""
        self.class_count += 1
        self.generic_visit(node)
| 38 | + |
| 39 | + |
def analyze_file(filepath: Path) -> Optional[Dict[str, int]]:
    """Analyze a Python file and return its size metrics.

    Args:
        filepath: Path of the file to read.

    Returns:
        A dict with the keys ``line_count``, ``function_count``,
        ``class_count`` and ``cyclomatic_complexity``, or ``None`` when the
        file cannot be read or decoded. When the text cannot be parsed as
        Python, only ``line_count`` is meaningful and the other values are 0.
    """
    try:
        # utf-8-sig reads plain UTF-8 and also drops a leading byte-order
        # mark, so the BOM never ends up in the text handed to the parser.
        with open(filepath, 'r', encoding='utf-8-sig') as f:
            content = f.read()
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: undecodable bytes
        # (UnicodeDecodeError is a ValueError subclass).
        return None

    metrics = {
        'line_count': len(content.splitlines()),
        'function_count': 0,
        'class_count': 0,
        'cyclomatic_complexity': 0,
    }

    try:
        tree = ast.parse(content)
    except (SyntaxError, ValueError):
        # ValueError is what ast.parse raises for NUL bytes on interpreters
        # before 3.12; treat it like any other unparsable file and report
        # the line count only.
        return metrics

    analyzer = FileAnalyzer()
    analyzer.visit(tree)

    metrics['function_count'] = analyzer.function_count
    metrics['class_count'] = analyzer.class_count
    metrics['cyclomatic_complexity'] = analyzer.cyclomatic_complexity
    return metrics
| 72 | + |
| 73 | + |
def find_python_files(directories: List[Path]) -> List[Path]:
    """Collect every ``*.py`` path found recursively under *directories*.

    Directories that do not exist are skipped. Results are returned in the
    order the directories are given, each one's matches in the order
    ``rglob`` yields them.
    """
    return [
        source
        for root in directories
        if root.exists()
        for source in root.rglob('*.py')
    ]
| 82 | + |
| 83 | + |
def _print_banner(title: str, *, leading_blank: bool = True) -> None:
    """Print *title* between two 80-column rules."""
    rule = "=" * 80
    print(("\n" if leading_blank else "") + rule)
    print(title)
    print(rule)


def _modularization_reasons(metrics: dict) -> List[str]:
    """Return a display string for each size threshold *metrics* exceeds."""
    reasons = []
    if metrics['line_count'] > 500:
        reasons.append(f"LOC={metrics['line_count']}")
    if metrics['function_count'] > 20:
        reasons.append(f"Funcs={metrics['function_count']}")
    if metrics['class_count'] > 5:
        reasons.append(f"Classes={metrics['class_count']}")
    return reasons


def main(base_dir: Optional[Path] = None):
    """Analyze the project's Python files and print a size report.

    Scans the ``swarm``, ``src`` and ``tests`` sub-directories of the project
    root, then prints: the ten largest files by line count, the files that
    exceed a size threshold (more than 500 lines, 20 functions or 5 classes),
    the full per-file metrics as JSON, and overall totals.

    Args:
        base_dir: Project root to scan. Defaults to the original hard-coded
            location ``c:/Code/Swarm/flow-studio-swarm``.
    """
    if base_dir is None:
        base_dir = Path('c:/Code/Swarm/flow-studio-swarm')
    base_dir = Path(base_dir)
    directories = [base_dir / name for name in ('swarm', 'src', 'tests')]

    print("Finding Python files...")
    python_files = find_python_files(directories)
    print(f"Found {len(python_files)} Python files\n")

    results = []
    for filepath in python_files:
        metrics = analyze_file(filepath)
        if metrics is None:
            # analyze_file returns None when the file could not be read.
            continue
        results.append({
            'path': str(filepath.relative_to(base_dir)),
            **metrics,
        })

    # Sort by line count descending
    results.sort(key=lambda x: x['line_count'], reverse=True)

    # One template for headers and rows keeps every column aligned; the
    # 'Classes' column is 7 wide because the heading itself is 7 characters.
    row = "{:<50} {:>6} {:>6} {:>7} {:>6}"
    header = row.format('Path', 'LOC', 'Funcs', 'Classes', 'CC')

    # Print top 10 by line count
    _print_banner("TOP 10 FILES BY LINE COUNT", leading_blank=False)
    print(header)
    print("-" * 80)
    for i, r in enumerate(results[:10], 1):
        # The rank is part of the 50-wide path column so that the numeric
        # columns line up with the header for both one- and two-digit ranks.
        print(row.format(
            f"{i:>2}. {r['path']}",
            r['line_count'],
            r['function_count'],
            r['class_count'],
            r['cyclomatic_complexity'],
        ))

    # Identify modularization candidates
    _print_banner("MODULARIZATION CANDIDATES")
    candidates = []
    for r in results:
        reasons = _modularization_reasons(r)
        if reasons:
            candidates.append({**r, 'reasons': ', '.join(reasons)})

    print(f"\nFound {len(candidates)} candidates for modularization:\n")
    print(f"{header} {'Reasons'}")
    print("-" * 100)
    for c in candidates:
        line = row.format(
            c['path'],
            c['line_count'],
            c['function_count'],
            c['class_count'],
            c['cyclomatic_complexity'],
        )
        print(f"{line} {c['reasons']}")

    # Print raw data in JSON format for further processing
    _print_banner("RAW DATA (JSON format)")
    print(json.dumps(results, indent=2))

    # Summary
    _print_banner("SUMMARY")
    print(f"Total Python files analyzed: {len(results)}")
    print(f"Total lines of code: {sum(r['line_count'] for r in results)}")
    print(f"Total functions: {sum(r['function_count'] for r in results)}")
    print(f"Total classes: {sum(r['class_count'] for r in results)}")
    print(f"Modularization candidates: {len(candidates)}")
| 166 | + |
| 167 | + |
# Script entry point: run the analysis only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
0 commit comments