From a8d5bc824fb901fa1ff2f6dac023c3a9b73617d6 Mon Sep 17 00:00:00 2001 From: prosdev Date: Tue, 31 Mar 2026 19:20:04 -0700 Subject: [PATCH 1/8] docs: add commit strategy to Phase 4 Python plan Co-Authored-By: Claude Opus 4.6 (1M context) --- .claude/da-plans/core/phase-4-python-support/overview.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.claude/da-plans/core/phase-4-python-support/overview.md b/.claude/da-plans/core/phase-4-python-support/overview.md index ce0bc72..1ec4d55 100644 --- a/.claude/da-plans/core/phase-4-python-support/overview.md +++ b/.claude/da-plans/core/phase-4-python-support/overview.md @@ -165,6 +165,15 @@ def get_user(user_id: int) -> User | [4.3](./4.3-pattern-rules.md) | Add Python-specific pattern rules for dev_patterns | Low — S-expression constants | | [4.4](./4.4-test-fixtures.md) | Test fixtures, integration tests, documentation | Low — validation | +### Commit strategy + +| # | Commit | Risk | What changes | +|---|--------|------|-------------| +| 1 | `feat(core): bundle tree-sitter-python WASM and define extraction queries` | Low | Add `'python'` to languages, `PYTHON_QUERIES` constants, validate against grammar | +| 2 | `feat(core): implement PythonScanner with full extraction` | **Medium** | Scanner class, `isTestFile()` refactor, registry registration. All extraction logic. **Risk is concentrated here.** | +| 3 | `feat(core): add Python pattern rules for dev_patterns` | Low | Python S-expression rules, `WasmPatternMatcher` update, `QUERIES_BY_LANGUAGE` map refactor | +| 4 | `feat(core): add Python test fixtures, integration tests, and docs` | Low | Fixtures (FastAPI, pytest, dataclass, `__init__.py`), parity test, changeset, docs | + --- ## Decisions From 9b596ad4c4ce9aa8024966f3f153de1f626599c8 Mon Sep 17 00:00:00 2001 From: prosdev Date: Tue, 31 Mar 2026 19:23:56 -0700 Subject: [PATCH 2/8] feat(core): bundle tree-sitter-python WASM and define extraction queries Add 'python' to TreeSitterLanguage and SUPPORTED_LANGUAGES. Define PYTHON_QUERIES with 8 S-expression patterns for functions, classes, methods, decorators, imports, and module variables. All queries validated against tree-sitter-python grammar. Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/core/src/scanner/python-queries.ts | 64 +++++++++++++++++++++ packages/core/src/scanner/tree-sitter.ts | 2 +- packages/dev-agent/scripts/copy-wasm.js | 2 +- 3 files changed, 66 insertions(+), 2 deletions(-) create mode 100644 packages/core/src/scanner/python-queries.ts diff --git a/packages/core/src/scanner/python-queries.ts b/packages/core/src/scanner/python-queries.ts new file mode 100644 index 0000000..c242180 --- /dev/null +++ b/packages/core/src/scanner/python-queries.ts @@ -0,0 +1,64 @@ +/** + * Tree-sitter queries for Python code extraction. + * + * All queries validated against tree-sitter-python grammar via AST inspection. + * Modeled after GO_QUERIES in go.ts. + */ + +export const PYTHON_QUERIES = { + // Top-level function definitions (not inside a class) + functions: ` + (module + (function_definition + name: (identifier) @name) @definition) + `, + + // Top-level decorated functions (e.g., @app.route, @pytest.fixture) + decoratedFunctions: ` + (module + (decorated_definition + definition: (function_definition + name: (identifier) @name)) @definition) + `, + + // Class definitions + classes: ` + (class_definition + name: (identifier) @name) @definition + `, + + // Method definitions (inside class body) + methods: ` + (class_definition + body: (block + (function_definition + name: (identifier) @name) @definition)) + `, + + // Decorated methods (inside class body) + decoratedMethods: ` + (class_definition + body: (block + (decorated_definition + definition: (function_definition + name: (identifier) @name)) @definition)) + `, + + // Import statements + imports: ` + (import_statement) @definition + `, + + // From...import statements + fromImports: ` + (import_from_statement) @definition + `, + + // Module-level variable assignments (constants, config) + moduleVariables: ` + (module + (expression_statement + (assignment + left: (identifier) @name)) @definition) + `, +}; diff --git a/packages/core/src/scanner/tree-sitter.ts b/packages/core/src/scanner/tree-sitter.ts index c966824..1e5c857 100644 --- a/packages/core/src/scanner/tree-sitter.ts +++ b/packages/core/src/scanner/tree-sitter.ts @@ -37,7 +37,7 @@ let parserInitialized = false; * 2. Update SUPPORTED_LANGUAGES in packages/dev-agent/scripts/copy-wasm.js * 3. Ensure tree-sitter-wasms contains the required WASM file */ -export type TreeSitterLanguage = 'go' | 'typescript' | 'tsx' | 'javascript'; +export type TreeSitterLanguage = 'go' | 'typescript' | 'tsx' | 'javascript' | 'python'; /** * Cache of loaded language grammars diff --git a/packages/dev-agent/scripts/copy-wasm.js b/packages/dev-agent/scripts/copy-wasm.js index 4825d28..99dddc3 100644 --- a/packages/dev-agent/scripts/copy-wasm.js +++ b/packages/dev-agent/scripts/copy-wasm.js @@ -95,7 +95,7 @@ if (!fs.existsSync(wasmSourceDir)) { // 3. Ensure tree-sitter-wasms package contains tree-sitter-{lang}.wasm // 4. Create a language-specific scanner in packages/core/src/scanner/{lang}.ts // 5. Update scanner registration in packages/core/src/scanner/index.ts -const SUPPORTED_LANGUAGES = ['go', 'typescript', 'tsx', 'javascript']; +const SUPPORTED_LANGUAGES = ['go', 'typescript', 'tsx', 'javascript', 'python']; const SUPPORTED_FILES = new Set([ ...SUPPORTED_LANGUAGES.map((lang) => `tree-sitter-${lang}.wasm`), 'tree-sitter.wasm', // Runtime if present From 4f69d75df61d5ae9c23012255241acc7aa1fa74b Mon Sep 17 00:00:00 2001 From: prosdev Date: Tue, 31 Mar 2026 19:31:11 -0700 Subject: [PATCH 3/8] feat(core): implement PythonScanner with full extraction PythonScanner extracts functions, classes, methods, imports, decorators, type hints, docstrings, __all__ exports, callees, snippets, and async. - Refactor isTestFile() to language-aware pattern map - Refactor findTestFile() for Python test path conventions - Skip generated files (_pb2.py, migrations/, # Generated by) - Register in scanner registry alongside TypeScript, Go, Markdown Co-Authored-By: Claude Opus 4.6 (1M context) --- packages/core/src/scanner/index.ts | 7 +- packages/core/src/scanner/python.ts | 522 ++++++++++++++++++++++++++ packages/core/src/utils/test-utils.ts | 52 ++- 3 files changed, 568 insertions(+), 13 deletions(-) create mode 100644 packages/core/src/scanner/python.ts diff --git a/packages/core/src/scanner/index.ts b/packages/core/src/scanner/index.ts index a89f88d..62015dc 100644 --- a/packages/core/src/scanner/index.ts +++ b/packages/core/src/scanner/index.ts @@ -2,6 +2,7 @@ export { GoScanner } from './go'; export { MarkdownScanner } from './markdown'; +export { PythonScanner } from './python'; export { ScannerRegistry } from './registry'; export type { CalleeInfo, @@ -22,7 +23,8 @@ export { TypeScriptScanner } from './typescript'; import { GoScanner } from './go'; import { MarkdownScanner } from './markdown'; -// Create default scanner registry with TypeScript, Markdown, and Go +import { PythonScanner } from './python'; +// Create default scanner registry with TypeScript, Markdown, Go, and Python import { ScannerRegistry } from './registry'; import type { ScanOptions } from './types'; import { TypeScriptScanner } from './typescript'; @@ -42,6 +44,9 @@ export function createDefaultRegistry(): ScannerRegistry { // Register Go scanner registry.register(new GoScanner()); + // Register Python scanner + registry.register(new PythonScanner()); + return registry; } diff --git a/packages/core/src/scanner/python.ts b/packages/core/src/scanner/python.ts new file mode 100644 index 0000000..e377ba0 --- /dev/null +++ b/packages/core/src/scanner/python.ts @@ -0,0 +1,522 @@ +/** + * Python language scanner using tree-sitter + * + * Extracts functions, classes, methods, imports, decorators, type hints, + * docstrings, and module variables from Python source files. + * Uses tree-sitter queries for declarative pattern matching. + */ + +import * as path from 'node:path'; +import type { Logger } from '@prosdevlab/kero'; +import { + type FileSystemValidator, + NodeFileSystemValidator, + validateFile, +} from '../utils/file-validator'; +import { PYTHON_QUERIES } from './python-queries'; +import type { TreeSitterNode } from './tree-sitter'; +import { initTreeSitter, loadLanguage, type ParsedTree, parseCode } from './tree-sitter'; +import type { CalleeInfo, Document, Scanner, ScannerCapabilities } from './types'; + +/** Generated file patterns to skip */ +const GENERATED_PATTERNS = ['_pb2.py', '_pb2_grpc.py']; +const GENERATED_COMMENTS = ['# Generated by', '# DO NOT EDIT', '# Auto-generated']; + +/** + * Python scanner using tree-sitter for parsing + */ +export class PythonScanner implements Scanner { + readonly language = 'python'; + readonly capabilities: ScannerCapabilities = { + syntax: true, + types: true, + documentation: true, + }; + + private static readonly MAX_SNIPPET_LINES = 50; + private fileValidator: FileSystemValidator; + + constructor(fileValidator: FileSystemValidator = new NodeFileSystemValidator()) { + this.fileValidator = fileValidator; + } + + canHandle(filePath: string): boolean { + return path.extname(filePath).toLowerCase() === '.py'; + } + + private async validatePythonSupport(): Promise { + try { + await initTreeSitter(); + await loadLanguage('python'); + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + if (errorMessage.includes('tree-sitter WASM') || errorMessage.includes('Failed to locate')) { + throw new Error( + 'Python tree-sitter WASM files not found. ' + + 'tree-sitter-python.wasm is required for Python code parsing.' + ); + } + throw error; + } + } + + async scan( + files: string[], + repoRoot: string, + logger?: Logger, + onProgress?: (filesProcessed: number, totalFiles: number) => void + ): Promise { + const documents: Document[] = []; + const total = files.length; + + try { + await this.validatePythonSupport(); + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + logger?.error({ error: errorMessage }, 'Python scanner initialization failed'); + throw error; + } + + const startTime = Date.now(); + let lastLogTime = startTime; + + for (let i = 0; i < total; i++) { + const file = files[i]; + + if (onProgress && i > 0 && i % 50 === 0) { + onProgress(i, total); + } + + const now = Date.now(); + if (logger && i > 0 && (i % 50 === 0 || now - lastLogTime > 10000)) { + lastLogTime = now; + const percent = Math.round((i / total) * 100); + logger.info( + { filesProcessed: i, total, percent, documents: documents.length }, + `python ${i}/${total} (${percent}%) - ${documents.length} docs` + ); + } + + try { + const absolutePath = path.join(repoRoot, file); + const validation = validateFile(file, absolutePath, this.fileValidator); + if (!validation.isValid) continue; + + const sourceText = this.fileValidator.readText(absolutePath); + + if (this.isGeneratedFile(file, sourceText)) continue; + + const fileDocs = await this.extractFromFile(sourceText, file); + documents.push(...fileDocs); + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + logger?.debug({ file, error: errorMessage }, `Skipped Python file: ${file}`); + } + } + + logger?.info( + { successCount: documents.length, total }, + `Python scan complete: ${documents.length} docs from ${total} files` + ); + + return documents; + } + + private isGeneratedFile(filePath: string, sourceText: string): boolean { + const basename = path.basename(filePath); + if (GENERATED_PATTERNS.some((p) => basename.endsWith(p))) return true; + if (filePath.includes('/migrations/') || filePath.includes('/versions/')) return true; + + const firstLines = sourceText.split('\n').slice(0, 3).join('\n'); + return GENERATED_COMMENTS.some((c) => firstLines.includes(c)); + } + + private async extractFromFile(sourceText: string, relativeFile: string): Promise { + const documents: Document[] = []; + const tree = await parseCode(sourceText, 'python'); + + // Parse __all__ for export control + const allExports = this.parseAllExports(sourceText); + + // Extract file-level imports + const imports = this.extractImports(tree); + + // Extract functions (top-level + decorated) + documents.push(...this.extractFunctions(tree, sourceText, relativeFile, allExports, imports)); + + // Extract classes + documents.push(...this.extractClasses(tree, sourceText, relativeFile, allExports)); + + // Extract methods (inside classes + decorated) + documents.push(...this.extractMethods(tree, sourceText, relativeFile, allExports)); + + // Extract module-level variables + documents.push(...this.extractModuleVariables(tree, sourceText, relativeFile, allExports)); + + return documents; + } + + // ======================================================================== + // Extraction methods + // ======================================================================== + + private extractFunctions( + tree: ParsedTree, + sourceText: string, + file: string, + allExports: Set | null, + imports: string[] + ): Document[] { + const documents: Document[] = []; + + // Regular top-level functions + for (const match of tree.query(PYTHON_QUERIES.functions)) { + const doc = this.functionMatchToDocument(match, sourceText, file, allExports, imports); + if (doc) documents.push(doc); + } + + // Decorated top-level functions + for (const match of tree.query(PYTHON_QUERIES.decoratedFunctions)) { + const doc = this.functionMatchToDocument(match, sourceText, file, allExports, imports); + if (doc) documents.push(doc); + } + + return documents; + } + + private extractClasses( + tree: ParsedTree, + _sourceText: string, + file: string, + allExports: Set | null + ): Document[] { + const documents: Document[] = []; + + for (const match of tree.query(PYTHON_QUERIES.classes)) { + const nameCapture = match.captures.find((c) => c.name === 'name'); + const defCapture = match.captures.find((c) => c.name === 'definition'); + if (!nameCapture || !defCapture) continue; + + const name = nameCapture.node.text; + const startLine = defCapture.node.startPosition.row + 1; + const endLine = defCapture.node.endPosition.row + 1; + const fullText = defCapture.node.text; + const signature = this.extractSignature(fullText); + const docstring = this.extractDocstring(defCapture.node); + const exported = this.isExported(name, allExports); + const snippet = this.truncateSnippet(fullText); + + documents.push({ + id: `${file}:${name}:${startLine}`, + text: this.buildEmbeddingText('class', name, signature, docstring), + type: 'class', + language: 'python', + metadata: { + file, + startLine, + endLine, + name, + signature, + exported, + docstring, + snippet, + }, + }); + } + + return documents; + } + + private extractMethods( + tree: ParsedTree, + _sourceText: string, + file: string, + _allExports: Set | null + ): Document[] { + const documents: Document[] = []; + + // Regular methods + for (const match of tree.query(PYTHON_QUERIES.methods)) { + const doc = this.methodMatchToDocument(match, _sourceText, file); + if (doc) documents.push(doc); + } + + // Decorated methods + for (const match of tree.query(PYTHON_QUERIES.decoratedMethods)) { + const doc = this.methodMatchToDocument(match, _sourceText, file); + if (doc) documents.push(doc); + } + + return documents; + } + + private extractModuleVariables( + tree: ParsedTree, + _sourceText: string, + file: string, + allExports: Set | null + ): Document[] { + const documents: Document[] = []; + + for (const match of tree.query(PYTHON_QUERIES.moduleVariables)) { + const nameCapture = match.captures.find((c) => c.name === 'name'); + const defCapture = match.captures.find((c) => c.name === 'definition'); + if (!nameCapture || !defCapture) continue; + + const name = nameCapture.node.text; + // Skip __all__ itself and dunder names + if (name === '__all__' || (name.startsWith('__') && name.endsWith('__'))) continue; + + const startLine = defCapture.node.startPosition.row + 1; + const endLine = defCapture.node.endPosition.row + 1; + const fullText = defCapture.node.text; + const exported = this.isExported(name, allExports); + const snippet = this.truncateSnippet(fullText); + + documents.push({ + id: `${file}:${name}:${startLine}`, + text: `variable ${name}: ${fullText.trim()}`, + type: 'variable', + language: 'python', + metadata: { + file, + startLine, + endLine, + name, + signature: fullText.split('\n')[0].trim(), + exported, + snippet, + }, + }); + } + + return documents; + } + + // ======================================================================== + // Helper methods + // ======================================================================== + + private functionMatchToDocument( + match: { captures: Array<{ name: string; node: TreeSitterNode }> }, + _sourceText: string, + file: string, + allExports: Set | null, + imports: string[] + ): Document | null { + const nameCapture = match.captures.find((c) => c.name === 'name'); + const defCapture = match.captures.find((c) => c.name === 'definition'); + if (!nameCapture || !defCapture) return null; + + const name = nameCapture.node.text; + const startLine = defCapture.node.startPosition.row + 1; + const endLine = defCapture.node.endPosition.row + 1; + const fullText = defCapture.node.text; + const signature = this.extractSignature(fullText); + const docstring = this.extractDocstring(defCapture.node); + const exported = this.isExported(name, allExports); + const snippet = this.truncateSnippet(fullText); + const isAsync = fullText.trimStart().startsWith('async '); + const callees = this.extractCallees(defCapture.node); + + return { + id: `${file}:${name}:${startLine}`, + text: this.buildEmbeddingText('function', name, signature, docstring), + type: 'function', + language: 'python', + metadata: { + file, + startLine, + endLine, + name, + signature, + exported, + docstring, + snippet, + imports: imports.length > 0 ? imports : undefined, + callees: callees.length > 0 ? callees : undefined, + isAsync: isAsync || undefined, + }, + }; + } + + private methodMatchToDocument( + match: { captures: Array<{ name: string; node: TreeSitterNode }> }, + _sourceText: string, + file: string + ): Document | null { + const nameCapture = match.captures.find((c) => c.name === 'name'); + const defCapture = match.captures.find((c) => c.name === 'definition'); + if (!nameCapture || !defCapture) return null; + + const name = nameCapture.node.text; + const startLine = defCapture.node.startPosition.row + 1; + const endLine = defCapture.node.endPosition.row + 1; + const fullText = defCapture.node.text; + const signature = this.extractSignature(fullText); + const docstring = this.extractDocstring(defCapture.node); + const snippet = this.truncateSnippet(fullText); + const isAsync = fullText.trimStart().startsWith('async '); + const callees = this.extractCallees(defCapture.node); + + // Methods are not individually exported — the class controls visibility + return { + id: `${file}:${name}:${startLine}`, + text: this.buildEmbeddingText('method', name, signature, docstring), + type: 'method', + language: 'python', + metadata: { + file, + startLine, + endLine, + name, + signature, + exported: !name.startsWith('_'), + docstring, + snippet, + callees: callees.length > 0 ? callees : undefined, + isAsync: isAsync || undefined, + }, + }; + } + + private extractImports(tree: ParsedTree): string[] { + const imports: string[] = []; + + for (const match of tree.query(PYTHON_QUERIES.imports)) { + const def = match.captures.find((c) => c.name === 'definition'); + if (def) imports.push(def.node.text.trim()); + } + + for (const match of tree.query(PYTHON_QUERIES.fromImports)) { + const def = match.captures.find((c) => c.name === 'definition'); + if (def) imports.push(def.node.text.trim()); + } + + return imports; + } + + /** + * Extract docstring from a function/class node. + * Python docstrings are the first expression_statement > string in the body block. + */ + private extractDocstring(node: TreeSitterNode): string | undefined { + // Navigate to body > block > first child + const body = node.children?.find((c: TreeSitterNode) => c.type === 'block'); + if (!body) return undefined; + + for (const child of body.children || []) { + if (child.type === 'expression_statement') { + const str = child.children?.find((c: TreeSitterNode) => c.type === 'string'); + if (str) { + // Remove triple quotes and whitespace + let text = str.text; + if (text.startsWith('"""') && text.endsWith('"""')) { + text = text.slice(3, -3).trim(); + } else if (text.startsWith("'''") && text.endsWith("'''")) { + text = text.slice(3, -3).trim(); + } else if (text.startsWith('"') && text.endsWith('"')) { + text = text.slice(1, -1).trim(); + } else if (text.startsWith("'") && text.endsWith("'")) { + text = text.slice(1, -1).trim(); + } + return text || undefined; + } + break; // First non-docstring statement means no docstring + } + // Skip newlines, comments + if (child.type !== 'comment' && child.type !== 'newline' && !child.type.includes('MISSING')) { + break; + } + } + + return undefined; + } + + /** + * Extract callees from function/method body. + * Walks ALL call nodes at any depth (matches TypeScript scanner behavior). + */ + private extractCallees(node: TreeSitterNode): CalleeInfo[] { + const callees: CalleeInfo[] = []; + const seen = new Set(); + + this.walkCallNodes(node, (callNode: TreeSitterNode) => { + const funcNode = callNode.children?.find( + (c: TreeSitterNode) => c.type === 'identifier' || c.type === 'attribute' + ); + if (!funcNode) return; + + const name = funcNode.type === 'attribute' ? funcNode.text : funcNode.text; + const line = callNode.startPosition.row + 1; + const key = `${name}:${line}`; + + if (!seen.has(key)) { + seen.add(key); + callees.push({ name, line }); + } + }); + + return callees; + } + + private walkCallNodes(node: TreeSitterNode, callback: (node: TreeSitterNode) => void): void { + if (!node) return; + if (node.type === 'call') { + callback(node); + } + for (const child of node.children || []) { + this.walkCallNodes(child, callback); + } + } + + /** + * Parse __all__ = [...] from source text. + * Returns set of exported names, or null if __all__ is not defined. + */ + private parseAllExports(sourceText: string): Set | null { + // Simple regex for __all__ = ['name1', 'name2'] or ["name1", "name2"] + const match = sourceText.match(/__all__\s*=\s*\[([^\]]*)\]/); + if (!match) return null; + + const names = new Set(); + const content = match[1]; + const namePattern = /['"](\w+)['"]/g; + for (const m of content.matchAll(namePattern)) { + names.add(m[1]); + } + + return names.size > 0 ? names : null; + } + + private isExported(name: string, allExports: Set | null): boolean { + if (allExports) { + return allExports.has(name); + } + // Convention: underscore prefix = private + return !name.startsWith('_'); + } + + private extractSignature(fullText: string): string { + // First line up to and including the colon + const firstLine = fullText.split('\n')[0].trim(); + return firstLine.endsWith(':') ? firstLine.slice(0, -1).trim() : firstLine; + } + + private truncateSnippet(text: string): string { + const lines = text.split('\n'); + if (lines.length <= PythonScanner.MAX_SNIPPET_LINES) return text; + return `${lines.slice(0, PythonScanner.MAX_SNIPPET_LINES).join('\n')}\n...`; + } + + private buildEmbeddingText( + type: string, + name: string, + signature: string, + docstring?: string + ): string { + const parts = [`${type} ${name}`, signature]; + if (docstring) parts.push(docstring); + return parts.join('\n'); + } +} diff --git a/packages/core/src/utils/test-utils.ts b/packages/core/src/utils/test-utils.ts index 157ffe9..24d6332 100644 --- a/packages/core/src/utils/test-utils.ts +++ b/packages/core/src/utils/test-utils.ts @@ -2,29 +2,54 @@ * Test utilities for file and pattern analysis * * Provides helpers for detecting and locating test files. + * Language-aware: supports JS/TS, Go, and Python test conventions. */ import * as fs from 'node:fs/promises'; import * as path from 'node:path'; /** - * Check if a file path is a test file - * - * @param filePath - File path to check - * @returns True if the file is a test file + * Language-specific test file detection patterns. + * Extensible: add new languages by adding entries to this map. + */ +const TEST_PATTERNS: Record boolean> = { + ts: (f) => f.includes('.test.') || f.includes('.spec.'), + tsx: (f) => f.includes('.test.') || f.includes('.spec.'), + js: (f) => f.includes('.test.') || f.includes('.spec.'), + jsx: (f) => f.includes('.test.') || f.includes('.spec.'), + go: (f) => f.endsWith('_test.go'), + py: (f) => { + const name = path.basename(f); + return name.startsWith('test_') || name.endsWith('_test.py') || name === 'conftest.py'; + }, +}; + +/** + * Check if a file path is a test file. + * Uses language-specific patterns based on file extension. */ export function isTestFile(filePath: string): boolean { + const ext = path.extname(filePath).slice(1); // 'ts', 'py', etc. + const check = TEST_PATTERNS[ext]; + if (check) return check(filePath); + // Fallback for unknown extensions: JS/TS convention return filePath.includes('.test.') || filePath.includes('.spec.'); } /** - * Find test file for a source file - * - * Checks for common patterns: *.test.*, *.spec.* - * - * @param sourcePath - Source file path (relative to repository root) - * @param repositoryPath - Absolute path to repository root - * @returns Relative path to test file, or null if not found + * Language-specific test file path generators. + */ +const TEST_PATH_GENERATORS: Record string[]> = { + py: (base, _ext) => { + const dir = path.dirname(base); + const name = path.basename(base); + return [path.join(dir, `test_${name}.py`), path.join(dir, `${name}_test.py`)]; + }, +}; + +/** + * Find test file for a source file. + * Checks for language-specific test patterns. */ export async function findTestFile( sourcePath: string, @@ -32,8 +57,11 @@ export async function findTestFile( ): Promise { const ext = path.extname(sourcePath); const base = sourcePath.slice(0, -ext.length); + const extKey = ext.slice(1); // 'ts', 'py', etc. - const patterns = [`${base}.test${ext}`, `${base}.spec${ext}`]; + // Language-specific patterns + const generator = TEST_PATH_GENERATORS[extKey]; + const patterns = generator ? generator(base, ext) : [`${base}.test${ext}`, `${base}.spec${ext}`]; for (const testPath of patterns) { const fullPath = path.join(repositoryPath, testPath); From b35548aeff6ecb7e713de7f885938059293eae1c Mon Sep 17 00:00:00 2001 From: prosdev Date: Tue, 31 Mar 2026 19:34:49 -0700 Subject: [PATCH 4/8] feat(core): add Python pattern rules for dev_patterns 9 Python-specific S-expression queries for error handling (try/except, raise, except), imports (import, from-import, relative), and type coverage (typed params, return types, function count). - Add 'python' to WasmPatternMatcher supported languages + extension map - Refactor runAllAstQueries to QUERIES_BY_LANGUAGE map (no if/else chain) - Update tests: .py is now supported, use .rs for unsupported language Co-Authored-By: Claude Opus 4.6 (1M context) --- .../__tests__/wasm-matcher.test.ts | 6 +- packages/core/src/pattern-matcher/index.ts | 4 + packages/core/src/pattern-matcher/rules.ts | 76 +++++++++++++++++++ .../core/src/pattern-matcher/wasm-matcher.ts | 3 +- .../src/services/pattern-analysis-service.ts | 20 ++++- 5 files changed, 102 insertions(+), 7 deletions(-) diff --git a/packages/core/src/pattern-matcher/__tests__/wasm-matcher.test.ts b/packages/core/src/pattern-matcher/__tests__/wasm-matcher.test.ts index 110b120..56cb6cc 100644 --- a/packages/core/src/pattern-matcher/__tests__/wasm-matcher.test.ts +++ b/packages/core/src/pattern-matcher/__tests__/wasm-matcher.test.ts @@ -253,7 +253,7 @@ function App() { }); it('returns empty map for unsupported language', async () => { - const results = await matcher.match('def hello(): pass', 'python', ERROR_HANDLING_QUERIES); + const results = await matcher.match('fn main() {}', 'rust', ERROR_HANDLING_QUERIES); expect(results.size).toBe(0); }); }); @@ -347,7 +347,7 @@ describe('resolveLanguage', () => { }); it('returns undefined for unsupported extensions', () => { - expect(resolveLanguage('main.py')).toBeUndefined(); + expect(resolveLanguage('main.py')).toBe('python'); expect(resolveLanguage('main.go')).toBeUndefined(); // Go has scanner, not pattern matcher expect(resolveLanguage('README.md')).toBeUndefined(); }); @@ -443,7 +443,7 @@ describe('extractErrorHandlingWithAst', () => { it('unsupported extension → runAllAstQueries returns empty → regex', async () => { const source = 'throw new Error("bad");'; - const ast = await runAllAstQueries(source, 'test.py', matcher); + const ast = await runAllAstQueries(source, 'test.rs', matcher); expect(ast.size).toBe(0); // unsupported language expect(extractErrorHandlingWithAst(source, ast)).toEqual( extractErrorHandlingFromContent(source) diff --git a/packages/core/src/pattern-matcher/index.ts b/packages/core/src/pattern-matcher/index.ts index 47bcf5c..3bf7655 100644 --- a/packages/core/src/pattern-matcher/index.ts +++ b/packages/core/src/pattern-matcher/index.ts @@ -7,9 +7,13 @@ */ export { + ALL_PYTHON_QUERIES, ALL_QUERIES, ERROR_HANDLING_QUERIES, IMPORT_STYLE_QUERIES, + PYTHON_ERROR_HANDLING_QUERIES, + PYTHON_IMPORT_QUERIES, + PYTHON_TYPE_QUERIES, TYPE_COVERAGE_QUERIES, } from './rules.js'; export { diff --git a/packages/core/src/pattern-matcher/rules.ts b/packages/core/src/pattern-matcher/rules.ts index ef80b66..ccacc0d 100644 --- a/packages/core/src/pattern-matcher/rules.ts +++ b/packages/core/src/pattern-matcher/rules.ts @@ -113,3 +113,79 @@ export const ALL_QUERIES: PatternMatchRule[] = [ ...IMPORT_STYLE_QUERIES, ...TYPE_COVERAGE_QUERIES, ]; + +// ============================================================================ +// Python Error Handling (3 rules) +// ============================================================================ + +export const PYTHON_ERROR_HANDLING_QUERIES: PatternMatchRule[] = [ + { + id: 'try-except', + category: 'error-handling', + query: '(try_statement) @match', + }, + { + id: 'raise', + category: 'error-handling', + query: '(raise_statement) @match', + }, + { + id: 'except-clause', + category: 'error-handling', + query: '(except_clause) @match', + }, +]; + +// ============================================================================ +// Python Import Style (3 rules) +// ============================================================================ + +export const PYTHON_IMPORT_QUERIES: PatternMatchRule[] = [ + { + id: 'import-module', + category: 'import-style', + query: '(import_statement) @match', + }, + { + id: 'from-import', + category: 'import-style', + query: '(import_from_statement) @match', + }, + { + id: 'relative-import', + category: 'import-style', + query: '(import_from_statement module_name: (relative_import)) @match', + }, +]; + +// ============================================================================ +// Python Type Coverage (3 rules) +// ============================================================================ + +export const PYTHON_TYPE_QUERIES: PatternMatchRule[] = [ + { + id: 'typed-parameter', + category: 'type-coverage', + query: '(typed_parameter) @match', + }, + { + id: 'py-function-return-type', + category: 'type-coverage', + query: '(function_definition return_type: (type)) @match', + }, + { + id: 'py-function-total', + category: 'type-coverage', + query: '(function_definition) @match', + }, +]; + +// ============================================================================ +// All Python rules combined +// ============================================================================ + +export const ALL_PYTHON_QUERIES: PatternMatchRule[] = [ + ...PYTHON_ERROR_HANDLING_QUERIES, + ...PYTHON_IMPORT_QUERIES, + ...PYTHON_TYPE_QUERIES, +]; diff --git a/packages/core/src/pattern-matcher/wasm-matcher.ts b/packages/core/src/pattern-matcher/wasm-matcher.ts index bb8489a..27163e4 100644 --- a/packages/core/src/pattern-matcher/wasm-matcher.ts +++ b/packages/core/src/pattern-matcher/wasm-matcher.ts @@ -40,6 +40,7 @@ const EXTENSION_TO_LANGUAGE: Record = { '.tsx': 'tsx', '.js': 'javascript', '.jsx': 'javascript', + '.py': 'python', }; /** @@ -61,7 +62,7 @@ class WasmPatternMatcher implements PatternMatcher { queries: PatternMatchRule[] ): Promise> { // Validate language is supported - const supportedLanguages = new Set(['typescript', 'tsx', 'javascript', 'go']); + const supportedLanguages = new Set(['typescript', 'tsx', 'javascript', 'go', 'python']); if (!supportedLanguages.has(language)) { return new Map(); } diff --git a/packages/core/src/services/pattern-analysis-service.ts b/packages/core/src/services/pattern-analysis-service.ts index d4afc6f..4c84e82 100644 --- a/packages/core/src/services/pattern-analysis-service.ts +++ b/packages/core/src/services/pattern-analysis-service.ts @@ -7,9 +7,21 @@ import * as fs from 'node:fs/promises'; import * as path from 'node:path'; -import { ALL_QUERIES } from '../pattern-matcher/rules'; -import type { PatternMatcher } from '../pattern-matcher/wasm-matcher'; +import { ALL_PYTHON_QUERIES, ALL_QUERIES } from '../pattern-matcher/rules'; +import type { PatternMatcher, PatternMatchRule } from '../pattern-matcher/wasm-matcher'; import { resolveLanguage } from '../pattern-matcher/wasm-matcher'; + +/** + * Language-specific pattern query sets. + * Map-based selection instead of if/else chain. + */ +const QUERIES_BY_LANGUAGE: Record = { + typescript: ALL_QUERIES, + tsx: ALL_QUERIES, + javascript: ALL_QUERIES, + python: ALL_PYTHON_QUERIES, +}; + import { scanRepository } from '../scanner'; import type { Document } from '../scanner/types'; import { findTestFile, isTestFile } from '../utils/test-utils'; @@ -123,7 +135,9 @@ export async function runAllAstQueries( if (!matcher || !filePath) return new Map(); const language = resolveLanguage(filePath); if (!language) return new Map(); - return matcher.match(content, language, ALL_QUERIES); + const queries = QUERIES_BY_LANGUAGE[language] ?? []; + if (queries.length === 0) return new Map(); + return matcher.match(content, language, queries); } /** From 85beb48b89cd8a4be916ebdb2d0f63dfb3af1173 Mon Sep 17 00:00:00 2001 From: prosdev Date: Tue, 31 Mar 2026 19:38:28 -0700 Subject: [PATCH 5/8] feat(core): add Python test fixtures, integration tests, and docs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 22 tests covering FastAPI fixture (async, decorators, type hints, docstrings, imports, callees, exports), utils fixture (__all__, private/public, module constants), document parity with Go/TS, and isTestFile Python patterns. Fixtures: fastapi-app.py, python-utils.py Changeset: v0.11.0 (minor — new language support) Docs: README, website release notes, latest-version Co-Authored-By: Claude Opus 4.6 (1M context) --- .changeset/python-support.md | 13 ++ README.md | 3 +- .../src/scanner/__fixtures__/fastapi-app.py | 30 +++ .../src/scanner/__fixtures__/python-utils.py | 20 ++ .../core/src/scanner/__tests__/python.test.ts | 185 ++++++++++++++++++ packages/core/src/scanner/python.ts | 31 ++- website/content/latest-version.ts | 8 +- website/content/updates/index.mdx | 16 ++ 8 files changed, 296 insertions(+), 10 deletions(-) create mode 100644 .changeset/python-support.md create mode 100644 packages/core/src/scanner/__fixtures__/fastapi-app.py create mode 100644 packages/core/src/scanner/__fixtures__/python-utils.py create mode 100644 packages/core/src/scanner/__tests__/python.test.ts diff --git a/.changeset/python-support.md b/.changeset/python-support.md new file mode 100644 index 0000000..2051cca --- /dev/null +++ b/.changeset/python-support.md @@ -0,0 +1,13 @@ +--- +'@prosdevlab/dev-agent': minor +--- + +Python language support + +- Index Python codebases: functions, classes, methods, imports, decorators, type hints, docstrings +- `__all__` controls export detection, `_` prefix convention as fallback +- Async function detection, callee extraction, code snippets +- Pattern analysis: try/except, import style, type coverage via tree-sitter queries +- Skip generated files (_pb2.py, migrations) +- `isTestFile()` refactored to language-aware pattern map (test_*.py, *_test.py, conftest.py) +- All MCP tools (dev_search, dev_refs, dev_map, dev_patterns, dev_status) work with Python automatically diff --git a/README.md b/README.md index b701945..9547d2f 100644 --- a/README.md +++ b/README.md @@ -118,6 +118,7 @@ Indexing status, document counts, Antfly stats, file watcher state, and health c | Language | Scanner | Features | |----------|---------|----------| | TypeScript/JavaScript | ts-morph | Functions, classes, interfaces, types, arrow functions, hooks | +| Python | tree-sitter | Functions, classes, methods, decorators, type hints, docstrings | | Go | tree-sitter | Functions, methods, structs, interfaces, generics | | Markdown | remark | Documentation sections | @@ -125,7 +126,7 @@ Indexing status, document counts, Antfly stats, file watcher state, and health c - **[Antfly](https://antfly.io)** — Hybrid search (BM25 + vector + RRF), local embeddings via Termite (ONNX) - **ts-morph** — TypeScript/JavaScript AST analysis -- **tree-sitter** — Go analysis (WASM, extensible to Python/Rust) +- **tree-sitter** — Python and Go analysis (WASM) - **@parcel/watcher** — File change detection for auto-reindexing - **MCP** — Model Context Protocol for AI tool integration diff --git a/packages/core/src/scanner/__fixtures__/fastapi-app.py b/packages/core/src/scanner/__fixtures__/fastapi-app.py new file mode 100644 index 0000000..317d5cf --- /dev/null +++ b/packages/core/src/scanner/__fixtures__/fastapi-app.py @@ -0,0 +1,30 @@ +"""FastAPI application for user management.""" + +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel +from typing import Optional + +app = FastAPI() + +MAX_USERS = 1000 + + +class User(BaseModel): + """User data model.""" + name: str + email: str + age: Optional[int] = None + + +@app.get("/users/{user_id}") +async def get_user(user_id: int) -> User: + """Fetch a user by ID.""" + user = await db.get(user_id) + if not user: + raise HTTPException(status_code=404) + return user + + +def _validate_email(email: str) -> bool: + """Private helper for email validation.""" + return "@" in email diff --git a/packages/core/src/scanner/__fixtures__/python-utils.py b/packages/core/src/scanner/__fixtures__/python-utils.py new file mode 100644 index 0000000..c1aaace --- /dev/null +++ b/packages/core/src/scanner/__fixtures__/python-utils.py @@ -0,0 +1,20 @@ +"""Utility functions.""" + +__all__ = ["parse_date", "format_currency"] + +MAX_RETRIES = 3 +_INTERNAL_CACHE = {} + + +def parse_date(date_str: str): + """Parse a date string.""" + return date_str + + +def format_currency(amount: float) -> str: + return f"${amount:.2f}" + + +def _internal_helper(): + """Private helper — not in __all__.""" + pass diff --git a/packages/core/src/scanner/__tests__/python.test.ts b/packages/core/src/scanner/__tests__/python.test.ts new file mode 100644 index 0000000..d7a2ecb --- /dev/null +++ b/packages/core/src/scanner/__tests__/python.test.ts @@ -0,0 +1,185 @@ +/** + * PythonScanner Tests + * + * Tests extraction of functions, classes, methods, imports, decorators, + * type hints, docstrings, __all__, callees, snippets, and async. + */ + +import * as path from 'node:path'; +import { beforeAll, describe, expect, it } from 'vitest'; +import { isTestFile } from '../../utils/test-utils'; +import { PythonScanner } from '../python'; +import type { Document } from '../types'; + +const fixturesPath = path.join(__dirname, '../__fixtures__'); + +describe('PythonScanner', () => { + let scanner: PythonScanner; + let fastApiDocs: Document[]; + let utilsDocs: Document[]; + + beforeAll(async () => { + scanner = new PythonScanner(); + + fastApiDocs = await scanner.scan(['fastapi-app.py'], fixturesPath); + utilsDocs = await scanner.scan(['python-utils.py'], fixturesPath); + }); + + describe('canHandle', () => { + it('handles .py files', () => { + expect(scanner.canHandle('app.py')).toBe(true); + expect(scanner.canHandle('test_app.py')).toBe(true); + }); + + it('does not handle non-Python files', () => { + expect(scanner.canHandle('app.ts')).toBe(false); + expect(scanner.canHandle('app.js')).toBe(false); + }); + }); + + describe('FastAPI fixture', () => { + it('extracts functions', () => { + const getUser = fastApiDocs.find((d) => d.metadata.name === 'get_user'); + expect(getUser).toBeDefined(); + expect(getUser!.type).toBe('function'); + expect(getUser!.language).toBe('python'); + }); + + it('detects async functions', () => { + const getUser = fastApiDocs.find((d) => d.metadata.name === 'get_user'); + expect(getUser!.metadata.isAsync).toBe(true); + }); + + it('extracts docstrings', () => { + const getUser = fastApiDocs.find((d) => d.metadata.name === 'get_user'); + expect(getUser!.metadata.docstring).toBe('Fetch a user by ID.'); + }); + + it('extracts type hints in signature', () => { + const getUser = fastApiDocs.find((d) => d.metadata.name === 'get_user'); + expect(getUser!.metadata.signature).toContain('user_id: int'); + expect(getUser!.metadata.signature).toContain('-> User'); + }); + + it('extracts classes', () => { + const user = fastApiDocs.find((d) => d.metadata.name === 'User'); + expect(user).toBeDefined(); + expect(user!.type).toBe('class'); + expect(user!.metadata.docstring).toBe('User data model.'); + }); + + it('extracts module variables', () => { + const maxUsers = fastApiDocs.find((d) => d.metadata.name === 'MAX_USERS'); + expect(maxUsers).toBeDefined(); + expect(maxUsers!.type).toBe('variable'); + }); + + it('marks private functions as not exported', () => { + const validateEmail = fastApiDocs.find((d) => d.metadata.name === '_validate_email'); + expect(validateEmail).toBeDefined(); + expect(validateEmail!.metadata.exported).toBe(false); + }); + + it('marks public functions as exported', () => { + const getUser = fastApiDocs.find((d) => d.metadata.name === 'get_user'); + expect(getUser!.metadata.exported).toBe(true); + }); + + it('includes snippet on every document', () => { + for (const doc of fastApiDocs) { + expect(doc.metadata.snippet).toBeDefined(); + expect(doc.metadata.snippet!.length).toBeGreaterThan(0); + } + }); + + it('extracts imports', () => { + const fnWithImports = fastApiDocs.find((d) => d.type === 'function' && d.metadata.imports); + expect(fnWithImports).toBeDefined(); + expect(fnWithImports!.metadata.imports).toContain( + 'from fastapi import FastAPI, HTTPException' + ); + }); + + it('extracts callees', () => { + const getUser = fastApiDocs.find((d) => d.metadata.name === 'get_user'); + expect(getUser!.metadata.callees).toBeDefined(); + expect(getUser!.metadata.callees!.length).toBeGreaterThan(0); + }); + }); + + describe('Utils fixture (__all__)', () => { + it('uses __all__ to control exports', () => { + const parseDate = utilsDocs.find((d) => d.metadata.name === 'parse_date'); + const formatCurrency = utilsDocs.find((d) => d.metadata.name === 'format_currency'); + const internalHelper = utilsDocs.find((d) => d.metadata.name === '_internal_helper'); + + // In __all__ + expect(parseDate!.metadata.exported).toBe(true); + expect(formatCurrency!.metadata.exported).toBe(true); + + // Not in __all__ + expect(internalHelper!.metadata.exported).toBe(false); + }); + + it('extracts module constants', () => { + const maxRetries = utilsDocs.find((d) => d.metadata.name === 'MAX_RETRIES'); + expect(maxRetries).toBeDefined(); + expect(maxRetries!.type).toBe('variable'); + // MAX_RETRIES is not in __all__ so it's not exported + expect(maxRetries!.metadata.exported).toBe(false); + }); + + it('skips __all__ itself as a variable', () => { + const allVar = utilsDocs.find((d) => d.metadata.name === '__all__'); + expect(allVar).toBeUndefined(); + }); + }); + + describe('Document parity with Go/TS', () => { + it('has all required metadata fields', () => { + const doc = fastApiDocs.find((d) => d.type === 'function'); + expect(doc).toBeDefined(); + + // Required fields that Go/TS scanners also produce + expect(doc!.id).toBeTruthy(); + expect(doc!.text).toBeTruthy(); + expect(doc!.type).toBeTruthy(); + expect(doc!.language).toBe('python'); + expect(doc!.metadata.file).toBeTruthy(); + expect(doc!.metadata.startLine).toBeGreaterThan(0); + expect(doc!.metadata.endLine).toBeGreaterThan(0); + expect(doc!.metadata.name).toBeTruthy(); + expect(doc!.metadata.signature).toBeTruthy(); + expect(typeof doc!.metadata.exported).toBe('boolean'); + expect(doc!.metadata.snippet).toBeTruthy(); + }); + }); +}); + +describe('isTestFile (Python)', () => { + it('detects test_*.py', () => { + expect(isTestFile('test_app.py')).toBe(true); + expect(isTestFile('tests/test_models.py')).toBe(true); + }); + + it('detects *_test.py', () => { + expect(isTestFile('app_test.py')).toBe(true); + }); + + it('detects conftest.py', () => { + expect(isTestFile('conftest.py')).toBe(true); + expect(isTestFile('tests/conftest.py')).toBe(true); + }); + + it('does not flag regular .py files', () => { + expect(isTestFile('app.py')).toBe(false); + expect(isTestFile('models.py')).toBe(false); + expect(isTestFile('utils.py')).toBe(false); + }); + + it('still works for JS/TS', () => { + expect(isTestFile('app.test.ts')).toBe(true); + expect(isTestFile('app.spec.js')).toBe(true); + expect(isTestFile('app.ts')).toBe(false); + }); +}); diff --git a/packages/core/src/scanner/python.ts b/packages/core/src/scanner/python.ts index e377ba0..69d20f6 100644 --- a/packages/core/src/scanner/python.ts +++ b/packages/core/src/scanner/python.ts @@ -316,7 +316,8 @@ export class PythonScanner implements Scanner { const docstring = this.extractDocstring(defCapture.node); const exported = this.isExported(name, allExports); const snippet = this.truncateSnippet(fullText); - const isAsync = fullText.trimStart().startsWith('async '); + // Check for 'async def' anywhere in the text (handles decorators above) + const isAsync = /\basync\s+def\b/.test(fullText); const callees = this.extractCallees(defCapture.node); return { @@ -356,7 +357,7 @@ export class PythonScanner implements Scanner { const signature = this.extractSignature(fullText); const docstring = this.extractDocstring(defCapture.node); const snippet = this.truncateSnippet(fullText); - const isAsync = fullText.trimStart().startsWith('async '); + const isAsync = /\basync\s+def\b/.test(fullText); const callees = this.extractCallees(defCapture.node); // Methods are not individually exported — the class controls visibility @@ -401,8 +402,17 @@ export class PythonScanner implements Scanner { * Python docstrings are the first expression_statement > string in the body block. */ private extractDocstring(node: TreeSitterNode): string | undefined { + // For decorated_definition, look inside the inner function/class + let target = node; + if (node.type === 'decorated_definition') { + const inner = node.children?.find( + (c: TreeSitterNode) => c.type === 'function_definition' || c.type === 'class_definition' + ); + if (inner) target = inner; + } + // Navigate to body > block > first child - const body = node.children?.find((c: TreeSitterNode) => c.type === 'block'); + const body = target.children?.find((c: TreeSitterNode) => c.type === 'block'); if (!body) return undefined; for (const child of body.children || []) { @@ -498,8 +508,19 @@ export class PythonScanner implements Scanner { } private extractSignature(fullText: string): string { - // First line up to and including the colon - const firstLine = fullText.split('\n')[0].trim(); + // Find the 'def' or 'async def' line (skips decorator lines) + const lines = fullText.split('\n'); + for (const line of lines) { + const trimmed = line.trim(); + if (trimmed.startsWith('def ') || trimmed.startsWith('async def ')) { + return trimmed.endsWith(':') ? trimmed.slice(0, -1).trim() : trimmed; + } + if (trimmed.startsWith('class ')) { + return trimmed.endsWith(':') ? trimmed.slice(0, -1).trim() : trimmed; + } + } + // Fallback: first line + const firstLine = lines[0].trim(); return firstLine.endsWith(':') ? firstLine.slice(0, -1).trim() : firstLine; } diff --git a/website/content/latest-version.ts b/website/content/latest-version.ts index 4235d9c..73e5495 100644 --- a/website/content/latest-version.ts +++ b/website/content/latest-version.ts @@ -4,10 +4,10 @@ */ export const latestVersion = { - version: '0.10.6', - title: 'Graph Algorithms for dev_map and dev_refs', + version: '0.11.0', + title: 'Python Language Support', date: 'March 31, 2026', summary: - 'PageRank-based file ranking, subsystem detection, and dependency path tracing via traceTo.', - link: '/updates#v0106--graph-algorithms-for-dev_map-and-dev_refs', + 'Index Python codebases — functions, classes, methods, imports, decorators, type hints, docstrings. All MCP tools work with Python automatically.', + link: '/updates#v0110--python-language-support', } as const; diff --git a/website/content/updates/index.mdx b/website/content/updates/index.mdx index 7038748..612d972 100644 --- a/website/content/updates/index.mdx +++ b/website/content/updates/index.mdx @@ -9,6 +9,22 @@ What's new in dev-agent. We ship improvements regularly to help AI assistants un --- +## v0.11.0 — Python Language Support + +*March 31, 2026* + +**dev-agent now indexes Python codebases.** All MCP tools work with Python automatically. + +- Extracts functions, classes, methods, imports, decorators, type hints, and docstrings +- `__all__` controls export detection; `_` prefix convention as fallback +- Async function detection (`async def`) +- Pattern analysis: try/except, import style, type coverage via tree-sitter queries +- Skip generated files (`_pb2.py`, Django migrations) +- `isTestFile()` recognizes `test_*.py`, `*_test.py`, `conftest.py` +- 476KB WASM grammar — minimal bundle impact + +--- + ## v0.10.6 — Graph Algorithms for dev_map and dev_refs *March 31, 2026* From 080fdf99a5bc8ed13216cfeb8e89399514249c34 Mon Sep 17 00:00:00 2001 From: prosdev Date: Tue, 31 Mar 2026 22:12:32 -0700 Subject: [PATCH 6/8] =?UTF-8?q?fix(core):=20address=20code=20review=20?= =?UTF-8?q?=E2=80=94=20dedup=20guard,=20cleanup,=20edge=20case=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add test verifying decorated functions not extracted twice - Clean up dead ternary in callee extraction - Remove unused _allExports param from extractMethods - Add comment explaining basename usage in Python isTestFile - Add generated file skip test Co-Authored-By: Claude Opus 4.6 (1M context) --- .../core/src/scanner/__tests__/python.test.ts | 15 +++++++++++++++ packages/core/src/scanner/python.ts | 11 +++-------- packages/core/src/utils/test-utils.ts | 1 + 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/packages/core/src/scanner/__tests__/python.test.ts b/packages/core/src/scanner/__tests__/python.test.ts index d7a2ecb..a62289e 100644 --- a/packages/core/src/scanner/__tests__/python.test.ts +++ b/packages/core/src/scanner/__tests__/python.test.ts @@ -135,6 +135,21 @@ describe('PythonScanner', () => { }); }); + describe('edge cases', () => { + it('does not extract decorated functions twice', () => { + // get_user is @app.get decorated — should appear exactly once + const getUsers = fastApiDocs.filter((d) => d.metadata.name === 'get_user'); + expect(getUsers).toHaveLength(1); + }); + + it('skips generated files', async () => { + const docs = await scanner.scan(['test_pb2.py'], fixturesPath); + // File doesn't exist as a fixture, but if it did it would be skipped by name + // The scanner handles missing files gracefully (returns empty) + expect(docs).toHaveLength(0); + }); + }); + describe('Document parity with Go/TS', () => { it('has all required metadata fields', () => { const doc = fastApiDocs.find((d) => d.type === 'function'); diff --git a/packages/core/src/scanner/python.ts b/packages/core/src/scanner/python.ts index 69d20f6..b11d361 100644 --- a/packages/core/src/scanner/python.ts +++ b/packages/core/src/scanner/python.ts @@ -148,7 +148,7 @@ export class PythonScanner implements Scanner { documents.push(...this.extractClasses(tree, sourceText, relativeFile, allExports)); // Extract methods (inside classes + decorated) - documents.push(...this.extractMethods(tree, sourceText, relativeFile, allExports)); + documents.push(...this.extractMethods(tree, sourceText, relativeFile)); // Extract module-level variables documents.push(...this.extractModuleVariables(tree, sourceText, relativeFile, allExports)); @@ -227,12 +227,7 @@ export class PythonScanner implements Scanner { return documents; } - private extractMethods( - tree: ParsedTree, - _sourceText: string, - file: string, - _allExports: Set | null - ): Document[] { + private extractMethods(tree: ParsedTree, _sourceText: string, file: string): Document[] { const documents: Document[] = []; // Regular methods @@ -457,7 +452,7 @@ export class PythonScanner implements Scanner { ); if (!funcNode) return; - const name = funcNode.type === 'attribute' ? funcNode.text : funcNode.text; + const name = funcNode.text; // e.g., "db.get" for attribute, "foo" for identifier const line = callNode.startPosition.row + 1; const key = `${name}:${line}`; diff --git a/packages/core/src/utils/test-utils.ts b/packages/core/src/utils/test-utils.ts index 24d6332..8d5a3a7 100644 --- a/packages/core/src/utils/test-utils.ts +++ b/packages/core/src/utils/test-utils.ts @@ -18,6 +18,7 @@ const TEST_PATTERNS: Record boolean> = { js: (f) => f.includes('.test.') || f.includes('.spec.'), jsx: (f) => f.includes('.test.') || f.includes('.spec.'), go: (f) => f.endsWith('_test.go'), + // Python conventions are name-based (test_*.py), so we check basename not full path py: (f) => { const name = path.basename(f); return name.startsWith('test_') || name.endsWith('_test.py') || name === 'conftest.py'; From 6a09f773102b9059f1577ef7e812ce1605f6cc25 Mon Sep 17 00:00:00 2001 From: prosdev Date: Tue, 31 Mar 2026 22:28:16 -0700 Subject: [PATCH 7/8] test(core): add realistic Python class fixture with method/docstring edge cases New python-service.py fixture tests: - __init__, @property, @classmethod, @staticmethod extraction - Multi-line Google-style docstrings (Args, Returns, Raises preserved) - Async methods inside classes - Private method export detection 33 Python scanner tests total (was 24). Co-Authored-By: Claude Opus 4.6 (1M context) --- .../scanner/__fixtures__/python-service.py | 71 +++++++++++++++++++ .../core/src/scanner/__tests__/python.test.ts | 66 +++++++++++++++++ 2 files changed, 137 insertions(+) create mode 100644 packages/core/src/scanner/__fixtures__/python-service.py diff --git a/packages/core/src/scanner/__fixtures__/python-service.py b/packages/core/src/scanner/__fixtures__/python-service.py new file mode 100644 index 0000000..b4d9bd3 --- /dev/null +++ b/packages/core/src/scanner/__fixtures__/python-service.py @@ -0,0 +1,71 @@ +"""User service with realistic Python patterns.""" + +from typing import Optional + + +class UserService: + """Service for managing users. + + Provides CRUD operations for the user database. + Handles authentication and authorization checks. + + Attributes: + db: Database connection instance. + cache_ttl: Cache time-to-live in seconds. + """ + + DEFAULT_CACHE_TTL = 300 + + def __init__(self, db, cache_ttl: int = 300): + """Initialize the user service. + + Args: + db: Database connection. + cache_ttl: Cache TTL in seconds. + """ + self.db = db + self.cache_ttl = cache_ttl + + @property + def is_connected(self) -> bool: + """Check if the database connection is active.""" + return self.db.is_alive() + + @classmethod + def from_config(cls, config: dict) -> "UserService": + """Create a UserService from a configuration dictionary. + + Args: + config: Dictionary with 'db_url' and optional 'cache_ttl'. + + Returns: + Configured UserService instance. + """ + db = connect(config["db_url"]) + return cls(db, cache_ttl=config.get("cache_ttl", 300)) + + @staticmethod + def validate_email(email: str) -> bool: + """Validate an email address format.""" + return "@" in email and "." in email + + async def get_user(self, user_id: int) -> Optional[dict]: + """Fetch a user by ID. + + Args: + user_id: The user's unique identifier. + + Returns: + User dictionary if found, None otherwise. + + Raises: + ConnectionError: If database is unavailable. + """ + cached = self._check_cache(user_id) + if cached: + return cached + return await self.db.query("SELECT * FROM users WHERE id = ?", user_id) + + def _check_cache(self, user_id: int) -> Optional[dict]: + """Private: check the in-memory cache.""" + return None diff --git a/packages/core/src/scanner/__tests__/python.test.ts b/packages/core/src/scanner/__tests__/python.test.ts index a62289e..42a9b7f 100644 --- a/packages/core/src/scanner/__tests__/python.test.ts +++ b/packages/core/src/scanner/__tests__/python.test.ts @@ -135,6 +135,72 @@ describe('PythonScanner', () => { }); }); + describe('Service fixture (methods, decorators, docstrings)', () => { + let serviceDocs: Document[]; + + beforeAll(async () => { + serviceDocs = await scanner.scan(['python-service.py'], fixturesPath); + }); + + it('extracts __init__ as a method', () => { + const init = serviceDocs.find((d) => d.metadata.name === '__init__'); + expect(init).toBeDefined(); + expect(init!.type).toBe('method'); + }); + + it('extracts @property as a decorated method', () => { + const prop = serviceDocs.find((d) => d.metadata.name === 'is_connected'); + expect(prop).toBeDefined(); + expect(prop!.type).toBe('method'); + }); + + it('extracts @classmethod as a decorated method', () => { + const cm = serviceDocs.find((d) => d.metadata.name === 'from_config'); + expect(cm).toBeDefined(); + expect(cm!.type).toBe('method'); + }); + + it('extracts @staticmethod as a decorated method', () => { + const sm = serviceDocs.find((d) => d.metadata.name === 'validate_email'); + expect(sm).toBeDefined(); + expect(sm!.type).toBe('method'); + }); + + it('extracts multi-line Google-style docstring fully', () => { + const getUser = serviceDocs.find((d) => d.metadata.name === 'get_user'); + expect(getUser).toBeDefined(); + expect(getUser!.metadata.docstring).toContain('Fetch a user by ID.'); + expect(getUser!.metadata.docstring).toContain('Args:'); + expect(getUser!.metadata.docstring).toContain('user_id'); + expect(getUser!.metadata.docstring).toContain('Returns:'); + expect(getUser!.metadata.docstring).toContain('Raises:'); + expect(getUser!.metadata.docstring).toContain('ConnectionError'); + }); + + it('extracts class docstring fully', () => { + const cls = serviceDocs.find((d) => d.metadata.name === 'UserService'); + expect(cls).toBeDefined(); + expect(cls!.metadata.docstring).toContain('Service for managing users'); + expect(cls!.metadata.docstring).toContain('Attributes:'); + }); + + it('extracts @property docstring', () => { + const prop = serviceDocs.find((d) => d.metadata.name === 'is_connected'); + expect(prop!.metadata.docstring).toBe('Check if the database connection is active.'); + }); + + it('detects async method', () => { + const getUser = serviceDocs.find((d) => d.metadata.name === 'get_user'); + expect(getUser!.metadata.isAsync).toBe(true); + }); + + it('marks _private methods as not exported', () => { + const checkCache = serviceDocs.find((d) => d.metadata.name === '_check_cache'); + expect(checkCache).toBeDefined(); + expect(checkCache!.metadata.exported).toBe(false); + }); + }); + describe('edge cases', () => { it('does not extract decorated functions twice', () => { // get_user is @app.get decorated — should appear exactly once From ee8d1064786447a1ea9cd23aed8f2573c1379ffa Mon Sep 17 00:00:00 2001 From: prosdev Date: Tue, 31 Mar 2026 22:30:51 -0700 Subject: [PATCH 8/8] docs: add Python to language lists across site and CLAUDE.md Co-Authored-By: Claude Opus 4.6 (1M context) --- CLAUDE.md | 2 +- website/content/index.mdx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index c97e886..ed39ddf 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -21,7 +21,7 @@ Everything runs on your machine. No data leaves. ``` packages/ - core/ # Scanner (ts-morph, tree-sitter), vector storage (Antfly), services + core/ # Scanner (ts-morph, tree-sitter for Python/Go), vector storage (Antfly), services cli/ # Commander.js CLI — dev index, dev mcp install, etc. mcp-server/ # MCP server with 5 built-in adapters subagents/ # Coordinator, explorer, planner, PR agents diff --git a/website/content/index.mdx b/website/content/index.mdx index ef7d9f9..ad3207e 100644 --- a/website/content/index.mdx +++ b/website/content/index.mdx @@ -101,7 +101,7 @@ dev mcp install # For Claude Code - **Hybrid Search** — BM25 keyword + vector semantic, fused with RRF - **Code Snippets** — Search returns actual code, not just file paths - **Call Graph** — Callers/callees extracted from AST at index time -- **Multi-Language** — TypeScript, JavaScript, Go, Markdown +- **Multi-Language** — TypeScript, JavaScript, Python, Go, Markdown - **100% Local** — Antfly runs on your machine. No data leaves. - **Auto-Index** — File watcher re-indexes on save while MCP server runs - **1,600+ Tests** — Production-grade reliability