diff --git a/CHANGELOG.md b/CHANGELOG.md index c3103d2..f1c1069 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,55 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [2.0.0] - 2026-01-28 + +### Added - Smart Enforcement System +- **New `verify` tool** - Quality gate that must pass before presenting code to user + - Validates tests are provided (TDD compliance) + - Checks for interfaces (DI compliance) + - Detects critical code issues + - Returns PASS/FAIL with specific feedback + +- **Real code analysis in `validate` tool** + - Regex-based anti-pattern detection (15+ patterns) + - Method/class length analysis + - Quality score calculation (0-100) + - Actionable suggestions for each issue + +- **New `code-analyzer` module** (`src/analysis/code-analyzer.ts`) + - Detects: empty catch, hardcoded secrets, eval, innerHTML, generic exceptions + - Detects: console statements, field injection, any type, loose equality + - Measures: method lines, class lines, interface count, test count + +- **Mandatory Checkpoint JSON** in `get_context` output + - Forces LLM to commit to architecture decisions before coding + - Includes: interfaces_to_create, tests_to_write, quality_commitments + +- **Contractual Response Format** in `get_context` output + - Enforces order: CHECKPOINT → INTERFACES → TESTS → IMPLEMENTATION → SELF-REVIEW + - Prevents skipping TDD steps + +- **Mandatory Self-Review JSON** in `get_context` output + - LLM must audit own code: methods_over_20_lines, tests_written, etc. + - Must achieve quality_score >= 7 before presenting code + +### Changed +- `get_context` output now includes Smart Enforcement sections (~400 tokens extra) +- `validate` returns real analysis instead of just checklist +- Added `verify` to tool list and dispatcher + +### Technical +- 52 new tests (37 for analyzer, 15 for verify) +- Coverage maintained at 82.62% +- No new external dependencies + +### Breaking Changes +- `validate` output format changed from checklist to analysis results + +--- + +## [1.1.0] - 2026-01-15 + ### Added - `search_standards` tool for querying documentation by topic (kafka, docker, testing, etc.) - Enhanced Zod schemas for CQRS, Event-Driven, ArchUnit, HttpClients, Observability diff --git a/docs/api-reference.md b/docs/api-reference.md index 777d119..3f2ea9e 100644 --- a/docs/api-reference.md +++ b/docs/api-reference.md @@ -34,17 +34,26 @@ Complete reference for Corbat MCP tools, resources, and prompts. ### validate -Validate code against coding standards. +**Real code analysis** - Analyzes code and returns specific issues with suggestions. | Parameter | Type | Required | Description | |-----------|------|----------|-------------| | `code` | string | Yes | The code to validate | -| `task_type` | enum | No | One of: `feature`, `bugfix`, `refactor`, `test` | +| `task_type` | enum | No | One of: `feature`, `bugfix`, `refactor`, `test`, `security`, `performance` | + +**Detects:** +- Anti-patterns: empty catch, hardcoded secrets, eval, innerHTML, generic exceptions +- Code quality: console statements, field injection, any type, loose equality +- Structure: method length > 20 lines, class length > 200 lines +- Missing: tests, interfaces **Returns:** -- Code quality thresholds -- Guardrails for task type -- Review checklist template +- Quality score (0-100) +- CRITICAL issues (must fix) +- WARNINGS (should fix) +- INFO (optional) +- Metrics (lines, methods, classes, interfaces, tests) +- PASSED/NEEDS WORK verdict **Example:** ```json @@ -54,6 +63,134 @@ Validate code against coding standards. } ``` +**Example Output:** +```markdown +# ✅ Code Analysis Results + +**GOOD: Code follows most best practices** + +**Score: 78/100** + +--- + +## Metrics + +| Metric | Value | +|--------|-------| +| Total Lines | 45 | +| Methods | 3 | +| Interfaces | 1 | +| Tests | 5 | + +--- + +## Warnings (should fix) + +- **Line 12:** Console statement found + - Suggestion: Use a proper logging framework in production code + +--- + +## Verdict + +**PASSED** - Code meets quality standards. +``` + +--- + +### verify (NEW in v2.0) + +**Quality gate** - REQUIRED before presenting code to user. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `code` | string | Yes | All implementation code | +| `tests` | string | Yes* | All test code (*required for TDD compliance) | +| `interfaces` | string | No | All interfaces and type definitions | +| `task_type` | enum | No | One of: `feature`, `bugfix`, `refactor`, `test`, `security`, `performance` | + +**Checks:** +1. Tests are provided (TDD compliance) +2. Interfaces exist (DI compliance) +3. No critical code issues +4. Quality score >= 50 + +**Returns:** +- `PASS`: Code meets standards - present to user +- `FAIL`: Issues listed - fix and verify again + +**Workflow:** +``` +1. Call get_context() → get guidelines +2. Complete checkpoint JSON +3. Write code: INTERFACES → TESTS → IMPLEMENTATION +4. Complete self-review JSON +5. Call verify() → must PASS +6. If FAIL: fix issues, call verify again +7. If PASS: present to user +``` + +**Example:** +```json +{ + "code": "class UserServiceImpl implements UserService { ... }", + "tests": "describe('UserService', () => { ... })", + "interfaces": "interface UserService { getUser(id: string): User; }", + "task_type": "feature" +} +``` + +**Example PASS Output:** +```markdown +# ✅ VERIFICATION PASSED + +**Score: 85/100** + +The code meets quality standards and is ready to present to the user. + +--- + +## Verification Summary + +- Tests provided: Yes +- Interfaces provided: Yes +- Critical issues: 0 +- Warnings: 2 +- Test count: 5 +- Interface count: 2 + +--- + +**You may now present this code to the user.** +``` + +**Example FAIL Output:** +```markdown +# ❌ VERIFICATION FAILED + +**Score: 45/100** + +The code does not meet quality standards. Fix the issues below and verify again. + +--- + +## Issues to Fix (Blocking) + +- No tests provided - TDD requires tests before/with implementation +- 2 critical code issue(s) detected - see details below + +--- + +## Critical Code Issues + +- **Line 5:** Potential hardcoded secret detected + - Fix: Use environment variables or a secrets manager + +--- + +**Fix these issues and call `verify` again before presenting code to user.** +``` + --- ### search diff --git a/package.json b/package.json index 89dbf11..7027079 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@corbat-tech/coding-standards-mcp", - "version": "1.1.0", + "version": "2.0.0", "description": "AI coding standards that apply themselves - MCP server that enforces production-grade code", "mcpName": "io.github.corbat-tech/coding-standards", "type": "module", diff --git a/src/analysis/code-analyzer.ts b/src/analysis/code-analyzer.ts new file mode 100644 index 0000000..c57db40 --- /dev/null +++ b/src/analysis/code-analyzer.ts @@ -0,0 +1,640 @@ +/** + * Lightweight code analyzer using regex and heuristics. + * No AST parsing to keep it fast and dependency-free. + * + * This module provides real code analysis instead of just returning checklists. + * It detects anti-patterns, measures code quality metrics, and provides + * actionable feedback. + */ + +export interface AnalysisIssue { + type: 'CRITICAL' | 'WARNING' | 'INFO'; + rule: string; + message: string; + line?: number; + suggestion?: string; +} + +export interface CodeMetrics { + totalLines: number; + codeLines: number; + commentLines: number; + methodCount: number; + classCount: number; + interfaceCount: number; + testCount: number; + maxMethodLines: number; + maxClassLines: number; + customErrorCount: number; + importCount: number; +} + +export interface AnalysisResult { + issues: AnalysisIssue[]; + score: number; + summary: string; + metrics: CodeMetrics; + passed: boolean; +} + +// Anti-patterns to detect with their severity and suggestions +const ANTI_PATTERNS: Array<{ + pattern: RegExp; + message: string; + rule: string; + type: 'CRITICAL' | 'WARNING' | 'INFO'; + suggestion?: string; +}> = [ + // Critical issues + { + pattern: /catch\s*\(\s*\w*\s*\)\s*\{\s*\}/g, + message: 'Empty catch block - errors are silently swallowed', + rule: 'no-empty-catch', + type: 'CRITICAL', + suggestion: 'Handle the error, log it, or rethrow with context', + }, + { + pattern: /(password|secret|api_key|apikey|api[-_]?secret|token|credential)\s*[:=]\s*["'][^"']{3,}["']/gi, + message: 'Potential hardcoded secret detected', + rule: 'no-hardcoded-secrets', + type: 'CRITICAL', + suggestion: 'Use environment variables or a secrets manager', + }, + { + pattern: /eval\s*\(/g, + message: 'eval() is dangerous and can lead to code injection', + rule: 'no-eval', + type: 'CRITICAL', + suggestion: 'Use safer alternatives like JSON.parse() or specific parsers', + }, + { + pattern: /innerHTML\s*=/g, + message: 'innerHTML can lead to XSS vulnerabilities', + rule: 'no-inner-html', + type: 'CRITICAL', + suggestion: 'Use textContent or sanitize HTML before insertion', + }, + + // Warnings + { + pattern: /catch\s*\(\s*(Exception|Error|Throwable|any|unknown)\s+\w+\s*\)/gi, + message: 'Generic exception catch - use specific error types', + rule: 'no-generic-catch', + type: 'WARNING', + suggestion: 'Catch specific error types for proper error handling', + }, + { + pattern: /new\s+Date\(\s*\)/g, + message: 'Hardcoded current time - not testable', + rule: 'no-hardcoded-time', + type: 'WARNING', + suggestion: 'Inject a Clock/TimeProvider for testability', + }, + { + pattern: /console\.(log|error|warn|info|debug|trace)\s*\(/g, + message: 'Console statement found', + rule: 'no-console', + type: 'WARNING', + suggestion: 'Use a proper logging framework in production code', + }, + { + pattern: /System\.(out|err)\.(print|println)/g, + message: 'System.out/err usage - use proper logging', + rule: 'no-system-out', + type: 'WARNING', + suggestion: 'Use SLF4J, Log4j, or another logging framework', + }, + { + pattern: /@Autowired\s+(private|protected)/g, + message: 'Field injection - harder to test', + rule: 'no-field-injection', + type: 'WARNING', + suggestion: 'Use constructor injection instead', + }, + { + pattern: /:\s*any\b(?!\s*\()/g, + message: 'TypeScript "any" type - loses type safety', + rule: 'no-any-type', + type: 'WARNING', + suggestion: 'Use specific types or unknown with type guards', + }, + { + pattern: /==(?!=)/g, + message: 'Loose equality operator', + rule: 'strict-equality', + type: 'WARNING', + suggestion: 'Use === for strict comparison', + }, + { + pattern: /!=(?!=)/g, + message: 'Loose inequality operator', + rule: 'strict-inequality', + type: 'WARNING', + suggestion: 'Use !== for strict comparison', + }, + { + pattern: /public\s+\w+\s+\w+\s*;(?!\s*\/\/.*final|static)/g, + message: 'Public mutable field - breaks encapsulation', + rule: 'no-public-fields', + type: 'WARNING', + suggestion: 'Use private fields with getters/setters or records', + }, + { + pattern: /throw\s+new\s+(Error|Exception)\s*\(\s*["'][^"']*["']\s*\)/g, + message: 'Generic error thrown - use custom error types', + rule: 'no-generic-throw', + type: 'WARNING', + suggestion: 'Create specific error classes for different failure modes', + }, + { + pattern: /\.then\s*\([^)]*\)\s*(?!\.catch|\.finally)/g, + message: 'Promise without error handling', + rule: 'unhandled-promise', + type: 'WARNING', + suggestion: 'Add .catch() or use try/catch with async/await', + }, + { + pattern: /setTimeout\s*\(\s*[^,]+,\s*0\s*\)/g, + message: 'setTimeout with 0ms - likely a hack', + rule: 'no-settimeout-zero', + type: 'WARNING', + suggestion: 'Use queueMicrotask() or review the async flow', + }, + { + pattern: /magic\s*number|[^a-zA-Z](?:86400|3600|60000|1000)\b/gi, + message: 'Magic number detected - use named constants', + rule: 'no-magic-numbers', + type: 'WARNING', + suggestion: 'Extract to named constants (e.g., SECONDS_PER_DAY)', + }, + + // Info + { + pattern: /TODO|FIXME|HACK|XXX|BUG/g, + message: 'Unresolved TODO/FIXME comment', + rule: 'no-todo', + type: 'INFO', + suggestion: 'Track these in an issue tracker instead', + }, + { + pattern: /@deprecated/gi, + message: 'Deprecated code usage', + rule: 'deprecated-usage', + type: 'INFO', + suggestion: 'Update to use the recommended alternative', + }, + { + pattern: /\bvar\s+/g, + message: '"var" keyword used', + rule: 'no-var', + type: 'INFO', + suggestion: 'Use const or let instead for better scoping', + }, +]; + +// Good practices to detect (presence = positive contribution to score) +const GOOD_PRACTICES: Array<{ + pattern: RegExp; + name: string; + weight: number; +}> = [ + { pattern: /interface\s+\w+/g, name: 'interfaces', weight: 10 }, + { pattern: /type\s+\w+\s*=/g, name: 'type_aliases', weight: 5 }, + { pattern: /@Test|describe\s*\(|it\s*\(|test\s*\(|#\[test\]/g, name: 'tests', weight: 15 }, + { pattern: /class\s+\w*Error|class\s+\w*Exception|struct\s+\w*Error/g, name: 'custom_errors', weight: 10 }, + { pattern: /constructor\s*\([^)]*(?:private|readonly|protected)/g, name: 'constructor_injection', weight: 10 }, + { pattern: /implements\s+\w+/g, name: 'implements_interface', weight: 8 }, + { pattern: /readonly\s+|final\s+|const\s+\w+:/g, name: 'immutability', weight: 5 }, + { pattern: /@Injectable|@Service|@Component|@Repository/g, name: 'di_annotations', weight: 5 }, + { pattern: /async\s+\w+|Promise\s* i.rule === ap.rule && i.line === lineNum); + if (!existingIssue) { + issues.push({ + type: ap.type, + rule: ap.rule, + message: ap.message, + line: lineNum, + suggestion: ap.suggestion, + }); + } + match = regex.exec(code); + } + } + + // 2. Analyze method/class lengths + const methodLengths = analyzeMethodLengths(code); + const classLengths = analyzeClassLengths(code); + + for (const method of methodLengths) { + if (method.lines > 20) { + issues.push({ + type: 'WARNING', + rule: 'max-method-lines', + message: `Method "${method.name}" is ${method.lines} lines (max: 20)`, + line: method.startLine, + suggestion: 'Extract smaller methods with single responsibilities', + }); + } + } + + for (const cls of classLengths) { + if (cls.lines > 200) { + issues.push({ + type: 'WARNING', + rule: 'max-class-lines', + message: `Class "${cls.name}" is ${cls.lines} lines (max: 200)`, + line: cls.startLine, + suggestion: 'Split into smaller, focused classes', + }); + } + } + + // 3. Check for missing tests + const hasImplementation = /class\s+\w+|function\s+\w+|const\s+\w+\s*=\s*(?:async\s*)?\(/.test(code); + const hasTests = /@Test|describe\s*\(|it\s*\(|test\s*\(|#\[test\]/.test(code); + + if (hasImplementation && !hasTests) { + issues.push({ + type: 'CRITICAL', + rule: 'missing-tests', + message: 'No tests found - TDD requires tests before implementation', + suggestion: 'Write tests first, then implement to make them pass', + }); + } + + // 4. Check for missing interfaces (DI) + const hasClasses = /class\s+\w+/.test(code); + const hasInterfaces = /interface\s+\w+|type\s+\w+\s*=/.test(code); + + if (hasClasses && !hasInterfaces) { + issues.push({ + type: 'WARNING', + rule: 'missing-interfaces', + message: 'No interfaces found - use interfaces for dependency injection', + suggestion: 'Define interfaces for services and repositories to enable testing and flexibility', + }); + } + + // 5. Calculate metrics + const metrics = calculateMetrics(code, lines, methodLengths, classLengths); + + // 6. Calculate score + const score = calculateScore(code, issues); + + // 7. Generate summary + const summary = generateSummary(issues, score); + + // 8. Determine if passed + const criticalCount = issues.filter((i) => i.type === 'CRITICAL').length; + const passed = criticalCount === 0 && score >= 60; + + return { issues, score, summary, metrics, passed }; +} + +/** + * Analyze method lengths in the code. + */ +function analyzeMethodLengths(code: string): Array<{ name: string; lines: number; startLine: number }> { + const results: Array<{ name: string; lines: number; startLine: number }> = []; + const lines = code.split('\n'); + + // Patterns for method/function declarations in various languages + const methodPatterns = [ + // TypeScript/JavaScript: function name(), async function name(), name() {, async name() { + /(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*\(/g, + // Method in class: public/private/protected name() or async name() + /(?:public|private|protected)\s+(?:static\s+)?(?:async\s+)?(\w+)\s*\([^)]*\)\s*(?::\s*[\w<>[\]|,\s]+)?\s*\{/g, + // Arrow function assigned to const: const name = () => or const name = async () => + /(?:const|let)\s+(\w+)\s*=\s*(?:async\s*)?\([^)]*\)\s*(?::\s*[\w<>[\]|,\s]+)?\s*=>/g, + // Java/Kotlin: public void name(), private String name() + /(?:public|private|protected)\s+(?:static\s+)?(?:final\s+)?(?:\w+\s+)?(\w+)\s*\([^)]*\)\s*(?:throws\s+[\w,\s]+)?\s*\{/g, + // Python: def name( + /def\s+(\w+)\s*\([^)]*\)\s*(?:->\s*[\w[\],\s]+)?\s*:/g, + // Go: func name( or func (receiver) name( + /func\s+(?:\([^)]+\)\s+)?(\w+)\s*\([^)]*\)\s*(?:\([^)]*\)|[\w\s*]+)?\s*\{/g, + // Rust: fn name( + /fn\s+(\w+)\s*(?:<[^>]+>)?\s*\([^)]*\)\s*(?:->\s*[\w<>]+)?\s*\{/g, + ]; + + for (const pattern of methodPatterns) { + const regex = new RegExp(pattern.source, pattern.flags); + let match: RegExpExecArray | null = regex.exec(code); + + while (match !== null) { + const startIndex = match.index; + const startLine = code.substring(0, startIndex).split('\n').length; + const methodName = match[1] || 'anonymous'; + + // Skip if we already have this method (avoid duplicates from overlapping patterns) + if (results.some((r) => r.startLine === startLine)) { + match = regex.exec(code); + continue; + } + + // Count lines until matching closing brace + let braceCount = 0; + let endLine = startLine; + let started = false; + + for (let i = startLine - 1; i < lines.length; i++) { + const line = lines[i]; + + // Count braces (simple approach, doesn't handle strings/comments perfectly) + for (const char of line) { + if (char === '{') { + braceCount++; + started = true; + } + if (char === '}') braceCount--; + } + + if (started && braceCount === 0) { + endLine = i + 1; + break; + } + } + + const methodLines = endLine - startLine + 1; + if (methodLines > 1) { + // Only count if it's a real method, not just a declaration + results.push({ + name: methodName, + lines: methodLines, + startLine, + }); + } + match = regex.exec(code); + } + } + + return results; +} + +/** + * Analyze class lengths in the code. + */ +function analyzeClassLengths(code: string): Array<{ name: string; lines: number; startLine: number }> { + const results: Array<{ name: string; lines: number; startLine: number }> = []; + const lines = code.split('\n'); + + // Pattern for class declarations + const classPattern = + /(?:export\s+)?(?:abstract\s+)?class\s+(\w+)(?:\s+extends\s+\w+)?(?:\s+implements\s+[\w,\s]+)?\s*\{/g; + + let match: RegExpExecArray | null = classPattern.exec(code); + while (match !== null) { + const startIndex = match.index; + const startLine = code.substring(0, startIndex).split('\n').length; + const className = match[1]; + + let braceCount = 0; + let endLine = startLine; + let started = false; + + for (let i = startLine - 1; i < lines.length; i++) { + const line = lines[i]; + + for (const char of line) { + if (char === '{') { + braceCount++; + started = true; + } + if (char === '}') braceCount--; + } + + if (started && braceCount === 0) { + endLine = i + 1; + break; + } + } + + results.push({ + name: className, + lines: endLine - startLine + 1, + startLine, + }); + match = classPattern.exec(code); + } + + return results; +} + +/** + * Calculate code metrics. + */ +function calculateMetrics( + code: string, + lines: string[], + methodLengths: Array<{ name: string; lines: number; startLine: number }>, + classLengths: Array<{ name: string; lines: number; startLine: number }> +): CodeMetrics { + // Count comment lines (simple heuristic) + const commentLines = lines.filter((l) => { + const trimmed = l.trim(); + return ( + trimmed.startsWith('//') || + trimmed.startsWith('/*') || + trimmed.startsWith('*') || + trimmed.startsWith('#') || + trimmed.startsWith('"""') || + trimmed.startsWith("'''") + ); + }).length; + + // Count empty lines + const emptyLines = lines.filter((l) => l.trim() === '').length; + + return { + totalLines: lines.length, + codeLines: lines.length - commentLines - emptyLines, + commentLines, + methodCount: methodLengths.length, + classCount: classLengths.length, + interfaceCount: (code.match(/interface\s+\w+/g) || []).length, + testCount: (code.match(/@Test|it\s*\(|test\s*\(|describe\s*\(/g) || []).length, + maxMethodLines: Math.max(...methodLengths.map((m) => m.lines), 0), + maxClassLines: Math.max(...classLengths.map((c) => c.lines), 0), + customErrorCount: (code.match(/class\s+\w*Error|class\s+\w*Exception/g) || []).length, + importCount: (code.match(/^import\s+|^from\s+\w+\s+import/gm) || []).length, + }; +} + +/** + * Calculate quality score based on issues and good practices. + */ +function calculateScore(code: string, issues: AnalysisIssue[]): number { + let score = 100; + + // Deduct for issues + for (const issue of issues) { + switch (issue.type) { + case 'CRITICAL': + score -= 15; + break; + case 'WARNING': + score -= 5; + break; + case 'INFO': + score -= 1; + break; + } + } + + // Add for good practices + for (const gp of GOOD_PRACTICES) { + const matches = (code.match(gp.pattern) || []).length; + if (matches > 0) { + // Add points but cap at the weight to avoid gaming + score += Math.min(gp.weight, matches * 2); + } + } + + // Normalize score to 0-100 range + return Math.max(0, Math.min(100, score)); +} + +/** + * Generate a human-readable summary. + */ +function generateSummary(issues: AnalysisIssue[], score: number): string { + const criticals = issues.filter((i) => i.type === 'CRITICAL').length; + const warnings = issues.filter((i) => i.type === 'WARNING').length; + + if (criticals > 0) { + return `NEEDS WORK: ${criticals} critical issue(s) must be fixed before proceeding`; + } + + if (warnings > 5) { + return `ACCEPTABLE: ${warnings} warnings should be addressed to improve quality`; + } + + if (score >= 85) { + return `EXCELLENT: Code follows best practices with high quality`; + } + + if (score >= 70) { + return `GOOD: Code follows most best practices`; + } + + if (score >= 60) { + return `FAIR: Some improvements recommended`; + } + + return `NEEDS IMPROVEMENT: Multiple issues detected that should be addressed`; +} + +/** + * Format analysis result as markdown. + */ +export function formatAnalysisAsMarkdown(result: AnalysisResult): string { + const lines: string[] = []; + + // Header with summary + const statusEmoji = result.passed ? '✅' : '❌'; + lines.push(`# ${statusEmoji} Code Analysis Results`, ''); + lines.push(`**${result.summary}**`, ''); + lines.push(`**Score: ${result.score}/100**`, ''); + lines.push(''); + + // Metrics section + lines.push('---', '', '## Metrics', ''); + lines.push(`| Metric | Value |`); + lines.push(`|--------|-------|`); + lines.push(`| Total Lines | ${result.metrics.totalLines} |`); + lines.push(`| Code Lines | ${result.metrics.codeLines} |`); + lines.push(`| Methods | ${result.metrics.methodCount} |`); + lines.push(`| Classes | ${result.metrics.classCount} |`); + lines.push(`| Interfaces | ${result.metrics.interfaceCount} |`); + lines.push(`| Tests | ${result.metrics.testCount} |`); + lines.push(`| Custom Errors | ${result.metrics.customErrorCount} |`); + lines.push(`| Max Method Lines | ${result.metrics.maxMethodLines} |`); + lines.push(`| Max Class Lines | ${result.metrics.maxClassLines} |`); + lines.push(''); + + // Issues by type + const criticals = result.issues.filter((i) => i.type === 'CRITICAL'); + const warnings = result.issues.filter((i) => i.type === 'WARNING'); + const infos = result.issues.filter((i) => i.type === 'INFO'); + + if (criticals.length > 0) { + lines.push('---', '', '## CRITICAL Issues (must fix)', ''); + for (const issue of criticals) { + lines.push(`- **Line ${issue.line || '?'}:** ${issue.message}`); + if (issue.suggestion) { + lines.push(` - Suggestion: ${issue.suggestion}`); + } + } + lines.push(''); + } + + if (warnings.length > 0) { + lines.push('---', '', '## Warnings (should fix)', ''); + for (const issue of warnings) { + lines.push(`- **Line ${issue.line || '?'}:** ${issue.message}`); + if (issue.suggestion) { + lines.push(` - Suggestion: ${issue.suggestion}`); + } + } + lines.push(''); + } + + if (infos.length > 0) { + lines.push('---', '', '## Info', ''); + for (const issue of infos) { + lines.push(`- Line ${issue.line || '?'}: ${issue.message}`); + } + lines.push(''); + } + + // Required actions + lines.push('---', '', '## Required Actions', ''); + + if (criticals.length > 0) { + lines.push(`1. Fix ${criticals.length} critical issue(s) - these are blocking`); + } + + if (warnings.length > 3) { + lines.push(`2. Address ${warnings.length} warnings to improve maintainability`); + } + + if (result.metrics.testCount === 0) { + lines.push('3. Add tests - TDD requires tests before implementation'); + } + + if (result.metrics.interfaceCount === 0 && result.metrics.classCount > 0) { + lines.push('4. Add interfaces for dependency injection'); + } + + if (result.passed) { + lines.push(''); + lines.push('**Code meets quality standards. Ready for review.**'); + } else { + lines.push(''); + lines.push('**Fix critical issues before proceeding.**'); + } + + return lines.join('\n'); +} diff --git a/src/tools/definitions.ts b/src/tools/definitions.ts index 372d21e..3950751 100644 --- a/src/tools/definitions.ts +++ b/src/tools/definitions.ts @@ -48,21 +48,28 @@ EXAMPLE: get_context({ task: "Create payment service", project_dir: "/path/to/pr }, }, - // VALIDATE - Check code against standards + // VALIDATE - Real code analysis { name: 'validate', - description: `Validate code against coding standards. Returns validation criteria and checklist. + description: `Analyze code against coding standards with REAL code analysis. WHEN TO USE: -- After writing code, before committing -- During code review -- To check if code follows project standards +- After writing code, to check for issues +- During iterative development +- Before calling verify for final approval + +PERFORMS REAL ANALYSIS: +- Detects anti-patterns (empty catch, hardcoded secrets, etc.) +- Measures method/class lengths +- Checks for interfaces and tests +- Calculates quality score RETURNS: -- Code quality thresholds (max method lines, coverage) -- Guardrails for the task type -- Naming convention checks -- Review checklist (CRITICAL/WARNINGS/Score) +- Score (0-100) +- CRITICAL issues (must fix) +- WARNINGS (should fix) +- Metrics (lines, methods, tests, etc.) +- PASSED/NEEDS WORK verdict EXAMPLE: validate({ code: "public class UserService { ... }", task_type: "feature" })`, inputSchema: { @@ -74,7 +81,7 @@ EXAMPLE: validate({ code: "public class UserService { ... }", task_type: "featur }, task_type: { type: 'string', - enum: ['feature', 'bugfix', 'refactor', 'test'], + enum: ['feature', 'bugfix', 'refactor', 'test', 'security', 'performance'], description: 'Type of task for context-aware validation (optional)', }, }, @@ -82,6 +89,58 @@ EXAMPLE: validate({ code: "public class UserService { ... }", task_type: "featur }, }, + // VERIFY - Gate before presenting code to user (NEW in v2.0) + { + name: 'verify', + description: `REQUIRED: Verify generated code before presenting to user. + +WHEN TO USE: +- ALWAYS call this AFTER generating code +- BEFORE presenting code to the user +- This is the final quality gate + +WHAT IT CHECKS: +- Tests are provided (TDD compliance) +- Interfaces exist (DI compliance) +- No critical code issues +- Quality score >= 50 + +RETURNS: +- PASS: Code meets standards, present to user +- FAIL: Issues to fix, iterate and verify again + +WORKFLOW: +1. Generate code following get_context guidelines +2. Call verify({ code, tests, interfaces }) +3. If FAIL: fix issues and call verify again +4. If PASS: present code to user + +EXAMPLE: verify({ code: "class UserServiceImpl...", tests: "describe('UserService')...", interfaces: "interface UserService..." })`, + inputSchema: { + type: 'object' as const, + properties: { + code: { + type: 'string', + description: 'All implementation code', + }, + tests: { + type: 'string', + description: 'All test code (REQUIRED for TDD compliance)', + }, + interfaces: { + type: 'string', + description: 'All interfaces and type definitions', + }, + task_type: { + type: 'string', + enum: ['feature', 'bugfix', 'refactor', 'test', 'security', 'performance'], + description: 'Type of task for context-aware verification', + }, + }, + required: ['code'], + }, + }, + // SEARCH - Find specific topics in documentation { name: 'search', @@ -176,4 +235,4 @@ RETURNS: ]; // Tool names for type safety -export type ToolName = 'get_context' | 'validate' | 'search' | 'profiles' | 'health' | 'init'; +export type ToolName = 'get_context' | 'validate' | 'verify' | 'search' | 'profiles' | 'health' | 'init'; diff --git a/src/tools/handlers/get-context.ts b/src/tools/handlers/get-context.ts index 798796d..7a076c9 100644 --- a/src/tools/handlers/get-context.ts +++ b/src/tools/handlers/get-context.ts @@ -137,6 +137,125 @@ export async function handleGetContext( lines.push('4. VERIFY → Tests pass, linter clean'); lines.push('5. REVIEW → Self-check as expert'); lines.push('```'); + lines.push(''); + + // === STRATEGY 1: CHECKPOINT VERIFICATION === + lines.push('---', '', '## MANDATORY CHECKPOINT', ''); + lines.push(` +⚠️ **BEFORE writing ANY code**, respond with this JSON: + +\`\`\`json +{ + "checkpoint": { + "task_understood": true, + "clarifications_needed": [], + "approach": "Brief description of implementation approach" + }, + "architecture": { + "layers_affected": ["domain", "application", "infrastructure"], + "interfaces_to_create": ["InterfaceName1", "InterfaceName2"], + "classes_to_create": ["ClassName1", "ClassName2"] + }, + "tdd_plan": { + "tests_to_write": [ + "should_do_X_when_Y", + "should_fail_when_Z", + "should_handle_edge_case" + ] + }, + "quality_commitments": { + "max_method_lines": 20, + "max_class_lines": 200, + "dependency_injection": true, + "custom_errors": true, + "test_coverage_target": 80 + } +} +\`\`\` + +**Only proceed to code generation after completing this checkpoint.** +`); + + // === STRATEGY 4: CONTRACTUAL RESPONSE FORMAT === + lines.push('---', '', '## REQUIRED RESPONSE STRUCTURE', ''); + lines.push(` +Structure your response in this **exact order**: + +### 1. CHECKPOINT +Complete the checkpoint JSON above first. + +### 2. INTERFACES / TYPES +\`\`\` +// Define ALL interfaces and types FIRST +interface ServiceName { ... } +type ResultType = { ... } +\`\`\` + +### 3. TESTS +\`\`\` +// Write tests BEFORE implementation (TDD) +describe('ServiceName', () => { + it('should do X when Y', () => { ... }); + it('should fail when Z', () => { ... }); +}); +\`\`\` + +### 4. IMPLEMENTATION +\`\`\` +// NOW implement to make tests pass +class ServiceNameImpl implements ServiceName { ... } +\`\`\` + +### 5. SELF-REVIEW +Complete the self-review JSON below. + +⚠️ **Code not following this structure will not meet quality standards.** +`); + + // === STRATEGY 3: MANDATORY SELF-REVIEW === + lines.push('---', '', '## MANDATORY SELF-REVIEW', ''); + lines.push(` +After generating code, perform a self-review and report: + +\`\`\`json +{ + "self_review": { + "methods_over_20_lines": 0, + "classes_over_200_lines": 0, + "interfaces_created": 3, + "tests_written": 5, + "custom_errors_defined": 2, + "dependency_injection_used": true, + "hardcoded_values": 0, + "todos_or_fixmes": 0 + }, + "quality_score": "8/10", + "confidence": "high", + "improvements_if_more_time": [ + "Add more edge case tests", + "Extract validation logic to separate class" + ] +} +\`\`\` + +**If quality_score < 7, iterate and improve before presenting the code.** +`); + + // === VERIFY TOOL REMINDER === + lines.push('---', '', '## FINAL STEP', ''); + lines.push(` +After completing all code, call the \`verify\` tool with your generated code: + +\`\`\` +verify({ + code: "// all implementation code", + tests: "// all test code", + interfaces: "// all interfaces" +}) +\`\`\` + +Only present code to user after verify returns **PASS**. +`); return { content: [{ type: 'text', text: lines.join('\n') }] }; } diff --git a/src/tools/handlers/index.ts b/src/tools/handlers/index.ts index 1b79a71..f7c02bf 100644 --- a/src/tools/handlers/index.ts +++ b/src/tools/handlers/index.ts @@ -9,3 +9,4 @@ export { handleInit } from './init.js'; export { handleProfiles } from './profiles.js'; export { handleSearch } from './search.js'; export { handleValidate } from './validate.js'; +export { handleVerify } from './verify.js'; diff --git a/src/tools/handlers/validate.ts b/src/tools/handlers/validate.ts index 422ad21..da5c842 100644 --- a/src/tools/handlers/validate.ts +++ b/src/tools/handlers/validate.ts @@ -1,17 +1,24 @@ import { getGuardrails } from '../../agent.js'; +import { analyzeCode, formatAnalysisAsMarkdown } from '../../analysis/code-analyzer.js'; import { config } from '../../config.js'; import { getProfile } from '../../profiles.js'; import { ValidateSchema } from '../schemas.js'; /** * Handler for the validate tool. - * Returns validation criteria for code against standards. + * Performs real code analysis and returns actionable feedback. + * + * This is a key part of the Smart Enforcement system - it actually + * analyzes code instead of just returning a checklist. */ export async function handleValidate( args: Record ): Promise<{ content: Array<{ type: 'text'; text: string }>; isError?: boolean }> { const { code, task_type } = ValidateSchema.parse(args); + // Run real code analysis + const analysis = analyzeCode(code); + const profileId = config.defaultProfile; const profile = await getProfile(profileId); @@ -24,64 +31,54 @@ export async function handleValidate( const guardrails = task_type ? await getGuardrails(task_type, null) : null; - const lines: string[] = [ - '# Code Validation', - '', - '## Code', - '```', - code.slice(0, 2000) + (code.length > 2000 ? '\n...(truncated)' : ''), - '```', - '', - '---', - '', - '## Validation Criteria', - '', - ]; + // Build output with analysis results + const lines: string[] = []; + + // Add the formatted analysis + lines.push(formatAnalysisAsMarkdown(analysis)); + lines.push(''); + + // Add profile thresholds for reference + lines.push('---', '', '## Profile Standards Reference', ''); - // Code quality thresholds if (profile.codeQuality) { - lines.push('**Thresholds:**'); - lines.push(`- Max method lines: ${profile.codeQuality.maxMethodLines}`); - lines.push(`- Max class lines: ${profile.codeQuality.maxClassLines}`); - lines.push(`- Max parameters: ${profile.codeQuality.maxMethodParameters}`); - lines.push(`- Min coverage: ${profile.codeQuality.minimumTestCoverage}%`); + lines.push('**Configured Thresholds:**'); + lines.push(`- Max method lines: ${profile.codeQuality.maxMethodLines} (yours: ${analysis.metrics.maxMethodLines})`); + lines.push(`- Max class lines: ${profile.codeQuality.maxClassLines} (yours: ${analysis.metrics.maxClassLines})`); + lines.push(`- Min test coverage: ${profile.codeQuality.minimumTestCoverage}%`); lines.push(''); } - // Guardrails if task type specified + // Add guardrails reminder if task type specified if (guardrails) { - lines.push(`**${task_type?.toUpperCase()} Guardrails:**`); - lines.push(''); - lines.push('Must:'); - for (const rule of guardrails.mandatory.slice(0, 4)) { + lines.push('---', '', `## ${task_type?.toUpperCase()} Guardrails Reminder`, ''); + lines.push('**Must:**'); + for (const rule of guardrails.mandatory.slice(0, 3)) { lines.push(`- ${rule}`); } lines.push(''); - lines.push('Avoid:'); + lines.push('**Avoid:**'); for (const rule of guardrails.avoid.slice(0, 3)) { lines.push(`- ${rule}`); } lines.push(''); } - // Naming conventions - if (profile.naming) { - lines.push('**Naming:**'); - const naming = profile.naming as Record; - if (naming.general && typeof naming.general === 'object') { - for (const [key, value] of Object.entries(naming.general as Record)) { - lines.push(`- ${key}: ${value}`); - } - } + // Final verdict + lines.push('---', '', '## Verdict', ''); + + if (analysis.passed) { + lines.push('**PASSED** - Code meets quality standards.'); lines.push(''); + lines.push('You may present this code to the user or call `verify` for final confirmation.'); + } else { + lines.push('**NEEDS WORK** - Address the issues above before proceeding.'); + lines.push(''); + lines.push('Fix critical issues first, then warnings, then call `validate` again.'); } - lines.push('---', ''); - lines.push('## Review Checklist', ''); - lines.push('Analyze the code and report:', ''); - lines.push('1. **CRITICAL** - Must fix (bugs, security, violations)'); - lines.push('2. **WARNINGS** - Should fix (style, best practices)'); - lines.push('3. **Score** - Compliance 0-100 with justification'); - - return { content: [{ type: 'text', text: lines.join('\n') }] }; + return { + content: [{ type: 'text', text: lines.join('\n') }], + isError: !analysis.passed, + }; } diff --git a/src/tools/handlers/verify.ts b/src/tools/handlers/verify.ts new file mode 100644 index 0000000..a5fe84e --- /dev/null +++ b/src/tools/handlers/verify.ts @@ -0,0 +1,206 @@ +import { z } from 'zod'; +import { type AnalysisResult, analyzeCode } from '../../analysis/code-analyzer.js'; + +/** + * Schema for verify tool input. + */ +export const VerifySchema = z.object({ + code: z.string().describe('All generated implementation code'), + tests: z.string().optional().describe('All test code'), + interfaces: z.string().optional().describe('All interfaces/types'), + task_type: z.enum(['feature', 'bugfix', 'refactor', 'test', 'security', 'performance']).optional(), +}); + +export type VerifyInput = z.infer; + +/** + * Handler for the verify tool. + * + * This is the "gate" that LLMs must pass before presenting code to the user. + * It analyzes all generated code and returns PASS/FAIL with specific feedback. + * + * Key principles: + * - Tests are REQUIRED (TDD compliance) + * - Interfaces are strongly recommended (DI) + * - Critical issues block passing + * - Clear feedback on what to fix + */ +export async function handleVerify( + args: Record +): Promise<{ content: Array<{ type: 'text'; text: string }>; isError?: boolean }> { + const { code, tests, interfaces } = VerifySchema.parse(args); + + // Combine all code for comprehensive analysis + const allCodeParts: string[] = []; + + if (interfaces) { + allCodeParts.push('// === INTERFACES ===', interfaces); + } + + if (tests) { + allCodeParts.push('// === TESTS ===', tests); + } + + allCodeParts.push('// === IMPLEMENTATION ===', code); + + const allCode = allCodeParts.join('\n\n'); + + // Run analysis + const analysis = analyzeCode(allCode); + + // Additional verification checks beyond the analyzer + const verificationResults = runVerificationChecks(code, tests, interfaces, analysis); + + // Build response + const lines: string[] = []; + + if (verificationResults.passed) { + lines.push('# VERIFICATION PASSED', ''); + lines.push(`**Score: ${analysis.score}/100**`, ''); + lines.push(''); + lines.push('The code meets quality standards and is ready to present to the user.', ''); + + // Show any optional improvements + if (verificationResults.warnings.length > 0) { + lines.push('---', '', '## Optional Improvements', ''); + for (const warning of verificationResults.warnings.slice(0, 5)) { + lines.push(`- ${warning}`); + } + lines.push(''); + } + + // Summary of what was verified + lines.push('---', '', '## Verification Summary', ''); + lines.push(`- Tests provided: ${tests ? 'Yes' : 'No'}`); + lines.push(`- Interfaces provided: ${interfaces ? 'Yes' : 'No'}`); + lines.push(`- Critical issues: ${analysis.issues.filter((i) => i.type === 'CRITICAL').length}`); + lines.push(`- Warnings: ${analysis.issues.filter((i) => i.type === 'WARNING').length}`); + lines.push(`- Test count: ${analysis.metrics.testCount}`); + lines.push(`- Interface count: ${analysis.metrics.interfaceCount}`); + lines.push(''); + + lines.push('---', ''); + lines.push('**You may now present this code to the user.**'); + } else { + lines.push('# VERIFICATION FAILED', ''); + lines.push(`**Score: ${analysis.score}/100**`, ''); + lines.push(''); + lines.push('The code does not meet quality standards. Fix the issues below and verify again.', ''); + + // Show failures (blocking issues) + lines.push('---', '', '## Issues to Fix (Blocking)', ''); + for (const failure of verificationResults.failures) { + lines.push(`- ${failure}`); + } + lines.push(''); + + // Show warnings + if (verificationResults.warnings.length > 0) { + lines.push('---', '', '## Warnings (Should Fix)', ''); + for (const warning of verificationResults.warnings.slice(0, 8)) { + lines.push(`- ${warning}`); + } + lines.push(''); + } + + // Analysis issues + const criticals = analysis.issues.filter((i) => i.type === 'CRITICAL'); + if (criticals.length > 0) { + lines.push('---', '', '## Critical Code Issues', ''); + for (const issue of criticals) { + lines.push(`- **Line ${issue.line || '?'}:** ${issue.message}`); + if (issue.suggestion) { + lines.push(` - Fix: ${issue.suggestion}`); + } + } + lines.push(''); + } + + lines.push('---', ''); + lines.push('**Fix these issues and call `verify` again before presenting code to user.**'); + } + + return { + content: [{ type: 'text', text: lines.join('\n') }], + isError: !verificationResults.passed, + }; +} + +/** + * Run additional verification checks beyond the code analyzer. + */ +function runVerificationChecks( + code: string, + tests: string | undefined, + interfaces: string | undefined, + analysis: AnalysisResult +): { passed: boolean; failures: string[]; warnings: string[] } { + const failures: string[] = []; + const warnings: string[] = []; + + // Check 1: Tests MUST be provided (TDD requirement) + if (!tests || tests.trim().length === 0) { + failures.push('No tests provided - TDD requires tests before/with implementation'); + } else { + // Check test count + const testCount = analysis.metrics.testCount; + if (testCount === 0) { + failures.push('Test code provided but no test cases detected (missing @Test, it(), test(), etc.)'); + } else if (testCount < 2) { + warnings.push(`Only ${testCount} test(s) found - consider adding more for better coverage`); + } + } + + // Check 2: Interfaces strongly recommended for DI + const hasClasses = /class\s+\w+/.test(code); + if (hasClasses) { + if (!interfaces || interfaces.trim().length === 0) { + // Check if interfaces are in the main code instead + const hasInlineInterfaces = /interface\s+\w+/.test(code); + if (!hasInlineInterfaces) { + warnings.push('No interfaces provided - consider adding for dependency injection'); + } + } + } + + // Check 3: No critical issues from analysis + const criticalCount = analysis.issues.filter((i) => i.type === 'CRITICAL').length; + if (criticalCount > 0) { + failures.push(`${criticalCount} critical code issue(s) detected - see details below`); + } + + // Check 4: Score threshold + if (analysis.score < 50) { + failures.push(`Quality score too low (${analysis.score}/100) - must be at least 50`); + } else if (analysis.score < 70) { + warnings.push(`Quality score is ${analysis.score}/100 - consider improvements to reach 70+`); + } + + // Check 5: Too many warnings + const warningCount = analysis.issues.filter((i) => i.type === 'WARNING').length; + if (warningCount > 10) { + warnings.push(`High number of warnings (${warningCount}) - consider addressing some`); + } + + // Check 6: Method/class size + if (analysis.metrics.maxMethodLines > 30) { + warnings.push(`Longest method is ${analysis.metrics.maxMethodLines} lines - consider refactoring`); + } + if (analysis.metrics.maxClassLines > 300) { + warnings.push(`Longest class is ${analysis.metrics.maxClassLines} lines - consider splitting`); + } + + // Check 7: Look for common issues in the code + if (/throw\s+new\s+Error\s*\(/.test(code) && !/class\s+\w*Error/.test(code)) { + warnings.push('Generic Error thrown without custom error classes defined'); + } + + if (/any\s*[;,)>]/.test(code)) { + warnings.push('TypeScript "any" type usage detected - consider specific types'); + } + + // Determine if passed + const passed = failures.length === 0; + + return { passed, failures, warnings }; +} diff --git a/src/tools/index.ts b/src/tools/index.ts index 897fa9f..40e2c73 100644 --- a/src/tools/index.ts +++ b/src/tools/index.ts @@ -17,6 +17,7 @@ import { handleProfiles, handleSearch, handleValidate, + handleVerify, } from './handlers/index.js'; export type { ToolName } from './definitions.js'; @@ -47,6 +48,9 @@ export async function handleToolCall( case 'validate': result = await handleValidate(args); break; + case 'verify': + result = await handleVerify(args); + break; case 'search': result = await handleSearch(args); break; @@ -65,7 +69,7 @@ export async function handleToolCall( content: [ { type: 'text', - text: `Unknown tool: ${name}. Available: get_context, validate, search, profiles, health, init`, + text: `Unknown tool: ${name}. Available: get_context, validate, verify, search, profiles, health, init`, }, ], isError: true, diff --git a/tests/analysis/code-analyzer.test.ts b/tests/analysis/code-analyzer.test.ts new file mode 100644 index 0000000..32ee4c3 --- /dev/null +++ b/tests/analysis/code-analyzer.test.ts @@ -0,0 +1,438 @@ +import { describe, expect, it } from 'vitest'; +import { analyzeCode, formatAnalysisAsMarkdown } from '../../src/analysis/code-analyzer.js'; + +describe('CodeAnalyzer', () => { + describe('Anti-pattern detection', () => { + it('should detect empty catch block', () => { + const code = ` + try { + doSomething(); + } catch (e) { } + `; + const result = analyzeCode(code); + expect(result.issues.some((i) => i.rule === 'no-empty-catch')).toBe(true); + expect(result.issues.find((i) => i.rule === 'no-empty-catch')?.type).toBe('CRITICAL'); + }); + + it('should detect generic exception catch', () => { + const code = ` + try { + doSomething(); + } catch (Exception e) { + log(e); + } + `; + const result = analyzeCode(code); + expect(result.issues.some((i) => i.rule === 'no-generic-catch')).toBe(true); + }); + + it('should detect hardcoded secrets', () => { + const code = `const password = "secret123";`; + const result = analyzeCode(code); + expect(result.issues.some((i) => i.rule === 'no-hardcoded-secrets')).toBe(true); + expect(result.issues.find((i) => i.rule === 'no-hardcoded-secrets')?.type).toBe('CRITICAL'); + }); + + it('should detect API key in code', () => { + const code = `const apiKey = "sk-abc123def456";`; + const result = analyzeCode(code); + expect(result.issues.some((i) => i.rule === 'no-hardcoded-secrets')).toBe(true); + }); + + it('should detect console statements', () => { + const code = `console.log("debug");`; + const result = analyzeCode(code); + expect(result.issues.some((i) => i.rule === 'no-console')).toBe(true); + }); + + it('should detect System.out in Java', () => { + const code = `System.out.println("hello");`; + const result = analyzeCode(code); + expect(result.issues.some((i) => i.rule === 'no-system-out')).toBe(true); + }); + + it('should detect field injection', () => { + const code = `@Autowired private UserRepository repo;`; + const result = analyzeCode(code); + expect(result.issues.some((i) => i.rule === 'no-field-injection')).toBe(true); + }); + + it('should detect eval usage', () => { + const code = `const result = eval("2 + 2");`; + const result = analyzeCode(code); + expect(result.issues.some((i) => i.rule === 'no-eval')).toBe(true); + expect(result.issues.find((i) => i.rule === 'no-eval')?.type).toBe('CRITICAL'); + }); + + it('should detect innerHTML assignment', () => { + const code = `element.innerHTML = userInput;`; + const result = analyzeCode(code); + expect(result.issues.some((i) => i.rule === 'no-inner-html')).toBe(true); + }); + + it('should detect TypeScript any type', () => { + const code = `function process(data: any): void { }`; + const result = analyzeCode(code); + expect(result.issues.some((i) => i.rule === 'no-any-type')).toBe(true); + }); + + it('should detect loose equality', () => { + const code = `if (a == b) { }`; + const result = analyzeCode(code); + expect(result.issues.some((i) => i.rule === 'strict-equality')).toBe(true); + }); + + it('should detect TODO comments', () => { + const code = `// TODO: fix this later`; + const result = analyzeCode(code); + expect(result.issues.some((i) => i.rule === 'no-todo')).toBe(true); + expect(result.issues.find((i) => i.rule === 'no-todo')?.type).toBe('INFO'); + }); + + it('should detect var keyword', () => { + const code = `var x = 5;`; + const result = analyzeCode(code); + expect(result.issues.some((i) => i.rule === 'no-var')).toBe(true); + }); + + it('should detect generic error thrown', () => { + const code = `throw new Error("Something went wrong");`; + const result = analyzeCode(code); + expect(result.issues.some((i) => i.rule === 'no-generic-throw')).toBe(true); + }); + }); + + describe('Method length analysis', () => { + it('should detect methods over 20 lines', () => { + const longMethodBody = Array(25).fill(' doSomething();').join('\n'); + const code = ` + class Test { + public void longMethod() { + ${longMethodBody} + } + } + `; + const result = analyzeCode(code); + expect(result.issues.some((i) => i.rule === 'max-method-lines')).toBe(true); + }); + + it('should not flag short methods', () => { + const code = ` + class Test { + public void shortMethod() { + doSomething(); + doSomethingElse(); + } + } + `; + const result = analyzeCode(code); + expect(result.issues.some((i) => i.rule === 'max-method-lines')).toBe(false); + }); + + it('should handle TypeScript methods', () => { + const longBody = Array(25).fill(' console.log("x");').join('\n'); + const code = ` + class Service { + async processData(): Promise { + ${longBody} + } + } + `; + const result = analyzeCode(code); + // Should detect both method length and console usage + expect(result.issues.some((i) => i.rule === 'no-console')).toBe(true); + }); + }); + + describe('Class length analysis', () => { + it('should detect classes over 200 lines', () => { + const methods = Array(50) + .fill( + ` + public void method() { + doSomething(); + doSomethingElse(); + doMore(); + } + ` + ) + .join('\n'); + const code = ` + class LargeClass { + ${methods} + } + `; + const result = analyzeCode(code); + expect(result.issues.some((i) => i.rule === 'max-class-lines')).toBe(true); + }); + }); + + describe('Missing tests detection', () => { + it('should detect missing tests in implementation code', () => { + const code = ` + class UserService { + constructor(private repo: UserRepository) {} + + getUser(id: string): User { + return this.repo.findById(id); + } + } + `; + const result = analyzeCode(code); + expect(result.issues.some((i) => i.rule === 'missing-tests')).toBe(true); + }); + + it('should not flag code with Jest tests', () => { + const code = ` + describe('UserService', () => { + it('should get user', () => { + expect(service.getUser('1')).toBeDefined(); + }); + }); + `; + const result = analyzeCode(code); + expect(result.issues.some((i) => i.rule === 'missing-tests')).toBe(false); + }); + + it('should not flag code with JUnit tests', () => { + const code = ` + class UserServiceTest { + @Test + void shouldGetUser() { + assertNotNull(service.getUser("1")); + } + } + `; + const result = analyzeCode(code); + expect(result.issues.some((i) => i.rule === 'missing-tests')).toBe(false); + }); + }); + + describe('Missing interfaces detection', () => { + it('should detect missing interfaces when classes exist', () => { + const code = ` + class UserService { + getUser(id: string): User { + return null; + } + } + `; + const result = analyzeCode(code); + expect(result.issues.some((i) => i.rule === 'missing-interfaces')).toBe(true); + }); + + it('should not flag code with interfaces', () => { + const code = ` + interface UserService { + getUser(id: string): User; + } + + class UserServiceImpl implements UserService { + getUser(id: string): User { + return null; + } + } + `; + const result = analyzeCode(code); + expect(result.issues.some((i) => i.rule === 'missing-interfaces')).toBe(false); + }); + }); + + describe('Score calculation', () => { + it('should give high score for clean code with good practices', () => { + const code = ` + interface UserRepository { + findById(id: string): User | null; + } + + interface UserService { + getUser(id: string): User; + } + + class UserServiceImpl implements UserService { + constructor(private readonly repo: UserRepository) {} + + getUser(id: string): User { + const user = this.repo.findById(id); + if (!user) throw new UserNotFoundError(id); + return user; + } + } + + class UserNotFoundError extends Error { + constructor(id: string) { + super(\`User not found: \${id}\`); + } + } + + describe('UserService', () => { + it('should get user', () => { + expect(true).toBe(true); + }); + }); + `; + const result = analyzeCode(code); + expect(result.score).toBeGreaterThan(70); + }); + + it('should give low score for problematic code', () => { + const code = ` + class BadService { + password = "secret123"; + + doStuff() { + try { + console.log("doing stuff"); + } catch (e) { } + } + } + `; + const result = analyzeCode(code); + expect(result.score).toBeLessThan(60); + expect(result.passed).toBe(false); + }); + + it('should deduct more for CRITICAL issues', () => { + const codeWithCritical = ` + const password = "secret123"; + `; + const codeWithWarning = ` + console.log("debug"); + `; + + const criticalResult = analyzeCode(codeWithCritical); + const warningResult = analyzeCode(codeWithWarning); + + // Critical issues should impact score more + expect(criticalResult.score).toBeLessThan(warningResult.score); + }); + }); + + describe('Metrics calculation', () => { + it('should count lines correctly', () => { + const code = ` + // Comment + class Test { + method() { + return 1; + } + } + `; + const result = analyzeCode(code); + expect(result.metrics.totalLines).toBeGreaterThan(0); + expect(result.metrics.commentLines).toBeGreaterThan(0); + }); + + it('should count classes', () => { + const code = ` + class UserService { + public getUser() { return null; } + public createUser() { return null; } + } + + class OrderService { + public getOrder() { return null; } + } + `; + const result = analyzeCode(code); + expect(result.metrics.classCount).toBe(2); + }); + + it('should count interfaces', () => { + const code = ` + interface UserRepo { } + interface OrderRepo { } + interface PaymentGateway { } + `; + const result = analyzeCode(code); + expect(result.metrics.interfaceCount).toBe(3); + }); + + it('should count tests', () => { + const code = ` + describe('Service', () => { + it('test 1', () => {}); + it('test 2', () => {}); + test('test 3', () => {}); + }); + `; + const result = analyzeCode(code); + expect(result.metrics.testCount).toBeGreaterThanOrEqual(3); + }); + }); + + describe('Summary generation', () => { + it('should generate NEEDS WORK summary for critical issues', () => { + const code = `catch (e) { }`; + const result = analyzeCode(code); + expect(result.summary).toContain('NEEDS WORK'); + }); + + it('should generate appropriate summary based on score', () => { + const goodCode = ` + interface Service { } + describe('test', () => { it('works', () => {}); }); + `; + const result = analyzeCode(goodCode); + expect(result.summary).not.toContain('NEEDS WORK'); + }); + }); + + describe('formatAnalysisAsMarkdown', () => { + it('should format results as markdown', () => { + const code = ` + class Test { + test() { console.log("x"); } + } + `; + const result = analyzeCode(code); + const markdown = formatAnalysisAsMarkdown(result); + + expect(markdown).toContain('# '); + expect(markdown).toContain('Score:'); + expect(markdown).toContain('Metrics'); + expect(markdown).toContain('| Metric | Value |'); + }); + + it('should include issues in markdown output', () => { + const code = `console.log("test");`; + const result = analyzeCode(code); + const markdown = formatAnalysisAsMarkdown(result); + + expect(markdown).toContain('Warnings'); + // Check for console-related content in the output + expect(markdown).toContain('Console'); + }); + }); + + describe('Edge cases', () => { + it('should handle empty code', () => { + const result = analyzeCode(''); + expect(result.score).toBeDefined(); + expect(result.issues).toBeDefined(); + expect(result.metrics.totalLines).toBe(1); // Empty string splits to [''] + }); + + it('should handle code with only comments', () => { + const code = ` + // This is a comment + /* Multi-line + comment */ + // Another comment + `; + const result = analyzeCode(code); + expect(result.metrics.commentLines).toBeGreaterThan(0); + }); + + it('should not create duplicate issues for same line', () => { + const code = ` + console.log("test"); + console.log("test2"); + `; + const result = analyzeCode(code); + const consoleIssues = result.issues.filter((i) => i.rule === 'no-console'); + // Should have 2 issues for 2 different lines + expect(consoleIssues.length).toBe(2); + }); + }); +}); diff --git a/tests/tools/verify.test.ts b/tests/tools/verify.test.ts new file mode 100644 index 0000000..4dba855 --- /dev/null +++ b/tests/tools/verify.test.ts @@ -0,0 +1,310 @@ +import { describe, expect, it } from 'vitest'; +import { handleVerify } from '../../src/tools/handlers/verify.js'; + +describe('verify tool', () => { + describe('Passing scenarios', () => { + it('should pass for code with tests and interfaces', async () => { + const result = await handleVerify({ + code: ` + class UserServiceImpl implements UserService { + constructor(private readonly repo: UserRepository) {} + + getUser(id: string): User { + return this.repo.findById(id); + } + } + `, + tests: ` + describe('UserService', () => { + it('should get user by id', () => { + const user = service.getUser('1'); + expect(user).toBeDefined(); + }); + + it('should throw when user not found', () => { + expect(() => service.getUser('invalid')).toThrow(); + }); + }); + `, + interfaces: ` + interface UserService { + getUser(id: string): User; + } + + interface UserRepository { + findById(id: string): User | null; + } + `, + }); + + expect(result.content[0].text).toContain('VERIFICATION PASSED'); + expect(result.isError).toBeFalsy(); + }); + + it('should pass for code with inline interfaces', async () => { + const result = await handleVerify({ + code: ` + interface UserService { + getUser(id: string): User; + } + + class UserServiceImpl implements UserService { + getUser(id: string): User { + return { id, name: 'Test' }; + } + } + `, + tests: ` + describe('UserService', () => { + it('test 1', () => { expect(true).toBe(true); }); + it('test 2', () => { expect(true).toBe(true); }); + }); + `, + }); + + expect(result.content[0].text).toContain('VERIFICATION PASSED'); + }); + }); + + describe('Failing scenarios', () => { + it('should fail for code without tests', async () => { + const result = await handleVerify({ + code: ` + class UserService { + getUser(id: string): User { + return null; + } + } + `, + }); + + expect(result.content[0].text).toContain('VERIFICATION FAILED'); + expect(result.content[0].text).toContain('No tests provided'); + expect(result.isError).toBe(true); + }); + + it('should fail for empty tests string', async () => { + const result = await handleVerify({ + code: `class Service { }`, + tests: ' ', + }); + + expect(result.content[0].text).toContain('VERIFICATION FAILED'); + expect(result.content[0].text).toContain('No tests provided'); + }); + + it('should fail for test code without actual test cases', async () => { + const result = await handleVerify({ + code: `class Service { method() { return 1; } }`, + tests: `// This is supposed to be tests but has no actual tests`, + }); + + expect(result.content[0].text).toContain('VERIFICATION FAILED'); + expect(result.content[0].text).toContain('no test cases detected'); + }); + + it('should fail for code with critical issues', async () => { + const result = await handleVerify({ + code: ` + class BadService { + password = "secret123"; + + doStuff() { + try { + process(); + } catch (e) { } + } + } + `, + tests: ` + describe('Service', () => { + it('test', () => { expect(true).toBe(true); }); + it('test2', () => { expect(true).toBe(true); }); + }); + `, + }); + + expect(result.content[0].text).toContain('VERIFICATION FAILED'); + expect(result.isError).toBe(true); + }); + + it('should fail for very low quality score', async () => { + const code = ` + class Bad { + var x = "test"; + password = "secret"; + eval("code"); + doStuff() { + console.log("x"); + try { x(); } catch (e) { } + } + } + `; + const result = await handleVerify({ + code, + tests: `it('t', () => {}); it('t2', () => {});`, + }); + + expect(result.content[0].text).toContain('VERIFICATION FAILED'); + }); + }); + + describe('Warnings', () => { + it('should warn about missing interfaces for classes', async () => { + const result = await handleVerify({ + code: ` + class UserService { + getUser(id: string): User { + return null; + } + } + `, + tests: ` + describe('UserService', () => { + it('test 1', () => { expect(true).toBe(true); }); + it('test 2', () => { expect(true).toBe(true); }); + }); + `, + }); + + const text = result.content[0].text; + expect(text).toContain('interface'); + }); + + it('should show test count in summary', async () => { + const result = await handleVerify({ + code: ` + interface Service { method(): void; } + class ServiceImpl implements Service { method() {} } + `, + tests: ` + describe('Service', () => { + it('test 1', () => { expect(true).toBe(true); }); + it('test 2', () => { expect(true).toBe(true); }); + }); + `, + interfaces: `interface Service { method(): void; }`, + }); + + const text = result.content[0].text; + // Should show test count in summary + expect(text).toContain('Test count:'); + }); + + it('should warn about any type usage', async () => { + const result = await handleVerify({ + code: ` + interface Service { process(data: any): void; } + class ServiceImpl implements Service { + process(data: any): void { } + } + `, + tests: ` + describe('Test', () => { + it('t1', () => {}); + it('t2', () => {}); + }); + `, + interfaces: `interface Service { process(data: any): void; }`, + }); + + expect(result.content[0].text).toContain('any'); + }); + }); + + describe('Verification summary', () => { + it('should include verification summary for passed code', async () => { + const result = await handleVerify({ + code: `class Impl implements Service { }`, + tests: `describe('x', () => { it('t', () => {}); it('t2', () => {}); });`, + interfaces: `interface Service { }`, + }); + + const text = result.content[0].text; + expect(text).toContain('Verification Summary'); + expect(text).toContain('Tests provided: Yes'); + expect(text).toContain('Interfaces provided: Yes'); + }); + + it('should show critical issue details for failed code', async () => { + const result = await handleVerify({ + code: `const password = "secret123";`, + tests: `it('t', () => {}); it('t2', () => {});`, + }); + + const text = result.content[0].text; + expect(text).toContain('Critical Code Issues'); + expect(text).toContain('secret'); + }); + }); + + describe('Task type handling', () => { + it('should accept valid task types', async () => { + const taskTypes = ['feature', 'bugfix', 'refactor', 'test', 'security', 'performance']; + + for (const taskType of taskTypes) { + const result = await handleVerify({ + code: `class X { }`, + tests: `it('t', () => {}); it('t2', () => {});`, + task_type: taskType, + }); + + // Should not throw + expect(result.content).toBeDefined(); + } + }); + }); + + describe('Edge cases', () => { + it('should handle code with mixed good and bad patterns', async () => { + const result = await handleVerify({ + code: ` + interface GoodService { + process(): void; + } + + class GoodServiceImpl implements GoodService { + constructor(private readonly dep: Dependency) {} + + process(): void { + console.log("debug"); // Warning but not critical + } + } + `, + tests: ` + describe('GoodService', () => { + it('should process', () => { expect(true).toBe(true); }); + it('should handle errors', () => { expect(true).toBe(true); }); + }); + `, + interfaces: `interface GoodService { process(): void; }`, + }); + + // Should pass despite warning (console.log is not critical) + expect(result.content[0].text).toContain('PASSED'); + }); + + it('should handle very large code input', async () => { + const largeCode = Array(100) + .fill(` + class Service${Math.random()} { + method() { return 1; } + } + `) + .join('\n'); + + const result = await handleVerify({ + code: largeCode, + tests: ` + describe('Tests', () => { + it('test 1', () => {}); + it('test 2', () => {}); + }); + `, + }); + + // Should complete without timeout + expect(result.content).toBeDefined(); + }); + }); +});