From 8a2444c1e3250e0b3b52286cd6d70a8c648c088b Mon Sep 17 00:00:00 2001 From: bohe76 Date: Sun, 24 May 2026 00:11:27 +0900 Subject: [PATCH 1/3] fix: reduce generic context entry noise --- __tests__/context.test.ts | 58 +++++++++++++++++++++++++++++++++++++++ src/context/index.ts | 38 +++++++++++++++++++++++++ 2 files changed, 96 insertions(+) diff --git a/__tests__/context.test.ts b/__tests__/context.test.ts index 52dae1fe..fa5d9af7 100644 --- a/__tests__/context.test.ts +++ b/__tests__/context.test.ts @@ -20,6 +20,8 @@ describe('Context Builder', () => { // Create a sample codebase const srcDir = path.join(testDir, 'src'); fs.mkdirSync(srcDir); + const evalDir = path.join(testDir, '__tests__', 'evaluation'); + fs.mkdirSync(evalDir, { recursive: true }); // Create a payment service file fs.writeFileSync( @@ -135,6 +137,46 @@ export function validateEmail(email: string): boolean { ` ); + fs.writeFileSync( + path.join(srcDir, 'affected.ts'), + ` +export function getFileDependents(filePath: string): string[] { + return [filePath]; +} + +export function affected(filePath: string): string[] { + return getFileDependents(filePath); +} +` + ); + + fs.writeFileSync( + path.join(srcDir, 'cache.ts'), + ` +export class LRUCache { + private values = new Map(); + + get(key: string): string | undefined { + return this.values.get(key); + } + + getDb(): Map { + return this.values; + } +} +` + ); + + fs.writeFileSync( + path.join(evalDir, 'test-cases.ts'), + ` +export const testCases = [ + { id: 'search-class-exact', query: 'PaymentService' }, + { id: 'context-affected', query: 'affected tests' }, +]; +` + ); + // Initialize CodeGraph cg = CodeGraph.initSync(testDir, { config: { @@ -194,6 +236,22 @@ export function validateEmail(email: string): boolean { ).toBe(true); }); + it('should avoid generic split-term entry points when an exact compound symbol matches', async () => { + const result = await cg.findRelevantContext('getFileDependents affected tests', { + searchLimit: 3, + maxNodes: 12, + }); + + const entryNames = result.roots + .map((id) => result.nodes.get(id)?.name) + .filter(Boolean); + + expect(entryNames).toContain('getFileDependents'); + expect(entryNames).not.toContain('get'); + expect(entryNames).not.toContain('getDb'); + expect(entryNames).not.toContain('testCases'); + }); + it('should include edges in the result', async () => { const result = await cg.findRelevantContext('checkout', { traversalDepth: 2, diff --git a/src/context/index.ts b/src/context/index.ts index 7298cd41..581d2640 100644 --- a/src/context/index.ts +++ b/src/context/index.ts @@ -131,6 +131,31 @@ function extractSymbolsFromQuery(query: string): string[] { return Array.from(symbols).filter(s => !commonWords.has(s.toLowerCase())); } +const GENERIC_SPLIT_SYMBOLS = new Set([ + 'get', 'set', 'add', 'run', 'build', 'create', 'find', 'list', 'load', + 'save', 'read', 'write', 'delete', 'remove', 'update', 'handle', 'process', + 'init', 'open', 'close', 'test', 'tests', 'testcase', 'testcases', 'spec', + 'specs', +]); + +function isSpecificCompoundSymbol(symbol: string): boolean { + return symbol.length >= 6 && /[A-Z_.:]/.test(symbol); +} + +function matchesGenericSplitSymbol(name: string): boolean { + const lower = name.toLowerCase(); + for (const generic of GENERIC_SPLIT_SYMBOLS) { + if (lower === generic) return true; + if (lower.startsWith(generic) && name.length > generic.length) { + const next = name[generic.length]!; + if (next === '_' || next === '-' || /[A-Z]/.test(next)) { + return true; + } + } + } + return false; +} + /** * Default options for context building * @@ -431,6 +456,19 @@ export class ContextBuilder { })) .sort((a, b) => b.score - a.score) .slice(0, opts.searchLimit * 2); + + const exactMatchFiles = new Set(exactMatches.map((r) => r.node.filePath)); + const hasSpecificExactMatch = exactMatches.some((r) => + symbolsFromQuery.some((symbol) => + isSpecificCompoundSymbol(symbol) && + r.node.name.toLowerCase() === symbol.toLowerCase() + ) + ); + if (hasSpecificExactMatch && exactMatchFiles.size > 0) { + textResults = textResults.filter((r) => { + return !matchesGenericSplitSymbol(r.node.name) || exactMatchFiles.has(r.node.filePath); + }); + } } logDebug('Text search results', { count: textResults.length }); } catch (error) { From a9a74a74df152fb4e162286b8079266b6c4511c0 Mon Sep 17 00:00:00 2001 From: bohe76 Date: Sun, 24 May 2026 00:53:35 +0900 Subject: [PATCH 2/3] fix: reduce default context code block noise --- __tests__/context.test.ts | 29 +++++++++++++++++++++++++++++ src/bin/codegraph.ts | 4 ++-- src/context/index.ts | 15 ++++----------- 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/__tests__/context.test.ts b/__tests__/context.test.ts index fa5d9af7..686b19ab 100644 --- a/__tests__/context.test.ts +++ b/__tests__/context.test.ts @@ -325,6 +325,35 @@ export const testCases = [ expect(markdown).toContain('```typescript'); }); + it('should avoid related class code blocks for method-focused context', async () => { + const result = await cg.buildContext('processCheckout', { + format: 'json', + includeCode: true, + maxCodeBlocks: 10, + traversalDepth: 2, + }); + + const parsed = JSON.parse(result as string); + const codeBlocks = parsed.codeBlocks as Array<{ + nodeName: string; + nodeKind: string; + filePath: string; + startLine: number; + }>; + const entryKeys = new Set( + parsed.entryPoints.map((node: { name: string; filePath: string; startLine: number }) => + `${node.name}:${node.filePath}:${node.startLine}` + ) + ); + const relatedClassBlocks = codeBlocks.filter((block) => + block.nodeKind === 'class' && + !entryKeys.has(`${block.nodeName}:${block.filePath}:${block.startLine}`) + ); + + expect(codeBlocks.some((block) => block.nodeName === 'processCheckout')).toBe(true); + expect(relatedClassBlocks).toHaveLength(0); + }); + it('should exclude code blocks when requested', async () => { const result = await cg.buildContext('payment', { format: 'markdown', diff --git a/src/bin/codegraph.ts b/src/bin/codegraph.ts index 6bc63b3f..e177b5e0 100644 --- a/src/bin/codegraph.ts +++ b/src/bin/codegraph.ts @@ -1085,7 +1085,7 @@ program .description('Build context for a task (outputs markdown)') .option('-p, --path ', 'Project path') .option('-n, --max-nodes ', 'Maximum nodes to include', '50') - .option('-c, --max-code ', 'Maximum code blocks', '10') + .option('-c, --max-code ', 'Maximum code blocks', '3') .option('--no-code', 'Exclude code blocks') .option('-f, --format ', 'Output format (markdown, json)', 'markdown') .action(async (task: string, options: { @@ -1108,7 +1108,7 @@ program const context = await cg.buildContext(task, { maxNodes: parseInt(options.maxNodes || '50', 10), - maxCodeBlocks: parseInt(options.maxCode || '10', 10), + maxCodeBlocks: parseInt(options.maxCode || '3', 10), includeCode: options.code !== false, format: options.format as 'markdown' | 'json', }); diff --git a/src/context/index.ts b/src/context/index.ts index 581d2640..508a9ff0 100644 --- a/src/context/index.ts +++ b/src/context/index.ts @@ -166,7 +166,7 @@ function matchesGenericSplitSymbol(name: string): boolean { */ const DEFAULT_BUILD_OPTIONS: Required = { maxNodes: 20, // Reduced from 50 - most tasks don't need 50 symbols - maxCodeBlocks: 5, // Reduced from 10 - only show most relevant code + maxCodeBlocks: 3, // Reduced from 10 - only show most relevant code maxCodeBlockSize: 1500, // Reduced from 2000 includeCode: true, format: 'markdown', @@ -1009,7 +1009,9 @@ export class ContextBuilder { ): Promise { const blocks: CodeBlock[] = []; - // Prioritize entry points, then functions/methods + // Prioritize entry points, then functions/methods. Entry point classes are + // still included via the roots above, but related classes are usually large + // containers whose location is enough for context output. const priorityNodes: Node[] = []; // First: entry points @@ -1029,15 +1031,6 @@ export class ContextBuilder { } } - // Then: classes - for (const node of subgraph.nodes.values()) { - if (!subgraph.roots.includes(node.id)) { - if (node.kind === 'class') { - priorityNodes.push(node); - } - } - } - // Extract code for priority nodes for (const node of priorityNodes) { if (blocks.length >= maxBlocks) break; From 3c449cf9d14aa397d11b18341fa87bcaf97cabb5 Mon Sep 17 00:00:00 2001 From: bohe76 Date: Sun, 24 May 2026 01:17:45 +0900 Subject: [PATCH 3/3] fix: add focused context snippets --- __tests__/context.test.ts | 16 ++++ src/bin/codegraph.ts | 4 +- src/context/formatter.ts | 2 +- src/context/index.ts | 149 +++++++++++++++++++++++++++++++++++++- 4 files changed, 164 insertions(+), 7 deletions(-) diff --git a/__tests__/context.test.ts b/__tests__/context.test.ts index 686b19ab..b350cafc 100644 --- a/__tests__/context.test.ts +++ b/__tests__/context.test.ts @@ -354,6 +354,22 @@ export const testCases = [ expect(relatedClassBlocks).toHaveLength(0); }); + it('should include a compact snippet when a focus term shares a file with the symbol', async () => { + const result = await cg.buildContext('getFileDependents affected tests', { + format: 'markdown', + includeCode: true, + maxCodeBlocks: 2, + }); + + const markdown = result as string; + + expect(markdown).toContain('#### getFileDependents'); + expect(markdown).toContain('#### Snippet'); + expect(markdown).toContain('function affected'); + expect(markdown).toContain('return getFileDependents(filePath)'); + expect(markdown).not.toContain('class LRUCache'); + }); + it('should exclude code blocks when requested', async () => { const result = await cg.buildContext('payment', { format: 'markdown', diff --git a/src/bin/codegraph.ts b/src/bin/codegraph.ts index e177b5e0..ac56010a 100644 --- a/src/bin/codegraph.ts +++ b/src/bin/codegraph.ts @@ -1085,7 +1085,7 @@ program .description('Build context for a task (outputs markdown)') .option('-p, --path ', 'Project path') .option('-n, --max-nodes ', 'Maximum nodes to include', '50') - .option('-c, --max-code ', 'Maximum code blocks', '3') + .option('-c, --max-code ', 'Maximum code blocks', '2') .option('--no-code', 'Exclude code blocks') .option('-f, --format ', 'Output format (markdown, json)', 'markdown') .action(async (task: string, options: { @@ -1108,7 +1108,7 @@ program const context = await cg.buildContext(task, { maxNodes: parseInt(options.maxNodes || '50', 10), - maxCodeBlocks: parseInt(options.maxCode || '3', 10), + maxCodeBlocks: parseInt(options.maxCode || '2', 10), includeCode: options.code !== false, format: options.format as 'markdown' | 'json', }); diff --git a/src/context/formatter.ts b/src/context/formatter.ts index 37a08ee8..1b402ab7 100644 --- a/src/context/formatter.ts +++ b/src/context/formatter.ts @@ -59,7 +59,7 @@ export function formatContextAsMarkdown(context: TaskContext): string { if (context.codeBlocks.length > 0) { lines.push('### Code\n'); for (const block of context.codeBlocks) { - const nodeName = block.node?.name ?? 'Unknown'; + const nodeName = block.node?.name ?? 'Snippet'; lines.push(`#### ${nodeName} (${block.filePath}:${block.startLine})\n`); lines.push('```' + block.language); lines.push(block.content); diff --git a/src/context/index.ts b/src/context/index.ts index 508a9ff0..e8e41655 100644 --- a/src/context/index.ts +++ b/src/context/index.ts @@ -156,6 +156,34 @@ function matchesGenericSplitSymbol(name: string): boolean { return false; } +function splitIdentifierTerms(value: string): string[] { + return value + .replace(/([a-z])([A-Z])/g, '$1 $2') + .replace(/([A-Z]+)([A-Z][a-z])/g, '$1 $2') + .split(/[^a-zA-Z0-9]+/) + .map((term) => term.toLowerCase()) + .filter((term) => term.length >= 3); +} + +function extractSnippetFocusTerms(query: string, rootNames: string[]): string[] { + const rootTerms = new Set(); + for (const name of rootNames) { + rootTerms.add(name.toLowerCase()); + for (const term of splitIdentifierTerms(name)) { + rootTerms.add(term); + } + } + + const focusTerms = new Set(); + for (const term of splitIdentifierTerms(query)) { + if (rootTerms.has(term)) continue; + if (GENERIC_SPLIT_SYMBOLS.has(term)) continue; + if (term === 'test' || term === 'tests' || term === 'spec' || term === 'specs') continue; + focusTerms.add(term); + } + return Array.from(focusTerms); +} + /** * Default options for context building * @@ -166,7 +194,7 @@ function matchesGenericSplitSymbol(name: string): boolean { */ const DEFAULT_BUILD_OPTIONS: Required = { maxNodes: 20, // Reduced from 50 - most tasks don't need 50 symbols - maxCodeBlocks: 3, // Reduced from 10 - only show most relevant code + maxCodeBlocks: 2, // Reduced from 10 - only show most relevant code maxCodeBlockSize: 1500, // Reduced from 2000 includeCode: true, format: 'markdown', @@ -254,7 +282,7 @@ export class ContextBuilder { // Extract code blocks for key nodes const codeBlocks = opts.includeCode - ? await this.extractCodeBlocks(subgraph, opts.maxCodeBlocks, opts.maxCodeBlockSize) + ? await this.extractCodeBlocks(query, subgraph, opts.maxCodeBlocks, opts.maxCodeBlockSize) : []; // Get related files @@ -1003,11 +1031,15 @@ export class ContextBuilder { * Extract code blocks for key nodes in the subgraph */ private async extractCodeBlocks( + query: string, subgraph: Subgraph, maxBlocks: number, maxBlockSize: number ): Promise { const blocks: CodeBlock[] = []; + const snippets = await this.extractQuerySnippets(query, subgraph, maxBlockSize); + const snippetSlots = snippets.length > 0 && maxBlocks > 1 ? 1 : 0; + const regularLimit = Math.max(0, maxBlocks - snippetSlots); // Prioritize entry points, then functions/methods. Entry point classes are // still included via the roots above, but related classes are usually large @@ -1031,12 +1063,21 @@ export class ContextBuilder { } } + const duplicateNames = new Map(); + for (const node of priorityNodes) { + duplicateNames.set(node.name, (duplicateNames.get(node.name) ?? 0) + 1); + } + // Extract code for priority nodes for (const node of priorityNodes) { - if (blocks.length >= maxBlocks) break; + if (blocks.length >= regularLimit) break; const code = await this.extractNodeCode(node); if (code) { + if (this.isLikelyDelegatingWrapper(node, code, duplicateNames)) { + continue; + } + // Truncate if too long. Language-neutral marker (no `//` — not a // comment in Python, Ruby, etc.); this renders inside a fenced // source block whose language varies. @@ -1055,7 +1096,107 @@ export class ContextBuilder { } } - return blocks; + return [ + ...blocks, + ...snippets.slice(0, Math.max(0, maxBlocks - blocks.length)), + ]; + } + + private isLikelyDelegatingWrapper( + node: Node, + code: string, + duplicateNames: Map + ): boolean { + if ((duplicateNames.get(node.name) ?? 0) < 2) return false; + if (node.kind !== 'function' && node.kind !== 'method') return false; + if (code.length > 350) return false; + return /\breturn\s+this\.[A-Za-z_$][\w$]*\.[A-Za-z_$][\w$]*\s*\(/.test(code); + } + + private async extractQuerySnippets( + query: string, + subgraph: Subgraph, + maxBlockSize: number + ): Promise { + const rootNames = subgraph.roots + .map((id) => subgraph.nodes.get(id)?.name) + .filter((name): name is string => Boolean(name)); + const rootNeedles = [...new Set(rootNames.map((name) => name.toLowerCase()))]; + const focusTerms = extractSnippetFocusTerms(query, rootNames); + if (rootNeedles.length === 0 || focusTerms.length === 0) { + return []; + } + + let best: + | { + score: number; + filePath: string; + language: CodeBlock['language']; + startLine: number; + endLine: number; + content: string; + } + | null = null; + + for (const file of this.queries.getAllFiles()) { + const fullPath = validatePathWithinRoot(this.projectRoot, file.path); + if (!fullPath || !fs.existsSync(fullPath)) continue; + if (file.size > 200_000) continue; + + let content: string; + try { + content = fs.readFileSync(fullPath, 'utf-8'); + } catch { + continue; + } + + const lower = content.toLowerCase(); + if (!rootNeedles.some((needle) => lower.includes(needle))) continue; + if (!focusTerms.some((term) => lower.includes(term))) continue; + + const lines = content.split(/\r?\n/); + for (let idx = 0; idx < lines.length; idx++) { + const lineLower = lines[idx]!.toLowerCase(); + if (!rootNeedles.some((needle) => lineLower.includes(needle))) continue; + + const startIdx = Math.max(0, idx - 4); + const endIdx = Math.min(lines.length - 1, idx + 7); + const window = lines.slice(startIdx, endIdx + 1).join('\n'); + const windowLower = window.toLowerCase(); + const termHits = focusTerms.filter((term) => windowLower.includes(term)).length; + if (termHits === 0) continue; + + let score = termHits * 20; + if (!isTestFile(file.path)) score += 5; + if (focusTerms.some((term) => lower.includes(`.command('${term}`) || lower.includes(`.command("${term}`))) { + score += 15; + } + + const trimmed = window.length > Math.min(maxBlockSize, 900) + ? window.slice(0, Math.min(maxBlockSize, 900)) + '\n... (truncated) ...' + : window; + + if (!best || score > best.score) { + best = { + score, + filePath: file.path, + language: file.language, + startLine: startIdx + 1, + endLine: endIdx + 1, + content: trimmed, + }; + } + } + } + + if (!best) return []; + return [{ + content: best.content, + filePath: best.filePath, + startLine: best.startLine, + endLine: best.endLine, + language: best.language, + }]; } /**