Skip to content

Commit 24de50e

Browse files
committed
fix: harden search reliability and indexing hygiene
1 parent a6b65f1 commit 24de50e

23 files changed

+1380
-133
lines changed

.gitignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ dist/
77
*.log
88
.DS_Store
99
.env
10+
opencode.jsonc
11+
nul
1012
.vscode/
1113
*.swp
1214
*.swo

src/core/analyzer-registry.ts

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,9 @@ export class AnalyzerRegistry {
7070
const analyzer = this.findAnalyzer(filePath, content);
7171

7272
if (!analyzer) {
73-
console.warn(`No analyzer found for file: ${filePath}`);
73+
if (process.env.CODEBASE_CONTEXT_DEBUG) {
74+
console.error(`[DEBUG] No analyzer found for file: ${filePath}`);
75+
}
7476
return null;
7577
}
7678

src/core/indexer.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -210,6 +210,20 @@ export class CodebaseIndexer {
210210
this.updateProgress('complete', 100);
211211
stats.duration = Date.now() - startTime;
212212
stats.completedAt = new Date();
213+
214+
// Preserve accurate counts from the existing index (nothing changed, index is intact)
215+
try {
216+
const existingIndexPath = path.join(contextDir, KEYWORD_INDEX_FILENAME);
217+
const existingChunks = JSON.parse(await fs.readFile(existingIndexPath, 'utf-8'));
218+
if (Array.isArray(existingChunks)) {
219+
stats.totalChunks = existingChunks.length;
220+
const uniqueFiles = new Set(existingChunks.map((c: { filePath?: string }) => c.filePath));
221+
stats.indexedFiles = uniqueFiles.size;
222+
}
223+
} catch {
224+
// Keyword index doesn't exist yet — keep counts as 0
225+
}
226+
213227
return stats;
214228
}
215229
}
@@ -591,6 +605,7 @@ export class CodebaseIndexer {
591605

592606
private async scanFiles(): Promise<string[]> {
593607
const files: string[] = [];
608+
const seen = new Set<string>();
594609

595610
// Read .gitignore if respecting it
596611
let ig: ReturnType<typeof ignore.default> | null = null;
@@ -617,6 +632,12 @@ export class CodebaseIndexer {
617632
});
618633

619634
for (const file of matches) {
635+
const normalizedFile = file.replace(/\\/g, '/');
636+
if (seen.has(normalizedFile)) {
637+
continue;
638+
}
639+
seen.add(normalizedFile);
640+
620641
const relativePath = path.relative(this.rootPath, file);
621642

622643
// Check gitignore

src/core/search-quality.ts

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import type { SearchResult } from '../types/index.js';
2+
import { isTestingRelatedQuery } from '../preflight/query-scope.js';
3+
4+
/**
 * Verdict on how trustworthy a ranked list of search results is, as produced by
 * `assessSearchQuality` in this module.
 */
export interface SearchQualityAssessment {
5+
/**
 * `'low_confidence'` when no results were returned, when two or more signals
 * fired, or when `confidence` is below 0.35; otherwise `'ok'`.
 */
status: 'ok' | 'low_confidence';
6+
/**
 * Top result score minus heuristic penalties, rounded to two decimals and
 * clamped to the range 0..1. It is 0 when there are no results.
 */
confidence: number;
7+
/** Human-readable description of each weakness that was detected; may be empty. */
signals: string[];
8+
/** Suggested follow-up actions; `assessSearchQuality` sets this only for `'low_confidence'`. */
nextSteps?: string[];
9+
}
10+
11+
/**
 * Reports whether a path points at a test artifact.
 *
 * The path is lower-cased and its backslashes are converted to forward slashes
 * before matching. It qualifies when it contains `.spec.` or `.test.`, or when
 * one of its directory segments is `e2e` or `__tests__`. Directory segments are
 * recognised both in the middle of a path (`src/e2e/x.ts`) and as the first
 * segment of a relative path (`e2e/x.ts`).
 *
 * @param filePath - Absolute or relative file path using `/` or `\` separators.
 * @returns `true` when the path looks like a test artifact, otherwise `false`.
 */
export function isTestArtifactPath(filePath: string): boolean {
  const normalized = filePath.toLowerCase().replace(/\\/g, '/');

  if (normalized.includes('.spec.') || normalized.includes('.test.')) {
    return true;
  }

  // Prefix a separator so a leading directory segment ("e2e/...") is matched
  // exactly like one that appears mid-path ("src/e2e/...").
  const anchored = `/${normalized}`;
  return anchored.includes('/e2e/') || anchored.includes('/__tests__/');
}
20+
21+
/**
 * Heuristically rates how trustworthy a ranked result list is for a query.
 *
 * Only the first three results are inspected. Four weaknesses are checked:
 * - the top score is below 0.3;
 * - the average score of the inspected results is below 0.32;
 * - the gap between the first and second score is below 0.03 (needs 2+ results);
 * - at least two thirds of the inspected results are test artifacts while the
 *   query itself is not about testing.
 *
 * Confidence starts at the top score and is reduced by 0.08, 0.05 and 0.15 for
 * the last three weaknesses respectively, then rounded to two decimals and
 * clamped to 0..1. The outcome is `'low_confidence'` when two or more weaknesses
 * fired or the confidence is below 0.35.
 *
 * @param query - The search query as entered by the caller.
 * @param results - Search results; the code reads `results[0]` as the top hit, so
 *   the list is presumably sorted best-first — confirm against the caller.
 * @returns The assessment; `nextSteps` is present only for `'low_confidence'`.
 */
export function assessSearchQuality(
  query: string,
  results: SearchResult[]
): SearchQualityAssessment {
  if (results.length === 0) {
    return {
      status: 'low_confidence',
      confidence: 0,
      signals: ['no results returned'],
      nextSteps: [
        'Try a narrower query with one concrete symbol, route, or file hint.',
        'Apply search filters (framework/language/componentType/layer).',
        'Use get_component_usage for dependency or wiring lookups.'
      ]
    };
  }

  const topSlice = results.slice(0, 3);
  const sliceSize = topSlice.length;
  const topScore = results[0].score;
  const secondScore = results[1]?.score ?? topScore;
  const topAverage = topSlice.reduce((sum, result) => sum + result.score, 0) / sliceSize;
  const topSeparation = Math.max(0, topScore - secondScore);
  const testCount = topSlice.filter((result) => isTestArtifactPath(result.filePath)).length;
  const testRatio = testCount / sliceSize;
  const queryIsTesting = isTestingRelatedQuery(query);

  // Each condition is evaluated once and reused for both the signal text and the
  // confidence penalty, so the two can never disagree.
  const lowTopScore = topScore < 0.3;
  const weakAverage = topAverage < 0.32;
  const tightSpread = sliceSize > 1 && topSeparation < 0.03;
  // Compare integer counts rather than a float ratio: 2 of 3 is 0.666…, which a
  // literal 0.67 threshold would reject even though two thirds are tests.
  const testsDominate = !queryIsTesting && testCount * 3 >= sliceSize * 2;

  const signals: string[] = [];
  if (lowTopScore) {
    signals.push(`low top score (${topScore.toFixed(2)})`);
  }
  if (weakAverage) {
    signals.push(`weak top-${sliceSize} average (${topAverage.toFixed(2)})`);
  }
  if (tightSpread) {
    signals.push(`tight top spread (${topSeparation.toFixed(2)})`);
  }
  if (testsDominate) {
    signals.push(`test artifacts dominate top-${sliceSize} (${Math.round(testRatio * 100)}%)`);
  }

  // A low top score carries no extra penalty: it is already the starting value.
  let confidence = topScore;
  if (weakAverage) confidence -= 0.08;
  if (tightSpread) confidence -= 0.05;
  if (testsDominate) confidence -= 0.15;
  confidence = Math.max(0, Math.min(1, Number(confidence.toFixed(2))));

  const lowConfidence = signals.length >= 2 || confidence < 0.35;

  const assessment: SearchQualityAssessment = {
    status: lowConfidence ? 'low_confidence' : 'ok',
    confidence,
    signals
  };
  if (lowConfidence) {
    assessment.nextSteps = [
      'Add one or two concrete symbols, routes, or file hints to the query.',
      'Apply filters (framework/language/componentType/layer) to narrow candidates.',
      'Use get_component_usage when the question is about wiring or usages.'
    ];
  }
  return assessment;
}

0 commit comments

Comments
 (0)