From 820ff834c3867b952ebaabeffa0ff3c04945affa Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 03:37:11 -0600 Subject: [PATCH 01/26] chore: remove dead duplicate type declarations from types.ts (closes #1727) --- src/types.ts | 82 ---------------------------------------------------- 1 file changed, 82 deletions(-) diff --git a/src/types.ts b/src/types.ts index 9f85a9f33..d52bd81bf 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1108,68 +1108,6 @@ export interface DataflowMutation { line: number; } -// ════════════════════════════════════════════════════════════════════════ -// §9 Graph Model (CodeGraph) -// ════════════════════════════════════════════════════════════════════════ - -/** Node attributes stored in the in-memory graph. */ -export interface GraphNodeAttrs { - label?: string; - kind?: string; - file?: string; - name?: string; - line?: number; - dbId?: number; - [key: string]: unknown; -} - -/** Edge attributes stored in the in-memory graph. */ -export interface GraphEdgeAttrs { - kind?: string; - confidence?: number; - weight?: number; - [key: string]: unknown; -} - -/** The unified in-memory graph model. 
*/ -export interface CodeGraph { - readonly directed: boolean; - readonly nodeCount: number; - readonly edgeCount: number; - - // Node operations - addNode(id: string, attrs?: GraphNodeAttrs): CodeGraph; - hasNode(id: string): boolean; - getNodeAttrs(id: string): GraphNodeAttrs | undefined; - nodes(): IterableIterator<[string, GraphNodeAttrs]>; - nodeIds(): string[]; - - // Edge operations - addEdge(source: string, target: string, attrs?: GraphEdgeAttrs): CodeGraph; - hasEdge(source: string, target: string): boolean; - getEdgeAttrs(source: string, target: string): GraphEdgeAttrs | undefined; - edges(): Generator<[string, string, GraphEdgeAttrs]>; - - // Adjacency - successors(id: string): string[]; - predecessors(id: string): string[]; - neighbors(id: string): string[]; - outDegree(id: string): number; - inDegree(id: string): number; - - // Filtering - subgraph(predicate: (id: string, attrs: GraphNodeAttrs) => boolean): CodeGraph; - filterEdges(predicate: (src: string, tgt: string, attrs: GraphEdgeAttrs) => boolean): CodeGraph; - - // Conversion - toEdgeArray(): Array<{ source: string; target: string }>; - toGraphology(opts?: { type?: string }): unknown; - - // Utilities - clone(): CodeGraph; - merge(other: CodeGraph): CodeGraph; -} - // ════════════════════════════════════════════════════════════════════════ // §10 Build Pipeline // ════════════════════════════════════════════════════════════════════════ @@ -1628,26 +1566,6 @@ export interface PaginatedItems { /** A result object with optional _pagination metadata. 
*/ export type Paginated = T & { _pagination?: PaginationMeta }; -// ════════════════════════════════════════════════════════════════════════ -// §13 Error Hierarchy -// ════════════════════════════════════════════════════════════════════════ - -export type ErrorCode = - | 'CODEGRAPH_ERROR' - | 'PARSE_FAILED' - | 'DB_ERROR' - | 'CONFIG_INVALID' - | 'RESOLUTION_FAILED' - | 'ENGINE_UNAVAILABLE' - | 'ANALYSIS_FAILED' - | 'BOUNDARY_VIOLATION'; - -export interface CodegraphErrorOpts { - code?: ErrorCode; - file?: string; - cause?: Error; -} - // ════════════════════════════════════════════════════════════════════════ // §14 Feature Module Result Shapes // ════════════════════════════════════════════════════════════════════════ From 9fa427902c51d9c13bfcb5c0dcd9c6fb8ce2ca9e Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 03:46:14 -0600 Subject: [PATCH 02/26] chore: remove unused iterComplexity export from complexity-query.ts (docs check acknowledged) Impact: 1 functions changed, 2 affected --- src/features/complexity-query.ts | 103 ------------------------------- src/features/complexity.ts | 2 +- 2 files changed, 1 insertion(+), 104 deletions(-) diff --git a/src/features/complexity-query.ts b/src/features/complexity-query.ts index 27eb5fc08..5f3b9d121 100644 --- a/src/features/complexity-query.ts +++ b/src/features/complexity-query.ts @@ -283,106 +283,3 @@ export function complexityData( db.close(); } } - -interface IterComplexityRow { - name: string; - kind: string; - file: string; - line: number; - end_line: number | null; - cognitive: number; - cyclomatic: number; - max_nesting: number; - loc: number; - sloc: number; -} - -export function* iterComplexity( - customDbPath?: string, - opts: { - noTests?: boolean; - file?: string; - target?: string; - kind?: string; - sort?: string; - } = {}, -): Generator<{ - name: string; - kind: string; - file: string; - line: number; - endLine: number | null; - cognitive: number; - cyclomatic: number; - maxNesting: 
number; - loc: number; - sloc: number; -}> { - const db = openReadonlyOrFail(customDbPath); - try { - const noTests = opts.noTests || false; - const sort = opts.sort || 'cognitive'; - - let where = "WHERE n.kind IN ('function','method')"; - const params: unknown[] = []; - - if (noTests) { - where += ` AND n.file NOT LIKE '%.test.%' - AND n.file NOT LIKE '%.spec.%' - AND n.file NOT LIKE '%__test__%' - AND n.file NOT LIKE '%__tests__%' - AND n.file NOT LIKE '%.stories.%'`; - } - if (opts.target) { - where += ' AND n.name LIKE ?'; - params.push(`%${opts.target}%`); - } - { - const fc = buildFileConditionSQL(opts.file as string, 'n.file'); - where += fc.sql; - params.push(...fc.params); - } - if (opts.kind) { - where += ' AND n.kind = ?'; - params.push(opts.kind); - } - - const orderMap: Record = { - cognitive: 'fc.cognitive DESC', - cyclomatic: 'fc.cyclomatic DESC', - nesting: 'fc.max_nesting DESC', - mi: 'fc.maintainability_index ASC', - volume: 'fc.halstead_volume DESC', - effort: 'fc.halstead_effort DESC', - bugs: 'fc.halstead_bugs DESC', - loc: 'fc.loc DESC', - }; - const orderBy = orderMap[sort] || 'fc.cognitive DESC'; - - const stmt = db.prepare( - `SELECT n.name, n.kind, n.file, n.line, n.end_line, - fc.cognitive, fc.cyclomatic, fc.max_nesting, fc.loc, fc.sloc - FROM function_complexity fc - JOIN nodes n ON fc.node_id = n.id - ${where} - ORDER BY ${orderBy}`, - ); - for (const r of stmt.iterate(...params)) { - if (noTests && isTestFile(r.file)) continue; - yield { - name: r.name, - kind: r.kind, - file: r.file, - line: r.line, - endLine: r.end_line || null, - cognitive: r.cognitive, - cyclomatic: r.cyclomatic, - maxNesting: r.max_nesting, - loc: r.loc || 0, - sloc: r.sloc || 0, - }; - } - } finally { - db.close(); - } -} diff --git a/src/features/complexity.ts b/src/features/complexity.ts index 307ed8549..8e1b6fb06 100644 --- a/src/features/complexity.ts +++ b/src/features/complexity.ts @@ -750,4 +750,4 @@ export async function buildComplexityMetrics( // ─── 
Query-Time Functions (re-exported from complexity-query.ts) ────────── // Split to separate query-time concerns (DB reads, filtering, pagination) // from compute-time concerns (AST traversal, metric algorithms). -export { complexityData, iterComplexity } from './complexity-query.js'; +export { complexityData } from './complexity-query.js'; From 0f9bbe6f05779ca7531b0b01807e6c35a7092baf Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 03:58:55 -0600 Subject: [PATCH 03/26] refactor: extract shared aggregate and typed-array helpers from leiden algorithm files docs check acknowledged: internal helper extraction only, no user-facing feature/language/architecture-table changes. Impact: 10 functions changed, 27 affected --- src/graph/algorithms/leiden/adapter.ts | 14 +-- .../algorithms/leiden/aggregate-helpers.ts | 83 ++++++++++++++++++ src/graph/algorithms/leiden/index.ts | 73 +--------------- src/graph/algorithms/leiden/partition.ts | 86 ++----------------- .../algorithms/leiden/typed-array-helpers.ts | 28 ++++++ 5 files changed, 121 insertions(+), 163 deletions(-) create mode 100644 src/graph/algorithms/leiden/aggregate-helpers.ts create mode 100644 src/graph/algorithms/leiden/typed-array-helpers.ts diff --git a/src/graph/algorithms/leiden/adapter.ts b/src/graph/algorithms/leiden/adapter.ts index 4425cbb30..390a15aa3 100644 --- a/src/graph/algorithms/leiden/adapter.ts +++ b/src/graph/algorithms/leiden/adapter.ts @@ -6,6 +6,7 @@ */ import type { CodeGraph, EdgeAttrs, NodeAttrs } from '../../model.js'; +import { fget, taAdd } from './typed-array-helpers.js'; export interface EdgeEntry { to: number; @@ -39,17 +40,6 @@ export interface GraphAdapter { forEachNeighbor: (i: number, cb: (to: number, w: number) => void) => void; } -// Typed arrays always return a number for in-bounds access, but noUncheckedIndexedAccess -// widens the return to `number | undefined`. 
These helpers wrap compound assignment -// patterns (+=, -=) that appear frequently in this performance-critical code. -function taGet(a: Float64Array, i: number): number { - return a[i] as number; -} - -function taAdd(a: Float64Array, i: number, v: number): void { - a[i] = taGet(a, i) + v; -} - /** * Populate edge arrays for a directed graph. Each edge is stored once in * outEdges[from] and inEdges[to]. Self-loops are tracked in both the selfLoop @@ -145,7 +135,7 @@ function populateUndirectedEdges( // Note: uses single-w convention (not standard 2w) — the modularity formulas in // modularity.ts are written to match this convention, keeping the system self-consistent. for (let v = 0; v < n; v++) { - const w: number = taGet(selfLoop, v); + const w: number = fget(selfLoop, v); if (w !== 0) { (outEdges[v] as EdgeEntry[]).push({ to: v, w }); (inEdges[v] as InEdgeEntry[]).push({ from: v, w }); diff --git a/src/graph/algorithms/leiden/aggregate-helpers.ts b/src/graph/algorithms/leiden/aggregate-helpers.ts new file mode 100644 index 000000000..306063409 --- /dev/null +++ b/src/graph/algorithms/leiden/aggregate-helpers.ts @@ -0,0 +1,83 @@ +/** + * Per-community aggregate accumulation shared by partition.ts (live + * optimisation state, mutated move-by-move) and index.ts (one-shot + * evaluation on the original graph for quality()). Both need to reduce the + * graph's per-node size/strength/self-loop values down to one row per + * community using identical directed/undirected branching — extracting + * this once prevents the two copies from silently drifting apart on a + * future edit to only one of them. + */ + +import type { GraphAdapter } from './adapter.js'; +import { fget, iget } from './typed-array-helpers.js'; + +/** + * Accumulate per-community node-level totals (size, strength, self-loop + * weight) into the provided aggregate arrays. 
+ * + * `nodeCount` is optional: partition.ts's live optimisation state tracks + * per-community node counts (used by compactCommunityIds's size/count sort + * tie-break), while index.ts's read-only quality evaluation does not need + * it and omits the argument. + */ +export function accumulateNodeAggregates( + graph: GraphAdapter, + nodeCommunity: Int32Array, + n: number, + totalSize: Float64Array, + internalEdgeWeight: Float64Array, + totalStrength: Float64Array, + totalOutStrength: Float64Array, + totalInStrength: Float64Array, + nodeCount?: Int32Array, +): void { + for (let i = 0; i < n; i++) { + const c: number = iget(nodeCommunity, i); + totalSize[c] = fget(totalSize, c) + fget(graph.size, i); + if (nodeCount) nodeCount[c] = iget(nodeCount, c) + 1; + if (graph.directed) { + totalOutStrength[c] = fget(totalOutStrength, c) + fget(graph.strengthOut, i); + totalInStrength[c] = fget(totalInStrength, c) + fget(graph.strengthIn, i); + } else { + totalStrength[c] = fget(totalStrength, c) + fget(graph.strengthOut, i); + } + if (fget(graph.selfLoop, i) !== 0) + internalEdgeWeight[c] = fget(internalEdgeWeight, c) + fget(graph.selfLoop, i); + } +} + +/** + * Accumulate intra-community edge weights. For directed graphs, counts all + * intra-community non-self edges. For undirected, counts each edge once + * (j > i) to avoid double-counting. 
+ */ +export function accumulateInternalEdgeWeights( + graph: GraphAdapter, + nodeCommunity: Int32Array, + n: number, + internalEdgeWeight: Float64Array, +): void { + if (graph.directed) { + for (let i = 0; i < n; i++) { + const ci: number = iget(nodeCommunity, i); + const neighbors = graph.outEdges[i]!; + for (let k = 0; k < neighbors.length; k++) { + const { to: j, w } = neighbors[k]!; + if (i === j) continue; // self-loop already counted via graph.selfLoop[i] + if (ci === iget(nodeCommunity, j)) + internalEdgeWeight[ci] = fget(internalEdgeWeight, ci) + w; + } + } + } else { + for (let i = 0; i < n; i++) { + const ci: number = iget(nodeCommunity, i); + const neighbors = graph.outEdges[i]!; + for (let k = 0; k < neighbors.length; k++) { + const { to: j, w } = neighbors[k]!; + if (j <= i) continue; + if (ci === iget(nodeCommunity, j)) + internalEdgeWeight[ci] = fget(internalEdgeWeight, ci) + w; + } + } + } +} diff --git a/src/graph/algorithms/leiden/index.ts b/src/graph/algorithms/leiden/index.ts index a69df7caf..9a2a05a9b 100644 --- a/src/graph/algorithms/leiden/index.ts +++ b/src/graph/algorithms/leiden/index.ts @@ -8,10 +8,12 @@ import type { CodeGraph } from '../../model.js'; import type { GraphAdapter } from './adapter.js'; +import { accumulateInternalEdgeWeights, accumulateNodeAggregates } from './aggregate-helpers.js'; import { qualityCPM } from './cpm.js'; import { qualityModularity } from './modularity.js'; import type { LeidenOptions } from './optimiser.js'; import { runLouvainUndirectedModularity } from './optimiser.js'; +import { iget } from './typed-array-helpers.js'; export type { LeidenOptions } from './optimiser.js'; @@ -27,14 +29,6 @@ export interface DetectClustersResult { }; } -// Typed array safe-access helpers (see adapter.ts for rationale) -function fget(a: Float64Array, i: number): number { - return a[i] as number; -} -function iget(a: Int32Array, i: number): number { - return a[i] as number; -} - /** * Detect communities in a CodeGraph using 
the Leiden algorithm. * @@ -119,67 +113,6 @@ interface OriginalPartition { getInEdgeWeightFromCommunity(c: number): number; } -/** - * Accumulate intra-community edge weights for quality evaluation. - * For directed graphs, counts all intra-community non-self edges. - * For undirected, counts each edge once (j > i) to avoid double-counting. - */ -function accumulateInternalEdgeWeights( - g: GraphAdapter, - communityMap: Int32Array, - n: number, - internalWeight: Float64Array, -): void { - if (g.directed) { - for (let i = 0; i < n; i++) { - const ci: number = iget(communityMap, i); - const list = g.outEdges[i]!; - for (let k = 0; k < list.length; k++) { - const { to: j, w } = list[k]!; - if (i === j) continue; - if (ci === iget(communityMap, j)) internalWeight[ci] = fget(internalWeight, ci) + w; - } - } - } else { - for (let i = 0; i < n; i++) { - const ci: number = iget(communityMap, i); - const list = g.outEdges[i]!; - for (let k = 0; k < list.length; k++) { - const { to: j, w } = list[k]!; - if (j <= i) continue; - if (ci === iget(communityMap, j)) internalWeight[ci] = fget(internalWeight, ci) + w; - } - } - } -} - -/** - * Accumulate per-community node-level aggregates (size, strength) from - * the graph adapter and community mapping. 
- */ -function accumulateNodeAggregates( - g: GraphAdapter, - communityMap: Int32Array, - n: number, - totalSize: Float64Array, - totalStr: Float64Array, - totalOutStr: Float64Array, - totalInStr: Float64Array, - internalWeight: Float64Array, -): void { - for (let i = 0; i < n; i++) { - const c: number = iget(communityMap, i); - totalSize[c] = fget(totalSize, c) + fget(g.size, i); - if (g.directed) { - totalOutStr[c] = fget(totalOutStr, c) + fget(g.strengthOut, i); - totalInStr[c] = fget(totalInStr, c) + fget(g.strengthIn, i); - } else { - totalStr[c] = fget(totalStr, c) + fget(g.strengthOut, i); - } - if (fget(g.selfLoop, i)) internalWeight[c] = fget(internalWeight, c) + fget(g.selfLoop, i); - } -} - function buildOriginalPartition(g: GraphAdapter, communityMap: Int32Array): OriginalPartition { const n: number = g.n; let maxC: number = 0; @@ -201,10 +134,10 @@ function buildOriginalPartition(g: GraphAdapter, communityMap: Int32Array): Orig communityMap, n, totalSize, + internalWeight, totalStr, totalOutStr, totalInStr, - internalWeight, ); accumulateInternalEdgeWeights(g, communityMap, n, internalWeight); diff --git a/src/graph/algorithms/leiden/partition.ts b/src/graph/algorithms/leiden/partition.ts index 38b67737c..de78b8f3e 100644 --- a/src/graph/algorithms/leiden/partition.ts +++ b/src/graph/algorithms/leiden/partition.ts @@ -8,6 +8,8 @@ */ import type { GraphAdapter } from './adapter.js'; +import { accumulateInternalEdgeWeights, accumulateNodeAggregates } from './aggregate-helpers.js'; +import { fget, iget, u8get } from './typed-array-helpers.js'; export interface CompactOptions { keepOldOrder?: boolean; @@ -44,18 +46,6 @@ export interface Partition { graph?: GraphAdapter; } -// Typed arrays always return a number for in-bounds access, but noUncheckedIndexedAccess -// widens to `number | undefined`. These helpers keep the compound assignment patterns readable. 
-function fget(a: Float64Array, i: number): number { - return a[i] as number; -} -function iget(a: Int32Array, i: number): number { - return a[i] as number; -} -function u8get(a: Uint8Array, i: number): number { - return a[i] as number; -} - /* ------------------------------------------------------------------ */ /* Internal mutable state bucket shared by all extracted functions */ /* ------------------------------------------------------------------ */ @@ -81,75 +71,9 @@ interface PartitionState { } /* ------------------------------------------------------------------ */ -/* Aggregate helpers (shared by initializeAggregates & compact) */ +/* Community-ID sort helper (used by compact) */ /* ------------------------------------------------------------------ */ -/** - * Accumulate per-community node-level totals (size, count, strength) into the - * provided aggregate arrays. Both `initializeAggregates` and `compactCommunityIds` - * share this logic — extracting it eliminates the duplication. - */ -function accumulateNodeAggregates( - graph: GraphAdapter, - nodeCommunity: Int32Array, - n: number, - totalSize: Float64Array, - nodeCount: Int32Array, - internalEdgeWeight: Float64Array, - totalStrength: Float64Array, - totalOutStrength: Float64Array, - totalInStrength: Float64Array, -): void { - for (let i = 0; i < n; i++) { - const c: number = iget(nodeCommunity, i); - totalSize[c] = fget(totalSize, c) + fget(graph.size, i); - nodeCount[c] = iget(nodeCount, c) + 1; - if (graph.directed) { - totalOutStrength[c] = fget(totalOutStrength, c) + fget(graph.strengthOut, i); - totalInStrength[c] = fget(totalInStrength, c) + fget(graph.strengthIn, i); - } else { - totalStrength[c] = fget(totalStrength, c) + fget(graph.strengthOut, i); - } - if (fget(graph.selfLoop, i) !== 0) - internalEdgeWeight[c] = fget(internalEdgeWeight, c) + fget(graph.selfLoop, i); - } -} - -/** - * Accumulate intra-community edge weights. For directed graphs, counts all - * intra-community non-self edges. 
For undirected, counts each edge once (j > i). - */ -function accumulateInternalEdgeWeights( - graph: GraphAdapter, - nodeCommunity: Int32Array, - n: number, - internalEdgeWeight: Float64Array, -): void { - if (graph.directed) { - for (let i = 0; i < n; i++) { - const ci: number = iget(nodeCommunity, i); - const neighbors = graph.outEdges[i]!; - for (let k = 0; k < neighbors.length; k++) { - const { to: j, w } = neighbors[k]!; - if (i === j) continue; // self-loop already counted via graph.selfLoop[i] - if (ci === iget(nodeCommunity, j)) - internalEdgeWeight[ci] = fget(internalEdgeWeight, ci) + w; - } - } - } else { - for (let i = 0; i < n; i++) { - const ci: number = iget(nodeCommunity, i); - const neighbors = graph.outEdges[i]!; - for (let k = 0; k < neighbors.length; k++) { - const { to: j, w } = neighbors[k]!; - if (j <= i) continue; - if (ci === iget(nodeCommunity, j)) - internalEdgeWeight[ci] = fget(internalEdgeWeight, ci) + w; - } - } - } -} - /** * Sort community IDs according to the compaction options: preserve original * order, respect a user-provided label map, or sort by descending size. 
@@ -218,11 +142,11 @@ function initAggregates(s: PartitionState): void { s.nodeCommunity, s.n, s.communityTotalSize, - s.communityNodeCount, s.communityInternalEdgeWeight, s.communityTotalStrength, s.communityTotalOutStrength, s.communityTotalInStrength, + s.communityNodeCount, ); accumulateInternalEdgeWeights(s.graph, s.nodeCommunity, s.n, s.communityInternalEdgeWeight); } @@ -463,11 +387,11 @@ function compactIds(s: PartitionState, opts: CompactOptions = {}): void { s.nodeCommunity, s.n, newTotalSize, - newNodeCount, newInternalEdgeWeight, newTotalStrength, newTotalOutStrength, newTotalInStrength, + newNodeCount, ); accumulateInternalEdgeWeights(s.graph, s.nodeCommunity, s.n, newInternalEdgeWeight); diff --git a/src/graph/algorithms/leiden/typed-array-helpers.ts b/src/graph/algorithms/leiden/typed-array-helpers.ts new file mode 100644 index 000000000..ce3ef58a4 --- /dev/null +++ b/src/graph/algorithms/leiden/typed-array-helpers.ts @@ -0,0 +1,28 @@ +/** + * Typed-array safe-access helpers shared by the leiden algorithm files. + * + * Typed arrays always return a number for in-bounds access, but + * noUncheckedIndexedAccess widens the return type to `number | undefined`. + * These helpers keep index reads and compound-assignment patterns (`+=`) + * readable in this performance-critical code, without partition.ts, + * adapter.ts, and index.ts each maintaining their own hand-copied variant + * (previously named fget/iget/u8get in two files and taGet/taAdd in the + * third — same idiom, three independent copies). + */ + +export function fget(a: Float64Array, i: number): number { + return a[i] as number; +} + +export function iget(a: Int32Array, i: number): number { + return a[i] as number; +} + +export function u8get(a: Uint8Array, i: number): number { + return a[i] as number; +} + +/** In-place compound addition: `a[i] += v`, safe under noUncheckedIndexedAccess. 
*/ +export function taAdd(a: Float64Array, i: number, v: number): void { + a[i] = fget(a, i) + v; +} From f3e1119e4802ea4babbefc4c0b913f536e14c3ff Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 04:19:45 -0600 Subject: [PATCH 04/26] refactor: extract shared name-map scanner into scripts/lib/name-map.mjs Impact: 11 functions changed, 11 affected --- scripts/compare-tools.mjs | 143 +----------------- scripts/import-jelly-micro.mjs | 153 ++------------------ scripts/lib/name-map.mjs | 256 +++++++++++++++++++++++++++++++++ 3 files changed, 274 insertions(+), 278 deletions(-) create mode 100644 scripts/lib/name-map.mjs diff --git a/scripts/compare-tools.mjs b/scripts/compare-tools.mjs index 530c067ea..d1c18174c 100644 --- a/scripts/compare-tools.mjs +++ b/scripts/compare-tools.mjs @@ -25,6 +25,7 @@ import fs from 'node:fs'; import os from 'node:os'; import path from 'node:path'; import { fileURLToPath } from 'node:url'; +import { buildFileLineNameMap, buildFileNameLookup } from './lib/name-map.mjs'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); const ROOT = path.resolve(__dirname, '..'); @@ -71,75 +72,6 @@ function findBin(name, envVar) { } } -// ── Name resolution from source ──────────────────────────────────────────── - -/** - * Parse source files to build a map of (file, startLine) → class-qualified name. - * Returns a Map<"filename:line", string>. - * - * Heuristic — works well for the small hand-annotated fixtures. 
- */ -function buildNameMap(fixtureDir, lang) { - const exts = EXTENSIONS[lang] || ['.js']; - const nameMap = new Map(); - - for (const filename of fs.readdirSync(fixtureDir)) { - if (!exts.some((e) => filename.endsWith(e))) continue; - - const src = fs.readFileSync(path.join(fixtureDir, filename), 'utf8'); - const lines = src.split('\n'); - let currentClass = null; - let classDepth = 0; - let braceDepth = 0; - - for (let i = 0; i < lines.length; i++) { - const line = lines[i]; - const lineNo = i + 1; - const key = `${filename}:${lineNo}`; - - const classMatch = line.match(/^\s*(?:export\s+)?class\s+(\w+)/); - if (classMatch) { - currentClass = classMatch[1]; - classDepth = braceDepth; - nameMap.set(key, classMatch[1]); - } - - for (const ch of line) { - if (ch === '{') braceDepth++; - else if (ch === '}') { - braceDepth--; - if (currentClass && braceDepth === classDepth) currentClass = null; - } - } - - if (classMatch) continue; - - const funcMatch = line.match(/^\s*(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*[(<]/); - if (funcMatch) { nameMap.set(key, funcMatch[1]); continue; } - - const arrowMatch = line.match(/^\s*(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=/); - if (arrowMatch && (line.includes('=>') || line.includes('function'))) { - nameMap.set(key, arrowMatch[1]); continue; - } - - if (currentClass) { - if (/^\s+constructor\s*\(/.test(line)) { - nameMap.set(key, currentClass); continue; - } - const methodMatch = line.match(/^\s+(?:async\s+|static\s+|(?:get|set)\s+)*(\w+)\s*\(/); - if (methodMatch) { - const mname = methodMatch[1]; - if (!['if', 'for', 'while', 'switch', 'catch'].includes(mname)) { - nameMap.set(key, `${currentClass}.${mname}`); - } - } - } - } - } - - return nameMap; -} - // ── Jelly ────────────────────────────────────────────────────────────────── function runJelly(lang, fixtureDir) { @@ -180,7 +112,7 @@ function runJelly(lang, fixtureDir) { } function jellyEdgesToSet(cg, fixtureDir, lang) { - const nameMap = buildNameMap(fixtureDir, 
lang); + const nameMap = buildFileLineNameMap(fixtureDir, EXTENSIONS[lang] || ['.js']); const files = cg.files; const functions = cg.functions; @@ -261,75 +193,6 @@ function runAcg(lang, fixtureDir) { } } -/** - * Build a lookup from (basename, unqualifiedName) → Set. - * - * ACG provides function names directly (e.g. "createUser") but not class - * prefixes. This map lets us resolve "createUser in service.js" → - * "UserService.createUser" using the same source scan as buildNameMap. - * - * The value is a Set to handle the case where multiple classes in the same - * file share a method name (e.g. Shape.area + Circle.area + Rectangle.area - * all in hierarchy.ts). Callers should try all candidates rather than - * assuming a 1:1 mapping. - */ -function buildAcgNameLookup(fixtureDir, lang) { - const exts = EXTENSIONS[lang] || ['.js']; - // Map: "basename:unqualifiedName" → Set<"qualifiedName"> - const lookup = new Map(); - - /** Add a (key → value) entry, accumulating into the existing Set if any. 
*/ - function add(key, value) { - const existing = lookup.get(key); - if (existing) existing.add(value); - else lookup.set(key, new Set([value])); - } - - for (const filename of fs.readdirSync(fixtureDir)) { - if (!exts.some((e) => filename.endsWith(e))) continue; - const src = fs.readFileSync(path.join(fixtureDir, filename), 'utf8'); - const lines = src.split('\n'); - let currentClass = null; - let classDepth = 0; - let braceDepth = 0; - - for (const line of lines) { - const classMatch = line.match(/^\s*(?:export\s+)?class\s+(\w+)/); - if (classMatch) { - currentClass = classMatch[1]; - classDepth = braceDepth; - // "ClassName" as an unqualified name refers to the class itself (constructor call sites) - add(`${filename}:${classMatch[1]}`, classMatch[1]); - } - for (const ch of line) { - if (ch === '{') braceDepth++; - else if (ch === '}') { - braceDepth--; - if (currentClass && braceDepth === classDepth) currentClass = null; - } - } - if (classMatch) continue; - - const funcMatch = line.match(/^\s*(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*[(<]/); - if (funcMatch) { add(`${filename}:${funcMatch[1]}`, funcMatch[1]); continue; } - - if (currentClass) { - // constructor → ClassName (ACG labels constructors as "constructor" in the source) - if (/^\s+constructor\s*\(/.test(line)) { - add(`${filename}:constructor`, currentClass); continue; - } - const methodMatch = line.match(/^\s+(?:async\s+|static\s+|(?:get|set)\s+)*(\w+)\s*\(/); - if (methodMatch) { - const mname = methodMatch[1]; - if (!['if', 'for', 'while', 'switch', 'catch'].includes(mname)) - add(`${filename}:${mname}`, `${currentClass}.${mname}`); - } - } - } - } - return lookup; -} - /** * Parse ACG text output into a set of "source→target" edge strings. * @@ -342,7 +205,7 @@ function buildAcgNameLookup(fixtureDir, lang) { * declaration line. So we use the function name directly for the lookup. 
*/ function acgOutputToSet(stdout, fixtureDir, lang) { - const lookup = buildAcgNameLookup(fixtureDir, lang); + const lookup = buildFileNameLookup(fixtureDir, EXTENSIONS[lang] || ['.js']); // 'funcName' (file.js@line:start-end) -> 'funcName' (file.js@line:start-end) const edgeRe = /^'(\w+)'\s+\((\S+?)@\d+:[^)]+\)\s+->\s+'(\w+)'\s+\((\S+?)@\d+:[^)]+\)/; diff --git a/scripts/import-jelly-micro.mjs b/scripts/import-jelly-micro.mjs index 3cafdf171..3f8566ad8 100644 --- a/scripts/import-jelly-micro.mjs +++ b/scripts/import-jelly-micro.mjs @@ -25,6 +25,7 @@ import https from 'node:https'; import os from 'node:os'; import path from 'node:path'; import { fileURLToPath } from 'node:url'; +import { buildLineNameMap } from './lib/name-map.mjs'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); const ROOT = path.resolve(__dirname, '..'); @@ -33,6 +34,11 @@ const OUT_DIR = path.join(ROOT, 'tests/benchmarks/resolution/fixtures/jelly-micr const JELLY_RAW = 'https://raw.githubusercontent.com/cs-au-dk/jelly/master/tests/micro'; const JELLY_API = 'https://api.github.com/repos/cs-au-dk/jelly/contents/tests/micro'; +// HTTP status-code ranges used by fetchText's redirect-following logic. +const HTTP_STATUS_REDIRECT_MIN = 300; +const HTTP_STATUS_REDIRECT_MAX = 400; // exclusive +const HTTP_STATUS_ERROR_MIN = 400; + // ── Args ──────────────────────────────────────────────────────────────────── const args = process.argv.slice(2); @@ -46,7 +52,11 @@ function fetchText(url, redirectsLeft = 10) { return new Promise((resolve, reject) => { const client = url.startsWith('http:') ? 
http : https; client.get(url, { headers: { 'User-Agent': 'codegraph-benchmark' } }, (res) => { - if (res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) { + if ( + res.statusCode >= HTTP_STATUS_REDIRECT_MIN && + res.statusCode < HTTP_STATUS_REDIRECT_MAX && + res.headers.location + ) { if (redirectsLeft === 0) { reject(new Error(`Too many redirects: ${url}`)); return; @@ -57,7 +67,7 @@ function fetchText(url, redirectsLeft = 10) { let body = ''; res.on('data', (d) => (body += d)); res.on('end', () => { - if (res.statusCode && res.statusCode >= 400) { + if (res.statusCode && res.statusCode >= HTTP_STATUS_ERROR_MIN) { reject(new Error(`HTTP ${res.statusCode}: ${body.slice(0, 200)}`)); } else { resolve(body); @@ -68,139 +78,6 @@ function fetchText(url, redirectsLeft = 10) { }); } -// ── Name mapping ───────────────────────────────────────────────────────────── - -/** - * Build a Map<"startLine:startCol", name> for all functions in a JS source. - * - * Extends the basic regex approach with: - * - Object method shorthand: { foo() {} } - * - Object property fn: { foo: function() {} } - * - Prototype assignment: Foo.prototype.bar = function() {} - * - Class static blocks: static { ... } - * - * Functions that cannot be named receive the label "". 
- */ -function buildNameMap(src, filename) { - const lines = src.split('\n'); - const nameMap = new Map(); // "line:col" → name (1-based line, 1-based col) - - let currentClass = null; - let classDepth = 0; - let braceDepth = 0; - - for (let i = 0; i < lines.length; i++) { - const line = lines[i]; - const lineNo = i + 1; - - // Class declaration - const classMatch = line.match(/^\s*(?:export\s+)?(?:abstract\s+)?class\s+(\w+)/); - if (classMatch) { - currentClass = classMatch[1]; - classDepth = braceDepth; - } - - // Count braces - for (const ch of line) { - if (ch === '{') braceDepth++; - else if (ch === '}') { - braceDepth--; - if (currentClass !== null && braceDepth === classDepth) { - currentClass = null; - } - } - } - - if (classMatch) { - // Class itself: name the position of the opening brace - // Jelly assigns the class-level function to the line of "class Foo {" - nameMap.set(`${lineNo}:1`, classMatch[1]); - continue; - } - - // Top-level named function declaration - const funcDecl = line.match(/^\s*(?:export\s+(?:default\s+)?)?(?:async\s+)?function\s*\*?\s+(\w+)\s*[\(<]/); - if (funcDecl) { - nameMap.set(`${lineNo}:1`, funcDecl[1]); - continue; - } - - // Variable assignment: const/let/var foo = function/() => - const varDecl = line.match(/^\s*(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=/); - if (varDecl && (line.includes('=>') || line.includes('function'))) { - nameMap.set(`${lineNo}:1`, varDecl[1]); - continue; - } - - // Prototype assignment: Foo.prototype.bar = function() {} - const protoMatch = line.match(/^\s*(\w+)\.prototype\.(\w+)\s*=\s*function/); - if (protoMatch) { - nameMap.set(`${lineNo}:1`, `${protoMatch[1]}.${protoMatch[2]}`); - continue; - } - - // Static property assignment: Foo.bar = function() {} - const staticPropMatch = line.match(/^\s*(\w+)\.(\w+)\s*=\s*function/); - if (staticPropMatch) { - nameMap.set(`${lineNo}:1`, `${staticPropMatch[1]}.${staticPropMatch[2]}`); - continue; - } - - // Class methods (inside class body) - if 
(currentClass !== null) { - // constructor - if (/^\s+constructor\s*\(/.test(line)) { - nameMap.set(`${lineNo}:1`, currentClass); - continue; - } - // static block: static { ... } - if (/^\s+static\s*\{/.test(line)) { - nameMap.set(`${lineNo}:1`, `${currentClass}.`); - continue; - } - // static property with initializer: static foo = ... - const staticProp = line.match(/^\s+static\s+(\w+)\s*=/); - if (staticProp && (line.includes('=>') || line.includes('function') || line.includes('('))) { - nameMap.set(`${lineNo}:1`, `${currentClass}.${staticProp[1]}`); - continue; - } - // Named method (including async, static, get/set, generator) - const methodMatch = line.match( - /^\s+(?:(?:static|async|get|set)\s+)*(?:\*\s*)?(\w+)\s*\(/ - ); - if (methodMatch) { - const mname = methodMatch[1]; - if (!['if', 'for', 'while', 'switch', 'catch', 'return', 'new'].includes(mname)) { - nameMap.set(`${lineNo}:1`, `${currentClass}.${mname}`); - continue; - } - } - // Class field arrow: foo = () => {} - const fieldArrow = line.match(/^\s+(\w+)\s*=\s*(?:async\s+)?\(/); - if (fieldArrow) { - nameMap.set(`${lineNo}:1`, `${currentClass}.${fieldArrow[1]}`); - continue; - } - } - - // Object shorthand method: { foo() {} } or { async foo() {} } - const objMethod = line.match(/^\s+(?:async\s+)?(\w+)\s*\(.*\)\s*\{/); - if (objMethod && !['if', 'for', 'while', 'switch', 'catch', 'function'].includes(objMethod[1])) { - nameMap.set(`${lineNo}:1`, objMethod[1]); - continue; - } - - // Object property: foo: function() {} or foo: () => {} - const objProp = line.match(/^\s+(\w+)\s*:\s*(?:async\s+)?(?:function|\(|[a-zA-Z_$].*=>)/); - if (objProp) { - nameMap.set(`${lineNo}:1`, objProp[1]); - continue; - } - } - - return nameMap; -} - // ── Jelly → expected-edges conversion ──────────────────────────────────────── const SCHEMA = '../../../expected-edges.schema.json'; @@ -209,8 +86,8 @@ const SCHEMA = '../../../expected-edges.schema.json'; * Convert a Jelly .json call graph + .js source to codegraph 
expected-edges format. * * Jelly function spec: "fileIdx:startLine:startCol:endLine:endCol" (1-based lines) - * We map each function to a name using buildNameMap. Unmapped functions get - * the label "". + * We map each function to a name using buildLineNameMap (scripts/lib/name-map.mjs). + * Unmapped functions get the label "". * * The "module root" function (always index 0 in Jelly) represents the top-level * script scope. We label it "" so edges from it are trackable. @@ -221,7 +98,7 @@ function convertJellyGraph(jellyJson, jsSrc, jsFilename) { const { files, functions, fun2fun } = jellyJson; if (!files || !functions || !fun2fun) return { edges: [], stats: {} }; - const nameMap = buildNameMap(jsSrc, jsFilename); + const nameMap = buildLineNameMap(jsSrc); // Map function index → { name, file } function resolveFunc(idx) { diff --git a/scripts/lib/name-map.mjs b/scripts/lib/name-map.mjs new file mode 100644 index 000000000..2fcf7c026 --- /dev/null +++ b/scripts/lib/name-map.mjs @@ -0,0 +1,256 @@ +/** + * Shared source-scanning heuristics for mapping declaration positions to + * codegraph-style symbol names. + * + * Used by codegraph's external call-graph comparison tooling + * (scripts/import-jelly-micro.mjs, scripts/compare-tools.mjs) to correlate + * Jelly's line-indexed function specs and ACG's textual function names with + * codegraph's own naming scheme. + * + * This is a line-oriented regex heuristic (not an AST parse) — it walks + * source text, tracks the enclosing class via a brace-depth counter, and + * matches an ordered set of declaration patterns. It's good enough for the + * small hand-authored benchmark fixtures these tools run against; it is not + * a general-purpose JS parser. + */ + +import fs from 'node:fs'; +import path from 'node:path'; + +// Bare words that can be mistaken for a class-body method declaration when +// they're the first word before `(` on a line (e.g. `return (x) => …`). 
+const METHOD_KEYWORD_EXCLUSIONS = new Set(['if', 'for', 'while', 'switch', 'catch', 'return', 'new']); +const OBJECT_METHOD_KEYWORD_EXCLUSIONS = new Set(['if', 'for', 'while', 'switch', 'catch', 'function']); + +const CLASS_DECL_RE = /^\s*(?:export\s+)?(?:abstract\s+)?class\s+(\w+)/; + +/** + * Matchers evaluated on every line regardless of class scope, in priority + * order. Each: { regex, extract(match) => name, guard?(line, match) => boolean } + */ +const TOP_LEVEL_MATCHERS = [ + { + // function foo() / export default function* foo() + regex: /^\s*(?:export\s+(?:default\s+)?)?(?:async\s+)?function\s*\*?\s+(\w+)\s*[\(<]/, + extract: (m) => m[1], + }, + { + // const/let/var foo = function ... | foo = () => ... + regex: /^\s*(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=/, + guard: (line) => line.includes('=>') || line.includes('function'), + extract: (m) => m[1], + }, + { + // Foo.prototype.bar = function() {} + regex: /^\s*(\w+)\.prototype\.(\w+)\s*=\s*function/, + extract: (m) => `${m[1]}.${m[2]}`, + }, + { + // Foo.bar = function() {} + regex: /^\s*(\w+)\.(\w+)\s*=\s*function/, + extract: (m) => `${m[1]}.${m[2]}`, + }, +]; + +/** + * Matchers evaluated only while inside a class body. `extract` receives the + * regex match and the enclosing class name. The constructor matcher is + * flagged separately (`isConstructor`) since it needs distinct accumulation + * in name-lookup consumers (ACG labels constructor call targets literally as + * "constructor", not ".constructor"). + */ +const CLASS_MEMBER_MATCHERS = [ + { + regex: /^\s+constructor\s*\(/, + isConstructor: true, + extract: (_m, cls) => cls, + }, + { + // static { ... } + regex: /^\s+static\s*\{/, + extract: (_m, cls) => `${cls}.`, + }, + { + // static foo = ... 
(only when it looks like a function/call, not a plain value) + regex: /^\s+static\s+(\w+)\s*=/, + guard: (line) => line.includes('=>') || line.includes('function') || line.includes('('), + extract: (m, cls) => `${cls}.${m[1]}`, + }, + { + // named method, incl. async/static/get/set/generator + regex: /^\s+(?:(?:static|async|get|set)\s+)*(?:\*\s*)?(\w+)\s*\(/, + guard: (_line, m) => !METHOD_KEYWORD_EXCLUSIONS.has(m[1]), + extract: (m, cls) => `${cls}.${m[1]}`, + }, + { + // class field arrow: foo = () => {} + regex: /^\s+(\w+)\s*=\s*(?:async\s+)?\(/, + extract: (m, cls) => `${cls}.${m[1]}`, + }, +]; + +/** + * Object-literal matchers, evaluated as the final fallback on any line not + * already claimed by a top-level or class-member matcher. + */ +const OBJECT_MEMBER_MATCHERS = [ + { + // { foo() {} } or { async foo() {} } + regex: /^\s+(?:async\s+)?(\w+)\s*\(.*\)\s*\{/, + guard: (_line, m) => !OBJECT_METHOD_KEYWORD_EXCLUSIONS.has(m[1]), + extract: (m) => m[1], + }, + { + // foo: function() {} or foo: () => {} + regex: /^\s+(\w+)\s*:\s*(?:async\s+)?(?:function|\(|[a-zA-Z_$].*=>)/, + extract: (m) => m[1], + }, +]; + +/** Run `line` through an ordered matcher list; return the first hit or null. */ +function tryMatchers(line, matchers, ctx) { + for (const matcher of matchers) { + const m = line.match(matcher.regex); + if (!m) continue; + if (matcher.guard && !matcher.guard(line, m)) continue; + return { name: matcher.extract(m, ctx), isConstructor: !!matcher.isConstructor }; + } + return null; +} + +/** + * Walk `src` line by line, tracking the enclosing class via a brace-depth + * counter, and invoke `onDeclaration(entry)` for every recognized + * class/function/method declaration. + * + * `entry` is `{ line, name, className, isConstructor }` (1-based line + * number). 
`className` is set only for members matched inside a class body; + * `isConstructor` distinguishes constructor declarations from other members + * that resolve to the same "ClassName" value (the class declaration itself). + * + * This is the shared scanning core behind both benchmark tools' heuristic + * name resolution: position→name for Jelly's line-indexed call graph, and + * name→qualified-name lookups for ACG's textual output. + */ +export function scanDeclarations(src, onDeclaration) { + const lines = src.split('\n'); + let currentClass = null; + let classDepth = 0; + let braceDepth = 0; + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + const lineNo = i + 1; + + const classMatch = line.match(CLASS_DECL_RE); + if (classMatch) { + currentClass = classMatch[1]; + classDepth = braceDepth; + } + + for (const ch of line) { + if (ch === '{') braceDepth++; + else if (ch === '}') { + braceDepth--; + if (currentClass !== null && braceDepth === classDepth) currentClass = null; + } + } + + if (classMatch) { + onDeclaration({ line: lineNo, name: classMatch[1], className: null, isConstructor: false }); + continue; + } + + const topLevel = tryMatchers(line, TOP_LEVEL_MATCHERS, null); + if (topLevel) { + onDeclaration({ line: lineNo, name: topLevel.name, className: null, isConstructor: false }); + continue; + } + + if (currentClass !== null) { + const member = tryMatchers(line, CLASS_MEMBER_MATCHERS, currentClass); + if (member) { + onDeclaration({ + line: lineNo, + name: member.name, + className: currentClass, + isConstructor: member.isConstructor, + }); + continue; + } + } + + const obj = tryMatchers(line, OBJECT_MEMBER_MATCHERS, null); + if (obj) { + onDeclaration({ line: lineNo, name: obj.name, className: null, isConstructor: false }); + } + } +} + +/** + * Build a Map<"line:1", name> for all functions/methods/classes in a single + * JS source string (1-based line, column normalised to 1 on insert). 
+ * + * Used by import-jelly-micro.mjs to resolve Jelly's line-indexed function + * specs within one source file. + */ +export function buildLineNameMap(src) { + const nameMap = new Map(); + scanDeclarations(src, (d) => nameMap.set(`${d.line}:1`, d.name)); + return nameMap; +} + +/** + * Build a Map<"filename:line", name> across every file in `dir` whose + * extension is in `exts`. + * + * Used by compare-tools.mjs to resolve Jelly's (file, line) function specs + * against a multi-file fixture directory. + */ +export function buildFileLineNameMap(dir, exts) { + const nameMap = new Map(); + for (const filename of fs.readdirSync(dir)) { + if (!exts.some((e) => filename.endsWith(e))) continue; + const src = fs.readFileSync(path.join(dir, filename), 'utf8'); + scanDeclarations(src, (d) => nameMap.set(`${filename}:${d.line}`, d.name)); + } + return nameMap; +} + +/** + * Build a Map<"filename:unqualifiedName", Set> across every + * file in `dir` whose extension is in `exts`. + * + * Used by compare-tools.mjs to resolve ACG's unqualified function names (no + * class prefix) back to codegraph-style qualified names. A Set is needed + * because multiple classes in the same file can share a method name (e.g. + * Shape.area + Circle.area + Rectangle.area) — callers should try all + * candidates rather than assume a 1:1 mapping. + * + * Constructors are indexed under the literal key "constructor" (ACG labels + * constructor call targets that way), mapping to the enclosing class name. 
+ */ +export function buildFileNameLookup(dir, exts) { + const lookup = new Map(); + const add = (key, value) => { + const existing = lookup.get(key); + if (existing) existing.add(value); + else lookup.set(key, new Set([value])); + }; + + for (const filename of fs.readdirSync(dir)) { + if (!exts.some((e) => filename.endsWith(e))) continue; + const src = fs.readFileSync(path.join(dir, filename), 'utf8'); + scanDeclarations(src, (d) => { + if (d.isConstructor) { + add(`${filename}:constructor`, d.name); + } else if (d.className) { + const member = d.name.slice(d.className.length + 1); + add(`${filename}:${member}`, d.name); + } else { + add(`${filename}:${d.name}`, d.name); + } + }); + } + return lookup; +} From e945bcae7a2d691118a5787fb792dd5eb90044a1 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 04:36:44 -0600 Subject: [PATCH 05/26] fix: replace event-loop-blocking Atomics.wait with shared sleepSync in readFileSafe readFileSafe's Atomics.wait busy-block froze the entire Node.js event loop (all I/O and timer callbacks) for up to 100ms per retry on the watch-mode hot path. Extracts journal.ts's existing sleepSync busy-spin helper into src/shared/sleep.ts so both readFileSafe and journal.ts's lock-retry loop share one implementation instead of duplicating it. docs check acknowledged: internal bug fix, no feature/language/architecture table changes warranted in README.md, CLAUDE.md, or ROADMAP.md. 
Impact: 2 functions changed, 31 affected --- src/domain/graph/builder/helpers.ts | 4 ++-- src/domain/graph/journal.ts | 12 +----------- src/shared/sleep.ts | 20 ++++++++++++++++++++ 3 files changed, 23 insertions(+), 13 deletions(-) create mode 100644 src/shared/sleep.ts diff --git a/src/domain/graph/builder/helpers.ts b/src/domain/graph/builder/helpers.ts index 1c6859667..f0f34cffa 100644 --- a/src/domain/graph/builder/helpers.ts +++ b/src/domain/graph/builder/helpers.ts @@ -10,6 +10,7 @@ import { purgeFilesData } from '../../../db/index.js'; import { debug, warn } from '../../../infrastructure/logger.js'; import { buildIgnoreSet, EXTENSIONS, normalizePath } from '../../../shared/constants.js'; import { compileGlobs, globToRegex, matchesAny } from '../../../shared/globs.js'; +import { sleepSync } from '../../../shared/sleep.js'; import type { BetterSqlite3Database, CodegraphConfig, @@ -326,8 +327,7 @@ export function readFileSafe(filePath: string, retries: number = 2): string { return fs.readFileSync(filePath, 'utf-8'); } catch (err: unknown) { if (attempt < retries && TRANSIENT_CODES.has((err as NodeJS.ErrnoException).code ?? 
'')) { - const sharedBuf = new SharedArrayBuffer(4); - Atomics.wait(new Int32Array(sharedBuf), 0, 0, RETRY_DELAY_MS); + sleepSync(RETRY_DELAY_MS); continue; } throw err; diff --git a/src/domain/graph/journal.ts b/src/domain/graph/journal.ts index 8d68256ca..01775ace5 100644 --- a/src/domain/graph/journal.ts +++ b/src/domain/graph/journal.ts @@ -2,6 +2,7 @@ import crypto from 'node:crypto'; import fs from 'node:fs'; import path from 'node:path'; import { debug, warn } from '../../infrastructure/logger.js'; +import { sleepSync } from '../../shared/sleep.js'; export const JOURNAL_FILENAME = 'changes.journal'; const HEADER_PREFIX = '# codegraph-journal v1 '; @@ -10,17 +11,6 @@ const LOCK_TIMEOUT_MS = 5_000; const LOCK_STALE_MS = 30_000; const LOCK_RETRY_MS = 25; -// Busy-spin sleep avoids blocking the Node.js event loop (unlike Atomics.wait, -// which freezes all I/O and timer callbacks). The retry interval is short -// (25ms), so the CPU cost is negligible while keeping unrelated callbacks -// responsive in watcher processes. -function sleepSync(ms: number): void { - const end = process.hrtime.bigint() + BigInt(ms) * 1_000_000n; - while (process.hrtime.bigint() < end) { - /* spin */ - } -} - function isPidAlive(pid: number): boolean { if (!Number.isFinite(pid) || pid <= 0) return false; try { diff --git a/src/shared/sleep.ts b/src/shared/sleep.ts new file mode 100644 index 000000000..8e3accb41 --- /dev/null +++ b/src/shared/sleep.ts @@ -0,0 +1,20 @@ +/** + * Synchronous sleep utilities for short retry/backoff loops. + */ + +/** + * Busy-spin sleep for `ms` milliseconds. + * + * Implemented as a spin on `process.hrtime.bigint()` rather than + * `Atomics.wait`. NOTE(review): a spin still blocks the calling thread — no + * event-loop I/O or timer callbacks run until it returns — and it also + * burns CPU for the whole interval, so it is no more responsive than + * `Atomics.wait`. Keep callers to short (tens of ms) retry delays; for + * anything longer, use an asynchronous timer instead.
+ */ +export function sleepSync(ms: number): void { + const end = process.hrtime.bigint() + BigInt(ms) * 1_000_000n; + while (process.hrtime.bigint() < end) { + /* spin */ + } +} From 4a348dbe6e0bc214d9f97c4a6dd6d847fb40d0a6 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 04:52:12 -0600 Subject: [PATCH 06/26] refactor: extract shared resolveFileTree helper from dataflow.ts and complexity.ts Impact: 14 functions changed, 10 affected --- src/features/complexity.ts | 48 +++++--------- src/features/dataflow.ts | 57 ++++------------- src/features/shared/resolve-file-tree.ts | 81 ++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 75 deletions(-) create mode 100644 src/features/shared/resolve-file-tree.ts diff --git a/src/features/complexity.ts b/src/features/complexity.ts index 8e1b6fb06..fb3cca381 100644 --- a/src/features/complexity.ts +++ b/src/features/complexity.ts @@ -1,4 +1,3 @@ -import fs from 'node:fs'; import path from 'node:path'; import { computeLOCMetrics as _computeLOCMetrics, @@ -22,6 +21,7 @@ import type { LOCMetrics, TreeSitterNode, } from '../types.js'; +import { resolveFileTree } from './shared/resolve-file-tree.js'; // Re-export rules for backward compatibility export { COMPLEXITY_RULES, HALSTEAD_RULES }; @@ -437,41 +437,25 @@ function getTreeForFile( extToLang: Map | null, getParser: (parsers: any, absPath: string) => any, ): { tree: { rootNode: TreeSitterNode }; langId: string } | null { - let tree = symbols._tree; - let langId = symbols._langId; - const allPrecomputed = symbols.definitions.every( (d) => (d.kind !== 'function' && d.kind !== 'method') || d.complexity, ); - if (!allPrecomputed && !tree) { - const ext = path.extname(relPath).toLowerCase(); - if (!COMPLEXITY_EXTENSIONS.has(ext)) return null; - if (!extToLang) return null; - langId = extToLang.get(ext); - if (!langId) return null; - - const absPath = path.join(rootDir, relPath); - let code: string; - try { - code = fs.readFileSync(absPath, 'utf-8'); - } catch 
(e: unknown) { - debug(`complexity: cannot read ${relPath}: ${(e as Error).message}`); - return null; - } - - const parser = getParser(parsers, absPath); - if (!parser) return null; - - try { - tree = parser.parse(code); - } catch (e: unknown) { - debug(`complexity: parse failed for ${relPath}: ${(e as Error).message}`); - return null; - } - } - - return tree && langId ? { tree: tree as { rootNode: TreeSitterNode }, langId } : null; + // Every definition already has precomputed complexity and there's no cached + // tree to fall back on — nothing to parse. + if (allPrecomputed && !symbols._tree) return null; + + return resolveFileTree({ + relPath, + rootDir, + cachedTree: symbols._tree, + cachedLangId: symbols._langId, + extensions: COMPLEXITY_EXTENSIONS, + extToLang, + parsers, + getParser, + logPrefix: 'complexity', + }); } function upsertPrecomputedComplexity( diff --git a/src/features/dataflow.ts b/src/features/dataflow.ts index 369265bed..102f7f20d 100644 --- a/src/features/dataflow.ts +++ b/src/features/dataflow.ts @@ -9,7 +9,6 @@ * Opt-in via `build --dataflow`. Supports all languages with DATAFLOW_RULES. 
*/ -import fs from 'node:fs'; import path from 'node:path'; import { DATAFLOW_RULES } from '../ast-analysis/rules/index.js'; import { @@ -27,6 +26,7 @@ import type { NormalizedSymbol } from '../shared/normalize.js'; import { paginateResult } from '../shared/paginate.js'; import type { BetterSqlite3Database, NativeDatabase, NodeRow, TreeSitterNode } from '../types.js'; import { findNodes } from './shared/find-nodes.js'; +import { resolveFileTree } from './shared/resolve-file-tree.js'; // Re-export for backward compatibility export { _makeDataflowRules as makeDataflowRules, DATAFLOW_RULES }; @@ -116,49 +116,20 @@ function getDataflowForFile( ): DataflowResult | null { if (symbols.dataflow) return symbols.dataflow; - let tree = symbols._tree; - let langId = symbols._langId; - - if (!tree) { - if (!getParserFn) return null; - const ext = path.extname(relPath).toLowerCase(); - langId = extToLang.get(ext); - if (!langId || !DATAFLOW_RULES.has(langId)) return null; - - const absPath = path.join(rootDir, relPath); - let code: string; - try { - code = fs.readFileSync(absPath, 'utf-8'); - } catch (e: unknown) { - debug(`dataflow: cannot read ${relPath}: ${(e as Error).message}`); - return null; - } - - const parser = getParserFn(parsers, absPath); - if (!parser) return null; - - try { - tree = parser.parse(code); - } catch (e: unknown) { - debug(`dataflow: parse failed for ${relPath}: ${(e as Error).message}`); - return null; - } - } - - if (!langId) { - const ext = path.extname(relPath).toLowerCase(); - langId = extToLang.get(ext); - if (!langId) return null; - } - - if (!DATAFLOW_RULES.has(langId)) return null; - - return extractDataflow( - tree as { rootNode: TreeSitterNode }, + const resolved = resolveFileTree({ relPath, - symbols.definitions, - langId, - ); + rootDir, + cachedTree: symbols._tree, + cachedLangId: symbols._langId, + extensions: DATAFLOW_EXTENSIONS, + extToLang, + parsers, + getParser: getParserFn, + logPrefix: 'dataflow', + }); + if (!resolved) return 
null; + + return extractDataflow(resolved.tree, relPath, symbols.definitions, resolved.langId); } interface ArgFlow { diff --git a/src/features/shared/resolve-file-tree.ts b/src/features/shared/resolve-file-tree.ts new file mode 100644 index 000000000..f34d84d9d --- /dev/null +++ b/src/features/shared/resolve-file-tree.ts @@ -0,0 +1,81 @@ +import fs from 'node:fs'; +import path from 'node:path'; +import { debug } from '../../infrastructure/logger.js'; +import type { TreeSitterNode } from '../../types.js'; + +export interface ResolveFileTreeOptions { + /** Repo-relative path of the file being resolved. */ + relPath: string; + /** Absolute root directory the repo-relative path is joined against. */ + rootDir: string; + /** Already-parsed tree, if the caller has one cached (e.g. from a fresh build). */ + cachedTree?: { rootNode: TreeSitterNode } | null; + /** Language id paired with `cachedTree`, if known. */ + cachedLangId?: string | null; + /** Extensions this analysis supports — gates whether a fallback parse is attempted. */ + extensions: Set; + /** Extension → language id map used both for the allowlist gate and langId lookup. */ + extToLang: Map | null | undefined; + /** Opaque parser table passed through to `getParser`. */ + parsers: unknown; + /** Resolves a tree-sitter parser instance for a given absolute path. */ + getParser: + | ((parsers: unknown, absPath: string) => { parse(code: string): unknown } | null | undefined) + | null; + /** Prefix used in debug log messages (e.g. "complexity", "dataflow"). */ + logPrefix: string; +} + +export interface ResolvedFileTree { + tree: { rootNode: TreeSitterNode }; + langId: string; +} + +/** + * Resolve a parsed tree-sitter tree for a file: prefer an already-cached + * tree/langId pair, otherwise read the file from disk and parse it, with + * debug-logged fallback on read/parse errors. 
Shared by + * complexity.ts's getTreeForFile and dataflow.ts's getDataflowForFile, + * which previously duplicated this ~20-line cache/read/parse dance. + */ +export function resolveFileTree(opts: ResolveFileTreeOptions): ResolvedFileTree | null { + const { relPath, rootDir, extensions, extToLang, parsers, getParser, logPrefix } = opts; + let tree = opts.cachedTree; + let langId = opts.cachedLangId; + + if (!tree) { + if (!getParser) return null; + const ext = path.extname(relPath).toLowerCase(); + if (!extensions.has(ext)) return null; + if (!extToLang) return null; + langId = extToLang.get(ext); + if (!langId) return null; + + const absPath = path.join(rootDir, relPath); + let code: string; + try { + code = fs.readFileSync(absPath, 'utf-8'); + } catch (e: unknown) { + debug(`${logPrefix}: cannot read ${relPath}: ${(e as Error).message}`); + return null; + } + + const parser = getParser(parsers, absPath); + if (!parser) return null; + + try { + tree = parser.parse(code) as { rootNode: TreeSitterNode }; + } catch (e: unknown) { + debug(`${logPrefix}: parse failed for ${relPath}: ${(e as Error).message}`); + return null; + } + } + + if (!langId) { + const ext = path.extname(relPath).toLowerCase(); + langId = extToLang?.get(ext); + if (!langId) return null; + } + + return tree && langId ? { tree, langId } : null; +} From 8fed8bc2355e9c0957b57ec1bc59c5de031f300c Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 05:15:47 -0600 Subject: [PATCH 07/26] refactor: extend DEFAULTS with previously-hardcoded config constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Registers resolveSecrets' execFileSync timeout/maxBuffer in DEFAULTS.llm (apiKeyCommandTimeoutMs, apiKeyCommandMaxBufferBytes) and wires resolveSecrets to read them from config instead of hardcoding. Also adds three purely-additive @reserved DEFAULTS entries for constants hardcoded elsewhere in the codebase (build. 
largeCodebaseFileThreshold, db.busyTimeoutMs, community. capacityGrowthFactor) so their consumer files can be wired to them in follow-up commits. docs check acknowledged — no new feature/language/architecture change; docs/guides/configuration.md (the actual config reference) is already updated in this commit. Impact: 5 functions changed, 87 affected --- docs/guides/configuration.md | 4 +++- src/infrastructure/config.ts | 31 +++++++++++++++++++++++++++++-- src/types.ts | 34 ++++++++++++++++++++++++++++++++++ tests/unit/config.test.ts | 17 +++++++++++++++-- 4 files changed, 81 insertions(+), 5 deletions(-) diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index 02e08c5d6..ff338f20d 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -214,8 +214,10 @@ Used by features that call out to a chat-completion API (e.g. query expansion), | `model` | `string \| null` | `null` | Model identifier passed to the provider. | | `baseUrl` | `string \| null` | `null` | Override the provider's base URL (for compatible proxies, local servers, etc.). | | `apiKey` | `string \| null` | `null` | Plaintext API key. Prefer `apiKeyCommand` or env vars over this. | -| `apiKeyCommand` | `string \| null` | `null` | Shell-out command that prints the key to stdout. Split on whitespace and run via `execFileSync` (no shell — `$(...)`, pipes, globs, and variable expansion are not supported). 10s timeout, 64 KB max output. | +| `apiKeyCommand` | `string \| null` | `null` | Shell-out command that prints the key to stdout. Split on whitespace and run via `execFileSync` (no shell — `$(...)`, pipes, globs, and variable expansion are not supported). Timeout/output cap controlled by `apiKeyCommandTimeoutMs`/`apiKeyCommandMaxBufferBytes` below. | | `requestTimeoutMs` | `number` | `120000` | Per-request timeout for remote HTTP calls made against `baseUrl` (currently the [remote embedding provider](#embeddings-embeddings)). 
Aborts and throws if a self-hosted server hangs mid-request instead of blocking indefinitely. | +| `apiKeyCommandTimeoutMs` | `number` | `10000` | Timeout for the `apiKeyCommand` subprocess. Prevents a hung secret-manager CLI from blocking config loading indefinitely. | +| `apiKeyCommandMaxBufferBytes` | `number` | `65536` | Max stdout buffer size (bytes) for the `apiKeyCommand` subprocess. | Resolution order (first non-empty wins): `apiKeyCommand` output → `CODEGRAPH_LLM_API_KEY` env var → `apiKey` field. diff --git a/src/infrastructure/config.ts b/src/infrastructure/config.ts index c8a146873..a6bf6321e 100644 --- a/src/infrastructure/config.ts +++ b/src/infrastructure/config.ts @@ -29,10 +29,27 @@ export const DEFAULTS = { dbPath: '.codegraph/graph.db', driftThreshold: 0.2, smallFilesThreshold: 5, + /** + * Minimum existing file-node count for a repo to be treated as a "large + * codebase" when deciding whether to scope node loading to changed files. + * @reserved — currently not wired; loadNodes() in + * `src/domain/graph/builder/stages/build-edges.ts` still uses the + * hardcoded literal `20` at its `existingFileCount > 20` gate. + */ + largeCodebaseFileThreshold: 20, typescriptResolver: true, engine: 'auto' as 'auto' | 'native' | 'wasm', fastSkipDiag: false, }, + db: { + /** + * SQLite `busy_timeout` pragma (ms) applied to every opened connection. + * @reserved — currently not wired; `src/db/connection.ts` still sets the + * hardcoded literal `5000` directly via `db.pragma('busy_timeout = 5000')` + * in both `openDb` and `openReadonlyOrFail`. 
+ */ + busyTimeoutMs: 5000, + }, query: { defaultDepth: 3, defaultLimit: 20, @@ -50,6 +67,8 @@ export const DEFAULTS = { apiKey: null as string | null, apiKeyCommand: null as string | null, requestTimeoutMs: 120_000, + apiKeyCommandTimeoutMs: 10_000, + apiKeyCommandMaxBufferBytes: 64 * 1024, }, search: { defaultMinScore: 0.2, rrfK: 60, topK: 15, similarityWarnThreshold: 0.85 }, ci: { failOnCycles: false, impactThreshold: null as number | null }, @@ -119,6 +138,14 @@ export const DEFAULTS = { maxLevels: 50, maxLocalPasses: 20, refinementTheta: 1.0, + /** + * Growth multiplier applied when a Leiden partition's per-community + * typed arrays need to be resized to fit a larger community count. + * @reserved — currently not wired; `ensureCommCapacity()` in + * `src/graph/algorithms/leiden/partition.ts` still uses the hardcoded + * literal `1.5` directly. + */ + capacityGrowthFactor: 1.5, }, structure: { cohesionThreshold: 0.3, @@ -747,8 +774,8 @@ export function resolveSecrets(config: CodegraphConfig): CodegraphConfig { try { const result = execFileSync(executable!, args, { encoding: 'utf-8', - timeout: 10_000, - maxBuffer: 64 * 1024, + timeout: config.llm.apiKeyCommandTimeoutMs, + maxBuffer: config.llm.apiKeyCommandMaxBufferBytes, stdio: ['ignore', 'pipe', 'pipe'], }).trim(); if (result) { diff --git a/src/types.ts b/src/types.ts index d52bd81bf..9ffb1f9b7 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1307,6 +1307,13 @@ export interface CodegraphConfig { dbPath: string; driftThreshold: number; smallFilesThreshold: number; + /** + * Minimum existing file-node count for a repo to be treated as a "large + * codebase" when deciding whether to scope node loading to changed files. + * @reserved — currently not wired; see `largeCodebaseFileThreshold` in + * `src/infrastructure/config.ts` for wiring status. + */ + largeCodebaseFileThreshold: number; /** * Use the TypeScript compiler API to enrich typeMap for .ts/.tsx files. 
* Improves method-call edge accuracy for patterns like `const svc = container.get()`. @@ -1337,6 +1344,15 @@ export interface CodegraphConfig { fastSkipDiag: boolean; }; + db: { + /** + * SQLite `busy_timeout` pragma (ms) applied to every opened connection. + * @reserved — currently not wired; see `busyTimeoutMs` in + * `src/infrastructure/config.ts` for wiring status. + */ + busyTimeoutMs: number; + }; + query: { defaultDepth: number; defaultLimit: number; @@ -1377,6 +1393,17 @@ export interface CodegraphConfig { * self-hosted server from hanging the process indefinitely. Default: 120000. */ requestTimeoutMs: number; + /** + * Timeout (ms) for the `apiKeyCommand` subprocess spawned via `execFileSync`. + * Prevents a hung secret-manager CLI from blocking config loading indefinitely. + * Default: 10000. + */ + apiKeyCommandTimeoutMs: number; + /** + * Max stdout buffer size (bytes) for the `apiKeyCommand` subprocess spawned via + * `execFileSync`. Default: 65536 (64 KB). + */ + apiKeyCommandMaxBufferBytes: number; }; search: { @@ -1443,6 +1470,13 @@ export interface CodegraphConfig { maxLevels: number; maxLocalPasses: number; refinementTheta: number; + /** + * Growth multiplier applied when a Leiden partition's per-community + * typed arrays need to be resized to fit a larger community count. + * @reserved — currently not wired; see `capacityGrowthFactor` in + * `src/infrastructure/config.ts` for wiring status. 
+ */ + capacityGrowthFactor: number; }; structure: { cohesionThreshold: number }; diff --git a/tests/unit/config.test.ts b/tests/unit/config.test.ts index 7979a6313..df0baede5 100644 --- a/tests/unit/config.test.ts +++ b/tests/unit/config.test.ts @@ -69,6 +69,8 @@ describe('DEFAULTS', () => { apiKey: null, apiKeyCommand: null, requestTimeoutMs: 120_000, + apiKeyCommandTimeoutMs: 10_000, + apiKeyCommandMaxBufferBytes: 64 * 1024, }); }); @@ -131,6 +133,7 @@ describe('DEFAULTS', () => { maxLevels: 50, maxLocalPasses: 20, refinementTheta: 1.0, + capacityGrowthFactor: 1.5, }); }); @@ -138,6 +141,14 @@ describe('DEFAULTS', () => { expect(DEFAULTS.structure).toEqual({ cohesionThreshold: 0.3 }); }); + it('has db defaults', () => { + expect(DEFAULTS.db).toEqual({ busyTimeoutMs: 5000 }); + }); + + it('has build defaults', () => { + expect(DEFAULTS.build).toHaveProperty('largeCodebaseFileThreshold', 20); + }); + it('has mcp defaults', () => { expect(DEFAULTS.mcp.defaults.list_functions).toBe(100); expect(DEFAULTS.mcp.defaults.fn_impact).toBe(5); @@ -455,13 +466,15 @@ describe('resolveSecrets', () => { baseUrl: null, apiKey: null, apiKeyCommand: 'op read secret/key', + apiKeyCommandTimeoutMs: DEFAULTS.llm.apiKeyCommandTimeoutMs, + apiKeyCommandMaxBufferBytes: DEFAULTS.llm.apiKeyCommandMaxBufferBytes, }, }; resolveSecrets(config); expect(mockExecFile).toHaveBeenCalledWith('op', ['read', 'secret/key'], { encoding: 'utf-8', - timeout: 10_000, - maxBuffer: 64 * 1024, + timeout: DEFAULTS.llm.apiKeyCommandTimeoutMs, + maxBuffer: DEFAULTS.llm.apiKeyCommandMaxBufferBytes, stdio: ['ignore', 'pipe', 'pipe'], }); expect(config.llm.apiKey).toBe('secret-key-123'); From f0a488261d7b5c2a27182d361c7eef4d85ce683c Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 05:40:34 -0600 Subject: [PATCH 08/26] refactor: move generatePlotHTML from features/graph-enrichment.ts to presentation/plot.ts Impact: 4 functions changed, 3 affected --- CLAUDE.md | 3 +- src/cli/commands/plot.ts | 3 +- 
src/features/graph-enrichment.ts | 18 +---- src/presentation/plot.ts | 21 +++++ tests/graph/viewer.test.ts | 107 +------------------------- tests/presentation/plot.test.ts | 128 +++++++++++++++++++++++++++++++ 6 files changed, 155 insertions(+), 125 deletions(-) create mode 100644 src/presentation/plot.ts create mode 100644 tests/presentation/plot.test.ts diff --git a/CLAUDE.md b/CLAUDE.md index cc8f2a65b..cb147dac9 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -136,9 +136,10 @@ Source is TypeScript in `src/`, compiled via `tsup`. The Rust native engine live | `features/snapshot.ts` | SQLite DB backup and restore | | `features/structure.ts` | Codebase structure analysis | | `features/triage.ts` | Risk-ranked audit priority queue (delegates scoring to `graph/classifiers/`) | -| `features/graph-enrichment.ts` | Data enrichment for HTML viewer (complexity, communities, fan-in/out) | +| `features/graph-enrichment.ts` | Plot data preparation for the HTML viewer (complexity, communities, fan-in/out) — pure data prep, no HTML/presentation imports beyond shared color/config types | | **`presentation/`** | **Pure output formatting + CLI command wrappers** | | `presentation/viewer.ts` | Interactive HTML renderer with vis-network | +| `presentation/plot.ts` | Thin `generatePlotHTML` wrapper — prepares data via `features/graph-enrichment.ts`, renders via `presentation/viewer.ts` | | `presentation/queries-cli/` | CLI display wrappers for query functions, split by concern: `path.ts`, `overview.ts`, `inspect.ts`, `impact.ts`, `exports.ts` | | `presentation/*.ts` | Command formatters (audit, batch, check, communities, complexity, etc.) 
— call `features/*.ts`, format output, set exit codes | | `presentation/export.ts` | DOT/Mermaid/GraphML/Neo4j serializers | diff --git a/src/cli/commands/plot.ts b/src/cli/commands/plot.ts index bc25ae732..9a40923ae 100644 --- a/src/cli/commands/plot.ts +++ b/src/cli/commands/plot.ts @@ -41,7 +41,8 @@ export const command: CommandDefinition = { ['--color-by ', 'Color nodes by: kind | role | community | complexity'], ], async execute(_args, opts, ctx) { - const { generatePlotHTML, loadPlotConfig } = await import('../../features/graph-enrichment.js'); + const { generatePlotHTML } = await import('../../presentation/plot.js'); + const { loadPlotConfig } = await import('../../presentation/viewer.js'); const os = await import('node:os'); const { db, close } = openGraph(opts as { db?: string }); diff --git a/src/features/graph-enrichment.ts b/src/features/graph-enrichment.ts index 886d09bf8..a05674ff6 100644 --- a/src/features/graph-enrichment.ts +++ b/src/features/graph-enrichment.ts @@ -7,7 +7,7 @@ import { DEFAULT_NODE_COLORS, DEFAULT_ROLE_COLORS, } from '../presentation/colors.js'; -import { DEFAULT_CONFIG, type PlotConfig, renderPlotHTML } from '../presentation/viewer.js'; +import { DEFAULT_CONFIG, type PlotConfig } from '../presentation/viewer.js'; import type { BetterSqlite3Database } from '../types.js'; // Re-export presentation utilities for backward compatibility @@ -475,19 +475,3 @@ function prepareFileLevelData( return { nodes: visNodes, edges: visEdges, seedNodeIds: selectFileSeedNodes(visNodes, cfg) }; } - -// ─── HTML Generation (thin wrapper) ────────────────────────────────── - -export function generatePlotHTML( - db: BetterSqlite3Database, - opts: { - fileLevel?: boolean; - noTests?: boolean; - minConfidence?: number; - config?: PlotConfig; - } = {}, -): string { - const cfg = opts.config || DEFAULT_CONFIG; - const data = prepareGraphData(db, opts); - return renderPlotHTML(data, cfg); -} diff --git a/src/presentation/plot.ts b/src/presentation/plot.ts 
new file mode 100644 index 000000000..b666198e6 --- /dev/null +++ b/src/presentation/plot.ts @@ -0,0 +1,21 @@ +import { prepareGraphData } from '../features/graph-enrichment.js'; +import type { BetterSqlite3Database } from '../types.js'; +import { DEFAULT_CONFIG, type PlotConfig, renderPlotHTML } from './viewer.js'; + +/** + * Generate a full interactive HTML plot document for the dependency graph. + * Thin wrapper: prepares graph data (features layer) then renders it (presentation layer). + */ +export function generatePlotHTML( + db: BetterSqlite3Database, + opts: { + fileLevel?: boolean; + noTests?: boolean; + minConfidence?: number; + config?: PlotConfig; + } = {}, +): string { + const cfg = opts.config || DEFAULT_CONFIG; + const data = prepareGraphData(db, opts); + return renderPlotHTML(data, cfg); +} diff --git a/tests/graph/viewer.test.ts b/tests/graph/viewer.test.ts index 9347833f6..368639cc0 100644 --- a/tests/graph/viewer.test.ts +++ b/tests/graph/viewer.test.ts @@ -5,11 +5,7 @@ import Database from 'better-sqlite3'; import { describe, expect, it } from 'vitest'; import { initSchema } from '../../src/db/index.js'; -import { - generatePlotHTML, - loadPlotConfig, - prepareGraphData, -} from '../../src/features/graph-enrichment.js'; +import { loadPlotConfig, prepareGraphData } from '../../src/features/graph-enrichment.js'; function createTestDb() { const db = new Database(':memory:'); @@ -36,107 +32,6 @@ function insertComplexity(db, nodeId, cognitive, cyclomatic, mi) { ).run(nodeId, cognitive, cyclomatic, mi); } -describe('generatePlotHTML', () => { - it('returns a valid HTML document', () => { - const db = createTestDb(); - const a = insertNode(db, 'src/a.js', 'file', 'src/a.js', 0); - const b = insertNode(db, 'src/b.js', 'file', 'src/b.js', 0); - insertEdge(db, a, b, 'imports'); - - const html = generatePlotHTML(db); - expect(html).toContain(''); - expect(html).toContain(''); - db.close(); - }); - - it('embeds graph data as JSON', () => { - const db = 
createTestDb(); - const a = insertNode(db, 'src/a.js', 'file', 'src/a.js', 0); - const b = insertNode(db, 'src/b.js', 'file', 'src/b.js', 0); - insertEdge(db, a, b, 'imports'); - - const html = generatePlotHTML(db); - expect(html).toContain('var allNodes ='); - expect(html).toContain('var allEdges ='); - expect(html).toContain('a.js'); - expect(html).toContain('b.js'); - db.close(); - }); - - it('includes vis-network CDN script', () => { - const db = createTestDb(); - const html = generatePlotHTML(db); - expect(html).toContain('vis-network'); - expect(html).toContain('unpkg.com'); - db.close(); - }); - - it('applies custom config title', () => { - const db = createTestDb(); - const html = generatePlotHTML(db, { - config: { - title: 'My Custom Graph', - layout: { algorithm: 'hierarchical', direction: 'LR' }, - physics: { enabled: true, nodeDistance: 150 }, - nodeColors: {}, - roleColors: {}, - colorBy: 'kind', - edgeStyle: { color: '#666', smooth: true }, - filter: { kinds: null, roles: null, files: null }, - seedStrategy: 'all', - seedCount: 30, - clusterBy: 'none', - sizeBy: 'uniform', - overlays: { complexity: false, risk: false }, - riskThresholds: { highBlastRadius: 10, lowMI: 40 }, - }, - }); - expect(html).toContain('My Custom Graph'); - db.close(); - }); - - it('handles empty graph without error', () => { - const db = createTestDb(); - const html = generatePlotHTML(db); - expect(html).toContain(''); - expect(html).toContain('var allNodes = []'); - expect(html).toContain('var allEdges = []'); - db.close(); - }); - - it('supports function-level mode', () => { - const db = createTestDb(); - const fnA = insertNode(db, 'doWork', 'function', 'src/a.js', 5); - const fnB = insertNode(db, 'helper', 'function', 'src/b.js', 10); - insertEdge(db, fnA, fnB, 'calls'); - - const html = generatePlotHTML(db, { fileLevel: false }); - expect(html).toContain('doWork'); - expect(html).toContain('helper'); - db.close(); - }); - - it('includes detail panel elements', () => { - 
const db = createTestDb(); - const html = generatePlotHTML(db); - expect(html).toContain('id="detail"'); - expect(html).toContain('id="detailContent"'); - expect(html).toContain('id="detailClose"'); - db.close(); - }); - - it('includes new control elements', () => { - const db = createTestDb(); - const html = generatePlotHTML(db); - expect(html).toContain('id="colorBySelect"'); - expect(html).toContain('id="sizeBySelect"'); - expect(html).toContain('id="clusterBySelect"'); - expect(html).toContain('id="riskToggle"'); - db.close(); - }); -}); - describe('prepareGraphData', () => { it('embeds complexity data into function-level nodes', () => { const db = createTestDb(); diff --git a/tests/presentation/plot.test.ts b/tests/presentation/plot.test.ts new file mode 100644 index 000000000..9cfe84a2c --- /dev/null +++ b/tests/presentation/plot.test.ts @@ -0,0 +1,128 @@ +/** + * Interactive HTML plot generation tests. + */ + +import Database from 'better-sqlite3'; +import { describe, expect, it } from 'vitest'; +import { initSchema } from '../../src/db/index.js'; +import { generatePlotHTML } from '../../src/presentation/plot.js'; + +function createTestDb() { + const db = new Database(':memory:'); + db.pragma('journal_mode = WAL'); + initSchema(db); + return db; +} + +function insertNode(db, name, kind, file, line, role) { + return db + .prepare('INSERT INTO nodes (name, kind, file, line, role) VALUES (?, ?, ?, ?, ?)') + .run(name, kind, file, line, role || null).lastInsertRowid; +} + +function insertEdge(db, sourceId, targetId, kind) { + db.prepare( + 'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, 1.0, 0)', + ).run(sourceId, targetId, kind); +} + +describe('generatePlotHTML', () => { + it('returns a valid HTML document', () => { + const db = createTestDb(); + const a = insertNode(db, 'src/a.js', 'file', 'src/a.js', 0); + const b = insertNode(db, 'src/b.js', 'file', 'src/b.js', 0); + insertEdge(db, a, b, 'imports'); + + const html = 
generatePlotHTML(db); + expect(html).toContain(''); + expect(html).toContain(''); + db.close(); + }); + + it('embeds graph data as JSON', () => { + const db = createTestDb(); + const a = insertNode(db, 'src/a.js', 'file', 'src/a.js', 0); + const b = insertNode(db, 'src/b.js', 'file', 'src/b.js', 0); + insertEdge(db, a, b, 'imports'); + + const html = generatePlotHTML(db); + expect(html).toContain('var allNodes ='); + expect(html).toContain('var allEdges ='); + expect(html).toContain('a.js'); + expect(html).toContain('b.js'); + db.close(); + }); + + it('includes vis-network CDN script', () => { + const db = createTestDb(); + const html = generatePlotHTML(db); + expect(html).toContain('vis-network'); + expect(html).toContain('unpkg.com'); + db.close(); + }); + + it('applies custom config title', () => { + const db = createTestDb(); + const html = generatePlotHTML(db, { + config: { + title: 'My Custom Graph', + layout: { algorithm: 'hierarchical', direction: 'LR' }, + physics: { enabled: true, nodeDistance: 150 }, + nodeColors: {}, + roleColors: {}, + colorBy: 'kind', + edgeStyle: { color: '#666', smooth: true }, + filter: { kinds: null, roles: null, files: null }, + seedStrategy: 'all', + seedCount: 30, + clusterBy: 'none', + sizeBy: 'uniform', + overlays: { complexity: false, risk: false }, + riskThresholds: { highBlastRadius: 10, lowMI: 40 }, + }, + }); + expect(html).toContain('My Custom Graph'); + db.close(); + }); + + it('handles empty graph without error', () => { + const db = createTestDb(); + const html = generatePlotHTML(db); + expect(html).toContain(''); + expect(html).toContain('var allNodes = []'); + expect(html).toContain('var allEdges = []'); + db.close(); + }); + + it('supports function-level mode', () => { + const db = createTestDb(); + const fnA = insertNode(db, 'doWork', 'function', 'src/a.js', 5); + const fnB = insertNode(db, 'helper', 'function', 'src/b.js', 10); + insertEdge(db, fnA, fnB, 'calls'); + + const html = generatePlotHTML(db, { 
fileLevel: false }); + expect(html).toContain('doWork'); + expect(html).toContain('helper'); + db.close(); + }); + + it('includes detail panel elements', () => { + const db = createTestDb(); + const html = generatePlotHTML(db); + expect(html).toContain('id="detail"'); + expect(html).toContain('id="detailContent"'); + expect(html).toContain('id="detailClose"'); + db.close(); + }); + + it('includes new control elements', () => { + const db = createTestDb(); + const html = generatePlotHTML(db); + expect(html).toContain('id="colorBySelect"'); + expect(html).toContain('id="sizeBySelect"'); + expect(html).toContain('id="clusterBySelect"'); + expect(html).toContain('id="riskToggle"'); + db.close(); + }); +}); From 370f336f02d7a1d3cdbbe291090c681ec62663cd Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 06:13:08 -0600 Subject: [PATCH 09/26] refactor: decompose extractDestructuredBindingsWalk/handleVariableDecl/runContextCollectorWalk Pure extract-method decomposition of the three highest-complexity functions in extractors/javascript.ts (Titan phase 10, sync.json commit message shortened to fit the 100-char commitlint header limit). No extraction logic, node-type handling, or edge-case behavior changed -- verified byte-identical resolution-benchmark precision/recall across all 34 fixture languages and byte-identical codegraph query/where output for 3 real non-fixture files before/after. No Rust files touched, so native/WASM parity is unaffected by construction. docs check acknowledged: internal-only refactor, no new languages/commands/ architecture; README.md, CLAUDE.md, and ROADMAP.md are unaffected. 
Impact: 20 functions changed, 15 affected --- src/extractors/javascript.ts | 753 +++++++++++++++++++++-------------- 1 file changed, 460 insertions(+), 293 deletions(-) diff --git a/src/extractors/javascript.ts b/src/extractors/javascript.ts index 56c97b280..8c9931d22 100644 --- a/src/extractors/javascript.ts +++ b/src/extractors/javascript.ts @@ -535,71 +535,7 @@ function extractDestructuredBindingsWalk( if (inner) declNode = inner; } - const t = declNode.type; - if ( - (t === 'lexical_declaration' || t === 'variable_declaration') && - declNode.text.startsWith('const ') - ) { - for (let j = 0; j < declNode.childCount; j++) { - const declarator = declNode.child(j); - if (declarator?.type !== 'variable_declarator') continue; - const nameN = declarator.childForFieldName('name'); - if (nameN && nameN.type === 'object_pattern') { - extractDestructuredBindings( - nameN, - nodeStartLine(declNode), - nodeEndLine(declNode), - definitions, - ); - // Record CJS require bindings so importedNames can classify these names - // as import artifacts, preventing false local-definition blocking (#1661). 
- if (cjsRequireBindings) { - const valueN = declarator.childForFieldName('value'); - if (valueN?.type === 'call_expression') { - const fn = valueN.childForFieldName('function'); - if (fn?.text === 'require') { - const args = valueN.childForFieldName('arguments'); - const strArg = args && findChild(args, 'string'); - if (strArg) { - const modPath = strArg.text.replace(/['"]/g, ''); - const names: string[] = []; - for (let k = 0; k < nameN.childCount; k++) { - const prop = nameN.child(k); - if (!prop) continue; - if ( - prop.type === 'shorthand_property_identifier_pattern' || - prop.type === 'shorthand_property_identifier' - ) { - names.push(prop.text); - } else if (prop.type === 'pair_pattern' || prop.type === 'pair') { - const val = prop.childForFieldName('value'); - if ( - val?.type === 'identifier' || - val?.type === 'shorthand_property_identifier_pattern' - ) { - names.push(val.text); - } - } - } - if (names.length > 0) { - cjsRequireBindings.push({ names, source: modPath }); - } - } - } - } - } - } else if (nameN && nameN.type === 'array_pattern') { - // `const [x, y] = ...` — emit a single constant node whose name is the - // full array pattern text (e.g. `[x, y]`), matching native engine behaviour. - definitions.push({ - name: nameN.text, - kind: 'constant', - line: nodeStartLine(declNode), - endLine: nodeEndLine(declNode), - }); - } - } - } + extractDestructuredDeclarators(declNode, definitions, cjsRequireBindings); if (child.type !== 'export_statement') { extractDestructuredBindingsWalk(child, definitions, cjsRequireBindings); @@ -607,6 +543,93 @@ function extractDestructuredBindingsWalk( } } +/** + * Extract object/array-pattern destructured const bindings from a single declaration + * node — the per-declaration counterpart to extractDestructuredBindingsWalk's tree walk. 
+ */ +function extractDestructuredDeclarators( + declNode: TreeSitterNode, + definitions: Definition[], + cjsRequireBindings?: Array<{ names: string[]; source: string }>, +): void { + const t = declNode.type; + if ( + (t !== 'lexical_declaration' && t !== 'variable_declaration') || + !declNode.text.startsWith('const ') + ) { + return; + } + + for (let j = 0; j < declNode.childCount; j++) { + const declarator = declNode.child(j); + if (declarator?.type !== 'variable_declarator') continue; + const nameN = declarator.childForFieldName('name'); + if (nameN && nameN.type === 'object_pattern') { + extractDestructuredBindings( + nameN, + nodeStartLine(declNode), + nodeEndLine(declNode), + definitions, + ); + // Record CJS require bindings so importedNames can classify these names + // as import artifacts, preventing false local-definition blocking (#1661). + if (cjsRequireBindings) { + const valueN = declarator.childForFieldName('value'); + const binding = extractCjsRequireBinding(nameN, valueN); + if (binding) cjsRequireBindings.push(binding); + } + } else if (nameN && nameN.type === 'array_pattern') { + // `const [x, y] = ...` — emit a single constant node whose name is the + // full array pattern text (e.g. `[x, y]`), matching native engine behaviour. + definitions.push({ + name: nameN.text, + kind: 'constant', + line: nodeStartLine(declNode), + endLine: nodeEndLine(declNode), + }); + } + } +} + +/** + * Compute a `const { X } = require('./path')` CJS binding record from a destructured + * object-pattern name node and its declarator's value node, for import-artifact + * classification (#1661). Returns null when the value isn't a static require() call or + * no destructured names could be extracted. Shared by the walk-based + * (extractDestructuredDeclarators) and query-based (handleVariableDecl) const-destructuring + * paths, which independently need the identical extraction. 
+ */ +function extractCjsRequireBinding( + nameN: TreeSitterNode, + valueN: TreeSitterNode | null | undefined, +): { names: string[]; source: string } | null { + if (valueN?.type !== 'call_expression') return null; + const fn = valueN.childForFieldName('function'); + if (fn?.text !== 'require') return null; + const args = valueN.childForFieldName('arguments'); + const strArg = args && findChild(args, 'string'); + if (!strArg) return null; + const modPath = strArg.text.replace(/['"]/g, ''); + const names: string[] = []; + for (let k = 0; k < nameN.childCount; k++) { + const prop = nameN.child(k); + if (!prop) continue; + if ( + prop.type === 'shorthand_property_identifier_pattern' || + prop.type === 'shorthand_property_identifier' + ) { + names.push(prop.text); + } else if (prop.type === 'pair_pattern' || prop.type === 'pair') { + const val = prop.childForFieldName('value'); + if (val?.type === 'identifier' || val?.type === 'shorthand_property_identifier_pattern') { + names.push(val.text); + } + } + } + if (names.length === 0) return null; + return { names, source: modPath }; +} + /** Extract constant definitions from a `const` declaration node. 
*/ function extractConstDeclarators(declNode: TreeSitterNode, definitions: Definition[]): void { const t = declNode.type; @@ -1095,123 +1118,133 @@ function handleVariableDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { for (let i = 0; i < node.childCount; i++) { const declarator = node.child(i); if (declarator && declarator.type === 'variable_declarator') { - const nameN = declarator.childForFieldName('name'); - const valueN = declarator.childForFieldName('value'); - - if (nameN && valueN) { - const valType = valueN.type; - if ( - valType === 'arrow_function' || - valType === 'function_expression' || - valType === 'function' || - valType === 'generator_function' - ) { - const varFnChildren = extractParameters(valueN); - ctx.definitions.push({ - name: nameN.text, - kind: 'function', - line: nodeStartLine(node), - endLine: nodeEndLine(valueN), - children: varFnChildren.length > 0 ? varFnChildren : undefined, - }); - } else if ( - isConst && - nameN.type === 'identifier' && - isConstantValue(valueN) && - !hasFunctionScopeAncestor(node) - ) { - ctx.definitions.push({ - name: nameN.text, - kind: 'constant', - line: nodeStartLine(node), - endLine: nodeEndLine(node), - }); - // Phase 8.3f: extract function/arrow properties from object literals so that - // this.method() calls inside Object.defineProperty accessors can resolve them. - // Scope guard: hasFunctionScopeAncestor mirrors the Rust path's find_parent_of_types - // check and the sibling destructured-binding branch below — skips object literals - // inside function bodies to avoid polluting the global definition index with - // local variable properties (e.g. `localObj.fn` from `const localObj = { fn: ... }` - // inside a function). 
- if (valueN.type === 'object') { - extractObjectLiteralFunctions(valueN, nameN.text, ctx.definitions); - } - } else if ( - !isConst && - nameN.type === 'identifier' && - valueN.type === 'object' && - !hasFunctionScopeAncestor(node) - ) { - // `let`/`var` object literals: extract qualified method definitions so that - // `obj.method()` calls resolve correctly. Mirrors Rust match_js_objlit_qualified_method_defs - // which emits method_definition qualified names for ALL declaration kinds and - // pair+arrow/function for let/var only (const is already handled above). - // Scope guard prevents local object properties from polluting the global index. - extractObjectLiteralFunctions(valueN, nameN.text, ctx.definitions); - } else if (isConst && nameN.type === 'object_pattern' && !hasFunctionScopeAncestor(node)) { - // Destructured bindings: const { handleToken, checkPermissions } = initAuth(...) - // Each destructured property becomes a function definition so it can be - // resolved when passed as a callback (e.g. router.use(handleToken)). - // Restricted to const to avoid creating spurious definitions for - // transient let/var destructuring (e.g. let { userId } = parseRequest(req)). - // Scope guard mirrors extractDestructuredBindingsWalk (query path) and - // handle_var_decl (Rust path) — skips bindings inside function bodies. - extractDestructuredBindings( - nameN, - nodeStartLine(node), - nodeEndLine(node), - ctx.definitions, - ); - // Record CJS require bindings for import-artifact classification (#1661). 
- if (valueN?.type === 'call_expression') { - const fn = valueN.childForFieldName('function'); - if (fn?.text === 'require') { - const args = valueN.childForFieldName('arguments'); - const strArg = args && findChild(args, 'string'); - if (strArg) { - const modPath = strArg.text.replace(/['"]/g, ''); - const names: string[] = []; - for (let k = 0; k < nameN.childCount; k++) { - const prop = nameN.child(k); - if (!prop) continue; - if ( - prop.type === 'shorthand_property_identifier_pattern' || - prop.type === 'shorthand_property_identifier' - ) { - names.push(prop.text); - } else if (prop.type === 'pair_pattern' || prop.type === 'pair') { - const val = prop.childForFieldName('value'); - if ( - val?.type === 'identifier' || - val?.type === 'shorthand_property_identifier_pattern' - ) { - names.push(val.text); - } - } - } - if (names.length > 0) { - if (!ctx.cjsRequireBindings) ctx.cjsRequireBindings = []; - ctx.cjsRequireBindings.push({ names, source: modPath }); - } - } - } - } - } else if (isConst && nameN.type === 'array_pattern' && !hasFunctionScopeAncestor(node)) { - // Array destructuring: `const [x, y] = ...` — emit a single constant node - // whose name is the full array pattern text (e.g. `[x, y]`), matching - // native engine behaviour. Scope guard mirrors the object_pattern branch above. - ctx.definitions.push({ - name: nameN.text, - kind: 'constant', - line: nodeStartLine(node), - endLine: nodeEndLine(node), - }); - } - } + handleVariableDeclarator(node, declarator, isConst, ctx); } } } +/** + * Dispatch a single variable_declarator within a variable/lexical declaration to the + * handler matching its value/name-pattern kind. Mirrors the query-based path's + * per-capture handler functions (handleFnCapture, etc.) already used elsewhere in this file. 
+ */ +function handleVariableDeclarator( + node: TreeSitterNode, + declarator: TreeSitterNode, + isConst: boolean, + ctx: ExtractorOutput, +): void { + const nameN = declarator.childForFieldName('name'); + const valueN = declarator.childForFieldName('value'); + if (!nameN || !valueN) return; + + const valType = valueN.type; + if ( + valType === 'arrow_function' || + valType === 'function_expression' || + valType === 'function' || + valType === 'generator_function' + ) { + handleVarFnAssignment(node, nameN, valueN, ctx); + } else if ( + isConst && + nameN.type === 'identifier' && + isConstantValue(valueN) && + !hasFunctionScopeAncestor(node) + ) { + handleConstIdentifierAssignment(node, nameN, valueN, ctx); + } else if ( + !isConst && + nameN.type === 'identifier' && + valueN.type === 'object' && + !hasFunctionScopeAncestor(node) + ) { + // `let`/`var` object literals: extract qualified method definitions so that + // `obj.method()` calls resolve correctly. Mirrors Rust match_js_objlit_qualified_method_defs + // which emits method_definition qualified names for ALL declaration kinds and + // pair+arrow/function for let/var only (const is already handled above). + // Scope guard prevents local object properties from polluting the global index. + extractObjectLiteralFunctions(valueN, nameN.text, ctx.definitions); + } else if (isConst && nameN.type === 'object_pattern' && !hasFunctionScopeAncestor(node)) { + handleConstObjectPatternAssignment(node, nameN, valueN, ctx); + } else if (isConst && nameN.type === 'array_pattern' && !hasFunctionScopeAncestor(node)) { + // Array destructuring: `const [x, y] = ...` — emit a single constant node + // whose name is the full array pattern text (e.g. `[x, y]`), matching + // native engine behaviour. Scope guard mirrors the object_pattern branch above. + ctx.definitions.push({ + name: nameN.text, + kind: 'constant', + line: nodeStartLine(node), + endLine: nodeEndLine(node), + }); + } +} + +/** Handle `const/let fn = (...) 
=> {...}` — a function/arrow value assigned to a variable. */ +function handleVarFnAssignment( + node: TreeSitterNode, + nameN: TreeSitterNode, + valueN: TreeSitterNode, + ctx: ExtractorOutput, +): void { + const varFnChildren = extractParameters(valueN); + ctx.definitions.push({ + name: nameN.text, + kind: 'function', + line: nodeStartLine(node), + endLine: nodeEndLine(valueN), + children: varFnChildren.length > 0 ? varFnChildren : undefined, + }); +} + +/** Handle `const X = ` — a plain constant identifier assignment. */ +function handleConstIdentifierAssignment( + node: TreeSitterNode, + nameN: TreeSitterNode, + valueN: TreeSitterNode, + ctx: ExtractorOutput, +): void { + ctx.definitions.push({ + name: nameN.text, + kind: 'constant', + line: nodeStartLine(node), + endLine: nodeEndLine(node), + }); + // Phase 8.3f: extract function/arrow properties from object literals so that + // this.method() calls inside Object.defineProperty accessors can resolve them. + // Scope guard: hasFunctionScopeAncestor mirrors the Rust path's find_parent_of_types + // check and the sibling destructured-binding branch below — skips object literals + // inside function bodies to avoid polluting the global definition index with + // local variable properties (e.g. `localObj.fn` from `const localObj = { fn: ... }` + // inside a function). + if (valueN.type === 'object') { + extractObjectLiteralFunctions(valueN, nameN.text, ctx.definitions); + } +} + +/** Handle `const { a, b } = value` — destructured object-pattern const bindings. */ +function handleConstObjectPatternAssignment( + node: TreeSitterNode, + nameN: TreeSitterNode, + valueN: TreeSitterNode, + ctx: ExtractorOutput, +): void { + // Destructured bindings: const { handleToken, checkPermissions } = initAuth(...) + // Each destructured property becomes a function definition so it can be + // resolved when passed as a callback (e.g. router.use(handleToken)). 
+ // Restricted to const to avoid creating spurious definitions for + // transient let/var destructuring (e.g. let { userId } = parseRequest(req)). + // Scope guard mirrors extractDestructuredBindingsWalk (query path) and + // handle_var_decl (Rust path) — skips bindings inside function bodies. + extractDestructuredBindings(nameN, nodeStartLine(node), nodeEndLine(node), ctx.definitions); + // Record CJS require bindings for import-artifact classification (#1661). + const binding = extractCjsRequireBinding(nameN, valueN); + if (binding) { + if (!ctx.cjsRequireBindings) ctx.cjsRequireBindings = []; + ctx.cjsRequireBindings.push(binding); + } +} + /** * Phase 8.3f: extract function/arrow function properties from an object literal as standalone * definitions so that `this.method()` calls inside Object.defineProperty accessor functions can @@ -2004,6 +2037,230 @@ interface ContextCollectorOutputs { * before any declarator is processed (a function declared *after* its first * use would otherwise be missed). */ +/** + * Push node onto classStack when it's a named class declaration/expression, for + * method_definition qualification below. Returns whether a push happened. + * The `identifier`-only check keeps the original walk's behaviour (TS class names + * parse as type_identifier and were never pushed), while typeMapClass/objectRestClass + * elsewhere use the bare text like their original walks did. + */ +function pushClassContext( + classStack: string[], + className: string | null, + classNameIsIdentifier: boolean, +): boolean { + if (className && classNameIsIdentifier) { + classStack.push(className); + return true; + } + return false; +} + +/** Push node onto funcStack when it's a named function_declaration/generator_function_declaration. 
*/ +function pushFnDeclContext(funcStack: string[], node: TreeSitterNode): boolean { + const nameNode = node.childForFieldName('name'); + if (nameNode?.type === 'identifier') { + funcStack.push(nameNode.text); + return true; + } + return false; +} + +/** + * Resolve the raw method name from a method_definition's name field, unwrapping + * computed_property_name string literals (e.g. `['foo']() {}` -> 'foo'). Returns '' + * for non-string computed keys (no resolvable name). + */ +function resolveMethodDefinitionName(nameNode: TreeSitterNode): string { + if (nameNode.type !== 'computed_property_name') return nameNode.text; + const inner = nameNode.child(1); + if (!inner || (inner.type !== 'string' && inner.type !== 'string_fragment')) { + // Non-string computed key — skip adding to funcStack (no resolvable name). + return ''; + } + return inner.text.replace(/^['"]|['"]$/g, ''); +} + +/** + * Push node onto funcStack for a method_definition, qualified with the enclosing class + * name so the PTS key matches callerName from findCaller (which uses + * def.name = 'ClassName.method'). + */ +function pushMethodDefContext( + classStack: string[], + funcStack: string[], + node: TreeSitterNode, +): boolean { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return false; + const enclosingClass = classStack.length > 0 ? classStack[classStack.length - 1] : null; + const rawName = resolveMethodDefinitionName(nameNode); + if (!rawName) return false; + const qualifiedName = enclosingClass ? `${enclosingClass}.${rawName}` : rawName; + funcStack.push(qualifiedName); + return true; +} + +/** + * Push node onto funcStack for `const process = (arr) => { ... }` — arrow/expression + * functions assigned to a variable have no `name` field on the function node itself. 
+ */ +function pushArrowVarContext(funcStack: string[], node: TreeSitterNode): boolean { + const nameNode = node.childForFieldName('name'); + const valueNode = node.childForFieldName('value'); + if ( + nameNode?.type === 'identifier' && + (valueNode?.type === 'arrow_function' || valueNode?.type === 'function_expression') + ) { + funcStack.push(nameNode.text); + return true; + } + return false; +} + +/** + * Push node onto funcStack for `obj.method = function() { ... }` func-prop assignment. + * Mirrors handleFuncPropAssignment's logic so for-of loops inside the body get the + * correct enclosingFunc (e.g. 'obj.method') instead of '' or the wrong outer + * function name. + */ +function pushFuncPropContext(funcStack: string[], node: TreeSitterNode): boolean { + const lhs = node.childForFieldName('left'); + const rhs = node.childForFieldName('right'); + if ( + lhs?.type === 'member_expression' && + (rhs?.type === 'function_expression' || rhs?.type === 'arrow_function') + ) { + const obj = lhs.childForFieldName('object'); + const prop = lhs.childForFieldName('property'); + if ( + obj?.type === 'identifier' && + (prop?.type === 'property_identifier' || prop?.type === 'identifier') && + !BUILTIN_GLOBALS.has(obj.text) && + prop.text !== 'prototype' + ) { + funcStack.push(`${obj.text}.${prop.text}`); + return true; + } + } + return false; +} + +/** + * Compute the class name (and whether it's a plain identifier) for a class_declaration/ + * class-expression node — read once, shared by pushClassContext and computeChildContext. + * Returns nulls/false for any other node type. + */ +function computeClassNameContext( + node: TreeSitterNode, + isClassDecl: boolean, + isClassExpr: boolean, +): { className: string | null; classNameIsIdentifier: boolean } { + if (!isClassDecl && !isClassExpr) return { className: null, classNameIsIdentifier: false }; + const nameNode = node.childForFieldName('name'); + return { + className: nameNode?.text ?? 
null, + classNameIsIdentifier: nameNode?.type === 'identifier', + }; +} + +/** + * Dispatch the enclosing-context stack push for a node to the handler matching its type. + * Returns which stack (if any) was pushed, so the caller can pop the matching stack + * after visiting children. + */ +function pushEnclosingContext( + node: TreeSitterNode, + t: string, + isClassDecl: boolean, + isClassExpr: boolean, + isFnDecl: boolean, + className: string | null, + classNameIsIdentifier: boolean, + classStack: string[], + funcStack: string[], +): { pushedFunc: boolean; pushedClass: boolean } { + if (isClassDecl || isClassExpr) { + return { + pushedFunc: false, + pushedClass: pushClassContext(classStack, className, classNameIsIdentifier), + }; + } + if (isFnDecl) { + return { pushedFunc: pushFnDeclContext(funcStack, node), pushedClass: false }; + } + if (t === 'method_definition') { + return { pushedFunc: pushMethodDefContext(classStack, funcStack, node), pushedClass: false }; + } + if (t === 'variable_declarator') { + return { pushedFunc: pushArrowVarContext(funcStack, node), pushedClass: false }; + } + if (t === 'assignment_expression') { + return { pushedFunc: pushFuncPropContext(funcStack, node), pushedClass: false }; + } + return { pushedFunc: false, pushedClass: false }; +} + +/** + * Run the per-node-type collectors (typeMap/binding extraction) for a single node during + * runContextCollectorWalk's traversal, mirroring the query-based path's capture-handler + * pattern (handleFnCapture, etc.) already used elsewhere in this file. 
+ */ +function dispatchNodeCollectors( + node: TreeSitterNode, + t: string, + typeMapClass: string | null, + objectRestClass: string | null, + funcStack: string[], + out: ContextCollectorOutputs, +): void { + if (t === 'variable_declarator') { + handleVarDeclaratorTypeMap( + node, + out.typeMap, + out.returnTypeMap, + out.callAssignments, + out.fnRefBindings, + ); + collectCollectionWrapBinding(node, out.fnRefBindings); + } else if (t === 'required_parameter' || t === 'optional_parameter') { + handleParamTypeMap(node, out.typeMap); + } else if (t === 'public_field_definition' || t === 'field_definition') { + handleFieldDefTypeMap(node, out.typeMap, typeMapClass); + } else if (t === 'assignment_expression') { + handlePropWriteTypeMap(node, out.typeMap, typeMapClass); + } else if (t === 'call_expression') { + handleDefinePropertyTypeMap(node, out.typeMap); + collectSpreadAndArrayFromBindings(node, out.spreadArgBindings, out.arrayCallbackBindings); + } else if (t === 'for_in_statement') { + const enclosingFunc = funcStack.length > 0 ? funcStack[funcStack.length - 1]! : ''; + collectForOfBinding(node, enclosingFunc, out.forOfBindings); + } + collectObjectRestParams(node, t, objectRestClass, out.objectRestParamBindings); +} + +/** + * Compute the typeMapClass/objectRestClass context to thread into this node's children — + * each concern keeps its own reset rules (see runContextCollectorWalk's doc comment). + */ +function computeChildContext( + t: string, + isClassDecl: boolean, + isClassExpr: boolean, + className: string | null, + typeMapClass: string | null, + objectRestClass: string | null, +): { childTypeMapClass: string | null; childObjectRestClass: string | null } { + const childTypeMapClass = isClassDecl ? className : isClassExpr ? 
null : typeMapClass; + let childObjectRestClass: string | null = null; + if (t === 'class_declaration' || t === 'class') { + childObjectRestClass = className; + } else if (t === 'class_body') { + childObjectRestClass = objectRestClass; + } + return { childTypeMapClass, childObjectRestClass }; +} + function runContextCollectorWalk(rootNode: TreeSitterNode, out: ContextCollectorOutputs): void { const funcStack: string[] = []; const classStack: string[] = []; @@ -2021,127 +2278,37 @@ function runContextCollectorWalk(rootNode: TreeSitterNode, out: ContextCollector const isClassExpr = t === 'class'; const isFnDecl = t === 'function_declaration' || t === 'generator_function_declaration'; - // Class name read once, shared by every concern that needs it below. - let className: string | null = null; - let classNameIsIdentifier = false; - if (isClassDecl || isClassExpr) { - const nameNode = node.childForFieldName('name'); - className = nameNode?.text ?? null; - classNameIsIdentifier = nameNode?.type === 'identifier'; - } + const { className, classNameIsIdentifier } = computeClassNameContext( + node, + isClassDecl, + isClassExpr, + ); // ── spread/for-of enclosing-context stacks (push on enter, pop after children) ── - let pushedFunc = false; - let pushedClass = false; - if (isClassDecl || isClassExpr) { - // The stack push keeps the original walk's `identifier`-only check (TS - // class names parse as type_identifier and were never pushed), while - // typeMapClass/objectRestClass below use the bare text like their - // original walks did. 
- if (className && classNameIsIdentifier) { - classStack.push(className); - pushedClass = true; - } - } else if (isFnDecl) { - const nameNode = node.childForFieldName('name'); - if (nameNode?.type === 'identifier') { - funcStack.push(nameNode.text); - pushedFunc = true; - } - } else if (t === 'method_definition') { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - // Qualify with the enclosing class name so the PTS key matches - // callerName from findCaller (which uses def.name = 'ClassName.method'). - const enclosingClass = classStack.length > 0 ? classStack[classStack.length - 1] : null; - let rawName: string; - if (nameNode.type === 'computed_property_name') { - const inner = nameNode.child(1); - if (!inner || (inner.type !== 'string' && inner.type !== 'string_fragment')) { - // Non-string computed key — skip adding to funcStack (no resolvable name). - rawName = ''; - } else { - rawName = inner.text.replace(/^['"]|['"]$/g, ''); - } - } else { - rawName = nameNode.text; - } - if (rawName) { - const qualifiedName = enclosingClass ? `${enclosingClass}.${rawName}` : rawName; - funcStack.push(qualifiedName); - pushedFunc = true; - } - } - } else if (t === 'variable_declarator') { - // `const process = (arr) => { ... }` — arrow/expression functions assigned - // to a variable have no `name` field on the function node itself. - const nameNode = node.childForFieldName('name'); - const valueNode = node.childForFieldName('value'); - if ( - nameNode?.type === 'identifier' && - (valueNode?.type === 'arrow_function' || valueNode?.type === 'function_expression') - ) { - funcStack.push(nameNode.text); - pushedFunc = true; - } - } else if (t === 'assignment_expression') { - // `obj.method = function() { ... }` — func-prop assignment. - // Mirror handleFuncPropAssignment's logic so for-of loops inside the - // body get the correct enclosingFunc (e.g. 'obj.method') instead of - // '' or the wrong outer function name. 
- const lhs = node.childForFieldName('left'); - const rhs = node.childForFieldName('right'); - if ( - lhs?.type === 'member_expression' && - (rhs?.type === 'function_expression' || rhs?.type === 'arrow_function') - ) { - const obj = lhs.childForFieldName('object'); - const prop = lhs.childForFieldName('property'); - if ( - obj?.type === 'identifier' && - (prop?.type === 'property_identifier' || prop?.type === 'identifier') && - !BUILTIN_GLOBALS.has(obj.text) && - prop.text !== 'prototype' - ) { - funcStack.push(`${obj.text}.${prop.text}`); - pushedFunc = true; - } - } - } + const { pushedFunc, pushedClass } = pushEnclosingContext( + node, + t, + isClassDecl, + isClassExpr, + isFnDecl, + className, + classNameIsIdentifier, + classStack, + funcStack, + ); // ── per-node collectors (class nodes match none of these types) ── - if (t === 'variable_declarator') { - handleVarDeclaratorTypeMap( - node, - out.typeMap, - out.returnTypeMap, - out.callAssignments, - out.fnRefBindings, - ); - collectCollectionWrapBinding(node, out.fnRefBindings); - } else if (t === 'required_parameter' || t === 'optional_parameter') { - handleParamTypeMap(node, out.typeMap); - } else if (t === 'public_field_definition' || t === 'field_definition') { - handleFieldDefTypeMap(node, out.typeMap, typeMapClass); - } else if (t === 'assignment_expression') { - handlePropWriteTypeMap(node, out.typeMap, typeMapClass); - } else if (t === 'call_expression') { - handleDefinePropertyTypeMap(node, out.typeMap); - collectSpreadAndArrayFromBindings(node, out.spreadArgBindings, out.arrayCallbackBindings); - } else if (t === 'for_in_statement') { - const enclosingFunc = funcStack.length > 0 ? funcStack[funcStack.length - 1]! 
: ''; - collectForOfBinding(node, enclosingFunc, out.forOfBindings); - } - collectObjectRestParams(node, t, objectRestClass, out.objectRestParamBindings); + dispatchNodeCollectors(node, t, typeMapClass, objectRestClass, funcStack, out); // ── child context per concern ── - const childTypeMapClass = isClassDecl ? className : isClassExpr ? null : typeMapClass; - let childObjectRestClass: string | null = null; - if (t === 'class_declaration' || t === 'class') { - childObjectRestClass = className; - } else if (t === 'class_body') { - childObjectRestClass = objectRestClass; - } + const { childTypeMapClass, childObjectRestClass } = computeChildContext( + t, + isClassDecl, + isClassExpr, + className, + typeMapClass, + objectRestClass, + ); for (let i = 0; i < node.childCount; i++) { walk(node.child(i)!, depth + 1, childTypeMapClass, childObjectRestClass); From ad14cf78e638fed7f3ebe21c1184e4a36b0a527f Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 06:45:04 -0600 Subject: [PATCH 10/26] refactor: decompose resolveFallbackTargets/buildEdges/buildCallEdgesNative in build-edges.ts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit docs check acknowledged: pure internal extract-method refactor, no new features, commands, languages, or architecture changes — README/CLAUDE/ROADMAP do not need updates. 
Impact: 20 functions changed, 15 affected --- .../graph/builder/stages/build-edges.ts | 768 +++++++++++------- src/infrastructure/config.ts | 5 +- src/types.ts | 3 +- 3 files changed, 468 insertions(+), 308 deletions(-) diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index e5663f339..efeee79f6 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -496,6 +496,80 @@ function propagateReturnTypesAcrossFiles( // ── Call edges (native engine) ────────────────────────────────────────── +/** + * Build the deduplicated native typeMap array for a single file's symbols. + * Deduplicate: keep highest-confidence entry per name (first-wins on tie), + * matching JS setTypeMapEntry semantics. The Map branch is already + * deduped by setTypeMapEntry — this loop is only needed for the Array + * branch (pre-rebuilt native addon) but runs unconditionally as + * belt-and-suspenders since it's a cheap O(n) pass. + */ +function buildNativeTypeMapEntries( + symbols: ExtractorOutput, +): Array<{ name: string; typeName: string; confidence: number }> { + const typeMapRaw: Array<{ name: string; typeName: string; confidence: number }> = + symbols.typeMap instanceof Map + ? [...symbols.typeMap.entries()].map(([name, entry]) => ({ + name, + typeName: typeof entry === 'string' ? entry : entry.type, + confidence: typeof entry === 'object' ? entry.confidence : 0.9, + })) + : Array.isArray(symbols.typeMap) + ? (symbols.typeMap as Array<{ name: string; typeName: string; confidence: number }>) + : []; + const typeMapDedup = new Map(); + for (const entry of typeMapRaw) { + const existing = typeMapDedup.get(entry.name); + if (!existing || entry.confidence > existing.confidence) { + typeMapDedup.set(entry.name, entry); + } + } + return [...typeMapDedup.values()]; +} + +/** Build the native FFI file entry for a single file, including pts-analysis bindings. 
*/ +function buildNativeFileEntry( + ctx: PipelineContext, + relPath: string, + fileNodeId: number, + symbols: ExtractorOutput, + rootDir: string, +): NativeFileEntry { + const importedNames = buildImportedNamesForNative(ctx, relPath, symbols, rootDir); + const typeMap = buildNativeTypeMapEntries(symbols); + return { + file: relPath, + fileNodeId, + definitions: symbols.definitions.map((d) => { + const params = d.children?.filter((c) => c.kind === 'parameter').map((c) => c.name); + return { + name: d.name, + kind: d.kind, + line: d.line, + endLine: d.endLine ?? null, + params: params?.length ? params : undefined, + }; + }), + calls: symbols.calls, + importedNames, + classes: symbols.classes, + typeMap, + fnRefBindings: symbols.fnRefBindings?.length ? symbols.fnRefBindings : undefined, + paramBindings: symbols.paramBindings?.length ? symbols.paramBindings : undefined, + thisCallBindings: symbols.thisCallBindings?.length ? symbols.thisCallBindings : undefined, + arrayElemBindings: symbols.arrayElemBindings?.length ? symbols.arrayElemBindings : undefined, + spreadArgBindings: symbols.spreadArgBindings?.length ? symbols.spreadArgBindings : undefined, + forOfBindings: symbols.forOfBindings?.length ? symbols.forOfBindings : undefined, + arrayCallbackBindings: symbols.arrayCallbackBindings?.length + ? symbols.arrayCallbackBindings + : undefined, + objectRestParamBindings: symbols.objectRestParamBindings?.length + ? symbols.objectRestParamBindings + : undefined, + objectPropBindings: symbols.objectPropBindings?.length ? 
symbols.objectPropBindings : undefined, + }; +} + function buildCallEdgesNative( ctx: PipelineContext, getNodeIdStmt: NodeIdStmt, @@ -511,63 +585,7 @@ function buildCallEdgesNative( const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); if (!fileNodeRow) continue; - const importedNames = buildImportedNamesForNative(ctx, relPath, symbols, rootDir); - const typeMapRaw: Array<{ name: string; typeName: string; confidence: number }> = - symbols.typeMap instanceof Map - ? [...symbols.typeMap.entries()].map(([name, entry]) => ({ - name, - typeName: typeof entry === 'string' ? entry : entry.type, - confidence: typeof entry === 'object' ? entry.confidence : 0.9, - })) - : Array.isArray(symbols.typeMap) - ? (symbols.typeMap as Array<{ name: string; typeName: string; confidence: number }>) - : []; - // Deduplicate: keep highest-confidence entry per name (first-wins on tie), - // matching JS setTypeMapEntry semantics. The Map branch is already - // deduped by setTypeMapEntry — this loop is only needed for the Array - // branch (pre-rebuilt native addon) but runs unconditionally as - // belt-and-suspenders since it's a cheap O(n) pass. - const typeMapDedup = new Map(); - for (const entry of typeMapRaw) { - const existing = typeMapDedup.get(entry.name); - if (!existing || entry.confidence > existing.confidence) { - typeMapDedup.set(entry.name, entry); - } - } - const typeMap = [...typeMapDedup.values()]; - nativeFiles.push({ - file: relPath, - fileNodeId: fileNodeRow.id, - definitions: symbols.definitions.map((d) => { - const params = d.children?.filter((c) => c.kind === 'parameter').map((c) => c.name); - return { - name: d.name, - kind: d.kind, - line: d.line, - endLine: d.endLine ?? null, - params: params?.length ? params : undefined, - }; - }), - calls: symbols.calls, - importedNames, - classes: symbols.classes, - typeMap, - fnRefBindings: symbols.fnRefBindings?.length ? symbols.fnRefBindings : undefined, - paramBindings: symbols.paramBindings?.length ? 
symbols.paramBindings : undefined, - thisCallBindings: symbols.thisCallBindings?.length ? symbols.thisCallBindings : undefined, - arrayElemBindings: symbols.arrayElemBindings?.length ? symbols.arrayElemBindings : undefined, - spreadArgBindings: symbols.spreadArgBindings?.length ? symbols.spreadArgBindings : undefined, - forOfBindings: symbols.forOfBindings?.length ? symbols.forOfBindings : undefined, - arrayCallbackBindings: symbols.arrayCallbackBindings?.length - ? symbols.arrayCallbackBindings - : undefined, - objectRestParamBindings: symbols.objectRestParamBindings?.length - ? symbols.objectRestParamBindings - : undefined, - objectPropBindings: symbols.objectPropBindings?.length - ? symbols.objectPropBindings - : undefined, - }); + nativeFiles.push(buildNativeFileEntry(ctx, relPath, fileNodeRow.id, symbols, rootDir)); } const nativeEdges = native.buildCallEdges(nativeFiles, allNodes, [ @@ -1041,6 +1059,183 @@ function buildDefinitionParamsMap( // ── Per-call resolution helpers ───────────────────────────────────────── +/** + * RES-4: Kotlin member callable reference — `Greeter::greet` emits + * { name: 'greet', receiver: 'Greeter', dynamicKind: 'reflection' }. + * The receiver is the class qualifier (not a typeMap variable), so + * resolveCallTargets would find a same-named top-level function via + * byNameAndFile('greet', relPath) before the qualified form is tried. + * Prefer `Greeter.greet` in the same file first; fall through to the + * normal path only when no qualified match exists. 
+ */ +function resolveKotlinReflectionPreQualified( + call: Call, + relPath: string, + lookup: CallNodeLookup, +): ReadonlyArray<{ id: number; file: string; kind?: string }> { + if ( + call.dynamicKind === 'reflection' && + call.receiver && + !call.keyExpr && + !isModuleScopedLanguage(relPath) + ) { + return lookup + .byNameAndFile(`${call.receiver}.${call.name}`, relPath) + .filter((n) => n.kind === 'method' || n.kind === 'function'); + } + return []; +} + +/** + * Shared by both same-class fallback strategies below: derive the enclosing + * class name from the caller's qualified name (the segment immediately before + * the final dot, e.g. `Namespace.MyClass.method` → `MyClass`), then look up + * `ClassName.callName` as a method in the same file. + */ +function resolveSameClassQualifiedMethod( + callName: string, + callerName: string, + relPath: string, + lookup: CallNodeLookup, +): Array<{ id: number; file: string; kind?: string }> { + const lastDot = callerName.lastIndexOf('.'); + if (lastDot <= 0) return []; + const prevDot = callerName.lastIndexOf('.', lastDot - 1); + const className = callerName.slice(prevDot + 1, lastDot); + return lookup + .byNameAndFile(`${className}.${callName}`, relPath) + .filter((n) => n.kind === 'method'); +} + +/** + * Same-class `this.method()` fallback: when the call receiver is `this` and + * resolveCallTargets found nothing, derive the enclosing class name from the + * caller (e.g. `Logger.info` → class prefix `Logger`) and retry with the + * qualified method name `Logger._write`. This mirrors what the native Rust + * engine does implicitly via its class-scoped symbol table. + * NOTE: restricted to `this` only — `super.method()` targets a parent class, + * not the enclosing class, so qualifying with the child class name would + * produce a false edge when the child also defines a same-named method. 
+ */ +function resolveSameClassThisFallback( + call: Call, + callerName: string | null, + relPath: string, + lookup: CallNodeLookup, +): Array<{ id: number; file: string; kind?: string }> { + if (call.receiver !== 'this' || callerName == null) return []; + return resolveSameClassQualifiedMethod(call.name, callerName, relPath, lookup); +} + +/** + * Same-class bare-call fallback: when a no-receiver call can't be resolved + * globally, try the caller's own class as a qualifier. Handles C# static + * sibling calls: `IsValidEmail()` inside `Validators.ValidateUser` resolves + * to `Validators.IsValidEmail`. Skipped for JS/TS where bare calls are + * module-scoped, not class-scoped. + */ +function resolveSameClassBareCallFallback( + call: Call, + callerName: string | null, + relPath: string, + lookup: CallNodeLookup, +): Array<{ id: number; file: string; kind?: string }> { + if (call.receiver || callerName == null || isModuleScopedLanguage(relPath)) return []; + return resolveSameClassQualifiedMethod(call.name, callerName, relPath, lookup); +} + +/** + * RES-3: reflection with literal method name — JVM getMethod("name") / invokeMethod("name"). + * Java/Scala/Groovy methods are stored as class-qualified names (e.g. Reflection.greet), + * so lookup.byNameAndFile('greet', relPath) finds nothing. When dynamicKind='reflection' + * and keyExpr is set (a string-literal method name was captured), try the qualified form: + * 1. typeMap[receiver] → resolvedType → lookup `resolvedType.keyExpr` (type-annotated local) + * 2. callerName class prefix → `CallerClass.keyExpr` (same-class sibling, e.g. Groovy obj) + * Scoped to non-JS/TS files to avoid interfering with the JS reflection path. 
+ */ +function resolveReflectionKeyExprFallback( + call: Call, + callerName: string | null, + relPath: string, + typeMap: Map, + lookup: CallNodeLookup, +): Array<{ id: number; file: string; kind?: string }> { + if ( + call.dynamicKind !== 'reflection' || + !call.keyExpr || + !call.receiver || + isModuleScopedLanguage(relPath) + ) { + return []; + } + const typeEntry = typeMap.get(call.receiver); + const resolvedType = typeEntry + ? typeof typeEntry === 'string' + ? typeEntry + : (typeEntry as { type?: string }).type + : null; + if (resolvedType) { + const qualified = lookup + .byNameAndFile(`${resolvedType}.${call.keyExpr}`, relPath) + .filter((n) => n.kind === 'method' || n.kind === 'function'); + if (qualified.length > 0) return qualified; + } + if (callerName != null) { + const lastDot = callerName.lastIndexOf('.'); + if (lastDot > 0) { + const prevDot = callerName.lastIndexOf('.', lastDot - 1); + const callerClass = callerName.slice(prevDot + 1, lastDot); + const qualified = lookup + .byNameAndFile(`${callerClass}.${call.keyExpr}`, relPath) + .filter((n) => n.kind === 'method' || n.kind === 'function'); + if (qualified.length > 0) return qualified; + } + } + return []; +} + +/** + * Object.defineProperty accessor fallback: when a function is registered as + * a getter/setter via `Object.defineProperty(obj, "bar", { get: getter })`, + * calls to `this.X()` inside `getter` resolve against `obj` (this === obj + * when the accessor is invoked). If the same-class fallback above found + * nothing, try treating `obj` as the receiver and look up `obj.X` in the + * typeMap, or fall back to a same-file lookup of any definition named X + * that belongs to the object literal or its type. 
+ */ +function resolveDefinePropertyAccessorFallback( + call: Call, + callerName: string | null, + relPath: string, + typeMap: Map, + lookup: CallNodeLookup, + definePropertyReceivers: Map | undefined, +): Array<{ id: number; file: string; kind?: string }> { + if (call.receiver !== 'this' || callerName == null || !definePropertyReceivers) return []; + const receiverVarName = definePropertyReceivers.get(callerName); + if (!receiverVarName) return []; + + const typeEntry = typeMap.get(receiverVarName); + const typeName = typeEntry + ? typeof typeEntry === 'string' + ? typeEntry + : (typeEntry as { type?: string }).type + : null; + if (typeName) { + const qualified = lookup.byNameAndFile(`${typeName}.${call.name}`, relPath); + if (qualified.length > 0) return [...qualified]; + } + // If still no targets, search for any definition named `call.name` in + // the same file — handles plain object literals where the method isn't + // qualified (e.g. `const obj = { baz() {} }` defines `baz` directly). + // Note: this is intentionally broad — it matches any same-file definition + // with the called name, not just members of the receiver object. This is + // the same behaviour used by the native post-pass path (buildDefinePropertyPostPass). + const sameFile = lookup.byNameAndFile(call.name, relPath); + if (sameFile.length > 0) return [...sameFile]; + return []; +} + /** * Resolve targets for a single call site with all JS-path fallbacks applied. * @@ -1064,24 +1259,7 @@ function resolveFallbackTargets( targets: ReadonlyArray<{ id: number; file: string; kind?: string }>; importedFrom: string | null | undefined; } { - // RES-4: Kotlin member callable reference — `Greeter::greet` emits - // { name: 'greet', receiver: 'Greeter', dynamicKind: 'reflection' }. - // The receiver is the class qualifier (not a typeMap variable), so - // resolveCallTargets would find a same-named top-level function via - // byNameAndFile('greet', relPath) before the qualified form is tried. 
- // Prefer `Greeter.greet` in the same file first; fall through to the - // normal path only when no qualified match exists. - let preQualifiedTargets: ReadonlyArray<{ id: number; file: string; kind?: string }> = []; - if ( - call.dynamicKind === 'reflection' && - call.receiver && - !call.keyExpr && - !isModuleScopedLanguage(relPath) - ) { - preQualifiedTargets = lookup - .byNameAndFile(`${call.receiver}.${call.name}`, relPath) - .filter((n) => n.kind === 'method' || n.kind === 'function'); - } + const preQualifiedTargets = resolveKotlinReflectionPreQualified(call, relPath, lookup); let { targets, importedFrom } = preQualifiedTargets.length > 0 @@ -1098,123 +1276,41 @@ function resolveFallbackTargets( caller.callerName, ); - // Same-class `this.method()` fallback: when the call receiver is `this` and - // resolveCallTargets found nothing, derive the enclosing class name from the - // caller (e.g. `Logger.info` → class prefix `Logger`) and retry with the - // qualified method name `Logger._write`. This mirrors what the native Rust - // engine does implicitly via its class-scoped symbol table. - // NOTE: restricted to `this` only — `super.method()` targets a parent class, - // not the enclosing class, so qualifying with the child class name would - // produce a false edge when the child also defines a same-named method. - if (targets.length === 0 && call.receiver === 'this' && caller.callerName != null) { - const lastDot = caller.callerName.lastIndexOf('.'); - if (lastDot > 0) { - const prevDot = caller.callerName.lastIndexOf('.', lastDot - 1); - const className = caller.callerName.slice(prevDot + 1, lastDot); - const qualified = lookup - .byNameAndFile(`${className}.${call.name}`, relPath) - .filter((n) => n.kind === 'method'); - if (qualified.length > 0) targets = qualified; - } + // Fallback strategies, applied in order until one yields a match. 
Each + // helper folds its own applicability guard internally (see helper doc + // comments above) — the checks here are unchanged from before, just + // relocated to keep this dispatcher a thin, low-complexity orchestrator. + if (targets.length === 0) { + const qualified = resolveSameClassThisFallback(call, caller.callerName, relPath, lookup); + if (qualified.length > 0) targets = qualified; } - // Same-class bare-call fallback: when a no-receiver call can't be resolved - // globally, try the caller's own class as a qualifier. Handles C# static - // sibling calls: `IsValidEmail()` inside `Validators.ValidateUser` resolves - // to `Validators.IsValidEmail`. Skipped for JS/TS where bare calls are - // module-scoped, not class-scoped. - if ( - targets.length === 0 && - !call.receiver && - caller.callerName != null && - !isModuleScopedLanguage(relPath) - ) { - const lastDot = caller.callerName.lastIndexOf('.'); - if (lastDot > 0) { - const prevDot = caller.callerName.lastIndexOf('.', lastDot - 1); - const className = caller.callerName.slice(prevDot + 1, lastDot); - const qualified = lookup - .byNameAndFile(`${className}.${call.name}`, relPath) - .filter((n) => n.kind === 'method'); - if (qualified.length > 0) targets = qualified; - } + if (targets.length === 0) { + const qualified = resolveSameClassBareCallFallback(call, caller.callerName, relPath, lookup); + if (qualified.length > 0) targets = qualified; } - // RES-3: reflection with literal method name — JVM getMethod("name") / invokeMethod("name"). - // Java/Scala/Groovy methods are stored as class-qualified names (e.g. Reflection.greet), - // so lookup.byNameAndFile('greet', relPath) finds nothing. When dynamicKind='reflection' - // and keyExpr is set (a string-literal method name was captured), try the qualified form: - // 1. typeMap[receiver] → resolvedType → lookup `resolvedType.keyExpr` (type-annotated local) - // 2. callerName class prefix → `CallerClass.keyExpr` (same-class sibling, e.g. 
Groovy obj) - // Scoped to non-JS/TS files to avoid interfering with the JS reflection path. - if ( - targets.length === 0 && - call.dynamicKind === 'reflection' && - call.keyExpr && - call.receiver && - !isModuleScopedLanguage(relPath) - ) { - const typeEntry = typeMap.get(call.receiver); - const resolvedType = typeEntry - ? typeof typeEntry === 'string' - ? typeEntry - : (typeEntry as { type?: string }).type - : null; - if (resolvedType) { - const qualified = lookup - .byNameAndFile(`${resolvedType}.${call.keyExpr}`, relPath) - .filter((n) => n.kind === 'method' || n.kind === 'function'); - if (qualified.length > 0) targets = qualified; - } - if (targets.length === 0 && caller.callerName != null) { - const lastDot = caller.callerName.lastIndexOf('.'); - if (lastDot > 0) { - const prevDot = caller.callerName.lastIndexOf('.', lastDot - 1); - const callerClass = caller.callerName.slice(prevDot + 1, lastDot); - const qualified = lookup - .byNameAndFile(`${callerClass}.${call.keyExpr}`, relPath) - .filter((n) => n.kind === 'method' || n.kind === 'function'); - if (qualified.length > 0) targets = qualified; - } - } + if (targets.length === 0) { + const qualified = resolveReflectionKeyExprFallback( + call, + caller.callerName, + relPath, + typeMap, + lookup, + ); + if (qualified.length > 0) targets = qualified; } - // Object.defineProperty accessor fallback: when a function is registered as - // a getter/setter via `Object.defineProperty(obj, "bar", { get: getter })`, - // calls to `this.X()` inside `getter` resolve against `obj` (this === obj - // when the accessor is invoked). If the same-class fallback above found - // nothing, try treating `obj` as the receiver and look up `obj.X` in the - // typeMap, or fall back to a same-file lookup of any definition named X - // that belongs to the object literal or its type. 
- if ( - targets.length === 0 && - call.receiver === 'this' && - caller.callerName != null && - definePropertyReceivers - ) { - const receiverVarName = definePropertyReceivers.get(caller.callerName); - if (receiverVarName) { - const typeEntry = typeMap.get(receiverVarName); - const typeName = typeEntry - ? typeof typeEntry === 'string' - ? typeEntry - : (typeEntry as { type?: string }).type - : null; - if (typeName) { - const qualified = lookup.byNameAndFile(`${typeName}.${call.name}`, relPath); - if (qualified.length > 0) targets = [...qualified]; - } - // If still no targets, search for any definition named `call.name` in - // the same file — handles plain object literals where the method isn't - // qualified (e.g. `const obj = { baz() {} }` defines `baz` directly). - // Note: this is intentionally broad — it matches any same-file definition - // with the called name, not just members of the receiver object. This is - // the same behaviour used by the native post-pass path (buildDefinePropertyPostPass). 
- if (targets.length === 0) { - const sameFile = lookup.byNameAndFile(call.name, relPath); - if (sameFile.length > 0) targets = [...sameFile]; - } - } + if (targets.length === 0) { + const qualified = resolveDefinePropertyAccessorFallback( + call, + caller.callerName, + relPath, + typeMap, + lookup, + definePropertyReceivers, + ); + if (qualified.length > 0) targets = qualified; } return { targets, importedFrom }; @@ -1904,7 +2000,7 @@ function loadNodes(ctx: PipelineContext): { rows: QueryNodeRow[]; scoped: boolea const existingFileCount = ( db.prepare("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'").get() as { c: number } ).c; - if (existingFileCount > 20) { + if (existingFileCount > ctx.config.build.largeCodebaseFileThreshold) { // Collect relevant files: changed files + their import targets const relevantFiles = new Set(fileSymbols.keys()); if (batchResolved) { @@ -1962,50 +2058,149 @@ function addLazyFallback(ctx: PipelineContext, scopedLoad: boolean): void { }; } -export async function buildEdges(ctx: PipelineContext): Promise { - const { db, engineName } = ctx; - - const getNodeIdStmt = makeGetNodeIdStmt(db); - +/** Load node-lookup structures used throughout edge construction (Phase 0 setup). */ +function prepareNodeLookups(ctx: PipelineContext): { + getNodeIdStmt: NodeIdStmt; + allNodesBefore: QueryNodeRow[]; +} { + const getNodeIdStmt = makeGetNodeIdStmt(ctx.db); const { rows: allNodesBefore, scoped: scopedLoad } = loadNodes(ctx); setupNodeLookups(ctx, allNodesBefore); addLazyFallback(ctx, scopedLoad); + return { getNodeIdStmt, allNodesBefore }; +} - const t0 = performance.now(); - - // Enrich typeMap for .ts/.tsx files using the TypeScript compiler API. - // Runs before call-edge construction so the accurate types are available - // for method-call resolution. Gated on config so users can opt out. 
- // - // Skip for small incremental builds: TypeScript program creation requires - // loading the entire tsconfig file list (~700ms startup on the codegraph - // corpus), which dominates the 1-file rebuild time. Native engine bypasses - // this entirely via the Rust orchestrator; WASM/JS engines need this gate - // to match native's effective behaviour on tiny incremental changes. - // Mirrors the smallFilesThreshold gates for nativeDb and native call-edges. +/** + * Enrich typeMap for .ts/.tsx files using the TypeScript compiler API. + * Runs before call-edge construction so the accurate types are available + * for method-call resolution. Gated on config so users can opt out. + * + * Skip for small incremental builds: TypeScript program creation requires + * loading the entire tsconfig file list (~700ms startup on the codegraph + * corpus), which dominates the 1-file rebuild time. Native engine bypasses + * this entirely via the Rust orchestrator; WASM/JS engines need this gate + * to match native's effective behaviour on tiny incremental changes. + * Mirrors the smallFilesThreshold gates for nativeDb and native call-edges. + */ +async function maybeEnrichTypeMapWithTsc(ctx: PipelineContext): Promise { const isSmallIncremental = !ctx.isFullBuild && ctx.fileSymbols.size <= ctx.config.build.smallFilesThreshold; if (ctx.config.build.typescriptResolver && !isSmallIncremental) { await enrichTypeMapWithTsc(ctx.rootDir, ctx.fileSymbols); } +} - const native = engineName === 'native' ? loadNative() : null; +/** + * Import-edge sub-phase: native fast path (with JS fallback for a #750-related + * key-format mismatch) or the JS path directly. + */ +function buildImportEdgesPhase( + ctx: PipelineContext, + getNodeIdStmt: NodeIdStmt, + allEdgeRows: EdgeRowTuple[], + native: NativeAddon | null, +): void { + // Skip native import-edge path for small incremental builds: napi-rs + // marshaling overhead (~13ms) exceeds Rust computation savings at this scale. 
+ const useNativeImportEdges = + native?.buildImportEdges && + (ctx.isFullBuild || ctx.fileSymbols.size > ctx.config.build.smallFilesThreshold); + if (useNativeImportEdges) { + const beforeLen = allEdgeRows.length; + buildImportEdgesNative(ctx, getNodeIdStmt, allEdgeRows, native!); + // Fallback: if native produced 0 import edges but there are imports to + // process, the native binary may have a key-format mismatch (e.g. Windows + // path separators — #750). Retry with the JS implementation. + // NOTE: This also fires for codebases where every import targets an + // external package (npm deps) that the resolver intentionally skips. + // In that case the JS path resolves zero edges too, so the only cost + // is the redundant JS traversal — no correctness impact. + const hasImports = [...ctx.fileSymbols.values()].some((s) => s.imports.length > 0); + if (allEdgeRows.length === beforeLen && hasImports) { + debug('Native buildImportEdges produced 0 edges — falling back to JS'); + buildImportEdges(ctx, getNodeIdStmt, allEdgeRows); + } + } else { + buildImportEdges(ctx, getNodeIdStmt, allEdgeRows); + } +} - // Phase 8.2: Augment typeMaps with cross-file return-type propagation before - // the transaction opens. This is pure in-memory mutation (no DB I/O) and must - // run outside the transaction to avoid leaving ctx.fileSymbols in a partial - // state if the transaction rolls back unexpectedly. - propagateReturnTypesAcrossFiles(ctx.fileSymbols, ctx, ctx.rootDir); - // Phase 8.5: Build CHA context after propagation so typeMap confidence values - // (used for RTA seeding) reflect any cross-file propagated types. - const chaCtx = buildChaContext(ctx.fileSymbols); +/** + * Call-edge sub-phase: native fast path (+ JS-only post-passes for + * Object.defineProperty accessor dispatch and CHA/RTA expansion — capabilities + * the native engine doesn't implement) or the full JS fallback path. 
+ */ +function buildCallEdgesPhase( + ctx: PipelineContext, + getNodeIdStmt: NodeIdStmt, + allEdgeRows: EdgeRowTuple[], + allNodesBefore: QueryNodeRow[], + native: NativeAddon | null, + chaCtx: ChaContext, +): void { + // Skip native call-edge path for small incremental builds: napi-rs + // marshaling overhead for allNodes exceeds Rust computation savings. + const useNativeCallEdges = + native?.buildCallEdges && + (ctx.isFullBuild || ctx.fileSymbols.size > ctx.config.build.smallFilesThreshold); + if (useNativeCallEdges) { + buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodesBefore, native!); + // The native engine receives all pts bindings (paramBindings, + // fnRefBindings, thisCallBindings, objectRestParamBindings, …) through + // NativeFileEntry and runs the same points-to solver as the JS path, so + // no pts post-passes are needed here. Only capabilities that remain + // JS-only run as post-passes below. + const sharedLookup = makeContextLookup(ctx, getNodeIdStmt); + // Object.defineProperty accessor post-pass: resolve this-dispatch inside + // getter/setter functions registered via Object.defineProperty. + buildDefinePropertyPostPass(ctx, getNodeIdStmt, allEdgeRows, sharedLookup); + // Phase 8.5 post-pass: augment native call edges with CHA-resolved dispatch. + // The native Rust engine has no knowledge of the CHA context, so this/self + // calls and interface dispatch are not expanded to concrete implementations. + buildChaPostPass(ctx, getNodeIdStmt, allEdgeRows, chaCtx); + } else { + buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows, chaCtx); + } +} - // Phase 1: Compute edges inside a better-sqlite3 transaction. - // Barrel-edge deletion lives here so that the JS path (which also inserts - // edges in this transaction) keeps deletion + insertion atomic. 
- // When using the native rusqlite path, insertion happens in Phase 2 on a - // separate connection — a crash between Phase 1 and Phase 2 would leave - // barrel edges missing until the next incremental rebuild re-creates them. +/** + * Apply the ts-native confidence floor to allEdgeRows in-memory. The proximity + * heuristic returns 0.3 for cross-module calls with no import-path evidence, + * but both WASM and native engines perform actual name-based symbol lookup, + * which is stronger evidence than pure proximity. Clamping to + * TS_NATIVE_CONFIDENCE_FLOOR (0.5) avoids unfairly dragging down the + * call-confidence metric. Sink edges (confidence = 0.0) are excluded so + * they remain below DEFAULT_MIN_CONFIDENCE. + */ +function applyTsNativeConfidenceFloor(allEdgeRows: EdgeRowTuple[]): void { + for (const r of allEdgeRows) { + if ( + r[2] === 'calls' && + r[5] === 'ts-native' && + (r[3] as number) > 0 && + (r[3] as number) < TS_NATIVE_CONFIDENCE_FLOOR + ) { + r[3] = TS_NATIVE_CONFIDENCE_FLOOR; + } + } +} + +/** + * Phase 1: Compute edges inside a better-sqlite3 transaction. + * Barrel-edge deletion lives here so that the JS path (which also inserts + * edges in this transaction) keeps deletion + insertion atomic. + * When using the native rusqlite path, insertion happens in Phase 2 on a + * separate connection — a crash between Phase 1 and Phase 2 would leave + * barrel edges missing until the next incremental rebuild re-creates them. 
+ */ +function computeAndInsertEdges( + ctx: PipelineContext, + getNodeIdStmt: NodeIdStmt, + allNodesBefore: QueryNodeRow[], + native: NativeAddon | null, + chaCtx: ChaContext, +): EdgeRowTuple[] { + const { db } = ctx; const allEdgeRows: EdgeRowTuple[] = []; const computeEdgesTx = db.transaction(() => { if (ctx.barrelOnlyFiles.size > 0) { @@ -2017,71 +2212,9 @@ export async function buildEdges(ctx: PipelineContext): Promise { } } - // Skip native import-edge path for small incremental builds: napi-rs - // marshaling overhead (~13ms) exceeds Rust computation savings at this scale. - const useNativeImportEdges = - native?.buildImportEdges && - (ctx.isFullBuild || ctx.fileSymbols.size > ctx.config.build.smallFilesThreshold); - if (useNativeImportEdges) { - const beforeLen = allEdgeRows.length; - buildImportEdgesNative(ctx, getNodeIdStmt, allEdgeRows, native!); - // Fallback: if native produced 0 import edges but there are imports to - // process, the native binary may have a key-format mismatch (e.g. Windows - // path separators — #750). Retry with the JS implementation. - // NOTE: This also fires for codebases where every import targets an - // external package (npm deps) that the resolver intentionally skips. - // In that case the JS path resolves zero edges too, so the only cost - // is the redundant JS traversal — no correctness impact. - const hasImports = [...ctx.fileSymbols.values()].some((s) => s.imports.length > 0); - if (allEdgeRows.length === beforeLen && hasImports) { - debug('Native buildImportEdges produced 0 edges — falling back to JS'); - buildImportEdges(ctx, getNodeIdStmt, allEdgeRows); - } - } else { - buildImportEdges(ctx, getNodeIdStmt, allEdgeRows); - } - - // Skip native call-edge path for small incremental builds: napi-rs - // marshaling overhead for allNodes exceeds Rust computation savings. 
- const useNativeCallEdges = - native?.buildCallEdges && - (ctx.isFullBuild || ctx.fileSymbols.size > ctx.config.build.smallFilesThreshold); - if (useNativeCallEdges) { - buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodesBefore, native!); - // The native engine receives all pts bindings (paramBindings, - // fnRefBindings, thisCallBindings, objectRestParamBindings, …) through - // NativeFileEntry and runs the same points-to solver as the JS path, so - // no pts post-passes are needed here. Only capabilities that remain - // JS-only run as post-passes below. - const sharedLookup = makeContextLookup(ctx, getNodeIdStmt); - // Object.defineProperty accessor post-pass: resolve this-dispatch inside - // getter/setter functions registered via Object.defineProperty. - buildDefinePropertyPostPass(ctx, getNodeIdStmt, allEdgeRows, sharedLookup); - // Phase 8.5 post-pass: augment native call edges with CHA-resolved dispatch. - // The native Rust engine has no knowledge of the CHA context, so this/self - // calls and interface dispatch are not expanded to concrete implementations. - buildChaPostPass(ctx, getNodeIdStmt, allEdgeRows, chaCtx); - } else { - buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows, chaCtx); - } - - // Apply ts-native confidence floor to allEdgeRows in-memory. The proximity - // heuristic returns 0.3 for cross-module calls with no import-path evidence, - // but both WASM and native engines perform actual name-based symbol lookup, - // which is stronger evidence than pure proximity. Clamping to - // TS_NATIVE_CONFIDENCE_FLOOR (0.5) avoids unfairly dragging down the - // call-confidence metric. Sink edges (confidence = 0.0) are excluded so - // they remain below DEFAULT_MIN_CONFIDENCE. 
- for (const r of allEdgeRows) { - if ( - r[2] === 'calls' && - r[5] === 'ts-native' && - (r[3] as number) > 0 && - (r[3] as number) < TS_NATIVE_CONFIDENCE_FLOOR - ) { - r[3] = TS_NATIVE_CONFIDENCE_FLOOR; - } - } + buildImportEdgesPhase(ctx, getNodeIdStmt, allEdgeRows, native); + buildCallEdgesPhase(ctx, getNodeIdStmt, allEdgeRows, allNodesBefore, native, chaCtx); + applyTsNativeConfidenceFloor(allEdgeRows); // When using native edge insert, skip JS insert here — do it after tx commits. // Otherwise insert edges within this transaction for atomicity. @@ -2091,26 +2224,55 @@ export async function buildEdges(ctx: PipelineContext): Promise { } }); computeEdgesTx(); + return allEdgeRows; +} - // Phase 2: Native rusqlite bulk insert (outside better-sqlite3 transaction - // to avoid SQLITE_BUSY contention). Uses NativeDatabase persistent connection. - // Standalone napi functions were removed in 6.17. - if (ctx.engineName === 'native' && ctx.nativeDb?.bulkInsertEdges && allEdgeRows.length > 0) { - const nativeEdges = allEdgeRows.map((r) => ({ - sourceId: r[0], - targetId: r[1], - kind: r[2], - confidence: r[3], - dynamic: r[4], - })); - const ok = ctx.nativeDb.bulkInsertEdges(nativeEdges); - if (!ok) { - debug('Native bulkInsertEdges failed — falling back to JS batchInsertEdges'); - batchInsertEdges(ctx.db, allEdgeRows); - } else { - applyEdgeTechniquesAfterNativeInsert(ctx.db, allEdgeRows); - } +/** + * Phase 2: Native rusqlite bulk insert (outside the better-sqlite3 transaction + * to avoid SQLITE_BUSY contention). Uses the NativeDatabase persistent + * connection. Standalone napi functions were removed in 6.17. 
+ */ +function insertNativeBulkEdges(ctx: PipelineContext, allEdgeRows: EdgeRowTuple[]): void { + if (!(ctx.engineName === 'native' && ctx.nativeDb?.bulkInsertEdges && allEdgeRows.length > 0)) { + return; } + const nativeEdges = allEdgeRows.map((r) => ({ + sourceId: r[0], + targetId: r[1], + kind: r[2], + confidence: r[3], + dynamic: r[4], + })); + const ok = ctx.nativeDb.bulkInsertEdges(nativeEdges); + if (!ok) { + debug('Native bulkInsertEdges failed — falling back to JS batchInsertEdges'); + batchInsertEdges(ctx.db, allEdgeRows); + } else { + applyEdgeTechniquesAfterNativeInsert(ctx.db, allEdgeRows); + } +} + +export async function buildEdges(ctx: PipelineContext): Promise { + const { getNodeIdStmt, allNodesBefore } = prepareNodeLookups(ctx); + + const t0 = performance.now(); + + await maybeEnrichTypeMapWithTsc(ctx); + + const native = ctx.engineName === 'native' ? loadNative() : null; + + // Phase 8.2: Augment typeMaps with cross-file return-type propagation before + // the transaction opens. This is pure in-memory mutation (no DB I/O) and must + // run outside the transaction to avoid leaving ctx.fileSymbols in a partial + // state if the transaction rolls back unexpectedly. + propagateReturnTypesAcrossFiles(ctx.fileSymbols, ctx, ctx.rootDir); + // Phase 8.5: Build CHA context after propagation so typeMap confidence values + // (used for RTA seeding) reflect any cross-file propagated types. + const chaCtx = buildChaContext(ctx.fileSymbols); + + const allEdgeRows = computeAndInsertEdges(ctx, getNodeIdStmt, allNodesBefore, native, chaCtx); + + insertNativeBulkEdges(ctx, allEdgeRows); // Phase 3: Reconnect saved reverse-dep edges (#932, #933). // When the WASM/JS path purged changed files, edges FROM reverse-dep files TO @@ -2126,7 +2288,7 @@ export async function buildEdges(ctx: PipelineContext): Promise { // committed so the DB is consistent. 
// Note: the native orchestrator success path runs this independently in // tryNativeOrchestrator; this phase covers the WASM and native-fallback paths. - runChaPostPass(db); + runChaPostPass(ctx.db); ctx.timing.edgesMs = performance.now() - t0; } diff --git a/src/infrastructure/config.ts b/src/infrastructure/config.ts index a6bf6321e..7e23f90ba 100644 --- a/src/infrastructure/config.ts +++ b/src/infrastructure/config.ts @@ -32,9 +32,8 @@ export const DEFAULTS = { /** * Minimum existing file-node count for a repo to be treated as a "large * codebase" when deciding whether to scope node loading to changed files. - * @reserved — currently not wired; loadNodes() in - * `src/domain/graph/builder/stages/build-edges.ts` still uses the - * hardcoded literal `20` at its `existingFileCount > 20` gate. + * Used by loadNodes() in `src/domain/graph/builder/stages/build-edges.ts` + * at its `existingFileCount > largeCodebaseFileThreshold` gate. */ largeCodebaseFileThreshold: 20, typescriptResolver: true, diff --git a/src/types.ts b/src/types.ts index 9ffb1f9b7..88c85c3b8 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1310,8 +1310,7 @@ export interface CodegraphConfig { /** * Minimum existing file-node count for a repo to be treated as a "large * codebase" when deciding whether to scope node loading to changed files. - * @reserved — currently not wired; see `largeCodebaseFileThreshold` in - * `src/infrastructure/config.ts` for wiring status. + * Used by loadNodes() in `src/domain/graph/builder/stages/build-edges.ts`. */ largeCodebaseFileThreshold: number; /** From 51c38160717618bac8e3fce58ffa8aabe17ac071 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 07:05:04 -0600 Subject: [PATCH 11/26] refactor: extract NativeOrchestrationSession from tryNativeOrchestrator in native-orchestrator.ts Pure extract-class decomposition of the DECOMPOSE-flagged worst offender in Titan phase 12 (halstead.bugs 1.17). 
tryNativeOrchestrator's native-DB lifecycle steps (open/build/backfill/handoff/close) are now owned by a NativeOrchestrationSession class; tryNativeOrchestrator becomes a thin sequencer of session method calls. No dispatch logic, fallback conditions, or error handling changed -- verified via full test suite (200/200 files, 3330/3330 tests), byte-identical resolution-benchmark output across all 34 fixture languages, and byte-identical native-engine DB dumps (full build + incremental early-exit) on tests/fixtures/sample-project before/after. tryNativeOrchestrator: cognitive 35->24, cyclomatic 34->25, halstead.bugs 1.17->0.83, mi 49.7->54.1. docs check acknowledged: pure internal extract-class refactor, no new features, commands, languages, or architecture changes -- README/CLAUDE/ROADMAP do not need updates. Impact: 9 functions changed, 6 affected --- .../builder/stages/native-orchestrator.ts | 205 ++++++++++++------ 1 file changed, 141 insertions(+), 64 deletions(-) diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index f6721c874..0ca5d25b2 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -2124,6 +2124,128 @@ async function runPostNativePasses( }; } +/** + * Owns the native-DB lifecycle steps for a single {@link tryNativeOrchestrator} + * run: opening the Rust-backed connection, invoking `nativeDb.buildGraph()` + * behind the FK-pragma toggle, detecting + backfilling dropped-language files, + * handing the WAL off to a fresh better-sqlite3 connection for the JS + * post-passes, and closing both connections.
+ * + * Pure extract-class refactor of steps that previously lived inline in + * `tryNativeOrchestrator` (plus thin wrappers around the pre-existing + * `openNativeDatabase`/`handoffWalAfterNativeBuild`/dropped-language helpers) + * so the orchestrator function itself reads as a short sequence of steps. + * Behavior, ordering, and error handling are unchanged from the inline + * version — see the class's methods for the original comments explaining + * each step's rationale. + */ +class NativeOrchestrationSession { + private readonly ctx: PipelineContext; + + constructor(ctx: PipelineContext) { + this.ctx = ctx; + } + + /** Open NativeDatabase on demand (see {@link openNativeDatabase}). */ + open(): void { + openNativeDatabase(this.ctx); + } + + /** True once `open()` succeeded and the native `buildGraph` entry point is available. */ + get isReady(): boolean { + return !!this.ctx.nativeDb?.buildGraph; + } + + /** + * Invoke the Rust orchestrator's `buildGraph()`. + * + * The previous full build's clear_all_graph_data() sets PRAGMA foreign_keys = ON + * on the native connection. Older native binaries (< v3.14) do not delete + * dataflow_vertices / dataflow_summary / call_edge_id rows before purging + * nodes/edges during incremental builds, so FK enforcement causes the purge + * statements to fail silently — leaving stale nodes and edges that then get + * duplicated when the barrel-candidate re-parse re-inserts them (issue #1644). + * Disabling FK before buildGraph() lets the purge succeed; FK is restored in + * a finally block so post-passes (gap-repair, structure patch) retain FK protection + * even if buildGraph() throws. + * + * Caller must only invoke this after `isReady` is true. 
+ */ + runBuildGraph(): NativeOrchestratorResult { + const nativeDb = this.ctx.nativeDb as NonNullable; + try { + nativeDb.exec('PRAGMA foreign_keys = OFF'); + } catch { + // exec may not exist on very old addon versions — safe to ignore + } + + let resultJson: string; + try { + resultJson = nativeDb.buildGraph!( + this.ctx.rootDir, + JSON.stringify(this.ctx.config), + JSON.stringify(this.ctx.aliases), + JSON.stringify(this.ctx.opts), + ); + } finally { + // Restore FK enforcement so any subsequent writes to this connection + // (gap-repair, structure patch) retain FK protection — even if buildGraph() + // throws. + try { + nativeDb.exec('PRAGMA foreign_keys = ON'); + } catch { + // safe to ignore on very old addon versions + } + } + + return JSON.parse(resultJson) as NativeOrchestratorResult; + } + + /** + * Even on no-op rebuilds, dropped-language files added since the last + * full build are still missing from `nodes`/`file_hashes` (#1083), and + * WASM-only files deleted from disk leave stale rows behind (#1073). + * The orchestrator's collect_files skipped them, so its earlyExit + * doesn't imply DB consistency. Detect and repair the gap. + */ + async backfillDroppedLanguages(): Promise { + const gap = detectDroppedLanguageGap(this.ctx); + if (gap.missingAbs.length > 0 || gap.staleRel.length > 0) { + await backfillNativeDroppedFiles(this.ctx, gap); + } + } + + /** + * Ensure a proper better-sqlite3 connection is open before any post-pass that + * writes edges (dropped-language backfill, CHA) and before structure/analysis. + * When analysis fallback is needed, close the native proxy and reopen + * better-sqlite3 directly; otherwise hand the WAL off via + * {@link handoffWalAfterNativeBuild} unless a proxy conversion is already in + * place (deferred so CHA and technique-backfill can still write rows). + * + * Returns false if the DB reopen/handoff failed (caller should return a + * partial result with no post-pass phases completed). 
+ */ + ensureJsDbForPostPasses(needsStructure: boolean, needsAnalysisFallback: boolean): boolean { + if (!needsStructure && !needsAnalysisFallback) return true; + if (needsAnalysisFallback && this.ctx.nativeFirstProxy) { + closeNativeDb(this.ctx, 'pre-analysis-fallback'); + this.ctx.db = openDb(this.ctx.dbPath); + this.ctx.nativeFirstProxy = false; + return true; + } + if (!this.ctx.nativeFirstProxy) { + return handoffWalAfterNativeBuild(this.ctx); + } + return true; + } + + /** Close both the better-sqlite3 and native connections. */ + close(): void { + closeDbPair({ db: this.ctx.db, nativeDb: this.ctx.nativeDb }); + } +} + /** * Try the native build orchestrator. * @@ -2136,7 +2258,9 @@ async function runPostNativePasses( * invoke `nativeDb.buildGraph()` (the Rust pipeline), and run post-native * structure + analysis fallbacks. Lives in its own file to keep the Rust * orchestrator entry point separated from the JS-side `buildGraph()` driver - * in `pipeline.ts`. + * in `pipeline.ts`. The native-DB lifecycle steps (open/build/backfill/handoff/close) + * are delegated to {@link NativeOrchestrationSession} so this function reads as + * a thin sequencer. */ export async function tryNativeOrchestrator( ctx: PipelineContext, @@ -2147,58 +2271,17 @@ export async function tryNativeOrchestrator( return undefined; } - openNativeDatabase(ctx); + const session = new NativeOrchestrationSession(ctx); + session.open(); - if (!ctx.nativeDb?.buildGraph) return undefined; + if (!session.isReady) return undefined; - // The previous full build's clear_all_graph_data() sets PRAGMA foreign_keys = ON - // on the native connection. 
Older native binaries (< v3.14) do not delete - // dataflow_vertices / dataflow_summary / call_edge_id rows before purging - // nodes/edges during incremental builds, so FK enforcement causes the purge - // statements to fail silently — leaving stale nodes and edges that then get - // duplicated when the barrel-candidate re-parse re-inserts them (issue #1644). - // Disabling FK before buildGraph() lets the purge succeed; FK is restored in - // a finally block so post-passes (gap-repair, structure patch) retain FK protection - // even if buildGraph() throws. - try { - ctx.nativeDb.exec('PRAGMA foreign_keys = OFF'); - } catch { - // exec may not exist on very old addon versions — safe to ignore - } - - let resultJson: string; - try { - resultJson = ctx.nativeDb.buildGraph( - ctx.rootDir, - JSON.stringify(ctx.config), - JSON.stringify(ctx.aliases), - JSON.stringify(ctx.opts), - ); - } finally { - // Restore FK enforcement so any subsequent writes to this connection - // (gap-repair, structure patch) retain FK protection — even if buildGraph() - // throws. - try { - ctx.nativeDb.exec('PRAGMA foreign_keys = ON'); - } catch { - // safe to ignore on very old addon versions - } - } - - const result = JSON.parse(resultJson) as NativeOrchestratorResult; + const result = session.runBuildGraph(); if (result.earlyExit) { info('No changes detected'); - // Even on no-op rebuilds, dropped-language files added since the last - // full build are still missing from `nodes`/`file_hashes` (#1083), and - // WASM-only files deleted from disk leave stale rows behind (#1073). - // The orchestrator's collect_files skipped them, so its earlyExit - // doesn't imply DB consistency. Run the gap repair before returning. 
- const gap = detectDroppedLanguageGap(ctx); - if (gap.missingAbs.length > 0 || gap.staleRel.length > 0) { - await backfillNativeDroppedFiles(ctx, gap); - } - closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb }); + await session.backfillDroppedLanguages(); + session.close(); return 'early-exit'; } @@ -2265,21 +2348,15 @@ export async function tryNativeOrchestrator( // When analysis fallback is needed the handoff already happened above; when // neither structure nor analysis is needed the proxy conversion is deferred to // here so CHA and technique-backfill can still write rows. - if (needsStructure || needsAnalysisFallback) { - if (needsAnalysisFallback && ctx.nativeFirstProxy) { - closeNativeDb(ctx, 'pre-analysis-fallback'); - ctx.db = openDb(ctx.dbPath); - ctx.nativeFirstProxy = false; - } else if (!ctx.nativeFirstProxy && !handoffWalAfterNativeBuild(ctx)) { - // DB reopen failed — return partial result (no post-pass phases completed) - return formatNativeTimingResult(p, 0, analysisTiming, { - gapDetectMs: 0, - chaMs: 0, - thisDispatchMs: 0, - reclassifyMs: 0, - techniqueBackfillMs: 0, - }); - } + if (!session.ensureJsDbForPostPasses(needsStructure, needsAnalysisFallback)) { + // DB reopen failed — return partial result (no post-pass phases completed) + return formatNativeTimingResult(p, 0, analysisTiming, { + gapDetectMs: 0, + chaMs: 0, + thisDispatchMs: 0, + reclassifyMs: 0, + techniqueBackfillMs: 0, + }); } const postPassTimings = await runPostNativePasses(ctx, result); @@ -2316,6 +2393,6 @@ export async function tryNativeOrchestrator( await runDataflowVertexPass(ctx, result.changedFiles); } - closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb }); + session.close(); return formatNativeTimingResult(p, structurePatchMs, analysisTiming, postPassTimings); } From 63ab855b8a4093965be630e39bb38b6451430393 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 07:16:37 -0600 Subject: [PATCH 12/26] refactor: split embedRemote into request-executor and 
response-validator in remote.ts Extract-method refactor only, no behavior change. embedRemote's per-batch body is split into executeRemoteEmbeddingRequest (build request body, fetch-with-timeout, map network/timeout/status failures to EngineError) and mapRemoteEmbeddingResponse (shape-check, index-sort, embedding-field check, cross-batch dimension-consistency check, Float32Array conversion), with the outer loop calling both in the same order as before. Drops embedRemote from cognitive=36/halstead.bugs=1.10 (DECOMPOSE-flagged worst offender in GAUNTLET) to cognitive=6/bugs=0.38; both new helpers are well within thresholds (cognitive 11 and 9, bugs 0.38 and 0.33). Deliberately does not fix the gauntlet's secondary finding that response.json() sits outside error handling (a malformed body throws a raw SyntaxError instead of EngineError) -- that's a behavior change, out of scope for this pure decomposition. Filed as #1745. docs check acknowledged: internal refactor only, no CLI/feature/language/ architecture surface changed -- README/CLAUDE.md/ROADMAP untouched by design. Impact: 3 functions changed, 10 affected --- src/domain/search/providers/remote.ts | 170 +++++++++++++++++--------- 1 file changed, 109 insertions(+), 61 deletions(-) diff --git a/src/domain/search/providers/remote.ts b/src/domain/search/providers/remote.ts index 545a1b1d9..d323cdabd 100644 --- a/src/domain/search/providers/remote.ts +++ b/src/domain/search/providers/remote.ts @@ -69,6 +69,102 @@ export function resolveRemoteEmbeddingOptions( }; } +/** + * Execute a single batched `/embeddings` request: build the request body, + * fetch with an abort-on-timeout guard, and normalize network/timeout/status + * failures into a descriptive `EngineError` naming the endpoint. Does not + * touch the response body — callers are responsible for parsing/validating it. 
+ */ +async function executeRemoteEmbeddingRequest( + url: string, + headers: Record, + model: string, + batch: string[], + timeoutMs: number, + batchNumber: number, +): Promise { + const controller = new AbortController(); + const timeoutHandle = setTimeout(() => controller.abort(), timeoutMs); + + let response: Response; + try { + response = await fetch(url, { + method: 'POST', + headers, + body: JSON.stringify({ model, input: batch }), + signal: controller.signal, + }); + } catch (err: unknown) { + if (err instanceof Error && err.name === 'AbortError') { + throw new EngineError( + `Remote embedding endpoint ${url} did not respond within ${timeoutMs}ms ` + + `(batch ${batchNumber})`, + ); + } + throw new EngineError( + `Failed to reach remote embedding endpoint at ${url}: ${err instanceof Error ? err.message : String(err)}`, + { cause: err instanceof Error ? err : undefined }, + ); + } finally { + clearTimeout(timeoutHandle); + } + + if (!response.ok) { + const body = await response.text().catch(() => ''); + throw new EngineError( + `Remote embedding endpoint ${url} returned ${response.status} ${response.statusText}` + + (body ? `: ${body.slice(0, 500)}` : ''), + ); + } + + return response; +} + +/** + * Validate and map a parsed `/embeddings` response body into vectors: + * shape-check `data` against the batch length, sort by index (servers aren't + * guaranteed to preserve input order), validate each item's `embedding` + * field, and enforce dimension consistency against the running `dim` seen + * across earlier batches in this `embedRemote` call. + */ +function mapRemoteEmbeddingResponse( + json: OpenAIEmbeddingResponse, + batch: string[], + url: string, + dim: number, +): { vectors: Float32Array[]; dim: number } { + if (!Array.isArray(json.data) || json.data.length !== batch.length) { + throw new EngineError( + `Remote embedding endpoint ${url} returned an unexpected response shape ` + + `(expected ${batch.length} embeddings, got ${json.data?.length ?? 
0})`, + ); + } + + // OpenAI-compatible servers aren't guaranteed to preserve input order — sort by index. + const sorted = [...json.data].sort((a, b) => a.index - b.index); + const vectors: Float32Array[] = []; + for (const item of sorted) { + if (!Array.isArray(item.embedding)) { + throw new EngineError( + `Remote embedding endpoint ${url} returned an item with a missing or non-array ` + + `"embedding" field (index ${item.index})`, + ); + } + const vec = Float32Array.from(item.embedding); + if (dim === 0) { + dim = vec.length; + } else if (vec.length !== dim) { + throw new EngineError( + `Remote embedding endpoint ${url} returned inconsistent vector dimensions ` + + `(expected ${dim}, got ${vec.length} for response item index ${item.index})`, + ); + } + vectors.push(vec); + } + + return { vectors, dim }; +} + /** * Generate embeddings via a remote OpenAI-compatible `/embeddings` endpoint. * Works with OpenAI itself and any self-hosted server implementing the same @@ -90,69 +186,21 @@ export async function embedRemote( for (let i = 0; i < texts.length; i += REMOTE_BATCH_SIZE) { const batch = texts.slice(i, i + REMOTE_BATCH_SIZE); - - const controller = new AbortController(); - const timeoutHandle = setTimeout(() => controller.abort(), timeoutMs); - - let response: Response; - try { - response = await fetch(url, { - method: 'POST', - headers, - body: JSON.stringify({ model: options.model, input: batch }), - signal: controller.signal, - }); - } catch (err: unknown) { - if (err instanceof Error && err.name === 'AbortError') { - throw new EngineError( - `Remote embedding endpoint ${url} did not respond within ${timeoutMs}ms ` + - `(batch ${Math.floor(i / REMOTE_BATCH_SIZE) + 1})`, - ); - } - throw new EngineError( - `Failed to reach remote embedding endpoint at ${url}: ${err instanceof Error ? err.message : String(err)}`, - { cause: err instanceof Error ? 
err : undefined }, - ); - } finally { - clearTimeout(timeoutHandle); - } - - if (!response.ok) { - const body = await response.text().catch(() => ''); - throw new EngineError( - `Remote embedding endpoint ${url} returned ${response.status} ${response.statusText}` + - (body ? `: ${body.slice(0, 500)}` : ''), - ); - } + const batchNumber = Math.floor(i / REMOTE_BATCH_SIZE) + 1; + + const response = await executeRemoteEmbeddingRequest( + url, + headers, + options.model, + batch, + timeoutMs, + batchNumber, + ); const json = (await response.json()) as OpenAIEmbeddingResponse; - if (!Array.isArray(json.data) || json.data.length !== batch.length) { - throw new EngineError( - `Remote embedding endpoint ${url} returned an unexpected response shape ` + - `(expected ${batch.length} embeddings, got ${json.data?.length ?? 0})`, - ); - } - - // OpenAI-compatible servers aren't guaranteed to preserve input order — sort by index. - const sorted = [...json.data].sort((a, b) => a.index - b.index); - for (const item of sorted) { - if (!Array.isArray(item.embedding)) { - throw new EngineError( - `Remote embedding endpoint ${url} returned an item with a missing or non-array ` + - `"embedding" field (index ${item.index})`, - ); - } - const vec = Float32Array.from(item.embedding); - if (dim === 0) { - dim = vec.length; - } else if (vec.length !== dim) { - throw new EngineError( - `Remote embedding endpoint ${url} returned inconsistent vector dimensions ` + - `(expected ${dim}, got ${vec.length} for response item index ${item.index})`, - ); - } - results.push(vec); - } + const mapped = mapRemoteEmbeddingResponse(json, batch, url, dim); + dim = mapped.dim; + results.push(...mapped.vectors); if (texts.length > REMOTE_BATCH_SIZE) { process.stderr.write( From f31468c6bc28e1f0d2992417797102739043b85d Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 07:46:01 -0600 Subject: [PATCH 13/26] fix: correct in-place mutation bug in applyExcludeTestsShorthand and dedupe consent 
glob-matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit applyExcludeTestsShorthand mutated merged.query in place, which was still a live reference to the shared DEFAULTS.query singleton whenever no config layer had already overridden `query`. In long-running processes (e.g. `codegraph mcp --multi-repo`) this permanently leaked one repo's excludeTests setting into every subsequent loadConfig() call for any other repo. loadConfig now deep-clones DEFAULTS before merging so no layer can ever write onto a live DEFAULTS reference, and DEFAULTS itself is now deep-frozen so any future regression of this kind throws immediately instead of silently corrupting shared state. applyExcludeTestsShorthand was also hardened to copy-on-write its `query` key directly. Also dedupes the appliesTo-glob-matching logic (previously copy-pasted between resolveConsent and promptForConsentIfNeeded) into a shared matchesAppliesTo helper. No user-facing behavior, CLI surface, or language support changed — docs check acknowledged. Fixes #1725 Impact: 6 functions changed, 140 affected --- src/infrastructure/config.ts | 84 ++++++++++++++++++++++++++---------- tests/unit/config.test.ts | 69 +++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+), 23 deletions(-) diff --git a/src/infrastructure/config.ts b/src/infrastructure/config.ts index 7e23f90ba..6181d450e 100644 --- a/src/infrastructure/config.ts +++ b/src/infrastructure/config.ts @@ -17,7 +17,27 @@ export const CONFIG_FILES: readonly string[] = [ 'codegraph.config.json', ]; -export const DEFAULTS = { +/** + * Recursively freeze an object (and all nested plain objects/arrays) so any + * accidental in-place mutation throws immediately (ES modules run in strict + * mode) instead of silently corrupting shared state. 
Applied to DEFAULTS + * below — see the loadConfig mutation-leak bug (issue #1725): DEFAULTS.query + * (and, by the same aliasing pattern, DEFAULTS.llm / DEFAULTS.build) must + * never be a live target for mergeConfig / applyExcludeTestsShorthand / + * applyEnvOverrides / resolveSecrets to write onto, whether directly or via + * an un-cloned nested reference. + */ +function deepFreeze(obj: T): T { + if (obj !== null && typeof obj === 'object' && !Object.isFrozen(obj)) { + Object.freeze(obj); + for (const value of Object.values(obj as Record)) { + deepFreeze(value); + } + } + return obj; +} + +export const DEFAULTS = deepFreeze({ include: [] as string[], exclude: [] as string[], ignoreDirs: [] as string[], @@ -209,7 +229,7 @@ export const DEFAULTS = { }, disabledTools: [] as string[], }, -} satisfies CodegraphConfig; +} satisfies CodegraphConfig); // ── Per-process user-config override (set by CLI flags) ──────────────── // Set once by the preAction hook before any command runs; cleared when changed. @@ -376,12 +396,19 @@ function applyExcludeTestsShorthand( rawLayer: Record, ): Record { if ('excludeTests' in rawLayer) { + const result = { ...merged }; + delete result.excludeTests; // Only hoist if this layer doesn't also set query.excludeTests if (!(rawLayer.query && 'excludeTests' in (rawLayer.query as object))) { - (merged.query as Record).excludeTests = Boolean(rawLayer.excludeTests); + // Copy-on-write: never mutate `merged.query` in place. If no layer so + // far has overridden `query`, `merged.query` is still the same object + // reference as DEFAULTS.query — writing to it directly would + // permanently poison the shared DEFAULTS singleton (issue #1725). 
+ result.query = { + ...(merged.query as Record), + excludeTests: Boolean(rawLayer.excludeTests), + }; } - const result = { ...merged }; - delete result.excludeTests; return result; } if ('excludeTests' in merged) { @@ -400,6 +427,22 @@ interface ConsentResolutionResult { consentDecision: ConsentDecision | undefined; } +/** + * Check whether `rootDir` matches any of a parsed global config's `appliesTo` + * glob patterns (§4.2 step 3 of the user-config consent spec). Shared by + * `resolveConsent` and `promptForConsentIfNeeded` — previously duplicated + * verbatim between the two call sites. + */ +function matchesAppliesTo(parsed: ParsedUserConfig | null, rootDir: string): boolean { + if (!parsed?.appliesToGlobs.length) return false; + const expanded = parsed.appliesToGlobs.map((g) => + g.startsWith('~') ? path.join(os.homedir(), g.slice(1)) : g, + ); + const regexes = compileGlobs(expanded); + const absRoot = path.resolve(rootDir); + return matchesAny(regexes, absRoot); +} + /** * Resolve whether the global user config should be applied for a given repo. * Implements the §4.1/§4.2 precedence chain from the spec. @@ -451,15 +494,8 @@ function resolveConsent( // §4.2 step 3: appliesTo glob match (dynamic, never persisted) const parsed = loadUserConfigFile(globalPath); - if (parsed?.appliesToGlobs.length) { - const expanded = parsed.appliesToGlobs.map((g) => - g.startsWith('~') ? 
path.join(os.homedir(), g.slice(1)) : g, - ); - const regexes = compileGlobs(expanded); - const absRoot = path.resolve(rootDir); - if (matchesAny(regexes, absRoot)) { - return { applied: true, globalPath, consentDecision: undefined }; - } + if (matchesAppliesTo(parsed, rootDir)) { + return { applied: true, globalPath, consentDecision: undefined }; } // §4.2 steps 4–5: undecided — caller decides whether to prompt @@ -531,14 +567,7 @@ export async function promptForConsentIfNeeded( // Check appliesTo globs (dynamic consent — no prompt needed) const parsed = loadUserConfigFile(globalPath); - if (parsed?.appliesToGlobs.length) { - const expanded = parsed.appliesToGlobs.map((g) => - g.startsWith('~') ? path.join(os.homedir(), g.slice(1)) : g, - ); - const regexes = compileGlobs(expanded); - const absRoot = path.resolve(rootDir); - if (matchesAny(regexes, absRoot)) return; // covered by appliesTo - } + if (matchesAppliesTo(parsed, rootDir)) return; // covered by appliesTo // Only prompt in fully interactive sessions if (!process.stdin.isTTY || !process.stdout.isTTY) return; @@ -605,7 +634,16 @@ export function loadConfig(cwd?: string, opts?: LoadConfigOpts): CodegraphConfig } // ── Layer 0: DEFAULTS ───────────────────────────────────────────── - let merged = DEFAULTS as unknown as Record; + // Deep-clone so later layers (mergeConfig / applyExcludeTestsShorthand / + // applyEnvOverrides / resolveSecrets) never hold a live reference into the + // shared, frozen DEFAULTS singleton — writing to a nested key here must + // only ever affect this call's private copy. See issue #1725: + // DEFAULTS.query used to leak mutations across repos in long-running + // processes (e.g. `codegraph mcp --multi-repo`) because mergeConfig's + // shallow copy leaves untouched nested keys pointing straight at the + // DEFAULTS object — the same aliasing risk applied to DEFAULTS.llm / + // DEFAULTS.build via applyEnvOverrides / resolveSecrets. 
+ let merged = structuredClone(DEFAULTS) as unknown as Record; // ── Layer 1: global (if applied) ────────────────────────────────── if (applied && globalPath) { diff --git a/tests/unit/config.test.ts b/tests/unit/config.test.ts index df0baede5..14122333c 100644 --- a/tests/unit/config.test.ts +++ b/tests/unit/config.test.ts @@ -333,6 +333,30 @@ describe('excludeTests hoisting', () => { expect(config.query.excludeTests).toBe(false); expect(config.excludeTests).toBeUndefined(); }); + + it('does not leak excludeTests across repos via the shared DEFAULTS singleton (issue #1725)', () => { + // Regression test: applyExcludeTestsShorthand used to write + // `merged.query.excludeTests` in place. Since mergeConfig only deep-copies + // a nested key when overrides actually set it, `merged.query` for a repo + // whose config uses ONLY the top-level `excludeTests` shorthand is still + // the literal DEFAULTS.query object — so the in-place write permanently + // mutated the module-level DEFAULTS singleton. In a long-running process + // (e.g. `codegraph mcp --multi-repo`) a later loadConfig() call for a + // totally unrelated repo would then silently inherit excludeTests: true. + const dirA = fs.mkdtempSync(path.join(tmpDir, 'exclude-leak-a-')); + fs.writeFileSync(path.join(dirA, '.codegraphrc.json'), JSON.stringify({ excludeTests: true })); + const configA = loadConfig(dirA); + expect(configA.query.excludeTests).toBe(true); + + // A second, unrelated repo with no excludeTests config of its own must + // still see the true default (false), not repo A's leaked value. + const dirB = fs.mkdtempSync(path.join(tmpDir, 'exclude-leak-b-')); + const configB = loadConfig(dirB); + expect(configB.query.excludeTests).toBe(false); + + // The shared DEFAULTS singleton itself must never be mutated. 
+ expect(DEFAULTS.query.excludeTests).toBe(false); + }); }); describe('applyEnvOverrides', () => { @@ -445,6 +469,51 @@ describe('applyEnvOverrides', () => { }); }); +describe('DEFAULTS singleton immutability across loadConfig calls (issue #1725)', () => { + // The excludeTests-hoisting leak (above) was one symptom of a broader bug: + // when no config layer sets a given top-level key (e.g. `llm` or `build`), + // mergeConfig's shallow copy leaves `merged.` pointing straight at + // DEFAULTS.. applyEnvOverrides/resolveSecrets then write onto that + // nested object in place, permanently poisoning DEFAULTS for the rest of + // the process — the same aliasing pattern as applyExcludeTestsShorthand, + // just reached via env vars instead of a config-file shorthand. Covered + // here since it's the same root cause (loadConfig used to start from a + // live reference to DEFAULTS) rather than a separate bug. + afterEach(() => { + delete process.env.CODEGRAPH_ENGINE; + delete process.env.CODEGRAPH_LLM_API_KEY; + }); + + it('does not leak an env-driven build.engine override into DEFAULTS.build', () => { + const dir = fs.mkdtempSync(path.join(tmpDir, 'defaults-freeze-engine-')); + process.env.CODEGRAPH_ENGINE = 'native'; + let config: ReturnType | undefined; + expect(() => { + config = loadConfig(dir); + }).not.toThrow(); + expect(config?.build.engine).toBe('native'); + expect(DEFAULTS.build.engine).toBe('auto'); + }); + + it('does not leak an env-driven llm.apiKey override into DEFAULTS.llm', () => { + const dir = fs.mkdtempSync(path.join(tmpDir, 'defaults-freeze-apikey-')); + process.env.CODEGRAPH_LLM_API_KEY = 'sk-should-not-leak'; + let config: ReturnType | undefined; + expect(() => { + config = loadConfig(dir); + }).not.toThrow(); + expect(config?.llm.apiKey).toBe('sk-should-not-leak'); + expect(DEFAULTS.llm.apiKey).toBeNull(); + }); + + it('DEFAULTS is deeply frozen', () => { + expect(Object.isFrozen(DEFAULTS)).toBe(true); + 
expect(Object.isFrozen(DEFAULTS.query)).toBe(true); + expect(Object.isFrozen(DEFAULTS.llm)).toBe(true); + expect(Object.isFrozen(DEFAULTS.build)).toBe(true); + }); +}); + describe('resolveSecrets', () => { let mockExecFile: any; From 57d37825846c4fa04f292ad9ddea57b349661194 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 10:05:42 -0600 Subject: [PATCH 14/26] fix: correct connection-leak ordering in openReadonlyWithNative, dedupe engine resolution openReadonlyWithNative opened the better-sqlite3 handle before resolving the engine, and engine resolution calls loadConfig(), which can throw (e.g. ConfigError from resolveSecrets on a malformed llm.apiKeyCommand). If that throw happened, the already-open DB handle was never closed -- a real leak on the hot path used by dataflow/hotspots/stats commands. Fix: resolve the engine (and thus loadConfig) before opening the DB, mirroring openRepo's existing, correct ordering. Extracted the shared engine-resolution logic (customDbPath > rootDir > loadConfig priority chain) into resolveDbEngine(), used by both openRepo and openReadonlyWithNative so the two call sites can't drift again. Added tests/unit/openReadonlyWithNative-leak.test.ts: tracks every better-sqlite3 Database instantiation and asserts zero occur when loadConfig throws. Verified this test fails against the pre-fix ordering (it recorded a leaked instance) and passes against the fix. docs check acknowledged: internal bug fix + dedup, no CLI surface, language support, or documented architecture/design decision changed. 
Impact: 3 functions changed, 38 affected --- src/db/connection.ts | 52 +++++---- .../unit/openReadonlyWithNative-leak.test.ts | 103 ++++++++++++++++++ 2 files changed, 135 insertions(+), 20 deletions(-) create mode 100644 tests/unit/openReadonlyWithNative-leak.test.ts diff --git a/src/db/connection.ts b/src/db/connection.ts index fe7145283..eac517849 100644 --- a/src/db/connection.ts +++ b/src/db/connection.ts @@ -349,6 +349,29 @@ export function openReadonlyOrFail(customPath?: string): BetterSqlite3Database { return db; } +/** + * Resolve the effective engine for DB access: explicit opts.engine > config.build.engine > 'auto'. + * Derives rootDir from the resolved DB path so loadConfig reads the right project config. + * Shared by openRepo() and openReadonlyWithNative() so the two call sites can't drift. + * + * MUST be called before opening any DB handle: loadConfig can throw (e.g. ConfigError + * via resolveSecrets on a malformed llm.apiKeyCommand config), and an already-open + * handle at that point would never be closed. + */ +function resolveDbEngine( + customDbPath: string | undefined, + engineOpt: 'native' | 'wasm' | 'auto' | undefined, +): 'native' | 'wasm' | 'auto' { + // Using findDbPath (not path.resolve(customDbPath)) ensures directory inputs like + // --db /path/to/repo are normalised to .codegraph/graph.db before we strip two levels. + // Convention: resolvedDbPath = /.codegraph/graph.db + const resolvedDbPath = customDbPath ? findDbPath(customDbPath) : undefined; + const rootDir = resolvedDbPath ? path.dirname(path.dirname(resolvedDbPath)) : undefined; + // config.build.engine is already populated from CODEGRAPH_ENGINE env by applyEnvOverrides, + // so this covers both the env-var path and the .codegraphrc.json config-file path. + return engineOpt ?? loadConfig(rootDir).build.engine ?? 'auto'; +} + /** Open a NativeRepository via rusqlite, throwing DbError if the DB file is missing. 
*/ function openRepoNative(customDbPath?: string): { repo: Repository; close(): void } { const dbPath = findDbPath(customDbPath); @@ -397,17 +420,9 @@ export function openRepo( return { repo: opts.repo, close() {} }; } - // Derive rootDir from the resolved DB path so loadConfig reads the right project config. - // Using findDbPath (not path.resolve(customDbPath)) ensures directory inputs like - // --db /path/to/repo are normalised to .codegraph/graph.db before we strip two levels. - // Convention: resolvedDbPath = /.codegraph/graph.db - const resolvedDbPath = customDbPath ? findDbPath(customDbPath) : undefined; - const rootDir = resolvedDbPath ? path.dirname(path.dirname(resolvedDbPath)) : undefined; // Respect explicit engine selection: opts.engine > config.build.engine > auto. - // config.build.engine is already populated from CODEGRAPH_ENGINE env by applyEnvOverrides, - // so this covers both the env-var path and the .codegraphrc.json config-file path. // This ensures --engine wasm and benchmark workers bypass the native path. - const engine = opts.engine ?? loadConfig(rootDir).build.engine ?? 'auto'; + const engine = resolveDbEngine(customDbPath, opts.engine); // Try native rusqlite path first (Phase 6.14) if (engine !== 'wasm' && isNativeAvailable()) { @@ -455,18 +470,15 @@ export function openReadonlyWithNative( nativeDb: NativeDatabase | undefined; close(): void; } { - const db = openReadonlyOrFail(customPath); + // Resolve engine (which may call loadConfig — and loadConfig can throw, e.g. + // ConfigError via resolveSecrets on a malformed llm.apiKeyCommand config) BEFORE + // opening the DB handle, mirroring openRepo()'s ordering. If this throws, no DB + // handle has been opened yet, so nothing is left leaked. (Previously this ran + // AFTER openReadonlyOrFail(), so a config error here leaked the already-open + // better-sqlite3 handle — see the phase-15 gauntlet finding.) 
+ const engine = resolveDbEngine(customPath, opts.engine); - // Derive rootDir from the resolved DB path so loadConfig reads the right project config, - // consistent with openRepo(). Using findDbPath (not path.resolve(customPath)) ensures - // directory inputs like --db /path/to/repo are normalised before stripping two levels. - // Convention: resolvedDbPath = /.codegraph/graph.db - const resolvedDbPath = customPath ? findDbPath(customPath) : undefined; - const rootDir = resolvedDbPath ? path.dirname(path.dirname(resolvedDbPath)) : undefined; - // Respect explicit engine selection: opts.engine > config.build.engine > auto. - // config.build.engine covers both CODEGRAPH_ENGINE env (via applyEnvOverrides) - // and the .codegraphrc.json config-file path. Mirrors openRepo() priority chain. - const engine = opts.engine ?? loadConfig(rootDir).build.engine ?? 'auto'; + const db = openReadonlyOrFail(customPath); let nativeDb: NativeDatabase | undefined; if (engine !== 'wasm' && isNativeAvailable()) { diff --git a/tests/unit/openReadonlyWithNative-leak.test.ts b/tests/unit/openReadonlyWithNative-leak.test.ts new file mode 100644 index 000000000..9fb6fac9c --- /dev/null +++ b/tests/unit/openReadonlyWithNative-leak.test.ts @@ -0,0 +1,103 @@ +/** + * Regression test for a resource leak in openReadonlyWithNative (GAUNTLET + * phase-15 finding, rule 5): the function used to open the better-sqlite3 + * DB handle BEFORE resolving the engine, and engine resolution calls + * loadConfig(), which can throw (e.g. ConfigError from resolveSecrets when + * llm.apiKeyCommand is malformed). If that throw happened, the already-open + * DB handle was never closed — a real leak on a hot path used by + * dataflow/hotspots/stats CLI commands. + * + * The fix reorders openReadonlyWithNative() to resolve the engine (and thus + * call loadConfig) BEFORE opening the DB, mirroring openRepo()'s existing + * ordering. 
This test proves the fix by tracking every better-sqlite3 + * `Database` instantiation: when loadConfig throws, zero instances should + * ever be constructed (there's nothing to leak because nothing was opened). + */ +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import { afterAll, beforeAll, beforeEach, describe, expect, it, vi } from 'vitest'; + +const loadConfigSpy = vi.hoisted(() => vi.fn()); +const openedInstances = vi.hoisted(() => [] as { close: () => void }[]); + +// Delegate to the real loadConfig by default; individual tests override with +// mockImplementationOnce to simulate a throwing config resolution. +vi.mock('../../src/infrastructure/config.js', async (importOriginal) => { + const mod = await importOriginal(); + loadConfigSpy.mockImplementation(mod.loadConfig); + return { ...mod, loadConfig: loadConfigSpy }; +}); + +// Wrap the real better-sqlite3 Database constructor so every instantiation +// is recorded. This lets tests assert "no handle was ever opened" directly, +// rather than inferring it indirectly. +vi.mock('../../src/db/better-sqlite3.js', async (importOriginal) => { + const mod = await importOriginal(); + return { + ...mod, + getDatabase: () => { + const RealDatabase = mod.getDatabase(); + return new Proxy(RealDatabase, { + construct(target, args) { + const instance = Reflect.construct(target, args) as { close: () => void }; + openedInstances.push(instance); + return instance; + }, + }); + }, + }; +}); + +import { closeDb, initSchema, openDb, openReadonlyWithNative } from '../../src/db/index.js'; + +let tmpDir: string; +let dbPath: string; + +beforeAll(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-leak-')); + dbPath = path.join(tmpDir, 'graph.db'); + const db = openDb(dbPath); + initSchema(db); + closeDb(db); +}); + +beforeEach(() => { + // Only count instantiations made during the test body itself. 
+ openedInstances.length = 0; + loadConfigSpy.mockClear(); +}); + +afterAll(() => { + if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +describe('openReadonlyWithNative resource-leak regression', () => { + it('does not open (and therefore cannot leak) a DB handle when engine/config resolution throws', () => { + loadConfigSpy.mockImplementationOnce(() => { + throw new Error('ConfigError: llm.apiKeyCommand must be a string'); + }); + + expect(() => openReadonlyWithNative(dbPath)).toThrow(/apiKeyCommand/); + + // The regression: previously openReadonlyOrFail() (which constructs the + // better-sqlite3 Database) ran BEFORE the loadConfig() call that could + // throw, so a config error left an already-opened handle dangling + // forever with no way for the caller to close it. With the fix, engine + // resolution runs first, so a thrown config error means the Database + // constructor is never invoked at all. + expect(openedInstances).toHaveLength(0); + }); + + it('still opens successfully and closes cleanly when config resolution succeeds', () => { + const result = openReadonlyWithNative(dbPath); + expect(result.db).toBeDefined(); + expect(openedInstances).toHaveLength(1); + + result.close(); + + // Prove the handle was actually closed, not merely constructed: + // any query against a closed better-sqlite3 connection throws. + expect(() => result.db.prepare('SELECT 1').get()).toThrow(); + }); +}); From 506e2cefeb0414ec2462251993e4e0fbd31e1941 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 10:22:53 -0600 Subject: [PATCH 15/26] fix: add debug() logging to silent catch blocks across builder pipeline and cli/commands/info.ts Converts 13 comment-only/silent catch blocks in pipeline.ts, native-orchestrator.ts, detect-changes.ts, helpers.ts, and info.ts from `catch { /* comment */ }` to `catch (e) { debug(...) }`, using the existing infrastructure/logger.ts debug() utility. 
Purely additive observability -- no control-flow changes, no change to what errors are swallowed vs rethrown. docs check acknowledged: internal logging-only change, no new feature/language/ architecture/command surface to document in README/CLAUDE.md/ROADMAP.md. Impact: 13 functions changed, 17 affected --- src/cli/commands/info.ts | 5 +++- src/domain/graph/builder/helpers.ts | 7 +++-- src/domain/graph/builder/pipeline.ts | 8 +++--- .../graph/builder/stages/detect-changes.ts | 9 ++++--- .../builder/stages/native-orchestrator.ts | 27 ++++++++++++------- 5 files changed, 36 insertions(+), 20 deletions(-) diff --git a/src/cli/commands/info.ts b/src/cli/commands/info.ts index fef4e2f9b..a68fad60f 100644 --- a/src/cli/commands/info.ts +++ b/src/cli/commands/info.ts @@ -1,3 +1,5 @@ +import { debug } from '../../infrastructure/logger.js'; +import { toErrorMessage } from '../../shared/errors.js'; import type { CommandDefinition } from '../types.js'; export const command: CommandDefinition = { @@ -72,8 +74,9 @@ export const command: CommandDefinition = { console.log(); } } - } catch { + } catch (e) { /* diagnostics must never crash */ + debug(`DB build-metadata diagnostics failed: ${toErrorMessage(e)}`); } }, }; diff --git a/src/domain/graph/builder/helpers.ts b/src/domain/graph/builder/helpers.ts index f0f34cffa..39ab75e5a 100644 --- a/src/domain/graph/builder/helpers.ts +++ b/src/domain/graph/builder/helpers.ts @@ -9,6 +9,7 @@ import path from 'node:path'; import { purgeFilesData } from '../../../db/index.js'; import { debug, warn } from '../../../infrastructure/logger.js'; import { buildIgnoreSet, EXTENSIONS, normalizePath } from '../../../shared/constants.js'; +import { toErrorMessage } from '../../../shared/errors.js'; import { compileGlobs, globToRegex, matchesAny } from '../../../shared/globs.js'; import { sleepSync } from '../../../shared/sleep.js'; import type { @@ -126,8 +127,9 @@ export function readGitignorePatterns(rootDir: string): readonly RegExp[] { 
normalized = pattern; } regexes.push(globToRegex(normalized)); - } catch { + } catch (e) { // Ignore patterns that don't compile (e.g. those with unsupported syntax) + debug(`.gitignore pattern "${pattern}" failed to compile, skipping: ${toErrorMessage(e)}`); } } return Object.freeze(regexes); @@ -150,7 +152,8 @@ function isSymlinkLoop(dir: string, visited: Set): boolean { let realDir: string; try { realDir = fs.realpathSync(dir); - } catch { + } catch (e) { + debug(`realpathSync failed for ${dir}, treating as symlink loop: ${toErrorMessage(e)}`); return true; } if (visited.has(realDir)) { diff --git a/src/domain/graph/builder/pipeline.ts b/src/domain/graph/builder/pipeline.ts index d3360368b..fa35e9e1a 100644 --- a/src/domain/graph/builder/pipeline.ts +++ b/src/domain/graph/builder/pipeline.ts @@ -354,8 +354,8 @@ async function runPipelineStages(ctx: PipelineContext): Promise { if (tree && typeof tree.delete === 'function') { try { tree.delete(); - } catch { - /* ignore cleanup errors */ + } catch (e) { + debug(`WASM tree cleanup failed: ${toErrorMessage(e)}`); } } symbols._tree = undefined; @@ -497,8 +497,8 @@ export async function buildGraph( if (tree && typeof tree.delete === 'function') { try { tree.delete(); - } catch { - /* ignore cleanup errors */ + } catch (e) { + debug(`WASM tree cleanup failed: ${toErrorMessage(e)}`); } } symbols._tree = undefined; diff --git a/src/domain/graph/builder/stages/detect-changes.ts b/src/domain/graph/builder/stages/detect-changes.ts index 7f68fafdb..a216beccb 100644 --- a/src/domain/graph/builder/stages/detect-changes.ts +++ b/src/domain/graph/builder/stages/detect-changes.ts @@ -11,6 +11,7 @@ import { performance } from 'node:perf_hooks'; import { closeDb } from '../../../../db/index.js'; import { debug, info } from '../../../../infrastructure/logger.js'; import { normalizePath } from '../../../../shared/constants.js'; +import { toErrorMessage } from '../../../../shared/errors.js'; import type { BetterSqlite3Database, 
ExtractorOutput, NativeDatabase } from '../../../../types.js'; import { parseFilesAuto } from '../../../parser.js'; import { readJournal, writeJournalHeader } from '../../journal.js'; @@ -66,8 +67,8 @@ function getChangedFiles( try { db.prepare('SELECT 1 FROM file_hashes LIMIT 1').get(); hasTable = true; - } catch { - /* table doesn't exist */ + } catch (e) { + debug(`file_hashes table probe failed, assuming table doesn't exist: ${toErrorMessage(e)}`); } if (!hasTable) { @@ -331,8 +332,8 @@ function healMetadata(ctx: PipelineContext): void { healTx(); } debug(`Self-healed mtime/size for ${metadataUpdates.length} files`); - } catch { - /* ignore heal errors */ + } catch (e) { + debug(`Self-heal of mtime/size metadata failed: ${toErrorMessage(e)}`); } } diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index 0ca5d25b2..c0157cf95 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -406,8 +406,11 @@ async function runDataflowVertexPass( } try { result = native.extractDataflowAnalysis(source, absPaths[i]!); - } catch { + } catch (e) { // Language-specific parse failure — fall through to WASM. 
+ debug( + `native dataflow extraction failed for ${relPath}, falling back to WASM: ${toErrorMessage(e)}`, + ); } } if (result) { @@ -624,13 +627,13 @@ async function runPostNativeAnalysis( if (ctx.nativeDb) { try { ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)'); - } catch { - /* ignore checkpoint errors */ + } catch (e) { + debug(`native DB post-analysis WAL checkpoint failed: ${toErrorMessage(e)}`); } try { ctx.nativeDb.close(); - } catch { - /* ignore close errors */ + } catch (e) { + debug(`native DB close failed: ${toErrorMessage(e)}`); } ctx.nativeDb = undefined; if (ctx.engineOpts) { @@ -1216,8 +1219,8 @@ function cleanupThisDispatchWasmTrees(wasmResults: Map) if (tree && typeof tree.delete === 'function') { try { tree.delete(); - } catch { - /* ignore cleanup errors */ + } catch (e) { + debug(`WASM tree cleanup failed: ${toErrorMessage(e)}`); } } (symbols as { _tree?: unknown; _langId?: unknown })._tree = undefined; @@ -2175,8 +2178,11 @@ class NativeOrchestrationSession { const nativeDb = this.ctx.nativeDb as NonNullable; try { nativeDb.exec('PRAGMA foreign_keys = OFF'); - } catch { + } catch (e) { // exec may not exist on very old addon versions — safe to ignore + debug( + `PRAGMA foreign_keys=OFF failed (safe to ignore on old addon versions): ${toErrorMessage(e)}`, + ); } let resultJson: string; @@ -2193,8 +2199,11 @@ class NativeOrchestrationSession { // throws. 
try { nativeDb.exec('PRAGMA foreign_keys = ON'); - } catch { + } catch (e) { // safe to ignore on very old addon versions + debug( + `PRAGMA foreign_keys=ON restore failed (safe to ignore on old addon versions): ${toErrorMessage(e)}`, + ); } } From dbf34b87b2209ce772cc7ee429ce392894747522 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 10:35:58 -0600 Subject: [PATCH 16/26] refactor: split buildChaContext into three focused builder functions (docs check acknowledged) Impact: 4 functions changed, 4 affected --- src/domain/graph/builder/cha.ts | 97 +++++++++++++++++++++------------ 1 file changed, 61 insertions(+), 36 deletions(-) diff --git a/src/domain/graph/builder/cha.ts b/src/domain/graph/builder/cha.ts index b2ff8bc76..20a7b1b90 100644 --- a/src/domain/graph/builder/cha.ts +++ b/src/domain/graph/builder/cha.ts @@ -10,7 +10,7 @@ * - buildChaPostPass (native path) — JS post-pass on top of native edges */ -import type { ExtractorOutput } from '../../../types.js'; +import type { ClassRelation, ExtractorOutput } from '../../../types.js'; import type { CallNodeLookup } from './call-resolver.js'; // ── CHA context ────────────────────────────────────────────────────────────── @@ -30,6 +30,63 @@ export const EMPTY_CHA_CONTEXT: ChaContext = { instantiatedTypes: new Set(), }; +/** + * Record a class's `implements` relationship into the implementors map + * (interface/class name → concrete classes that implement it). + */ +function recordImplements(cls: ClassRelation, implementors: Map): void { + if (!cls.implements) return; + let list = implementors.get(cls.implements); + if (!list) { + list = []; + implementors.set(cls.implements, list); + } + if (!list.includes(cls.name)) list.push(cls.name); +} + +/** + * Record a class's `extends` relationship into both the parents map (child → + * direct parent, for this/super hierarchy walking) and the implementors map + * (parent → children, for CHA dispatch expansion via extends). 
+ */ +function recordExtends( + cls: ClassRelation, + implementors: Map, + parents: Map, +): void { + if (!cls.extends) return; + // child → parent (for this/super hierarchy walking) + if (!parents.has(cls.name)) parents.set(cls.name, cls.extends); + // parent → children (for CHA dispatch expansion via extends) + let list = implementors.get(cls.extends); + if (!list) { + list = []; + implementors.set(cls.extends, list); + } + if (!list.includes(cls.name)) list.push(cls.name); +} + +/** + * RTA: collect instantiated class names for one file's symbols — the Phase + * 8.5 dedicated `newExpressions` list (all `new X()` in the file), plus the + * constructor-confidence typeMap fallback (confidence >= 0.9) that covers + * codebases that haven't been re-parsed since Phase 8.5 was added. + */ +function collectInstantiatedTypes(symbols: ExtractorOutput, instantiatedTypes: Set): void { + if (symbols.newExpressions) { + for (const typeName of symbols.newExpressions) { + instantiatedTypes.add(typeName); + } + } + if (symbols.typeMap instanceof Map) { + for (const entry of symbols.typeMap.values()) { + if (typeof entry !== 'string' && entry.confidence >= 0.9) { + instantiatedTypes.add(entry.type); + } + } + } +} + /** * Build the CHA context from all parsed file symbols. * @@ -43,42 +100,10 @@ export function buildChaContext(fileSymbols: ReadonlyMap= 0.9) - // covers codebases that haven't been re-parsed since Phase 8.5 was added. 
- if (symbols.typeMap instanceof Map) { - for (const entry of symbols.typeMap.values()) { - if (typeof entry !== 'string' && entry.confidence >= 0.9) { - instantiatedTypes.add(entry.type); - } - } + recordImplements(cls, implementors); + recordExtends(cls, implementors, parents); } + collectInstantiatedTypes(symbols, instantiatedTypes); } return { implementors, parents, instantiatedTypes }; From a1946af21e8a59258276e59bb6e9ace267ada993 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 10:49:32 -0600 Subject: [PATCH 17/26] refactor: split purgeAndAddReverseDeps and wire fast-skip-diag via config (docs check acknowledged) Impact: 6 functions changed, 12 affected --- src/domain/graph/builder/pipeline.ts | 8 +- .../graph/builder/stages/detect-changes.ts | 210 +++++++++++------- 2 files changed, 137 insertions(+), 81 deletions(-) diff --git a/src/domain/graph/builder/pipeline.ts b/src/domain/graph/builder/pipeline.ts index fa35e9e1a..490c5fe07 100644 --- a/src/domain/graph/builder/pipeline.ts +++ b/src/domain/graph/builder/pipeline.ts @@ -439,7 +439,13 @@ export async function buildGraph( try { await collectFiles(ctx); if ( - detectNoChanges(ctx.db, ctx.allFiles, ctx.rootDir, ctx.opts as Record) + detectNoChanges( + ctx.db, + ctx.allFiles, + ctx.rootDir, + ctx.opts as Record, + fastSkipDiag, + ) ) { info('No changes detected. Graph is up to date.'); writeJournalHeader(ctx.rootDir, Date.now()); diff --git a/src/domain/graph/builder/stages/detect-changes.ts b/src/domain/graph/builder/stages/detect-changes.ts index a216beccb..c33f6c70c 100644 --- a/src/domain/graph/builder/stages/detect-changes.ts +++ b/src/domain/graph/builder/stages/detect-changes.ts @@ -377,91 +377,132 @@ function findReverseDependencies( return reverseDeps; } -function purgeAndAddReverseDeps( +/** + * Reconnects reverse-dep files to the changed files they depend on. 
+ * + * Native path: purgeFilesData already deleted + rebuilt the affected edges in + * one transaction, so this just enqueues the reverse-dep files for reparse + * (works correctly with the native edge builder). + * + * WASM/JS path: saves the edge topology from reverse-dep files → changed + * files BEFORE purge runs, so it can be reconnected to new node IDs after + * insertNodes (#932, #933). purgeFilesFromGraph deletes edges in BOTH + * directions for changed files, which already removes the reverse-dep → + * changed-file edges. The old approach then over-deleted ALL outgoing edges + * from reverse-dep files and reparsed them to rebuild everything — expensive + * (87 extra parses) and lossy (442 missing edges due to imperfect resolution + * on rebuild). This approach saves the edge topology, lets purge handle + * deletion, then reconnects using new node IDs. No reparse needed. + */ +function addReverseDeps( ctx: PipelineContext, changePaths: string[], reverseDeps: Set, + hasPurge: boolean, ): void { const { db, rootDir } = ctx; + if (ctx.engineName === 'native' && ctx.nativeDb?.purgeFilesData) { + for (const relPath of reverseDeps) { + const absPath = path.join(rootDir, relPath); + ctx.parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true }); + } + return; + } + + if (!(reverseDeps.size > 0 && hasPurge)) return; + const changePathSet = new Set(changePaths); + const saveEdgesStmt = db.prepare(` + SELECT e.source_id, n_tgt.name AS tgt_name, n_tgt.kind AS tgt_kind, + n_tgt.file AS tgt_file, n_tgt.line AS tgt_line, + e.kind AS edge_kind, e.confidence, e.dynamic, e.technique, e.dynamic_kind, + n_src.file AS src_file + FROM edges e + JOIN nodes n_src ON e.source_id = n_src.id + JOIN nodes n_tgt ON e.target_id = n_tgt.id + WHERE n_tgt.file = ? 
AND n_src.file != n_tgt.file + `); + for (const changedPath of changePaths) { + for (const row of saveEdgesStmt.all(changedPath) as Array<{ + source_id: number; + tgt_name: string; + tgt_kind: string; + tgt_file: string; + tgt_line: number; + edge_kind: string; + confidence: number; + dynamic: number; + technique: string | null; + dynamic_kind: string | null; + src_file: string; + }>) { + // Skip edges whose source is also being purged — buildEdges will + // re-create them with correct new IDs. + if (changePathSet.has(row.src_file)) continue; + ctx.savedReverseDepEdges.push({ + sourceId: row.source_id, + tgtName: row.tgt_name, + tgtKind: row.tgt_kind, + tgtFile: row.tgt_file, + tgtLine: row.tgt_line, + edgeKind: row.edge_kind, + confidence: row.confidence, + dynamic: row.dynamic, + technique: row.technique, + dynamicKind: row.dynamic_kind, + }); + } + } + debug(`Saved ${ctx.savedReverseDepEdges.length} reverse-dep edges for reconnection`); +} + +/** + * Deletes graph data for removed/changed files (and, on the native path, + * their reverse-dep edges) in one call. See `addReverseDeps` for the + * counterpart that reconnects reverse-dep topology around this deletion. + */ +function purgeStaleReverseDeps( + ctx: PipelineContext, + filesToPurge: string[], + hasPurge: boolean, + hasReverseDeps: boolean, + reverseDepList: string[], +): void { + // Prefer NativeDatabase: purge + reverse-dep edge deletion in one transaction (#670) + if (ctx.engineName === 'native' && ctx.nativeDb?.purgeFilesData) { + ctx.nativeDb.purgeFilesData(filesToPurge, false, hasReverseDeps ? reverseDepList : undefined); + return; + } + // No outgoing-edge deletion for reverse-deps — purge already removed + // edges targeting the changed files, and other outgoing edges are valid. + // No reverse-deps added to parseChanges — no reparse needed. 
+ if (hasPurge) { + purgeFilesFromGraph(ctx.db, filesToPurge, { purgeHashes: false }); + } +} + +function purgeAndAddReverseDeps( + ctx: PipelineContext, + changePaths: string[], + reverseDeps: Set, +): void { const hasPurge = changePaths.length > 0 || ctx.removed.length > 0; const hasReverseDeps = reverseDeps.size > 0; const reverseDepList = hasReverseDeps ? [...reverseDeps] : []; - if (hasPurge || hasReverseDeps) { - const filesToPurge = hasPurge ? [...ctx.removed, ...changePaths] : []; - // Prefer NativeDatabase: purge + reverse-dep edge deletion in one transaction (#670) - if (ctx.engineName === 'native' && ctx.nativeDb?.purgeFilesData) { - ctx.nativeDb.purgeFilesData(filesToPurge, false, hasReverseDeps ? reverseDepList : undefined); - // Native path still reparses reverse-deps (works correctly with native edge builder) - for (const relPath of reverseDeps) { - const absPath = path.join(rootDir, relPath); - ctx.parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true }); - } - } else { - // WASM/JS path: save edges from reverse-dep files → changed files BEFORE - // purge, then reconnect them to new node IDs after insertNodes (#932, #933). - // - // purgeFilesFromGraph deletes edges in BOTH directions for changed files, - // which already removes the reverse-dep → changed-file edges. The old - // approach then over-deleted ALL outgoing edges from reverse-dep files and - // reparsed them to rebuild everything — expensive (87 extra parses) and - // lossy (442 missing edges due to imperfect resolution on rebuild). - // - // New approach: save the edge topology, let purge handle deletion, then - // reconnect using new node IDs. No reparse needed. 
- if (hasReverseDeps && hasPurge) { - const changePathSet = new Set(changePaths); - const saveEdgesStmt = db.prepare(` - SELECT e.source_id, n_tgt.name AS tgt_name, n_tgt.kind AS tgt_kind, - n_tgt.file AS tgt_file, n_tgt.line AS tgt_line, - e.kind AS edge_kind, e.confidence, e.dynamic, e.technique, e.dynamic_kind, - n_src.file AS src_file - FROM edges e - JOIN nodes n_src ON e.source_id = n_src.id - JOIN nodes n_tgt ON e.target_id = n_tgt.id - WHERE n_tgt.file = ? AND n_src.file != n_tgt.file - `); - for (const changedPath of changePaths) { - for (const row of saveEdgesStmt.all(changedPath) as Array<{ - source_id: number; - tgt_name: string; - tgt_kind: string; - tgt_file: string; - tgt_line: number; - edge_kind: string; - confidence: number; - dynamic: number; - technique: string | null; - dynamic_kind: string | null; - src_file: string; - }>) { - // Skip edges whose source is also being purged — buildEdges will - // re-create them with correct new IDs. - if (changePathSet.has(row.src_file)) continue; - ctx.savedReverseDepEdges.push({ - sourceId: row.source_id, - tgtName: row.tgt_name, - tgtKind: row.tgt_kind, - tgtFile: row.tgt_file, - tgtLine: row.tgt_line, - edgeKind: row.edge_kind, - confidence: row.confidence, - dynamic: row.dynamic, - technique: row.technique, - dynamicKind: row.dynamic_kind, - }); - } - } - debug(`Saved ${ctx.savedReverseDepEdges.length} reverse-dep edges for reconnection`); - } + if (!(hasPurge || hasReverseDeps)) return; - if (hasPurge) { - purgeFilesFromGraph(db, filesToPurge, { purgeHashes: false }); - } - // No outgoing-edge deletion for reverse-deps — purge already removed - // edges targeting the changed files, and other outgoing edges are valid. - // No reverse-deps added to parseChanges — no reparse needed. - } + const filesToPurge = hasPurge ? 
[...ctx.removed, ...changePaths] : []; + const isNative = ctx.engineName === 'native' && !!ctx.nativeDb?.purgeFilesData; + + if (isNative) { + // Native: purge (which also rebuilds reverse-dep edges) runs first, then + // the reverse-dep files are enqueued for reparse. + purgeStaleReverseDeps(ctx, filesToPurge, hasPurge, hasReverseDeps, reverseDepList); + addReverseDeps(ctx, changePaths, reverseDeps, hasPurge); + } else { + // WASM/JS: edge topology must be saved BEFORE purge deletes it. + addReverseDeps(ctx, changePaths, reverseDeps, hasPurge); + purgeStaleReverseDeps(ctx, filesToPurge, hasPurge, hasReverseDeps, reverseDepList); } } @@ -533,11 +574,15 @@ function handleIncrementalBuild(ctx: PipelineContext): void { purgeAndAddReverseDeps(ctx, changePaths, reverseDeps); } -/** Diagnostic logger gated by env var, used by both `detectNoChanges` branches. */ -function makeFastSkipLogger(): (reason: string) => void { - const diag = process.env.CODEGRAPH_FAST_SKIP_DIAG === '1'; +/** + * Diagnostic logger gated by `build.fastSkipDiag` config (resolved by the + * caller from `config.build.fastSkipDiag`, which `applyEnvOverrides` sets + * from `CODEGRAPH_FAST_SKIP_DIAG` — see `infrastructure/config.ts`). Used by + * both `detectNoChanges` branches. + */ +function makeFastSkipLogger(fastSkipDiag: boolean): (reason: string) => void { return (reason: string): void => { - if (diag) info(`[fast-skip] ${reason}`); + if (fastSkipDiag) info(`[fast-skip] ${reason}`); }; } @@ -653,14 +698,19 @@ function passesPendingAnalysisGuard( * repos where source files don't change between builds. * * Pure read of `db` and the filesystem — never mutates either. + * + * `fastSkipDiag` gates the `[fast-skip]` diagnostic log lines and defaults to + * `false` (matching `DEFAULTS.build.fastSkipDiag`) when the caller doesn't + * have a resolved config value to pass — see `makeFastSkipLogger`. 
*/ export function detectNoChanges( db: BetterSqlite3Database, allFiles: string[], rootDir: string, opts?: Record, + fastSkipDiag = false, ): boolean { - const log = makeFastSkipLogger(); + const log = makeFastSkipLogger(fastSkipDiag); const existing = loadFileHashesForPreflight(db, log); if (!existing) return false; From 0e83ba03ab71353dde9160d84db6434eaacaf837 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 11:11:36 -0600 Subject: [PATCH 18/26] refactor: extract getOrCreateBatchStmt, dedupe batch-insert helpers (docs check acknowledged) Impact: 6 functions changed, 22 affected --- src/domain/graph/builder/helpers.ts | 99 +++++++++++++++++------------ 1 file changed, 58 insertions(+), 41 deletions(-) diff --git a/src/domain/graph/builder/helpers.ts b/src/domain/graph/builder/helpers.ts index 39ab75e5a..c9af890b9 100644 --- a/src/domain/graph/builder/helpers.ts +++ b/src/domain/graph/builder/helpers.ts @@ -357,78 +357,95 @@ const BATCH_CHUNK = 500; const nodeStmtCache = new WeakMap>(); const edgeStmtCache = new WeakMap>(); -function getNodeStmt(db: BetterSqlite3Database, chunkSize: number): SqliteStatement { - let cache = nodeStmtCache.get(db); - if (!cache) { - cache = new Map(); - nodeStmtCache.set(db, cache); +/** + * Get (or lazily prepare + cache) a multi-value INSERT statement for a given + * chunk size, keyed per-database. Shared by getNodeStmt/getEdgeStmt, which + * previously duplicated this exact WeakMap> + * cache-getter shape — only the SQL text differed. 
+ */ +function getOrCreateBatchStmt( + cache: WeakMap>, + db: BetterSqlite3Database, + chunkSize: number, + buildSql: (chunkSize: number) => string, +): SqliteStatement { + let perDb = cache.get(db); + if (!perDb) { + perDb = new Map(); + cache.set(db, perDb); } - let stmt = cache.get(chunkSize); + let stmt = perDb.get(chunkSize); if (!stmt) { + stmt = db.prepare(buildSql(chunkSize)); + perDb.set(chunkSize, stmt); + } + return stmt; +} + +function getNodeStmt(db: BetterSqlite3Database, chunkSize: number): SqliteStatement { + return getOrCreateBatchStmt(nodeStmtCache, db, chunkSize, (n) => { const ph = '(?,?,?,?,?,?,?,?,?)'; - stmt = db.prepare( + return ( 'INSERT OR IGNORE INTO nodes (name,kind,file,line,end_line,parent_id,qualified_name,scope,visibility) VALUES ' + - Array.from({ length: chunkSize }, () => ph).join(','), + Array.from({ length: n }, () => ph).join(',') ); - cache.set(chunkSize, stmt); - } - return stmt; + }); } function getEdgeStmt(db: BetterSqlite3Database, chunkSize: number): SqliteStatement { - let cache = edgeStmtCache.get(db); - if (!cache) { - cache = new Map(); - edgeStmtCache.set(db, cache); - } - let stmt = cache.get(chunkSize); - if (!stmt) { + return getOrCreateBatchStmt(edgeStmtCache, db, chunkSize, (n) => { const ph = '(?,?,?,?,?,?,?)'; - stmt = db.prepare( + return ( 'INSERT INTO edges (source_id,target_id,kind,confidence,dynamic,technique,dynamic_kind) VALUES ' + - Array.from({ length: chunkSize }, () => ph).join(','), + Array.from({ length: n }, () => ph).join(',') ); - cache.set(chunkSize, stmt); - } - return stmt; + }); } /** - * Batch-insert node rows via multi-value INSERT statements. - * Each row: [name, kind, file, line, end_line, parent_id, qualified_name, scope, visibility] + * Chunk `rows` into `BATCH_CHUNK`-sized groups and run one multi-value INSERT + * per group via `getStmt`, flattening each row's positional args via + * `pushValues`. 
Shared by batchInsertNodes/batchInsertEdges, which previously + * duplicated this exact chunk-loop-and-run shape — only the statement getter + * and per-row value flattening differed. */ -export function batchInsertNodes(db: BetterSqlite3Database, rows: unknown[][]): void { +function runBatchInsert( + db: BetterSqlite3Database, + rows: unknown[][], + getStmt: (db: BetterSqlite3Database, chunkSize: number) => SqliteStatement, + pushValues: (row: unknown[], vals: unknown[]) => void, +): void { if (!rows.length) return; for (let i = 0; i < rows.length; i += BATCH_CHUNK) { const end = Math.min(i + BATCH_CHUNK, rows.length); const chunkSize = end - i; - const stmt = getNodeStmt(db, chunkSize); + const stmt = getStmt(db, chunkSize); const vals: unknown[] = []; for (let j = i; j < end; j++) { - const r = rows[j] as unknown[]; - vals.push(r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8]); + pushValues(rows[j] as unknown[], vals); } stmt.run(...vals); } } +/** + * Batch-insert node rows via multi-value INSERT statements. + * Each row: [name, kind, file, line, end_line, parent_id, qualified_name, scope, visibility] + */ +export function batchInsertNodes(db: BetterSqlite3Database, rows: unknown[][]): void { + runBatchInsert(db, rows, getNodeStmt, (r, vals) => { + vals.push(r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8]); + }); +} + /** * Batch-insert edge rows via multi-value INSERT statements. * Each row: [source_id, target_id, kind, confidence, dynamic, technique, dynamic_kind] */ export function batchInsertEdges(db: BetterSqlite3Database, rows: unknown[][]): void { - if (!rows.length) return; - for (let i = 0; i < rows.length; i += BATCH_CHUNK) { - const end = Math.min(i + BATCH_CHUNK, rows.length); - const chunkSize = end - i; - const stmt = getEdgeStmt(db, chunkSize); - const vals: unknown[] = []; - for (let j = i; j < end; j++) { - const r = rows[j] as unknown[]; - vals.push(r[0], r[1], r[2], r[3], r[4], r[5] ?? null, r[6] ?? 
null); - } - stmt.run(...vals); - } + runBatchInsert(db, rows, getEdgeStmt, (r, vals) => { + vals.push(r[0], r[1], r[2], r[3], r[4], r[5] ?? null, r[6] ?? null); + }); } /** Confidence assigned to CHA-expanded interface/abstract dispatch edges. */ From c0c1f7d5e730de00b1baa1c49543d37e9a2f4980 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 13:17:25 -0600 Subject: [PATCH 19/26] fix: address quality issues in domain/graph/resolver (docs check acknowledged) Decompose gauntlet-flagged FAIL-level complexity in points-to.ts, strategy.ts, and ts-resolver.ts via pure extract-method refactoring. No resolution-behavior change (verified byte-for-byte identical resolution-benchmark output across all 34 fixture languages). Impact: 41 functions changed, 32 affected --- src/domain/graph/resolver/points-to.ts | 249 +++++++++---- src/domain/graph/resolver/strategy.ts | 328 +++++++++------- src/domain/graph/resolver/ts-resolver.ts | 452 +++++++++++++++-------- 3 files changed, 655 insertions(+), 374 deletions(-) diff --git a/src/domain/graph/resolver/points-to.ts b/src/domain/graph/resolver/points-to.ts index ffa36ec8e..5573c53ab 100644 --- a/src/domain/graph/resolver/points-to.ts +++ b/src/domain/graph/resolver/points-to.ts @@ -79,106 +79,195 @@ function buildThisAssignmentMap( } /** - * Append parameter-flow and array/spread/forOf/callback constraints (Phases 8.3c and 8.3e). + * Phase 8.3c: parameter-flow constraints. * - * Mutates `pts` (seeds array-element entries) and appends to `constraints`. + * For each call f(x) at argIndex i where f is locally defined, add + * constraint: pts(f::paramName_i) ⊇ pts(x). This makes the pts solver + * inter-procedural within the module so that `fn()` inside `f` resolves + * to the concrete function passed at each call site. + * + * Keys are scoped as "callee::paramName" to prevent name collisions: bare + * parameter names like `fn`, `cb`, and `callback` appear in many functions + * within the same file. 
Without scoping, pts(fn) from runA and runB would + * merge into a single set, producing spurious call edges. The scoped key is + * resolved in buildFileCallEdges by combining the enclosing caller's name + * with the call's name (see callerName::call.name lookup there). + * + * Scope: intra-module only (definitionParams contains local defs only). + * + * Appends to `constraints`. */ -function buildParamAndArrayConstraints( - pts: PointsToMap, +function buildParamFlowConstraints( constraints: Array<{ lhs: string; rhsKey: string }>, paramBindings?: readonly ParamBinding[], definitionParams?: ReadonlyMap, +): void { + if (!paramBindings || !definitionParams) return; + for (const { callee, argIndex, argName } of paramBindings) { + const params = definitionParams.get(callee); + if (!params || argIndex >= params.length) continue; + const paramName = params[argIndex]; + if (paramName) constraints.push({ lhs: `${callee}::${paramName}`, rhsKey: argName }); + } +} + +/** + * Phase 8.3e: array-element bindings — seed concrete elements and wildcard. + * + * `arr[0]` etc. are seeded from literal arrays; `arr[*]` collects all elements. + * + * Mutates `pts` (seeds per-index entries) and appends to `constraints`. + */ +function buildArrayElemConstraints( + pts: PointsToMap, + constraints: Array<{ lhs: string; rhsKey: string }>, arrayElemBindings?: readonly ArrayElemBinding[], - spreadArgBindings?: readonly SpreadArgBinding[], - forOfBindings?: readonly ForOfBinding[], - arrayCallbackBindings?: readonly ArrayCallbackBinding[], ): void { - // Phase 8.3c: parameter-flow constraints. - // For each call f(x) at argIndex i where f is locally defined, add - // constraint: pts(f::paramName_i) ⊇ pts(x). This makes the pts solver - // inter-procedural within the module so that `fn()` inside `f` resolves - // to the concrete function passed at each call site. 
- // - // Keys are scoped as "callee::paramName" to prevent name collisions: bare - // parameter names like `fn`, `cb`, and `callback` appear in many functions - // within the same file. Without scoping, pts(fn) from runA and runB would - // merge into a single set, producing spurious call edges. The scoped key is - // resolved in buildFileCallEdges by combining the enclosing caller's name - // with the call's name (see callerName::call.name lookup there). - // - // Scope: intra-module only (definitionParams contains local defs only). - if (paramBindings && definitionParams) { - for (const { callee, argIndex, argName } of paramBindings) { - const params = definitionParams.get(callee); - if (!params || argIndex >= params.length) continue; - const paramName = params[argIndex]; - if (paramName) constraints.push({ lhs: `${callee}::${paramName}`, rhsKey: argName }); - } + if (!arrayElemBindings || arrayElemBindings.length === 0) return; + for (const { arrayName, index, elemName } of arrayElemBindings) { + const elemKey = `${arrayName}[${index}]`; + const wildcardKey = `${arrayName}[*]`; + // Seed the per-index entry if the elemName is a concrete function. + if (!pts.has(elemKey)) pts.set(elemKey, new Set()); + pts.get(elemKey)!.add(elemName); + // Wildcard: array[*] collects all element targets for imprecise spread/for-of. + constraints.push({ lhs: wildcardKey, rhsKey: elemKey }); } +} - // Phase 8.3e: array-element bindings — seed concrete elements and wildcard. - // `arr[0]` etc. are seeded from literal arrays; `arr[*]` collects all elements. - if (arrayElemBindings && arrayElemBindings.length > 0) { - for (const { arrayName, index, elemName } of arrayElemBindings) { - const elemKey = `${arrayName}[${index}]`; - const wildcardKey = `${arrayName}[*]`; - // Seed the per-index entry if the elemName is a concrete function. 
- if (!pts.has(elemKey)) pts.set(elemKey, new Set()); - pts.get(elemKey)!.add(elemName); - // Wildcard: array[*] collects all element targets for imprecise spread/for-of. - constraints.push({ lhs: wildcardKey, rhsKey: elemKey }); - } +/** + * Build a per-array index count from arrayElemBindings for precise + * per-index spread-argument constraints. + */ +function computeArrayMaxIndex( + arrayElemBindings: readonly ArrayElemBinding[] | undefined, +): Map { + const arrayMaxIndex = new Map(); + for (const { arrayName, index } of arrayElemBindings ?? []) { + const cur = arrayMaxIndex.get(arrayName) ?? -1; + if (index > cur) arrayMaxIndex.set(arrayName, index); } + return arrayMaxIndex; +} - // Phase 8.3e: spread-argument constraints. - // f(...arr) → pts[f::param_i] ⊇ pts[arr[i]] for each known element. - if (spreadArgBindings && spreadArgBindings.length > 0 && definitionParams) { - // Build a per-array index count from arrayElemBindings for precise per-index constraints. - const arrayMaxIndex = new Map(); - for (const { arrayName, index } of arrayElemBindings ?? []) { - const cur = arrayMaxIndex.get(arrayName) ?? -1; - if (index > cur) arrayMaxIndex.set(arrayName, index); +/** + * Push spread-argument constraints for one callee: precise per-element + * constraints when the source array's max index is known, otherwise a + * wildcard constraint for every parameter at/after startIndex. + */ +function pushSpreadArgConstraintsForCallee( + constraints: Array<{ lhs: string; rhsKey: string }>, + callee: string, + params: readonly string[], + arrayName: string, + startIndex: number, + maxIdx: number, +): void { + if (maxIdx >= 0) { + // Precise: per-element constraints. 
+ for (let i = 0; i <= maxIdx; i++) { + const paramIdx = startIndex + i; + if (paramIdx >= params.length) break; + constraints.push({ lhs: `${callee}::${params[paramIdx]}`, rhsKey: `${arrayName}[${i}]` }); } - - for (const { callee, arrayName, startIndex } of spreadArgBindings) { - const params = definitionParams.get(callee); - if (!params) continue; - const maxIdx = arrayMaxIndex.get(arrayName) ?? -1; - if (maxIdx >= 0) { - // Precise: per-element constraints. - for (let i = 0; i <= maxIdx; i++) { - const paramIdx = startIndex + i; - if (paramIdx >= params.length) break; - constraints.push({ lhs: `${callee}::${params[paramIdx]}`, rhsKey: `${arrayName}[${i}]` }); - } - } else { - // Unknown array size: all params at/after startIndex get the wildcard. - for (let j = startIndex; j < params.length; j++) { - constraints.push({ lhs: `${callee}::${params[j]}`, rhsKey: `${arrayName}[*]` }); - } - } + } else { + // Unknown array size: all params at/after startIndex get the wildcard. + for (let j = startIndex; j < params.length; j++) { + constraints.push({ lhs: `${callee}::${params[j]}`, rhsKey: `${arrayName}[*]` }); } } +} - // Phase 8.3e: for-of iteration constraints. - // `for (const x of arr)` inside `outer` → pts[outer::x] ⊇ pts[arr[*]] - if (forOfBindings) { - for (const { varName, sourceName, enclosingFunc } of forOfBindings) { - constraints.push({ lhs: `${enclosingFunc}::${varName}`, rhsKey: `${sourceName}[*]` }); - } +/** + * Phase 8.3e: spread-argument constraints. + * + * f(...arr) → pts[f::param_i] ⊇ pts[arr[i]] for each known element. + * + * Appends to `constraints`. 
+ */ +function buildSpreadArgConstraints( + constraints: Array<{ lhs: string; rhsKey: string }>, + spreadArgBindings?: readonly SpreadArgBinding[], + arrayElemBindings?: readonly ArrayElemBinding[], + definitionParams?: ReadonlyMap, +): void { + if (!spreadArgBindings || spreadArgBindings.length === 0 || !definitionParams) return; + const arrayMaxIndex = computeArrayMaxIndex(arrayElemBindings); + + for (const { callee, arrayName, startIndex } of spreadArgBindings) { + const params = definitionParams.get(callee); + if (!params) continue; + const maxIdx = arrayMaxIndex.get(arrayName) ?? -1; + pushSpreadArgConstraintsForCallee(constraints, callee, params, arrayName, startIndex, maxIdx); } +} - // Phase 8.3e: Array.from / callback constraints. - // Array.from(source, cb) → pts[cb::param0] ⊇ pts[source[*]] - if (arrayCallbackBindings && definitionParams) { - for (const { sourceName, calleeName } of arrayCallbackBindings) { - const params = definitionParams.get(calleeName); - if (!params || params.length === 0) continue; - constraints.push({ lhs: `${calleeName}::${params[0]}`, rhsKey: `${sourceName}[*]` }); - } +/** + * Phase 8.3e: for-of iteration constraints. + * + * `for (const x of arr)` inside `outer` → pts[outer::x] ⊇ pts[arr[*]] + * + * Appends to `constraints`. + */ +function buildForOfConstraints( + constraints: Array<{ lhs: string; rhsKey: string }>, + forOfBindings?: readonly ForOfBinding[], +): void { + if (!forOfBindings) return; + for (const { varName, sourceName, enclosingFunc } of forOfBindings) { + constraints.push({ lhs: `${enclosingFunc}::${varName}`, rhsKey: `${sourceName}[*]` }); } } +/** + * Phase 8.3e: Array.from / callback constraints. + * + * Array.from(source, cb) → pts[cb::param0] ⊇ pts[source[*]] + * + * Appends to `constraints`. 
+ */ +function buildArrayCallbackConstraints( + constraints: Array<{ lhs: string; rhsKey: string }>, + arrayCallbackBindings?: readonly ArrayCallbackBinding[], + definitionParams?: ReadonlyMap, +): void { + if (!arrayCallbackBindings || !definitionParams) return; + for (const { sourceName, calleeName } of arrayCallbackBindings) { + const params = definitionParams.get(calleeName); + if (!params || params.length === 0) continue; + constraints.push({ lhs: `${calleeName}::${params[0]}`, rhsKey: `${sourceName}[*]` }); + } +} + +/** + * Append parameter-flow and array/spread/forOf/callback constraints (Phases 8.3c and 8.3e). + * + * Delegates to one named helper per binding kind (buildParamFlowConstraints, + * buildArrayElemConstraints, buildSpreadArgConstraints, buildForOfConstraints, + * buildArrayCallbackConstraints) — each handler owns exactly one binding kind's + * guard + iteration + constraint-push shape, called in the same order the + * original inline blocks ran in (none of the blocks read state written by an + * earlier one, so extraction does not change solver input order). + * + * Mutates `pts` (seeds array-element entries) and appends to `constraints`. 
+ */ +function buildParamAndArrayConstraints( + pts: PointsToMap, + constraints: Array<{ lhs: string; rhsKey: string }>, + paramBindings?: readonly ParamBinding[], + definitionParams?: ReadonlyMap, + arrayElemBindings?: readonly ArrayElemBinding[], + spreadArgBindings?: readonly SpreadArgBinding[], + forOfBindings?: readonly ForOfBinding[], + arrayCallbackBindings?: readonly ArrayCallbackBinding[], +): void { + buildParamFlowConstraints(constraints, paramBindings, definitionParams); + buildArrayElemConstraints(pts, constraints, arrayElemBindings); + buildSpreadArgConstraints(constraints, spreadArgBindings, arrayElemBindings, definitionParams); + buildForOfConstraints(constraints, forOfBindings); + buildArrayCallbackConstraints(constraints, arrayCallbackBindings, definitionParams); +} + /** * Seed pts entries for object-rest parameter dispatch (Phase 8.3f). * diff --git a/src/domain/graph/resolver/strategy.ts b/src/domain/graph/resolver/strategy.ts index 84726b4a8..8bfe99cfb 100644 --- a/src/domain/graph/resolver/strategy.ts +++ b/src/domain/graph/resolver/strategy.ts @@ -57,41 +57,44 @@ export function isModuleScopedLanguage(relPath: string): boolean { return MODULE_SCOPED_BARE_CALL_EXTENSIONS.has(ext); } +// ── typeMap entry unwrapping ────────────────────────────────────────────────── + +/** + * Unwrap a typeMap entry to its plain string form. + * + * typeMap values are either a bare string (the target name) or an object of + * shape `{ type?: string }` (some seeders attach extra metadata alongside the + * target). This normalises both shapes to `string | null`, matching the + * falsy-check semantics every call site previously duplicated inline. + */ +function unwrapTypeEntry(entry: unknown): string | null { + if (!entry) return null; + return typeof entry === 'string' ? entry : ((entry as { type?: string }).type ?? 
null); +} + // ── resolveByReceiver ───────────────────────────────────────────────────────── /** - * Resolve a call site whose receiver is a concrete object reference - * (i.e. `receiver` is present and is NOT `this`, `self`, or `super`). + * Steps 1-3 of the resolveByReceiver cascade: resolve the type name for a + * concrete-object receiver. * - * Resolution cascade: * 1. typeMap class-scoped lookup (`ClassName.prop` key) for `this.prop` receivers. * 2. typeMap bare key, full-receiver key, callee-scoped rest-param key. * 3. Inline `new Ctor()` heuristic for un-normalised receiver text. - * 4. Typed method lookup via `TypeName.methodName` in symbol DB. - * 5. Prototype alias: `Foo.prototype.bar = identifier` via typeMap. - * 6. Direct qualified method lookup: `ClassName.staticMethod()`. - * 7. Composite pts key: `obj.prop` → callback target function. */ -export function resolveByReceiver( - lookup: StrategyLookup, - call: { name: string; receiver: string }, - relPath: string, +function resolveReceiverTypeName( typeMap: Map, + receiver: string, + effectiveReceiver: string, callerName?: string | null, -): ReadonlyArray<{ id: number; file: string }> { - // Strip "this." so `this.repo.method()` resolves via typeMap["repo"] - // (or the "this.repo" key seeded directly by the TSC property-declaration enricher). - const effectiveReceiver = call.receiver.startsWith('this.') - ? call.receiver.slice('this.'.length) - : call.receiver; - +): string | null { // For this.prop receivers, prefer the class-scoped key (ClassName.prop) seeded by // handlePropWriteTypeMap / handleFieldDefTypeMap — prevents false edges when multiple // classes define the same property name (issues #1323, #1458). // Class-scoped lookup runs first so bare fallback keys (confidence 0.6) don't shadow // the correct per-class entry when callerName is available. 
let typeEntry: unknown; - if (call.receiver.startsWith('this.') && callerName) { + if (receiver.startsWith('this.') && callerName) { const dotIdx = callerName.lastIndexOf('.'); if (dotIdx > -1) { const callerClass = callerName.slice(0, dotIdx); @@ -100,16 +103,12 @@ export function resolveByReceiver( } typeEntry ??= typeMap.get(effectiveReceiver) ?? - typeMap.get(call.receiver) ?? + typeMap.get(receiver) ?? // Phase 8.3f: callee-scoped rest-param key (`callee::restName`) to avoid // same-name rest-binding collision across functions in the same file (#1358). (callerName ? typeMap.get(`${callerName}::${effectiveReceiver}`) : undefined); - let typeName = typeEntry - ? typeof typeEntry === 'string' - ? typeEntry - : (typeEntry as { type?: string }).type - : null; + let typeName = unwrapTypeEntry(typeEntry); // Belt-and-suspenders fallback for inline new-expression receivers that // extractReceiverName did not normalise (e.g. raw text leaked from an @@ -120,77 +119,197 @@ export function resolveByReceiver( // The uppercase-initial restriction ([A-Z_$]) is a heuristic to distinguish // constructors (PascalCase) from regular functions and avoids false positives // on `(new xmlParser()).parse()` style calls. - if (!typeName && call.receiver) { - const m = /^\(?\s*new\s+([A-Z_$][A-Za-z0-9_$]*)/.exec(call.receiver); + if (!typeName && receiver) { + const m = /^\(?\s*new\s+([A-Z_$][A-Za-z0-9_$]*)/.exec(receiver); if (m?.[1]) typeName = m[1]; } + return typeName; +} + +/** Step 4: typed method lookup via `TypeName.methodName` in the symbol DB. 
*/ +function resolveViaTypedMethod( + lookup: StrategyLookup, + typeName: string, + call: { name: string }, + relPath: string, +): ReadonlyArray<{ id: number; file: string }> { + return lookup + .byName(`${typeName}.${call.name}`) + .filter((n) => n.kind === 'method' && computeConfidence(relPath, n.file, null) >= 0.5); +} + +/** + * Step 5: prototype alias — `Foo.prototype.bar = identifier` seeds + * typeMap['Foo.bar'] = { type: identifier }. + * Checked after the symbol-DB lookup so an actual method definition always wins. + */ +function resolveViaPrototypeAlias( + lookup: StrategyLookup, + typeMap: Map, + typeName: string, + call: { name: string }, + relPath: string, +): ReadonlyArray<{ id: number; file: string }> { + const protoTarget = unwrapTypeEntry(typeMap.get(`${typeName}.${call.name}`)); + if (!protoTarget) return []; + return lookup.byName(protoTarget).filter((t) => computeConfidence(relPath, t.file, null) >= 0.5); +} + +/** + * Step 6: direct qualified method lookup — `ClassName.staticMethod()` or + * `ClassName.instanceMethod()` when the receiver is a class name with no + * typeMap entry. Handles static method calls like `C6.staticMethod()` or + * `D.d()` where the receiver IS the class. Matches both 'method' and + * 'function' kinds to cover field-initializer synthetic defs. + */ +function resolveViaDirectQualifiedMethod( + lookup: StrategyLookup, + effectiveReceiver: string, + call: { name: string }, + relPath: string, +): ReadonlyArray<{ id: number; file: string }> { + const qualifiedName = `${effectiveReceiver}.${call.name}`; + return lookup + .byName(qualifiedName) + .filter( + (n) => + (n.kind === 'method' || n.kind === 'function') && + computeConfidence(relPath, n.file, null) >= 0.5, + ); +} + +/** + * Step 7: composite pts key — `obj.prop = fn` seeds typeMap['obj.prop'] = { type: 'fn' } + * (Phase 8.3d). When a call site references `obj.prop` as a callback, resolve + * directly to the target fn. 
+ */ +function resolveViaCompositePtsKey( + lookup: StrategyLookup, + typeMap: Map, + call: { name: string; receiver: string }, + relPath: string, +): ReadonlyArray<{ id: number; file: string }> { + const ptsTarget = unwrapTypeEntry(typeMap.get(`${call.receiver}.${call.name}`)); + if (!ptsTarget) return []; + return lookup.byName(ptsTarget).filter((t) => computeConfidence(relPath, t.file, null) >= 0.5); +} + +/** + * Resolve a call site whose receiver is a concrete object reference + * (i.e. `receiver` is present and is NOT `this`, `self`, or `super`). + * + * Resolution cascade (see the per-step helpers above for the numbered steps): + * 1-3. resolveReceiverTypeName — typeMap lookups + `new Ctor()` heuristic. + * 4. resolveViaTypedMethod — typed method lookup in symbol DB. + * 5. resolveViaPrototypeAlias — prototype alias via typeMap. + * 6. resolveViaDirectQualifiedMethod — direct qualified method lookup. + * 7. resolveViaCompositePtsKey — composite pts key → callback target function. + */ +export function resolveByReceiver( + lookup: StrategyLookup, + call: { name: string; receiver: string }, + relPath: string, + typeMap: Map, + callerName?: string | null, +): ReadonlyArray<{ id: number; file: string }> { + // Strip "this." so `this.repo.method()` resolves via typeMap["repo"] + // (or the "this.repo" key seeded directly by the TSC property-declaration enricher). + const effectiveReceiver = call.receiver.startsWith('this.') + ? call.receiver.slice('this.'.length) + : call.receiver; + + const typeName = resolveReceiverTypeName(typeMap, call.receiver, effectiveReceiver, callerName); + if (typeName) { - const typed = lookup - .byName(`${typeName}.${call.name}`) - .filter((n) => n.kind === 'method' && computeConfidence(relPath, n.file, null) >= 0.5); + const typed = resolveViaTypedMethod(lookup, typeName, call, relPath); if (typed.length > 0) return typed; - // Prototype alias: `Foo.prototype.bar = identifier` seeds typeMap['Foo.bar'] = { type: identifier }. 
- // Checked after the symbol-DB lookup so an actual method definition always wins. - const protoEntry = typeMap.get(`${typeName}.${call.name}`); - const protoTarget = protoEntry - ? typeof protoEntry === 'string' - ? protoEntry - : (protoEntry as { type?: string }).type - : null; - if (protoTarget) { - const resolved = lookup - .byName(protoTarget) - .filter((t) => computeConfidence(relPath, t.file, null) >= 0.5); - if (resolved.length > 0) return resolved; - } - } - - // Direct qualified method lookup: ClassName.staticMethod() or ClassName.instanceMethod() - // when the receiver is a class name with no typeMap entry. Handles static method calls - // like `C6.staticMethod()` or `D.d()` where the receiver IS the class. - // Matches both 'method' and 'function' kinds to cover field-initializer synthetic defs. - if (!typeName) { - const qualifiedName = `${effectiveReceiver}.${call.name}`; - const direct = lookup - .byName(qualifiedName) - .filter( - (n) => - (n.kind === 'method' || n.kind === 'function') && - computeConfidence(relPath, n.file, null) >= 0.5, - ); + const viaPrototype = resolveViaPrototypeAlias(lookup, typeMap, typeName, call, relPath); + if (viaPrototype.length > 0) return viaPrototype; + } else { + const direct = resolveViaDirectQualifiedMethod(lookup, effectiveReceiver, call, relPath); if (direct.length > 0) return direct; } - // Phase 8.3d: composite pts key — `obj.prop = fn` seeds typeMap['obj.prop'] = { type: 'fn' }. - // When a call site references `obj.prop` as a callback, resolve directly to the target fn. - const compositeEntry = typeMap.get(`${call.receiver}.${call.name}`); - const ptsTarget = compositeEntry - ? typeof compositeEntry === 'string' - ? 
compositeEntry - : (compositeEntry as { type?: string }).type - : null; - if (ptsTarget) { - const resolved = lookup - .byName(ptsTarget) - .filter((t) => computeConfidence(relPath, t.file, null) >= 0.5); - if (resolved.length > 0) return resolved; - } + const viaComposite = resolveViaCompositePtsKey(lookup, typeMap, call, relPath); + if (viaComposite.length > 0) return viaComposite; return []; } // ── resolveByGlobal ─────────────────────────────────────────────────────────── +/** + * Step 1: accessor this-dispatch via Object.defineProperty (Phase 8.3f). + * + * When a plain function (no class prefix) is registered as a get/set accessor + * for `obj` via Object.defineProperty, typeMap seeds 'callerName:this' = 'obj'. + * We then resolve this.method() → typeMap['obj.method'] → the concrete + * definition. Only applies to a bare (non-qualified) callerName + `this` + * receiver; runs before the broad exact-name lookup to avoid false positives + * from unrelated same-file definitions. + */ +function resolveViaAccessorThisDispatch( + lookup: StrategyLookup, + typeMap: Map, + call: { name: string; receiver?: string | null }, + relPath: string, + callerName?: string | null, +): ReadonlyArray<{ id: number; file: string }> { + if (!(call.receiver === 'this' && callerName && !callerName.includes('.'))) return []; + const objName = unwrapTypeEntry(typeMap.get(`${callerName}:this`)); + if (!objName) return []; + const targetFn = unwrapTypeEntry(typeMap.get(`${objName}.${call.name}`)); + if (!targetFn) return []; + return lookup.byName(targetFn).filter((t) => computeConfidence(relPath, t.file, null) >= 0.5); +} + +/** + * Step 3: same-class sibling method fallback via callerName. + * + * e.g. `this.area()` inside `Shape.describe` → try `Shape.area`. Also covers + * no-receiver calls inside class methods, e.g. `IsValidEmail(x)` inside + * `Validators.ValidateUser` → try `Validators.IsValidEmail` (C#/Java static + * siblings). 
This seeds the initial edge that runChaPostPass later expands to + * subclass overrides. + * + * For JS/TS, bare (no-receiver) calls are module-scoped — there is no + * implicit class binding. Skip the same-class fallback for bare calls in + * those languages to prevent false positives (e.g. `flush()` inside + * `Processor.run` must not resolve to `Processor.flush`). this.method() + * calls are unaffected: they still reach the fallback because + * `call.receiver === 'this'` is truthy, not a bare call. + */ +function resolveViaSameClassSibling( + lookup: StrategyLookup, + call: { name: string; receiver?: string | null }, + relPath: string, + callerName?: string | null, +): ReadonlyArray<{ id: number; file: string }> { + const isBareCall = !call.receiver; + if (!callerName || (isBareCall && isModuleScopedLanguage(relPath))) return []; + const dotIdx = callerName.lastIndexOf('.'); + if (dotIdx <= -1) return []; + // Extract only the segment immediately before the method name so that + // 'Namespace.ClassName.method' yields 'ClassName', not 'Namespace.ClassName'. + // Symbols are stored under their bare class name, not their qualified path. + const prevDot = callerName.lastIndexOf('.', dotIdx - 1); + const callerClass = callerName.slice(prevDot + 1, dotIdx); + const qualifiedName = `${callerClass}.${call.name}`; + return lookup + .byName(qualifiedName) + .filter((t) => t.kind === 'method' && computeConfidence(relPath, t.file, null) >= 0.5); +} + /** * Resolve a call site with no receiver, or whose receiver is `this`, `self`, * or `super`. * * Resolution cascade: - * 1. Accessor this-dispatch via Object.defineProperty (Phase 8.3f). + * 1. resolveViaAccessorThisDispatch — Object.defineProperty this-dispatch (Phase 8.3f). * 2. Exact global name lookup with confidence filter. - * 3. Same-class sibling method fallback (C#/Java static siblings, this.method()). + * 3. resolveViaSameClassSibling — same-class sibling method fallback. 
*/ export function resolveByGlobal( lookup: StrategyLookup, @@ -199,67 +318,16 @@ export function resolveByGlobal( typeMap: Map, callerName?: string | null, ): ReadonlyArray<{ id: number; file: string }> { - // Phase 8.3f: accessor this-dispatch via Object.defineProperty. - // When a plain function (no class prefix) is registered as a get/set accessor for `obj` - // via Object.defineProperty, typeMap seeds 'callerName:this' = 'obj'. - // We then resolve this.method() → typeMap['obj.method'] → the concrete definition. - // This runs before the broad exact-name lookup to avoid false positives from - // unrelated same-file definitions. - if (call.receiver === 'this' && callerName && !callerName.includes('.')) { - const accessorThisEntry = typeMap.get(`${callerName}:this`); - const objName = accessorThisEntry - ? typeof accessorThisEntry === 'string' - ? accessorThisEntry - : (accessorThisEntry as { type?: string }).type - : null; - if (objName) { - const objMethodEntry = typeMap.get(`${objName}.${call.name}`); - const targetFn = objMethodEntry - ? typeof objMethodEntry === 'string' - ? objMethodEntry - : (objMethodEntry as { type?: string }).type - : null; - if (targetFn) { - const resolved = lookup - .byName(targetFn) - .filter((t) => computeConfidence(relPath, t.file, null) >= 0.5); - if (resolved.length > 0) return resolved; - } - } - } + const viaAccessor = resolveViaAccessorThisDispatch(lookup, typeMap, call, relPath, callerName); + if (viaAccessor.length > 0) return viaAccessor; const exact = lookup .byName(call.name) .filter((t) => computeConfidence(relPath, t.file, null) >= 0.5); if (exact.length > 0) return exact; - // Try same-class method lookup via callerName. - // e.g. `this.area()` inside `Shape.describe` → try `Shape.area`. - // Also covers no-receiver calls inside class methods, e.g. `IsValidEmail(x)` inside - // `Validators.ValidateUser` → try `Validators.IsValidEmail` (C#/Java static siblings). 
- // This seeds the initial edge that runChaPostPass later expands to subclass overrides. - // - // For JS/TS, bare (no-receiver) calls are module-scoped — there is no implicit class - // binding. Skip the same-class fallback for bare calls in those languages to prevent - // false positives (e.g. `flush()` inside `Processor.run` must not resolve to - // `Processor.flush`). this.method() calls are unaffected: they still reach the fallback - // because `call.receiver === 'this'` is truthy, not a bare call. - const isBareCall = !call.receiver; - if (callerName && !(isBareCall && isModuleScopedLanguage(relPath))) { - const dotIdx = callerName.lastIndexOf('.'); - if (dotIdx > -1) { - // Extract only the segment immediately before the method name so that - // 'Namespace.ClassName.method' yields 'ClassName', not 'Namespace.ClassName'. - // Symbols are stored under their bare class name, not their qualified path. - const prevDot = callerName.lastIndexOf('.', dotIdx - 1); - const callerClass = callerName.slice(prevDot + 1, dotIdx); - const qualifiedName = `${callerClass}.${call.name}`; - const sameClass = lookup - .byName(qualifiedName) - .filter((t) => t.kind === 'method' && computeConfidence(relPath, t.file, null) >= 0.5); - if (sameClass.length > 0) return sameClass; - } - } + const sameClass = resolveViaSameClassSibling(lookup, call, relPath, callerName); + if (sameClass.length > 0) return sameClass; return exact; // empty } diff --git a/src/domain/graph/resolver/ts-resolver.ts b/src/domain/graph/resolver/ts-resolver.ts index 2aca2d5bc..4b4e67f43 100644 --- a/src/domain/graph/resolver/ts-resolver.ts +++ b/src/domain/graph/resolver/ts-resolver.ts @@ -159,6 +159,21 @@ function countLowConfidence(typeMap: Map): number { return count; } +/** + * Shared "collect candidates by name → keep only names with a single unique + * value → write" ambiguity-filtering algorithm used by both enrichSourceFile + * (ambiguity check on qualifiedName) and enrichCallAssignments (ambiguity + * 
check on calleeName). + * + * Returns `entries[0]` if every entry shares exactly one distinct value under + * `keyOf`, or `null` if they disagree (ambiguous) or `entries` is empty. + */ +function resolveUnambiguous(entries: readonly T[], keyOf: (entry: T) => string): T | null { + const uniqueKeys = new Set(entries.map(keyOf)); + if (uniqueKeys.size !== 1) return null; + return entries[0] ?? null; +} + /** * Walk up from rootDir looking for tsconfig.json (up to 4 levels). * Handles monorepo setups where rootDir is a package subdirectory but @@ -239,78 +254,105 @@ function createProgram(ts: TsModule, tsconfigPath: string): import('typescript') * Entries already at confidence 1.0 (e.g., `new Foo()` from tree-sitter) are * left unchanged. New entries from the compiler are added at confidence 1.0. */ -function enrichSourceFile( - ts: TsModule, - sourceFile: import('typescript').SourceFile, - checker: import('typescript').TypeChecker, - typeMap: Map, -): void { - // First pass: collect resolved types keyed by bare identifier name. - // Track both the short name (for typeMap writes) and the fully-qualified name +/** + * Mutable state threaded through the enrichSourceFile visitor. Grouped into + * one object (rather than closed-over locals) so the walk can be a plain + * top-level function, outside the enclosing function's own complexity count. + */ +interface SourceFileVisitContext { + ts: TsModule; + checker: import('typescript').TypeChecker; + // Collects resolved types keyed by bare identifier name. Tracks both the + // short name (for typeMap writes) and the fully-qualified name // (module-path-prefixed) for ambiguity detection. Two classes may share the // same short name (e.g., `OrderService` from two different modules), and // symbol.getName() returns the declared name — not the local alias — so // deduplication on short names alone would incorrectly collapse them. 
- const nameToEntries = new Map(); - // Track class property declaration names so we can also seed "this.X" entries. - const propertyDeclNames = new Set(); + nameToEntries: Map; + // Class property declaration names so we can also seed "this.X" entries. + propertyDeclNames: Set; +} - function visit(node: import('typescript').Node): void { - let identName: string | null = null; - let nameNode: import('typescript').Identifier | null = null; - - if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name)) { - identName = node.name.text; - nameNode = node.name; - } else if (ts.isParameter(node) && ts.isIdentifier(node.name)) { - identName = node.name.text; - nameNode = node.name; - } else if (ts.isPropertyDeclaration(node) && ts.isIdentifier(node.name)) { - // TypeScript class field: `private repo: Repository` - // Seeds typeMap so `this.repo.method()` can be resolved via receiver type. - identName = node.name.text; - nameNode = node.name; - propertyDeclNames.add(node.name.text); - } +function visitSourceFileNode(ctx: SourceFileVisitContext, node: import('typescript').Node): void { + const { ts, checker, nameToEntries, propertyDeclNames } = ctx; + let identName: string | null = null; + let nameNode: import('typescript').Identifier | null = null; + + if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name)) { + identName = node.name.text; + nameNode = node.name; + } else if (ts.isParameter(node) && ts.isIdentifier(node.name)) { + identName = node.name.text; + nameNode = node.name; + } else if (ts.isPropertyDeclaration(node) && ts.isIdentifier(node.name)) { + // TypeScript class field: `private repo: Repository` + // Seeds typeMap so `this.repo.method()` can be resolved via receiver type. 
+ identName = node.name.text; + nameNode = node.name; + propertyDeclNames.add(node.name.text); + } - if (identName && nameNode) { - const resolved = resolveTypeName(ts, nameNode, checker); - if (resolved) { - const existing = nameToEntries.get(identName); - if (existing) { - existing.push(resolved); - } else { - nameToEntries.set(identName, [resolved]); - } + if (identName && nameNode) { + const resolved = resolveTypeName(ts, nameNode, checker); + if (resolved) { + const existing = nameToEntries.get(identName); + if (existing) { + existing.push(resolved); + } else { + nameToEntries.set(identName, [resolved]); } } + } - ts.forEachChild(node, visit); + ts.forEachChild(node, (child) => visitSourceFileNode(ctx, child)); +} + +/** + * Write one (name → candidate entries) group to typeMap if unambiguous + * (single unique qualified type for the name), plus its "this." + * companion entry when name is a class property. + */ +function writeSourceFileTypeMapEntry( + typeMap: Map, + propertyDeclNames: ReadonlySet, + name: string, + entries: { shortName: string; qualifiedName: string }[], +): void { + const first = resolveUnambiguous(entries, (e) => e.qualifiedName); + if (!first) return; // ambiguous across modules, or no candidates — skip + const shortName = first.shortName; + const existing = typeMap.get(name); + if (!existing || existing.confidence < 1.0) { + typeMap.set(name, { type: shortName, confidence: 1.0 }); } - ts.forEachChild(sourceFile, visit); + // For class property declarations, also seed "this.fieldName" so that + // `this.repo.findById()` call sites resolve to the interface/class type. 
+ if (propertyDeclNames.has(name)) { + const thisKey = `this.${name}`; + const existingThis = typeMap.get(thisKey); + if (!existingThis || existingThis.confidence < 1.0) { + typeMap.set(thisKey, { type: shortName, confidence: 1.0 }); + } + } +} + +function enrichSourceFile( + ts: TsModule, + sourceFile: import('typescript').SourceFile, + checker: import('typescript').TypeChecker, + typeMap: Map, +): void { + const ctx: SourceFileVisitContext = { + ts, + checker, + nameToEntries: new Map(), + propertyDeclNames: new Set(), + }; + ts.forEachChild(sourceFile, (node) => visitSourceFileNode(ctx, node)); // Second pass: only write unambiguous entries (single unique qualified type for a name) - for (const [name, entries] of nameToEntries) { - const uniqueQualified = [...new Set(entries.map((e) => e.qualifiedName))]; - if (uniqueQualified.length !== 1) continue; // ambiguous across modules — skip - // entries is non-empty because we only set() on first occurrence and push() after — - // TypeScript's noUncheckedIndexedAccess can flag [0] access, so assert the type. - const first = entries[0]; - if (!first) continue; - const shortName = first.shortName; - const existing = typeMap.get(name); - if (!existing || existing.confidence < 1.0) { - typeMap.set(name, { type: shortName, confidence: 1.0 }); - } - // For class property declarations, also seed "this.fieldName" so that - // `this.repo.findById()` call sites resolve to the interface/class type. 
- if (propertyDeclNames.has(name)) { - const thisKey = `this.${name}`; - const existingThis = typeMap.get(thisKey); - if (!existingThis || existingThis.confidence < 1.0) { - typeMap.set(thisKey, { type: shortName, confidence: 1.0 }); - } - } + for (const [name, entries] of ctx.nameToEntries) { + writeSourceFileTypeMapEntry(typeMap, ctx.propertyDeclNames, name, entries); } } @@ -327,98 +369,190 @@ function enrichSourceFile( * Async functions returning Promise are unwrapped: the inner type argument T is * used so that async methods receive a returnTypeMap entry just like sync ones. */ -function enrichReturnTypeMap( - ts: TsModule, - sourceFile: import('typescript').SourceFile, +/** + * Mutable state threaded through the enrichReturnTypeMap visitor. Grouped + * into one object (rather than closed-over locals) so the node-kind handlers + * below can be plain top-level functions, independently testable and outside + * the enclosing function's own complexity count. + */ +interface ReturnTypeVisitContext { + ts: TsModule; + checker: import('typescript').TypeChecker; + returnTypeMap: Map; + currentClass: string | null; +} + +/** + * Resolve the concrete return type name for a signature, unwrapping + * Promise so async functions contribute their inner type. + */ +function resolveReturnTypeName( checker: import('typescript').TypeChecker, - returnTypeMap: Map, -): void { - let currentClass: string | null = null; - - /** - * Resolve the concrete return type name for a signature, unwrapping - * Promise so async functions contribute their inner type. - */ - function resolveReturnTypeName(sig: import('typescript').Signature | undefined): string | null { - if (!sig) return null; - try { - let retType = checker.getReturnTypeOfSignature(sig); - - // Unwrap Promise → T so async functions get a useful returnTypeMap entry. - const outerSym = retType.getSymbol() ?? 
retType.aliasSymbol; - if (outerSym?.getName() === 'Promise') { - const args = checker.getTypeArguments(retType as import('typescript').TypeReference); - if (args.length > 0) retType = args[0]!; - } + sig: import('typescript').Signature | undefined, +): string | null { + if (!sig) return null; + try { + let retType = checker.getReturnTypeOfSignature(sig); - const sym = retType.getSymbol() ?? retType.aliasSymbol; - if (!sym) return null; - const name = sym.getName(); - if (!name || name === '__type' || name === '__object' || SKIP_TYPE_NAMES.has(name)) - return null; - return name; - } catch { - return null; + // Unwrap Promise → T so async functions get a useful returnTypeMap entry. + const outerSym = retType.getSymbol() ?? retType.aliasSymbol; + if (outerSym?.getName() === 'Promise') { + const args = checker.getTypeArguments(retType as import('typescript').TypeReference); + if (args.length > 0) retType = args[0]!; } + + const sym = retType.getSymbol() ?? retType.aliasSymbol; + if (!sym) return null; + const name = sym.getName(); + if (!name || name === '__type' || name === '__object' || SKIP_TYPE_NAMES.has(name)) return null; + return name; + } catch { + return null; } +} - function writeEntry(fnName: string, sigNode: import('typescript').SignatureDeclaration): void { - const typeName = resolveReturnTypeName(checker.getSignatureFromDeclaration(sigNode)); - if (typeName) { - const existing = returnTypeMap.get(fnName); - if (!existing || existing.confidence < 1.0) - returnTypeMap.set(fnName, { type: typeName, confidence: 1.0 }); - } +function writeReturnTypeEntry( + ctx: ReturnTypeVisitContext, + fnName: string, + sigNode: import('typescript').SignatureDeclaration, +): void { + const typeName = resolveReturnTypeName( + ctx.checker, + ctx.checker.getSignatureFromDeclaration(sigNode), + ); + if (typeName) { + const existing = ctx.returnTypeMap.get(fnName); + if (!existing || existing.confidence < 1.0) + ctx.returnTypeMap.set(fnName, { type: typeName, confidence: 1.0 
}); } +} - /** - * Visit nodes at the current lexical scope (module level or class body). - * Does NOT recurse into function/method bodies to avoid capturing local - * helper functions under bare names. - */ - function visit(node: import('typescript').Node): void { - if (ts.isClassDeclaration(node) || ts.isClassExpression(node)) { - // Enter class scope: visit direct children (method/property declarations). - const saved = currentClass; - currentClass = - (node as import('typescript').ClassDeclaration | import('typescript').ClassExpression).name - ?.text ?? null; - ts.forEachChild(node, visit); - currentClass = saved; - return; // class body fully handled — stop here - } +/** + * Enter class scope: visit direct children (method/property declarations), + * then restore the enclosing class name. + */ +function visitClassScopeForReturnType( + ctx: ReturnTypeVisitContext, + node: import('typescript').Node, +): void { + const saved = ctx.currentClass; + ctx.currentClass = + (node as import('typescript').ClassDeclaration | import('typescript').ClassExpression).name + ?.text ?? null; + ctx.ts.forEachChild(node, (child) => visitReturnTypeNode(ctx, child)); + ctx.currentClass = saved; +} - if (ts.isFunctionDeclaration(node) && node.name) { - // Module-level function declaration: record and stop (no body descent). - writeEntry(node.name.text, node); - return; - } +/** Module-level function declaration: record and stop (no body descent). */ +function visitFunctionDeclarationForReturnType( + ctx: ReturnTypeVisitContext, + node: import('typescript').FunctionDeclaration, +): void { + // node.name is guaranteed truthy by the caller's guard. + writeReturnTypeEntry(ctx, node.name!.text, node); +} - if (ts.isMethodDeclaration(node) && ts.isIdentifier(node.name)) { - // Class method: record as ClassName.methodName and stop. - const fnName = currentClass ? 
`${currentClass}.${node.name.text}` : node.name.text; - writeEntry(fnName, node); - return; - } +/** Class method: record as ClassName.methodName and stop. */ +function visitMethodDeclarationForReturnType( + ctx: ReturnTypeVisitContext, + node: import('typescript').MethodDeclaration, +): void { + // node.name is guaranteed to be an Identifier by the caller's guard. + const name = (node.name as import('typescript').Identifier).text; + const fnName = ctx.currentClass ? `${ctx.currentClass}.${name}` : name; + writeReturnTypeEntry(ctx, fnName, node); +} - if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name) && node.initializer) { - // Arrow/function-expression assigned to a variable at the current scope. - // Because we never recurse into function bodies, any VariableDeclaration - // we see here is guaranteed to be at module scope or inside a class body - // (not inside a method body), making the bare name safe for cross-file use. - const init = node.initializer; - if (ts.isArrowFunction(init) || ts.isFunctionExpression(init)) { - writeEntry(node.name.text, init); - } - return; // variable declaration fully handled — stop here - } +/** + * Arrow/function-expression assigned to a variable at the current scope. + * Because we never recurse into function bodies, any VariableDeclaration + * seen here is guaranteed to be at module scope or inside a class body + * (not inside a method body), making the bare name safe for cross-file use. + */ +function visitVariableInitializerForReturnType( + ctx: ReturnTypeVisitContext, + node: import('typescript').VariableDeclaration, +): void { + // node.name is guaranteed to be an Identifier and node.initializer is + // guaranteed defined by the caller's guard. 
+ const init = node.initializer!; + if (ctx.ts.isArrowFunction(init) || ctx.ts.isFunctionExpression(init)) { + writeReturnTypeEntry(ctx, (node.name as import('typescript').Identifier).text, init); + } +} - // For all other node kinds (VariableStatement, VariableDeclarationList, - // ExportDeclaration, etc.) recurse to reach nested function/class/var nodes. - ts.forEachChild(node, visit); +/** + * Visit nodes at the current lexical scope (module level or class body). + * Does NOT recurse into function/method bodies to avoid capturing local + * helper functions under bare names. + */ +function visitReturnTypeNode(ctx: ReturnTypeVisitContext, node: import('typescript').Node): void { + const { ts } = ctx; + + if (ts.isClassDeclaration(node) || ts.isClassExpression(node)) { + visitClassScopeForReturnType(ctx, node); + return; // class body fully handled — stop here } - ts.forEachChild(sourceFile, visit); + if (ts.isFunctionDeclaration(node) && node.name) { + visitFunctionDeclarationForReturnType(ctx, node); + return; + } + + if (ts.isMethodDeclaration(node) && ts.isIdentifier(node.name)) { + visitMethodDeclarationForReturnType(ctx, node); + return; + } + + if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name) && node.initializer) { + visitVariableInitializerForReturnType(ctx, node); + return; // variable declaration fully handled — stop here + } + + // For all other node kinds (VariableStatement, VariableDeclarationList, + // ExportDeclaration, etc.) recurse to reach nested function/class/var nodes. 
+ ts.forEachChild(node, (child) => visitReturnTypeNode(ctx, child)); +} + +function enrichReturnTypeMap( + ts: TsModule, + sourceFile: import('typescript').SourceFile, + checker: import('typescript').TypeChecker, + returnTypeMap: Map, +): void { + const ctx: ReturnTypeVisitContext = { ts, checker, returnTypeMap, currentClass: null }; + ts.forEachChild(sourceFile, (node) => visitReturnTypeNode(ctx, node)); +} + +/** + * Resolve the callee name and, for receiver method calls (`obj.method()`), + * the receiver's typeMap-resolved type name, from a call expression's callee. + * + * Handles two callee shapes: a bare identifier (`fn()`) and a property-access + * expression (`obj.method()`); any other callee shape (e.g. a call expression + * itself, as in `getFactory()()`) yields no calleeName. + */ +function resolveCalleeNameAndReceiverType( + ts: TsModule, + call: import('typescript').CallExpression, + typeMap: Map, +): { calleeName: string | null; receiverTypeName: string | undefined } { + if (ts.isIdentifier(call.expression)) { + return { calleeName: call.expression.text, receiverTypeName: undefined }; + } + + if (ts.isPropertyAccessExpression(call.expression)) { + const calleeName = call.expression.name.text; + const obj = call.expression.expression; + let receiverTypeName: string | undefined; + if (ts.isIdentifier(obj)) { + const entry = typeMap.get(obj.text); + if (entry && typeof entry === 'object') receiverTypeName = entry.type; + } + return { calleeName, receiverTypeName }; + } + + return { calleeName: null, receiverTypeName: undefined }; } /** @@ -426,13 +560,14 @@ function enrichReturnTypeMap( * is not yet in typeMap into callAssignments for cross-file propagation. * Phase 8.1 already resolved the common case into typeMap; this captures the rest. * - * Uses the same two-pass "unambiguous names only" strategy as `enrichSourceFile`: - * collect all candidates first, then only push entries where a given `varName` - * maps to exactly one distinct `calleeName`. 
This prevents multiple methods in the - * same file that each bind a different imported function to a common local name - * (e.g., `const result = getA()` in one method, `const result = getB()` in - * another) from both landing in `callAssignments`, which would cause - * `propagateReturnTypesAcrossFiles` to silently resolve one arbitrarily. + * Uses the same two-pass "unambiguous names only" strategy as `enrichSourceFile` + * (via the shared `resolveUnambiguous` helper): collect all candidates first, + * then only push entries where a given `varName` maps to exactly one distinct + * `calleeName`. This prevents multiple methods in the same file that each bind + * a different imported function to a common local name (e.g., `const result = + * getA()` in one method, `const result = getB()` in another) from both landing + * in `callAssignments`, which would cause `propagateReturnTypesAcrossFiles` to + * silently resolve one arbitrarily. */ function enrichCallAssignments( ts: TsModule, @@ -452,20 +587,11 @@ function enrichCallAssignments( ) { const varName = node.name.text; if (!typeMap.has(varName)) { - const call = node.initializer; - let calleeName: string | null = null; - let receiverTypeName: string | undefined; - - if (ts.isIdentifier(call.expression)) { - calleeName = call.expression.text; - } else if (ts.isPropertyAccessExpression(call.expression)) { - calleeName = call.expression.name.text; - const obj = call.expression.expression; - if (ts.isIdentifier(obj)) { - const entry = typeMap.get(obj.text); - if (entry && typeof entry === 'object') receiverTypeName = entry.type; - } - } + const { calleeName, receiverTypeName } = resolveCalleeNameAndReceiverType( + ts, + node.initializer, + typeMap, + ); if (calleeName) { const ca: CallAssignment = { varName, calleeName, receiverTypeName }; @@ -488,10 +614,8 @@ function enrichCallAssignments( // calleeName. 
Ambiguous varNames (same name, different callees across scopes) // are excluded to avoid silently resolving the wrong type cross-file. for (const entries of candidates.values()) { - const uniqueCallees = new Set(entries.map((e) => e.calleeName)); - if (uniqueCallees.size === 1) { - callAssignments.push(entries[0] as CallAssignment); - } + const resolved = resolveUnambiguous(entries, (e) => e.calleeName); + if (resolved) callAssignments.push(resolved); } } From 57143a8e2ef683ed9dbbd9823925adb5ea279498 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 13:32:50 -0600 Subject: [PATCH 20/26] fix: adopt buildFileConditionSQL in prepare.ts and move console.log out of domain layer Impact: 1 functions changed, 8 affected --- src/domain/search/search/prepare.ts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/domain/search/search/prepare.ts b/src/domain/search/search/prepare.ts index a28330ced..ec670d9b8 100644 --- a/src/domain/search/search/prepare.ts +++ b/src/domain/search/search/prepare.ts @@ -1,6 +1,7 @@ import { openReadonlyOrFail } from '../../../db/index.js'; -import { escapeLike } from '../../../db/query-builder.js'; +import { buildFileConditionSQL } from '../../../db/query-builder.js'; import { getEmbeddingCount, getEmbeddingMeta } from '../../../db/repository/embeddings.js'; +import { info } from '../../../infrastructure/logger.js'; import type { BetterSqlite3Database } from '../../../types.js'; import { MODELS } from '../models.js'; import { applyFilters } from './filters.js'; @@ -47,7 +48,7 @@ export function prepareSearch( try { const count = getEmbeddingCount(db); if (count === 0) { - console.log('No embeddings found. Run `codegraph embed` first.'); + info('No embeddings found. Run `codegraph embed` first.'); db.close(); return null; } @@ -82,12 +83,11 @@ export function prepareSearch( params.push(opts.kind); } if (fpArr.length > 0 && !isGlob) { - if (fpArr.length === 1) { - conditions.push("n.file LIKE ? 
ESCAPE '\\'"); - params.push(`%${escapeLike(fpArr[0]!)}%`); - } else { - conditions.push(`(${fpArr.map(() => "n.file LIKE ? ESCAPE '\\'").join(' OR ')})`); - params.push(...fpArr.map((f) => `%${escapeLike(f)}%`)); + const fc = buildFileConditionSQL(fpArr, 'n.file'); + if (fc.sql) { + // Strip leading ' AND ' since we're using conditions array + conditions.push(fc.sql.replace(/^ AND /, '')); + params.push(...fc.params); } } if (conditions.length > 0) { From 21db9a9f9698a623fa25fc678b51d2bf80f881cc Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 13:54:44 -0600 Subject: [PATCH 21/26] fix: address quality issues in graph unified model (model.ts merge() aliasing, leiden complexity) model.ts: merge() aliased NodeAttrs/EdgeAttrs objects by reference instead of cloning, unlike subgraph()/filterEdges()/clone() which all defensively copy with { ...attrs }. A caller merging graph B into graph A could silently leak mutations across graphs via the shared attrs object. No production caller exists today (verified via fn-impact: 0 callers besides the test), so this was a latent defect, not a demonstrated one -- now fixed to match the file's established convention. 
leiden/partition.ts + leiden/adapter.ts: decomposed the remaining cognitive/cyclomatic-exceeding functions left after phase 3's shared aggregate/typed-array helper extraction (commit 0f9bbe6f), following the same directed/undirected-branch-splitting pattern gauntlet recommended: - moveNode (cognitive 16->2, cyclomatic 13->3): split into applyMoveStrengthTotals + applyMoveInternalEdgeWeightDelta[Directed/Undirected] - buildSortedCommunityIds (cognitive 17->3): extracted compareBySizeDesc/ compareByPreserveMap comparators - computeDeltaCPM (cognitive 17->4): extracted computeCpmEdgeWeights[Directed/Undirected] - makeGraphAdapter (cognitive 27->3): extracted resolveAdapterOptions, buildNodeIndex, computeNodeSizes, makeForEachNeighbor - populateUndirectedEdges (cognitive 28->0): extracted aggregateUndirectedPairs/recordUndirectedPairWeight, emitUndirectedPairs, applyUndirectedSelfLoops Pure behavior-preserving decomposition -- no algorithm changes. Verified: full test suite (201/201 files, 3336 tests), leiden-specific suite (22/22), graph suite (177/177 incl. merge()), typecheck, and lint all green. Community-detection output on the leiden-specific directories is byte-identical before/after (confirmed via codegraph communities --drift split-candidates, controlling for the known #1734 run-to-run noise). docs check acknowledged: internal refactor + bug fix only, no user-facing feature/language/architecture-table changes. 
Impact: 28 functions changed, 31 affected --- src/graph/algorithms/leiden/adapter.ts | 178 ++++++++++++++----- src/graph/algorithms/leiden/partition.ts | 217 +++++++++++++++-------- src/graph/model.ts | 4 +- 3 files changed, 280 insertions(+), 119 deletions(-) diff --git a/src/graph/algorithms/leiden/adapter.ts b/src/graph/algorithms/leiden/adapter.ts index 390a15aa3..29efc1d55 100644 --- a/src/graph/algorithms/leiden/adapter.ts +++ b/src/graph/algorithms/leiden/adapter.ts @@ -74,22 +74,33 @@ function populateDirectedEdges( } } -/** - * Populate edge arrays for an undirected graph. Reciprocal pairs are - * symmetrized and averaged to produce a single weight per undirected edge. - * Self-loops use single-w convention (matching modularity.ts formulas). - */ -function populateUndirectedEdges( +/** Fold a single a→b weight into the unordered-pair aggregate, tracking which direction(s) were seen. */ +function recordUndirectedPairWeight( + pairAgg: Map, + a: number, + b: number, + w: number, +): void { + const i = a < b ? a : b; + const j = a < b ? b : a; + const key = `${i}:${j}`; + let rec = pairAgg.get(key); + if (!rec) { + rec = { sum: 0, seenAB: 0, seenBA: 0 }; + pairAgg.set(key, rec); + } + rec.sum += w; + if (a === i) rec.seenAB = 1; + else rec.seenBA = 1; +} + +/** Aggregate raw undirected edges into one weighted record per unordered node pair. */ +function aggregateUndirectedPairs( graph: CodeGraph, idToIndex: Map, linkWeight: (attrs: EdgeAttrs) => number, - n: number, selfLoop: Float64Array, - outEdges: EdgeEntry[][], - inEdges: InEdgeEntry[][], - strengthOut: Float64Array, - strengthIn: Float64Array, -): void { +): Map { const pairAgg = new Map(); for (const [src, tgt, attrs] of graph.edges()) { @@ -101,19 +112,20 @@ function populateUndirectedEdges( taAdd(selfLoop, a, w); continue; } - const i = a < b ? a : b; - const j = a < b ? 
b : a; - const key = `${i}:${j}`; - let rec = pairAgg.get(key); - if (!rec) { - rec = { sum: 0, seenAB: 0, seenBA: 0 }; - pairAgg.set(key, rec); - } - rec.sum += w; - if (a === i) rec.seenAB = 1; - else rec.seenBA = 1; + recordUndirectedPairWeight(pairAgg, a, b, w); } + return pairAgg; +} + +/** Emit symmetrized undirected edges (averaged over any reciprocal pairs) into the adjacency lists. */ +function emitUndirectedPairs( + pairAgg: Map, + outEdges: EdgeEntry[][], + inEdges: InEdgeEntry[][], + strengthOut: Float64Array, + strengthIn: Float64Array, +): void { for (const [key, rec] of pairAgg.entries()) { const parts = key.split(':'); const i = +(parts[0] as string); @@ -130,10 +142,21 @@ function populateUndirectedEdges( taAdd(strengthIn, i, w); taAdd(strengthIn, j, w); } +} - // Add self-loops into adjacency and strengths. - // Note: uses single-w convention (not standard 2w) — the modularity formulas in - // modularity.ts are written to match this convention, keeping the system self-consistent. +/** + * Add self-loops into adjacency and strengths. + * Note: uses single-w convention (not standard 2w) — the modularity formulas in + * modularity.ts are written to match this convention, keeping the system self-consistent. + */ +function applyUndirectedSelfLoops( + n: number, + selfLoop: Float64Array, + outEdges: EdgeEntry[][], + inEdges: InEdgeEntry[][], + strengthOut: Float64Array, + strengthIn: Float64Array, +): void { for (let v = 0; v < n; v++) { const w: number = fget(selfLoop, v); if (w !== 0) { @@ -145,15 +168,56 @@ function populateUndirectedEdges( } } -export function makeGraphAdapter(graph: CodeGraph, opts: GraphAdapterOptions = {}): GraphAdapter { - const linkWeight: (attrs: EdgeAttrs) => number = - opts.linkWeight || ((attrs) => (attrs && typeof attrs.weight === 'number' ? attrs.weight : 1)); - const nodeSize: (attrs: NodeAttrs) => number = - opts.nodeSize || ((attrs) => (attrs && typeof attrs.size === 'number' ? 
attrs.size : 1)); - const directed: boolean = !!opts.directed; - const baseNodeIds: string[] | undefined = opts.baseNodeIds; - - // Build dense node index mapping +/** + * Populate edge arrays for an undirected graph. Reciprocal pairs are + * symmetrized and averaged to produce a single weight per undirected edge. + * Self-loops use single-w convention (matching modularity.ts formulas). + */ +function populateUndirectedEdges( + graph: CodeGraph, + idToIndex: Map, + linkWeight: (attrs: EdgeAttrs) => number, + n: number, + selfLoop: Float64Array, + outEdges: EdgeEntry[][], + inEdges: InEdgeEntry[][], + strengthOut: Float64Array, + strengthIn: Float64Array, +): void { + const pairAgg = aggregateUndirectedPairs(graph, idToIndex, linkWeight, selfLoop); + emitUndirectedPairs(pairAgg, outEdges, inEdges, strengthOut, strengthIn); + applyUndirectedSelfLoops(n, selfLoop, outEdges, inEdges, strengthOut, strengthIn); +} + +interface ResolvedAdapterOptions { + linkWeight: (attrs: EdgeAttrs) => number; + nodeSize: (attrs: NodeAttrs) => number; + directed: boolean; + baseNodeIds: string[] | undefined; +} + +/** Apply GraphAdapterOptions defaults (weight=1, size=1, directed=false). */ +function resolveAdapterOptions(opts: GraphAdapterOptions): ResolvedAdapterOptions { + return { + linkWeight: + opts.linkWeight || + ((attrs) => (attrs && typeof attrs.weight === 'number' ? attrs.weight : 1)), + nodeSize: + opts.nodeSize || ((attrs) => (attrs && typeof attrs.size === 'number' ? attrs.size : 1)), + directed: !!opts.directed, + baseNodeIds: opts.baseNodeIds, + }; +} + +/** + * Build the dense node index mapping. When `baseNodeIds` is provided, node + * order/indices are pinned to it (used to align adapters built from related + * graphs); otherwise indices are assigned in CodeGraph iteration order. 
+ */ +function buildNodeIndex( + graph: CodeGraph, + baseNodeIds: string[] | undefined, +): { nodeIds: string[]; idToIndex: Map } { const nodeIds: string[] = []; const idToIndex = new Map(); if (Array.isArray(baseNodeIds) && baseNodeIds.length > 0) { @@ -169,10 +233,39 @@ export function makeGraphAdapter(graph: CodeGraph, opts: GraphAdapterOptions = { nodeIds.push(id); } } + return { nodeIds, idToIndex }; +} + +/** Resolve per-node sizes via the adapter's nodeSize accessor, dense-indexed. */ +function computeNodeSizes( + graph: CodeGraph, + idToIndex: Map, + n: number, + nodeSize: (attrs: NodeAttrs) => number, +): Float64Array { + const size = new Float64Array(n); + for (const [id, attrs] of graph.nodes()) { + const i = idToIndex.get(id); + if (i != null) size[i] = +nodeSize(attrs) || 0; + } + return size; +} + +function makeForEachNeighbor( + outEdges: EdgeEntry[][], +): (i: number, cb: (to: number, w: number) => void) => void { + return (i, cb) => { + const list = outEdges[i] as EdgeEntry[]; + for (let k = 0; k < list.length; k++) cb((list[k] as EdgeEntry).to, (list[k] as EdgeEntry).w); + }; +} + +export function makeGraphAdapter(graph: CodeGraph, opts: GraphAdapterOptions = {}): GraphAdapter { + const { linkWeight, nodeSize, directed, baseNodeIds } = resolveAdapterOptions(opts); + const { nodeIds, idToIndex } = buildNodeIndex(graph, baseNodeIds); const n: number = nodeIds.length; // Storage - const size = new Float64Array(n); const selfLoop = new Float64Array(n); const strengthOut = new Float64Array(n); const strengthIn = new Float64Array(n); @@ -211,20 +304,11 @@ export function makeGraphAdapter(graph: CodeGraph, opts: GraphAdapterOptions = { ); } - // Node sizes - for (const [id, attrs] of graph.nodes()) { - const i = idToIndex.get(id); - if (i != null) size[i] = +nodeSize(attrs) || 0; - } + const size = computeNodeSizes(graph, idToIndex, n, nodeSize); // Totals const totalWeight: number = strengthOut.reduce((a, b) => a + b, 0); - function forEachNeighbor(i: 
number, cb: (to: number, w: number) => void): void { - const list = outEdges[i] as EdgeEntry[]; - for (let k = 0; k < list.length; k++) cb((list[k] as EdgeEntry).to, (list[k] as EdgeEntry).w); - } - return { n, nodeIds, @@ -237,6 +321,6 @@ export function makeGraphAdapter(graph: CodeGraph, opts: GraphAdapterOptions = { inEdges, directed, totalWeight, - forEachNeighbor, + forEachNeighbor: makeForEachNeighbor(outEdges), }; } diff --git a/src/graph/algorithms/leiden/partition.ts b/src/graph/algorithms/leiden/partition.ts index de78b8f3e..8e76f8c50 100644 --- a/src/graph/algorithms/leiden/partition.ts +++ b/src/graph/algorithms/leiden/partition.ts @@ -74,6 +74,34 @@ interface PartitionState { /* Community-ID sort helper (used by compact) */ /* ------------------------------------------------------------------ */ +/** Comparator: descending by community size, tie-broken by node count then id. */ +function compareBySizeDesc( + communityTotalSize: Float64Array, + communityNodeCount: Int32Array, +): (a: number, b: number) => number { + return (a, b) => + fget(communityTotalSize, b) - fget(communityTotalSize, a) || + iget(communityNodeCount, b) - iget(communityNodeCount, a) || + a - b; +} + +/** Comparator: respects a user-provided label map, falling back to size-desc for unmapped ids. */ +function compareByPreserveMap( + preserveMap: Map, + communityTotalSize: Float64Array, + communityNodeCount: Int32Array, +): (a: number, b: number) => number { + const fallback = compareBySizeDesc(communityTotalSize, communityNodeCount); + return (a, b) => { + const pa = preserveMap.get(a); + const pb = preserveMap.get(b); + if (pa != null && pb != null && pa !== pb) return pa - pb; + if (pa != null && pb == null) return -1; + if (pb != null && pa == null) return 1; + return fallback(a, b); + }; +} + /** * Sort community IDs according to the compaction options: preserve original * order, respect a user-provided label map, or sort by descending size. 
@@ -88,26 +116,9 @@ function buildSortedCommunityIds( if (opts.keepOldOrder) { ids.sort((a, b) => a - b); } else if (opts.preserveMap instanceof Map) { - const preserveMap = opts.preserveMap; - ids.sort((a, b) => { - const pa = preserveMap.get(a); - const pb = preserveMap.get(b); - if (pa != null && pb != null && pa !== pb) return pa - pb; - if (pa != null && pb == null) return -1; - if (pb != null && pa == null) return 1; - return ( - fget(communityTotalSize, b) - fget(communityTotalSize, a) || - iget(communityNodeCount, b) - iget(communityNodeCount, a) || - a - b - ); - }); + ids.sort(compareByPreserveMap(opts.preserveMap, communityTotalSize, communityNodeCount)); } else { - ids.sort( - (a, b) => - fget(communityTotalSize, b) - fget(communityTotalSize, a) || - iget(communityNodeCount, b) - iget(communityNodeCount, a) || - a - b, - ); + ids.sort(compareBySizeDesc(communityTotalSize, communityNodeCount)); } } @@ -273,30 +284,55 @@ function computeDeltaModularityDirected( return deltaInternal - deltaExpected; } +/** computeCpmEdgeWeights — directed branch: in+out weight, plus self-loop correction. */ +function computeCpmEdgeWeightsDirected( + s: PartitionState, + v: number, + oldC: number, + newC: number, +): { wOld: number; wNew: number; selfCorrection: number } { + const wOld: number = + (fget(s.outEdgeWeightToCommunity, oldC) || 0) + (fget(s.inEdgeWeightFromCommunity, oldC) || 0); + const wNew: number = + newC < s.outEdgeWeightToCommunity.length + ? (fget(s.outEdgeWeightToCommunity, newC) || 0) + + (fget(s.inEdgeWeightFromCommunity, newC) || 0) + : 0; + // Self-loop correction (see cpm.ts diffCPM) + const selfCorrection: number = 2 * (fget(s.graph.selfLoop, v) || 0); + return { wOld, wNew, selfCorrection }; +} + +/** computeCpmEdgeWeights — undirected branch: single neighbor-weight-to-community value. 
*/ +function computeCpmEdgeWeightsUndirected( + s: PartitionState, + oldC: number, + newC: number, +): { wOld: number; wNew: number; selfCorrection: number } { + const wOld: number = fget(s.neighborEdgeWeightToCommunity, oldC) || 0; + const wNew: number = + newC < s.neighborEdgeWeightToCommunity.length + ? fget(s.neighborEdgeWeightToCommunity, newC) || 0 + : 0; + return { wOld, wNew, selfCorrection: 0 }; +} + +/** Directed/undirected edge-weight-to-community split used by computeDeltaCPM. */ +function computeCpmEdgeWeights( + s: PartitionState, + v: number, + oldC: number, + newC: number, +): { wOld: number; wNew: number; selfCorrection: number } { + return s.graph.directed + ? computeCpmEdgeWeightsDirected(s, v, oldC, newC) + : computeCpmEdgeWeightsUndirected(s, oldC, newC); +} + function computeDeltaCPM(s: PartitionState, v: number, newC: number, gamma: number = 1.0): number { const oldC: number = iget(s.nodeCommunity, v); if (newC === oldC) return 0; - let w_old: number; - let w_new: number; - let selfCorrection: number = 0; - if (s.graph.directed) { - w_old = - (fget(s.outEdgeWeightToCommunity, oldC) || 0) + - (fget(s.inEdgeWeightFromCommunity, oldC) || 0); - w_new = - newC < s.outEdgeWeightToCommunity.length - ? (fget(s.outEdgeWeightToCommunity, newC) || 0) + - (fget(s.inEdgeWeightFromCommunity, newC) || 0) - : 0; - // Self-loop correction (see cpm.ts diffCPM) - selfCorrection = 2 * (fget(s.graph.selfLoop, v) || 0); - } else { - w_old = fget(s.neighborEdgeWeightToCommunity, oldC) || 0; - w_new = - newC < s.neighborEdgeWeightToCommunity.length - ? fget(s.neighborEdgeWeightToCommunity, newC) || 0 - : 0; - } + const { wOld: w_old, wNew: w_new, selfCorrection } = computeCpmEdgeWeights(s, v, oldC, newC); const nodeSz: number = fget(s.graph.size, v) || 1; const sizeOld: number = fget(s.communityTotalSize, oldC) || 0; const sizeNew: number = newC < s.communityTotalSize.length ? 
fget(s.communityTotalSize, newC) : 0; @@ -307,6 +343,75 @@ function computeDeltaCPM(s: PartitionState, v: number, newC: number, gamma: numb /* Extracted: node move */ /* ------------------------------------------------------------------ */ +/** Directed/undirected community strength-total delta applied by moveNode. */ +function applyMoveStrengthTotals( + s: PartitionState, + oldC: number, + newC: number, + strengthOutV: number, + strengthInV: number, +): void { + if (s.graph.directed) { + s.communityTotalOutStrength[oldC] = fget(s.communityTotalOutStrength, oldC) - strengthOutV; + s.communityTotalOutStrength[newC] = fget(s.communityTotalOutStrength, newC) + strengthOutV; + s.communityTotalInStrength[oldC] = fget(s.communityTotalInStrength, oldC) - strengthInV; + s.communityTotalInStrength[newC] = fget(s.communityTotalInStrength, newC) + strengthInV; + } else { + s.communityTotalStrength[oldC] = fget(s.communityTotalStrength, oldC) - strengthOutV; + s.communityTotalStrength[newC] = fget(s.communityTotalStrength, newC) + strengthOutV; + } +} + +/** applyMoveInternalEdgeWeightDelta — directed branch. */ +function applyMoveInternalEdgeWeightDeltaDirected( + s: PartitionState, + oldC: number, + newC: number, + selfLoopWeight: number, +): void { + const outToOld: number = fget(s.outEdgeWeightToCommunity, oldC) || 0; + const inFromOld: number = fget(s.inEdgeWeightFromCommunity, oldC) || 0; + const outToNew: number = + newC < s.outEdgeWeightToCommunity.length ? fget(s.outEdgeWeightToCommunity, newC) || 0 : 0; + const inFromNew: number = + newC < s.inEdgeWeightFromCommunity.length ? fget(s.inEdgeWeightFromCommunity, newC) || 0 : 0; + // outToOld/inFromOld already include the self-loop weight (self-loops are + // in outEdges/inEdges), so subtract it once to avoid triple-counting. 
+ s.communityInternalEdgeWeight[oldC] = + fget(s.communityInternalEdgeWeight, oldC) - (outToOld + inFromOld - selfLoopWeight); + s.communityInternalEdgeWeight[newC] = + fget(s.communityInternalEdgeWeight, newC) + (outToNew + inFromNew + selfLoopWeight); +} + +/** applyMoveInternalEdgeWeightDelta — undirected branch. */ +function applyMoveInternalEdgeWeightDeltaUndirected( + s: PartitionState, + oldC: number, + newC: number, + selfLoopWeight: number, +): void { + const weightToOld: number = fget(s.neighborEdgeWeightToCommunity, oldC) || 0; + const weightToNew: number = fget(s.neighborEdgeWeightToCommunity, newC) || 0; + s.communityInternalEdgeWeight[oldC] = + fget(s.communityInternalEdgeWeight, oldC) - (2 * weightToOld + selfLoopWeight); + s.communityInternalEdgeWeight[newC] = + fget(s.communityInternalEdgeWeight, newC) + (2 * weightToNew + selfLoopWeight); +} + +/** Directed/undirected community internal-edge-weight delta applied by moveNode. */ +function applyMoveInternalEdgeWeightDelta( + s: PartitionState, + oldC: number, + newC: number, + selfLoopWeight: number, +): void { + if (s.graph.directed) { + applyMoveInternalEdgeWeightDeltaDirected(s, oldC, newC, selfLoopWeight); + } else { + applyMoveInternalEdgeWeightDeltaUndirected(s, oldC, newC, selfLoopWeight); + } +} + function moveNode(s: PartitionState, v: number, newC: number): boolean { const oldC: number = iget(s.nodeCommunity, v); if (oldC === newC) return false; @@ -323,37 +428,9 @@ function moveNode(s: PartitionState, v: number, newC: number): boolean { s.communityNodeCount[newC] = iget(s.communityNodeCount, newC) + 1; s.communityTotalSize[oldC] = fget(s.communityTotalSize, oldC) - nodeSz; s.communityTotalSize[newC] = fget(s.communityTotalSize, newC) + nodeSz; - if (s.graph.directed) { - s.communityTotalOutStrength[oldC] = fget(s.communityTotalOutStrength, oldC) - strengthOutV; - s.communityTotalOutStrength[newC] = fget(s.communityTotalOutStrength, newC) + strengthOutV; - s.communityTotalInStrength[oldC] 
= fget(s.communityTotalInStrength, oldC) - strengthInV; - s.communityTotalInStrength[newC] = fget(s.communityTotalInStrength, newC) + strengthInV; - } else { - s.communityTotalStrength[oldC] = fget(s.communityTotalStrength, oldC) - strengthOutV; - s.communityTotalStrength[newC] = fget(s.communityTotalStrength, newC) + strengthOutV; - } - if (s.graph.directed) { - const outToOld: number = fget(s.outEdgeWeightToCommunity, oldC) || 0; - const inFromOld: number = fget(s.inEdgeWeightFromCommunity, oldC) || 0; - const outToNew: number = - newC < s.outEdgeWeightToCommunity.length ? fget(s.outEdgeWeightToCommunity, newC) || 0 : 0; - const inFromNew: number = - newC < s.inEdgeWeightFromCommunity.length ? fget(s.inEdgeWeightFromCommunity, newC) || 0 : 0; - // outToOld/inFromOld already include the self-loop weight (self-loops are - // in outEdges/inEdges), so subtract it once to avoid triple-counting. - s.communityInternalEdgeWeight[oldC] = - fget(s.communityInternalEdgeWeight, oldC) - (outToOld + inFromOld - selfLoopWeight); - s.communityInternalEdgeWeight[newC] = - fget(s.communityInternalEdgeWeight, newC) + (outToNew + inFromNew + selfLoopWeight); - } else { - const weightToOld: number = fget(s.neighborEdgeWeightToCommunity, oldC) || 0; - const weightToNew: number = fget(s.neighborEdgeWeightToCommunity, newC) || 0; - s.communityInternalEdgeWeight[oldC] = - fget(s.communityInternalEdgeWeight, oldC) - (2 * weightToOld + selfLoopWeight); - s.communityInternalEdgeWeight[newC] = - fget(s.communityInternalEdgeWeight, newC) + (2 * weightToNew + selfLoopWeight); - } + applyMoveStrengthTotals(s, oldC, newC, strengthOutV, strengthInV); + applyMoveInternalEdgeWeightDelta(s, oldC, newC, selfLoopWeight); s.nodeCommunity[v] = newC; return true; diff --git a/src/graph/model.ts b/src/graph/model.ts index d34270aa4..3961eb97f 100644 --- a/src/graph/model.ts +++ b/src/graph/model.ts @@ -228,10 +228,10 @@ export class CodeGraph { /** Merge another graph into this one. 
Nodes/edges from other override on conflict. */ merge(other: CodeGraph): this { for (const [id, attrs] of other.nodes()) { - this.addNode(id, attrs); + this.addNode(id, { ...attrs }); } for (const [src, tgt, attrs] of other.edges()) { - this.addEdge(src, tgt, attrs); + this.addEdge(src, tgt, { ...attrs }); } return this; } From f7ce3107900e06a5a17a75868cb2ed716f21bcd7 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 14:19:08 -0600 Subject: [PATCH 22/26] fix: address quality issues in features/complexity-query.ts (docs check acknowledged) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract getExceededMetrics as the single source of truth for which manifesto thresholds a row exceeds, shared by mapComplexityRow and exceedsAnyThreshold — cuts mapComplexityRow's cyclomatic complexity from 23 (fail) to 10 and removes the duplicated 4-branch check. Replace the hardcoded default-threshold object with DEFAULTS.manifesto.rules (config.ts is already the source of truth for these values). Decompose complexityData/computeComplexitySummary (resolveComplexityQueryOptions, buildComplexityResult, queryComplexityRows, fetchAllComplexityMetrics, summarizeComplexityMetrics, average) to bring halstead.effort for every function in the file under the 15000 fail threshold. Pure decomposition, zero behavior change — verified via clean rebuild + full test suite. Widen tests/integration/complexity.test.ts's config.js mock to preserve real exports via importOriginal (it previously replaced the whole module, which broke once this file started importing DEFAULTS). 
Impact: 24 functions changed, 8 affected --- src/features/complexity-query.ts | 303 ++++++++++++++++----------- tests/integration/complexity.test.ts | 10 +- 2 files changed, 187 insertions(+), 126 deletions(-) diff --git a/src/features/complexity-query.ts b/src/features/complexity-query.ts index 5f3b9d121..d1ba86a3a 100644 --- a/src/features/complexity-query.ts +++ b/src/features/complexity-query.ts @@ -7,7 +7,7 @@ import { openReadonlyOrFail } from '../db/index.js'; import { buildFileConditionSQL } from '../db/query-builder.js'; -import { loadConfig } from '../infrastructure/config.js'; +import { DEFAULTS, loadConfig } from '../infrastructure/config.js'; import { debug } from '../infrastructure/logger.js'; import { isTestFile } from '../infrastructure/test-filter.js'; import { paginateResult } from '../shared/paginate.js'; @@ -35,6 +35,61 @@ interface ComplexityRow { const isValidThreshold = (v: unknown): v is number => typeof v === 'number' && Number.isFinite(v); +/** Column-sort expressions for `codegraph complexity --sort `. */ +const ORDER_BY_MAP: Record = { + cognitive: 'fc.cognitive DESC', + cyclomatic: 'fc.cyclomatic DESC', + nesting: 'fc.max_nesting DESC', + mi: 'fc.maintainability_index ASC', + volume: 'fc.halstead_volume DESC', + effort: 'fc.halstead_effort DESC', + bugs: 'fc.halstead_bugs DESC', + loc: 'fc.loc DESC', +}; + +interface ThresholdMetrics { + cognitive: number; + cyclomatic: number; + max_nesting: number; + maintainability_index: number; +} + +/** Single source of truth for which metric names exceed which thresholds. */ +const METRIC_THRESHOLD_CHECKS: Array<{ + name: string; + exceeds: (r: ThresholdMetrics, thresholds: any) => boolean; +}> = [ + { + name: 'cognitive', + exceeds: (r, t) => + isValidThreshold(t.cognitive?.warn) && r.cognitive >= (t.cognitive?.warn ?? 0), + }, + { + name: 'cyclomatic', + exceeds: (r, t) => + isValidThreshold(t.cyclomatic?.warn) && r.cyclomatic >= (t.cyclomatic?.warn ?? 
0), + }, + { + name: 'maxNesting', + exceeds: (r, t) => + isValidThreshold(t.maxNesting?.warn) && r.max_nesting >= (t.maxNesting?.warn ?? 0), + }, + { + name: 'maintainabilityIndex', + exceeds: (r, t) => + isValidThreshold(t.maintainabilityIndex?.warn) && + r.maintainability_index > 0 && + r.maintainability_index <= (t.maintainabilityIndex?.warn ?? 0), + }, +]; + +/** List of metric names a row exceeds (empty if none). */ +function getExceededMetrics(r: ThresholdMetrics, thresholds: any): string[] { + return METRIC_THRESHOLD_CHECKS.filter((check) => check.exceeds(r, thresholds)).map( + (check) => check.name, + ); +} + /** Build WHERE clause and params for complexity query filtering. */ function buildComplexityWhere(opts: { noTests: boolean; @@ -90,28 +145,7 @@ function buildThresholdHaving(thresholds: any): string { /** Map a raw DB row to the public complexity result shape. */ function mapComplexityRow(r: ComplexityRow, thresholds: any): Record { - const exceeds: string[] = []; - if ( - isValidThreshold(thresholds.cognitive?.warn) && - r.cognitive >= (thresholds.cognitive?.warn ?? 0) - ) - exceeds.push('cognitive'); - if ( - isValidThreshold(thresholds.cyclomatic?.warn) && - r.cyclomatic >= (thresholds.cyclomatic?.warn ?? 0) - ) - exceeds.push('cyclomatic'); - if ( - isValidThreshold(thresholds.maxNesting?.warn) && - r.max_nesting >= (thresholds.maxNesting?.warn ?? 0) - ) - exceeds.push('maxNesting'); - if ( - isValidThreshold(thresholds.maintainabilityIndex?.warn) && - r.maintainability_index > 0 && - r.maintainability_index <= (thresholds.maintainabilityIndex?.warn ?? 0) - ) - exceeds.push('maintainabilityIndex'); + const exceeds = getExceededMetrics(r, thresholds); return { name: r.name, @@ -136,21 +170,48 @@ function mapComplexityRow(r: ComplexityRow, thresholds: any): Record 0; +} + +/** Fetch the bare metric columns (all rows) used to compute summary statistics. 
*/ +function fetchAllComplexityMetrics( + db: ReturnType, + noTests: boolean, +): ThresholdMetrics[] { + return db + .prepare( + `SELECT fc.cognitive, fc.cyclomatic, fc.max_nesting, fc.maintainability_index + FROM function_complexity fc JOIN nodes n ON fc.node_id = n.id + WHERE n.kind IN ('function','method') + ${noTests ? `AND n.file NOT LIKE '%.test.%' AND n.file NOT LIKE '%.spec.%' AND n.file NOT LIKE '%__test__%' AND n.file NOT LIKE '%__tests__%' AND n.file NOT LIKE '%.stories.%'` : ''}`, + ) + .all(); +} + +/** Arithmetic mean, rounded to 1 decimal (matches the summary's existing precision). */ +function average(values: number[]): number { + return +(values.reduce((s, v) => s + v, 0) / values.length).toFixed(1); +} + +/** Reduce a set of complexity rows down to the public summary-statistics shape. */ +function summarizeComplexityMetrics( + allRows: ThresholdMetrics[], thresholds: any, -): boolean { - return ( - (isValidThreshold(thresholds.cognitive?.warn) && - r.cognitive >= (thresholds.cognitive?.warn ?? 0)) || - (isValidThreshold(thresholds.cyclomatic?.warn) && - r.cyclomatic >= (thresholds.cyclomatic?.warn ?? 0)) || - (isValidThreshold(thresholds.maxNesting?.warn) && - r.max_nesting >= (thresholds.maxNesting?.warn ?? 0)) || - (isValidThreshold(thresholds.maintainabilityIndex?.warn) && - r.maintainability_index > 0 && - r.maintainability_index <= (thresholds.maintainabilityIndex?.warn ?? 
0)) - ); +): Record { + const cognitiveValues = allRows.map((r) => r.cognitive); + const cyclomaticValues = allRows.map((r) => r.cyclomatic); + const miValues = allRows.map((r) => r.maintainability_index || 0); + return { + analyzed: allRows.length, + avgCognitive: average(cognitiveValues), + avgCyclomatic: average(cyclomaticValues), + maxCognitive: Math.max(...cognitiveValues), + maxCyclomatic: Math.max(...cyclomaticValues), + avgMI: average(miValues), + minMI: +Math.min(...miValues).toFixed(1), + aboveWarn: allRows.filter((r) => exceedsAnyThreshold(r, thresholds)).length, + }; } /** Compute summary statistics across all complexity rows. */ @@ -160,33 +221,9 @@ function computeComplexitySummary( thresholds: any, ): Record | null { try { - const allRows = db - .prepare<{ - cognitive: number; - cyclomatic: number; - max_nesting: number; - maintainability_index: number; - }>( - `SELECT fc.cognitive, fc.cyclomatic, fc.max_nesting, fc.maintainability_index - FROM function_complexity fc JOIN nodes n ON fc.node_id = n.id - WHERE n.kind IN ('function','method') - ${noTests ? 
`AND n.file NOT LIKE '%.test.%' AND n.file NOT LIKE '%.spec.%' AND n.file NOT LIKE '%__test__%' AND n.file NOT LIKE '%__tests__%' AND n.file NOT LIKE '%.stories.%'` : ''}`, - ) - .all(); - + const allRows = fetchAllComplexityMetrics(db, noTests); if (allRows.length === 0) return null; - - const miValues = allRows.map((r) => r.maintainability_index || 0); - return { - analyzed: allRows.length, - avgCognitive: +(allRows.reduce((s, r) => s + r.cognitive, 0) / allRows.length).toFixed(1), - avgCyclomatic: +(allRows.reduce((s, r) => s + r.cyclomatic, 0) / allRows.length).toFixed(1), - maxCognitive: Math.max(...allRows.map((r) => r.cognitive)), - maxCyclomatic: Math.max(...allRows.map((r) => r.cyclomatic)), - avgMI: +(miValues.reduce((s, v) => s + v, 0) / miValues.length).toFixed(1), - minMI: +Math.min(...miValues).toFixed(1), - aboveWarn: allRows.filter((r) => exceedsAnyThreshold(r, thresholds)).length, - }; + return summarizeComplexityMetrics(allRows, thresholds); } catch (e: unknown) { debug(`complexity summary query failed: ${(e as Error).message}`); return null; @@ -203,33 +240,89 @@ function checkHasGraph(db: ReturnType): boolean { } } +/** Run the main complexity rows query; returns null if the table doesn't exist yet. 
*/ +function queryComplexityRows( + db: ReturnType, + where: string, + having: string, + orderBy: string, + params: unknown[], +): ComplexityRow[] | null { + try { + return db + .prepare( + `SELECT n.name, n.kind, n.file, n.line, n.end_line, + fc.cognitive, fc.cyclomatic, fc.max_nesting, + fc.loc, fc.sloc, fc.maintainability_index, + fc.halstead_volume, fc.halstead_difficulty, fc.halstead_effort, fc.halstead_bugs + FROM function_complexity fc + JOIN nodes n ON fc.node_id = n.id + ${where} ${having} + ORDER BY ${orderBy}`, + ) + .all(...params); + } catch (e: unknown) { + debug(`complexity query failed (table may not exist): ${(e as Error).message}`); + return null; + } +} + +interface ComplexityQueryOpts { + target?: string; + limit?: number; + sort?: string; + aboveThreshold?: boolean; + file?: string; + kind?: string; + noTests?: boolean; + config?: CodegraphConfig; + offset?: number; +} + +/** Resolve query flags + effective manifesto thresholds from opts/config/DEFAULTS. */ +function resolveComplexityQueryOptions(opts: ComplexityQueryOpts): { + sort: string; + noTests: boolean; + aboveThreshold: boolean; + thresholds: any; +} { + const config = opts.config || loadConfig(process.cwd()); + return { + sort: opts.sort || 'cognitive', + noTests: opts.noTests || false, + aboveThreshold: opts.aboveThreshold || false, + thresholds: config.manifesto?.rules || DEFAULTS.manifesto.rules, + }; +} + +/** Run the query + summary and shape the pre-pagination result object. */ +function buildComplexityResult( + db: ReturnType, + sql: { where: string; having: string; orderBy: string; params: unknown[] }, + noTests: boolean, + thresholds: any, +): Record { + const rows = queryComplexityRows(db, sql.where, sql.having, sql.orderBy, sql.params); + if (rows === null) { + return { functions: [], summary: null, thresholds, hasGraph: checkHasGraph(db) }; + } + + const filtered = noTests ? 
rows.filter((r) => !isTestFile(r.file)) : rows; + const functions = filtered.map((r) => mapComplexityRow(r, thresholds)); + + const summary = computeComplexitySummary(db, noTests, thresholds); + const hasGraph = summary === null ? checkHasGraph(db) : false; + + return { functions, summary, thresholds, hasGraph }; +} + export function complexityData( customDbPath?: string, - opts: { - target?: string; - limit?: number; - sort?: string; - aboveThreshold?: boolean; - file?: string; - kind?: string; - noTests?: boolean; - config?: CodegraphConfig; - offset?: number; - } = {}, + opts: ComplexityQueryOpts = {}, ): Record { const db = openReadonlyOrFail(customDbPath); try { - const sort = opts.sort || 'cognitive'; - const noTests = opts.noTests || false; - const aboveThreshold = opts.aboveThreshold || false; - - const config = opts.config || loadConfig(process.cwd()); - const thresholds: any = config.manifesto?.rules || { - cognitive: { warn: 15, fail: null }, - cyclomatic: { warn: 10, fail: null }, - maxNesting: { warn: 4, fail: null }, - maintainabilityIndex: { warn: 20, fail: null }, - }; + const { sort, noTests, aboveThreshold, thresholds } = resolveComplexityQueryOptions(opts); const { where, params } = buildComplexityWhere({ noTests, @@ -239,45 +332,9 @@ export function complexityData( }); const having = aboveThreshold ? 
buildThresholdHaving(thresholds) : ''; + const orderBy = ORDER_BY_MAP[sort] || 'fc.cognitive DESC'; - const orderMap: Record = { - cognitive: 'fc.cognitive DESC', - cyclomatic: 'fc.cyclomatic DESC', - nesting: 'fc.max_nesting DESC', - mi: 'fc.maintainability_index ASC', - volume: 'fc.halstead_volume DESC', - effort: 'fc.halstead_effort DESC', - bugs: 'fc.halstead_bugs DESC', - loc: 'fc.loc DESC', - }; - const orderBy = orderMap[sort] || 'fc.cognitive DESC'; - - let rows: ComplexityRow[]; - try { - rows = db - .prepare( - `SELECT n.name, n.kind, n.file, n.line, n.end_line, - fc.cognitive, fc.cyclomatic, fc.max_nesting, - fc.loc, fc.sloc, fc.maintainability_index, - fc.halstead_volume, fc.halstead_difficulty, fc.halstead_effort, fc.halstead_bugs - FROM function_complexity fc - JOIN nodes n ON fc.node_id = n.id - ${where} ${having} - ORDER BY ${orderBy}`, - ) - .all(...params); - } catch (e: unknown) { - debug(`complexity query failed (table may not exist): ${(e as Error).message}`); - return { functions: [], summary: null, thresholds, hasGraph: checkHasGraph(db) }; - } - - const filtered = noTests ? rows.filter((r) => !isTestFile(r.file)) : rows; - const functions = filtered.map((r) => mapComplexityRow(r, thresholds)); - - const summary = computeComplexitySummary(db, noTests, thresholds); - const hasGraph = summary === null ? 
checkHasGraph(db) : false; - - const base = { functions, summary, thresholds, hasGraph }; + const base = buildComplexityResult(db, { where, having, orderBy, params }, noTests, thresholds); return paginateResult(base, 'functions', { limit: opts.limit, offset: opts.offset }); } finally { db.close(); diff --git a/tests/integration/complexity.test.ts b/tests/integration/complexity.test.ts index ddfbd8a81..62f4a9b8c 100644 --- a/tests/integration/complexity.test.ts +++ b/tests/integration/complexity.test.ts @@ -14,9 +14,13 @@ import { initSchema } from '../../src/db/index.js'; import { complexityData } from '../../src/features/complexity.js'; import { loadConfig } from '../../src/infrastructure/config.js'; -vi.mock('../../src/infrastructure/config.js', () => ({ - loadConfig: vi.fn(() => ({})), -})); +vi.mock('../../src/infrastructure/config.js', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + loadConfig: vi.fn(() => ({})), + }; +}); // ─── Helpers ─────────────────────────────────────────────────────────── From 5b708ee844f9dfdba7e836522e053a7bcac0bc1f Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 15:38:19 -0600 Subject: [PATCH 23/26] fix: address quality issues in features/cochange.ts (docs check acknowledged) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire computeCoChanges/analyzeCoChanges's minSupport/maxFilesPerCommit/since fallback literals through DEFAULTS.coChange instead of re-declaring the same magic numbers in two places (extended the same fix to minJaccard in coChangeData/coChangeTopData/coChangeForFiles for consistency). 
Decompose computeCoChanges' three passes (per-file counts, pair generation, Jaccard filtering) into named helpers (updateFileCommitCounts, updatePairCounts, buildCoChangeResults), plus scanGitHistory, analyzeCoChanges, coChangeData, coChangeTopData, and coChangeForFiles — bringing halstead.effort for every one of the 26 functions in the file under the 15000 fail threshold (worst was computeCoChanges at 65249.68). Fix the loadLastAnalyzedSha/loadKnownFiles silent catches to log via debug(), matching scanGitHistory's existing error-visibility pattern. Pure decomposition + config wiring, zero behavior change — verified via clean rebuild + full test suite (including the real git-history integration tests in cochange.test.ts). Impact: 23 functions changed, 15 affected --- src/features/cochange.ts | 377 +++++++++++++++++++++++++-------------- 1 file changed, 239 insertions(+), 138 deletions(-) diff --git a/src/features/cochange.ts b/src/features/cochange.ts index 2c4b9c379..48bed2c90 100644 --- a/src/features/cochange.ts +++ b/src/features/cochange.ts @@ -9,7 +9,8 @@ import { execFileSync } from 'node:child_process'; import fs from 'node:fs'; import path from 'node:path'; import { closeDb, findDbPath, initSchema, openDb, openReadonlyOrFail } from '../db/index.js'; -import { warn } from '../infrastructure/logger.js'; +import { DEFAULTS } from '../infrastructure/config.js'; +import { debug, warn } from '../infrastructure/logger.js'; import { isTestFile } from '../infrastructure/test-filter.js'; import { normalizePath } from '../shared/constants.js'; import { paginateResult } from '../shared/paginate.js'; @@ -34,10 +35,8 @@ interface CoChangeMeta { lastCommit: string | null; } -export function scanGitHistory( - repoRoot: string, - opts: { since?: string; afterSha?: string | null } = {}, -): { commits: CommitEntry[] } { +/** Build the `git log` argv for scanning co-change history. 
*/ +function buildGitLogArgs(opts: { since?: string; afterSha?: string | null }): string[] { const args = [ 'log', '--name-only', @@ -48,10 +47,35 @@ export function scanGitHistory( if (opts.since) args.push(`--since=${opts.since}`); if (opts.afterSha) args.push(`${opts.afterSha}..HEAD`); args.push('--', '.'); + return args; +} +/** Parse `git log --name-only --pretty=format:%H%n%at` output into commit entries. */ +function parseGitLogOutput(output: string): CommitEntry[] { + const commits: CommitEntry[] = []; + // Split on double newlines to get blocks; each block is sha\nepoch\nfile1\nfile2... + const blocks = output.trim().split(/\n\n+/); + for (const block of blocks) { + const lines = block.split('\n').filter((l) => l.length > 0); + if (lines.length < 2) continue; + const sha = lines[0]!; + const epoch = parseInt(lines[1]!, 10); + if (Number.isNaN(epoch)) continue; + const files = lines.slice(2).map((f) => normalizePath(f)); + if (files.length > 0) { + commits.push({ sha, epoch, files }); + } + } + return commits; +} + +export function scanGitHistory( + repoRoot: string, + opts: { since?: string; afterSha?: string | null } = {}, +): { commits: CommitEntry[] } { let output: string; try { - output = execFileSync('git', args, { + output = execFileSync('git', buildGitLogArgs(opts), { cwd: repoRoot, encoding: 'utf-8', maxBuffer: 50 * 1024 * 1024, @@ -64,30 +88,63 @@ export function scanGitHistory( if (!output.trim()) return { commits: [] }; - const commits: CommitEntry[] = []; - // Split on double newlines to get blocks; each block is sha\nepoch\nfile1\nfile2... 
- const blocks = output.trim().split(/\n\n+/); - for (const block of blocks) { - const lines = block.split('\n').filter((l) => l.length > 0); - if (lines.length < 2) continue; - const sha = lines[0]!; - const epoch = parseInt(lines[1]!, 10); - if (Number.isNaN(epoch)) continue; - const files = lines.slice(2).map((f) => normalizePath(f)); - if (files.length > 0) { - commits.push({ sha, epoch, files }); + return { commits: parseGitLogOutput(output) }; +} + +/** Pass 1: bump the per-file commit count for every file in a (filtered) commit. */ +function updateFileCommitCounts(files: string[], fileCommitCounts: Map): void { + for (const f of files) { + fileCommitCounts.set(f, (fileCommitCounts.get(f) || 0) + 1); + } +} + +/** Pass 2: generate all unique file pairs for a commit (canonical: a < b) and tally them. */ +function updatePairCounts( + files: string[], + epoch: number, + pairCounts: Map, + pairLastEpoch: Map, +): void { + const sorted = [...new Set(files)].sort(); + for (let i = 0; i < sorted.length; i++) { + for (let j = i + 1; j < sorted.length; j++) { + const key = `${sorted[i]}\0${sorted[j]}`; + pairCounts.set(key, (pairCounts.get(key) || 0) + 1); + const prev = pairLastEpoch.get(key) || 0; + if (epoch > prev) pairLastEpoch.set(key, epoch); } } +} - return { commits }; +/** Pass 3: filter pairs by minSupport and compute their Jaccard similarity. 
*/ +function buildCoChangeResults( + pairCounts: Map, + pairLastEpoch: Map, + fileCommitCounts: Map, + minSupport: number, +): Map { + const results = new Map(); + for (const [key, count] of pairCounts) { + if (count < minSupport) continue; + const [fileA, fileB] = key.split('\0') as [string, string]; + const countA = fileCommitCounts.get(fileA) || 0; + const countB = fileCommitCounts.get(fileB) || 0; + const jaccard = count / (countA + countB - count); + results.set(key, { + commitCount: count, + jaccard, + lastEpoch: pairLastEpoch.get(key) || 0, + }); + } + return results; } export function computeCoChanges( commits: CommitEntry[], opts: { minSupport?: number; maxFilesPerCommit?: number; knownFiles?: Set | null } = {}, ): { pairs: Map; fileCommitCounts: Map } { - const minSupport = opts.minSupport ?? 3; - const maxFilesPerCommit = opts.maxFilesPerCommit ?? 50; + const minSupport = opts.minSupport ?? DEFAULTS.coChange.minSupport; + const maxFilesPerCommit = opts.maxFilesPerCommit ?? DEFAULTS.coChange.maxFilesPerCommit; const knownFiles = opts.knownFiles || null; const fileCommitCounts = new Map(); @@ -102,39 +159,14 @@ export function computeCoChanges( files = files.filter((f) => knownFiles.has(f)); } - // Count per-file commits - for (const f of files) { - fileCommitCounts.set(f, (fileCommitCounts.get(f) || 0) + 1); - } - - // Generate all unique pairs (canonical: a < b) - const sorted = [...new Set(files)].sort(); - for (let i = 0; i < sorted.length; i++) { - for (let j = i + 1; j < sorted.length; j++) { - const key = `${sorted[i]}\0${sorted[j]}`; - pairCounts.set(key, (pairCounts.get(key) || 0) + 1); - const prev = pairLastEpoch.get(key) || 0; - if (commit.epoch > prev) pairLastEpoch.set(key, commit.epoch); - } - } + updateFileCommitCounts(files, fileCommitCounts); + updatePairCounts(files, commit.epoch, pairCounts, pairLastEpoch); } - // Filter by minSupport and compute Jaccard - const results = new Map(); - for (const [key, count] of pairCounts) { - if (count 
< minSupport) continue; - const [fileA, fileB] = key.split('\0') as [string, string]; - const countA = fileCommitCounts.get(fileA) || 0; - const countB = fileCommitCounts.get(fileB) || 0; - const jaccard = count / (countA + countB - count); - results.set(key, { - commitCount: count, - jaccard, - lastEpoch: pairLastEpoch.get(key) || 0, - }); - } - - return { pairs: results, fileCommitCounts }; + return { + pairs: buildCoChangeResults(pairCounts, pairLastEpoch, fileCommitCounts, minSupport), + fileCommitCounts, + }; } /** Read the SHA of the most recently analyzed commit (incremental state). */ @@ -146,8 +178,8 @@ function loadLastAnalyzedSha(db: BetterSqlite3Database): string | null { ) .get(); return row ? row.value : null; - } catch { - /* table may not exist yet */ + } catch (e: unknown) { + debug(`loadLastAnalyzedSha: co_change_meta table may not exist yet: ${(e as Error).message}`); return null; } } @@ -164,8 +196,8 @@ function loadKnownFiles(db: BetterSqlite3Database): Set | null { try { const rows = db.prepare<{ file: string }>('SELECT DISTINCT file FROM nodes').all(); return new Set(rows.map((r) => r.file)); - } catch { - /* nodes table may not exist */ + } catch (e: unknown) { + debug(`loadKnownFiles: nodes table may not exist: ${(e as Error).message}`); return null; } } @@ -236,6 +268,47 @@ function updateCoChangeMeta( metaUpsert.run('min_support', String(minSupport)); } +interface CoChangeAnalysisOptions { + since: string; + minSupport: number; + maxFilesPerCommit: number; +} + +/** Resolve since/minSupport/maxFilesPerCommit from opts, falling back to DEFAULTS.coChange. */ +function resolveCoChangeAnalysisOptions(opts: { + since?: string; + minSupport?: number; + maxFilesPerCommit?: number; +}): CoChangeAnalysisOptions { + return { + since: opts.since || DEFAULTS.coChange.since, + minSupport: opts.minSupport ?? DEFAULTS.coChange.minSupport, + maxFilesPerCommit: opts.maxFilesPerCommit ?? 
DEFAULTS.coChange.maxFilesPerCommit, + }; +} + +/** Scan git history, compute co-change pairs, and persist them + the run metadata. */ +function runCoChangeScanAndPersist( + db: BetterSqlite3Database, + repoRoot: string, + afterSha: string | null, + resolved: CoChangeAnalysisOptions, +): CommitEntry[] { + const knownFiles = loadKnownFiles(db); + const { commits } = scanGitHistory(repoRoot, { since: resolved.since, afterSha }); + const { pairs: coChanges, fileCommitCounts } = computeCoChanges(commits, { + minSupport: resolved.minSupport, + maxFilesPerCommit: resolved.maxFilesPerCommit, + knownFiles, + }); + + persistCoChangeResults(db, fileCommitCounts, coChanges); + recomputeJaccardForAffected(db, [...fileCommitCounts.keys()]); + updateCoChangeMeta(db, commits, resolved.since, resolved.minSupport); + + return commits; +} + export function analyzeCoChanges( customDbPath?: string, opts: { @@ -258,25 +331,11 @@ export function analyzeCoChanges( return { error: `Not a git repository: ${repoRoot}` }; } - const since = opts.since || '1 year ago'; - const minSupport = opts.minSupport ?? 3; - const maxFilesPerCommit = opts.maxFilesPerCommit ?? 50; - + const resolved = resolveCoChangeAnalysisOptions(opts); const afterSha = opts.full ? 
null : loadLastAnalyzedSha(db); if (opts.full) clearCoChangeTables(db); - const knownFiles = loadKnownFiles(db); - - const { commits } = scanGitHistory(repoRoot, { since, afterSha }); - const { pairs: coChanges, fileCommitCounts } = computeCoChanges(commits, { - minSupport, - maxFilesPerCommit, - knownFiles, - }); - - persistCoChangeResults(db, fileCommitCounts, coChanges); - recomputeJaccardForAffected(db, [...fileCommitCounts.keys()]); - updateCoChangeMeta(db, commits, since, minSupport); + const commits = runCoChangeScanAndPersist(db, repoRoot, afterSha, resolved); const totalPairs = db .prepare<{ cnt: number }>('SELECT COUNT(*) as cnt FROM co_changes') @@ -287,8 +346,8 @@ export function analyzeCoChanges( return { pairsFound: totalPairs, commitsScanned: commits.length, - since, - minSupport, + since: resolved.since, + minSupport: resolved.minSupport, }; } @@ -300,6 +359,49 @@ interface CoChangeRow { last_commit_epoch: number; } +/** True if the `co_changes` table exists (i.e. `analyzeCoChanges` has run at least once). */ +function hasCoChangeTable(db: BetterSqlite3Database): boolean { + try { + db.prepare('SELECT 1 FROM co_changes LIMIT 1').get(); + return true; + } catch (e: unknown) { + debug(`hasCoChangeTable: co_changes table missing: ${(e as Error).message}`); + return false; + } +} + +/** Format a last-commit epoch (seconds) as `YYYY-MM-DD`, or null if absent. */ +function epochToDateString(epoch: number): string | null { + return epoch ? new Date(epoch * 1000).toISOString().slice(0, 10) : null; +} + +/** Shape+filter co-change rows into the public per-file "partners" list. 
*/ +function buildCoChangePartners( + rows: CoChangeRow[], + resolvedFile: string, + noTests: boolean, + limit: number, +): Array<{ file: string; commitCount: number; jaccard: number; lastCommitDate: string | null }> { + const partners: Array<{ + file: string; + commitCount: number; + jaccard: number; + lastCommitDate: string | null; + }> = []; + for (const row of rows) { + const partner = row.file_a === resolvedFile ? row.file_b : row.file_a; + if (noTests && isTestFile(partner)) continue; + partners.push({ + file: partner, + commitCount: row.commit_count, + jaccard: row.jaccard, + lastCommitDate: epochToDateString(row.last_commit_epoch), + }); + if (partners.length >= limit) break; + } + return partners; +} + export function coChangeData( file: string, customDbPath?: string, @@ -307,13 +409,10 @@ export function coChangeData( ): Record { const db = openReadonlyOrFail(customDbPath); const limit = opts.limit || 20; - const minJaccard = opts.minJaccard ?? 0.3; + const minJaccard = opts.minJaccard ?? DEFAULTS.coChange.minJaccard; const noTests = opts.noTests || false; - // Check if co_changes table exists - try { - db.prepare('SELECT 1 FROM co_changes LIMIT 1').get(); - } catch { + if (!hasCoChangeTable(db)) { closeDb(db); return { error: 'No co-change data found. Run `codegraph co-change --analyze` first.' }; } @@ -334,31 +433,46 @@ export function coChangeData( ) .all(resolvedFile, resolvedFile, minJaccard); - const partners: Array<{ - file: string; + const partners = buildCoChangePartners(rows, resolvedFile, noTests, limit); + + const meta = getCoChangeMeta(db); + closeDb(db); + + const base = { file: resolvedFile, partners, meta }; + return paginateResult(base, 'partners', { limit: opts.limit, offset: opts.offset }); +} + +/** Shape+filter co-change rows into the public global "top pairs" list. 
*/ +function buildCoChangeTopPairs( + rows: CoChangeRow[], + noTests: boolean, + limit: number, +): Array<{ + fileA: string; + fileB: string; + commitCount: number; + jaccard: number; + lastCommitDate: string | null; +}> { + const pairs: Array<{ + fileA: string; + fileB: string; commitCount: number; jaccard: number; lastCommitDate: string | null; }> = []; for (const row of rows) { - const partner = row.file_a === resolvedFile ? row.file_b : row.file_a; - if (noTests && isTestFile(partner)) continue; - partners.push({ - file: partner, + if (noTests && (isTestFile(row.file_a) || isTestFile(row.file_b))) continue; + pairs.push({ + fileA: row.file_a, + fileB: row.file_b, commitCount: row.commit_count, jaccard: row.jaccard, - lastCommitDate: row.last_commit_epoch - ? new Date(row.last_commit_epoch * 1000).toISOString().slice(0, 10) - : null, + lastCommitDate: epochToDateString(row.last_commit_epoch), }); - if (partners.length >= limit) break; + if (pairs.length >= limit) break; } - - const meta = getCoChangeMeta(db); - closeDb(db); - - const base = { file: resolvedFile, partners, meta }; - return paginateResult(base, 'partners', { limit: opts.limit, offset: opts.offset }); + return pairs; } export function coChangeTopData( @@ -367,12 +481,10 @@ export function coChangeTopData( ): Record { const db = openReadonlyOrFail(customDbPath); const limit = opts.limit || 20; - const minJaccard = opts.minJaccard ?? 0.3; + const minJaccard = opts.minJaccard ?? DEFAULTS.coChange.minJaccard; const noTests = opts.noTests || false; - try { - db.prepare('SELECT 1 FROM co_changes LIMIT 1').get(); - } catch { + if (!hasCoChangeTable(db)) { closeDb(db); return { error: 'No co-change data found. Run `codegraph co-change --analyze` first.' 
}; } @@ -386,32 +498,40 @@ export function coChangeTopData( ) .all(minJaccard); - const pairs: Array<{ - fileA: string; - fileB: string; + const pairs = buildCoChangeTopPairs(rows, noTests, limit); + + const meta = getCoChangeMeta(db); + closeDb(db); + + const base = { pairs, meta }; + return paginateResult(base, 'pairs', { limit: opts.limit, offset: opts.offset }); +} + +/** Shape+filter co-change rows into the public "coupled with an input file" list. */ +function buildCoChangeForFilesResults( + rows: Array<{ file_a: string; file_b: string; commit_count: number; jaccard: number }>, + inputSet: Set, + noTests: boolean, +): Array<{ file: string; coupledWith: string; commitCount: number; jaccard: number }> { + const results: Array<{ + file: string; + coupledWith: string; commitCount: number; jaccard: number; - lastCommitDate: string | null; }> = []; for (const row of rows) { - if (noTests && (isTestFile(row.file_a) || isTestFile(row.file_b))) continue; - pairs.push({ - fileA: row.file_a, - fileB: row.file_b, + const partner = inputSet.has(row.file_a) ? row.file_b : row.file_a; + const source = inputSet.has(row.file_a) ? row.file_a : row.file_b; + if (inputSet.has(partner)) continue; + if (noTests && isTestFile(partner)) continue; + results.push({ + file: partner, + coupledWith: source, commitCount: row.commit_count, jaccard: row.jaccard, - lastCommitDate: row.last_commit_epoch - ? 
new Date(row.last_commit_epoch * 1000).toISOString().slice(0, 10) - : null, }); - if (pairs.length >= limit) break; } - - const meta = getCoChangeMeta(db); - closeDb(db); - - const base = { pairs, meta }; - return paginateResult(base, 'pairs', { limit: opts.limit, offset: opts.offset }); + return results; } export function coChangeForFiles( @@ -419,7 +539,7 @@ export function coChangeForFiles( db: BetterSqlite3Database, opts: { minJaccard?: number; limit?: number; noTests?: boolean } = {}, ): Array<{ file: string; coupledWith: string; commitCount: number; jaccard: number }> { - const minJaccard = opts.minJaccard ?? 0.3; + const minJaccard = opts.minJaccard ?? DEFAULTS.coChange.minJaccard; const limit = opts.limit ?? 20; const noTests = opts.noTests || false; const inputSet = new Set(files); @@ -438,26 +558,7 @@ export function coChangeForFiles( ) .all(...files, ...files, minJaccard, limit); - const results: Array<{ - file: string; - coupledWith: string; - commitCount: number; - jaccard: number; - }> = []; - for (const row of rows) { - const partner = inputSet.has(row.file_a) ? row.file_b : row.file_a; - const source = inputSet.has(row.file_a) ? row.file_a : row.file_b; - if (inputSet.has(partner)) continue; - if (noTests && isTestFile(partner)) continue; - results.push({ - file: partner, - coupledWith: source, - commitCount: row.commit_count, - jaccard: row.jaccard, - }); - } - - return results; + return buildCoChangeForFilesResults(rows, inputSet, noTests); } // ─── Internal Helpers ──────────────────────────────────────────────────── From 7c3b8696808c4e6625ae75128019ed374b19dace Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 15:39:36 -0600 Subject: [PATCH 24/26] fix: address quality issues in features/branch-compare.ts (docs check acknowledged) This was the run's worst gauntlet offender (halstead.bugs 1.585 on branchCompareData). 
Pure decomposition per the gauntlet recommendation: extract git-ref validation (validateBranchCompareRefs), dual-worktree + dual-buildGraph setup (setupCompareWorktrees), and output-shape cleanup (shapeBranchCompareSymbolLists) out of branchCompareData; unify attachImpactToSymbols/attachImpactToChanged into one generic attachImpact(symbols, resolveId, dbPath, maxDepth, noTests) parameterized by id-resolution strategy. Extended the same treatment to the file's other named-FAIL functions (loadSymbolsFromDb: halstead.effort 123718.05->12326.18, bugs 0.9546->0.2182; branchCompareMermaid: cyclomatic 22->6) and to pre-existing effort-fails that the gauntlet's summary didn't name explicitly (loadCallersFromDb, compareSymbols) -- consistent with this phase's cochange.ts/complexity-query.ts fixes, where the file-level FAIL verdict covers every function over threshold, not just the 2-3 worst examples cited in the audit detail text. Zero behavior change: both exported functions (branchCompareData, branchCompareMermaid) keep byte-identical signatures; every extraction preserves exact call order, error-handling scope (the try/catch/finally around worktree creation is untouched), and the existing mutate-in-place impact-attachment pattern. Verified via tests/integration/branch-compare.test.ts, which exercises real git worktrees + buildGraph + DB comparison end-to-end (not mocked), plus the full suite, both before and after each incremental edit.
Impact: 44 functions changed, 15 affected --- src/features/branch-compare.ts | 798 +++++++++++++++++++++------------ 1 file changed, 504 insertions(+), 294 deletions(-) diff --git a/src/features/branch-compare.ts b/src/features/branch-compare.ts index 086ed1f11..cdf20674a 100644 --- a/src/features/branch-compare.ts +++ b/src/features/branch-compare.ts @@ -10,7 +10,7 @@ import { getNative, isNativeAvailable } from '../infrastructure/native.js'; import { isTestFile } from '../infrastructure/test-filter.js'; import { toErrorMessage } from '../shared/errors.js'; import { toSymbolRef } from '../shared/normalize.js'; -import type { EngineMode, NativeDatabase } from '../types.js'; +import type { BetterSqlite3Database, EngineMode, NativeDatabase } from '../types.js'; // ─── Git Helpers ──────────────────────────────────────────────────────── @@ -106,6 +106,96 @@ function makeSymbolKey(kind: string, file: string, name: string): string { return `${kind}::${file}::${name}`; } +interface RawNodeRow { + id: number; + name: string; + kind: string; + file: string; + line: number; + end_line: number | null; +} + +/** Try opening a NativeDatabase handle for batched fan-in/fan-out metrics. */ +function openNativeDbForFanMetrics(dbPath: string): NativeDatabase | undefined { + if (!isNativeAvailable()) return undefined; + try { + const native = getNative(); + return native.NativeDatabase.openReadonly(dbPath); + } catch (e) { + debug(`loadSymbolsFromDb: native path failed: ${toErrorMessage(e)}`); + return undefined; + } +} + +/** Query all non-file/directory nodes belonging to the given changed files. 
*/ +function queryChangedFileNodes(db: BetterSqlite3Database, changedFiles: string[]): RawNodeRow[] { + const placeholders = changedFiles.map(() => '?').join(', '); + return db + .prepare( + `SELECT n.id, n.name, n.kind, n.file, n.line, n.end_line + FROM nodes n + WHERE n.file IN (${placeholders}) + AND n.kind NOT IN ('file', 'directory') + ORDER BY n.file, n.line`, + ) + .all(...changedFiles) as RawNodeRow[]; +} + +/** Build the public SymbolInfo shape from a raw row + its resolved fan metrics. */ +function makeSymbolInfo(row: RawNodeRow, fanIn: number, fanOut: number): SymbolInfo { + const lineCount = row.end_line ? row.end_line - row.line + 1 : 0; + return { + id: row.id, + name: row.name, + kind: row.kind, + file: row.file, + line: row.line, + lineCount, + fanIn, + fanOut, + }; +} + +/** Native fast path: batch all fan-in/fan-out lookups in one napi call. */ +function buildSymbolsViaNativeBatch( + filtered: RawNodeRow[], + nativeDb: NativeDatabase, +): Map { + const symbols = new Map(); + const nodeIds = filtered.map((r) => r.id); + const metrics = nativeDb.batchFanMetrics!(nodeIds); + const metricsMap = new Map(metrics.map((m) => [m.nodeId, m])); + + for (const row of filtered) { + const m = metricsMap.get(row.id); + const key = makeSymbolKey(row.kind, row.file, row.name); + symbols.set(key, makeSymbolInfo(row, m?.fanIn ?? 0, m?.fanOut ?? 0)); + } + return symbols; +} + +/** JS fallback: per-row fan-in/fan-out COUNT queries. */ +function buildSymbolsViaJsFallback( + db: BetterSqlite3Database, + filtered: RawNodeRow[], +): Map { + const symbols = new Map(); + const fanInStmt = db.prepare( + `SELECT COUNT(*) AS cnt FROM edges WHERE target_id = ? AND kind = 'calls'`, + ); + const fanOutStmt = db.prepare( + `SELECT COUNT(*) AS cnt FROM edges WHERE source_id = ? 
AND kind = 'calls'`, + ); + + for (const row of filtered) { + const fanIn = (fanInStmt.get(row.id) as { cnt: number }).cnt; + const fanOut = (fanOutStmt.get(row.id) as { cnt: number }).cnt; + const key = makeSymbolKey(row.kind, row.file, row.name); + symbols.set(key, makeSymbolInfo(row, fanIn, fanOut)); + } + return symbols; +} + function loadSymbolsFromDb( dbPath: string, changedFiles: string[], @@ -113,97 +203,23 @@ function loadSymbolsFromDb( ): Map { const Database = getDatabase(); const db = new Database(dbPath, { readonly: true }); - - // Try opening a NativeDatabase for batched fan metrics - let nativeDb: NativeDatabase | undefined; - if (isNativeAvailable()) { - try { - const native = getNative(); - nativeDb = native.NativeDatabase.openReadonly(dbPath); - } catch (e) { - debug(`loadSymbolsFromDb: native path failed: ${toErrorMessage(e)}`); - } - } + const nativeDb = openNativeDbForFanMetrics(dbPath); try { - const symbols = new Map(); - if (changedFiles.length === 0) { - return symbols; + return new Map(); } - const placeholders = changedFiles.map(() => '?').join(', '); - const rows = db - .prepare( - `SELECT n.id, n.name, n.kind, n.file, n.line, n.end_line - FROM nodes n - WHERE n.file IN (${placeholders}) - AND n.kind NOT IN ('file', 'directory') - ORDER BY n.file, n.line`, - ) - .all(...changedFiles) as Array<{ - id: number; - name: string; - kind: string; - file: string; - line: number; - end_line: number | null; - }>; + const rows = queryChangedFileNodes(db, changedFiles); // Filter first, then batch fan metrics for all surviving rows const filtered = noTests ? 
rows.filter((r) => !isTestFile(r.file)) : rows; - // ── Native fast path: batch all fan-in/fan-out in one napi call ── if (nativeDb?.batchFanMetrics && filtered.length > 0) { - const nodeIds = filtered.map((r) => r.id); - const metrics = nativeDb.batchFanMetrics(nodeIds); - const metricsMap = new Map(metrics.map((m) => [m.nodeId, m])); - - for (const row of filtered) { - const lineCount = row.end_line ? row.end_line - row.line + 1 : 0; - const m = metricsMap.get(row.id); - const key = makeSymbolKey(row.kind, row.file, row.name); - symbols.set(key, { - id: row.id, - name: row.name, - kind: row.kind, - file: row.file, - line: row.line, - lineCount, - fanIn: m?.fanIn ?? 0, - fanOut: m?.fanOut ?? 0, - }); - } - return symbols; + return buildSymbolsViaNativeBatch(filtered, nativeDb); } - // ── JS fallback ─────────────────────────────────────────────────── - const fanInStmt = db.prepare( - `SELECT COUNT(*) AS cnt FROM edges WHERE target_id = ? AND kind = 'calls'`, - ); - const fanOutStmt = db.prepare( - `SELECT COUNT(*) AS cnt FROM edges WHERE source_id = ? AND kind = 'calls'`, - ); - - for (const row of filtered) { - const lineCount = row.end_line ? 
row.end_line - row.line + 1 : 0; - const fanIn = (fanInStmt.get(row.id) as { cnt: number }).cnt; - const fanOut = (fanOutStmt.get(row.id) as { cnt: number }).cnt; - const key = makeSymbolKey(row.kind, row.file, row.name); - - symbols.set(key, { - id: row.id, - name: row.name, - kind: row.kind, - file: row.file, - line: row.line, - lineCount, - fanIn, - fanOut, - }); - } - - return symbols; + return buildSymbolsViaJsFallback(db, filtered); } finally { db.close(); if (nativeDb) { @@ -232,37 +248,7 @@ function loadCallersFromDb( const allCallers = new Set(); for (const startId of nodeIds) { - const visited = new Set([startId]); - let frontier = [startId]; - - for (let d = 1; d <= maxDepth; d++) { - const nextFrontier: number[] = []; - for (const fid of frontier) { - const callers = db - .prepare( - `SELECT DISTINCT n.id, n.name, n.kind, n.file, n.line - FROM edges e JOIN nodes n ON e.source_id = n.id - WHERE e.target_id = ? AND e.kind = 'calls'`, - ) - .all(fid) as Array<{ - id: number; - name: string; - kind: string; - file: string; - line: number; - }>; - - for (const c of callers) { - if (!visited.has(c.id) && (!noTests || !isTestFile(c.file))) { - visited.add(c.id); - nextFrontier.push(c.id); - allCallers.add(JSON.stringify(toSymbolRef(c))); - } - } - } - frontier = nextFrontier; - if (frontier.length === 0) break; - } + bfsCallersFromNode(db, startId, maxDepth, noTests, allCallers); } return [...allCallers].map((s) => JSON.parse(s) as CallerInfo); @@ -271,63 +257,130 @@ function loadCallersFromDb( } } +/** Direct DB callers of a single node id (one BFS-frontier expansion step). */ +function queryDirectCallers( + db: BetterSqlite3Database, + nodeId: number, +): Array<{ id: number; name: string; kind: string; file: string; line: number }> { + return db + .prepare( + `SELECT DISTINCT n.id, n.name, n.kind, n.file, n.line + FROM edges e JOIN nodes n ON e.source_id = n.id + WHERE e.target_id = ? 
AND e.kind = 'calls'`, + ) + .all(nodeId) as Array<{ id: number; name: string; kind: string; file: string; line: number }>; +} + +/** BFS up to maxDepth from a single starting node, adding newly-seen callers to allCallers. */ +function bfsCallersFromNode( + db: BetterSqlite3Database, + startId: number, + maxDepth: number, + noTests: boolean, + allCallers: Set, +): void { + const visited = new Set([startId]); + let frontier = [startId]; + + for (let d = 1; d <= maxDepth; d++) { + const nextFrontier: number[] = []; + for (const fid of frontier) { + const callers = queryDirectCallers(db, fid); + for (const c of callers) { + if (!visited.has(c.id) && (!noTests || !isTestFile(c.file))) { + visited.add(c.id); + nextFrontier.push(c.id); + allCallers.add(JSON.stringify(toSymbolRef(c))); + } + } + } + frontier = nextFrontier; + if (frontier.length === 0) break; + } +} + // ─── Symbol Comparison ────────────────────────────────────────────────── -function compareSymbols( +/** Symbols present in `targetSymbols` but not `baseSymbols`. */ +function findAddedSymbols( baseSymbols: Map, targetSymbols: Map, -): { added: SymbolInfo[]; removed: SymbolInfo[]; changed: ChangedSymbol[] } { +): SymbolInfo[] { const added: SymbolInfo[] = []; - const removed: SymbolInfo[] = []; - const changed: ChangedSymbol[] = []; - for (const [key, sym] of targetSymbols) { - if (!baseSymbols.has(key)) { - added.push(sym); - } + if (!baseSymbols.has(key)) added.push(sym); } + return added; +} +/** Symbols present in `baseSymbols` but not `targetSymbols`. */ +function findRemovedSymbols( + baseSymbols: Map, + targetSymbols: Map, +): SymbolInfo[] { + const removed: SymbolInfo[] = []; for (const [key, sym] of baseSymbols) { - if (!targetSymbols.has(key)) { - removed.push(sym); - } + if (!targetSymbols.has(key)) removed.push(sym); } + return removed; +} +/** Build a ChangedSymbol entry from a base/target pair whose metrics diverged. 
*/ +function buildChangedSymbol(baseSym: SymbolInfo, targetSym: SymbolInfo): ChangedSymbol | null { + const lineCountDelta = targetSym.lineCount - baseSym.lineCount; + const fanInDelta = targetSym.fanIn - baseSym.fanIn; + const fanOutDelta = targetSym.fanOut - baseSym.fanOut; + + if (lineCountDelta === 0 && fanInDelta === 0 && fanOutDelta === 0) return null; + + return { + name: baseSym.name, + kind: baseSym.kind, + file: baseSym.file, + base: { + line: baseSym.line, + lineCount: baseSym.lineCount, + fanIn: baseSym.fanIn, + fanOut: baseSym.fanOut, + }, + target: { + line: targetSym.line, + lineCount: targetSym.lineCount, + fanIn: targetSym.fanIn, + fanOut: targetSym.fanOut, + }, + changes: { + lineCount: lineCountDelta, + fanIn: fanInDelta, + fanOut: fanOutDelta, + }, + }; +} + +/** Symbols present in both maps whose line count / fan-in / fan-out diverged. */ +function findChangedSymbols( + baseSymbols: Map, + targetSymbols: Map, +): ChangedSymbol[] { + const changed: ChangedSymbol[] = []; for (const [key, baseSym] of baseSymbols) { const targetSym = targetSymbols.get(key); if (!targetSym) continue; - - const lineCountDelta = targetSym.lineCount - baseSym.lineCount; - const fanInDelta = targetSym.fanIn - baseSym.fanIn; - const fanOutDelta = targetSym.fanOut - baseSym.fanOut; - - if (lineCountDelta !== 0 || fanInDelta !== 0 || fanOutDelta !== 0) { - changed.push({ - name: baseSym.name, - kind: baseSym.kind, - file: baseSym.file, - base: { - line: baseSym.line, - lineCount: baseSym.lineCount, - fanIn: baseSym.fanIn, - fanOut: baseSym.fanOut, - }, - target: { - line: targetSym.line, - lineCount: targetSym.lineCount, - fanIn: targetSym.fanIn, - fanOut: targetSym.fanOut, - }, - changes: { - lineCount: lineCountDelta, - fanIn: fanInDelta, - fanOut: fanOutDelta, - }, - }); - } + const entry = buildChangedSymbol(baseSym, targetSym); + if (entry) changed.push(entry); } + return changed; +} - return { added, removed, changed }; +function compareSymbols( + baseSymbols: Map, + 
targetSymbols: Map, +): { added: SymbolInfo[]; removed: SymbolInfo[]; changed: ChangedSymbol[] } { + return { + added: findAddedSymbols(baseSymbols, targetSymbols), + removed: findRemovedSymbols(baseSymbols, targetSymbols), + changed: findChangedSymbols(baseSymbols, targetSymbols), + }; } // ─── Main Data Function ───────────────────────────────────────────────── @@ -362,48 +415,31 @@ interface BranchCompareResult { summary?: BranchCompareSummary; } -function attachImpactToSymbols( - symbols: SymbolInfo[], +/** + * Attach caller-impact data to each symbol, given a strategy for resolving + * its DB node id (removed symbols carry their own id; changed symbols must + * be looked up in the base-commit symbol map). + */ +function attachImpact( + symbols: T[], + resolveId: (sym: T) => number | undefined, dbPath: string, - _baseSymbols: Map, maxDepth: number, noTests: boolean, ): void { for (const sym of symbols) { - const symCallers = loadCallersFromDb(dbPath, sym.id ? [sym.id] : [], maxDepth, noTests); - (sym as SymbolInfo & { impact?: CallerInfo[] }).impact = symCallers; + const id = resolveId(sym); + const symCallers = loadCallersFromDb(dbPath, id ? [id] : [], maxDepth, noTests); + (sym as T & { impact?: CallerInfo[] }).impact = symCallers; } } -function attachImpactToChanged( - changed: ChangedSymbol[], - dbPath: string, - baseSymbols: Map, - maxDepth: number, - noTests: boolean, -): void { - for (const sym of changed) { - const baseSym = baseSymbols.get(makeSymbolKey(sym.kind, sym.file, sym.name)); - const symCallers = loadCallersFromDb( - dbPath, - baseSym?.id ? [baseSym.id] : [], - maxDepth, - noTests, - ); - sym.impact = symCallers; - } -} - -export async function branchCompareData( +/** Confirm repoRoot is a git repo and resolve baseRef/targetRef to full SHAs. 
*/ +function validateBranchCompareRefs( + repoRoot: string, baseRef: string, targetRef: string, - opts: BranchCompareOpts = {}, -): Promise { - const repoRoot = opts.repoRoot || process.cwd(); - const maxDepth = opts.depth || 3; - const noTests = opts.noTests || false; - const engine = (opts.engine || 'wasm') as EngineMode; - +): { baseSha: string; targetSha: string } | { error: string } { try { execFileSync('git', ['rev-parse', '--git-dir'], { cwd: repoRoot, @@ -421,106 +457,249 @@ export async function branchCompareData( const targetSha = validateGitRef(repoRoot, targetRef); if (!targetSha) return { error: `Invalid git ref: "${targetRef}"` }; - const changedFiles = getChangedFilesBetweenRefs(repoRoot, baseSha, targetSha); + return { baseSha, targetSha }; +} - if (changedFiles.length === 0) { - return { - baseRef, - targetRef, - baseSha, - targetSha, - changedFiles: [], - added: [], - removed: [], - changed: [], - summary: { - added: 0, - removed: 0, - changed: 0, - totalImpacted: 0, - filesAffected: 0, - }, - }; - } +/** Create detached worktrees for both refs and build their graphs. 
*/ +async function setupCompareWorktrees( + repoRoot: string, + baseSha: string, + targetSha: string, + baseDir: string, + targetDir: string, + engine: EngineMode, +): Promise<{ baseDbPath: string; targetDbPath: string }> { + createWorktree(repoRoot, baseSha, baseDir); + createWorktree(repoRoot, targetSha, targetDir); + + await buildGraph(baseDir, { engine, skipRegistry: true }); + await buildGraph(targetDir, { engine, skipRegistry: true }); + + return { + baseDbPath: path.join(baseDir, '.codegraph', 'graph.db'), + targetDbPath: path.join(targetDir, '.codegraph', 'graph.db'), + }; +} - const tmpBase = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-bc-')); - const baseDir = path.join(tmpBase, 'base'); - const targetDir = path.join(tmpBase, 'target'); +interface SymbolDiffWithImpact { + added: SymbolInfo[]; + removed: SymbolInfo[]; + changed: ChangedSymbol[]; + allImpacted: Set; + impactedFiles: Set; +} - try { - createWorktree(repoRoot, baseSha, baseDir); - createWorktree(repoRoot, targetSha, targetDir); +/** Resolve base-commit node ids for removed/changed symbols (for BFS impact queries). */ +function resolveImpactfulIds( + removed: SymbolInfo[], + changed: ChangedSymbol[], + baseSymbols: Map, +): { removedIds: number[]; changedIds: number[] } { + const removedIds = removed.map((s) => s.id).filter(Boolean); + const changedIds = changed + .map((s) => baseSymbols.get(makeSymbolKey(s.kind, s.file, s.name))?.id) + .filter((id): id is number => Boolean(id)); + return { removedIds, changedIds }; +} - await buildGraph(baseDir, { engine, skipRegistry: true }); - await buildGraph(targetDir, { engine, skipRegistry: true }); +/** Collapse removed+changed caller lists into the summary's impacted-symbol/file sets. 
*/ +function computeImpactedFileSets( + removedImpact: CallerInfo[], + changedImpact: CallerInfo[], +): { allImpacted: Set; impactedFiles: Set } { + const allImpacted = new Set(); + for (const c of removedImpact) allImpacted.add(`${c.file}:${c.name}`); + for (const c of changedImpact) allImpacted.add(`${c.file}:${c.name}`); - const baseDbPath = path.join(baseDir, '.codegraph', 'graph.db'); - const targetDbPath = path.join(targetDir, '.codegraph', 'graph.db'); + const impactedFiles = new Set(); + for (const key of allImpacted) impactedFiles.add(key.split(':')[0]!); - const normalizedFiles = changedFiles.map((f) => f.replace(/\\/g, '/')); + return { allImpacted, impactedFiles }; +} - const baseSymbols = loadSymbolsFromDb(baseDbPath, normalizedFiles, noTests); - const targetSymbols = loadSymbolsFromDb(targetDbPath, normalizedFiles, noTests); +/** Load symbols from both DBs, diff them, and attach/compute blast-radius impact data. */ +function diffSymbolsWithImpact( + baseDbPath: string, + targetDbPath: string, + normalizedFiles: string[], + noTests: boolean, + maxDepth: number, +): SymbolDiffWithImpact { + const baseSymbols = loadSymbolsFromDb(baseDbPath, normalizedFiles, noTests); + const targetSymbols = loadSymbolsFromDb(targetDbPath, normalizedFiles, noTests); - const { added, removed, changed } = compareSymbols(baseSymbols, targetSymbols); + const { added, removed, changed } = compareSymbols(baseSymbols, targetSymbols); + const { removedIds, changedIds } = resolveImpactfulIds(removed, changed, baseSymbols); - const removedIds = removed.map((s) => s.id).filter(Boolean); - const changedIds = changed - .map((s) => { - const baseSym = baseSymbols.get(makeSymbolKey(s.kind, s.file, s.name)); - return baseSym?.id; - }) - .filter((id): id is number => Boolean(id)); + const removedImpact = loadCallersFromDb(baseDbPath, removedIds, maxDepth, noTests); + const changedImpact = loadCallersFromDb(baseDbPath, changedIds, maxDepth, noTests); - const removedImpact = 
loadCallersFromDb(baseDbPath, removedIds, maxDepth, noTests); - const changedImpact = loadCallersFromDb(baseDbPath, changedIds, maxDepth, noTests); + attachImpact(removed, (s) => s.id, baseDbPath, maxDepth, noTests); + attachImpact( + changed, + (s) => baseSymbols.get(makeSymbolKey(s.kind, s.file, s.name))?.id, + baseDbPath, + maxDepth, + noTests, + ); - attachImpactToSymbols(removed, baseDbPath, baseSymbols, maxDepth, noTests); - attachImpactToChanged(changed, baseDbPath, baseSymbols, maxDepth, noTests); + const { allImpacted, impactedFiles } = computeImpactedFileSets(removedImpact, changedImpact); - const allImpacted = new Set(); - for (const c of removedImpact) allImpacted.add(`${c.file}:${c.name}`); - for (const c of changedImpact) allImpacted.add(`${c.file}:${c.name}`); + return { added, removed, changed, allImpacted, impactedFiles }; +} - const impactedFiles = new Set(); - for (const key of allImpacted) impactedFiles.add(key.split(':')[0]!); +/** Strip the internal `.id` field, keeping `.impact` where it was attached. 
*/ +function shapeBranchCompareSymbolLists( + added: SymbolInfo[], + removed: SymbolInfo[], +): { cleanAdded: SymbolWithoutId[]; cleanRemoved: SymbolWithoutId[] } { + const cleanAdded = added.map(({ id: _id, ...rest }) => rest as SymbolWithoutId); + const cleanRemoved = removed.map(({ id: _id, ...rest }) => { + const result = rest as SymbolWithoutId; + if ((rest as SymbolInfo & { impact?: CallerInfo[] }).impact) { + result.impact = (rest as SymbolInfo & { impact?: CallerInfo[] }).impact; + } + return result; + }); + return { cleanAdded, cleanRemoved }; +} - const cleanAdded = added.map(({ id: _id, ...rest }) => rest as SymbolWithoutId); - const cleanRemoved = removed.map(({ id: _id, ...rest }) => { - const result = rest as SymbolWithoutId; - if ((rest as SymbolInfo & { impact?: CallerInfo[] }).impact) { - result.impact = (rest as SymbolInfo & { impact?: CallerInfo[] }).impact; - } - return result; - }); +/** Result shape when there are no changed files between the two refs. */ +function emptyBranchCompareResult( + baseRef: string, + targetRef: string, + baseSha: string, + targetSha: string, +): BranchCompareResult { + return { + baseRef, + targetRef, + baseSha, + targetSha, + changedFiles: [], + added: [], + removed: [], + changed: [], + summary: { added: 0, removed: 0, changed: 0, totalImpacted: 0, filesAffected: 0 }, + }; +} + +/** Assemble the final BranchCompareResult from the diff + cleaned symbol lists. 
*/ +function buildBranchCompareResult( + refs: { baseRef: string; targetRef: string; baseSha: string; targetSha: string }, + normalizedFiles: string[], + diff: SymbolDiffWithImpact, + cleaned: { cleanAdded: SymbolWithoutId[]; cleanRemoved: SymbolWithoutId[] }, +): BranchCompareResult { + return { + ...refs, + changedFiles: normalizedFiles, + added: cleaned.cleanAdded, + removed: cleaned.cleanRemoved, + changed: diff.changed, + summary: { + added: diff.added.length, + removed: diff.removed.length, + changed: diff.changed.length, + totalImpacted: diff.allImpacted.size, + filesAffected: diff.impactedFiles.size, + }, + }; +} - return { - baseRef, - targetRef, - baseSha, - targetSha, - changedFiles: normalizedFiles, - added: cleanAdded, - removed: cleanRemoved, - changed, - summary: { - added: added.length, - removed: removed.length, - changed: changed.length, - totalImpacted: allImpacted.size, - filesAffected: impactedFiles.size, - }, - }; +/** Resolve branchCompareData's opts (repoRoot/maxDepth/noTests/engine) with their defaults. */ +function resolveBranchCompareOptions(opts: BranchCompareOpts): { + repoRoot: string; + maxDepth: number; + noTests: boolean; + engine: EngineMode; +} { + return { + repoRoot: opts.repoRoot || process.cwd(), + maxDepth: opts.depth || 3, + noTests: opts.noTests || false, + engine: (opts.engine || 'wasm') as EngineMode, + }; +} + +/** Create the scratch tmpdir + base/target subdirectory paths for the dual worktrees. */ +function createCompareTempDirs(): { tmpBase: string; baseDir: string; targetDir: string } { + const tmpBase = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-bc-')); + return { tmpBase, baseDir: path.join(tmpBase, 'base'), targetDir: path.join(tmpBase, 'target') }; +} + +/** Remove both worktrees and the scratch tmpdir (best-effort, always runs in `finally`). 
*/ +function cleanupCompareTempDirs( + repoRoot: string, + baseDir: string, + targetDir: string, + tmpBase: string, +): void { + removeWorktree(repoRoot, baseDir); + removeWorktree(repoRoot, targetDir); + try { + fs.rmSync(tmpBase, { recursive: true, force: true }); + } catch (cleanupErr) { + debug(`branchCompareData: temp cleanup failed: ${toErrorMessage(cleanupErr)}`); + } +} + +/** Set up worktrees, diff the symbols, and shape the final result (the try-block body). */ +async function runBranchCompareInWorktrees( + resolvedRefs: { baseRef: string; targetRef: string; baseSha: string; targetSha: string }, + dirs: { repoRoot: string; baseDir: string; targetDir: string; engine: EngineMode }, + changedFiles: string[], + noTests: boolean, + maxDepth: number, +): Promise { + const { baseSha, targetSha } = resolvedRefs; + const { baseDbPath, targetDbPath } = await setupCompareWorktrees( + dirs.repoRoot, + baseSha, + targetSha, + dirs.baseDir, + dirs.targetDir, + dirs.engine, + ); + + const normalizedFiles = changedFiles.map((f) => f.replace(/\\/g, '/')); + const diff = diffSymbolsWithImpact(baseDbPath, targetDbPath, normalizedFiles, noTests, maxDepth); + const cleaned = shapeBranchCompareSymbolLists(diff.added, diff.removed); + + return buildBranchCompareResult(resolvedRefs, normalizedFiles, diff, cleaned); +} + +export async function branchCompareData( + baseRef: string, + targetRef: string, + opts: BranchCompareOpts = {}, +): Promise { + const { repoRoot, maxDepth, noTests, engine } = resolveBranchCompareOptions(opts); + + const refs = validateBranchCompareRefs(repoRoot, baseRef, targetRef); + if ('error' in refs) return refs; + const { baseSha, targetSha } = refs; + + const changedFiles = getChangedFilesBetweenRefs(repoRoot, baseSha, targetSha); + + if (changedFiles.length === 0) { + return emptyBranchCompareResult(baseRef, targetRef, baseSha, targetSha); + } + + const { tmpBase, baseDir, targetDir } = createCompareTempDirs(); + + try { + return await 
runBranchCompareInWorktrees( + { baseRef, targetRef, baseSha, targetSha }, + { repoRoot, baseDir, targetDir, engine }, + changedFiles, + noTests, + maxDepth, + ); } catch (err) { return { error: toErrorMessage(err) }; } finally { - removeWorktree(repoRoot, baseDir); - removeWorktree(repoRoot, targetDir); - try { - fs.rmSync(tmpBase, { recursive: true, force: true }); - } catch (cleanupErr) { - debug(`branchCompareData: temp cleanup failed: ${toErrorMessage(cleanupErr)}`); - } + cleanupCompareTempDirs(repoRoot, baseDir, targetDir, tmpBase); } } @@ -572,47 +751,78 @@ function collectImpactedCallers( return allImpacted; } -export function branchCompareMermaid(data: BranchCompareResult): string { - if (data.error) return data.error; - if ( +/** Render the "Impacted Callers" subgraph block, if there are any impacted callers. */ +function renderImpactedCallersSubgraph( + lines: string[], + state: MermaidNodeIdState, + allImpacted: Map, +): void { + if (allImpacted.size === 0) return; + lines.push(' subgraph sg_impact["Impacted Callers"]'); + for (const [key, c] of allImpacted) { + const nid = mermaidNodeId(state, key); + lines.push(` ${nid}["[${kindIcon(c.kind)}] ${c.name}"]`); + } + lines.push(' end'); + lines.push(' style sg_impact fill:#f3e5f5,stroke:#9c27b0'); +} + +/** Draw the dotted "impacted by" edges from each removed/changed symbol to its callers. */ +function renderImpactEdges( + lines: string[], + state: MermaidNodeIdState, + impactSources: Array<{ kind: string; file: string; name: string; impact?: CallerInfo[] }>, + removed: SymbolWithoutId[], +): void { + for (const sym of impactSources) { + if (!sym.impact) continue; + const prefix = removed.includes(sym as SymbolWithoutId) ? 
'removed' : 'changed'; + const symKey = `${prefix}::${sym.kind}::${sym.file}::${sym.name}`; + for (const c of sym.impact) { + const callerKey = `impact::${c.kind}::${c.file}::${c.name}`; + if (state.map.has(symKey) && state.map.has(callerKey)) { + lines.push(` ${state.map.get(symKey)} -.-> ${state.map.get(callerKey)}`); + } + } + } +} + +/** True if the compare result has no added/removed/changed symbols to render. */ +function hasNoBranchDifferences(data: BranchCompareResult): boolean { + return ( (data.added?.length ?? 0) === 0 && (data.removed?.length ?? 0) === 0 && (data.changed?.length ?? 0) === 0 - ) { + ); +} + +/** Render the three top-level Added/Removed/Changed subgraphs. */ +function renderAddedRemovedChangedSubgraphs( + lines: string[], + state: MermaidNodeIdState, + data: BranchCompareResult, +): void { + addMermaidSubgraph(lines, state, 'added', 'Added', data.added || [], '#e8f5e9', '#4caf50'); + addMermaidSubgraph(lines, state, 'removed', 'Removed', data.removed || [], '#ffebee', '#f44336'); + addMermaidSubgraph(lines, state, 'changed', 'Changed', data.changed || [], '#fff3e0', '#ff9800'); +} + +export function branchCompareMermaid(data: BranchCompareResult): string { + if (data.error) return data.error; + if (hasNoBranchDifferences(data)) { return 'flowchart TB\n none["No structural differences detected"]'; } const lines = ['flowchart TB']; const state: MermaidNodeIdState = { counter: 0, map: new Map() }; - addMermaidSubgraph(lines, state, 'added', 'Added', data.added || [], '#e8f5e9', '#4caf50'); - addMermaidSubgraph(lines, state, 'removed', 'Removed', data.removed || [], '#ffebee', '#f44336'); - addMermaidSubgraph(lines, state, 'changed', 'Changed', data.changed || [], '#fff3e0', '#ff9800'); + renderAddedRemovedChangedSubgraphs(lines, state, data); const impactSources = [...(data.removed || []), ...(data.changed || [])]; const allImpacted = collectImpactedCallers(impactSources); - if (allImpacted.size > 0) { - lines.push(' subgraph 
sg_impact["Impacted Callers"]'); - for (const [key, c] of allImpacted) { - const nid = mermaidNodeId(state, key); - lines.push(` ${nid}["[${kindIcon(c.kind)}] ${c.name}"]`); - } - lines.push(' end'); - lines.push(' style sg_impact fill:#f3e5f5,stroke:#9c27b0'); - } - - for (const sym of impactSources) { - if (!sym.impact) continue; - const prefix = (data.removed || []).includes(sym as SymbolWithoutId) ? 'removed' : 'changed'; - const symKey = `${prefix}::${sym.kind}::${sym.file}::${sym.name}`; - for (const c of sym.impact) { - const callerKey = `impact::${c.kind}::${c.file}::${c.name}`; - if (state.map.has(symKey) && state.map.has(callerKey)) { - lines.push(` ${state.map.get(symKey)} -.-> ${state.map.get(callerKey)}`); - } - } - } + renderImpactedCallersSubgraph(lines, state, allImpacted); + renderImpactEdges(lines, state, impactSources, data.removed || []); return lines.join('\n'); } From 4a7f106341c59857acb4f6f1e221fde8ac24b85a Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Sat, 4 Jul 2026 13:48:15 -0600 Subject: [PATCH 25/26] fix: wrap getChangedFilesBetweenRefs in branchCompareData's error boundary (#1789) git diff-tree failures at this step previously escaped as unhandled rejections instead of the { error: string } shape callers expect. 
Impact: 1 functions changed, 3 affected --- src/features/branch-compare.ts | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/src/features/branch-compare.ts b/src/features/branch-compare.ts index cdf20674a..11bc25744 100644 --- a/src/features/branch-compare.ts +++ b/src/features/branch-compare.ts @@ -680,26 +680,28 @@ export async function branchCompareData( if ('error' in refs) return refs; const { baseSha, targetSha } = refs; - const changedFiles = getChangedFilesBetweenRefs(repoRoot, baseSha, targetSha); + try { + const changedFiles = getChangedFilesBetweenRefs(repoRoot, baseSha, targetSha); - if (changedFiles.length === 0) { - return emptyBranchCompareResult(baseRef, targetRef, baseSha, targetSha); - } + if (changedFiles.length === 0) { + return emptyBranchCompareResult(baseRef, targetRef, baseSha, targetSha); + } - const { tmpBase, baseDir, targetDir } = createCompareTempDirs(); + const { tmpBase, baseDir, targetDir } = createCompareTempDirs(); - try { - return await runBranchCompareInWorktrees( - { baseRef, targetRef, baseSha, targetSha }, - { repoRoot, baseDir, targetDir, engine }, - changedFiles, - noTests, - maxDepth, - ); + try { + return await runBranchCompareInWorktrees( + { baseRef, targetRef, baseSha, targetSha }, + { repoRoot, baseDir, targetDir, engine }, + changedFiles, + noTests, + maxDepth, + ); + } finally { + cleanupCompareTempDirs(repoRoot, baseDir, targetDir, tmpBase); + } } catch (err) { return { error: toErrorMessage(err) }; - } finally { - cleanupCompareTempDirs(repoRoot, baseDir, targetDir, tmpBase); } } From 320d6e9ba470c67b96f7c3d720cdc724cc8742f6 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Sat, 4 Jul 2026 13:48:25 -0600 Subject: [PATCH 26/26] fix: document buildFileConditionSQL's leading-AND contract in prepareSearch (#1789) Makes explicit why the ' AND ' prefix strip is safe, so a future change to buildFileConditionSQL's output shape doesn't silently break the conditions[] 
accumulation here. Impact: 1 functions changed, 8 affected --- src/domain/search/search/prepare.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/domain/search/search/prepare.ts b/src/domain/search/search/prepare.ts index ec670d9b8..13cea6cb4 100644 --- a/src/domain/search/search/prepare.ts +++ b/src/domain/search/search/prepare.ts @@ -85,7 +85,9 @@ export function prepareSearch( if (fpArr.length > 0 && !isGlob) { const fc = buildFileConditionSQL(fpArr, 'n.file'); if (fc.sql) { - // Strip leading ' AND ' since we're using conditions array + // buildFileConditionSQL always prefixes its output with ' AND ' (see + // src/db/query-builder.ts); strip it here since we accumulate raw + // fragments in the conditions[] array and join with ' AND ' below. conditions.push(fc.sql.replace(/^ AND /, '')); params.push(...fc.params); }