From 820ff834c3867b952ebaabeffa0ff3c04945affa Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 03:37:11 -0600 Subject: [PATCH 01/39] chore: remove dead duplicate type declarations from types.ts (closes #1727) --- src/types.ts | 82 ---------------------------------------------------- 1 file changed, 82 deletions(-) diff --git a/src/types.ts b/src/types.ts index 9f85a9f33..d52bd81bf 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1108,68 +1108,6 @@ export interface DataflowMutation { line: number; } -// ════════════════════════════════════════════════════════════════════════ -// §9 Graph Model (CodeGraph) -// ════════════════════════════════════════════════════════════════════════ - -/** Node attributes stored in the in-memory graph. */ -export interface GraphNodeAttrs { - label?: string; - kind?: string; - file?: string; - name?: string; - line?: number; - dbId?: number; - [key: string]: unknown; -} - -/** Edge attributes stored in the in-memory graph. */ -export interface GraphEdgeAttrs { - kind?: string; - confidence?: number; - weight?: number; - [key: string]: unknown; -} - -/** The unified in-memory graph model. 
*/ -export interface CodeGraph { - readonly directed: boolean; - readonly nodeCount: number; - readonly edgeCount: number; - - // Node operations - addNode(id: string, attrs?: GraphNodeAttrs): CodeGraph; - hasNode(id: string): boolean; - getNodeAttrs(id: string): GraphNodeAttrs | undefined; - nodes(): IterableIterator<[string, GraphNodeAttrs]>; - nodeIds(): string[]; - - // Edge operations - addEdge(source: string, target: string, attrs?: GraphEdgeAttrs): CodeGraph; - hasEdge(source: string, target: string): boolean; - getEdgeAttrs(source: string, target: string): GraphEdgeAttrs | undefined; - edges(): Generator<[string, string, GraphEdgeAttrs]>; - - // Adjacency - successors(id: string): string[]; - predecessors(id: string): string[]; - neighbors(id: string): string[]; - outDegree(id: string): number; - inDegree(id: string): number; - - // Filtering - subgraph(predicate: (id: string, attrs: GraphNodeAttrs) => boolean): CodeGraph; - filterEdges(predicate: (src: string, tgt: string, attrs: GraphEdgeAttrs) => boolean): CodeGraph; - - // Conversion - toEdgeArray(): Array<{ source: string; target: string }>; - toGraphology(opts?: { type?: string }): unknown; - - // Utilities - clone(): CodeGraph; - merge(other: CodeGraph): CodeGraph; -} - // ════════════════════════════════════════════════════════════════════════ // §10 Build Pipeline // ════════════════════════════════════════════════════════════════════════ @@ -1628,26 +1566,6 @@ export interface PaginatedItems { /** A result object with optional _pagination metadata. 
*/ export type Paginated = T & { _pagination?: PaginationMeta }; -// ════════════════════════════════════════════════════════════════════════ -// §13 Error Hierarchy -// ════════════════════════════════════════════════════════════════════════ - -export type ErrorCode = - | 'CODEGRAPH_ERROR' - | 'PARSE_FAILED' - | 'DB_ERROR' - | 'CONFIG_INVALID' - | 'RESOLUTION_FAILED' - | 'ENGINE_UNAVAILABLE' - | 'ANALYSIS_FAILED' - | 'BOUNDARY_VIOLATION'; - -export interface CodegraphErrorOpts { - code?: ErrorCode; - file?: string; - cause?: Error; -} - // ════════════════════════════════════════════════════════════════════════ // §14 Feature Module Result Shapes // ════════════════════════════════════════════════════════════════════════ From 9fa427902c51d9c13bfcb5c0dcd9c6fb8ce2ca9e Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 03:46:14 -0600 Subject: [PATCH 02/39] chore: remove unused iterComplexity export from complexity-query.ts (docs check acknowledged) Impact: 1 functions changed, 2 affected --- src/features/complexity-query.ts | 103 ------------------------------- src/features/complexity.ts | 2 +- 2 files changed, 1 insertion(+), 104 deletions(-) diff --git a/src/features/complexity-query.ts b/src/features/complexity-query.ts index 27eb5fc08..5f3b9d121 100644 --- a/src/features/complexity-query.ts +++ b/src/features/complexity-query.ts @@ -283,106 +283,3 @@ export function complexityData( db.close(); } } - -interface IterComplexityRow { - name: string; - kind: string; - file: string; - line: number; - end_line: number | null; - cognitive: number; - cyclomatic: number; - max_nesting: number; - loc: number; - sloc: number; -} - -export function* iterComplexity( - customDbPath?: string, - opts: { - noTests?: boolean; - file?: string; - target?: string; - kind?: string; - sort?: string; - } = {}, -): Generator<{ - name: string; - kind: string; - file: string; - line: number; - endLine: number | null; - cognitive: number; - cyclomatic: number; - maxNesting: 
number; - loc: number; - sloc: number; -}> { - const db = openReadonlyOrFail(customDbPath); - try { - const noTests = opts.noTests || false; - const sort = opts.sort || 'cognitive'; - - let where = "WHERE n.kind IN ('function','method')"; - const params: unknown[] = []; - - if (noTests) { - where += ` AND n.file NOT LIKE '%.test.%' - AND n.file NOT LIKE '%.spec.%' - AND n.file NOT LIKE '%__test__%' - AND n.file NOT LIKE '%__tests__%' - AND n.file NOT LIKE '%.stories.%'`; - } - if (opts.target) { - where += ' AND n.name LIKE ?'; - params.push(`%${opts.target}%`); - } - { - const fc = buildFileConditionSQL(opts.file as string, 'n.file'); - where += fc.sql; - params.push(...fc.params); - } - if (opts.kind) { - where += ' AND n.kind = ?'; - params.push(opts.kind); - } - - const orderMap: Record = { - cognitive: 'fc.cognitive DESC', - cyclomatic: 'fc.cyclomatic DESC', - nesting: 'fc.max_nesting DESC', - mi: 'fc.maintainability_index ASC', - volume: 'fc.halstead_volume DESC', - effort: 'fc.halstead_effort DESC', - bugs: 'fc.halstead_bugs DESC', - loc: 'fc.loc DESC', - }; - const orderBy = orderMap[sort] || 'fc.cognitive DESC'; - - const stmt = db.prepare( - `SELECT n.name, n.kind, n.file, n.line, n.end_line, - fc.cognitive, fc.cyclomatic, fc.max_nesting, fc.loc, fc.sloc - FROM function_complexity fc - JOIN nodes n ON fc.node_id = n.id - ${where} - ORDER BY ${orderBy}`, - ); - for (const r of stmt.iterate(...params)) { - if (noTests && isTestFile(r.file)) continue; - yield { - name: r.name, - kind: r.kind, - file: r.file, - line: r.line, - endLine: r.end_line || null, - cognitive: r.cognitive, - cyclomatic: r.cyclomatic, - maxNesting: r.max_nesting, - loc: r.loc || 0, - sloc: r.sloc || 0, - }; - } - } finally { - db.close(); - } -} diff --git a/src/features/complexity.ts b/src/features/complexity.ts index 307ed8549..8e1b6fb06 100644 --- a/src/features/complexity.ts +++ b/src/features/complexity.ts @@ -750,4 +750,4 @@ export async function buildComplexityMetrics( // ─── 
Query-Time Functions (re-exported from complexity-query.ts) ────────── // Split to separate query-time concerns (DB reads, filtering, pagination) // from compute-time concerns (AST traversal, metric algorithms). -export { complexityData, iterComplexity } from './complexity-query.js'; +export { complexityData } from './complexity-query.js'; From 0f9bbe6f05779ca7531b0b01807e6c35a7092baf Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 03:58:55 -0600 Subject: [PATCH 03/39] refactor: extract shared aggregate and typed-array helpers from leiden algorithm files docs check acknowledged: internal helper extraction only, no user-facing feature/language/architecture-table changes. Impact: 10 functions changed, 27 affected --- src/graph/algorithms/leiden/adapter.ts | 14 +-- .../algorithms/leiden/aggregate-helpers.ts | 83 ++++++++++++++++++ src/graph/algorithms/leiden/index.ts | 73 +--------------- src/graph/algorithms/leiden/partition.ts | 86 ++----------------- .../algorithms/leiden/typed-array-helpers.ts | 28 ++++++ 5 files changed, 121 insertions(+), 163 deletions(-) create mode 100644 src/graph/algorithms/leiden/aggregate-helpers.ts create mode 100644 src/graph/algorithms/leiden/typed-array-helpers.ts diff --git a/src/graph/algorithms/leiden/adapter.ts b/src/graph/algorithms/leiden/adapter.ts index 4425cbb30..390a15aa3 100644 --- a/src/graph/algorithms/leiden/adapter.ts +++ b/src/graph/algorithms/leiden/adapter.ts @@ -6,6 +6,7 @@ */ import type { CodeGraph, EdgeAttrs, NodeAttrs } from '../../model.js'; +import { fget, taAdd } from './typed-array-helpers.js'; export interface EdgeEntry { to: number; @@ -39,17 +40,6 @@ export interface GraphAdapter { forEachNeighbor: (i: number, cb: (to: number, w: number) => void) => void; } -// Typed arrays always return a number for in-bounds access, but noUncheckedIndexedAccess -// widens the return to `number | undefined`. 
These helpers wrap compound assignment -// patterns (+=, -=) that appear frequently in this performance-critical code. -function taGet(a: Float64Array, i: number): number { - return a[i] as number; -} - -function taAdd(a: Float64Array, i: number, v: number): void { - a[i] = taGet(a, i) + v; -} - /** * Populate edge arrays for a directed graph. Each edge is stored once in * outEdges[from] and inEdges[to]. Self-loops are tracked in both the selfLoop @@ -145,7 +135,7 @@ function populateUndirectedEdges( // Note: uses single-w convention (not standard 2w) — the modularity formulas in // modularity.ts are written to match this convention, keeping the system self-consistent. for (let v = 0; v < n; v++) { - const w: number = taGet(selfLoop, v); + const w: number = fget(selfLoop, v); if (w !== 0) { (outEdges[v] as EdgeEntry[]).push({ to: v, w }); (inEdges[v] as InEdgeEntry[]).push({ from: v, w }); diff --git a/src/graph/algorithms/leiden/aggregate-helpers.ts b/src/graph/algorithms/leiden/aggregate-helpers.ts new file mode 100644 index 000000000..306063409 --- /dev/null +++ b/src/graph/algorithms/leiden/aggregate-helpers.ts @@ -0,0 +1,83 @@ +/** + * Per-community aggregate accumulation shared by partition.ts (live + * optimisation state, mutated move-by-move) and index.ts (one-shot + * evaluation on the original graph for quality()). Both need to reduce the + * graph's per-node size/strength/self-loop values down to one row per + * community using identical directed/undirected branching — extracting + * this once prevents the two copies from silently drifting apart on a + * future edit to only one of them. + */ + +import type { GraphAdapter } from './adapter.js'; +import { fget, iget } from './typed-array-helpers.js'; + +/** + * Accumulate per-community node-level totals (size, strength, self-loop + * weight) into the provided aggregate arrays. 
+ * + * `nodeCount` is optional: partition.ts's live optimisation state tracks + * per-community node counts (used by compactCommunityIds's size/count sort + * tie-break), while index.ts's read-only quality evaluation does not need + * it and omits the argument. + */ +export function accumulateNodeAggregates( + graph: GraphAdapter, + nodeCommunity: Int32Array, + n: number, + totalSize: Float64Array, + internalEdgeWeight: Float64Array, + totalStrength: Float64Array, + totalOutStrength: Float64Array, + totalInStrength: Float64Array, + nodeCount?: Int32Array, +): void { + for (let i = 0; i < n; i++) { + const c: number = iget(nodeCommunity, i); + totalSize[c] = fget(totalSize, c) + fget(graph.size, i); + if (nodeCount) nodeCount[c] = iget(nodeCount, c) + 1; + if (graph.directed) { + totalOutStrength[c] = fget(totalOutStrength, c) + fget(graph.strengthOut, i); + totalInStrength[c] = fget(totalInStrength, c) + fget(graph.strengthIn, i); + } else { + totalStrength[c] = fget(totalStrength, c) + fget(graph.strengthOut, i); + } + if (fget(graph.selfLoop, i) !== 0) + internalEdgeWeight[c] = fget(internalEdgeWeight, c) + fget(graph.selfLoop, i); + } +} + +/** + * Accumulate intra-community edge weights. For directed graphs, counts all + * intra-community non-self edges. For undirected, counts each edge once + * (j > i) to avoid double-counting. 
+ */ +export function accumulateInternalEdgeWeights( + graph: GraphAdapter, + nodeCommunity: Int32Array, + n: number, + internalEdgeWeight: Float64Array, +): void { + if (graph.directed) { + for (let i = 0; i < n; i++) { + const ci: number = iget(nodeCommunity, i); + const neighbors = graph.outEdges[i]!; + for (let k = 0; k < neighbors.length; k++) { + const { to: j, w } = neighbors[k]!; + if (i === j) continue; // self-loop already counted via graph.selfLoop[i] + if (ci === iget(nodeCommunity, j)) + internalEdgeWeight[ci] = fget(internalEdgeWeight, ci) + w; + } + } + } else { + for (let i = 0; i < n; i++) { + const ci: number = iget(nodeCommunity, i); + const neighbors = graph.outEdges[i]!; + for (let k = 0; k < neighbors.length; k++) { + const { to: j, w } = neighbors[k]!; + if (j <= i) continue; + if (ci === iget(nodeCommunity, j)) + internalEdgeWeight[ci] = fget(internalEdgeWeight, ci) + w; + } + } + } +} diff --git a/src/graph/algorithms/leiden/index.ts b/src/graph/algorithms/leiden/index.ts index a69df7caf..9a2a05a9b 100644 --- a/src/graph/algorithms/leiden/index.ts +++ b/src/graph/algorithms/leiden/index.ts @@ -8,10 +8,12 @@ import type { CodeGraph } from '../../model.js'; import type { GraphAdapter } from './adapter.js'; +import { accumulateInternalEdgeWeights, accumulateNodeAggregates } from './aggregate-helpers.js'; import { qualityCPM } from './cpm.js'; import { qualityModularity } from './modularity.js'; import type { LeidenOptions } from './optimiser.js'; import { runLouvainUndirectedModularity } from './optimiser.js'; +import { iget } from './typed-array-helpers.js'; export type { LeidenOptions } from './optimiser.js'; @@ -27,14 +29,6 @@ export interface DetectClustersResult { }; } -// Typed array safe-access helpers (see adapter.ts for rationale) -function fget(a: Float64Array, i: number): number { - return a[i] as number; -} -function iget(a: Int32Array, i: number): number { - return a[i] as number; -} - /** * Detect communities in a CodeGraph using 
the Leiden algorithm. * @@ -119,67 +113,6 @@ interface OriginalPartition { getInEdgeWeightFromCommunity(c: number): number; } -/** - * Accumulate intra-community edge weights for quality evaluation. - * For directed graphs, counts all intra-community non-self edges. - * For undirected, counts each edge once (j > i) to avoid double-counting. - */ -function accumulateInternalEdgeWeights( - g: GraphAdapter, - communityMap: Int32Array, - n: number, - internalWeight: Float64Array, -): void { - if (g.directed) { - for (let i = 0; i < n; i++) { - const ci: number = iget(communityMap, i); - const list = g.outEdges[i]!; - for (let k = 0; k < list.length; k++) { - const { to: j, w } = list[k]!; - if (i === j) continue; - if (ci === iget(communityMap, j)) internalWeight[ci] = fget(internalWeight, ci) + w; - } - } - } else { - for (let i = 0; i < n; i++) { - const ci: number = iget(communityMap, i); - const list = g.outEdges[i]!; - for (let k = 0; k < list.length; k++) { - const { to: j, w } = list[k]!; - if (j <= i) continue; - if (ci === iget(communityMap, j)) internalWeight[ci] = fget(internalWeight, ci) + w; - } - } - } -} - -/** - * Accumulate per-community node-level aggregates (size, strength) from - * the graph adapter and community mapping. 
- */ -function accumulateNodeAggregates( - g: GraphAdapter, - communityMap: Int32Array, - n: number, - totalSize: Float64Array, - totalStr: Float64Array, - totalOutStr: Float64Array, - totalInStr: Float64Array, - internalWeight: Float64Array, -): void { - for (let i = 0; i < n; i++) { - const c: number = iget(communityMap, i); - totalSize[c] = fget(totalSize, c) + fget(g.size, i); - if (g.directed) { - totalOutStr[c] = fget(totalOutStr, c) + fget(g.strengthOut, i); - totalInStr[c] = fget(totalInStr, c) + fget(g.strengthIn, i); - } else { - totalStr[c] = fget(totalStr, c) + fget(g.strengthOut, i); - } - if (fget(g.selfLoop, i)) internalWeight[c] = fget(internalWeight, c) + fget(g.selfLoop, i); - } -} - function buildOriginalPartition(g: GraphAdapter, communityMap: Int32Array): OriginalPartition { const n: number = g.n; let maxC: number = 0; @@ -201,10 +134,10 @@ function buildOriginalPartition(g: GraphAdapter, communityMap: Int32Array): Orig communityMap, n, totalSize, + internalWeight, totalStr, totalOutStr, totalInStr, - internalWeight, ); accumulateInternalEdgeWeights(g, communityMap, n, internalWeight); diff --git a/src/graph/algorithms/leiden/partition.ts b/src/graph/algorithms/leiden/partition.ts index 38b67737c..de78b8f3e 100644 --- a/src/graph/algorithms/leiden/partition.ts +++ b/src/graph/algorithms/leiden/partition.ts @@ -8,6 +8,8 @@ */ import type { GraphAdapter } from './adapter.js'; +import { accumulateInternalEdgeWeights, accumulateNodeAggregates } from './aggregate-helpers.js'; +import { fget, iget, u8get } from './typed-array-helpers.js'; export interface CompactOptions { keepOldOrder?: boolean; @@ -44,18 +46,6 @@ export interface Partition { graph?: GraphAdapter; } -// Typed arrays always return a number for in-bounds access, but noUncheckedIndexedAccess -// widens to `number | undefined`. These helpers keep the compound assignment patterns readable. 
-function fget(a: Float64Array, i: number): number { - return a[i] as number; -} -function iget(a: Int32Array, i: number): number { - return a[i] as number; -} -function u8get(a: Uint8Array, i: number): number { - return a[i] as number; -} - /* ------------------------------------------------------------------ */ /* Internal mutable state bucket shared by all extracted functions */ /* ------------------------------------------------------------------ */ @@ -81,75 +71,9 @@ interface PartitionState { } /* ------------------------------------------------------------------ */ -/* Aggregate helpers (shared by initializeAggregates & compact) */ +/* Community-ID sort helper (used by compact) */ /* ------------------------------------------------------------------ */ -/** - * Accumulate per-community node-level totals (size, count, strength) into the - * provided aggregate arrays. Both `initializeAggregates` and `compactCommunityIds` - * share this logic — extracting it eliminates the duplication. - */ -function accumulateNodeAggregates( - graph: GraphAdapter, - nodeCommunity: Int32Array, - n: number, - totalSize: Float64Array, - nodeCount: Int32Array, - internalEdgeWeight: Float64Array, - totalStrength: Float64Array, - totalOutStrength: Float64Array, - totalInStrength: Float64Array, -): void { - for (let i = 0; i < n; i++) { - const c: number = iget(nodeCommunity, i); - totalSize[c] = fget(totalSize, c) + fget(graph.size, i); - nodeCount[c] = iget(nodeCount, c) + 1; - if (graph.directed) { - totalOutStrength[c] = fget(totalOutStrength, c) + fget(graph.strengthOut, i); - totalInStrength[c] = fget(totalInStrength, c) + fget(graph.strengthIn, i); - } else { - totalStrength[c] = fget(totalStrength, c) + fget(graph.strengthOut, i); - } - if (fget(graph.selfLoop, i) !== 0) - internalEdgeWeight[c] = fget(internalEdgeWeight, c) + fget(graph.selfLoop, i); - } -} - -/** - * Accumulate intra-community edge weights. For directed graphs, counts all - * intra-community non-self edges. 
For undirected, counts each edge once (j > i). - */ -function accumulateInternalEdgeWeights( - graph: GraphAdapter, - nodeCommunity: Int32Array, - n: number, - internalEdgeWeight: Float64Array, -): void { - if (graph.directed) { - for (let i = 0; i < n; i++) { - const ci: number = iget(nodeCommunity, i); - const neighbors = graph.outEdges[i]!; - for (let k = 0; k < neighbors.length; k++) { - const { to: j, w } = neighbors[k]!; - if (i === j) continue; // self-loop already counted via graph.selfLoop[i] - if (ci === iget(nodeCommunity, j)) - internalEdgeWeight[ci] = fget(internalEdgeWeight, ci) + w; - } - } - } else { - for (let i = 0; i < n; i++) { - const ci: number = iget(nodeCommunity, i); - const neighbors = graph.outEdges[i]!; - for (let k = 0; k < neighbors.length; k++) { - const { to: j, w } = neighbors[k]!; - if (j <= i) continue; - if (ci === iget(nodeCommunity, j)) - internalEdgeWeight[ci] = fget(internalEdgeWeight, ci) + w; - } - } - } -} - /** * Sort community IDs according to the compaction options: preserve original * order, respect a user-provided label map, or sort by descending size. 
@@ -218,11 +142,11 @@ function initAggregates(s: PartitionState): void { s.nodeCommunity, s.n, s.communityTotalSize, - s.communityNodeCount, s.communityInternalEdgeWeight, s.communityTotalStrength, s.communityTotalOutStrength, s.communityTotalInStrength, + s.communityNodeCount, ); accumulateInternalEdgeWeights(s.graph, s.nodeCommunity, s.n, s.communityInternalEdgeWeight); } @@ -463,11 +387,11 @@ function compactIds(s: PartitionState, opts: CompactOptions = {}): void { s.nodeCommunity, s.n, newTotalSize, - newNodeCount, newInternalEdgeWeight, newTotalStrength, newTotalOutStrength, newTotalInStrength, + newNodeCount, ); accumulateInternalEdgeWeights(s.graph, s.nodeCommunity, s.n, newInternalEdgeWeight); diff --git a/src/graph/algorithms/leiden/typed-array-helpers.ts b/src/graph/algorithms/leiden/typed-array-helpers.ts new file mode 100644 index 000000000..ce3ef58a4 --- /dev/null +++ b/src/graph/algorithms/leiden/typed-array-helpers.ts @@ -0,0 +1,28 @@ +/** + * Typed-array safe-access helpers shared by the leiden algorithm files. + * + * Typed arrays always return a number for in-bounds access, but + * noUncheckedIndexedAccess widens the return type to `number | undefined`. + * These helpers keep index reads and compound-assignment patterns (`+=`) + * readable in this performance-critical code, without partition.ts, + * adapter.ts, and index.ts each maintaining their own hand-copied variant + * (previously named fget/iget/u8get in two files and taGet/taAdd in the + * third — same idiom, three independent copies). + */ + +export function fget(a: Float64Array, i: number): number { + return a[i] as number; +} + +export function iget(a: Int32Array, i: number): number { + return a[i] as number; +} + +export function u8get(a: Uint8Array, i: number): number { + return a[i] as number; +} + +/** In-place compound addition: `a[i] += v`, safe under noUncheckedIndexedAccess. 
*/ +export function taAdd(a: Float64Array, i: number, v: number): void { + a[i] = fget(a, i) + v; +} From f3e1119e4802ea4babbefc4c0b913f536e14c3ff Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 04:19:45 -0600 Subject: [PATCH 04/39] refactor: extract shared name-map scanner into scripts/lib/name-map.mjs Impact: 11 functions changed, 11 affected --- scripts/compare-tools.mjs | 143 +----------------- scripts/import-jelly-micro.mjs | 153 ++------------------ scripts/lib/name-map.mjs | 256 +++++++++++++++++++++++++++++++++ 3 files changed, 274 insertions(+), 278 deletions(-) create mode 100644 scripts/lib/name-map.mjs diff --git a/scripts/compare-tools.mjs b/scripts/compare-tools.mjs index 530c067ea..d1c18174c 100644 --- a/scripts/compare-tools.mjs +++ b/scripts/compare-tools.mjs @@ -25,6 +25,7 @@ import fs from 'node:fs'; import os from 'node:os'; import path from 'node:path'; import { fileURLToPath } from 'node:url'; +import { buildFileLineNameMap, buildFileNameLookup } from './lib/name-map.mjs'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); const ROOT = path.resolve(__dirname, '..'); @@ -71,75 +72,6 @@ function findBin(name, envVar) { } } -// ── Name resolution from source ──────────────────────────────────────────── - -/** - * Parse source files to build a map of (file, startLine) → class-qualified name. - * Returns a Map<"filename:line", string>. - * - * Heuristic — works well for the small hand-annotated fixtures. 
- */ -function buildNameMap(fixtureDir, lang) { - const exts = EXTENSIONS[lang] || ['.js']; - const nameMap = new Map(); - - for (const filename of fs.readdirSync(fixtureDir)) { - if (!exts.some((e) => filename.endsWith(e))) continue; - - const src = fs.readFileSync(path.join(fixtureDir, filename), 'utf8'); - const lines = src.split('\n'); - let currentClass = null; - let classDepth = 0; - let braceDepth = 0; - - for (let i = 0; i < lines.length; i++) { - const line = lines[i]; - const lineNo = i + 1; - const key = `${filename}:${lineNo}`; - - const classMatch = line.match(/^\s*(?:export\s+)?class\s+(\w+)/); - if (classMatch) { - currentClass = classMatch[1]; - classDepth = braceDepth; - nameMap.set(key, classMatch[1]); - } - - for (const ch of line) { - if (ch === '{') braceDepth++; - else if (ch === '}') { - braceDepth--; - if (currentClass && braceDepth === classDepth) currentClass = null; - } - } - - if (classMatch) continue; - - const funcMatch = line.match(/^\s*(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*[(<]/); - if (funcMatch) { nameMap.set(key, funcMatch[1]); continue; } - - const arrowMatch = line.match(/^\s*(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=/); - if (arrowMatch && (line.includes('=>') || line.includes('function'))) { - nameMap.set(key, arrowMatch[1]); continue; - } - - if (currentClass) { - if (/^\s+constructor\s*\(/.test(line)) { - nameMap.set(key, currentClass); continue; - } - const methodMatch = line.match(/^\s+(?:async\s+|static\s+|(?:get|set)\s+)*(\w+)\s*\(/); - if (methodMatch) { - const mname = methodMatch[1]; - if (!['if', 'for', 'while', 'switch', 'catch'].includes(mname)) { - nameMap.set(key, `${currentClass}.${mname}`); - } - } - } - } - } - - return nameMap; -} - // ── Jelly ────────────────────────────────────────────────────────────────── function runJelly(lang, fixtureDir) { @@ -180,7 +112,7 @@ function runJelly(lang, fixtureDir) { } function jellyEdgesToSet(cg, fixtureDir, lang) { - const nameMap = buildNameMap(fixtureDir, 
lang); + const nameMap = buildFileLineNameMap(fixtureDir, EXTENSIONS[lang] || ['.js']); const files = cg.files; const functions = cg.functions; @@ -261,75 +193,6 @@ function runAcg(lang, fixtureDir) { } } -/** - * Build a lookup from (basename, unqualifiedName) → Set. - * - * ACG provides function names directly (e.g. "createUser") but not class - * prefixes. This map lets us resolve "createUser in service.js" → - * "UserService.createUser" using the same source scan as buildNameMap. - * - * The value is a Set to handle the case where multiple classes in the same - * file share a method name (e.g. Shape.area + Circle.area + Rectangle.area - * all in hierarchy.ts). Callers should try all candidates rather than - * assuming a 1:1 mapping. - */ -function buildAcgNameLookup(fixtureDir, lang) { - const exts = EXTENSIONS[lang] || ['.js']; - // Map: "basename:unqualifiedName" → Set<"qualifiedName"> - const lookup = new Map(); - - /** Add a (key → value) entry, accumulating into the existing Set if any. 
*/ - function add(key, value) { - const existing = lookup.get(key); - if (existing) existing.add(value); - else lookup.set(key, new Set([value])); - } - - for (const filename of fs.readdirSync(fixtureDir)) { - if (!exts.some((e) => filename.endsWith(e))) continue; - const src = fs.readFileSync(path.join(fixtureDir, filename), 'utf8'); - const lines = src.split('\n'); - let currentClass = null; - let classDepth = 0; - let braceDepth = 0; - - for (const line of lines) { - const classMatch = line.match(/^\s*(?:export\s+)?class\s+(\w+)/); - if (classMatch) { - currentClass = classMatch[1]; - classDepth = braceDepth; - // "ClassName" as an unqualified name refers to the class itself (constructor call sites) - add(`${filename}:${classMatch[1]}`, classMatch[1]); - } - for (const ch of line) { - if (ch === '{') braceDepth++; - else if (ch === '}') { - braceDepth--; - if (currentClass && braceDepth === classDepth) currentClass = null; - } - } - if (classMatch) continue; - - const funcMatch = line.match(/^\s*(?:export\s+)?(?:async\s+)?function\s+(\w+)\s*[(<]/); - if (funcMatch) { add(`${filename}:${funcMatch[1]}`, funcMatch[1]); continue; } - - if (currentClass) { - // constructor → ClassName (ACG labels constructors as "constructor" in the source) - if (/^\s+constructor\s*\(/.test(line)) { - add(`${filename}:constructor`, currentClass); continue; - } - const methodMatch = line.match(/^\s+(?:async\s+|static\s+|(?:get|set)\s+)*(\w+)\s*\(/); - if (methodMatch) { - const mname = methodMatch[1]; - if (!['if', 'for', 'while', 'switch', 'catch'].includes(mname)) - add(`${filename}:${mname}`, `${currentClass}.${mname}`); - } - } - } - } - return lookup; -} - /** * Parse ACG text output into a set of "source→target" edge strings. * @@ -342,7 +205,7 @@ function buildAcgNameLookup(fixtureDir, lang) { * declaration line. So we use the function name directly for the lookup. 
*/ function acgOutputToSet(stdout, fixtureDir, lang) { - const lookup = buildAcgNameLookup(fixtureDir, lang); + const lookup = buildFileNameLookup(fixtureDir, EXTENSIONS[lang] || ['.js']); // 'funcName' (file.js@line:start-end) -> 'funcName' (file.js@line:start-end) const edgeRe = /^'(\w+)'\s+\((\S+?)@\d+:[^)]+\)\s+->\s+'(\w+)'\s+\((\S+?)@\d+:[^)]+\)/; diff --git a/scripts/import-jelly-micro.mjs b/scripts/import-jelly-micro.mjs index 3cafdf171..3f8566ad8 100644 --- a/scripts/import-jelly-micro.mjs +++ b/scripts/import-jelly-micro.mjs @@ -25,6 +25,7 @@ import https from 'node:https'; import os from 'node:os'; import path from 'node:path'; import { fileURLToPath } from 'node:url'; +import { buildLineNameMap } from './lib/name-map.mjs'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); const ROOT = path.resolve(__dirname, '..'); @@ -33,6 +34,11 @@ const OUT_DIR = path.join(ROOT, 'tests/benchmarks/resolution/fixtures/jelly-micr const JELLY_RAW = 'https://raw.githubusercontent.com/cs-au-dk/jelly/master/tests/micro'; const JELLY_API = 'https://api.github.com/repos/cs-au-dk/jelly/contents/tests/micro'; +// HTTP status-code ranges used by fetchText's redirect-following logic. +const HTTP_STATUS_REDIRECT_MIN = 300; +const HTTP_STATUS_REDIRECT_MAX = 400; // exclusive +const HTTP_STATUS_ERROR_MIN = 400; + // ── Args ──────────────────────────────────────────────────────────────────── const args = process.argv.slice(2); @@ -46,7 +52,11 @@ function fetchText(url, redirectsLeft = 10) { return new Promise((resolve, reject) => { const client = url.startsWith('http:') ? 
http : https; client.get(url, { headers: { 'User-Agent': 'codegraph-benchmark' } }, (res) => { - if (res.statusCode >= 300 && res.statusCode < 400 && res.headers.location) { + if ( + res.statusCode >= HTTP_STATUS_REDIRECT_MIN && + res.statusCode < HTTP_STATUS_REDIRECT_MAX && + res.headers.location + ) { if (redirectsLeft === 0) { reject(new Error(`Too many redirects: ${url}`)); return; @@ -57,7 +67,7 @@ function fetchText(url, redirectsLeft = 10) { let body = ''; res.on('data', (d) => (body += d)); res.on('end', () => { - if (res.statusCode && res.statusCode >= 400) { + if (res.statusCode && res.statusCode >= HTTP_STATUS_ERROR_MIN) { reject(new Error(`HTTP ${res.statusCode}: ${body.slice(0, 200)}`)); } else { resolve(body); @@ -68,139 +78,6 @@ function fetchText(url, redirectsLeft = 10) { }); } -// ── Name mapping ───────────────────────────────────────────────────────────── - -/** - * Build a Map<"startLine:startCol", name> for all functions in a JS source. - * - * Extends the basic regex approach with: - * - Object method shorthand: { foo() {} } - * - Object property fn: { foo: function() {} } - * - Prototype assignment: Foo.prototype.bar = function() {} - * - Class static blocks: static { ... } - * - * Functions that cannot be named receive the label "". 
- */ -function buildNameMap(src, filename) { - const lines = src.split('\n'); - const nameMap = new Map(); // "line:col" → name (1-based line, 1-based col) - - let currentClass = null; - let classDepth = 0; - let braceDepth = 0; - - for (let i = 0; i < lines.length; i++) { - const line = lines[i]; - const lineNo = i + 1; - - // Class declaration - const classMatch = line.match(/^\s*(?:export\s+)?(?:abstract\s+)?class\s+(\w+)/); - if (classMatch) { - currentClass = classMatch[1]; - classDepth = braceDepth; - } - - // Count braces - for (const ch of line) { - if (ch === '{') braceDepth++; - else if (ch === '}') { - braceDepth--; - if (currentClass !== null && braceDepth === classDepth) { - currentClass = null; - } - } - } - - if (classMatch) { - // Class itself: name the position of the opening brace - // Jelly assigns the class-level function to the line of "class Foo {" - nameMap.set(`${lineNo}:1`, classMatch[1]); - continue; - } - - // Top-level named function declaration - const funcDecl = line.match(/^\s*(?:export\s+(?:default\s+)?)?(?:async\s+)?function\s*\*?\s+(\w+)\s*[\(<]/); - if (funcDecl) { - nameMap.set(`${lineNo}:1`, funcDecl[1]); - continue; - } - - // Variable assignment: const/let/var foo = function/() => - const varDecl = line.match(/^\s*(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=/); - if (varDecl && (line.includes('=>') || line.includes('function'))) { - nameMap.set(`${lineNo}:1`, varDecl[1]); - continue; - } - - // Prototype assignment: Foo.prototype.bar = function() {} - const protoMatch = line.match(/^\s*(\w+)\.prototype\.(\w+)\s*=\s*function/); - if (protoMatch) { - nameMap.set(`${lineNo}:1`, `${protoMatch[1]}.${protoMatch[2]}`); - continue; - } - - // Static property assignment: Foo.bar = function() {} - const staticPropMatch = line.match(/^\s*(\w+)\.(\w+)\s*=\s*function/); - if (staticPropMatch) { - nameMap.set(`${lineNo}:1`, `${staticPropMatch[1]}.${staticPropMatch[2]}`); - continue; - } - - // Class methods (inside class body) - if 
(currentClass !== null) { - // constructor - if (/^\s+constructor\s*\(/.test(line)) { - nameMap.set(`${lineNo}:1`, currentClass); - continue; - } - // static block: static { ... } - if (/^\s+static\s*\{/.test(line)) { - nameMap.set(`${lineNo}:1`, `${currentClass}.`); - continue; - } - // static property with initializer: static foo = ... - const staticProp = line.match(/^\s+static\s+(\w+)\s*=/); - if (staticProp && (line.includes('=>') || line.includes('function') || line.includes('('))) { - nameMap.set(`${lineNo}:1`, `${currentClass}.${staticProp[1]}`); - continue; - } - // Named method (including async, static, get/set, generator) - const methodMatch = line.match( - /^\s+(?:(?:static|async|get|set)\s+)*(?:\*\s*)?(\w+)\s*\(/ - ); - if (methodMatch) { - const mname = methodMatch[1]; - if (!['if', 'for', 'while', 'switch', 'catch', 'return', 'new'].includes(mname)) { - nameMap.set(`${lineNo}:1`, `${currentClass}.${mname}`); - continue; - } - } - // Class field arrow: foo = () => {} - const fieldArrow = line.match(/^\s+(\w+)\s*=\s*(?:async\s+)?\(/); - if (fieldArrow) { - nameMap.set(`${lineNo}:1`, `${currentClass}.${fieldArrow[1]}`); - continue; - } - } - - // Object shorthand method: { foo() {} } or { async foo() {} } - const objMethod = line.match(/^\s+(?:async\s+)?(\w+)\s*\(.*\)\s*\{/); - if (objMethod && !['if', 'for', 'while', 'switch', 'catch', 'function'].includes(objMethod[1])) { - nameMap.set(`${lineNo}:1`, objMethod[1]); - continue; - } - - // Object property: foo: function() {} or foo: () => {} - const objProp = line.match(/^\s+(\w+)\s*:\s*(?:async\s+)?(?:function|\(|[a-zA-Z_$].*=>)/); - if (objProp) { - nameMap.set(`${lineNo}:1`, objProp[1]); - continue; - } - } - - return nameMap; -} - // ── Jelly → expected-edges conversion ──────────────────────────────────────── const SCHEMA = '../../../expected-edges.schema.json'; @@ -209,8 +86,8 @@ const SCHEMA = '../../../expected-edges.schema.json'; * Convert a Jelly .json call graph + .js source to codegraph 
expected-edges format. * * Jelly function spec: "fileIdx:startLine:startCol:endLine:endCol" (1-based lines) - * We map each function to a name using buildNameMap. Unmapped functions get - * the label "". + * We map each function to a name using buildLineNameMap (scripts/lib/name-map.mjs). + * Unmapped functions get the label "". * * The "module root" function (always index 0 in Jelly) represents the top-level * script scope. We label it "" so edges from it are trackable. @@ -221,7 +98,7 @@ function convertJellyGraph(jellyJson, jsSrc, jsFilename) { const { files, functions, fun2fun } = jellyJson; if (!files || !functions || !fun2fun) return { edges: [], stats: {} }; - const nameMap = buildNameMap(jsSrc, jsFilename); + const nameMap = buildLineNameMap(jsSrc); // Map function index → { name, file } function resolveFunc(idx) { diff --git a/scripts/lib/name-map.mjs b/scripts/lib/name-map.mjs new file mode 100644 index 000000000..2fcf7c026 --- /dev/null +++ b/scripts/lib/name-map.mjs @@ -0,0 +1,256 @@ +/** + * Shared source-scanning heuristics for mapping declaration positions to + * codegraph-style symbol names. + * + * Used by codegraph's external call-graph comparison tooling + * (scripts/import-jelly-micro.mjs, scripts/compare-tools.mjs) to correlate + * Jelly's line-indexed function specs and ACG's textual function names with + * codegraph's own naming scheme. + * + * This is a line-oriented regex heuristic (not an AST parse) — it walks + * source text, tracks the enclosing class via a brace-depth counter, and + * matches an ordered set of declaration patterns. It's good enough for the + * small hand-authored benchmark fixtures these tools run against; it is not + * a general-purpose JS parser. + */ + +import fs from 'node:fs'; +import path from 'node:path'; + +// Bare words that can be mistaken for a class-body method declaration when +// they're the first word before `(` on a line (e.g. `return (x) => …`). 
+const METHOD_KEYWORD_EXCLUSIONS = new Set(['if', 'for', 'while', 'switch', 'catch', 'return', 'new']); +const OBJECT_METHOD_KEYWORD_EXCLUSIONS = new Set(['if', 'for', 'while', 'switch', 'catch', 'function']); + +const CLASS_DECL_RE = /^\s*(?:export\s+)?(?:abstract\s+)?class\s+(\w+)/; + +/** + * Matchers evaluated on every line regardless of class scope, in priority + * order. Each: { regex, extract(match) => name, guard?(line, match) => boolean } + */ +const TOP_LEVEL_MATCHERS = [ + { + // function foo() / export default function* foo() + regex: /^\s*(?:export\s+(?:default\s+)?)?(?:async\s+)?function\s*\*?\s+(\w+)\s*[\(<]/, + extract: (m) => m[1], + }, + { + // const/let/var foo = function ... | foo = () => ... + regex: /^\s*(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=/, + guard: (line) => line.includes('=>') || line.includes('function'), + extract: (m) => m[1], + }, + { + // Foo.prototype.bar = function() {} + regex: /^\s*(\w+)\.prototype\.(\w+)\s*=\s*function/, + extract: (m) => `${m[1]}.${m[2]}`, + }, + { + // Foo.bar = function() {} + regex: /^\s*(\w+)\.(\w+)\s*=\s*function/, + extract: (m) => `${m[1]}.${m[2]}`, + }, +]; + +/** + * Matchers evaluated only while inside a class body. `extract` receives the + * regex match and the enclosing class name. The constructor matcher is + * flagged separately (`isConstructor`) since it needs distinct accumulation + * in name-lookup consumers (ACG labels constructor call targets literally as + * "constructor", not ".constructor"). + */ +const CLASS_MEMBER_MATCHERS = [ + { + regex: /^\s+constructor\s*\(/, + isConstructor: true, + extract: (_m, cls) => cls, + }, + { + // static { ... } + regex: /^\s+static\s*\{/, + extract: (_m, cls) => `${cls}.`, + }, + { + // static foo = ... 
(only when it looks like a function/call, not a plain value) + regex: /^\s+static\s+(\w+)\s*=/, + guard: (line) => line.includes('=>') || line.includes('function') || line.includes('('), + extract: (m, cls) => `${cls}.${m[1]}`, + }, + { + // named method, incl. async/static/get/set/generator + regex: /^\s+(?:(?:static|async|get|set)\s+)*(?:\*\s*)?(\w+)\s*\(/, + guard: (_line, m) => !METHOD_KEYWORD_EXCLUSIONS.has(m[1]), + extract: (m, cls) => `${cls}.${m[1]}`, + }, + { + // class field arrow: foo = () => {} + regex: /^\s+(\w+)\s*=\s*(?:async\s+)?\(/, + extract: (m, cls) => `${cls}.${m[1]}`, + }, +]; + +/** + * Object-literal matchers, evaluated as the final fallback on any line not + * already claimed by a top-level or class-member matcher. + */ +const OBJECT_MEMBER_MATCHERS = [ + { + // { foo() {} } or { async foo() {} } + regex: /^\s+(?:async\s+)?(\w+)\s*\(.*\)\s*\{/, + guard: (_line, m) => !OBJECT_METHOD_KEYWORD_EXCLUSIONS.has(m[1]), + extract: (m) => m[1], + }, + { + // foo: function() {} or foo: () => {} + regex: /^\s+(\w+)\s*:\s*(?:async\s+)?(?:function|\(|[a-zA-Z_$].*=>)/, + extract: (m) => m[1], + }, +]; + +/** Run `line` through an ordered matcher list; return the first hit or null. */ +function tryMatchers(line, matchers, ctx) { + for (const matcher of matchers) { + const m = line.match(matcher.regex); + if (!m) continue; + if (matcher.guard && !matcher.guard(line, m)) continue; + return { name: matcher.extract(m, ctx), isConstructor: !!matcher.isConstructor }; + } + return null; +} + +/** + * Walk `src` line by line, tracking the enclosing class via a brace-depth + * counter, and invoke `onDeclaration(entry)` for every recognized + * class/function/method declaration. + * + * `entry` is `{ line, name, className, isConstructor }` (1-based line + * number). 
`className` is set only for members matched inside a class body; + * `isConstructor` distinguishes constructor declarations from other members + * that resolve to the same "ClassName" value (the class declaration itself). + * + * This is the shared scanning core behind both benchmark tools' heuristic + * name resolution: position→name for Jelly's line-indexed call graph, and + * name→qualified-name lookups for ACG's textual output. + */ +export function scanDeclarations(src, onDeclaration) { + const lines = src.split('\n'); + let currentClass = null; + let classDepth = 0; + let braceDepth = 0; + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + const lineNo = i + 1; + + const classMatch = line.match(CLASS_DECL_RE); + if (classMatch) { + currentClass = classMatch[1]; + classDepth = braceDepth; + } + + for (const ch of line) { + if (ch === '{') braceDepth++; + else if (ch === '}') { + braceDepth--; + if (currentClass !== null && braceDepth === classDepth) currentClass = null; + } + } + + if (classMatch) { + onDeclaration({ line: lineNo, name: classMatch[1], className: null, isConstructor: false }); + continue; + } + + const topLevel = tryMatchers(line, TOP_LEVEL_MATCHERS, null); + if (topLevel) { + onDeclaration({ line: lineNo, name: topLevel.name, className: null, isConstructor: false }); + continue; + } + + if (currentClass !== null) { + const member = tryMatchers(line, CLASS_MEMBER_MATCHERS, currentClass); + if (member) { + onDeclaration({ + line: lineNo, + name: member.name, + className: currentClass, + isConstructor: member.isConstructor, + }); + continue; + } + } + + const obj = tryMatchers(line, OBJECT_MEMBER_MATCHERS, null); + if (obj) { + onDeclaration({ line: lineNo, name: obj.name, className: null, isConstructor: false }); + } + } +} + +/** + * Build a Map<"line:1", name> for all functions/methods/classes in a single + * JS source string (1-based line, column normalised to 1 on insert). 
+ * + * Used by import-jelly-micro.mjs to resolve Jelly's line-indexed function + * specs within one source file. + */ +export function buildLineNameMap(src) { + const nameMap = new Map(); + scanDeclarations(src, (d) => nameMap.set(`${d.line}:1`, d.name)); + return nameMap; +} + +/** + * Build a Map<"filename:line", name> across every file in `dir` whose + * extension is in `exts`. + * + * Used by compare-tools.mjs to resolve Jelly's (file, line) function specs + * against a multi-file fixture directory. + */ +export function buildFileLineNameMap(dir, exts) { + const nameMap = new Map(); + for (const filename of fs.readdirSync(dir)) { + if (!exts.some((e) => filename.endsWith(e))) continue; + const src = fs.readFileSync(path.join(dir, filename), 'utf8'); + scanDeclarations(src, (d) => nameMap.set(`${filename}:${d.line}`, d.name)); + } + return nameMap; +} + +/** + * Build a Map<"filename:unqualifiedName", Set> across every + * file in `dir` whose extension is in `exts`. + * + * Used by compare-tools.mjs to resolve ACG's unqualified function names (no + * class prefix) back to codegraph-style qualified names. A Set is needed + * because multiple classes in the same file can share a method name (e.g. + * Shape.area + Circle.area + Rectangle.area) — callers should try all + * candidates rather than assume a 1:1 mapping. + * + * Constructors are indexed under the literal key "constructor" (ACG labels + * constructor call targets that way), mapping to the enclosing class name. 
+ */ +export function buildFileNameLookup(dir, exts) { + const lookup = new Map(); + const add = (key, value) => { + const existing = lookup.get(key); + if (existing) existing.add(value); + else lookup.set(key, new Set([value])); + }; + + for (const filename of fs.readdirSync(dir)) { + if (!exts.some((e) => filename.endsWith(e))) continue; + const src = fs.readFileSync(path.join(dir, filename), 'utf8'); + scanDeclarations(src, (d) => { + if (d.isConstructor) { + add(`${filename}:constructor`, d.name); + } else if (d.className) { + const member = d.name.slice(d.className.length + 1); + add(`${filename}:${member}`, d.name); + } else { + add(`${filename}:${d.name}`, d.name); + } + }); + } + return lookup; +} From e945bcae7a2d691118a5787fb792dd5eb90044a1 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 04:36:44 -0600 Subject: [PATCH 05/39] fix: replace event-loop-blocking Atomics.wait with shared sleepSync in readFileSafe readFileSafe's Atomics.wait busy-block froze the entire Node.js event loop (all I/O and timer callbacks) for up to 100ms per retry on the watch-mode hot path. Extracts journal.ts's existing sleepSync busy-spin helper into src/shared/sleep.ts so both readFileSafe and journal.ts's lock-retry loop share one implementation instead of duplicating it. docs check acknowledged: internal bug fix, no feature/language/architecture table changes warranted in README.md, CLAUDE.md, or ROADMAP.md. 
Impact: 2 functions changed, 31 affected --- src/domain/graph/builder/helpers.ts | 4 ++-- src/domain/graph/journal.ts | 12 +----------- src/shared/sleep.ts | 20 ++++++++++++++++++++ 3 files changed, 23 insertions(+), 13 deletions(-) create mode 100644 src/shared/sleep.ts diff --git a/src/domain/graph/builder/helpers.ts b/src/domain/graph/builder/helpers.ts index 1c6859667..f0f34cffa 100644 --- a/src/domain/graph/builder/helpers.ts +++ b/src/domain/graph/builder/helpers.ts @@ -10,6 +10,7 @@ import { purgeFilesData } from '../../../db/index.js'; import { debug, warn } from '../../../infrastructure/logger.js'; import { buildIgnoreSet, EXTENSIONS, normalizePath } from '../../../shared/constants.js'; import { compileGlobs, globToRegex, matchesAny } from '../../../shared/globs.js'; +import { sleepSync } from '../../../shared/sleep.js'; import type { BetterSqlite3Database, CodegraphConfig, @@ -326,8 +327,7 @@ export function readFileSafe(filePath: string, retries: number = 2): string { return fs.readFileSync(filePath, 'utf-8'); } catch (err: unknown) { if (attempt < retries && TRANSIENT_CODES.has((err as NodeJS.ErrnoException).code ?? 
'')) { - const sharedBuf = new SharedArrayBuffer(4); - Atomics.wait(new Int32Array(sharedBuf), 0, 0, RETRY_DELAY_MS); + sleepSync(RETRY_DELAY_MS); continue; } throw err; diff --git a/src/domain/graph/journal.ts b/src/domain/graph/journal.ts index 8d68256ca..01775ace5 100644 --- a/src/domain/graph/journal.ts +++ b/src/domain/graph/journal.ts @@ -2,6 +2,7 @@ import crypto from 'node:crypto'; import fs from 'node:fs'; import path from 'node:path'; import { debug, warn } from '../../infrastructure/logger.js'; +import { sleepSync } from '../../shared/sleep.js'; export const JOURNAL_FILENAME = 'changes.journal'; const HEADER_PREFIX = '# codegraph-journal v1 '; @@ -10,17 +11,6 @@ const LOCK_TIMEOUT_MS = 5_000; const LOCK_STALE_MS = 30_000; const LOCK_RETRY_MS = 25; -// Busy-spin sleep avoids blocking the Node.js event loop (unlike Atomics.wait, -// which freezes all I/O and timer callbacks). The retry interval is short -// (25ms), so the CPU cost is negligible while keeping unrelated callbacks -// responsive in watcher processes. -function sleepSync(ms: number): void { - const end = process.hrtime.bigint() + BigInt(ms) * 1_000_000n; - while (process.hrtime.bigint() < end) { - /* spin */ - } -} - function isPidAlive(pid: number): boolean { if (!Number.isFinite(pid) || pid <= 0) return false; try { diff --git a/src/shared/sleep.ts b/src/shared/sleep.ts new file mode 100644 index 000000000..8e3accb41 --- /dev/null +++ b/src/shared/sleep.ts @@ -0,0 +1,20 @@ +/** + * Synchronous sleep utilities for short retry/backoff loops. + */ + +/** + * Busy-spin sleep for `ms` milliseconds. + * + * Deliberately avoids `Atomics.wait`, which blocks the calling thread at the + * OS level and freezes all libuv I/O and timer callbacks for the duration of + * the wait — unsafe on hot paths shared with watcher processes. The retry + * intervals this is used for are short (tens of ms), so the CPU cost of + * spinning is negligible next to the safety of keeping unrelated callbacks + * responsive. 
+ */ +export function sleepSync(ms: number): void { + const end = process.hrtime.bigint() + BigInt(ms) * 1_000_000n; + while (process.hrtime.bigint() < end) { + /* spin */ + } +} From 4a348dbe6e0bc214d9f97c4a6dd6d847fb40d0a6 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 04:52:12 -0600 Subject: [PATCH 06/39] refactor: extract shared resolveFileTree helper from dataflow.ts and complexity.ts Impact: 14 functions changed, 10 affected --- src/features/complexity.ts | 48 +++++--------- src/features/dataflow.ts | 57 ++++------------- src/features/shared/resolve-file-tree.ts | 81 ++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 75 deletions(-) create mode 100644 src/features/shared/resolve-file-tree.ts diff --git a/src/features/complexity.ts b/src/features/complexity.ts index 8e1b6fb06..fb3cca381 100644 --- a/src/features/complexity.ts +++ b/src/features/complexity.ts @@ -1,4 +1,3 @@ -import fs from 'node:fs'; import path from 'node:path'; import { computeLOCMetrics as _computeLOCMetrics, @@ -22,6 +21,7 @@ import type { LOCMetrics, TreeSitterNode, } from '../types.js'; +import { resolveFileTree } from './shared/resolve-file-tree.js'; // Re-export rules for backward compatibility export { COMPLEXITY_RULES, HALSTEAD_RULES }; @@ -437,41 +437,25 @@ function getTreeForFile( extToLang: Map | null, getParser: (parsers: any, absPath: string) => any, ): { tree: { rootNode: TreeSitterNode }; langId: string } | null { - let tree = symbols._tree; - let langId = symbols._langId; - const allPrecomputed = symbols.definitions.every( (d) => (d.kind !== 'function' && d.kind !== 'method') || d.complexity, ); - if (!allPrecomputed && !tree) { - const ext = path.extname(relPath).toLowerCase(); - if (!COMPLEXITY_EXTENSIONS.has(ext)) return null; - if (!extToLang) return null; - langId = extToLang.get(ext); - if (!langId) return null; - - const absPath = path.join(rootDir, relPath); - let code: string; - try { - code = fs.readFileSync(absPath, 'utf-8'); - } catch 
(e: unknown) { - debug(`complexity: cannot read ${relPath}: ${(e as Error).message}`); - return null; - } - - const parser = getParser(parsers, absPath); - if (!parser) return null; - - try { - tree = parser.parse(code); - } catch (e: unknown) { - debug(`complexity: parse failed for ${relPath}: ${(e as Error).message}`); - return null; - } - } - - return tree && langId ? { tree: tree as { rootNode: TreeSitterNode }, langId } : null; + // Every definition already has precomputed complexity and there's no cached + // tree to fall back on — nothing to parse. + if (allPrecomputed && !symbols._tree) return null; + + return resolveFileTree({ + relPath, + rootDir, + cachedTree: symbols._tree, + cachedLangId: symbols._langId, + extensions: COMPLEXITY_EXTENSIONS, + extToLang, + parsers, + getParser, + logPrefix: 'complexity', + }); } function upsertPrecomputedComplexity( diff --git a/src/features/dataflow.ts b/src/features/dataflow.ts index 369265bed..102f7f20d 100644 --- a/src/features/dataflow.ts +++ b/src/features/dataflow.ts @@ -9,7 +9,6 @@ * Opt-in via `build --dataflow`. Supports all languages with DATAFLOW_RULES. 
*/ -import fs from 'node:fs'; import path from 'node:path'; import { DATAFLOW_RULES } from '../ast-analysis/rules/index.js'; import { @@ -27,6 +26,7 @@ import type { NormalizedSymbol } from '../shared/normalize.js'; import { paginateResult } from '../shared/paginate.js'; import type { BetterSqlite3Database, NativeDatabase, NodeRow, TreeSitterNode } from '../types.js'; import { findNodes } from './shared/find-nodes.js'; +import { resolveFileTree } from './shared/resolve-file-tree.js'; // Re-export for backward compatibility export { _makeDataflowRules as makeDataflowRules, DATAFLOW_RULES }; @@ -116,49 +116,20 @@ function getDataflowForFile( ): DataflowResult | null { if (symbols.dataflow) return symbols.dataflow; - let tree = symbols._tree; - let langId = symbols._langId; - - if (!tree) { - if (!getParserFn) return null; - const ext = path.extname(relPath).toLowerCase(); - langId = extToLang.get(ext); - if (!langId || !DATAFLOW_RULES.has(langId)) return null; - - const absPath = path.join(rootDir, relPath); - let code: string; - try { - code = fs.readFileSync(absPath, 'utf-8'); - } catch (e: unknown) { - debug(`dataflow: cannot read ${relPath}: ${(e as Error).message}`); - return null; - } - - const parser = getParserFn(parsers, absPath); - if (!parser) return null; - - try { - tree = parser.parse(code); - } catch (e: unknown) { - debug(`dataflow: parse failed for ${relPath}: ${(e as Error).message}`); - return null; - } - } - - if (!langId) { - const ext = path.extname(relPath).toLowerCase(); - langId = extToLang.get(ext); - if (!langId) return null; - } - - if (!DATAFLOW_RULES.has(langId)) return null; - - return extractDataflow( - tree as { rootNode: TreeSitterNode }, + const resolved = resolveFileTree({ relPath, - symbols.definitions, - langId, - ); + rootDir, + cachedTree: symbols._tree, + cachedLangId: symbols._langId, + extensions: DATAFLOW_EXTENSIONS, + extToLang, + parsers, + getParser: getParserFn, + logPrefix: 'dataflow', + }); + if (!resolved) return 
null; + + return extractDataflow(resolved.tree, relPath, symbols.definitions, resolved.langId); } interface ArgFlow { diff --git a/src/features/shared/resolve-file-tree.ts b/src/features/shared/resolve-file-tree.ts new file mode 100644 index 000000000..f34d84d9d --- /dev/null +++ b/src/features/shared/resolve-file-tree.ts @@ -0,0 +1,81 @@ +import fs from 'node:fs'; +import path from 'node:path'; +import { debug } from '../../infrastructure/logger.js'; +import type { TreeSitterNode } from '../../types.js'; + +export interface ResolveFileTreeOptions { + /** Repo-relative path of the file being resolved. */ + relPath: string; + /** Absolute root directory the repo-relative path is joined against. */ + rootDir: string; + /** Already-parsed tree, if the caller has one cached (e.g. from a fresh build). */ + cachedTree?: { rootNode: TreeSitterNode } | null; + /** Language id paired with `cachedTree`, if known. */ + cachedLangId?: string | null; + /** Extensions this analysis supports — gates whether a fallback parse is attempted. */ + extensions: Set; + /** Extension → language id map used both for the allowlist gate and langId lookup. */ + extToLang: Map | null | undefined; + /** Opaque parser table passed through to `getParser`. */ + parsers: unknown; + /** Resolves a tree-sitter parser instance for a given absolute path. */ + getParser: + | ((parsers: unknown, absPath: string) => { parse(code: string): unknown } | null | undefined) + | null; + /** Prefix used in debug log messages (e.g. "complexity", "dataflow"). */ + logPrefix: string; +} + +export interface ResolvedFileTree { + tree: { rootNode: TreeSitterNode }; + langId: string; +} + +/** + * Resolve a parsed tree-sitter tree for a file: prefer an already-cached + * tree/langId pair, otherwise read the file from disk and parse it, with + * debug-logged fallback on read/parse errors. 
Shared by + * complexity.ts's getTreeForFile and dataflow.ts's getDataflowForFile, + * which previously duplicated this ~20-line cache/read/parse dance. + */ +export function resolveFileTree(opts: ResolveFileTreeOptions): ResolvedFileTree | null { + const { relPath, rootDir, extensions, extToLang, parsers, getParser, logPrefix } = opts; + let tree = opts.cachedTree; + let langId = opts.cachedLangId; + + if (!tree) { + if (!getParser) return null; + const ext = path.extname(relPath).toLowerCase(); + if (!extensions.has(ext)) return null; + if (!extToLang) return null; + langId = extToLang.get(ext); + if (!langId) return null; + + const absPath = path.join(rootDir, relPath); + let code: string; + try { + code = fs.readFileSync(absPath, 'utf-8'); + } catch (e: unknown) { + debug(`${logPrefix}: cannot read ${relPath}: ${(e as Error).message}`); + return null; + } + + const parser = getParser(parsers, absPath); + if (!parser) return null; + + try { + tree = parser.parse(code) as { rootNode: TreeSitterNode }; + } catch (e: unknown) { + debug(`${logPrefix}: parse failed for ${relPath}: ${(e as Error).message}`); + return null; + } + } + + if (!langId) { + const ext = path.extname(relPath).toLowerCase(); + langId = extToLang?.get(ext); + if (!langId) return null; + } + + return tree && langId ? { tree, langId } : null; +} From 8fed8bc2355e9c0957b57ec1bc59c5de031f300c Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 05:15:47 -0600 Subject: [PATCH 07/39] refactor: extend DEFAULTS with previously-hardcoded config constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Registers resolveSecrets' execFileSync timeout/maxBuffer in DEFAULTS.llm (apiKeyCommandTimeoutMs, apiKeyCommandMaxBufferBytes) and wires resolveSecrets to read them from config instead of hardcoding. Also adds three purely-additive @reserved DEFAULTS entries for constants hardcoded elsewhere in the codebase (build. 
largeCodebaseFileThreshold, db.busyTimeoutMs, community. capacityGrowthFactor) so their consumer files can be wired to them in follow-up commits. docs check acknowledged — no new feature/language/architecture change; docs/guides/configuration.md (the actual config reference) is already updated in this commit. Impact: 5 functions changed, 87 affected --- docs/guides/configuration.md | 4 +++- src/infrastructure/config.ts | 31 +++++++++++++++++++++++++++++-- src/types.ts | 34 ++++++++++++++++++++++++++++++++++ tests/unit/config.test.ts | 17 +++++++++++++++-- 4 files changed, 81 insertions(+), 5 deletions(-) diff --git a/docs/guides/configuration.md b/docs/guides/configuration.md index 02e08c5d6..ff338f20d 100644 --- a/docs/guides/configuration.md +++ b/docs/guides/configuration.md @@ -214,8 +214,10 @@ Used by features that call out to a chat-completion API (e.g. query expansion), | `model` | `string \| null` | `null` | Model identifier passed to the provider. | | `baseUrl` | `string \| null` | `null` | Override the provider's base URL (for compatible proxies, local servers, etc.). | | `apiKey` | `string \| null` | `null` | Plaintext API key. Prefer `apiKeyCommand` or env vars over this. | -| `apiKeyCommand` | `string \| null` | `null` | Shell-out command that prints the key to stdout. Split on whitespace and run via `execFileSync` (no shell — `$(...)`, pipes, globs, and variable expansion are not supported). 10s timeout, 64 KB max output. | +| `apiKeyCommand` | `string \| null` | `null` | Shell-out command that prints the key to stdout. Split on whitespace and run via `execFileSync` (no shell — `$(...)`, pipes, globs, and variable expansion are not supported). Timeout/output cap controlled by `apiKeyCommandTimeoutMs`/`apiKeyCommandMaxBufferBytes` below. | | `requestTimeoutMs` | `number` | `120000` | Per-request timeout for remote HTTP calls made against `baseUrl` (currently the [remote embedding provider](#embeddings-embeddings)). 
Aborts and throws if a self-hosted server hangs mid-request instead of blocking indefinitely. | +| `apiKeyCommandTimeoutMs` | `number` | `10000` | Timeout for the `apiKeyCommand` subprocess. Prevents a hung secret-manager CLI from blocking config loading indefinitely. | +| `apiKeyCommandMaxBufferBytes` | `number` | `65536` | Max stdout buffer size (bytes) for the `apiKeyCommand` subprocess. | Resolution order (first non-empty wins): `apiKeyCommand` output → `CODEGRAPH_LLM_API_KEY` env var → `apiKey` field. diff --git a/src/infrastructure/config.ts b/src/infrastructure/config.ts index c8a146873..a6bf6321e 100644 --- a/src/infrastructure/config.ts +++ b/src/infrastructure/config.ts @@ -29,10 +29,27 @@ export const DEFAULTS = { dbPath: '.codegraph/graph.db', driftThreshold: 0.2, smallFilesThreshold: 5, + /** + * Minimum existing file-node count for a repo to be treated as a "large + * codebase" when deciding whether to scope node loading to changed files. + * @reserved — currently not wired; loadNodes() in + * `src/domain/graph/builder/stages/build-edges.ts` still uses the + * hardcoded literal `20` at its `existingFileCount > 20` gate. + */ + largeCodebaseFileThreshold: 20, typescriptResolver: true, engine: 'auto' as 'auto' | 'native' | 'wasm', fastSkipDiag: false, }, + db: { + /** + * SQLite `busy_timeout` pragma (ms) applied to every opened connection. + * @reserved — currently not wired; `src/db/connection.ts` still sets the + * hardcoded literal `5000` directly via `db.pragma('busy_timeout = 5000')` + * in both `openDb` and `openReadonlyOrFail`. 
+ */ + busyTimeoutMs: 5000, + }, query: { defaultDepth: 3, defaultLimit: 20, @@ -50,6 +67,8 @@ export const DEFAULTS = { apiKey: null as string | null, apiKeyCommand: null as string | null, requestTimeoutMs: 120_000, + apiKeyCommandTimeoutMs: 10_000, + apiKeyCommandMaxBufferBytes: 64 * 1024, }, search: { defaultMinScore: 0.2, rrfK: 60, topK: 15, similarityWarnThreshold: 0.85 }, ci: { failOnCycles: false, impactThreshold: null as number | null }, @@ -119,6 +138,14 @@ export const DEFAULTS = { maxLevels: 50, maxLocalPasses: 20, refinementTheta: 1.0, + /** + * Growth multiplier applied when a Leiden partition's per-community + * typed arrays need to be resized to fit a larger community count. + * @reserved — currently not wired; `ensureCommCapacity()` in + * `src/graph/algorithms/leiden/partition.ts` still uses the hardcoded + * literal `1.5` directly. + */ + capacityGrowthFactor: 1.5, }, structure: { cohesionThreshold: 0.3, @@ -747,8 +774,8 @@ export function resolveSecrets(config: CodegraphConfig): CodegraphConfig { try { const result = execFileSync(executable!, args, { encoding: 'utf-8', - timeout: 10_000, - maxBuffer: 64 * 1024, + timeout: config.llm.apiKeyCommandTimeoutMs, + maxBuffer: config.llm.apiKeyCommandMaxBufferBytes, stdio: ['ignore', 'pipe', 'pipe'], }).trim(); if (result) { diff --git a/src/types.ts b/src/types.ts index d52bd81bf..9ffb1f9b7 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1307,6 +1307,13 @@ export interface CodegraphConfig { dbPath: string; driftThreshold: number; smallFilesThreshold: number; + /** + * Minimum existing file-node count for a repo to be treated as a "large + * codebase" when deciding whether to scope node loading to changed files. + * @reserved — currently not wired; see `largeCodebaseFileThreshold` in + * `src/infrastructure/config.ts` for wiring status. + */ + largeCodebaseFileThreshold: number; /** * Use the TypeScript compiler API to enrich typeMap for .ts/.tsx files. 
* Improves method-call edge accuracy for patterns like `const svc = container.get()`. @@ -1337,6 +1344,15 @@ export interface CodegraphConfig { fastSkipDiag: boolean; }; + db: { + /** + * SQLite `busy_timeout` pragma (ms) applied to every opened connection. + * @reserved — currently not wired; see `busyTimeoutMs` in + * `src/infrastructure/config.ts` for wiring status. + */ + busyTimeoutMs: number; + }; + query: { defaultDepth: number; defaultLimit: number; @@ -1377,6 +1393,17 @@ export interface CodegraphConfig { * self-hosted server from hanging the process indefinitely. Default: 120000. */ requestTimeoutMs: number; + /** + * Timeout (ms) for the `apiKeyCommand` subprocess spawned via `execFileSync`. + * Prevents a hung secret-manager CLI from blocking config loading indefinitely. + * Default: 10000. + */ + apiKeyCommandTimeoutMs: number; + /** + * Max stdout buffer size (bytes) for the `apiKeyCommand` subprocess spawned via + * `execFileSync`. Default: 65536 (64 KB). + */ + apiKeyCommandMaxBufferBytes: number; }; search: { @@ -1443,6 +1470,13 @@ export interface CodegraphConfig { maxLevels: number; maxLocalPasses: number; refinementTheta: number; + /** + * Growth multiplier applied when a Leiden partition's per-community + * typed arrays need to be resized to fit a larger community count. + * @reserved — currently not wired; see `capacityGrowthFactor` in + * `src/infrastructure/config.ts` for wiring status. 
+ */ + capacityGrowthFactor: number; }; structure: { cohesionThreshold: number }; diff --git a/tests/unit/config.test.ts b/tests/unit/config.test.ts index 7979a6313..df0baede5 100644 --- a/tests/unit/config.test.ts +++ b/tests/unit/config.test.ts @@ -69,6 +69,8 @@ describe('DEFAULTS', () => { apiKey: null, apiKeyCommand: null, requestTimeoutMs: 120_000, + apiKeyCommandTimeoutMs: 10_000, + apiKeyCommandMaxBufferBytes: 64 * 1024, }); }); @@ -131,6 +133,7 @@ describe('DEFAULTS', () => { maxLevels: 50, maxLocalPasses: 20, refinementTheta: 1.0, + capacityGrowthFactor: 1.5, }); }); @@ -138,6 +141,14 @@ describe('DEFAULTS', () => { expect(DEFAULTS.structure).toEqual({ cohesionThreshold: 0.3 }); }); + it('has db defaults', () => { + expect(DEFAULTS.db).toEqual({ busyTimeoutMs: 5000 }); + }); + + it('has build defaults', () => { + expect(DEFAULTS.build).toHaveProperty('largeCodebaseFileThreshold', 20); + }); + it('has mcp defaults', () => { expect(DEFAULTS.mcp.defaults.list_functions).toBe(100); expect(DEFAULTS.mcp.defaults.fn_impact).toBe(5); @@ -455,13 +466,15 @@ describe('resolveSecrets', () => { baseUrl: null, apiKey: null, apiKeyCommand: 'op read secret/key', + apiKeyCommandTimeoutMs: DEFAULTS.llm.apiKeyCommandTimeoutMs, + apiKeyCommandMaxBufferBytes: DEFAULTS.llm.apiKeyCommandMaxBufferBytes, }, }; resolveSecrets(config); expect(mockExecFile).toHaveBeenCalledWith('op', ['read', 'secret/key'], { encoding: 'utf-8', - timeout: 10_000, - maxBuffer: 64 * 1024, + timeout: DEFAULTS.llm.apiKeyCommandTimeoutMs, + maxBuffer: DEFAULTS.llm.apiKeyCommandMaxBufferBytes, stdio: ['ignore', 'pipe', 'pipe'], }); expect(config.llm.apiKey).toBe('secret-key-123'); From f0a488261d7b5c2a27182d361c7eef4d85ce683c Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 05:40:34 -0600 Subject: [PATCH 08/39] refactor: move generatePlotHTML from features/graph-enrichment.ts to presentation/plot.ts Impact: 4 functions changed, 3 affected --- CLAUDE.md | 3 +- src/cli/commands/plot.ts | 3 +- 
src/features/graph-enrichment.ts | 18 +---- src/presentation/plot.ts | 21 +++++ tests/graph/viewer.test.ts | 107 +------------------------- tests/presentation/plot.test.ts | 128 +++++++++++++++++++++++++++++++ 6 files changed, 155 insertions(+), 125 deletions(-) create mode 100644 src/presentation/plot.ts create mode 100644 tests/presentation/plot.test.ts diff --git a/CLAUDE.md b/CLAUDE.md index cc8f2a65b..cb147dac9 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -136,9 +136,10 @@ Source is TypeScript in `src/`, compiled via `tsup`. The Rust native engine live | `features/snapshot.ts` | SQLite DB backup and restore | | `features/structure.ts` | Codebase structure analysis | | `features/triage.ts` | Risk-ranked audit priority queue (delegates scoring to `graph/classifiers/`) | -| `features/graph-enrichment.ts` | Data enrichment for HTML viewer (complexity, communities, fan-in/out) | +| `features/graph-enrichment.ts` | Plot data preparation for the HTML viewer (complexity, communities, fan-in/out) — pure data prep, no HTML/presentation imports beyond shared color/config types | | **`presentation/`** | **Pure output formatting + CLI command wrappers** | | `presentation/viewer.ts` | Interactive HTML renderer with vis-network | +| `presentation/plot.ts` | Thin `generatePlotHTML` wrapper — prepares data via `features/graph-enrichment.ts`, renders via `presentation/viewer.ts` | | `presentation/queries-cli/` | CLI display wrappers for query functions, split by concern: `path.ts`, `overview.ts`, `inspect.ts`, `impact.ts`, `exports.ts` | | `presentation/*.ts` | Command formatters (audit, batch, check, communities, complexity, etc.) 
— call `features/*.ts`, format output, set exit codes | | `presentation/export.ts` | DOT/Mermaid/GraphML/Neo4j serializers | diff --git a/src/cli/commands/plot.ts b/src/cli/commands/plot.ts index bc25ae732..9a40923ae 100644 --- a/src/cli/commands/plot.ts +++ b/src/cli/commands/plot.ts @@ -41,7 +41,8 @@ export const command: CommandDefinition = { ['--color-by ', 'Color nodes by: kind | role | community | complexity'], ], async execute(_args, opts, ctx) { - const { generatePlotHTML, loadPlotConfig } = await import('../../features/graph-enrichment.js'); + const { generatePlotHTML } = await import('../../presentation/plot.js'); + const { loadPlotConfig } = await import('../../presentation/viewer.js'); const os = await import('node:os'); const { db, close } = openGraph(opts as { db?: string }); diff --git a/src/features/graph-enrichment.ts b/src/features/graph-enrichment.ts index 886d09bf8..a05674ff6 100644 --- a/src/features/graph-enrichment.ts +++ b/src/features/graph-enrichment.ts @@ -7,7 +7,7 @@ import { DEFAULT_NODE_COLORS, DEFAULT_ROLE_COLORS, } from '../presentation/colors.js'; -import { DEFAULT_CONFIG, type PlotConfig, renderPlotHTML } from '../presentation/viewer.js'; +import { DEFAULT_CONFIG, type PlotConfig } from '../presentation/viewer.js'; import type { BetterSqlite3Database } from '../types.js'; // Re-export presentation utilities for backward compatibility @@ -475,19 +475,3 @@ function prepareFileLevelData( return { nodes: visNodes, edges: visEdges, seedNodeIds: selectFileSeedNodes(visNodes, cfg) }; } - -// ─── HTML Generation (thin wrapper) ────────────────────────────────── - -export function generatePlotHTML( - db: BetterSqlite3Database, - opts: { - fileLevel?: boolean; - noTests?: boolean; - minConfidence?: number; - config?: PlotConfig; - } = {}, -): string { - const cfg = opts.config || DEFAULT_CONFIG; - const data = prepareGraphData(db, opts); - return renderPlotHTML(data, cfg); -} diff --git a/src/presentation/plot.ts b/src/presentation/plot.ts 
new file mode 100644 index 000000000..b666198e6 --- /dev/null +++ b/src/presentation/plot.ts @@ -0,0 +1,21 @@ +import { prepareGraphData } from '../features/graph-enrichment.js'; +import type { BetterSqlite3Database } from '../types.js'; +import { DEFAULT_CONFIG, type PlotConfig, renderPlotHTML } from './viewer.js'; + +/** + * Generate a full interactive HTML plot document for the dependency graph. + * Thin wrapper: prepares graph data (features layer) then renders it (presentation layer). + */ +export function generatePlotHTML( + db: BetterSqlite3Database, + opts: { + fileLevel?: boolean; + noTests?: boolean; + minConfidence?: number; + config?: PlotConfig; + } = {}, +): string { + const cfg = opts.config || DEFAULT_CONFIG; + const data = prepareGraphData(db, opts); + return renderPlotHTML(data, cfg); +} diff --git a/tests/graph/viewer.test.ts b/tests/graph/viewer.test.ts index 9347833f6..368639cc0 100644 --- a/tests/graph/viewer.test.ts +++ b/tests/graph/viewer.test.ts @@ -5,11 +5,7 @@ import Database from 'better-sqlite3'; import { describe, expect, it } from 'vitest'; import { initSchema } from '../../src/db/index.js'; -import { - generatePlotHTML, - loadPlotConfig, - prepareGraphData, -} from '../../src/features/graph-enrichment.js'; +import { loadPlotConfig, prepareGraphData } from '../../src/features/graph-enrichment.js'; function createTestDb() { const db = new Database(':memory:'); @@ -36,107 +32,6 @@ function insertComplexity(db, nodeId, cognitive, cyclomatic, mi) { ).run(nodeId, cognitive, cyclomatic, mi); } -describe('generatePlotHTML', () => { - it('returns a valid HTML document', () => { - const db = createTestDb(); - const a = insertNode(db, 'src/a.js', 'file', 'src/a.js', 0); - const b = insertNode(db, 'src/b.js', 'file', 'src/b.js', 0); - insertEdge(db, a, b, 'imports'); - - const html = generatePlotHTML(db); - expect(html).toContain(''); - expect(html).toContain(''); - db.close(); - }); - - it('embeds graph data as JSON', () => { - const db = 
createTestDb(); - const a = insertNode(db, 'src/a.js', 'file', 'src/a.js', 0); - const b = insertNode(db, 'src/b.js', 'file', 'src/b.js', 0); - insertEdge(db, a, b, 'imports'); - - const html = generatePlotHTML(db); - expect(html).toContain('var allNodes ='); - expect(html).toContain('var allEdges ='); - expect(html).toContain('a.js'); - expect(html).toContain('b.js'); - db.close(); - }); - - it('includes vis-network CDN script', () => { - const db = createTestDb(); - const html = generatePlotHTML(db); - expect(html).toContain('vis-network'); - expect(html).toContain('unpkg.com'); - db.close(); - }); - - it('applies custom config title', () => { - const db = createTestDb(); - const html = generatePlotHTML(db, { - config: { - title: 'My Custom Graph', - layout: { algorithm: 'hierarchical', direction: 'LR' }, - physics: { enabled: true, nodeDistance: 150 }, - nodeColors: {}, - roleColors: {}, - colorBy: 'kind', - edgeStyle: { color: '#666', smooth: true }, - filter: { kinds: null, roles: null, files: null }, - seedStrategy: 'all', - seedCount: 30, - clusterBy: 'none', - sizeBy: 'uniform', - overlays: { complexity: false, risk: false }, - riskThresholds: { highBlastRadius: 10, lowMI: 40 }, - }, - }); - expect(html).toContain('My Custom Graph'); - db.close(); - }); - - it('handles empty graph without error', () => { - const db = createTestDb(); - const html = generatePlotHTML(db); - expect(html).toContain(''); - expect(html).toContain('var allNodes = []'); - expect(html).toContain('var allEdges = []'); - db.close(); - }); - - it('supports function-level mode', () => { - const db = createTestDb(); - const fnA = insertNode(db, 'doWork', 'function', 'src/a.js', 5); - const fnB = insertNode(db, 'helper', 'function', 'src/b.js', 10); - insertEdge(db, fnA, fnB, 'calls'); - - const html = generatePlotHTML(db, { fileLevel: false }); - expect(html).toContain('doWork'); - expect(html).toContain('helper'); - db.close(); - }); - - it('includes detail panel elements', () => { - 
const db = createTestDb(); - const html = generatePlotHTML(db); - expect(html).toContain('id="detail"'); - expect(html).toContain('id="detailContent"'); - expect(html).toContain('id="detailClose"'); - db.close(); - }); - - it('includes new control elements', () => { - const db = createTestDb(); - const html = generatePlotHTML(db); - expect(html).toContain('id="colorBySelect"'); - expect(html).toContain('id="sizeBySelect"'); - expect(html).toContain('id="clusterBySelect"'); - expect(html).toContain('id="riskToggle"'); - db.close(); - }); -}); - describe('prepareGraphData', () => { it('embeds complexity data into function-level nodes', () => { const db = createTestDb(); diff --git a/tests/presentation/plot.test.ts b/tests/presentation/plot.test.ts new file mode 100644 index 000000000..9cfe84a2c --- /dev/null +++ b/tests/presentation/plot.test.ts @@ -0,0 +1,128 @@ +/** + * Interactive HTML plot generation tests. + */ + +import Database from 'better-sqlite3'; +import { describe, expect, it } from 'vitest'; +import { initSchema } from '../../src/db/index.js'; +import { generatePlotHTML } from '../../src/presentation/plot.js'; + +function createTestDb() { + const db = new Database(':memory:'); + db.pragma('journal_mode = WAL'); + initSchema(db); + return db; +} + +function insertNode(db, name, kind, file, line, role) { + return db + .prepare('INSERT INTO nodes (name, kind, file, line, role) VALUES (?, ?, ?, ?, ?)') + .run(name, kind, file, line, role || null).lastInsertRowid; +} + +function insertEdge(db, sourceId, targetId, kind) { + db.prepare( + 'INSERT INTO edges (source_id, target_id, kind, confidence, dynamic) VALUES (?, ?, ?, 1.0, 0)', + ).run(sourceId, targetId, kind); +} + +describe('generatePlotHTML', () => { + it('returns a valid HTML document', () => { + const db = createTestDb(); + const a = insertNode(db, 'src/a.js', 'file', 'src/a.js', 0); + const b = insertNode(db, 'src/b.js', 'file', 'src/b.js', 0); + insertEdge(db, a, b, 'imports'); + + const html = 
generatePlotHTML(db); + expect(html).toContain(''); + expect(html).toContain(''); + db.close(); + }); + + it('embeds graph data as JSON', () => { + const db = createTestDb(); + const a = insertNode(db, 'src/a.js', 'file', 'src/a.js', 0); + const b = insertNode(db, 'src/b.js', 'file', 'src/b.js', 0); + insertEdge(db, a, b, 'imports'); + + const html = generatePlotHTML(db); + expect(html).toContain('var allNodes ='); + expect(html).toContain('var allEdges ='); + expect(html).toContain('a.js'); + expect(html).toContain('b.js'); + db.close(); + }); + + it('includes vis-network CDN script', () => { + const db = createTestDb(); + const html = generatePlotHTML(db); + expect(html).toContain('vis-network'); + expect(html).toContain('unpkg.com'); + db.close(); + }); + + it('applies custom config title', () => { + const db = createTestDb(); + const html = generatePlotHTML(db, { + config: { + title: 'My Custom Graph', + layout: { algorithm: 'hierarchical', direction: 'LR' }, + physics: { enabled: true, nodeDistance: 150 }, + nodeColors: {}, + roleColors: {}, + colorBy: 'kind', + edgeStyle: { color: '#666', smooth: true }, + filter: { kinds: null, roles: null, files: null }, + seedStrategy: 'all', + seedCount: 30, + clusterBy: 'none', + sizeBy: 'uniform', + overlays: { complexity: false, risk: false }, + riskThresholds: { highBlastRadius: 10, lowMI: 40 }, + }, + }); + expect(html).toContain('My Custom Graph'); + db.close(); + }); + + it('handles empty graph without error', () => { + const db = createTestDb(); + const html = generatePlotHTML(db); + expect(html).toContain(''); + expect(html).toContain('var allNodes = []'); + expect(html).toContain('var allEdges = []'); + db.close(); + }); + + it('supports function-level mode', () => { + const db = createTestDb(); + const fnA = insertNode(db, 'doWork', 'function', 'src/a.js', 5); + const fnB = insertNode(db, 'helper', 'function', 'src/b.js', 10); + insertEdge(db, fnA, fnB, 'calls'); + + const html = generatePlotHTML(db, { 
fileLevel: false }); + expect(html).toContain('doWork'); + expect(html).toContain('helper'); + db.close(); + }); + + it('includes detail panel elements', () => { + const db = createTestDb(); + const html = generatePlotHTML(db); + expect(html).toContain('id="detail"'); + expect(html).toContain('id="detailContent"'); + expect(html).toContain('id="detailClose"'); + db.close(); + }); + + it('includes new control elements', () => { + const db = createTestDb(); + const html = generatePlotHTML(db); + expect(html).toContain('id="colorBySelect"'); + expect(html).toContain('id="sizeBySelect"'); + expect(html).toContain('id="clusterBySelect"'); + expect(html).toContain('id="riskToggle"'); + db.close(); + }); +}); From 370f336f02d7a1d3cdbbe291090c681ec62663cd Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 06:13:08 -0600 Subject: [PATCH 09/39] refactor: decompose extractDestructuredBindingsWalk/handleVariableDecl/runContextCollectorWalk Pure extract-method decomposition of the three highest-complexity functions in extractors/javascript.ts (Titan phase 10, sync.json commit message shortened to fit the 100-char commitlint header limit). No extraction logic, node-type handling, or edge-case behavior changed -- verified byte-identical resolution-benchmark precision/recall across all 34 fixture languages and byte-identical codegraph query/where output for 3 real non-fixture files before/after. No Rust files touched, so native/WASM parity is unaffected by construction. docs check acknowledged: internal-only refactor, no new languages/commands/ architecture; README.md, CLAUDE.md, and ROADMAP.md are unaffected. 
Impact: 20 functions changed, 15 affected --- src/extractors/javascript.ts | 753 +++++++++++++++++++++-------------- 1 file changed, 460 insertions(+), 293 deletions(-) diff --git a/src/extractors/javascript.ts b/src/extractors/javascript.ts index 56c97b280..8c9931d22 100644 --- a/src/extractors/javascript.ts +++ b/src/extractors/javascript.ts @@ -535,71 +535,7 @@ function extractDestructuredBindingsWalk( if (inner) declNode = inner; } - const t = declNode.type; - if ( - (t === 'lexical_declaration' || t === 'variable_declaration') && - declNode.text.startsWith('const ') - ) { - for (let j = 0; j < declNode.childCount; j++) { - const declarator = declNode.child(j); - if (declarator?.type !== 'variable_declarator') continue; - const nameN = declarator.childForFieldName('name'); - if (nameN && nameN.type === 'object_pattern') { - extractDestructuredBindings( - nameN, - nodeStartLine(declNode), - nodeEndLine(declNode), - definitions, - ); - // Record CJS require bindings so importedNames can classify these names - // as import artifacts, preventing false local-definition blocking (#1661). 
- if (cjsRequireBindings) { - const valueN = declarator.childForFieldName('value'); - if (valueN?.type === 'call_expression') { - const fn = valueN.childForFieldName('function'); - if (fn?.text === 'require') { - const args = valueN.childForFieldName('arguments'); - const strArg = args && findChild(args, 'string'); - if (strArg) { - const modPath = strArg.text.replace(/['"]/g, ''); - const names: string[] = []; - for (let k = 0; k < nameN.childCount; k++) { - const prop = nameN.child(k); - if (!prop) continue; - if ( - prop.type === 'shorthand_property_identifier_pattern' || - prop.type === 'shorthand_property_identifier' - ) { - names.push(prop.text); - } else if (prop.type === 'pair_pattern' || prop.type === 'pair') { - const val = prop.childForFieldName('value'); - if ( - val?.type === 'identifier' || - val?.type === 'shorthand_property_identifier_pattern' - ) { - names.push(val.text); - } - } - } - if (names.length > 0) { - cjsRequireBindings.push({ names, source: modPath }); - } - } - } - } - } - } else if (nameN && nameN.type === 'array_pattern') { - // `const [x, y] = ...` — emit a single constant node whose name is the - // full array pattern text (e.g. `[x, y]`), matching native engine behaviour. - definitions.push({ - name: nameN.text, - kind: 'constant', - line: nodeStartLine(declNode), - endLine: nodeEndLine(declNode), - }); - } - } - } + extractDestructuredDeclarators(declNode, definitions, cjsRequireBindings); if (child.type !== 'export_statement') { extractDestructuredBindingsWalk(child, definitions, cjsRequireBindings); @@ -607,6 +543,93 @@ function extractDestructuredBindingsWalk( } } +/** + * Extract object/array-pattern destructured const bindings from a single declaration + * node — the per-declaration counterpart to extractDestructuredBindingsWalk's tree walk. 
+ */ +function extractDestructuredDeclarators( + declNode: TreeSitterNode, + definitions: Definition[], + cjsRequireBindings?: Array<{ names: string[]; source: string }>, +): void { + const t = declNode.type; + if ( + (t !== 'lexical_declaration' && t !== 'variable_declaration') || + !declNode.text.startsWith('const ') + ) { + return; + } + + for (let j = 0; j < declNode.childCount; j++) { + const declarator = declNode.child(j); + if (declarator?.type !== 'variable_declarator') continue; + const nameN = declarator.childForFieldName('name'); + if (nameN && nameN.type === 'object_pattern') { + extractDestructuredBindings( + nameN, + nodeStartLine(declNode), + nodeEndLine(declNode), + definitions, + ); + // Record CJS require bindings so importedNames can classify these names + // as import artifacts, preventing false local-definition blocking (#1661). + if (cjsRequireBindings) { + const valueN = declarator.childForFieldName('value'); + const binding = extractCjsRequireBinding(nameN, valueN); + if (binding) cjsRequireBindings.push(binding); + } + } else if (nameN && nameN.type === 'array_pattern') { + // `const [x, y] = ...` — emit a single constant node whose name is the + // full array pattern text (e.g. `[x, y]`), matching native engine behaviour. + definitions.push({ + name: nameN.text, + kind: 'constant', + line: nodeStartLine(declNode), + endLine: nodeEndLine(declNode), + }); + } + } +} + +/** + * Compute a `const { X } = require('./path')` CJS binding record from a destructured + * object-pattern name node and its declarator's value node, for import-artifact + * classification (#1661). Returns null when the value isn't a static require() call or + * no destructured names could be extracted. Shared by the walk-based + * (extractDestructuredDeclarators) and query-based (handleVariableDecl) const-destructuring + * paths, which independently need the identical extraction. 
+ */ +function extractCjsRequireBinding( + nameN: TreeSitterNode, + valueN: TreeSitterNode | null | undefined, +): { names: string[]; source: string } | null { + if (valueN?.type !== 'call_expression') return null; + const fn = valueN.childForFieldName('function'); + if (fn?.text !== 'require') return null; + const args = valueN.childForFieldName('arguments'); + const strArg = args && findChild(args, 'string'); + if (!strArg) return null; + const modPath = strArg.text.replace(/['"]/g, ''); + const names: string[] = []; + for (let k = 0; k < nameN.childCount; k++) { + const prop = nameN.child(k); + if (!prop) continue; + if ( + prop.type === 'shorthand_property_identifier_pattern' || + prop.type === 'shorthand_property_identifier' + ) { + names.push(prop.text); + } else if (prop.type === 'pair_pattern' || prop.type === 'pair') { + const val = prop.childForFieldName('value'); + if (val?.type === 'identifier' || val?.type === 'shorthand_property_identifier_pattern') { + names.push(val.text); + } + } + } + if (names.length === 0) return null; + return { names, source: modPath }; +} + /** Extract constant definitions from a `const` declaration node. 
*/ function extractConstDeclarators(declNode: TreeSitterNode, definitions: Definition[]): void { const t = declNode.type; @@ -1095,123 +1118,133 @@ function handleVariableDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { for (let i = 0; i < node.childCount; i++) { const declarator = node.child(i); if (declarator && declarator.type === 'variable_declarator') { - const nameN = declarator.childForFieldName('name'); - const valueN = declarator.childForFieldName('value'); - - if (nameN && valueN) { - const valType = valueN.type; - if ( - valType === 'arrow_function' || - valType === 'function_expression' || - valType === 'function' || - valType === 'generator_function' - ) { - const varFnChildren = extractParameters(valueN); - ctx.definitions.push({ - name: nameN.text, - kind: 'function', - line: nodeStartLine(node), - endLine: nodeEndLine(valueN), - children: varFnChildren.length > 0 ? varFnChildren : undefined, - }); - } else if ( - isConst && - nameN.type === 'identifier' && - isConstantValue(valueN) && - !hasFunctionScopeAncestor(node) - ) { - ctx.definitions.push({ - name: nameN.text, - kind: 'constant', - line: nodeStartLine(node), - endLine: nodeEndLine(node), - }); - // Phase 8.3f: extract function/arrow properties from object literals so that - // this.method() calls inside Object.defineProperty accessors can resolve them. - // Scope guard: hasFunctionScopeAncestor mirrors the Rust path's find_parent_of_types - // check and the sibling destructured-binding branch below — skips object literals - // inside function bodies to avoid polluting the global definition index with - // local variable properties (e.g. `localObj.fn` from `const localObj = { fn: ... }` - // inside a function). 
- if (valueN.type === 'object') { - extractObjectLiteralFunctions(valueN, nameN.text, ctx.definitions); - } - } else if ( - !isConst && - nameN.type === 'identifier' && - valueN.type === 'object' && - !hasFunctionScopeAncestor(node) - ) { - // `let`/`var` object literals: extract qualified method definitions so that - // `obj.method()` calls resolve correctly. Mirrors Rust match_js_objlit_qualified_method_defs - // which emits method_definition qualified names for ALL declaration kinds and - // pair+arrow/function for let/var only (const is already handled above). - // Scope guard prevents local object properties from polluting the global index. - extractObjectLiteralFunctions(valueN, nameN.text, ctx.definitions); - } else if (isConst && nameN.type === 'object_pattern' && !hasFunctionScopeAncestor(node)) { - // Destructured bindings: const { handleToken, checkPermissions } = initAuth(...) - // Each destructured property becomes a function definition so it can be - // resolved when passed as a callback (e.g. router.use(handleToken)). - // Restricted to const to avoid creating spurious definitions for - // transient let/var destructuring (e.g. let { userId } = parseRequest(req)). - // Scope guard mirrors extractDestructuredBindingsWalk (query path) and - // handle_var_decl (Rust path) — skips bindings inside function bodies. - extractDestructuredBindings( - nameN, - nodeStartLine(node), - nodeEndLine(node), - ctx.definitions, - ); - // Record CJS require bindings for import-artifact classification (#1661). 
- if (valueN?.type === 'call_expression') { - const fn = valueN.childForFieldName('function'); - if (fn?.text === 'require') { - const args = valueN.childForFieldName('arguments'); - const strArg = args && findChild(args, 'string'); - if (strArg) { - const modPath = strArg.text.replace(/['"]/g, ''); - const names: string[] = []; - for (let k = 0; k < nameN.childCount; k++) { - const prop = nameN.child(k); - if (!prop) continue; - if ( - prop.type === 'shorthand_property_identifier_pattern' || - prop.type === 'shorthand_property_identifier' - ) { - names.push(prop.text); - } else if (prop.type === 'pair_pattern' || prop.type === 'pair') { - const val = prop.childForFieldName('value'); - if ( - val?.type === 'identifier' || - val?.type === 'shorthand_property_identifier_pattern' - ) { - names.push(val.text); - } - } - } - if (names.length > 0) { - if (!ctx.cjsRequireBindings) ctx.cjsRequireBindings = []; - ctx.cjsRequireBindings.push({ names, source: modPath }); - } - } - } - } - } else if (isConst && nameN.type === 'array_pattern' && !hasFunctionScopeAncestor(node)) { - // Array destructuring: `const [x, y] = ...` — emit a single constant node - // whose name is the full array pattern text (e.g. `[x, y]`), matching - // native engine behaviour. Scope guard mirrors the object_pattern branch above. - ctx.definitions.push({ - name: nameN.text, - kind: 'constant', - line: nodeStartLine(node), - endLine: nodeEndLine(node), - }); - } - } + handleVariableDeclarator(node, declarator, isConst, ctx); } } } +/** + * Dispatch a single variable_declarator within a variable/lexical declaration to the + * handler matching its value/name-pattern kind. Mirrors the query-based path's + * per-capture handler functions (handleFnCapture, etc.) already used elsewhere in this file. 
+ */ +function handleVariableDeclarator( + node: TreeSitterNode, + declarator: TreeSitterNode, + isConst: boolean, + ctx: ExtractorOutput, +): void { + const nameN = declarator.childForFieldName('name'); + const valueN = declarator.childForFieldName('value'); + if (!nameN || !valueN) return; + + const valType = valueN.type; + if ( + valType === 'arrow_function' || + valType === 'function_expression' || + valType === 'function' || + valType === 'generator_function' + ) { + handleVarFnAssignment(node, nameN, valueN, ctx); + } else if ( + isConst && + nameN.type === 'identifier' && + isConstantValue(valueN) && + !hasFunctionScopeAncestor(node) + ) { + handleConstIdentifierAssignment(node, nameN, valueN, ctx); + } else if ( + !isConst && + nameN.type === 'identifier' && + valueN.type === 'object' && + !hasFunctionScopeAncestor(node) + ) { + // `let`/`var` object literals: extract qualified method definitions so that + // `obj.method()` calls resolve correctly. Mirrors Rust match_js_objlit_qualified_method_defs + // which emits method_definition qualified names for ALL declaration kinds and + // pair+arrow/function for let/var only (const is already handled above). + // Scope guard prevents local object properties from polluting the global index. + extractObjectLiteralFunctions(valueN, nameN.text, ctx.definitions); + } else if (isConst && nameN.type === 'object_pattern' && !hasFunctionScopeAncestor(node)) { + handleConstObjectPatternAssignment(node, nameN, valueN, ctx); + } else if (isConst && nameN.type === 'array_pattern' && !hasFunctionScopeAncestor(node)) { + // Array destructuring: `const [x, y] = ...` — emit a single constant node + // whose name is the full array pattern text (e.g. `[x, y]`), matching + // native engine behaviour. Scope guard mirrors the object_pattern branch above. + ctx.definitions.push({ + name: nameN.text, + kind: 'constant', + line: nodeStartLine(node), + endLine: nodeEndLine(node), + }); + } +} + +/** Handle `const/let fn = (...) 
=> {...}` — a function/arrow value assigned to a variable. */ +function handleVarFnAssignment( + node: TreeSitterNode, + nameN: TreeSitterNode, + valueN: TreeSitterNode, + ctx: ExtractorOutput, +): void { + const varFnChildren = extractParameters(valueN); + ctx.definitions.push({ + name: nameN.text, + kind: 'function', + line: nodeStartLine(node), + endLine: nodeEndLine(valueN), + children: varFnChildren.length > 0 ? varFnChildren : undefined, + }); +} + +/** Handle `const X = ` — a plain constant identifier assignment. */ +function handleConstIdentifierAssignment( + node: TreeSitterNode, + nameN: TreeSitterNode, + valueN: TreeSitterNode, + ctx: ExtractorOutput, +): void { + ctx.definitions.push({ + name: nameN.text, + kind: 'constant', + line: nodeStartLine(node), + endLine: nodeEndLine(node), + }); + // Phase 8.3f: extract function/arrow properties from object literals so that + // this.method() calls inside Object.defineProperty accessors can resolve them. + // Scope guard: hasFunctionScopeAncestor mirrors the Rust path's find_parent_of_types + // check and the sibling destructured-binding branch below — skips object literals + // inside function bodies to avoid polluting the global definition index with + // local variable properties (e.g. `localObj.fn` from `const localObj = { fn: ... }` + // inside a function). + if (valueN.type === 'object') { + extractObjectLiteralFunctions(valueN, nameN.text, ctx.definitions); + } +} + +/** Handle `const { a, b } = value` — destructured object-pattern const bindings. */ +function handleConstObjectPatternAssignment( + node: TreeSitterNode, + nameN: TreeSitterNode, + valueN: TreeSitterNode, + ctx: ExtractorOutput, +): void { + // Destructured bindings: const { handleToken, checkPermissions } = initAuth(...) + // Each destructured property becomes a function definition so it can be + // resolved when passed as a callback (e.g. router.use(handleToken)). 
+ // Restricted to const to avoid creating spurious definitions for + // transient let/var destructuring (e.g. let { userId } = parseRequest(req)). + // Scope guard mirrors extractDestructuredBindingsWalk (query path) and + // handle_var_decl (Rust path) — skips bindings inside function bodies. + extractDestructuredBindings(nameN, nodeStartLine(node), nodeEndLine(node), ctx.definitions); + // Record CJS require bindings for import-artifact classification (#1661). + const binding = extractCjsRequireBinding(nameN, valueN); + if (binding) { + if (!ctx.cjsRequireBindings) ctx.cjsRequireBindings = []; + ctx.cjsRequireBindings.push(binding); + } +} + /** * Phase 8.3f: extract function/arrow function properties from an object literal as standalone * definitions so that `this.method()` calls inside Object.defineProperty accessor functions can @@ -2004,6 +2037,230 @@ interface ContextCollectorOutputs { * before any declarator is processed (a function declared *after* its first * use would otherwise be missed). */ +/** + * Push node onto classStack when it's a named class declaration/expression, for + * method_definition qualification below. Returns whether a push happened. + * The `identifier`-only check keeps the original walk's behaviour (TS class names + * parse as type_identifier and were never pushed), while typeMapClass/objectRestClass + * elsewhere use the bare text like their original walks did. + */ +function pushClassContext( + classStack: string[], + className: string | null, + classNameIsIdentifier: boolean, +): boolean { + if (className && classNameIsIdentifier) { + classStack.push(className); + return true; + } + return false; +} + +/** Push node onto funcStack when it's a named function_declaration/generator_function_declaration. 
*/ +function pushFnDeclContext(funcStack: string[], node: TreeSitterNode): boolean { + const nameNode = node.childForFieldName('name'); + if (nameNode?.type === 'identifier') { + funcStack.push(nameNode.text); + return true; + } + return false; +} + +/** + * Resolve the raw method name from a method_definition's name field, unwrapping + * computed_property_name string literals (e.g. `['foo']() {}` -> 'foo'). Returns '' + * for non-string computed keys (no resolvable name). + */ +function resolveMethodDefinitionName(nameNode: TreeSitterNode): string { + if (nameNode.type !== 'computed_property_name') return nameNode.text; + const inner = nameNode.child(1); + if (!inner || (inner.type !== 'string' && inner.type !== 'string_fragment')) { + // Non-string computed key — skip adding to funcStack (no resolvable name). + return ''; + } + return inner.text.replace(/^['"]|['"]$/g, ''); +} + +/** + * Push node onto funcStack for a method_definition, qualified with the enclosing class + * name so the PTS key matches callerName from findCaller (which uses + * def.name = 'ClassName.method'). + */ +function pushMethodDefContext( + classStack: string[], + funcStack: string[], + node: TreeSitterNode, +): boolean { + const nameNode = node.childForFieldName('name'); + if (!nameNode) return false; + const enclosingClass = classStack.length > 0 ? classStack[classStack.length - 1] : null; + const rawName = resolveMethodDefinitionName(nameNode); + if (!rawName) return false; + const qualifiedName = enclosingClass ? `${enclosingClass}.${rawName}` : rawName; + funcStack.push(qualifiedName); + return true; +} + +/** + * Push node onto funcStack for `const process = (arr) => { ... }` — arrow/expression + * functions assigned to a variable have no `name` field on the function node itself. 
+ */ +function pushArrowVarContext(funcStack: string[], node: TreeSitterNode): boolean { + const nameNode = node.childForFieldName('name'); + const valueNode = node.childForFieldName('value'); + if ( + nameNode?.type === 'identifier' && + (valueNode?.type === 'arrow_function' || valueNode?.type === 'function_expression') + ) { + funcStack.push(nameNode.text); + return true; + } + return false; +} + +/** + * Push node onto funcStack for `obj.method = function() { ... }` func-prop assignment. + * Mirrors handleFuncPropAssignment's logic so for-of loops inside the body get the + * correct enclosingFunc (e.g. 'obj.method') instead of '' or the wrong outer + * function name. + */ +function pushFuncPropContext(funcStack: string[], node: TreeSitterNode): boolean { + const lhs = node.childForFieldName('left'); + const rhs = node.childForFieldName('right'); + if ( + lhs?.type === 'member_expression' && + (rhs?.type === 'function_expression' || rhs?.type === 'arrow_function') + ) { + const obj = lhs.childForFieldName('object'); + const prop = lhs.childForFieldName('property'); + if ( + obj?.type === 'identifier' && + (prop?.type === 'property_identifier' || prop?.type === 'identifier') && + !BUILTIN_GLOBALS.has(obj.text) && + prop.text !== 'prototype' + ) { + funcStack.push(`${obj.text}.${prop.text}`); + return true; + } + } + return false; +} + +/** + * Compute the class name (and whether it's a plain identifier) for a class_declaration/ + * class-expression node — read once, shared by pushClassContext and computeChildContext. + * Returns nulls/false for any other node type. + */ +function computeClassNameContext( + node: TreeSitterNode, + isClassDecl: boolean, + isClassExpr: boolean, +): { className: string | null; classNameIsIdentifier: boolean } { + if (!isClassDecl && !isClassExpr) return { className: null, classNameIsIdentifier: false }; + const nameNode = node.childForFieldName('name'); + return { + className: nameNode?.text ?? 
null, + classNameIsIdentifier: nameNode?.type === 'identifier', + }; +} + +/** + * Dispatch the enclosing-context stack push for a node to the handler matching its type. + * Returns which stack (if any) was pushed, so the caller can pop the matching stack + * after visiting children. + */ +function pushEnclosingContext( + node: TreeSitterNode, + t: string, + isClassDecl: boolean, + isClassExpr: boolean, + isFnDecl: boolean, + className: string | null, + classNameIsIdentifier: boolean, + classStack: string[], + funcStack: string[], +): { pushedFunc: boolean; pushedClass: boolean } { + if (isClassDecl || isClassExpr) { + return { + pushedFunc: false, + pushedClass: pushClassContext(classStack, className, classNameIsIdentifier), + }; + } + if (isFnDecl) { + return { pushedFunc: pushFnDeclContext(funcStack, node), pushedClass: false }; + } + if (t === 'method_definition') { + return { pushedFunc: pushMethodDefContext(classStack, funcStack, node), pushedClass: false }; + } + if (t === 'variable_declarator') { + return { pushedFunc: pushArrowVarContext(funcStack, node), pushedClass: false }; + } + if (t === 'assignment_expression') { + return { pushedFunc: pushFuncPropContext(funcStack, node), pushedClass: false }; + } + return { pushedFunc: false, pushedClass: false }; +} + +/** + * Run the per-node-type collectors (typeMap/binding extraction) for a single node during + * runContextCollectorWalk's traversal, mirroring the query-based path's capture-handler + * pattern (handleFnCapture, etc.) already used elsewhere in this file. 
+ */ +function dispatchNodeCollectors( + node: TreeSitterNode, + t: string, + typeMapClass: string | null, + objectRestClass: string | null, + funcStack: string[], + out: ContextCollectorOutputs, +): void { + if (t === 'variable_declarator') { + handleVarDeclaratorTypeMap( + node, + out.typeMap, + out.returnTypeMap, + out.callAssignments, + out.fnRefBindings, + ); + collectCollectionWrapBinding(node, out.fnRefBindings); + } else if (t === 'required_parameter' || t === 'optional_parameter') { + handleParamTypeMap(node, out.typeMap); + } else if (t === 'public_field_definition' || t === 'field_definition') { + handleFieldDefTypeMap(node, out.typeMap, typeMapClass); + } else if (t === 'assignment_expression') { + handlePropWriteTypeMap(node, out.typeMap, typeMapClass); + } else if (t === 'call_expression') { + handleDefinePropertyTypeMap(node, out.typeMap); + collectSpreadAndArrayFromBindings(node, out.spreadArgBindings, out.arrayCallbackBindings); + } else if (t === 'for_in_statement') { + const enclosingFunc = funcStack.length > 0 ? funcStack[funcStack.length - 1]! : ''; + collectForOfBinding(node, enclosingFunc, out.forOfBindings); + } + collectObjectRestParams(node, t, objectRestClass, out.objectRestParamBindings); +} + +/** + * Compute the typeMapClass/objectRestClass context to thread into this node's children — + * each concern keeps its own reset rules (see runContextCollectorWalk's doc comment). + */ +function computeChildContext( + t: string, + isClassDecl: boolean, + isClassExpr: boolean, + className: string | null, + typeMapClass: string | null, + objectRestClass: string | null, +): { childTypeMapClass: string | null; childObjectRestClass: string | null } { + const childTypeMapClass = isClassDecl ? className : isClassExpr ? 
null : typeMapClass; + let childObjectRestClass: string | null = null; + if (t === 'class_declaration' || t === 'class') { + childObjectRestClass = className; + } else if (t === 'class_body') { + childObjectRestClass = objectRestClass; + } + return { childTypeMapClass, childObjectRestClass }; +} + function runContextCollectorWalk(rootNode: TreeSitterNode, out: ContextCollectorOutputs): void { const funcStack: string[] = []; const classStack: string[] = []; @@ -2021,127 +2278,37 @@ function runContextCollectorWalk(rootNode: TreeSitterNode, out: ContextCollector const isClassExpr = t === 'class'; const isFnDecl = t === 'function_declaration' || t === 'generator_function_declaration'; - // Class name read once, shared by every concern that needs it below. - let className: string | null = null; - let classNameIsIdentifier = false; - if (isClassDecl || isClassExpr) { - const nameNode = node.childForFieldName('name'); - className = nameNode?.text ?? null; - classNameIsIdentifier = nameNode?.type === 'identifier'; - } + const { className, classNameIsIdentifier } = computeClassNameContext( + node, + isClassDecl, + isClassExpr, + ); // ── spread/for-of enclosing-context stacks (push on enter, pop after children) ── - let pushedFunc = false; - let pushedClass = false; - if (isClassDecl || isClassExpr) { - // The stack push keeps the original walk's `identifier`-only check (TS - // class names parse as type_identifier and were never pushed), while - // typeMapClass/objectRestClass below use the bare text like their - // original walks did. 
- if (className && classNameIsIdentifier) { - classStack.push(className); - pushedClass = true; - } - } else if (isFnDecl) { - const nameNode = node.childForFieldName('name'); - if (nameNode?.type === 'identifier') { - funcStack.push(nameNode.text); - pushedFunc = true; - } - } else if (t === 'method_definition') { - const nameNode = node.childForFieldName('name'); - if (nameNode) { - // Qualify with the enclosing class name so the PTS key matches - // callerName from findCaller (which uses def.name = 'ClassName.method'). - const enclosingClass = classStack.length > 0 ? classStack[classStack.length - 1] : null; - let rawName: string; - if (nameNode.type === 'computed_property_name') { - const inner = nameNode.child(1); - if (!inner || (inner.type !== 'string' && inner.type !== 'string_fragment')) { - // Non-string computed key — skip adding to funcStack (no resolvable name). - rawName = ''; - } else { - rawName = inner.text.replace(/^['"]|['"]$/g, ''); - } - } else { - rawName = nameNode.text; - } - if (rawName) { - const qualifiedName = enclosingClass ? `${enclosingClass}.${rawName}` : rawName; - funcStack.push(qualifiedName); - pushedFunc = true; - } - } - } else if (t === 'variable_declarator') { - // `const process = (arr) => { ... }` — arrow/expression functions assigned - // to a variable have no `name` field on the function node itself. - const nameNode = node.childForFieldName('name'); - const valueNode = node.childForFieldName('value'); - if ( - nameNode?.type === 'identifier' && - (valueNode?.type === 'arrow_function' || valueNode?.type === 'function_expression') - ) { - funcStack.push(nameNode.text); - pushedFunc = true; - } - } else if (t === 'assignment_expression') { - // `obj.method = function() { ... }` — func-prop assignment. - // Mirror handleFuncPropAssignment's logic so for-of loops inside the - // body get the correct enclosingFunc (e.g. 'obj.method') instead of - // '' or the wrong outer function name. 
- const lhs = node.childForFieldName('left'); - const rhs = node.childForFieldName('right'); - if ( - lhs?.type === 'member_expression' && - (rhs?.type === 'function_expression' || rhs?.type === 'arrow_function') - ) { - const obj = lhs.childForFieldName('object'); - const prop = lhs.childForFieldName('property'); - if ( - obj?.type === 'identifier' && - (prop?.type === 'property_identifier' || prop?.type === 'identifier') && - !BUILTIN_GLOBALS.has(obj.text) && - prop.text !== 'prototype' - ) { - funcStack.push(`${obj.text}.${prop.text}`); - pushedFunc = true; - } - } - } + const { pushedFunc, pushedClass } = pushEnclosingContext( + node, + t, + isClassDecl, + isClassExpr, + isFnDecl, + className, + classNameIsIdentifier, + classStack, + funcStack, + ); // ── per-node collectors (class nodes match none of these types) ── - if (t === 'variable_declarator') { - handleVarDeclaratorTypeMap( - node, - out.typeMap, - out.returnTypeMap, - out.callAssignments, - out.fnRefBindings, - ); - collectCollectionWrapBinding(node, out.fnRefBindings); - } else if (t === 'required_parameter' || t === 'optional_parameter') { - handleParamTypeMap(node, out.typeMap); - } else if (t === 'public_field_definition' || t === 'field_definition') { - handleFieldDefTypeMap(node, out.typeMap, typeMapClass); - } else if (t === 'assignment_expression') { - handlePropWriteTypeMap(node, out.typeMap, typeMapClass); - } else if (t === 'call_expression') { - handleDefinePropertyTypeMap(node, out.typeMap); - collectSpreadAndArrayFromBindings(node, out.spreadArgBindings, out.arrayCallbackBindings); - } else if (t === 'for_in_statement') { - const enclosingFunc = funcStack.length > 0 ? funcStack[funcStack.length - 1]! 
: ''; - collectForOfBinding(node, enclosingFunc, out.forOfBindings); - } - collectObjectRestParams(node, t, objectRestClass, out.objectRestParamBindings); + dispatchNodeCollectors(node, t, typeMapClass, objectRestClass, funcStack, out); // ── child context per concern ── - const childTypeMapClass = isClassDecl ? className : isClassExpr ? null : typeMapClass; - let childObjectRestClass: string | null = null; - if (t === 'class_declaration' || t === 'class') { - childObjectRestClass = className; - } else if (t === 'class_body') { - childObjectRestClass = objectRestClass; - } + const { childTypeMapClass, childObjectRestClass } = computeChildContext( + t, + isClassDecl, + isClassExpr, + className, + typeMapClass, + objectRestClass, + ); for (let i = 0; i < node.childCount; i++) { walk(node.child(i)!, depth + 1, childTypeMapClass, childObjectRestClass); From ad14cf78e638fed7f3ebe21c1184e4a36b0a527f Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 06:45:04 -0600 Subject: [PATCH 10/39] refactor: decompose resolveFallbackTargets/buildEdges/buildCallEdgesNative in build-edges.ts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit docs check acknowledged: pure internal extract-method refactor, no new features, commands, languages, or architecture changes — README/CLAUDE/ROADMAP do not need updates. 
Impact: 20 functions changed, 15 affected --- .../graph/builder/stages/build-edges.ts | 768 +++++++++++------- src/infrastructure/config.ts | 5 +- src/types.ts | 3 +- 3 files changed, 468 insertions(+), 308 deletions(-) diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index e5663f339..efeee79f6 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -496,6 +496,80 @@ function propagateReturnTypesAcrossFiles( // ── Call edges (native engine) ────────────────────────────────────────── +/** + * Build the deduplicated native typeMap array for a single file's symbols. + * Deduplicate: keep highest-confidence entry per name (first-wins on tie), + * matching JS setTypeMapEntry semantics. The Map branch is already + * deduped by setTypeMapEntry — this loop is only needed for the Array + * branch (pre-rebuilt native addon) but runs unconditionally as + * belt-and-suspenders since it's a cheap O(n) pass. + */ +function buildNativeTypeMapEntries( + symbols: ExtractorOutput, +): Array<{ name: string; typeName: string; confidence: number }> { + const typeMapRaw: Array<{ name: string; typeName: string; confidence: number }> = + symbols.typeMap instanceof Map + ? [...symbols.typeMap.entries()].map(([name, entry]) => ({ + name, + typeName: typeof entry === 'string' ? entry : entry.type, + confidence: typeof entry === 'object' ? entry.confidence : 0.9, + })) + : Array.isArray(symbols.typeMap) + ? (symbols.typeMap as Array<{ name: string; typeName: string; confidence: number }>) + : []; + const typeMapDedup = new Map(); + for (const entry of typeMapRaw) { + const existing = typeMapDedup.get(entry.name); + if (!existing || entry.confidence > existing.confidence) { + typeMapDedup.set(entry.name, entry); + } + } + return [...typeMapDedup.values()]; +} + +/** Build the native FFI file entry for a single file, including pts-analysis bindings. 
*/ +function buildNativeFileEntry( + ctx: PipelineContext, + relPath: string, + fileNodeId: number, + symbols: ExtractorOutput, + rootDir: string, +): NativeFileEntry { + const importedNames = buildImportedNamesForNative(ctx, relPath, symbols, rootDir); + const typeMap = buildNativeTypeMapEntries(symbols); + return { + file: relPath, + fileNodeId, + definitions: symbols.definitions.map((d) => { + const params = d.children?.filter((c) => c.kind === 'parameter').map((c) => c.name); + return { + name: d.name, + kind: d.kind, + line: d.line, + endLine: d.endLine ?? null, + params: params?.length ? params : undefined, + }; + }), + calls: symbols.calls, + importedNames, + classes: symbols.classes, + typeMap, + fnRefBindings: symbols.fnRefBindings?.length ? symbols.fnRefBindings : undefined, + paramBindings: symbols.paramBindings?.length ? symbols.paramBindings : undefined, + thisCallBindings: symbols.thisCallBindings?.length ? symbols.thisCallBindings : undefined, + arrayElemBindings: symbols.arrayElemBindings?.length ? symbols.arrayElemBindings : undefined, + spreadArgBindings: symbols.spreadArgBindings?.length ? symbols.spreadArgBindings : undefined, + forOfBindings: symbols.forOfBindings?.length ? symbols.forOfBindings : undefined, + arrayCallbackBindings: symbols.arrayCallbackBindings?.length + ? symbols.arrayCallbackBindings + : undefined, + objectRestParamBindings: symbols.objectRestParamBindings?.length + ? symbols.objectRestParamBindings + : undefined, + objectPropBindings: symbols.objectPropBindings?.length ? 
symbols.objectPropBindings : undefined, + }; +} + function buildCallEdgesNative( ctx: PipelineContext, getNodeIdStmt: NodeIdStmt, @@ -511,63 +585,7 @@ function buildCallEdgesNative( const fileNodeRow = getNodeIdStmt.get(relPath, 'file', relPath, 0); if (!fileNodeRow) continue; - const importedNames = buildImportedNamesForNative(ctx, relPath, symbols, rootDir); - const typeMapRaw: Array<{ name: string; typeName: string; confidence: number }> = - symbols.typeMap instanceof Map - ? [...symbols.typeMap.entries()].map(([name, entry]) => ({ - name, - typeName: typeof entry === 'string' ? entry : entry.type, - confidence: typeof entry === 'object' ? entry.confidence : 0.9, - })) - : Array.isArray(symbols.typeMap) - ? (symbols.typeMap as Array<{ name: string; typeName: string; confidence: number }>) - : []; - // Deduplicate: keep highest-confidence entry per name (first-wins on tie), - // matching JS setTypeMapEntry semantics. The Map branch is already - // deduped by setTypeMapEntry — this loop is only needed for the Array - // branch (pre-rebuilt native addon) but runs unconditionally as - // belt-and-suspenders since it's a cheap O(n) pass. - const typeMapDedup = new Map(); - for (const entry of typeMapRaw) { - const existing = typeMapDedup.get(entry.name); - if (!existing || entry.confidence > existing.confidence) { - typeMapDedup.set(entry.name, entry); - } - } - const typeMap = [...typeMapDedup.values()]; - nativeFiles.push({ - file: relPath, - fileNodeId: fileNodeRow.id, - definitions: symbols.definitions.map((d) => { - const params = d.children?.filter((c) => c.kind === 'parameter').map((c) => c.name); - return { - name: d.name, - kind: d.kind, - line: d.line, - endLine: d.endLine ?? null, - params: params?.length ? params : undefined, - }; - }), - calls: symbols.calls, - importedNames, - classes: symbols.classes, - typeMap, - fnRefBindings: symbols.fnRefBindings?.length ? symbols.fnRefBindings : undefined, - paramBindings: symbols.paramBindings?.length ? 
symbols.paramBindings : undefined, - thisCallBindings: symbols.thisCallBindings?.length ? symbols.thisCallBindings : undefined, - arrayElemBindings: symbols.arrayElemBindings?.length ? symbols.arrayElemBindings : undefined, - spreadArgBindings: symbols.spreadArgBindings?.length ? symbols.spreadArgBindings : undefined, - forOfBindings: symbols.forOfBindings?.length ? symbols.forOfBindings : undefined, - arrayCallbackBindings: symbols.arrayCallbackBindings?.length - ? symbols.arrayCallbackBindings - : undefined, - objectRestParamBindings: symbols.objectRestParamBindings?.length - ? symbols.objectRestParamBindings - : undefined, - objectPropBindings: symbols.objectPropBindings?.length - ? symbols.objectPropBindings - : undefined, - }); + nativeFiles.push(buildNativeFileEntry(ctx, relPath, fileNodeRow.id, symbols, rootDir)); } const nativeEdges = native.buildCallEdges(nativeFiles, allNodes, [ @@ -1041,6 +1059,183 @@ function buildDefinitionParamsMap( // ── Per-call resolution helpers ───────────────────────────────────────── +/** + * RES-4: Kotlin member callable reference — `Greeter::greet` emits + * { name: 'greet', receiver: 'Greeter', dynamicKind: 'reflection' }. + * The receiver is the class qualifier (not a typeMap variable), so + * resolveCallTargets would find a same-named top-level function via + * byNameAndFile('greet', relPath) before the qualified form is tried. + * Prefer `Greeter.greet` in the same file first; fall through to the + * normal path only when no qualified match exists. 
+ */ +function resolveKotlinReflectionPreQualified( + call: Call, + relPath: string, + lookup: CallNodeLookup, +): ReadonlyArray<{ id: number; file: string; kind?: string }> { + if ( + call.dynamicKind === 'reflection' && + call.receiver && + !call.keyExpr && + !isModuleScopedLanguage(relPath) + ) { + return lookup + .byNameAndFile(`${call.receiver}.${call.name}`, relPath) + .filter((n) => n.kind === 'method' || n.kind === 'function'); + } + return []; +} + +/** + * Shared by both same-class fallback strategies below: derive the enclosing + * class name from the caller's qualified name (the segment immediately before + * the final dot, e.g. `Namespace.MyClass.method` → `MyClass`), then look up + * `ClassName.callName` as a method in the same file. + */ +function resolveSameClassQualifiedMethod( + callName: string, + callerName: string, + relPath: string, + lookup: CallNodeLookup, +): Array<{ id: number; file: string; kind?: string }> { + const lastDot = callerName.lastIndexOf('.'); + if (lastDot <= 0) return []; + const prevDot = callerName.lastIndexOf('.', lastDot - 1); + const className = callerName.slice(prevDot + 1, lastDot); + return lookup + .byNameAndFile(`${className}.${callName}`, relPath) + .filter((n) => n.kind === 'method'); +} + +/** + * Same-class `this.method()` fallback: when the call receiver is `this` and + * resolveCallTargets found nothing, derive the enclosing class name from the + * caller (e.g. `Logger.info` → class prefix `Logger`) and retry with the + * qualified method name `Logger._write`. This mirrors what the native Rust + * engine does implicitly via its class-scoped symbol table. + * NOTE: restricted to `this` only — `super.method()` targets a parent class, + * not the enclosing class, so qualifying with the child class name would + * produce a false edge when the child also defines a same-named method. 
+ */ +function resolveSameClassThisFallback( + call: Call, + callerName: string | null, + relPath: string, + lookup: CallNodeLookup, +): Array<{ id: number; file: string; kind?: string }> { + if (call.receiver !== 'this' || callerName == null) return []; + return resolveSameClassQualifiedMethod(call.name, callerName, relPath, lookup); +} + +/** + * Same-class bare-call fallback: when a no-receiver call can't be resolved + * globally, try the caller's own class as a qualifier. Handles C# static + * sibling calls: `IsValidEmail()` inside `Validators.ValidateUser` resolves + * to `Validators.IsValidEmail`. Skipped for JS/TS where bare calls are + * module-scoped, not class-scoped. + */ +function resolveSameClassBareCallFallback( + call: Call, + callerName: string | null, + relPath: string, + lookup: CallNodeLookup, +): Array<{ id: number; file: string; kind?: string }> { + if (call.receiver || callerName == null || isModuleScopedLanguage(relPath)) return []; + return resolveSameClassQualifiedMethod(call.name, callerName, relPath, lookup); +} + +/** + * RES-3: reflection with literal method name — JVM getMethod("name") / invokeMethod("name"). + * Java/Scala/Groovy methods are stored as class-qualified names (e.g. Reflection.greet), + * so lookup.byNameAndFile('greet', relPath) finds nothing. When dynamicKind='reflection' + * and keyExpr is set (a string-literal method name was captured), try the qualified form: + * 1. typeMap[receiver] → resolvedType → lookup `resolvedType.keyExpr` (type-annotated local) + * 2. callerName class prefix → `CallerClass.keyExpr` (same-class sibling, e.g. Groovy obj) + * Scoped to non-JS/TS files to avoid interfering with the JS reflection path. 
+ */ +function resolveReflectionKeyExprFallback( + call: Call, + callerName: string | null, + relPath: string, + typeMap: Map, + lookup: CallNodeLookup, +): Array<{ id: number; file: string; kind?: string }> { + if ( + call.dynamicKind !== 'reflection' || + !call.keyExpr || + !call.receiver || + isModuleScopedLanguage(relPath) + ) { + return []; + } + const typeEntry = typeMap.get(call.receiver); + const resolvedType = typeEntry + ? typeof typeEntry === 'string' + ? typeEntry + : (typeEntry as { type?: string }).type + : null; + if (resolvedType) { + const qualified = lookup + .byNameAndFile(`${resolvedType}.${call.keyExpr}`, relPath) + .filter((n) => n.kind === 'method' || n.kind === 'function'); + if (qualified.length > 0) return qualified; + } + if (callerName != null) { + const lastDot = callerName.lastIndexOf('.'); + if (lastDot > 0) { + const prevDot = callerName.lastIndexOf('.', lastDot - 1); + const callerClass = callerName.slice(prevDot + 1, lastDot); + const qualified = lookup + .byNameAndFile(`${callerClass}.${call.keyExpr}`, relPath) + .filter((n) => n.kind === 'method' || n.kind === 'function'); + if (qualified.length > 0) return qualified; + } + } + return []; +} + +/** + * Object.defineProperty accessor fallback: when a function is registered as + * a getter/setter via `Object.defineProperty(obj, "bar", { get: getter })`, + * calls to `this.X()` inside `getter` resolve against `obj` (this === obj + * when the accessor is invoked). If the same-class fallback above found + * nothing, try treating `obj` as the receiver and look up `obj.X` in the + * typeMap, or fall back to a same-file lookup of any definition named X + * that belongs to the object literal or its type. 
+ */ +function resolveDefinePropertyAccessorFallback( + call: Call, + callerName: string | null, + relPath: string, + typeMap: Map, + lookup: CallNodeLookup, + definePropertyReceivers: Map | undefined, +): Array<{ id: number; file: string; kind?: string }> { + if (call.receiver !== 'this' || callerName == null || !definePropertyReceivers) return []; + const receiverVarName = definePropertyReceivers.get(callerName); + if (!receiverVarName) return []; + + const typeEntry = typeMap.get(receiverVarName); + const typeName = typeEntry + ? typeof typeEntry === 'string' + ? typeEntry + : (typeEntry as { type?: string }).type + : null; + if (typeName) { + const qualified = lookup.byNameAndFile(`${typeName}.${call.name}`, relPath); + if (qualified.length > 0) return [...qualified]; + } + // If still no targets, search for any definition named `call.name` in + // the same file — handles plain object literals where the method isn't + // qualified (e.g. `const obj = { baz() {} }` defines `baz` directly). + // Note: this is intentionally broad — it matches any same-file definition + // with the called name, not just members of the receiver object. This is + // the same behaviour used by the native post-pass path (buildDefinePropertyPostPass). + const sameFile = lookup.byNameAndFile(call.name, relPath); + if (sameFile.length > 0) return [...sameFile]; + return []; +} + /** * Resolve targets for a single call site with all JS-path fallbacks applied. * @@ -1064,24 +1259,7 @@ function resolveFallbackTargets( targets: ReadonlyArray<{ id: number; file: string; kind?: string }>; importedFrom: string | null | undefined; } { - // RES-4: Kotlin member callable reference — `Greeter::greet` emits - // { name: 'greet', receiver: 'Greeter', dynamicKind: 'reflection' }. - // The receiver is the class qualifier (not a typeMap variable), so - // resolveCallTargets would find a same-named top-level function via - // byNameAndFile('greet', relPath) before the qualified form is tried. 
- // Prefer `Greeter.greet` in the same file first; fall through to the - // normal path only when no qualified match exists. - let preQualifiedTargets: ReadonlyArray<{ id: number; file: string; kind?: string }> = []; - if ( - call.dynamicKind === 'reflection' && - call.receiver && - !call.keyExpr && - !isModuleScopedLanguage(relPath) - ) { - preQualifiedTargets = lookup - .byNameAndFile(`${call.receiver}.${call.name}`, relPath) - .filter((n) => n.kind === 'method' || n.kind === 'function'); - } + const preQualifiedTargets = resolveKotlinReflectionPreQualified(call, relPath, lookup); let { targets, importedFrom } = preQualifiedTargets.length > 0 @@ -1098,123 +1276,41 @@ function resolveFallbackTargets( caller.callerName, ); - // Same-class `this.method()` fallback: when the call receiver is `this` and - // resolveCallTargets found nothing, derive the enclosing class name from the - // caller (e.g. `Logger.info` → class prefix `Logger`) and retry with the - // qualified method name `Logger._write`. This mirrors what the native Rust - // engine does implicitly via its class-scoped symbol table. - // NOTE: restricted to `this` only — `super.method()` targets a parent class, - // not the enclosing class, so qualifying with the child class name would - // produce a false edge when the child also defines a same-named method. - if (targets.length === 0 && call.receiver === 'this' && caller.callerName != null) { - const lastDot = caller.callerName.lastIndexOf('.'); - if (lastDot > 0) { - const prevDot = caller.callerName.lastIndexOf('.', lastDot - 1); - const className = caller.callerName.slice(prevDot + 1, lastDot); - const qualified = lookup - .byNameAndFile(`${className}.${call.name}`, relPath) - .filter((n) => n.kind === 'method'); - if (qualified.length > 0) targets = qualified; - } + // Fallback strategies, applied in order until one yields a match. 
Each + // helper folds its own applicability guard internally (see helper doc + // comments above) — the checks here are unchanged from before, just + // relocated to keep this dispatcher a thin, low-complexity orchestrator. + if (targets.length === 0) { + const qualified = resolveSameClassThisFallback(call, caller.callerName, relPath, lookup); + if (qualified.length > 0) targets = qualified; } - // Same-class bare-call fallback: when a no-receiver call can't be resolved - // globally, try the caller's own class as a qualifier. Handles C# static - // sibling calls: `IsValidEmail()` inside `Validators.ValidateUser` resolves - // to `Validators.IsValidEmail`. Skipped for JS/TS where bare calls are - // module-scoped, not class-scoped. - if ( - targets.length === 0 && - !call.receiver && - caller.callerName != null && - !isModuleScopedLanguage(relPath) - ) { - const lastDot = caller.callerName.lastIndexOf('.'); - if (lastDot > 0) { - const prevDot = caller.callerName.lastIndexOf('.', lastDot - 1); - const className = caller.callerName.slice(prevDot + 1, lastDot); - const qualified = lookup - .byNameAndFile(`${className}.${call.name}`, relPath) - .filter((n) => n.kind === 'method'); - if (qualified.length > 0) targets = qualified; - } + if (targets.length === 0) { + const qualified = resolveSameClassBareCallFallback(call, caller.callerName, relPath, lookup); + if (qualified.length > 0) targets = qualified; } - // RES-3: reflection with literal method name — JVM getMethod("name") / invokeMethod("name"). - // Java/Scala/Groovy methods are stored as class-qualified names (e.g. Reflection.greet), - // so lookup.byNameAndFile('greet', relPath) finds nothing. When dynamicKind='reflection' - // and keyExpr is set (a string-literal method name was captured), try the qualified form: - // 1. typeMap[receiver] → resolvedType → lookup `resolvedType.keyExpr` (type-annotated local) - // 2. callerName class prefix → `CallerClass.keyExpr` (same-class sibling, e.g. 
Groovy obj) - // Scoped to non-JS/TS files to avoid interfering with the JS reflection path. - if ( - targets.length === 0 && - call.dynamicKind === 'reflection' && - call.keyExpr && - call.receiver && - !isModuleScopedLanguage(relPath) - ) { - const typeEntry = typeMap.get(call.receiver); - const resolvedType = typeEntry - ? typeof typeEntry === 'string' - ? typeEntry - : (typeEntry as { type?: string }).type - : null; - if (resolvedType) { - const qualified = lookup - .byNameAndFile(`${resolvedType}.${call.keyExpr}`, relPath) - .filter((n) => n.kind === 'method' || n.kind === 'function'); - if (qualified.length > 0) targets = qualified; - } - if (targets.length === 0 && caller.callerName != null) { - const lastDot = caller.callerName.lastIndexOf('.'); - if (lastDot > 0) { - const prevDot = caller.callerName.lastIndexOf('.', lastDot - 1); - const callerClass = caller.callerName.slice(prevDot + 1, lastDot); - const qualified = lookup - .byNameAndFile(`${callerClass}.${call.keyExpr}`, relPath) - .filter((n) => n.kind === 'method' || n.kind === 'function'); - if (qualified.length > 0) targets = qualified; - } - } + if (targets.length === 0) { + const qualified = resolveReflectionKeyExprFallback( + call, + caller.callerName, + relPath, + typeMap, + lookup, + ); + if (qualified.length > 0) targets = qualified; } - // Object.defineProperty accessor fallback: when a function is registered as - // a getter/setter via `Object.defineProperty(obj, "bar", { get: getter })`, - // calls to `this.X()` inside `getter` resolve against `obj` (this === obj - // when the accessor is invoked). If the same-class fallback above found - // nothing, try treating `obj` as the receiver and look up `obj.X` in the - // typeMap, or fall back to a same-file lookup of any definition named X - // that belongs to the object literal or its type. 
- if ( - targets.length === 0 && - call.receiver === 'this' && - caller.callerName != null && - definePropertyReceivers - ) { - const receiverVarName = definePropertyReceivers.get(caller.callerName); - if (receiverVarName) { - const typeEntry = typeMap.get(receiverVarName); - const typeName = typeEntry - ? typeof typeEntry === 'string' - ? typeEntry - : (typeEntry as { type?: string }).type - : null; - if (typeName) { - const qualified = lookup.byNameAndFile(`${typeName}.${call.name}`, relPath); - if (qualified.length > 0) targets = [...qualified]; - } - // If still no targets, search for any definition named `call.name` in - // the same file — handles plain object literals where the method isn't - // qualified (e.g. `const obj = { baz() {} }` defines `baz` directly). - // Note: this is intentionally broad — it matches any same-file definition - // with the called name, not just members of the receiver object. This is - // the same behaviour used by the native post-pass path (buildDefinePropertyPostPass). 
- if (targets.length === 0) { - const sameFile = lookup.byNameAndFile(call.name, relPath); - if (sameFile.length > 0) targets = [...sameFile]; - } - } + if (targets.length === 0) { + const qualified = resolveDefinePropertyAccessorFallback( + call, + caller.callerName, + relPath, + typeMap, + lookup, + definePropertyReceivers, + ); + if (qualified.length > 0) targets = qualified; } return { targets, importedFrom }; @@ -1904,7 +2000,7 @@ function loadNodes(ctx: PipelineContext): { rows: QueryNodeRow[]; scoped: boolea const existingFileCount = ( db.prepare("SELECT COUNT(*) as c FROM nodes WHERE kind = 'file'").get() as { c: number } ).c; - if (existingFileCount > 20) { + if (existingFileCount > ctx.config.build.largeCodebaseFileThreshold) { // Collect relevant files: changed files + their import targets const relevantFiles = new Set(fileSymbols.keys()); if (batchResolved) { @@ -1962,50 +2058,149 @@ function addLazyFallback(ctx: PipelineContext, scopedLoad: boolean): void { }; } -export async function buildEdges(ctx: PipelineContext): Promise { - const { db, engineName } = ctx; - - const getNodeIdStmt = makeGetNodeIdStmt(db); - +/** Load node-lookup structures used throughout edge construction (Phase 0 setup). */ +function prepareNodeLookups(ctx: PipelineContext): { + getNodeIdStmt: NodeIdStmt; + allNodesBefore: QueryNodeRow[]; +} { + const getNodeIdStmt = makeGetNodeIdStmt(ctx.db); const { rows: allNodesBefore, scoped: scopedLoad } = loadNodes(ctx); setupNodeLookups(ctx, allNodesBefore); addLazyFallback(ctx, scopedLoad); + return { getNodeIdStmt, allNodesBefore }; +} - const t0 = performance.now(); - - // Enrich typeMap for .ts/.tsx files using the TypeScript compiler API. - // Runs before call-edge construction so the accurate types are available - // for method-call resolution. Gated on config so users can opt out. 
- // - // Skip for small incremental builds: TypeScript program creation requires - // loading the entire tsconfig file list (~700ms startup on the codegraph - // corpus), which dominates the 1-file rebuild time. Native engine bypasses - // this entirely via the Rust orchestrator; WASM/JS engines need this gate - // to match native's effective behaviour on tiny incremental changes. - // Mirrors the smallFilesThreshold gates for nativeDb and native call-edges. +/** + * Enrich typeMap for .ts/.tsx files using the TypeScript compiler API. + * Runs before call-edge construction so the accurate types are available + * for method-call resolution. Gated on config so users can opt out. + * + * Skip for small incremental builds: TypeScript program creation requires + * loading the entire tsconfig file list (~700ms startup on the codegraph + * corpus), which dominates the 1-file rebuild time. Native engine bypasses + * this entirely via the Rust orchestrator; WASM/JS engines need this gate + * to match native's effective behaviour on tiny incremental changes. + * Mirrors the smallFilesThreshold gates for nativeDb and native call-edges. + */ +async function maybeEnrichTypeMapWithTsc(ctx: PipelineContext): Promise { const isSmallIncremental = !ctx.isFullBuild && ctx.fileSymbols.size <= ctx.config.build.smallFilesThreshold; if (ctx.config.build.typescriptResolver && !isSmallIncremental) { await enrichTypeMapWithTsc(ctx.rootDir, ctx.fileSymbols); } +} - const native = engineName === 'native' ? loadNative() : null; +/** + * Import-edge sub-phase: native fast path (with JS fallback for a #750-related + * key-format mismatch) or the JS path directly. + */ +function buildImportEdgesPhase( + ctx: PipelineContext, + getNodeIdStmt: NodeIdStmt, + allEdgeRows: EdgeRowTuple[], + native: NativeAddon | null, +): void { + // Skip native import-edge path for small incremental builds: napi-rs + // marshaling overhead (~13ms) exceeds Rust computation savings at this scale. 
+ const useNativeImportEdges = + native?.buildImportEdges && + (ctx.isFullBuild || ctx.fileSymbols.size > ctx.config.build.smallFilesThreshold); + if (useNativeImportEdges) { + const beforeLen = allEdgeRows.length; + buildImportEdgesNative(ctx, getNodeIdStmt, allEdgeRows, native!); + // Fallback: if native produced 0 import edges but there are imports to + // process, the native binary may have a key-format mismatch (e.g. Windows + // path separators — #750). Retry with the JS implementation. + // NOTE: This also fires for codebases where every import targets an + // external package (npm deps) that the resolver intentionally skips. + // In that case the JS path resolves zero edges too, so the only cost + // is the redundant JS traversal — no correctness impact. + const hasImports = [...ctx.fileSymbols.values()].some((s) => s.imports.length > 0); + if (allEdgeRows.length === beforeLen && hasImports) { + debug('Native buildImportEdges produced 0 edges — falling back to JS'); + buildImportEdges(ctx, getNodeIdStmt, allEdgeRows); + } + } else { + buildImportEdges(ctx, getNodeIdStmt, allEdgeRows); + } +} - // Phase 8.2: Augment typeMaps with cross-file return-type propagation before - // the transaction opens. This is pure in-memory mutation (no DB I/O) and must - // run outside the transaction to avoid leaving ctx.fileSymbols in a partial - // state if the transaction rolls back unexpectedly. - propagateReturnTypesAcrossFiles(ctx.fileSymbols, ctx, ctx.rootDir); - // Phase 8.5: Build CHA context after propagation so typeMap confidence values - // (used for RTA seeding) reflect any cross-file propagated types. - const chaCtx = buildChaContext(ctx.fileSymbols); +/** + * Call-edge sub-phase: native fast path (+ JS-only post-passes for + * Object.defineProperty accessor dispatch and CHA/RTA expansion — capabilities + * the native engine doesn't implement) or the full JS fallback path. 
+ */ +function buildCallEdgesPhase( + ctx: PipelineContext, + getNodeIdStmt: NodeIdStmt, + allEdgeRows: EdgeRowTuple[], + allNodesBefore: QueryNodeRow[], + native: NativeAddon | null, + chaCtx: ChaContext, +): void { + // Skip native call-edge path for small incremental builds: napi-rs + // marshaling overhead for allNodes exceeds Rust computation savings. + const useNativeCallEdges = + native?.buildCallEdges && + (ctx.isFullBuild || ctx.fileSymbols.size > ctx.config.build.smallFilesThreshold); + if (useNativeCallEdges) { + buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodesBefore, native!); + // The native engine receives all pts bindings (paramBindings, + // fnRefBindings, thisCallBindings, objectRestParamBindings, …) through + // NativeFileEntry and runs the same points-to solver as the JS path, so + // no pts post-passes are needed here. Only capabilities that remain + // JS-only run as post-passes below. + const sharedLookup = makeContextLookup(ctx, getNodeIdStmt); + // Object.defineProperty accessor post-pass: resolve this-dispatch inside + // getter/setter functions registered via Object.defineProperty. + buildDefinePropertyPostPass(ctx, getNodeIdStmt, allEdgeRows, sharedLookup); + // Phase 8.5 post-pass: augment native call edges with CHA-resolved dispatch. + // The native Rust engine has no knowledge of the CHA context, so this/self + // calls and interface dispatch are not expanded to concrete implementations. + buildChaPostPass(ctx, getNodeIdStmt, allEdgeRows, chaCtx); + } else { + buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows, chaCtx); + } +} - // Phase 1: Compute edges inside a better-sqlite3 transaction. - // Barrel-edge deletion lives here so that the JS path (which also inserts - // edges in this transaction) keeps deletion + insertion atomic. 
- // When using the native rusqlite path, insertion happens in Phase 2 on a - // separate connection — a crash between Phase 1 and Phase 2 would leave - // barrel edges missing until the next incremental rebuild re-creates them. +/** + * Apply the ts-native confidence floor to allEdgeRows in-memory. The proximity + * heuristic returns 0.3 for cross-module calls with no import-path evidence, + * but both WASM and native engines perform actual name-based symbol lookup, + * which is stronger evidence than pure proximity. Clamping to + * TS_NATIVE_CONFIDENCE_FLOOR (0.5) avoids unfairly dragging down the + * call-confidence metric. Sink edges (confidence = 0.0) are excluded so + * they remain below DEFAULT_MIN_CONFIDENCE. + */ +function applyTsNativeConfidenceFloor(allEdgeRows: EdgeRowTuple[]): void { + for (const r of allEdgeRows) { + if ( + r[2] === 'calls' && + r[5] === 'ts-native' && + (r[3] as number) > 0 && + (r[3] as number) < TS_NATIVE_CONFIDENCE_FLOOR + ) { + r[3] = TS_NATIVE_CONFIDENCE_FLOOR; + } + } +} + +/** + * Phase 1: Compute edges inside a better-sqlite3 transaction. + * Barrel-edge deletion lives here so that the JS path (which also inserts + * edges in this transaction) keeps deletion + insertion atomic. + * When using the native rusqlite path, insertion happens in Phase 2 on a + * separate connection — a crash between Phase 1 and Phase 2 would leave + * barrel edges missing until the next incremental rebuild re-creates them. 
+ */ +function computeAndInsertEdges( + ctx: PipelineContext, + getNodeIdStmt: NodeIdStmt, + allNodesBefore: QueryNodeRow[], + native: NativeAddon | null, + chaCtx: ChaContext, +): EdgeRowTuple[] { + const { db } = ctx; const allEdgeRows: EdgeRowTuple[] = []; const computeEdgesTx = db.transaction(() => { if (ctx.barrelOnlyFiles.size > 0) { @@ -2017,71 +2212,9 @@ export async function buildEdges(ctx: PipelineContext): Promise { } } - // Skip native import-edge path for small incremental builds: napi-rs - // marshaling overhead (~13ms) exceeds Rust computation savings at this scale. - const useNativeImportEdges = - native?.buildImportEdges && - (ctx.isFullBuild || ctx.fileSymbols.size > ctx.config.build.smallFilesThreshold); - if (useNativeImportEdges) { - const beforeLen = allEdgeRows.length; - buildImportEdgesNative(ctx, getNodeIdStmt, allEdgeRows, native!); - // Fallback: if native produced 0 import edges but there are imports to - // process, the native binary may have a key-format mismatch (e.g. Windows - // path separators — #750). Retry with the JS implementation. - // NOTE: This also fires for codebases where every import targets an - // external package (npm deps) that the resolver intentionally skips. - // In that case the JS path resolves zero edges too, so the only cost - // is the redundant JS traversal — no correctness impact. - const hasImports = [...ctx.fileSymbols.values()].some((s) => s.imports.length > 0); - if (allEdgeRows.length === beforeLen && hasImports) { - debug('Native buildImportEdges produced 0 edges — falling back to JS'); - buildImportEdges(ctx, getNodeIdStmt, allEdgeRows); - } - } else { - buildImportEdges(ctx, getNodeIdStmt, allEdgeRows); - } - - // Skip native call-edge path for small incremental builds: napi-rs - // marshaling overhead for allNodes exceeds Rust computation savings. 
- const useNativeCallEdges = - native?.buildCallEdges && - (ctx.isFullBuild || ctx.fileSymbols.size > ctx.config.build.smallFilesThreshold); - if (useNativeCallEdges) { - buildCallEdgesNative(ctx, getNodeIdStmt, allEdgeRows, allNodesBefore, native!); - // The native engine receives all pts bindings (paramBindings, - // fnRefBindings, thisCallBindings, objectRestParamBindings, …) through - // NativeFileEntry and runs the same points-to solver as the JS path, so - // no pts post-passes are needed here. Only capabilities that remain - // JS-only run as post-passes below. - const sharedLookup = makeContextLookup(ctx, getNodeIdStmt); - // Object.defineProperty accessor post-pass: resolve this-dispatch inside - // getter/setter functions registered via Object.defineProperty. - buildDefinePropertyPostPass(ctx, getNodeIdStmt, allEdgeRows, sharedLookup); - // Phase 8.5 post-pass: augment native call edges with CHA-resolved dispatch. - // The native Rust engine has no knowledge of the CHA context, so this/self - // calls and interface dispatch are not expanded to concrete implementations. - buildChaPostPass(ctx, getNodeIdStmt, allEdgeRows, chaCtx); - } else { - buildCallEdgesJS(ctx, getNodeIdStmt, allEdgeRows, chaCtx); - } - - // Apply ts-native confidence floor to allEdgeRows in-memory. The proximity - // heuristic returns 0.3 for cross-module calls with no import-path evidence, - // but both WASM and native engines perform actual name-based symbol lookup, - // which is stronger evidence than pure proximity. Clamping to - // TS_NATIVE_CONFIDENCE_FLOOR (0.5) avoids unfairly dragging down the - // call-confidence metric. Sink edges (confidence = 0.0) are excluded so - // they remain below DEFAULT_MIN_CONFIDENCE. 
- for (const r of allEdgeRows) { - if ( - r[2] === 'calls' && - r[5] === 'ts-native' && - (r[3] as number) > 0 && - (r[3] as number) < TS_NATIVE_CONFIDENCE_FLOOR - ) { - r[3] = TS_NATIVE_CONFIDENCE_FLOOR; - } - } + buildImportEdgesPhase(ctx, getNodeIdStmt, allEdgeRows, native); + buildCallEdgesPhase(ctx, getNodeIdStmt, allEdgeRows, allNodesBefore, native, chaCtx); + applyTsNativeConfidenceFloor(allEdgeRows); // When using native edge insert, skip JS insert here — do it after tx commits. // Otherwise insert edges within this transaction for atomicity. @@ -2091,26 +2224,55 @@ export async function buildEdges(ctx: PipelineContext): Promise { } }); computeEdgesTx(); + return allEdgeRows; +} - // Phase 2: Native rusqlite bulk insert (outside better-sqlite3 transaction - // to avoid SQLITE_BUSY contention). Uses NativeDatabase persistent connection. - // Standalone napi functions were removed in 6.17. - if (ctx.engineName === 'native' && ctx.nativeDb?.bulkInsertEdges && allEdgeRows.length > 0) { - const nativeEdges = allEdgeRows.map((r) => ({ - sourceId: r[0], - targetId: r[1], - kind: r[2], - confidence: r[3], - dynamic: r[4], - })); - const ok = ctx.nativeDb.bulkInsertEdges(nativeEdges); - if (!ok) { - debug('Native bulkInsertEdges failed — falling back to JS batchInsertEdges'); - batchInsertEdges(ctx.db, allEdgeRows); - } else { - applyEdgeTechniquesAfterNativeInsert(ctx.db, allEdgeRows); - } +/** + * Phase 2: Native rusqlite bulk insert (outside the better-sqlite3 transaction + * to avoid SQLITE_BUSY contention). Uses the NativeDatabase persistent + * connection. Standalone napi functions were removed in 6.17. 
+ */ +function insertNativeBulkEdges(ctx: PipelineContext, allEdgeRows: EdgeRowTuple[]): void { + if (!(ctx.engineName === 'native' && ctx.nativeDb?.bulkInsertEdges && allEdgeRows.length > 0)) { + return; } + const nativeEdges = allEdgeRows.map((r) => ({ + sourceId: r[0], + targetId: r[1], + kind: r[2], + confidence: r[3], + dynamic: r[4], + })); + const ok = ctx.nativeDb.bulkInsertEdges(nativeEdges); + if (!ok) { + debug('Native bulkInsertEdges failed — falling back to JS batchInsertEdges'); + batchInsertEdges(ctx.db, allEdgeRows); + } else { + applyEdgeTechniquesAfterNativeInsert(ctx.db, allEdgeRows); + } +} + +export async function buildEdges(ctx: PipelineContext): Promise { + const { getNodeIdStmt, allNodesBefore } = prepareNodeLookups(ctx); + + const t0 = performance.now(); + + await maybeEnrichTypeMapWithTsc(ctx); + + const native = ctx.engineName === 'native' ? loadNative() : null; + + // Phase 8.2: Augment typeMaps with cross-file return-type propagation before + // the transaction opens. This is pure in-memory mutation (no DB I/O) and must + // run outside the transaction to avoid leaving ctx.fileSymbols in a partial + // state if the transaction rolls back unexpectedly. + propagateReturnTypesAcrossFiles(ctx.fileSymbols, ctx, ctx.rootDir); + // Phase 8.5: Build CHA context after propagation so typeMap confidence values + // (used for RTA seeding) reflect any cross-file propagated types. + const chaCtx = buildChaContext(ctx.fileSymbols); + + const allEdgeRows = computeAndInsertEdges(ctx, getNodeIdStmt, allNodesBefore, native, chaCtx); + + insertNativeBulkEdges(ctx, allEdgeRows); // Phase 3: Reconnect saved reverse-dep edges (#932, #933). // When the WASM/JS path purged changed files, edges FROM reverse-dep files TO @@ -2126,7 +2288,7 @@ export async function buildEdges(ctx: PipelineContext): Promise { // committed so the DB is consistent. 
// Note: the native orchestrator success path runs this independently in // tryNativeOrchestrator; this phase covers the WASM and native-fallback paths. - runChaPostPass(db); + runChaPostPass(ctx.db); ctx.timing.edgesMs = performance.now() - t0; } diff --git a/src/infrastructure/config.ts b/src/infrastructure/config.ts index a6bf6321e..7e23f90ba 100644 --- a/src/infrastructure/config.ts +++ b/src/infrastructure/config.ts @@ -32,9 +32,8 @@ export const DEFAULTS = { /** * Minimum existing file-node count for a repo to be treated as a "large * codebase" when deciding whether to scope node loading to changed files. - * @reserved — currently not wired; loadNodes() in - * `src/domain/graph/builder/stages/build-edges.ts` still uses the - * hardcoded literal `20` at its `existingFileCount > 20` gate. + * Used by loadNodes() in `src/domain/graph/builder/stages/build-edges.ts` + * at its `existingFileCount > largeCodebaseFileThreshold` gate. */ largeCodebaseFileThreshold: 20, typescriptResolver: true, diff --git a/src/types.ts b/src/types.ts index 9ffb1f9b7..88c85c3b8 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1310,8 +1310,7 @@ export interface CodegraphConfig { /** * Minimum existing file-node count for a repo to be treated as a "large * codebase" when deciding whether to scope node loading to changed files. - * @reserved — currently not wired; see `largeCodebaseFileThreshold` in - * `src/infrastructure/config.ts` for wiring status. + * Used by loadNodes() in `src/domain/graph/builder/stages/build-edges.ts`. */ largeCodebaseFileThreshold: number; /** From 51c38160717618bac8e3fce58ffa8aabe17ac071 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 07:05:04 -0600 Subject: [PATCH 11/39] refactor: extract NativeOrchestrationSession from tryNativeOrchestrator in native-orchestrator.ts Pure extract-class decomposition of the DECOMPOSE-flagged worst offender in Titan phase 12 (halstead.bugs 1.17). 
tryNativeOrchestrator's native-DB lifecycle steps (open/build/backfill/handoff/close) are now owned by a NativeOrchestrationSession class; tryNativeOrchestrator becomes a thin sequencer of session method calls. No dispatch logic, fallback conditions, or error handling changed -- verified via full test suite (200/200 files, 3330/3330 tests), byte-identical resolution-benchmark output across all 34 fixture languages, and byte-identical native-engine DB dumps (full build + incremental early-exit) on tests/fixtures/sample-project before/after. tryNativeOrchestrator: cognitive 35->24, cyclomatic 34->25, halstead.bugs 1.17->0.83, mi 49.7->54.1. docs check acknowledged: pure internal extract-class refactor, no new features, commands, languages, or architecture changes -- README/CLAUDE/ ROADMAP do not need updates. Impact: 9 functions changed, 6 affected Impact: 9 functions changed, 6 affected --- .../builder/stages/native-orchestrator.ts | 205 ++++++++++++------ 1 file changed, 141 insertions(+), 64 deletions(-) diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index f6721c874..0ca5d25b2 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -2124,6 +2124,128 @@ async function runPostNativePasses( }; } +/** + * Owns the native-DB lifecycle steps for a single {@link tryNativeOrchestrator} + * run: opening the Rust-backed connection, invoking `nativeDb.buildGraph()` + * behind the FK-pragma toggle, detecting + backfilling dropped-language files, + * handing the WAL off to a fresh better-sqlite3 connection for the JS + * post-passes, and closing both connections. 
+ * + * Pure extract-class refactor of steps that previously lived inline in + * `tryNativeOrchestrator` (plus thin wrappers around the pre-existing + * `openNativeDatabase`/`handoffWalAfterNativeBuild`/dropped-language helpers) + * so the orchestrator function itself reads as a short sequence of steps. + * Behavior, ordering, and error handling are unchanged from the inline + * version — see the class's methods for the original comments explaining + * each step's rationale. + */ +class NativeOrchestrationSession { + private readonly ctx: PipelineContext; + + constructor(ctx: PipelineContext) { + this.ctx = ctx; + } + + /** Open NativeDatabase on demand (see {@link openNativeDatabase}). */ + open(): void { + openNativeDatabase(this.ctx); + } + + /** True once `open()` succeeded and the native `buildGraph` entry point is available. */ + get isReady(): boolean { + return !!this.ctx.nativeDb?.buildGraph; + } + + /** + * Invoke the Rust orchestrator's `buildGraph()`. + * + * The previous full build's clear_all_graph_data() sets PRAGMA foreign_keys = ON + * on the native connection. Older native binaries (< v3.14) do not delete + * dataflow_vertices / dataflow_summary / call_edge_id rows before purging + * nodes/edges during incremental builds, so FK enforcement causes the purge + * statements to fail silently — leaving stale nodes and edges that then get + * duplicated when the barrel-candidate re-parse re-inserts them (issue #1644). + * Disabling FK before buildGraph() lets the purge succeed; FK is restored in + * a finally block so post-passes (gap-repair, structure patch) retain FK protection + * even if buildGraph() throws. + * + * Caller must only invoke this after `isReady` is true. 
+ */ + runBuildGraph(): NativeOrchestratorResult { + const nativeDb = this.ctx.nativeDb as NonNullable; + try { + nativeDb.exec('PRAGMA foreign_keys = OFF'); + } catch { + // exec may not exist on very old addon versions — safe to ignore + } + + let resultJson: string; + try { + resultJson = nativeDb.buildGraph!( + this.ctx.rootDir, + JSON.stringify(this.ctx.config), + JSON.stringify(this.ctx.aliases), + JSON.stringify(this.ctx.opts), + ); + } finally { + // Restore FK enforcement so any subsequent writes to this connection + // (gap-repair, structure patch) retain FK protection — even if buildGraph() + // throws. + try { + nativeDb.exec('PRAGMA foreign_keys = ON'); + } catch { + // safe to ignore on very old addon versions + } + } + + return JSON.parse(resultJson) as NativeOrchestratorResult; + } + + /** + * Even on no-op rebuilds, dropped-language files added since the last + * full build are still missing from `nodes`/`file_hashes` (#1083), and + * WASM-only files deleted from disk leave stale rows behind (#1073). + * The orchestrator's collect_files skipped them, so its earlyExit + * doesn't imply DB consistency. Detect and repair the gap. + */ + async backfillDroppedLanguages(): Promise { + const gap = detectDroppedLanguageGap(this.ctx); + if (gap.missingAbs.length > 0 || gap.staleRel.length > 0) { + await backfillNativeDroppedFiles(this.ctx, gap); + } + } + + /** + * Ensure a proper better-sqlite3 connection is open before any post-pass that + * writes edges (dropped-language backfill, CHA) and before structure/analysis. + * When analysis fallback is needed, close the native proxy and reopen + * better-sqlite3 directly; otherwise hand the WAL off via + * {@link handoffWalAfterNativeBuild} unless a proxy conversion is already in + * place (deferred so CHA and technique-backfill can still write rows). + * + * Returns false if the DB reopen/handoff failed (caller should return a + * partial result with no post-pass phases completed). 
+ */ + ensureJsDbForPostPasses(needsStructure: boolean, needsAnalysisFallback: boolean): boolean { + if (!needsStructure && !needsAnalysisFallback) return true; + if (needsAnalysisFallback && this.ctx.nativeFirstProxy) { + closeNativeDb(this.ctx, 'pre-analysis-fallback'); + this.ctx.db = openDb(this.ctx.dbPath); + this.ctx.nativeFirstProxy = false; + return true; + } + if (!this.ctx.nativeFirstProxy) { + return handoffWalAfterNativeBuild(this.ctx); + } + return true; + } + + /** Close both the better-sqlite3 and native connections. */ + close(): void { + closeDbPair({ db: this.ctx.db, nativeDb: this.ctx.nativeDb }); + } +} + /** * Try the native build orchestrator. * @@ -2136,7 +2258,9 @@ async function runPostNativePasses( * invoke `nativeDb.buildGraph()` (the Rust pipeline), and run post-native * structure + analysis fallbacks. Lives in its own file to keep the Rust * orchestrator entry point separated from the JS-side `buildGraph()` driver - * in `pipeline.ts`. + * in `pipeline.ts`. The native-DB lifecycle steps (open/build/backfill/handoff/close) + * are delegated to {@link NativeOrchestrationSession} so this function reads as + * a thin sequencer. */ export async function tryNativeOrchestrator( ctx: PipelineContext, @@ -2147,58 +2271,17 @@ export async function tryNativeOrchestrator( return undefined; } - openNativeDatabase(ctx); + const session = new NativeOrchestrationSession(ctx); + session.open(); - if (!ctx.nativeDb?.buildGraph) return undefined; + if (!session.isReady) return undefined; - // The previous full build's clear_all_graph_data() sets PRAGMA foreign_keys = ON - // on the native connection. 
Older native binaries (< v3.14) do not delete - // dataflow_vertices / dataflow_summary / call_edge_id rows before purging - // nodes/edges during incremental builds, so FK enforcement causes the purge - // statements to fail silently — leaving stale nodes and edges that then get - // duplicated when the barrel-candidate re-parse re-inserts them (issue #1644). - // Disabling FK before buildGraph() lets the purge succeed; FK is restored in - // a finally block so post-passes (gap-repair, structure patch) retain FK protection - // even if buildGraph() throws. - try { - ctx.nativeDb.exec('PRAGMA foreign_keys = OFF'); - } catch { - // exec may not exist on very old addon versions — safe to ignore - } - - let resultJson: string; - try { - resultJson = ctx.nativeDb.buildGraph( - ctx.rootDir, - JSON.stringify(ctx.config), - JSON.stringify(ctx.aliases), - JSON.stringify(ctx.opts), - ); - } finally { - // Restore FK enforcement so any subsequent writes to this connection - // (gap-repair, structure patch) retain FK protection — even if buildGraph() - // throws. - try { - ctx.nativeDb.exec('PRAGMA foreign_keys = ON'); - } catch { - // safe to ignore on very old addon versions - } - } - - const result = JSON.parse(resultJson) as NativeOrchestratorResult; + const result = session.runBuildGraph(); if (result.earlyExit) { info('No changes detected'); - // Even on no-op rebuilds, dropped-language files added since the last - // full build are still missing from `nodes`/`file_hashes` (#1083), and - // WASM-only files deleted from disk leave stale rows behind (#1073). - // The orchestrator's collect_files skipped them, so its earlyExit - // doesn't imply DB consistency. Run the gap repair before returning. 
- const gap = detectDroppedLanguageGap(ctx); - if (gap.missingAbs.length > 0 || gap.staleRel.length > 0) { - await backfillNativeDroppedFiles(ctx, gap); - } - closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb }); + await session.backfillDroppedLanguages(); + session.close(); return 'early-exit'; } @@ -2265,21 +2348,15 @@ export async function tryNativeOrchestrator( // When analysis fallback is needed the handoff already happened above; when // neither structure nor analysis is needed the proxy conversion is deferred to // here so CHA and technique-backfill can still write rows. - if (needsStructure || needsAnalysisFallback) { - if (needsAnalysisFallback && ctx.nativeFirstProxy) { - closeNativeDb(ctx, 'pre-analysis-fallback'); - ctx.db = openDb(ctx.dbPath); - ctx.nativeFirstProxy = false; - } else if (!ctx.nativeFirstProxy && !handoffWalAfterNativeBuild(ctx)) { - // DB reopen failed — return partial result (no post-pass phases completed) - return formatNativeTimingResult(p, 0, analysisTiming, { - gapDetectMs: 0, - chaMs: 0, - thisDispatchMs: 0, - reclassifyMs: 0, - techniqueBackfillMs: 0, - }); - } + if (!session.ensureJsDbForPostPasses(needsStructure, needsAnalysisFallback)) { + // DB reopen failed — return partial result (no post-pass phases completed) + return formatNativeTimingResult(p, 0, analysisTiming, { + gapDetectMs: 0, + chaMs: 0, + thisDispatchMs: 0, + reclassifyMs: 0, + techniqueBackfillMs: 0, + }); } const postPassTimings = await runPostNativePasses(ctx, result); @@ -2316,6 +2393,6 @@ export async function tryNativeOrchestrator( await runDataflowVertexPass(ctx, result.changedFiles); } - closeDbPair({ db: ctx.db, nativeDb: ctx.nativeDb }); + session.close(); return formatNativeTimingResult(p, structurePatchMs, analysisTiming, postPassTimings); } From 63ab855b8a4093965be630e39bb38b6451430393 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 07:16:37 -0600 Subject: [PATCH 12/39] refactor: split embedRemote into request-executor and 
response-validator in remote.ts Extract-method refactor only, no behavior change. embedRemote's per-batch body is split into executeRemoteEmbeddingRequest (build request body, fetch-with-timeout, map network/timeout/status failures to EngineError) and mapRemoteEmbeddingResponse (shape-check, index-sort, embedding-field check, cross-batch dimension-consistency check, Float32Array conversion), with the outer loop calling both in the same order as before. Drops embedRemote from cognitive=36/halstead.bugs=1.10 (DECOMPOSE-flagged worst offender in GAUNTLET) to cognitive=6/bugs=0.38; both new helpers are well within thresholds (cognitive 11 and 9, bugs 0.38 and 0.33). Deliberately does not fix the gauntlet's secondary finding that response.json() sits outside error handling (a malformed body throws a raw SyntaxError instead of EngineError) -- that's a behavior change, out of scope for this pure decomposition. Filed as #1745. docs check acknowledged: internal refactor only, no CLI/feature/language/ architecture surface changed -- README/CLAUDE.md/ROADMAP untouched by design. Impact: 3 functions changed, 10 affected --- src/domain/search/providers/remote.ts | 170 +++++++++++++++++--------- 1 file changed, 109 insertions(+), 61 deletions(-) diff --git a/src/domain/search/providers/remote.ts b/src/domain/search/providers/remote.ts index 545a1b1d9..d323cdabd 100644 --- a/src/domain/search/providers/remote.ts +++ b/src/domain/search/providers/remote.ts @@ -69,6 +69,102 @@ export function resolveRemoteEmbeddingOptions( }; } +/** + * Execute a single batched `/embeddings` request: build the request body, + * fetch with an abort-on-timeout guard, and normalize network/timeout/status + * failures into a descriptive `EngineError` naming the endpoint. Does not + * touch the response body — callers are responsible for parsing/validating it. 
+ */ +async function executeRemoteEmbeddingRequest( + url: string, + headers: Record, + model: string, + batch: string[], + timeoutMs: number, + batchNumber: number, +): Promise { + const controller = new AbortController(); + const timeoutHandle = setTimeout(() => controller.abort(), timeoutMs); + + let response: Response; + try { + response = await fetch(url, { + method: 'POST', + headers, + body: JSON.stringify({ model, input: batch }), + signal: controller.signal, + }); + } catch (err: unknown) { + if (err instanceof Error && err.name === 'AbortError') { + throw new EngineError( + `Remote embedding endpoint ${url} did not respond within ${timeoutMs}ms ` + + `(batch ${batchNumber})`, + ); + } + throw new EngineError( + `Failed to reach remote embedding endpoint at ${url}: ${err instanceof Error ? err.message : String(err)}`, + { cause: err instanceof Error ? err : undefined }, + ); + } finally { + clearTimeout(timeoutHandle); + } + + if (!response.ok) { + const body = await response.text().catch(() => ''); + throw new EngineError( + `Remote embedding endpoint ${url} returned ${response.status} ${response.statusText}` + + (body ? `: ${body.slice(0, 500)}` : ''), + ); + } + + return response; +} + +/** + * Validate and map a parsed `/embeddings` response body into vectors: + * shape-check `data` against the batch length, sort by index (servers aren't + * guaranteed to preserve input order), validate each item's `embedding` + * field, and enforce dimension consistency against the running `dim` seen + * across earlier batches in this `embedRemote` call. + */ +function mapRemoteEmbeddingResponse( + json: OpenAIEmbeddingResponse, + batch: string[], + url: string, + dim: number, +): { vectors: Float32Array[]; dim: number } { + if (!Array.isArray(json.data) || json.data.length !== batch.length) { + throw new EngineError( + `Remote embedding endpoint ${url} returned an unexpected response shape ` + + `(expected ${batch.length} embeddings, got ${json.data?.length ?? 
0})`, + ); + } + + // OpenAI-compatible servers aren't guaranteed to preserve input order — sort by index. + const sorted = [...json.data].sort((a, b) => a.index - b.index); + const vectors: Float32Array[] = []; + for (const item of sorted) { + if (!Array.isArray(item.embedding)) { + throw new EngineError( + `Remote embedding endpoint ${url} returned an item with a missing or non-array ` + + `"embedding" field (index ${item.index})`, + ); + } + const vec = Float32Array.from(item.embedding); + if (dim === 0) { + dim = vec.length; + } else if (vec.length !== dim) { + throw new EngineError( + `Remote embedding endpoint ${url} returned inconsistent vector dimensions ` + + `(expected ${dim}, got ${vec.length} for response item index ${item.index})`, + ); + } + vectors.push(vec); + } + + return { vectors, dim }; +} + /** * Generate embeddings via a remote OpenAI-compatible `/embeddings` endpoint. * Works with OpenAI itself and any self-hosted server implementing the same @@ -90,69 +186,21 @@ export async function embedRemote( for (let i = 0; i < texts.length; i += REMOTE_BATCH_SIZE) { const batch = texts.slice(i, i + REMOTE_BATCH_SIZE); - - const controller = new AbortController(); - const timeoutHandle = setTimeout(() => controller.abort(), timeoutMs); - - let response: Response; - try { - response = await fetch(url, { - method: 'POST', - headers, - body: JSON.stringify({ model: options.model, input: batch }), - signal: controller.signal, - }); - } catch (err: unknown) { - if (err instanceof Error && err.name === 'AbortError') { - throw new EngineError( - `Remote embedding endpoint ${url} did not respond within ${timeoutMs}ms ` + - `(batch ${Math.floor(i / REMOTE_BATCH_SIZE) + 1})`, - ); - } - throw new EngineError( - `Failed to reach remote embedding endpoint at ${url}: ${err instanceof Error ? err.message : String(err)}`, - { cause: err instanceof Error ? 
err : undefined }, - ); - } finally { - clearTimeout(timeoutHandle); - } - - if (!response.ok) { - const body = await response.text().catch(() => ''); - throw new EngineError( - `Remote embedding endpoint ${url} returned ${response.status} ${response.statusText}` + - (body ? `: ${body.slice(0, 500)}` : ''), - ); - } + const batchNumber = Math.floor(i / REMOTE_BATCH_SIZE) + 1; + + const response = await executeRemoteEmbeddingRequest( + url, + headers, + options.model, + batch, + timeoutMs, + batchNumber, + ); const json = (await response.json()) as OpenAIEmbeddingResponse; - if (!Array.isArray(json.data) || json.data.length !== batch.length) { - throw new EngineError( - `Remote embedding endpoint ${url} returned an unexpected response shape ` + - `(expected ${batch.length} embeddings, got ${json.data?.length ?? 0})`, - ); - } - - // OpenAI-compatible servers aren't guaranteed to preserve input order — sort by index. - const sorted = [...json.data].sort((a, b) => a.index - b.index); - for (const item of sorted) { - if (!Array.isArray(item.embedding)) { - throw new EngineError( - `Remote embedding endpoint ${url} returned an item with a missing or non-array ` + - `"embedding" field (index ${item.index})`, - ); - } - const vec = Float32Array.from(item.embedding); - if (dim === 0) { - dim = vec.length; - } else if (vec.length !== dim) { - throw new EngineError( - `Remote embedding endpoint ${url} returned inconsistent vector dimensions ` + - `(expected ${dim}, got ${vec.length} for response item index ${item.index})`, - ); - } - results.push(vec); - } + const mapped = mapRemoteEmbeddingResponse(json, batch, url, dim); + dim = mapped.dim; + results.push(...mapped.vectors); if (texts.length > REMOTE_BATCH_SIZE) { process.stderr.write( From f31468c6bc28e1f0d2992417797102739043b85d Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 07:46:01 -0600 Subject: [PATCH 13/39] fix: correct in-place mutation bug in applyExcludeTestsShorthand and dedupe consent 
glob-matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit applyExcludeTestsShorthand mutated merged.query in place, which was still a live reference to the shared DEFAULTS.query singleton whenever no config layer had already overridden `query`. In long-running processes (e.g. `codegraph mcp --multi-repo`) this permanently leaked one repo's excludeTests setting into every subsequent loadConfig() call for any other repo. loadConfig now deep-clones DEFAULTS before merging so no layer can ever write onto a live DEFAULTS reference, and DEFAULTS itself is now deep-frozen so any future regression of this kind throws immediately instead of silently corrupting shared state. applyExcludeTestsShorthand was also hardened to copy-on-write its `query` key directly. Also dedupes the appliesTo-glob-matching logic (previously copy-pasted between resolveConsent and promptForConsentIfNeeded) into a shared matchesAppliesTo helper. No user-facing behavior, CLI surface, or language support changed — docs check acknowledged. Fixes #1725 Impact: 6 functions changed, 140 affected --- src/infrastructure/config.ts | 84 ++++++++++++++++++++++++++---------- tests/unit/config.test.ts | 69 +++++++++++++++++++++++++++++ 2 files changed, 130 insertions(+), 23 deletions(-) diff --git a/src/infrastructure/config.ts b/src/infrastructure/config.ts index 7e23f90ba..6181d450e 100644 --- a/src/infrastructure/config.ts +++ b/src/infrastructure/config.ts @@ -17,7 +17,27 @@ export const CONFIG_FILES: readonly string[] = [ 'codegraph.config.json', ]; -export const DEFAULTS = { +/** + * Recursively freeze an object (and all nested plain objects/arrays) so any + * accidental in-place mutation throws immediately (ES modules run in strict + * mode) instead of silently corrupting shared state. 
Applied to DEFAULTS + * below — see the loadConfig mutation-leak bug (issue #1725): DEFAULTS.query + * (and, by the same aliasing pattern, DEFAULTS.llm / DEFAULTS.build) must + * never be a live target for mergeConfig / applyExcludeTestsShorthand / + * applyEnvOverrides / resolveSecrets to write onto, whether directly or via + * an un-cloned nested reference. + */ +function deepFreeze(obj: T): T { + if (obj !== null && typeof obj === 'object' && !Object.isFrozen(obj)) { + Object.freeze(obj); + for (const value of Object.values(obj as Record)) { + deepFreeze(value); + } + } + return obj; +} + +export const DEFAULTS = deepFreeze({ include: [] as string[], exclude: [] as string[], ignoreDirs: [] as string[], @@ -209,7 +229,7 @@ export const DEFAULTS = { }, disabledTools: [] as string[], }, -} satisfies CodegraphConfig; +} satisfies CodegraphConfig); // ── Per-process user-config override (set by CLI flags) ──────────────── // Set once by the preAction hook before any command runs; cleared when changed. @@ -376,12 +396,19 @@ function applyExcludeTestsShorthand( rawLayer: Record, ): Record { if ('excludeTests' in rawLayer) { + const result = { ...merged }; + delete result.excludeTests; // Only hoist if this layer doesn't also set query.excludeTests if (!(rawLayer.query && 'excludeTests' in (rawLayer.query as object))) { - (merged.query as Record).excludeTests = Boolean(rawLayer.excludeTests); + // Copy-on-write: never mutate `merged.query` in place. If no layer so + // far has overridden `query`, `merged.query` is still the same object + // reference as DEFAULTS.query — writing to it directly would + // permanently poison the shared DEFAULTS singleton (issue #1725). 
+ result.query = { + ...(merged.query as Record), + excludeTests: Boolean(rawLayer.excludeTests), + }; } - const result = { ...merged }; - delete result.excludeTests; return result; } if ('excludeTests' in merged) { @@ -400,6 +427,22 @@ interface ConsentResolutionResult { consentDecision: ConsentDecision | undefined; } +/** + * Check whether `rootDir` matches any of a parsed global config's `appliesTo` + * glob patterns (§4.2 step 3 of the user-config consent spec). Shared by + * `resolveConsent` and `promptForConsentIfNeeded` — previously duplicated + * verbatim between the two call sites. + */ +function matchesAppliesTo(parsed: ParsedUserConfig | null, rootDir: string): boolean { + if (!parsed?.appliesToGlobs.length) return false; + const expanded = parsed.appliesToGlobs.map((g) => + g.startsWith('~') ? path.join(os.homedir(), g.slice(1)) : g, + ); + const regexes = compileGlobs(expanded); + const absRoot = path.resolve(rootDir); + return matchesAny(regexes, absRoot); +} + /** * Resolve whether the global user config should be applied for a given repo. * Implements the §4.1/§4.2 precedence chain from the spec. @@ -451,15 +494,8 @@ function resolveConsent( // §4.2 step 3: appliesTo glob match (dynamic, never persisted) const parsed = loadUserConfigFile(globalPath); - if (parsed?.appliesToGlobs.length) { - const expanded = parsed.appliesToGlobs.map((g) => - g.startsWith('~') ? 
path.join(os.homedir(), g.slice(1)) : g, - ); - const regexes = compileGlobs(expanded); - const absRoot = path.resolve(rootDir); - if (matchesAny(regexes, absRoot)) { - return { applied: true, globalPath, consentDecision: undefined }; - } + if (matchesAppliesTo(parsed, rootDir)) { + return { applied: true, globalPath, consentDecision: undefined }; } // §4.2 steps 4–5: undecided — caller decides whether to prompt @@ -531,14 +567,7 @@ export async function promptForConsentIfNeeded( // Check appliesTo globs (dynamic consent — no prompt needed) const parsed = loadUserConfigFile(globalPath); - if (parsed?.appliesToGlobs.length) { - const expanded = parsed.appliesToGlobs.map((g) => - g.startsWith('~') ? path.join(os.homedir(), g.slice(1)) : g, - ); - const regexes = compileGlobs(expanded); - const absRoot = path.resolve(rootDir); - if (matchesAny(regexes, absRoot)) return; // covered by appliesTo - } + if (matchesAppliesTo(parsed, rootDir)) return; // covered by appliesTo // Only prompt in fully interactive sessions if (!process.stdin.isTTY || !process.stdout.isTTY) return; @@ -605,7 +634,16 @@ export function loadConfig(cwd?: string, opts?: LoadConfigOpts): CodegraphConfig } // ── Layer 0: DEFAULTS ───────────────────────────────────────────── - let merged = DEFAULTS as unknown as Record; + // Deep-clone so later layers (mergeConfig / applyExcludeTestsShorthand / + // applyEnvOverrides / resolveSecrets) never hold a live reference into the + // shared, frozen DEFAULTS singleton — writing to a nested key here must + // only ever affect this call's private copy. See issue #1725: + // DEFAULTS.query used to leak mutations across repos in long-running + // processes (e.g. `codegraph mcp --multi-repo`) because mergeConfig's + // shallow copy leaves untouched nested keys pointing straight at the + // DEFAULTS object — the same aliasing risk applied to DEFAULTS.llm / + // DEFAULTS.build via applyEnvOverrides / resolveSecrets. 
+ let merged = structuredClone(DEFAULTS) as unknown as Record; // ── Layer 1: global (if applied) ────────────────────────────────── if (applied && globalPath) { diff --git a/tests/unit/config.test.ts b/tests/unit/config.test.ts index df0baede5..14122333c 100644 --- a/tests/unit/config.test.ts +++ b/tests/unit/config.test.ts @@ -333,6 +333,30 @@ describe('excludeTests hoisting', () => { expect(config.query.excludeTests).toBe(false); expect(config.excludeTests).toBeUndefined(); }); + + it('does not leak excludeTests across repos via the shared DEFAULTS singleton (issue #1725)', () => { + // Regression test: applyExcludeTestsShorthand used to write + // `merged.query.excludeTests` in place. Since mergeConfig only deep-copies + // a nested key when overrides actually set it, `merged.query` for a repo + // whose config uses ONLY the top-level `excludeTests` shorthand is still + // the literal DEFAULTS.query object — so the in-place write permanently + // mutated the module-level DEFAULTS singleton. In a long-running process + // (e.g. `codegraph mcp --multi-repo`) a later loadConfig() call for a + // totally unrelated repo would then silently inherit excludeTests: true. + const dirA = fs.mkdtempSync(path.join(tmpDir, 'exclude-leak-a-')); + fs.writeFileSync(path.join(dirA, '.codegraphrc.json'), JSON.stringify({ excludeTests: true })); + const configA = loadConfig(dirA); + expect(configA.query.excludeTests).toBe(true); + + // A second, unrelated repo with no excludeTests config of its own must + // still see the true default (false), not repo A's leaked value. + const dirB = fs.mkdtempSync(path.join(tmpDir, 'exclude-leak-b-')); + const configB = loadConfig(dirB); + expect(configB.query.excludeTests).toBe(false); + + // The shared DEFAULTS singleton itself must never be mutated. 
+ expect(DEFAULTS.query.excludeTests).toBe(false); + }); }); describe('applyEnvOverrides', () => { @@ -445,6 +469,51 @@ describe('applyEnvOverrides', () => { }); }); +describe('DEFAULTS singleton immutability across loadConfig calls (issue #1725)', () => { + // The excludeTests-hoisting leak (above) was one symptom of a broader bug: + // when no config layer sets a given top-level key (e.g. `llm` or `build`), + // mergeConfig's shallow copy leaves `merged.` pointing straight at + // DEFAULTS.. applyEnvOverrides/resolveSecrets then write onto that + // nested object in place, permanently poisoning DEFAULTS for the rest of + // the process — the same aliasing pattern as applyExcludeTestsShorthand, + // just reached via env vars instead of a config-file shorthand. Covered + // here since it's the same root cause (loadConfig used to start from a + // live reference to DEFAULTS) rather than a separate bug. + afterEach(() => { + delete process.env.CODEGRAPH_ENGINE; + delete process.env.CODEGRAPH_LLM_API_KEY; + }); + + it('does not leak an env-driven build.engine override into DEFAULTS.build', () => { + const dir = fs.mkdtempSync(path.join(tmpDir, 'defaults-freeze-engine-')); + process.env.CODEGRAPH_ENGINE = 'native'; + let config: ReturnType | undefined; + expect(() => { + config = loadConfig(dir); + }).not.toThrow(); + expect(config?.build.engine).toBe('native'); + expect(DEFAULTS.build.engine).toBe('auto'); + }); + + it('does not leak an env-driven llm.apiKey override into DEFAULTS.llm', () => { + const dir = fs.mkdtempSync(path.join(tmpDir, 'defaults-freeze-apikey-')); + process.env.CODEGRAPH_LLM_API_KEY = 'sk-should-not-leak'; + let config: ReturnType | undefined; + expect(() => { + config = loadConfig(dir); + }).not.toThrow(); + expect(config?.llm.apiKey).toBe('sk-should-not-leak'); + expect(DEFAULTS.llm.apiKey).toBeNull(); + }); + + it('DEFAULTS is deeply frozen', () => { + expect(Object.isFrozen(DEFAULTS)).toBe(true); + 
expect(Object.isFrozen(DEFAULTS.query)).toBe(true); + expect(Object.isFrozen(DEFAULTS.llm)).toBe(true); + expect(Object.isFrozen(DEFAULTS.build)).toBe(true); + }); +}); + describe('resolveSecrets', () => { let mockExecFile: any; From 57d37825846c4fa04f292ad9ddea57b349661194 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 10:05:42 -0600 Subject: [PATCH 14/39] fix: correct connection-leak ordering in openReadonlyWithNative, dedupe engine resolution openReadonlyWithNative opened the better-sqlite3 handle before resolving the engine, and engine resolution calls loadConfig(), which can throw (e.g. ConfigError from resolveSecrets on a malformed llm.apiKeyCommand). If that throw happened, the already-open DB handle was never closed -- a real leak on the hot path used by dataflow/hotspots/stats commands. Fix: resolve the engine (and thus loadConfig) before opening the DB, mirroring openRepo's existing, correct ordering. Extracted the shared engine-resolution logic (customDbPath > rootDir > loadConfig priority chain) into resolveDbEngine(), used by both openRepo and openReadonlyWithNative so the two call sites can't drift again. Added tests/unit/openReadonlyWithNative-leak.test.ts: tracks every better-sqlite3 Database instantiation and asserts zero occur when loadConfig throws. Verified this test fails against the pre-fix ordering (it recorded a leaked instance) and passes against the fix. docs check acknowledged: internal bug fix + dedup, no CLI surface, language support, or documented architecture/design decision changed. 
Impact: 3 functions changed, 38 affected --- src/db/connection.ts | 52 +++++---- .../unit/openReadonlyWithNative-leak.test.ts | 103 ++++++++++++++++++ 2 files changed, 135 insertions(+), 20 deletions(-) create mode 100644 tests/unit/openReadonlyWithNative-leak.test.ts diff --git a/src/db/connection.ts b/src/db/connection.ts index fe7145283..eac517849 100644 --- a/src/db/connection.ts +++ b/src/db/connection.ts @@ -349,6 +349,29 @@ export function openReadonlyOrFail(customPath?: string): BetterSqlite3Database { return db; } +/** + * Resolve the effective engine for DB access: explicit opts.engine > config.build.engine > 'auto'. + * Derives rootDir from the resolved DB path so loadConfig reads the right project config. + * Shared by openRepo() and openReadonlyWithNative() so the two call sites can't drift. + * + * MUST be called before opening any DB handle: loadConfig can throw (e.g. ConfigError + * via resolveSecrets on a malformed llm.apiKeyCommand config), and an already-open + * handle at that point would never be closed. + */ +function resolveDbEngine( + customDbPath: string | undefined, + engineOpt: 'native' | 'wasm' | 'auto' | undefined, +): 'native' | 'wasm' | 'auto' { + // Using findDbPath (not path.resolve(customDbPath)) ensures directory inputs like + // --db /path/to/repo are normalised to .codegraph/graph.db before we strip two levels. + // Convention: resolvedDbPath = /.codegraph/graph.db + const resolvedDbPath = customDbPath ? findDbPath(customDbPath) : undefined; + const rootDir = resolvedDbPath ? path.dirname(path.dirname(resolvedDbPath)) : undefined; + // config.build.engine is already populated from CODEGRAPH_ENGINE env by applyEnvOverrides, + // so this covers both the env-var path and the .codegraphrc.json config-file path. + return engineOpt ?? loadConfig(rootDir).build.engine ?? 'auto'; +} + /** Open a NativeRepository via rusqlite, throwing DbError if the DB file is missing. 
*/ function openRepoNative(customDbPath?: string): { repo: Repository; close(): void } { const dbPath = findDbPath(customDbPath); @@ -397,17 +420,9 @@ export function openRepo( return { repo: opts.repo, close() {} }; } - // Derive rootDir from the resolved DB path so loadConfig reads the right project config. - // Using findDbPath (not path.resolve(customDbPath)) ensures directory inputs like - // --db /path/to/repo are normalised to .codegraph/graph.db before we strip two levels. - // Convention: resolvedDbPath = /.codegraph/graph.db - const resolvedDbPath = customDbPath ? findDbPath(customDbPath) : undefined; - const rootDir = resolvedDbPath ? path.dirname(path.dirname(resolvedDbPath)) : undefined; // Respect explicit engine selection: opts.engine > config.build.engine > auto. - // config.build.engine is already populated from CODEGRAPH_ENGINE env by applyEnvOverrides, - // so this covers both the env-var path and the .codegraphrc.json config-file path. // This ensures --engine wasm and benchmark workers bypass the native path. - const engine = opts.engine ?? loadConfig(rootDir).build.engine ?? 'auto'; + const engine = resolveDbEngine(customDbPath, opts.engine); // Try native rusqlite path first (Phase 6.14) if (engine !== 'wasm' && isNativeAvailable()) { @@ -455,18 +470,15 @@ export function openReadonlyWithNative( nativeDb: NativeDatabase | undefined; close(): void; } { - const db = openReadonlyOrFail(customPath); + // Resolve engine (which may call loadConfig — and loadConfig can throw, e.g. + // ConfigError via resolveSecrets on a malformed llm.apiKeyCommand config) BEFORE + // opening the DB handle, mirroring openRepo()'s ordering. If this throws, no DB + // handle has been opened yet, so nothing is left leaked. (Previously this ran + // AFTER openReadonlyOrFail(), so a config error here leaked the already-open + // better-sqlite3 handle — see the phase-15 gauntlet finding.) 
+ const engine = resolveDbEngine(customPath, opts.engine); - // Derive rootDir from the resolved DB path so loadConfig reads the right project config, - // consistent with openRepo(). Using findDbPath (not path.resolve(customPath)) ensures - // directory inputs like --db /path/to/repo are normalised before stripping two levels. - // Convention: resolvedDbPath = /.codegraph/graph.db - const resolvedDbPath = customPath ? findDbPath(customPath) : undefined; - const rootDir = resolvedDbPath ? path.dirname(path.dirname(resolvedDbPath)) : undefined; - // Respect explicit engine selection: opts.engine > config.build.engine > auto. - // config.build.engine covers both CODEGRAPH_ENGINE env (via applyEnvOverrides) - // and the .codegraphrc.json config-file path. Mirrors openRepo() priority chain. - const engine = opts.engine ?? loadConfig(rootDir).build.engine ?? 'auto'; + const db = openReadonlyOrFail(customPath); let nativeDb: NativeDatabase | undefined; if (engine !== 'wasm' && isNativeAvailable()) { diff --git a/tests/unit/openReadonlyWithNative-leak.test.ts b/tests/unit/openReadonlyWithNative-leak.test.ts new file mode 100644 index 000000000..9fb6fac9c --- /dev/null +++ b/tests/unit/openReadonlyWithNative-leak.test.ts @@ -0,0 +1,103 @@ +/** + * Regression test for a resource leak in openReadonlyWithNative (GAUNTLET + * phase-15 finding, rule 5): the function used to open the better-sqlite3 + * DB handle BEFORE resolving the engine, and engine resolution calls + * loadConfig(), which can throw (e.g. ConfigError from resolveSecrets when + * llm.apiKeyCommand is malformed). If that throw happened, the already-open + * DB handle was never closed — a real leak on a hot path used by + * dataflow/hotspots/stats CLI commands. + * + * The fix reorders openReadonlyWithNative() to resolve the engine (and thus + * call loadConfig) BEFORE opening the DB, mirroring openRepo()'s existing + * ordering. 
This test proves the fix by tracking every better-sqlite3 + * `Database` instantiation: when loadConfig throws, zero instances should + * ever be constructed (there's nothing to leak because nothing was opened). + */ +import fs from 'node:fs'; +import os from 'node:os'; +import path from 'node:path'; +import { afterAll, beforeAll, beforeEach, describe, expect, it, vi } from 'vitest'; + +const loadConfigSpy = vi.hoisted(() => vi.fn()); +const openedInstances = vi.hoisted(() => [] as { close: () => void }[]); + +// Delegate to the real loadConfig by default; individual tests override with +// mockImplementationOnce to simulate a throwing config resolution. +vi.mock('../../src/infrastructure/config.js', async (importOriginal) => { + const mod = await importOriginal(); + loadConfigSpy.mockImplementation(mod.loadConfig); + return { ...mod, loadConfig: loadConfigSpy }; +}); + +// Wrap the real better-sqlite3 Database constructor so every instantiation +// is recorded. This lets tests assert "no handle was ever opened" directly, +// rather than inferring it indirectly. +vi.mock('../../src/db/better-sqlite3.js', async (importOriginal) => { + const mod = await importOriginal(); + return { + ...mod, + getDatabase: () => { + const RealDatabase = mod.getDatabase(); + return new Proxy(RealDatabase, { + construct(target, args) { + const instance = Reflect.construct(target, args) as { close: () => void }; + openedInstances.push(instance); + return instance; + }, + }); + }, + }; +}); + +import { closeDb, initSchema, openDb, openReadonlyWithNative } from '../../src/db/index.js'; + +let tmpDir: string; +let dbPath: string; + +beforeAll(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-leak-')); + dbPath = path.join(tmpDir, 'graph.db'); + const db = openDb(dbPath); + initSchema(db); + closeDb(db); +}); + +beforeEach(() => { + // Only count instantiations made during the test body itself. 
+ openedInstances.length = 0; + loadConfigSpy.mockClear(); +}); + +afterAll(() => { + if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +describe('openReadonlyWithNative resource-leak regression', () => { + it('does not open (and therefore cannot leak) a DB handle when engine/config resolution throws', () => { + loadConfigSpy.mockImplementationOnce(() => { + throw new Error('ConfigError: llm.apiKeyCommand must be a string'); + }); + + expect(() => openReadonlyWithNative(dbPath)).toThrow(/apiKeyCommand/); + + // The regression: previously openReadonlyOrFail() (which constructs the + // better-sqlite3 Database) ran BEFORE the loadConfig() call that could + // throw, so a config error left an already-opened handle dangling + // forever with no way for the caller to close it. With the fix, engine + // resolution runs first, so a thrown config error means the Database + // constructor is never invoked at all. + expect(openedInstances).toHaveLength(0); + }); + + it('still opens successfully and closes cleanly when config resolution succeeds', () => { + const result = openReadonlyWithNative(dbPath); + expect(result.db).toBeDefined(); + expect(openedInstances).toHaveLength(1); + + result.close(); + + // Prove the handle was actually closed, not merely constructed: + // any query against a closed better-sqlite3 connection throws. + expect(() => result.db.prepare('SELECT 1').get()).toThrow(); + }); +}); From 506e2cefeb0414ec2462251993e4e0fbd31e1941 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 10:22:53 -0600 Subject: [PATCH 15/39] fix: add debug() logging to silent catch blocks across builder pipeline and cli/commands/info.ts Converts 13 comment-only/silent catch blocks in pipeline.ts, native-orchestrator.ts, detect-changes.ts, helpers.ts, and info.ts from `catch { /* comment */ }` to `catch (e) { debug(...) }`, using the existing infrastructure/logger.ts debug() utility. 
Purely additive observability -- no control-flow changes, no change to what errors are swallowed vs rethrown. docs check acknowledged: internal logging-only change, no new feature/language/ architecture/command surface to document in README/CLAUDE.md/ROADMAP.md. Impact: 13 functions changed, 17 affected --- src/cli/commands/info.ts | 5 +++- src/domain/graph/builder/helpers.ts | 7 +++-- src/domain/graph/builder/pipeline.ts | 8 +++--- .../graph/builder/stages/detect-changes.ts | 9 ++++--- .../builder/stages/native-orchestrator.ts | 27 ++++++++++++------- 5 files changed, 36 insertions(+), 20 deletions(-) diff --git a/src/cli/commands/info.ts b/src/cli/commands/info.ts index fef4e2f9b..a68fad60f 100644 --- a/src/cli/commands/info.ts +++ b/src/cli/commands/info.ts @@ -1,3 +1,5 @@ +import { debug } from '../../infrastructure/logger.js'; +import { toErrorMessage } from '../../shared/errors.js'; import type { CommandDefinition } from '../types.js'; export const command: CommandDefinition = { @@ -72,8 +74,9 @@ export const command: CommandDefinition = { console.log(); } } - } catch { + } catch (e) { /* diagnostics must never crash */ + debug(`DB build-metadata diagnostics failed: ${toErrorMessage(e)}`); } }, }; diff --git a/src/domain/graph/builder/helpers.ts b/src/domain/graph/builder/helpers.ts index f0f34cffa..39ab75e5a 100644 --- a/src/domain/graph/builder/helpers.ts +++ b/src/domain/graph/builder/helpers.ts @@ -9,6 +9,7 @@ import path from 'node:path'; import { purgeFilesData } from '../../../db/index.js'; import { debug, warn } from '../../../infrastructure/logger.js'; import { buildIgnoreSet, EXTENSIONS, normalizePath } from '../../../shared/constants.js'; +import { toErrorMessage } from '../../../shared/errors.js'; import { compileGlobs, globToRegex, matchesAny } from '../../../shared/globs.js'; import { sleepSync } from '../../../shared/sleep.js'; import type { @@ -126,8 +127,9 @@ export function readGitignorePatterns(rootDir: string): readonly RegExp[] { 
normalized = pattern; } regexes.push(globToRegex(normalized)); - } catch { + } catch (e) { // Ignore patterns that don't compile (e.g. those with unsupported syntax) + debug(`.gitignore pattern "${pattern}" failed to compile, skipping: ${toErrorMessage(e)}`); } } return Object.freeze(regexes); @@ -150,7 +152,8 @@ function isSymlinkLoop(dir: string, visited: Set): boolean { let realDir: string; try { realDir = fs.realpathSync(dir); - } catch { + } catch (e) { + debug(`realpathSync failed for ${dir}, treating as symlink loop: ${toErrorMessage(e)}`); return true; } if (visited.has(realDir)) { diff --git a/src/domain/graph/builder/pipeline.ts b/src/domain/graph/builder/pipeline.ts index d3360368b..fa35e9e1a 100644 --- a/src/domain/graph/builder/pipeline.ts +++ b/src/domain/graph/builder/pipeline.ts @@ -354,8 +354,8 @@ async function runPipelineStages(ctx: PipelineContext): Promise { if (tree && typeof tree.delete === 'function') { try { tree.delete(); - } catch { - /* ignore cleanup errors */ + } catch (e) { + debug(`WASM tree cleanup failed: ${toErrorMessage(e)}`); } } symbols._tree = undefined; @@ -497,8 +497,8 @@ export async function buildGraph( if (tree && typeof tree.delete === 'function') { try { tree.delete(); - } catch { - /* ignore cleanup errors */ + } catch (e) { + debug(`WASM tree cleanup failed: ${toErrorMessage(e)}`); } } symbols._tree = undefined; diff --git a/src/domain/graph/builder/stages/detect-changes.ts b/src/domain/graph/builder/stages/detect-changes.ts index 7f68fafdb..a216beccb 100644 --- a/src/domain/graph/builder/stages/detect-changes.ts +++ b/src/domain/graph/builder/stages/detect-changes.ts @@ -11,6 +11,7 @@ import { performance } from 'node:perf_hooks'; import { closeDb } from '../../../../db/index.js'; import { debug, info } from '../../../../infrastructure/logger.js'; import { normalizePath } from '../../../../shared/constants.js'; +import { toErrorMessage } from '../../../../shared/errors.js'; import type { BetterSqlite3Database, 
ExtractorOutput, NativeDatabase } from '../../../../types.js'; import { parseFilesAuto } from '../../../parser.js'; import { readJournal, writeJournalHeader } from '../../journal.js'; @@ -66,8 +67,8 @@ function getChangedFiles( try { db.prepare('SELECT 1 FROM file_hashes LIMIT 1').get(); hasTable = true; - } catch { - /* table doesn't exist */ + } catch (e) { + debug(`file_hashes table probe failed, assuming table doesn't exist: ${toErrorMessage(e)}`); } if (!hasTable) { @@ -331,8 +332,8 @@ function healMetadata(ctx: PipelineContext): void { healTx(); } debug(`Self-healed mtime/size for ${metadataUpdates.length} files`); - } catch { - /* ignore heal errors */ + } catch (e) { + debug(`Self-heal of mtime/size metadata failed: ${toErrorMessage(e)}`); } } diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index 0ca5d25b2..c0157cf95 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -406,8 +406,11 @@ async function runDataflowVertexPass( } try { result = native.extractDataflowAnalysis(source, absPaths[i]!); - } catch { + } catch (e) { // Language-specific parse failure — fall through to WASM. 
+ debug( + `native dataflow extraction failed for ${relPath}, falling back to WASM: ${toErrorMessage(e)}`, + ); } } if (result) { @@ -624,13 +627,13 @@ async function runPostNativeAnalysis( if (ctx.nativeDb) { try { ctx.nativeDb.exec('PRAGMA wal_checkpoint(TRUNCATE)'); - } catch { - /* ignore checkpoint errors */ + } catch (e) { + debug(`native DB post-analysis WAL checkpoint failed: ${toErrorMessage(e)}`); } try { ctx.nativeDb.close(); - } catch { - /* ignore close errors */ + } catch (e) { + debug(`native DB close failed: ${toErrorMessage(e)}`); } ctx.nativeDb = undefined; if (ctx.engineOpts) { @@ -1216,8 +1219,8 @@ function cleanupThisDispatchWasmTrees(wasmResults: Map) if (tree && typeof tree.delete === 'function') { try { tree.delete(); - } catch { - /* ignore cleanup errors */ + } catch (e) { + debug(`WASM tree cleanup failed: ${toErrorMessage(e)}`); } } (symbols as { _tree?: unknown; _langId?: unknown })._tree = undefined; @@ -2175,8 +2178,11 @@ class NativeOrchestrationSession { const nativeDb = this.ctx.nativeDb as NonNullable; try { nativeDb.exec('PRAGMA foreign_keys = OFF'); - } catch { + } catch (e) { // exec may not exist on very old addon versions — safe to ignore + debug( + `PRAGMA foreign_keys=OFF failed (safe to ignore on old addon versions): ${toErrorMessage(e)}`, + ); } let resultJson: string; @@ -2193,8 +2199,11 @@ class NativeOrchestrationSession { // throws. 
try { nativeDb.exec('PRAGMA foreign_keys = ON'); - } catch { + } catch (e) { // safe to ignore on very old addon versions + debug( + `PRAGMA foreign_keys=ON restore failed (safe to ignore on old addon versions): ${toErrorMessage(e)}`, + ); } } From dbf34b87b2209ce772cc7ee429ce392894747522 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 10:35:58 -0600 Subject: [PATCH 16/39] refactor: split buildChaContext into three focused builder functions (docs check acknowledged) Impact: 4 functions changed, 4 affected --- src/domain/graph/builder/cha.ts | 97 +++++++++++++++++++++------------ 1 file changed, 61 insertions(+), 36 deletions(-) diff --git a/src/domain/graph/builder/cha.ts b/src/domain/graph/builder/cha.ts index b2ff8bc76..20a7b1b90 100644 --- a/src/domain/graph/builder/cha.ts +++ b/src/domain/graph/builder/cha.ts @@ -10,7 +10,7 @@ * - buildChaPostPass (native path) — JS post-pass on top of native edges */ -import type { ExtractorOutput } from '../../../types.js'; +import type { ClassRelation, ExtractorOutput } from '../../../types.js'; import type { CallNodeLookup } from './call-resolver.js'; // ── CHA context ────────────────────────────────────────────────────────────── @@ -30,6 +30,63 @@ export const EMPTY_CHA_CONTEXT: ChaContext = { instantiatedTypes: new Set(), }; +/** + * Record a class's `implements` relationship into the implementors map + * (interface/class name → concrete classes that implement it). + */ +function recordImplements(cls: ClassRelation, implementors: Map): void { + if (!cls.implements) return; + let list = implementors.get(cls.implements); + if (!list) { + list = []; + implementors.set(cls.implements, list); + } + if (!list.includes(cls.name)) list.push(cls.name); +} + +/** + * Record a class's `extends` relationship into both the parents map (child → + * direct parent, for this/super hierarchy walking) and the implementors map + * (parent → children, for CHA dispatch expansion via extends). 
+ */ +function recordExtends( + cls: ClassRelation, + implementors: Map, + parents: Map, +): void { + if (!cls.extends) return; + // child → parent (for this/super hierarchy walking) + if (!parents.has(cls.name)) parents.set(cls.name, cls.extends); + // parent → children (for CHA dispatch expansion via extends) + let list = implementors.get(cls.extends); + if (!list) { + list = []; + implementors.set(cls.extends, list); + } + if (!list.includes(cls.name)) list.push(cls.name); +} + +/** + * RTA: collect instantiated class names for one file's symbols — the Phase + * 8.5 dedicated `newExpressions` list (all `new X()` in the file), plus the + * constructor-confidence typeMap fallback (confidence >= 0.9) that covers + * codebases that haven't been re-parsed since Phase 8.5 was added. + */ +function collectInstantiatedTypes(symbols: ExtractorOutput, instantiatedTypes: Set): void { + if (symbols.newExpressions) { + for (const typeName of symbols.newExpressions) { + instantiatedTypes.add(typeName); + } + } + if (symbols.typeMap instanceof Map) { + for (const entry of symbols.typeMap.values()) { + if (typeof entry !== 'string' && entry.confidence >= 0.9) { + instantiatedTypes.add(entry.type); + } + } + } +} + /** * Build the CHA context from all parsed file symbols. * @@ -43,42 +100,10 @@ export function buildChaContext(fileSymbols: ReadonlyMap= 0.9) - // covers codebases that haven't been re-parsed since Phase 8.5 was added. 
- if (symbols.typeMap instanceof Map) { - for (const entry of symbols.typeMap.values()) { - if (typeof entry !== 'string' && entry.confidence >= 0.9) { - instantiatedTypes.add(entry.type); - } - } + recordImplements(cls, implementors); + recordExtends(cls, implementors, parents); } + collectInstantiatedTypes(symbols, instantiatedTypes); } return { implementors, parents, instantiatedTypes }; From a1946af21e8a59258276e59bb6e9ace267ada993 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 10:49:32 -0600 Subject: [PATCH 17/39] refactor: split purgeAndAddReverseDeps and wire fast-skip-diag via config (docs check acknowledged) Impact: 6 functions changed, 12 affected --- src/domain/graph/builder/pipeline.ts | 8 +- .../graph/builder/stages/detect-changes.ts | 210 +++++++++++------- 2 files changed, 137 insertions(+), 81 deletions(-) diff --git a/src/domain/graph/builder/pipeline.ts b/src/domain/graph/builder/pipeline.ts index fa35e9e1a..490c5fe07 100644 --- a/src/domain/graph/builder/pipeline.ts +++ b/src/domain/graph/builder/pipeline.ts @@ -439,7 +439,13 @@ export async function buildGraph( try { await collectFiles(ctx); if ( - detectNoChanges(ctx.db, ctx.allFiles, ctx.rootDir, ctx.opts as Record) + detectNoChanges( + ctx.db, + ctx.allFiles, + ctx.rootDir, + ctx.opts as Record, + fastSkipDiag, + ) ) { info('No changes detected. Graph is up to date.'); writeJournalHeader(ctx.rootDir, Date.now()); diff --git a/src/domain/graph/builder/stages/detect-changes.ts b/src/domain/graph/builder/stages/detect-changes.ts index a216beccb..c33f6c70c 100644 --- a/src/domain/graph/builder/stages/detect-changes.ts +++ b/src/domain/graph/builder/stages/detect-changes.ts @@ -377,91 +377,132 @@ function findReverseDependencies( return reverseDeps; } -function purgeAndAddReverseDeps( +/** + * Reconnects reverse-dep files to the changed files they depend on. 
+ * + * Native path: purgeFilesData already deleted + rebuilt the affected edges in + * one transaction, so this just enqueues the reverse-dep files for reparse + * (works correctly with the native edge builder). + * + * WASM/JS path: saves the edge topology from reverse-dep files → changed + * files BEFORE purge runs, so it can be reconnected to new node IDs after + * insertNodes (#932, #933). purgeFilesFromGraph deletes edges in BOTH + * directions for changed files, which already removes the reverse-dep → + * changed-file edges. The old approach then over-deleted ALL outgoing edges + * from reverse-dep files and reparsed them to rebuild everything — expensive + * (87 extra parses) and lossy (442 missing edges due to imperfect resolution + * on rebuild). This approach saves the edge topology, lets purge handle + * deletion, then reconnects using new node IDs. No reparse needed. + */ +function addReverseDeps( ctx: PipelineContext, changePaths: string[], reverseDeps: Set, + hasPurge: boolean, ): void { const { db, rootDir } = ctx; + if (ctx.engineName === 'native' && ctx.nativeDb?.purgeFilesData) { + for (const relPath of reverseDeps) { + const absPath = path.join(rootDir, relPath); + ctx.parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true }); + } + return; + } + + if (!(reverseDeps.size > 0 && hasPurge)) return; + const changePathSet = new Set(changePaths); + const saveEdgesStmt = db.prepare(` + SELECT e.source_id, n_tgt.name AS tgt_name, n_tgt.kind AS tgt_kind, + n_tgt.file AS tgt_file, n_tgt.line AS tgt_line, + e.kind AS edge_kind, e.confidence, e.dynamic, e.technique, e.dynamic_kind, + n_src.file AS src_file + FROM edges e + JOIN nodes n_src ON e.source_id = n_src.id + JOIN nodes n_tgt ON e.target_id = n_tgt.id + WHERE n_tgt.file = ? 
AND n_src.file != n_tgt.file + `); + for (const changedPath of changePaths) { + for (const row of saveEdgesStmt.all(changedPath) as Array<{ + source_id: number; + tgt_name: string; + tgt_kind: string; + tgt_file: string; + tgt_line: number; + edge_kind: string; + confidence: number; + dynamic: number; + technique: string | null; + dynamic_kind: string | null; + src_file: string; + }>) { + // Skip edges whose source is also being purged — buildEdges will + // re-create them with correct new IDs. + if (changePathSet.has(row.src_file)) continue; + ctx.savedReverseDepEdges.push({ + sourceId: row.source_id, + tgtName: row.tgt_name, + tgtKind: row.tgt_kind, + tgtFile: row.tgt_file, + tgtLine: row.tgt_line, + edgeKind: row.edge_kind, + confidence: row.confidence, + dynamic: row.dynamic, + technique: row.technique, + dynamicKind: row.dynamic_kind, + }); + } + } + debug(`Saved ${ctx.savedReverseDepEdges.length} reverse-dep edges for reconnection`); +} + +/** + * Deletes graph data for removed/changed files (and, on the native path, + * their reverse-dep edges) in one call. See `addReverseDeps` for the + * counterpart that reconnects reverse-dep topology around this deletion. + */ +function purgeStaleReverseDeps( + ctx: PipelineContext, + filesToPurge: string[], + hasPurge: boolean, + hasReverseDeps: boolean, + reverseDepList: string[], +): void { + // Prefer NativeDatabase: purge + reverse-dep edge deletion in one transaction (#670) + if (ctx.engineName === 'native' && ctx.nativeDb?.purgeFilesData) { + ctx.nativeDb.purgeFilesData(filesToPurge, false, hasReverseDeps ? reverseDepList : undefined); + return; + } + // No outgoing-edge deletion for reverse-deps — purge already removed + // edges targeting the changed files, and other outgoing edges are valid. + // No reverse-deps added to parseChanges — no reparse needed. 
+ if (hasPurge) { + purgeFilesFromGraph(ctx.db, filesToPurge, { purgeHashes: false }); + } +} + +function purgeAndAddReverseDeps( + ctx: PipelineContext, + changePaths: string[], + reverseDeps: Set, +): void { const hasPurge = changePaths.length > 0 || ctx.removed.length > 0; const hasReverseDeps = reverseDeps.size > 0; const reverseDepList = hasReverseDeps ? [...reverseDeps] : []; - if (hasPurge || hasReverseDeps) { - const filesToPurge = hasPurge ? [...ctx.removed, ...changePaths] : []; - // Prefer NativeDatabase: purge + reverse-dep edge deletion in one transaction (#670) - if (ctx.engineName === 'native' && ctx.nativeDb?.purgeFilesData) { - ctx.nativeDb.purgeFilesData(filesToPurge, false, hasReverseDeps ? reverseDepList : undefined); - // Native path still reparses reverse-deps (works correctly with native edge builder) - for (const relPath of reverseDeps) { - const absPath = path.join(rootDir, relPath); - ctx.parseChanges.push({ file: absPath, relPath, _reverseDepOnly: true }); - } - } else { - // WASM/JS path: save edges from reverse-dep files → changed files BEFORE - // purge, then reconnect them to new node IDs after insertNodes (#932, #933). - // - // purgeFilesFromGraph deletes edges in BOTH directions for changed files, - // which already removes the reverse-dep → changed-file edges. The old - // approach then over-deleted ALL outgoing edges from reverse-dep files and - // reparsed them to rebuild everything — expensive (87 extra parses) and - // lossy (442 missing edges due to imperfect resolution on rebuild). - // - // New approach: save the edge topology, let purge handle deletion, then - // reconnect using new node IDs. No reparse needed. 
- if (hasReverseDeps && hasPurge) { - const changePathSet = new Set(changePaths); - const saveEdgesStmt = db.prepare(` - SELECT e.source_id, n_tgt.name AS tgt_name, n_tgt.kind AS tgt_kind, - n_tgt.file AS tgt_file, n_tgt.line AS tgt_line, - e.kind AS edge_kind, e.confidence, e.dynamic, e.technique, e.dynamic_kind, - n_src.file AS src_file - FROM edges e - JOIN nodes n_src ON e.source_id = n_src.id - JOIN nodes n_tgt ON e.target_id = n_tgt.id - WHERE n_tgt.file = ? AND n_src.file != n_tgt.file - `); - for (const changedPath of changePaths) { - for (const row of saveEdgesStmt.all(changedPath) as Array<{ - source_id: number; - tgt_name: string; - tgt_kind: string; - tgt_file: string; - tgt_line: number; - edge_kind: string; - confidence: number; - dynamic: number; - technique: string | null; - dynamic_kind: string | null; - src_file: string; - }>) { - // Skip edges whose source is also being purged — buildEdges will - // re-create them with correct new IDs. - if (changePathSet.has(row.src_file)) continue; - ctx.savedReverseDepEdges.push({ - sourceId: row.source_id, - tgtName: row.tgt_name, - tgtKind: row.tgt_kind, - tgtFile: row.tgt_file, - tgtLine: row.tgt_line, - edgeKind: row.edge_kind, - confidence: row.confidence, - dynamic: row.dynamic, - technique: row.technique, - dynamicKind: row.dynamic_kind, - }); - } - } - debug(`Saved ${ctx.savedReverseDepEdges.length} reverse-dep edges for reconnection`); - } + if (!(hasPurge || hasReverseDeps)) return; - if (hasPurge) { - purgeFilesFromGraph(db, filesToPurge, { purgeHashes: false }); - } - // No outgoing-edge deletion for reverse-deps — purge already removed - // edges targeting the changed files, and other outgoing edges are valid. - // No reverse-deps added to parseChanges — no reparse needed. - } + const filesToPurge = hasPurge ? 
[...ctx.removed, ...changePaths] : []; + const isNative = ctx.engineName === 'native' && !!ctx.nativeDb?.purgeFilesData; + + if (isNative) { + // Native: purge (which also rebuilds reverse-dep edges) runs first, then + // the reverse-dep files are enqueued for reparse. + purgeStaleReverseDeps(ctx, filesToPurge, hasPurge, hasReverseDeps, reverseDepList); + addReverseDeps(ctx, changePaths, reverseDeps, hasPurge); + } else { + // WASM/JS: edge topology must be saved BEFORE purge deletes it. + addReverseDeps(ctx, changePaths, reverseDeps, hasPurge); + purgeStaleReverseDeps(ctx, filesToPurge, hasPurge, hasReverseDeps, reverseDepList); } } @@ -533,11 +574,15 @@ function handleIncrementalBuild(ctx: PipelineContext): void { purgeAndAddReverseDeps(ctx, changePaths, reverseDeps); } -/** Diagnostic logger gated by env var, used by both `detectNoChanges` branches. */ -function makeFastSkipLogger(): (reason: string) => void { - const diag = process.env.CODEGRAPH_FAST_SKIP_DIAG === '1'; +/** + * Diagnostic logger gated by `build.fastSkipDiag` config (resolved by the + * caller from `config.build.fastSkipDiag`, which `applyEnvOverrides` sets + * from `CODEGRAPH_FAST_SKIP_DIAG` — see `infrastructure/config.ts`). Used by + * both `detectNoChanges` branches. + */ +function makeFastSkipLogger(fastSkipDiag: boolean): (reason: string) => void { return (reason: string): void => { - if (diag) info(`[fast-skip] ${reason}`); + if (fastSkipDiag) info(`[fast-skip] ${reason}`); }; } @@ -653,14 +698,19 @@ function passesPendingAnalysisGuard( * repos where source files don't change between builds. * * Pure read of `db` and the filesystem — never mutates either. + * + * `fastSkipDiag` gates the `[fast-skip]` diagnostic log lines and defaults to + * `false` (matching `DEFAULTS.build.fastSkipDiag`) when the caller doesn't + * have a resolved config value to pass — see `makeFastSkipLogger`. 
*/ export function detectNoChanges( db: BetterSqlite3Database, allFiles: string[], rootDir: string, opts?: Record, + fastSkipDiag = false, ): boolean { - const log = makeFastSkipLogger(); + const log = makeFastSkipLogger(fastSkipDiag); const existing = loadFileHashesForPreflight(db, log); if (!existing) return false; From 0e83ba03ab71353dde9160d84db6434eaacaf837 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 11:11:36 -0600 Subject: [PATCH 18/39] refactor: extract getOrCreateBatchStmt, dedupe batch-insert helpers (docs check acknowledged) Impact: 6 functions changed, 22 affected --- src/domain/graph/builder/helpers.ts | 99 +++++++++++++++++------------ 1 file changed, 58 insertions(+), 41 deletions(-) diff --git a/src/domain/graph/builder/helpers.ts b/src/domain/graph/builder/helpers.ts index 39ab75e5a..c9af890b9 100644 --- a/src/domain/graph/builder/helpers.ts +++ b/src/domain/graph/builder/helpers.ts @@ -357,78 +357,95 @@ const BATCH_CHUNK = 500; const nodeStmtCache = new WeakMap>(); const edgeStmtCache = new WeakMap>(); -function getNodeStmt(db: BetterSqlite3Database, chunkSize: number): SqliteStatement { - let cache = nodeStmtCache.get(db); - if (!cache) { - cache = new Map(); - nodeStmtCache.set(db, cache); +/** + * Get (or lazily prepare + cache) a multi-value INSERT statement for a given + * chunk size, keyed per-database. Shared by getNodeStmt/getEdgeStmt, which + * previously duplicated this exact WeakMap> + * cache-getter shape — only the SQL text differed. 
+ */ +function getOrCreateBatchStmt( + cache: WeakMap>, + db: BetterSqlite3Database, + chunkSize: number, + buildSql: (chunkSize: number) => string, +): SqliteStatement { + let perDb = cache.get(db); + if (!perDb) { + perDb = new Map(); + cache.set(db, perDb); } - let stmt = cache.get(chunkSize); + let stmt = perDb.get(chunkSize); if (!stmt) { + stmt = db.prepare(buildSql(chunkSize)); + perDb.set(chunkSize, stmt); + } + return stmt; +} + +function getNodeStmt(db: BetterSqlite3Database, chunkSize: number): SqliteStatement { + return getOrCreateBatchStmt(nodeStmtCache, db, chunkSize, (n) => { const ph = '(?,?,?,?,?,?,?,?,?)'; - stmt = db.prepare( + return ( 'INSERT OR IGNORE INTO nodes (name,kind,file,line,end_line,parent_id,qualified_name,scope,visibility) VALUES ' + - Array.from({ length: chunkSize }, () => ph).join(','), + Array.from({ length: n }, () => ph).join(',') ); - cache.set(chunkSize, stmt); - } - return stmt; + }); } function getEdgeStmt(db: BetterSqlite3Database, chunkSize: number): SqliteStatement { - let cache = edgeStmtCache.get(db); - if (!cache) { - cache = new Map(); - edgeStmtCache.set(db, cache); - } - let stmt = cache.get(chunkSize); - if (!stmt) { + return getOrCreateBatchStmt(edgeStmtCache, db, chunkSize, (n) => { const ph = '(?,?,?,?,?,?,?)'; - stmt = db.prepare( + return ( 'INSERT INTO edges (source_id,target_id,kind,confidence,dynamic,technique,dynamic_kind) VALUES ' + - Array.from({ length: chunkSize }, () => ph).join(','), + Array.from({ length: n }, () => ph).join(',') ); - cache.set(chunkSize, stmt); - } - return stmt; + }); } /** - * Batch-insert node rows via multi-value INSERT statements. - * Each row: [name, kind, file, line, end_line, parent_id, qualified_name, scope, visibility] + * Chunk `rows` into `BATCH_CHUNK`-sized groups and run one multi-value INSERT + * per group via `getStmt`, flattening each row's positional args via + * `pushValues`. 
Shared by batchInsertNodes/batchInsertEdges, which previously + * duplicated this exact chunk-loop-and-run shape — only the statement getter + * and per-row value flattening differed. */ -export function batchInsertNodes(db: BetterSqlite3Database, rows: unknown[][]): void { +function runBatchInsert( + db: BetterSqlite3Database, + rows: unknown[][], + getStmt: (db: BetterSqlite3Database, chunkSize: number) => SqliteStatement, + pushValues: (row: unknown[], vals: unknown[]) => void, +): void { if (!rows.length) return; for (let i = 0; i < rows.length; i += BATCH_CHUNK) { const end = Math.min(i + BATCH_CHUNK, rows.length); const chunkSize = end - i; - const stmt = getNodeStmt(db, chunkSize); + const stmt = getStmt(db, chunkSize); const vals: unknown[] = []; for (let j = i; j < end; j++) { - const r = rows[j] as unknown[]; - vals.push(r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8]); + pushValues(rows[j] as unknown[], vals); } stmt.run(...vals); } } +/** + * Batch-insert node rows via multi-value INSERT statements. + * Each row: [name, kind, file, line, end_line, parent_id, qualified_name, scope, visibility] + */ +export function batchInsertNodes(db: BetterSqlite3Database, rows: unknown[][]): void { + runBatchInsert(db, rows, getNodeStmt, (r, vals) => { + vals.push(r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7], r[8]); + }); +} + /** * Batch-insert edge rows via multi-value INSERT statements. * Each row: [source_id, target_id, kind, confidence, dynamic, technique, dynamic_kind] */ export function batchInsertEdges(db: BetterSqlite3Database, rows: unknown[][]): void { - if (!rows.length) return; - for (let i = 0; i < rows.length; i += BATCH_CHUNK) { - const end = Math.min(i + BATCH_CHUNK, rows.length); - const chunkSize = end - i; - const stmt = getEdgeStmt(db, chunkSize); - const vals: unknown[] = []; - for (let j = i; j < end; j++) { - const r = rows[j] as unknown[]; - vals.push(r[0], r[1], r[2], r[3], r[4], r[5] ?? null, r[6] ?? 
null); - } - stmt.run(...vals); - } + runBatchInsert(db, rows, getEdgeStmt, (r, vals) => { + vals.push(r[0], r[1], r[2], r[3], r[4], r[5] ?? null, r[6] ?? null); + }); } /** Confidence assigned to CHA-expanded interface/abstract dispatch edges. */ From c0c1f7d5e730de00b1baa1c49543d37e9a2f4980 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 13:17:25 -0600 Subject: [PATCH 19/39] fix: address quality issues in domain/graph/resolver (docs check acknowledged) Decompose gauntlet-flagged FAIL-level complexity in points-to.ts, strategy.ts, and ts-resolver.ts via pure extract-method refactoring. No resolution-behavior change (verified byte-for-byte identical resolution-benchmark output across all 34 fixture languages). Impact: 41 functions changed, 32 affected --- src/domain/graph/resolver/points-to.ts | 249 +++++++++---- src/domain/graph/resolver/strategy.ts | 328 +++++++++------- src/domain/graph/resolver/ts-resolver.ts | 452 +++++++++++++++-------- 3 files changed, 655 insertions(+), 374 deletions(-) diff --git a/src/domain/graph/resolver/points-to.ts b/src/domain/graph/resolver/points-to.ts index ffa36ec8e..5573c53ab 100644 --- a/src/domain/graph/resolver/points-to.ts +++ b/src/domain/graph/resolver/points-to.ts @@ -79,106 +79,195 @@ function buildThisAssignmentMap( } /** - * Append parameter-flow and array/spread/forOf/callback constraints (Phases 8.3c and 8.3e). + * Phase 8.3c: parameter-flow constraints. * - * Mutates `pts` (seeds array-element entries) and appends to `constraints`. + * For each call f(x) at argIndex i where f is locally defined, add + * constraint: pts(f::paramName_i) ⊇ pts(x). This makes the pts solver + * inter-procedural within the module so that `fn()` inside `f` resolves + * to the concrete function passed at each call site. + * + * Keys are scoped as "callee::paramName" to prevent name collisions: bare + * parameter names like `fn`, `cb`, and `callback` appear in many functions + * within the same file. 
Without scoping, pts(fn) from runA and runB would + * merge into a single set, producing spurious call edges. The scoped key is + * resolved in buildFileCallEdges by combining the enclosing caller's name + * with the call's name (see callerName::call.name lookup there). + * + * Scope: intra-module only (definitionParams contains local defs only). + * + * Appends to `constraints`. */ -function buildParamAndArrayConstraints( - pts: PointsToMap, +function buildParamFlowConstraints( constraints: Array<{ lhs: string; rhsKey: string }>, paramBindings?: readonly ParamBinding[], definitionParams?: ReadonlyMap, +): void { + if (!paramBindings || !definitionParams) return; + for (const { callee, argIndex, argName } of paramBindings) { + const params = definitionParams.get(callee); + if (!params || argIndex >= params.length) continue; + const paramName = params[argIndex]; + if (paramName) constraints.push({ lhs: `${callee}::${paramName}`, rhsKey: argName }); + } +} + +/** + * Phase 8.3e: array-element bindings — seed concrete elements and wildcard. + * + * `arr[0]` etc. are seeded from literal arrays; `arr[*]` collects all elements. + * + * Mutates `pts` (seeds per-index entries) and appends to `constraints`. + */ +function buildArrayElemConstraints( + pts: PointsToMap, + constraints: Array<{ lhs: string; rhsKey: string }>, arrayElemBindings?: readonly ArrayElemBinding[], - spreadArgBindings?: readonly SpreadArgBinding[], - forOfBindings?: readonly ForOfBinding[], - arrayCallbackBindings?: readonly ArrayCallbackBinding[], ): void { - // Phase 8.3c: parameter-flow constraints. - // For each call f(x) at argIndex i where f is locally defined, add - // constraint: pts(f::paramName_i) ⊇ pts(x). This makes the pts solver - // inter-procedural within the module so that `fn()` inside `f` resolves - // to the concrete function passed at each call site. 
- // - // Keys are scoped as "callee::paramName" to prevent name collisions: bare - // parameter names like `fn`, `cb`, and `callback` appear in many functions - // within the same file. Without scoping, pts(fn) from runA and runB would - // merge into a single set, producing spurious call edges. The scoped key is - // resolved in buildFileCallEdges by combining the enclosing caller's name - // with the call's name (see callerName::call.name lookup there). - // - // Scope: intra-module only (definitionParams contains local defs only). - if (paramBindings && definitionParams) { - for (const { callee, argIndex, argName } of paramBindings) { - const params = definitionParams.get(callee); - if (!params || argIndex >= params.length) continue; - const paramName = params[argIndex]; - if (paramName) constraints.push({ lhs: `${callee}::${paramName}`, rhsKey: argName }); - } + if (!arrayElemBindings || arrayElemBindings.length === 0) return; + for (const { arrayName, index, elemName } of arrayElemBindings) { + const elemKey = `${arrayName}[${index}]`; + const wildcardKey = `${arrayName}[*]`; + // Seed the per-index entry if the elemName is a concrete function. + if (!pts.has(elemKey)) pts.set(elemKey, new Set()); + pts.get(elemKey)!.add(elemName); + // Wildcard: array[*] collects all element targets for imprecise spread/for-of. + constraints.push({ lhs: wildcardKey, rhsKey: elemKey }); } +} - // Phase 8.3e: array-element bindings — seed concrete elements and wildcard. - // `arr[0]` etc. are seeded from literal arrays; `arr[*]` collects all elements. - if (arrayElemBindings && arrayElemBindings.length > 0) { - for (const { arrayName, index, elemName } of arrayElemBindings) { - const elemKey = `${arrayName}[${index}]`; - const wildcardKey = `${arrayName}[*]`; - // Seed the per-index entry if the elemName is a concrete function. 
- if (!pts.has(elemKey)) pts.set(elemKey, new Set()); - pts.get(elemKey)!.add(elemName); - // Wildcard: array[*] collects all element targets for imprecise spread/for-of. - constraints.push({ lhs: wildcardKey, rhsKey: elemKey }); - } +/** + * Build a per-array index count from arrayElemBindings for precise + * per-index spread-argument constraints. + */ +function computeArrayMaxIndex( + arrayElemBindings: readonly ArrayElemBinding[] | undefined, +): Map { + const arrayMaxIndex = new Map(); + for (const { arrayName, index } of arrayElemBindings ?? []) { + const cur = arrayMaxIndex.get(arrayName) ?? -1; + if (index > cur) arrayMaxIndex.set(arrayName, index); } + return arrayMaxIndex; +} - // Phase 8.3e: spread-argument constraints. - // f(...arr) → pts[f::param_i] ⊇ pts[arr[i]] for each known element. - if (spreadArgBindings && spreadArgBindings.length > 0 && definitionParams) { - // Build a per-array index count from arrayElemBindings for precise per-index constraints. - const arrayMaxIndex = new Map(); - for (const { arrayName, index } of arrayElemBindings ?? []) { - const cur = arrayMaxIndex.get(arrayName) ?? -1; - if (index > cur) arrayMaxIndex.set(arrayName, index); +/** + * Push spread-argument constraints for one callee: precise per-element + * constraints when the source array's max index is known, otherwise a + * wildcard constraint for every parameter at/after startIndex. + */ +function pushSpreadArgConstraintsForCallee( + constraints: Array<{ lhs: string; rhsKey: string }>, + callee: string, + params: readonly string[], + arrayName: string, + startIndex: number, + maxIdx: number, +): void { + if (maxIdx >= 0) { + // Precise: per-element constraints. 
+ for (let i = 0; i <= maxIdx; i++) { + const paramIdx = startIndex + i; + if (paramIdx >= params.length) break; + constraints.push({ lhs: `${callee}::${params[paramIdx]}`, rhsKey: `${arrayName}[${i}]` }); } - - for (const { callee, arrayName, startIndex } of spreadArgBindings) { - const params = definitionParams.get(callee); - if (!params) continue; - const maxIdx = arrayMaxIndex.get(arrayName) ?? -1; - if (maxIdx >= 0) { - // Precise: per-element constraints. - for (let i = 0; i <= maxIdx; i++) { - const paramIdx = startIndex + i; - if (paramIdx >= params.length) break; - constraints.push({ lhs: `${callee}::${params[paramIdx]}`, rhsKey: `${arrayName}[${i}]` }); - } - } else { - // Unknown array size: all params at/after startIndex get the wildcard. - for (let j = startIndex; j < params.length; j++) { - constraints.push({ lhs: `${callee}::${params[j]}`, rhsKey: `${arrayName}[*]` }); - } - } + } else { + // Unknown array size: all params at/after startIndex get the wildcard. + for (let j = startIndex; j < params.length; j++) { + constraints.push({ lhs: `${callee}::${params[j]}`, rhsKey: `${arrayName}[*]` }); } } +} - // Phase 8.3e: for-of iteration constraints. - // `for (const x of arr)` inside `outer` → pts[outer::x] ⊇ pts[arr[*]] - if (forOfBindings) { - for (const { varName, sourceName, enclosingFunc } of forOfBindings) { - constraints.push({ lhs: `${enclosingFunc}::${varName}`, rhsKey: `${sourceName}[*]` }); - } +/** + * Phase 8.3e: spread-argument constraints. + * + * f(...arr) → pts[f::param_i] ⊇ pts[arr[i]] for each known element. + * + * Appends to `constraints`. 
+ */ +function buildSpreadArgConstraints( + constraints: Array<{ lhs: string; rhsKey: string }>, + spreadArgBindings?: readonly SpreadArgBinding[], + arrayElemBindings?: readonly ArrayElemBinding[], + definitionParams?: ReadonlyMap, +): void { + if (!spreadArgBindings || spreadArgBindings.length === 0 || !definitionParams) return; + const arrayMaxIndex = computeArrayMaxIndex(arrayElemBindings); + + for (const { callee, arrayName, startIndex } of spreadArgBindings) { + const params = definitionParams.get(callee); + if (!params) continue; + const maxIdx = arrayMaxIndex.get(arrayName) ?? -1; + pushSpreadArgConstraintsForCallee(constraints, callee, params, arrayName, startIndex, maxIdx); } +} - // Phase 8.3e: Array.from / callback constraints. - // Array.from(source, cb) → pts[cb::param0] ⊇ pts[source[*]] - if (arrayCallbackBindings && definitionParams) { - for (const { sourceName, calleeName } of arrayCallbackBindings) { - const params = definitionParams.get(calleeName); - if (!params || params.length === 0) continue; - constraints.push({ lhs: `${calleeName}::${params[0]}`, rhsKey: `${sourceName}[*]` }); - } +/** + * Phase 8.3e: for-of iteration constraints. + * + * `for (const x of arr)` inside `outer` → pts[outer::x] ⊇ pts[arr[*]] + * + * Appends to `constraints`. + */ +function buildForOfConstraints( + constraints: Array<{ lhs: string; rhsKey: string }>, + forOfBindings?: readonly ForOfBinding[], +): void { + if (!forOfBindings) return; + for (const { varName, sourceName, enclosingFunc } of forOfBindings) { + constraints.push({ lhs: `${enclosingFunc}::${varName}`, rhsKey: `${sourceName}[*]` }); } } +/** + * Phase 8.3e: Array.from / callback constraints. + * + * Array.from(source, cb) → pts[cb::param0] ⊇ pts[source[*]] + * + * Appends to `constraints`. 
+ */ +function buildArrayCallbackConstraints( + constraints: Array<{ lhs: string; rhsKey: string }>, + arrayCallbackBindings?: readonly ArrayCallbackBinding[], + definitionParams?: ReadonlyMap, +): void { + if (!arrayCallbackBindings || !definitionParams) return; + for (const { sourceName, calleeName } of arrayCallbackBindings) { + const params = definitionParams.get(calleeName); + if (!params || params.length === 0) continue; + constraints.push({ lhs: `${calleeName}::${params[0]}`, rhsKey: `${sourceName}[*]` }); + } +} + +/** + * Append parameter-flow and array/spread/forOf/callback constraints (Phases 8.3c and 8.3e). + * + * Delegates to one named helper per binding kind (buildParamFlowConstraints, + * buildArrayElemConstraints, buildSpreadArgConstraints, buildForOfConstraints, + * buildArrayCallbackConstraints) — each handler owns exactly one binding kind's + * guard + iteration + constraint-push shape, called in the same order the + * original inline blocks ran in (none of the blocks read state written by an + * earlier one, so extraction does not change solver input order). + * + * Mutates `pts` (seeds array-element entries) and appends to `constraints`. 
+ */ +function buildParamAndArrayConstraints( + pts: PointsToMap, + constraints: Array<{ lhs: string; rhsKey: string }>, + paramBindings?: readonly ParamBinding[], + definitionParams?: ReadonlyMap, + arrayElemBindings?: readonly ArrayElemBinding[], + spreadArgBindings?: readonly SpreadArgBinding[], + forOfBindings?: readonly ForOfBinding[], + arrayCallbackBindings?: readonly ArrayCallbackBinding[], +): void { + buildParamFlowConstraints(constraints, paramBindings, definitionParams); + buildArrayElemConstraints(pts, constraints, arrayElemBindings); + buildSpreadArgConstraints(constraints, spreadArgBindings, arrayElemBindings, definitionParams); + buildForOfConstraints(constraints, forOfBindings); + buildArrayCallbackConstraints(constraints, arrayCallbackBindings, definitionParams); +} + /** * Seed pts entries for object-rest parameter dispatch (Phase 8.3f). * diff --git a/src/domain/graph/resolver/strategy.ts b/src/domain/graph/resolver/strategy.ts index 84726b4a8..8bfe99cfb 100644 --- a/src/domain/graph/resolver/strategy.ts +++ b/src/domain/graph/resolver/strategy.ts @@ -57,41 +57,44 @@ export function isModuleScopedLanguage(relPath: string): boolean { return MODULE_SCOPED_BARE_CALL_EXTENSIONS.has(ext); } +// ── typeMap entry unwrapping ────────────────────────────────────────────────── + +/** + * Unwrap a typeMap entry to its plain string form. + * + * typeMap values are either a bare string (the target name) or an object of + * shape `{ type?: string }` (some seeders attach extra metadata alongside the + * target). This normalises both shapes to `string | null`, matching the + * falsy-check semantics every call site previously duplicated inline. + */ +function unwrapTypeEntry(entry: unknown): string | null { + if (!entry) return null; + return typeof entry === 'string' ? entry : ((entry as { type?: string }).type ?? 
null); +} + // ── resolveByReceiver ───────────────────────────────────────────────────────── /** - * Resolve a call site whose receiver is a concrete object reference - * (i.e. `receiver` is present and is NOT `this`, `self`, or `super`). + * Steps 1-3 of the resolveByReceiver cascade: resolve the type name for a + * concrete-object receiver. * - * Resolution cascade: * 1. typeMap class-scoped lookup (`ClassName.prop` key) for `this.prop` receivers. * 2. typeMap bare key, full-receiver key, callee-scoped rest-param key. * 3. Inline `new Ctor()` heuristic for un-normalised receiver text. - * 4. Typed method lookup via `TypeName.methodName` in symbol DB. - * 5. Prototype alias: `Foo.prototype.bar = identifier` via typeMap. - * 6. Direct qualified method lookup: `ClassName.staticMethod()`. - * 7. Composite pts key: `obj.prop` → callback target function. */ -export function resolveByReceiver( - lookup: StrategyLookup, - call: { name: string; receiver: string }, - relPath: string, +function resolveReceiverTypeName( typeMap: Map, + receiver: string, + effectiveReceiver: string, callerName?: string | null, -): ReadonlyArray<{ id: number; file: string }> { - // Strip "this." so `this.repo.method()` resolves via typeMap["repo"] - // (or the "this.repo" key seeded directly by the TSC property-declaration enricher). - const effectiveReceiver = call.receiver.startsWith('this.') - ? call.receiver.slice('this.'.length) - : call.receiver; - +): string | null { // For this.prop receivers, prefer the class-scoped key (ClassName.prop) seeded by // handlePropWriteTypeMap / handleFieldDefTypeMap — prevents false edges when multiple // classes define the same property name (issues #1323, #1458). // Class-scoped lookup runs first so bare fallback keys (confidence 0.6) don't shadow // the correct per-class entry when callerName is available. 
let typeEntry: unknown; - if (call.receiver.startsWith('this.') && callerName) { + if (receiver.startsWith('this.') && callerName) { const dotIdx = callerName.lastIndexOf('.'); if (dotIdx > -1) { const callerClass = callerName.slice(0, dotIdx); @@ -100,16 +103,12 @@ export function resolveByReceiver( } typeEntry ??= typeMap.get(effectiveReceiver) ?? - typeMap.get(call.receiver) ?? + typeMap.get(receiver) ?? // Phase 8.3f: callee-scoped rest-param key (`callee::restName`) to avoid // same-name rest-binding collision across functions in the same file (#1358). (callerName ? typeMap.get(`${callerName}::${effectiveReceiver}`) : undefined); - let typeName = typeEntry - ? typeof typeEntry === 'string' - ? typeEntry - : (typeEntry as { type?: string }).type - : null; + let typeName = unwrapTypeEntry(typeEntry); // Belt-and-suspenders fallback for inline new-expression receivers that // extractReceiverName did not normalise (e.g. raw text leaked from an @@ -120,77 +119,197 @@ export function resolveByReceiver( // The uppercase-initial restriction ([A-Z_$]) is a heuristic to distinguish // constructors (PascalCase) from regular functions and avoids false positives // on `(new xmlParser()).parse()` style calls. - if (!typeName && call.receiver) { - const m = /^\(?\s*new\s+([A-Z_$][A-Za-z0-9_$]*)/.exec(call.receiver); + if (!typeName && receiver) { + const m = /^\(?\s*new\s+([A-Z_$][A-Za-z0-9_$]*)/.exec(receiver); if (m?.[1]) typeName = m[1]; } + return typeName; +} + +/** Step 4: typed method lookup via `TypeName.methodName` in the symbol DB. 
*/ +function resolveViaTypedMethod( + lookup: StrategyLookup, + typeName: string, + call: { name: string }, + relPath: string, +): ReadonlyArray<{ id: number; file: string }> { + return lookup + .byName(`${typeName}.${call.name}`) + .filter((n) => n.kind === 'method' && computeConfidence(relPath, n.file, null) >= 0.5); +} + +/** + * Step 5: prototype alias — `Foo.prototype.bar = identifier` seeds + * typeMap['Foo.bar'] = { type: identifier }. + * Checked after the symbol-DB lookup so an actual method definition always wins. + */ +function resolveViaPrototypeAlias( + lookup: StrategyLookup, + typeMap: Map, + typeName: string, + call: { name: string }, + relPath: string, +): ReadonlyArray<{ id: number; file: string }> { + const protoTarget = unwrapTypeEntry(typeMap.get(`${typeName}.${call.name}`)); + if (!protoTarget) return []; + return lookup.byName(protoTarget).filter((t) => computeConfidence(relPath, t.file, null) >= 0.5); +} + +/** + * Step 6: direct qualified method lookup — `ClassName.staticMethod()` or + * `ClassName.instanceMethod()` when the receiver is a class name with no + * typeMap entry. Handles static method calls like `C6.staticMethod()` or + * `D.d()` where the receiver IS the class. Matches both 'method' and + * 'function' kinds to cover field-initializer synthetic defs. + */ +function resolveViaDirectQualifiedMethod( + lookup: StrategyLookup, + effectiveReceiver: string, + call: { name: string }, + relPath: string, +): ReadonlyArray<{ id: number; file: string }> { + const qualifiedName = `${effectiveReceiver}.${call.name}`; + return lookup + .byName(qualifiedName) + .filter( + (n) => + (n.kind === 'method' || n.kind === 'function') && + computeConfidence(relPath, n.file, null) >= 0.5, + ); +} + +/** + * Step 7: composite pts key — `obj.prop = fn` seeds typeMap['obj.prop'] = { type: 'fn' } + * (Phase 8.3d). When a call site references `obj.prop` as a callback, resolve + * directly to the target fn. 
+ */ +function resolveViaCompositePtsKey( + lookup: StrategyLookup, + typeMap: Map, + call: { name: string; receiver: string }, + relPath: string, +): ReadonlyArray<{ id: number; file: string }> { + const ptsTarget = unwrapTypeEntry(typeMap.get(`${call.receiver}.${call.name}`)); + if (!ptsTarget) return []; + return lookup.byName(ptsTarget).filter((t) => computeConfidence(relPath, t.file, null) >= 0.5); +} + +/** + * Resolve a call site whose receiver is a concrete object reference + * (i.e. `receiver` is present and is NOT `this`, `self`, or `super`). + * + * Resolution cascade (see the per-step helpers above for the numbered steps): + * 1-3. resolveReceiverTypeName — typeMap lookups + `new Ctor()` heuristic. + * 4. resolveViaTypedMethod — typed method lookup in symbol DB. + * 5. resolveViaPrototypeAlias — prototype alias via typeMap. + * 6. resolveViaDirectQualifiedMethod — direct qualified method lookup. + * 7. resolveViaCompositePtsKey — composite pts key → callback target function. + */ +export function resolveByReceiver( + lookup: StrategyLookup, + call: { name: string; receiver: string }, + relPath: string, + typeMap: Map, + callerName?: string | null, +): ReadonlyArray<{ id: number; file: string }> { + // Strip "this." so `this.repo.method()` resolves via typeMap["repo"] + // (or the "this.repo" key seeded directly by the TSC property-declaration enricher). + const effectiveReceiver = call.receiver.startsWith('this.') + ? call.receiver.slice('this.'.length) + : call.receiver; + + const typeName = resolveReceiverTypeName(typeMap, call.receiver, effectiveReceiver, callerName); + if (typeName) { - const typed = lookup - .byName(`${typeName}.${call.name}`) - .filter((n) => n.kind === 'method' && computeConfidence(relPath, n.file, null) >= 0.5); + const typed = resolveViaTypedMethod(lookup, typeName, call, relPath); if (typed.length > 0) return typed; - // Prototype alias: `Foo.prototype.bar = identifier` seeds typeMap['Foo.bar'] = { type: identifier }. 
- // Checked after the symbol-DB lookup so an actual method definition always wins. - const protoEntry = typeMap.get(`${typeName}.${call.name}`); - const protoTarget = protoEntry - ? typeof protoEntry === 'string' - ? protoEntry - : (protoEntry as { type?: string }).type - : null; - if (protoTarget) { - const resolved = lookup - .byName(protoTarget) - .filter((t) => computeConfidence(relPath, t.file, null) >= 0.5); - if (resolved.length > 0) return resolved; - } - } - - // Direct qualified method lookup: ClassName.staticMethod() or ClassName.instanceMethod() - // when the receiver is a class name with no typeMap entry. Handles static method calls - // like `C6.staticMethod()` or `D.d()` where the receiver IS the class. - // Matches both 'method' and 'function' kinds to cover field-initializer synthetic defs. - if (!typeName) { - const qualifiedName = `${effectiveReceiver}.${call.name}`; - const direct = lookup - .byName(qualifiedName) - .filter( - (n) => - (n.kind === 'method' || n.kind === 'function') && - computeConfidence(relPath, n.file, null) >= 0.5, - ); + const viaPrototype = resolveViaPrototypeAlias(lookup, typeMap, typeName, call, relPath); + if (viaPrototype.length > 0) return viaPrototype; + } else { + const direct = resolveViaDirectQualifiedMethod(lookup, effectiveReceiver, call, relPath); if (direct.length > 0) return direct; } - // Phase 8.3d: composite pts key — `obj.prop = fn` seeds typeMap['obj.prop'] = { type: 'fn' }. - // When a call site references `obj.prop` as a callback, resolve directly to the target fn. - const compositeEntry = typeMap.get(`${call.receiver}.${call.name}`); - const ptsTarget = compositeEntry - ? typeof compositeEntry === 'string' - ? 
compositeEntry - : (compositeEntry as { type?: string }).type - : null; - if (ptsTarget) { - const resolved = lookup - .byName(ptsTarget) - .filter((t) => computeConfidence(relPath, t.file, null) >= 0.5); - if (resolved.length > 0) return resolved; - } + const viaComposite = resolveViaCompositePtsKey(lookup, typeMap, call, relPath); + if (viaComposite.length > 0) return viaComposite; return []; } // ── resolveByGlobal ─────────────────────────────────────────────────────────── +/** + * Step 1: accessor this-dispatch via Object.defineProperty (Phase 8.3f). + * + * When a plain function (no class prefix) is registered as a get/set accessor + * for `obj` via Object.defineProperty, typeMap seeds 'callerName:this' = 'obj'. + * We then resolve this.method() → typeMap['obj.method'] → the concrete + * definition. Only applies to a bare (non-qualified) callerName + `this` + * receiver; runs before the broad exact-name lookup to avoid false positives + * from unrelated same-file definitions. + */ +function resolveViaAccessorThisDispatch( + lookup: StrategyLookup, + typeMap: Map, + call: { name: string; receiver?: string | null }, + relPath: string, + callerName?: string | null, +): ReadonlyArray<{ id: number; file: string }> { + if (!(call.receiver === 'this' && callerName && !callerName.includes('.'))) return []; + const objName = unwrapTypeEntry(typeMap.get(`${callerName}:this`)); + if (!objName) return []; + const targetFn = unwrapTypeEntry(typeMap.get(`${objName}.${call.name}`)); + if (!targetFn) return []; + return lookup.byName(targetFn).filter((t) => computeConfidence(relPath, t.file, null) >= 0.5); +} + +/** + * Step 3: same-class sibling method fallback via callerName. + * + * e.g. `this.area()` inside `Shape.describe` → try `Shape.area`. Also covers + * no-receiver calls inside class methods, e.g. `IsValidEmail(x)` inside + * `Validators.ValidateUser` → try `Validators.IsValidEmail` (C#/Java static + * siblings). 
This seeds the initial edge that runChaPostPass later expands to + * subclass overrides. + * + * For JS/TS, bare (no-receiver) calls are module-scoped — there is no + * implicit class binding. Skip the same-class fallback for bare calls in + * those languages to prevent false positives (e.g. `flush()` inside + * `Processor.run` must not resolve to `Processor.flush`). this.method() + * calls are unaffected: they still reach the fallback because + * `call.receiver === 'this'` is truthy, not a bare call. + */ +function resolveViaSameClassSibling( + lookup: StrategyLookup, + call: { name: string; receiver?: string | null }, + relPath: string, + callerName?: string | null, +): ReadonlyArray<{ id: number; file: string }> { + const isBareCall = !call.receiver; + if (!callerName || (isBareCall && isModuleScopedLanguage(relPath))) return []; + const dotIdx = callerName.lastIndexOf('.'); + if (dotIdx <= -1) return []; + // Extract only the segment immediately before the method name so that + // 'Namespace.ClassName.method' yields 'ClassName', not 'Namespace.ClassName'. + // Symbols are stored under their bare class name, not their qualified path. + const prevDot = callerName.lastIndexOf('.', dotIdx - 1); + const callerClass = callerName.slice(prevDot + 1, dotIdx); + const qualifiedName = `${callerClass}.${call.name}`; + return lookup + .byName(qualifiedName) + .filter((t) => t.kind === 'method' && computeConfidence(relPath, t.file, null) >= 0.5); +} + /** * Resolve a call site with no receiver, or whose receiver is `this`, `self`, * or `super`. * * Resolution cascade: - * 1. Accessor this-dispatch via Object.defineProperty (Phase 8.3f). + * 1. resolveViaAccessorThisDispatch — Object.defineProperty this-dispatch (Phase 8.3f). * 2. Exact global name lookup with confidence filter. - * 3. Same-class sibling method fallback (C#/Java static siblings, this.method()). + * 3. resolveViaSameClassSibling — same-class sibling method fallback. 
*/ export function resolveByGlobal( lookup: StrategyLookup, @@ -199,67 +318,16 @@ export function resolveByGlobal( typeMap: Map, callerName?: string | null, ): ReadonlyArray<{ id: number; file: string }> { - // Phase 8.3f: accessor this-dispatch via Object.defineProperty. - // When a plain function (no class prefix) is registered as a get/set accessor for `obj` - // via Object.defineProperty, typeMap seeds 'callerName:this' = 'obj'. - // We then resolve this.method() → typeMap['obj.method'] → the concrete definition. - // This runs before the broad exact-name lookup to avoid false positives from - // unrelated same-file definitions. - if (call.receiver === 'this' && callerName && !callerName.includes('.')) { - const accessorThisEntry = typeMap.get(`${callerName}:this`); - const objName = accessorThisEntry - ? typeof accessorThisEntry === 'string' - ? accessorThisEntry - : (accessorThisEntry as { type?: string }).type - : null; - if (objName) { - const objMethodEntry = typeMap.get(`${objName}.${call.name}`); - const targetFn = objMethodEntry - ? typeof objMethodEntry === 'string' - ? objMethodEntry - : (objMethodEntry as { type?: string }).type - : null; - if (targetFn) { - const resolved = lookup - .byName(targetFn) - .filter((t) => computeConfidence(relPath, t.file, null) >= 0.5); - if (resolved.length > 0) return resolved; - } - } - } + const viaAccessor = resolveViaAccessorThisDispatch(lookup, typeMap, call, relPath, callerName); + if (viaAccessor.length > 0) return viaAccessor; const exact = lookup .byName(call.name) .filter((t) => computeConfidence(relPath, t.file, null) >= 0.5); if (exact.length > 0) return exact; - // Try same-class method lookup via callerName. - // e.g. `this.area()` inside `Shape.describe` → try `Shape.area`. - // Also covers no-receiver calls inside class methods, e.g. `IsValidEmail(x)` inside - // `Validators.ValidateUser` → try `Validators.IsValidEmail` (C#/Java static siblings). 
- // This seeds the initial edge that runChaPostPass later expands to subclass overrides. - // - // For JS/TS, bare (no-receiver) calls are module-scoped — there is no implicit class - // binding. Skip the same-class fallback for bare calls in those languages to prevent - // false positives (e.g. `flush()` inside `Processor.run` must not resolve to - // `Processor.flush`). this.method() calls are unaffected: they still reach the fallback - // because `call.receiver === 'this'` is truthy, not a bare call. - const isBareCall = !call.receiver; - if (callerName && !(isBareCall && isModuleScopedLanguage(relPath))) { - const dotIdx = callerName.lastIndexOf('.'); - if (dotIdx > -1) { - // Extract only the segment immediately before the method name so that - // 'Namespace.ClassName.method' yields 'ClassName', not 'Namespace.ClassName'. - // Symbols are stored under their bare class name, not their qualified path. - const prevDot = callerName.lastIndexOf('.', dotIdx - 1); - const callerClass = callerName.slice(prevDot + 1, dotIdx); - const qualifiedName = `${callerClass}.${call.name}`; - const sameClass = lookup - .byName(qualifiedName) - .filter((t) => t.kind === 'method' && computeConfidence(relPath, t.file, null) >= 0.5); - if (sameClass.length > 0) return sameClass; - } - } + const sameClass = resolveViaSameClassSibling(lookup, call, relPath, callerName); + if (sameClass.length > 0) return sameClass; return exact; // empty } diff --git a/src/domain/graph/resolver/ts-resolver.ts b/src/domain/graph/resolver/ts-resolver.ts index 2aca2d5bc..4b4e67f43 100644 --- a/src/domain/graph/resolver/ts-resolver.ts +++ b/src/domain/graph/resolver/ts-resolver.ts @@ -159,6 +159,21 @@ function countLowConfidence(typeMap: Map): number { return count; } +/** + * Shared "collect candidates by name → keep only names with a single unique + * value → write" ambiguity-filtering algorithm used by both enrichSourceFile + * (ambiguity check on qualifiedName) and enrichCallAssignments (ambiguity + * 
check on calleeName). + * + * Returns `entries[0]` if every entry shares exactly one distinct value under + * `keyOf`, or `null` if they disagree (ambiguous) or `entries` is empty. + */ +function resolveUnambiguous(entries: readonly T[], keyOf: (entry: T) => string): T | null { + const uniqueKeys = new Set(entries.map(keyOf)); + if (uniqueKeys.size !== 1) return null; + return entries[0] ?? null; +} + /** * Walk up from rootDir looking for tsconfig.json (up to 4 levels). * Handles monorepo setups where rootDir is a package subdirectory but @@ -239,78 +254,105 @@ function createProgram(ts: TsModule, tsconfigPath: string): import('typescript') * Entries already at confidence 1.0 (e.g., `new Foo()` from tree-sitter) are * left unchanged. New entries from the compiler are added at confidence 1.0. */ -function enrichSourceFile( - ts: TsModule, - sourceFile: import('typescript').SourceFile, - checker: import('typescript').TypeChecker, - typeMap: Map, -): void { - // First pass: collect resolved types keyed by bare identifier name. - // Track both the short name (for typeMap writes) and the fully-qualified name +/** + * Mutable state threaded through the enrichSourceFile visitor. Grouped into + * one object (rather than closed-over locals) so the walk can be a plain + * top-level function, outside the enclosing function's own complexity count. + */ +interface SourceFileVisitContext { + ts: TsModule; + checker: import('typescript').TypeChecker; + // Collects resolved types keyed by bare identifier name. Tracks both the + // short name (for typeMap writes) and the fully-qualified name // (module-path-prefixed) for ambiguity detection. Two classes may share the // same short name (e.g., `OrderService` from two different modules), and // symbol.getName() returns the declared name — not the local alias — so // deduplication on short names alone would incorrectly collapse them. 
- const nameToEntries = new Map(); - // Track class property declaration names so we can also seed "this.X" entries. - const propertyDeclNames = new Set(); + nameToEntries: Map; + // Class property declaration names so we can also seed "this.X" entries. + propertyDeclNames: Set; +} - function visit(node: import('typescript').Node): void { - let identName: string | null = null; - let nameNode: import('typescript').Identifier | null = null; - - if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name)) { - identName = node.name.text; - nameNode = node.name; - } else if (ts.isParameter(node) && ts.isIdentifier(node.name)) { - identName = node.name.text; - nameNode = node.name; - } else if (ts.isPropertyDeclaration(node) && ts.isIdentifier(node.name)) { - // TypeScript class field: `private repo: Repository` - // Seeds typeMap so `this.repo.method()` can be resolved via receiver type. - identName = node.name.text; - nameNode = node.name; - propertyDeclNames.add(node.name.text); - } +function visitSourceFileNode(ctx: SourceFileVisitContext, node: import('typescript').Node): void { + const { ts, checker, nameToEntries, propertyDeclNames } = ctx; + let identName: string | null = null; + let nameNode: import('typescript').Identifier | null = null; + + if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name)) { + identName = node.name.text; + nameNode = node.name; + } else if (ts.isParameter(node) && ts.isIdentifier(node.name)) { + identName = node.name.text; + nameNode = node.name; + } else if (ts.isPropertyDeclaration(node) && ts.isIdentifier(node.name)) { + // TypeScript class field: `private repo: Repository` + // Seeds typeMap so `this.repo.method()` can be resolved via receiver type. 
+ identName = node.name.text; + nameNode = node.name; + propertyDeclNames.add(node.name.text); + } - if (identName && nameNode) { - const resolved = resolveTypeName(ts, nameNode, checker); - if (resolved) { - const existing = nameToEntries.get(identName); - if (existing) { - existing.push(resolved); - } else { - nameToEntries.set(identName, [resolved]); - } + if (identName && nameNode) { + const resolved = resolveTypeName(ts, nameNode, checker); + if (resolved) { + const existing = nameToEntries.get(identName); + if (existing) { + existing.push(resolved); + } else { + nameToEntries.set(identName, [resolved]); } } + } - ts.forEachChild(node, visit); + ts.forEachChild(node, (child) => visitSourceFileNode(ctx, child)); +} + +/** + * Write one (name → candidate entries) group to typeMap if unambiguous + * (single unique qualified type for the name), plus its "this." + * companion entry when name is a class property. + */ +function writeSourceFileTypeMapEntry( + typeMap: Map, + propertyDeclNames: ReadonlySet, + name: string, + entries: { shortName: string; qualifiedName: string }[], +): void { + const first = resolveUnambiguous(entries, (e) => e.qualifiedName); + if (!first) return; // ambiguous across modules, or no candidates — skip + const shortName = first.shortName; + const existing = typeMap.get(name); + if (!existing || existing.confidence < 1.0) { + typeMap.set(name, { type: shortName, confidence: 1.0 }); } - ts.forEachChild(sourceFile, visit); + // For class property declarations, also seed "this.fieldName" so that + // `this.repo.findById()` call sites resolve to the interface/class type. 
+ if (propertyDeclNames.has(name)) { + const thisKey = `this.${name}`; + const existingThis = typeMap.get(thisKey); + if (!existingThis || existingThis.confidence < 1.0) { + typeMap.set(thisKey, { type: shortName, confidence: 1.0 }); + } + } +} + +function enrichSourceFile( + ts: TsModule, + sourceFile: import('typescript').SourceFile, + checker: import('typescript').TypeChecker, + typeMap: Map, +): void { + const ctx: SourceFileVisitContext = { + ts, + checker, + nameToEntries: new Map(), + propertyDeclNames: new Set(), + }; + ts.forEachChild(sourceFile, (node) => visitSourceFileNode(ctx, node)); // Second pass: only write unambiguous entries (single unique qualified type for a name) - for (const [name, entries] of nameToEntries) { - const uniqueQualified = [...new Set(entries.map((e) => e.qualifiedName))]; - if (uniqueQualified.length !== 1) continue; // ambiguous across modules — skip - // entries is non-empty because we only set() on first occurrence and push() after — - // TypeScript's noUncheckedIndexedAccess can flag [0] access, so assert the type. - const first = entries[0]; - if (!first) continue; - const shortName = first.shortName; - const existing = typeMap.get(name); - if (!existing || existing.confidence < 1.0) { - typeMap.set(name, { type: shortName, confidence: 1.0 }); - } - // For class property declarations, also seed "this.fieldName" so that - // `this.repo.findById()` call sites resolve to the interface/class type. 
- if (propertyDeclNames.has(name)) { - const thisKey = `this.${name}`; - const existingThis = typeMap.get(thisKey); - if (!existingThis || existingThis.confidence < 1.0) { - typeMap.set(thisKey, { type: shortName, confidence: 1.0 }); - } - } + for (const [name, entries] of ctx.nameToEntries) { + writeSourceFileTypeMapEntry(typeMap, ctx.propertyDeclNames, name, entries); } } @@ -327,98 +369,190 @@ function enrichSourceFile( * Async functions returning Promise are unwrapped: the inner type argument T is * used so that async methods receive a returnTypeMap entry just like sync ones. */ -function enrichReturnTypeMap( - ts: TsModule, - sourceFile: import('typescript').SourceFile, +/** + * Mutable state threaded through the enrichReturnTypeMap visitor. Grouped + * into one object (rather than closed-over locals) so the node-kind handlers + * below can be plain top-level functions, independently testable and outside + * the enclosing function's own complexity count. + */ +interface ReturnTypeVisitContext { + ts: TsModule; + checker: import('typescript').TypeChecker; + returnTypeMap: Map; + currentClass: string | null; +} + +/** + * Resolve the concrete return type name for a signature, unwrapping + * Promise so async functions contribute their inner type. + */ +function resolveReturnTypeName( checker: import('typescript').TypeChecker, - returnTypeMap: Map, -): void { - let currentClass: string | null = null; - - /** - * Resolve the concrete return type name for a signature, unwrapping - * Promise so async functions contribute their inner type. - */ - function resolveReturnTypeName(sig: import('typescript').Signature | undefined): string | null { - if (!sig) return null; - try { - let retType = checker.getReturnTypeOfSignature(sig); - - // Unwrap Promise → T so async functions get a useful returnTypeMap entry. - const outerSym = retType.getSymbol() ?? 
retType.aliasSymbol; - if (outerSym?.getName() === 'Promise') { - const args = checker.getTypeArguments(retType as import('typescript').TypeReference); - if (args.length > 0) retType = args[0]!; - } + sig: import('typescript').Signature | undefined, +): string | null { + if (!sig) return null; + try { + let retType = checker.getReturnTypeOfSignature(sig); - const sym = retType.getSymbol() ?? retType.aliasSymbol; - if (!sym) return null; - const name = sym.getName(); - if (!name || name === '__type' || name === '__object' || SKIP_TYPE_NAMES.has(name)) - return null; - return name; - } catch { - return null; + // Unwrap Promise → T so async functions get a useful returnTypeMap entry. + const outerSym = retType.getSymbol() ?? retType.aliasSymbol; + if (outerSym?.getName() === 'Promise') { + const args = checker.getTypeArguments(retType as import('typescript').TypeReference); + if (args.length > 0) retType = args[0]!; } + + const sym = retType.getSymbol() ?? retType.aliasSymbol; + if (!sym) return null; + const name = sym.getName(); + if (!name || name === '__type' || name === '__object' || SKIP_TYPE_NAMES.has(name)) return null; + return name; + } catch { + return null; } +} - function writeEntry(fnName: string, sigNode: import('typescript').SignatureDeclaration): void { - const typeName = resolveReturnTypeName(checker.getSignatureFromDeclaration(sigNode)); - if (typeName) { - const existing = returnTypeMap.get(fnName); - if (!existing || existing.confidence < 1.0) - returnTypeMap.set(fnName, { type: typeName, confidence: 1.0 }); - } +function writeReturnTypeEntry( + ctx: ReturnTypeVisitContext, + fnName: string, + sigNode: import('typescript').SignatureDeclaration, +): void { + const typeName = resolveReturnTypeName( + ctx.checker, + ctx.checker.getSignatureFromDeclaration(sigNode), + ); + if (typeName) { + const existing = ctx.returnTypeMap.get(fnName); + if (!existing || existing.confidence < 1.0) + ctx.returnTypeMap.set(fnName, { type: typeName, confidence: 1.0 
}); } +} - /** - * Visit nodes at the current lexical scope (module level or class body). - * Does NOT recurse into function/method bodies to avoid capturing local - * helper functions under bare names. - */ - function visit(node: import('typescript').Node): void { - if (ts.isClassDeclaration(node) || ts.isClassExpression(node)) { - // Enter class scope: visit direct children (method/property declarations). - const saved = currentClass; - currentClass = - (node as import('typescript').ClassDeclaration | import('typescript').ClassExpression).name - ?.text ?? null; - ts.forEachChild(node, visit); - currentClass = saved; - return; // class body fully handled — stop here - } +/** + * Enter class scope: visit direct children (method/property declarations), + * then restore the enclosing class name. + */ +function visitClassScopeForReturnType( + ctx: ReturnTypeVisitContext, + node: import('typescript').Node, +): void { + const saved = ctx.currentClass; + ctx.currentClass = + (node as import('typescript').ClassDeclaration | import('typescript').ClassExpression).name + ?.text ?? null; + ctx.ts.forEachChild(node, (child) => visitReturnTypeNode(ctx, child)); + ctx.currentClass = saved; +} - if (ts.isFunctionDeclaration(node) && node.name) { - // Module-level function declaration: record and stop (no body descent). - writeEntry(node.name.text, node); - return; - } +/** Module-level function declaration: record and stop (no body descent). */ +function visitFunctionDeclarationForReturnType( + ctx: ReturnTypeVisitContext, + node: import('typescript').FunctionDeclaration, +): void { + // node.name is guaranteed truthy by the caller's guard. + writeReturnTypeEntry(ctx, node.name!.text, node); +} - if (ts.isMethodDeclaration(node) && ts.isIdentifier(node.name)) { - // Class method: record as ClassName.methodName and stop. - const fnName = currentClass ? 
`${currentClass}.${node.name.text}` : node.name.text; - writeEntry(fnName, node); - return; - } +/** Class method: record as ClassName.methodName and stop. */ +function visitMethodDeclarationForReturnType( + ctx: ReturnTypeVisitContext, + node: import('typescript').MethodDeclaration, +): void { + // node.name is guaranteed to be an Identifier by the caller's guard. + const name = (node.name as import('typescript').Identifier).text; + const fnName = ctx.currentClass ? `${ctx.currentClass}.${name}` : name; + writeReturnTypeEntry(ctx, fnName, node); +} - if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name) && node.initializer) { - // Arrow/function-expression assigned to a variable at the current scope. - // Because we never recurse into function bodies, any VariableDeclaration - // we see here is guaranteed to be at module scope or inside a class body - // (not inside a method body), making the bare name safe for cross-file use. - const init = node.initializer; - if (ts.isArrowFunction(init) || ts.isFunctionExpression(init)) { - writeEntry(node.name.text, init); - } - return; // variable declaration fully handled — stop here - } +/** + * Arrow/function-expression assigned to a variable at the current scope. + * Because we never recurse into function bodies, any VariableDeclaration + * seen here is guaranteed to be at module scope or inside a class body + * (not inside a method body), making the bare name safe for cross-file use. + */ +function visitVariableInitializerForReturnType( + ctx: ReturnTypeVisitContext, + node: import('typescript').VariableDeclaration, +): void { + // node.name is guaranteed to be an Identifier and node.initializer is + // guaranteed defined by the caller's guard. 
+ const init = node.initializer!; + if (ctx.ts.isArrowFunction(init) || ctx.ts.isFunctionExpression(init)) { + writeReturnTypeEntry(ctx, (node.name as import('typescript').Identifier).text, init); + } +} - // For all other node kinds (VariableStatement, VariableDeclarationList, - // ExportDeclaration, etc.) recurse to reach nested function/class/var nodes. - ts.forEachChild(node, visit); +/** + * Visit nodes at the current lexical scope (module level or class body). + * Does NOT recurse into function/method bodies to avoid capturing local + * helper functions under bare names. + */ +function visitReturnTypeNode(ctx: ReturnTypeVisitContext, node: import('typescript').Node): void { + const { ts } = ctx; + + if (ts.isClassDeclaration(node) || ts.isClassExpression(node)) { + visitClassScopeForReturnType(ctx, node); + return; // class body fully handled — stop here } - ts.forEachChild(sourceFile, visit); + if (ts.isFunctionDeclaration(node) && node.name) { + visitFunctionDeclarationForReturnType(ctx, node); + return; + } + + if (ts.isMethodDeclaration(node) && ts.isIdentifier(node.name)) { + visitMethodDeclarationForReturnType(ctx, node); + return; + } + + if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name) && node.initializer) { + visitVariableInitializerForReturnType(ctx, node); + return; // variable declaration fully handled — stop here + } + + // For all other node kinds (VariableStatement, VariableDeclarationList, + // ExportDeclaration, etc.) recurse to reach nested function/class/var nodes. 
+ ts.forEachChild(node, (child) => visitReturnTypeNode(ctx, child)); +} + +function enrichReturnTypeMap( + ts: TsModule, + sourceFile: import('typescript').SourceFile, + checker: import('typescript').TypeChecker, + returnTypeMap: Map, +): void { + const ctx: ReturnTypeVisitContext = { ts, checker, returnTypeMap, currentClass: null }; + ts.forEachChild(sourceFile, (node) => visitReturnTypeNode(ctx, node)); +} + +/** + * Resolve the callee name and, for receiver method calls (`obj.method()`), + * the receiver's typeMap-resolved type name, from a call expression's callee. + * + * Handles two callee shapes: a bare identifier (`fn()`) and a property-access + * expression (`obj.method()`); any other callee shape (e.g. a call expression + * itself, as in `getFactory()()`) yields no calleeName. + */ +function resolveCalleeNameAndReceiverType( + ts: TsModule, + call: import('typescript').CallExpression, + typeMap: Map, +): { calleeName: string | null; receiverTypeName: string | undefined } { + if (ts.isIdentifier(call.expression)) { + return { calleeName: call.expression.text, receiverTypeName: undefined }; + } + + if (ts.isPropertyAccessExpression(call.expression)) { + const calleeName = call.expression.name.text; + const obj = call.expression.expression; + let receiverTypeName: string | undefined; + if (ts.isIdentifier(obj)) { + const entry = typeMap.get(obj.text); + if (entry && typeof entry === 'object') receiverTypeName = entry.type; + } + return { calleeName, receiverTypeName }; + } + + return { calleeName: null, receiverTypeName: undefined }; } /** @@ -426,13 +560,14 @@ function enrichReturnTypeMap( * is not yet in typeMap into callAssignments for cross-file propagation. * Phase 8.1 already resolved the common case into typeMap; this captures the rest. * - * Uses the same two-pass "unambiguous names only" strategy as `enrichSourceFile`: - * collect all candidates first, then only push entries where a given `varName` - * maps to exactly one distinct `calleeName`. 
This prevents multiple methods in the - * same file that each bind a different imported function to a common local name - * (e.g., `const result = getA()` in one method, `const result = getB()` in - * another) from both landing in `callAssignments`, which would cause - * `propagateReturnTypesAcrossFiles` to silently resolve one arbitrarily. + * Uses the same two-pass "unambiguous names only" strategy as `enrichSourceFile` + * (via the shared `resolveUnambiguous` helper): collect all candidates first, + * then only push entries where a given `varName` maps to exactly one distinct + * `calleeName`. This prevents multiple methods in the same file that each bind + * a different imported function to a common local name (e.g., `const result = + * getA()` in one method, `const result = getB()` in another) from both landing + * in `callAssignments`, which would cause `propagateReturnTypesAcrossFiles` to + * silently resolve one arbitrarily. */ function enrichCallAssignments( ts: TsModule, @@ -452,20 +587,11 @@ function enrichCallAssignments( ) { const varName = node.name.text; if (!typeMap.has(varName)) { - const call = node.initializer; - let calleeName: string | null = null; - let receiverTypeName: string | undefined; - - if (ts.isIdentifier(call.expression)) { - calleeName = call.expression.text; - } else if (ts.isPropertyAccessExpression(call.expression)) { - calleeName = call.expression.name.text; - const obj = call.expression.expression; - if (ts.isIdentifier(obj)) { - const entry = typeMap.get(obj.text); - if (entry && typeof entry === 'object') receiverTypeName = entry.type; - } - } + const { calleeName, receiverTypeName } = resolveCalleeNameAndReceiverType( + ts, + node.initializer, + typeMap, + ); if (calleeName) { const ca: CallAssignment = { varName, calleeName, receiverTypeName }; @@ -488,10 +614,8 @@ function enrichCallAssignments( // calleeName. 
Ambiguous varNames (same name, different callees across scopes) // are excluded to avoid silently resolving the wrong type cross-file. for (const entries of candidates.values()) { - const uniqueCallees = new Set(entries.map((e) => e.calleeName)); - if (uniqueCallees.size === 1) { - callAssignments.push(entries[0] as CallAssignment); - } + const resolved = resolveUnambiguous(entries, (e) => e.calleeName); + if (resolved) callAssignments.push(resolved); } } From 57143a8e2ef683ed9dbbd9823925adb5ea279498 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 13:32:50 -0600 Subject: [PATCH 20/39] fix: adopt buildFileConditionSQL in prepare.ts and move console.log out of domain layer Impact: 1 functions changed, 8 affected --- src/domain/search/search/prepare.ts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/domain/search/search/prepare.ts b/src/domain/search/search/prepare.ts index a28330ced..ec670d9b8 100644 --- a/src/domain/search/search/prepare.ts +++ b/src/domain/search/search/prepare.ts @@ -1,6 +1,7 @@ import { openReadonlyOrFail } from '../../../db/index.js'; -import { escapeLike } from '../../../db/query-builder.js'; +import { buildFileConditionSQL } from '../../../db/query-builder.js'; import { getEmbeddingCount, getEmbeddingMeta } from '../../../db/repository/embeddings.js'; +import { info } from '../../../infrastructure/logger.js'; import type { BetterSqlite3Database } from '../../../types.js'; import { MODELS } from '../models.js'; import { applyFilters } from './filters.js'; @@ -47,7 +48,7 @@ export function prepareSearch( try { const count = getEmbeddingCount(db); if (count === 0) { - console.log('No embeddings found. Run `codegraph embed` first.'); + info('No embeddings found. Run `codegraph embed` first.'); db.close(); return null; } @@ -82,12 +83,11 @@ export function prepareSearch( params.push(opts.kind); } if (fpArr.length > 0 && !isGlob) { - if (fpArr.length === 1) { - conditions.push("n.file LIKE ? 
ESCAPE '\\'"); - params.push(`%${escapeLike(fpArr[0]!)}%`); - } else { - conditions.push(`(${fpArr.map(() => "n.file LIKE ? ESCAPE '\\'").join(' OR ')})`); - params.push(...fpArr.map((f) => `%${escapeLike(f)}%`)); + const fc = buildFileConditionSQL(fpArr, 'n.file'); + if (fc.sql) { + // Strip leading ' AND ' since we're using conditions array + conditions.push(fc.sql.replace(/^ AND /, '')); + params.push(...fc.params); } } if (conditions.length > 0) { From 21db9a9f9698a623fa25fc678b51d2bf80f881cc Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 13:54:44 -0600 Subject: [PATCH 21/39] fix: address quality issues in graph unified model (model.ts merge() aliasing, leiden complexity) model.ts: merge() aliased NodeAttrs/EdgeAttrs objects by reference instead of cloning, unlike subgraph()/filterEdges()/clone() which all defensively copy with { ...attrs }. A caller merging graph B into graph A could silently leak mutations across graphs via the shared attrs object. No production caller exists today (verified via fn-impact: 0 callers besides the test), so this was a latent defect, not a demonstrated one -- now fixed to match the file's established convention. 
leiden/partition.ts + leiden/adapter.ts: decomposed the remaining cognitive/cyclomatic-exceeding functions left after phase 3's shared aggregate/typed-array helper extraction (commit 0f9bbe6f), following the same directed/undirected-branch-splitting pattern gauntlet recommended: - moveNode (cognitive 16->2, cyclomatic 13->3): split into applyMoveStrengthTotals + applyMoveInternalEdgeWeightDelta[Directed/Undirected] - buildSortedCommunityIds (cognitive 17->3): extracted compareBySizeDesc/ compareByPreserveMap comparators - computeDeltaCPM (cognitive 17->4): extracted computeCpmEdgeWeights[Directed/Undirected] - makeGraphAdapter (cognitive 27->3): extracted resolveAdapterOptions, buildNodeIndex, computeNodeSizes, makeForEachNeighbor - populateUndirectedEdges (cognitive 28->0): extracted aggregateUndirectedPairs/recordUndirectedPairWeight, emitUndirectedPairs, applyUndirectedSelfLoops Pure behavior-preserving decomposition -- no algorithm changes. Verified: full test suite (201/201 files, 3336 tests), leiden-specific suite (22/22), graph suite (177/177 incl. merge()), typecheck, and lint all green. Community-detection output on the leiden-specific directories is byte-identical before/after (confirmed via codegraph communities --drift split-candidates, controlling for the known #1734 run-to-run noise). docs check acknowledged: internal refactor + bug fix only, no user-facing feature/language/architecture-table changes. 
Impact: 28 functions changed, 31 affected --- src/graph/algorithms/leiden/adapter.ts | 178 ++++++++++++++----- src/graph/algorithms/leiden/partition.ts | 217 +++++++++++++++-------- src/graph/model.ts | 4 +- 3 files changed, 280 insertions(+), 119 deletions(-) diff --git a/src/graph/algorithms/leiden/adapter.ts b/src/graph/algorithms/leiden/adapter.ts index 390a15aa3..29efc1d55 100644 --- a/src/graph/algorithms/leiden/adapter.ts +++ b/src/graph/algorithms/leiden/adapter.ts @@ -74,22 +74,33 @@ function populateDirectedEdges( } } -/** - * Populate edge arrays for an undirected graph. Reciprocal pairs are - * symmetrized and averaged to produce a single weight per undirected edge. - * Self-loops use single-w convention (matching modularity.ts formulas). - */ -function populateUndirectedEdges( +/** Fold a single a→b weight into the unordered-pair aggregate, tracking which direction(s) were seen. */ +function recordUndirectedPairWeight( + pairAgg: Map, + a: number, + b: number, + w: number, +): void { + const i = a < b ? a : b; + const j = a < b ? b : a; + const key = `${i}:${j}`; + let rec = pairAgg.get(key); + if (!rec) { + rec = { sum: 0, seenAB: 0, seenBA: 0 }; + pairAgg.set(key, rec); + } + rec.sum += w; + if (a === i) rec.seenAB = 1; + else rec.seenBA = 1; +} + +/** Aggregate raw undirected edges into one weighted record per unordered node pair. */ +function aggregateUndirectedPairs( graph: CodeGraph, idToIndex: Map, linkWeight: (attrs: EdgeAttrs) => number, - n: number, selfLoop: Float64Array, - outEdges: EdgeEntry[][], - inEdges: InEdgeEntry[][], - strengthOut: Float64Array, - strengthIn: Float64Array, -): void { +): Map { const pairAgg = new Map(); for (const [src, tgt, attrs] of graph.edges()) { @@ -101,19 +112,20 @@ function populateUndirectedEdges( taAdd(selfLoop, a, w); continue; } - const i = a < b ? a : b; - const j = a < b ? 
b : a; - const key = `${i}:${j}`; - let rec = pairAgg.get(key); - if (!rec) { - rec = { sum: 0, seenAB: 0, seenBA: 0 }; - pairAgg.set(key, rec); - } - rec.sum += w; - if (a === i) rec.seenAB = 1; - else rec.seenBA = 1; + recordUndirectedPairWeight(pairAgg, a, b, w); } + return pairAgg; +} + +/** Emit symmetrized undirected edges (averaged over any reciprocal pairs) into the adjacency lists. */ +function emitUndirectedPairs( + pairAgg: Map, + outEdges: EdgeEntry[][], + inEdges: InEdgeEntry[][], + strengthOut: Float64Array, + strengthIn: Float64Array, +): void { for (const [key, rec] of pairAgg.entries()) { const parts = key.split(':'); const i = +(parts[0] as string); @@ -130,10 +142,21 @@ function populateUndirectedEdges( taAdd(strengthIn, i, w); taAdd(strengthIn, j, w); } +} - // Add self-loops into adjacency and strengths. - // Note: uses single-w convention (not standard 2w) — the modularity formulas in - // modularity.ts are written to match this convention, keeping the system self-consistent. +/** + * Add self-loops into adjacency and strengths. + * Note: uses single-w convention (not standard 2w) — the modularity formulas in + * modularity.ts are written to match this convention, keeping the system self-consistent. + */ +function applyUndirectedSelfLoops( + n: number, + selfLoop: Float64Array, + outEdges: EdgeEntry[][], + inEdges: InEdgeEntry[][], + strengthOut: Float64Array, + strengthIn: Float64Array, +): void { for (let v = 0; v < n; v++) { const w: number = fget(selfLoop, v); if (w !== 0) { @@ -145,15 +168,56 @@ function populateUndirectedEdges( } } -export function makeGraphAdapter(graph: CodeGraph, opts: GraphAdapterOptions = {}): GraphAdapter { - const linkWeight: (attrs: EdgeAttrs) => number = - opts.linkWeight || ((attrs) => (attrs && typeof attrs.weight === 'number' ? attrs.weight : 1)); - const nodeSize: (attrs: NodeAttrs) => number = - opts.nodeSize || ((attrs) => (attrs && typeof attrs.size === 'number' ? 
attrs.size : 1)); - const directed: boolean = !!opts.directed; - const baseNodeIds: string[] | undefined = opts.baseNodeIds; - - // Build dense node index mapping +/** + * Populate edge arrays for an undirected graph. Reciprocal pairs are + * symmetrized and averaged to produce a single weight per undirected edge. + * Self-loops use single-w convention (matching modularity.ts formulas). + */ +function populateUndirectedEdges( + graph: CodeGraph, + idToIndex: Map, + linkWeight: (attrs: EdgeAttrs) => number, + n: number, + selfLoop: Float64Array, + outEdges: EdgeEntry[][], + inEdges: InEdgeEntry[][], + strengthOut: Float64Array, + strengthIn: Float64Array, +): void { + const pairAgg = aggregateUndirectedPairs(graph, idToIndex, linkWeight, selfLoop); + emitUndirectedPairs(pairAgg, outEdges, inEdges, strengthOut, strengthIn); + applyUndirectedSelfLoops(n, selfLoop, outEdges, inEdges, strengthOut, strengthIn); +} + +interface ResolvedAdapterOptions { + linkWeight: (attrs: EdgeAttrs) => number; + nodeSize: (attrs: NodeAttrs) => number; + directed: boolean; + baseNodeIds: string[] | undefined; +} + +/** Apply GraphAdapterOptions defaults (weight=1, size=1, directed=false). */ +function resolveAdapterOptions(opts: GraphAdapterOptions): ResolvedAdapterOptions { + return { + linkWeight: + opts.linkWeight || + ((attrs) => (attrs && typeof attrs.weight === 'number' ? attrs.weight : 1)), + nodeSize: + opts.nodeSize || ((attrs) => (attrs && typeof attrs.size === 'number' ? attrs.size : 1)), + directed: !!opts.directed, + baseNodeIds: opts.baseNodeIds, + }; +} + +/** + * Build the dense node index mapping. When `baseNodeIds` is provided, node + * order/indices are pinned to it (used to align adapters built from related + * graphs); otherwise indices are assigned in CodeGraph iteration order. 
+ */ +function buildNodeIndex( + graph: CodeGraph, + baseNodeIds: string[] | undefined, +): { nodeIds: string[]; idToIndex: Map } { const nodeIds: string[] = []; const idToIndex = new Map(); if (Array.isArray(baseNodeIds) && baseNodeIds.length > 0) { @@ -169,10 +233,39 @@ export function makeGraphAdapter(graph: CodeGraph, opts: GraphAdapterOptions = { nodeIds.push(id); } } + return { nodeIds, idToIndex }; +} + +/** Resolve per-node sizes via the adapter's nodeSize accessor, dense-indexed. */ +function computeNodeSizes( + graph: CodeGraph, + idToIndex: Map, + n: number, + nodeSize: (attrs: NodeAttrs) => number, +): Float64Array { + const size = new Float64Array(n); + for (const [id, attrs] of graph.nodes()) { + const i = idToIndex.get(id); + if (i != null) size[i] = +nodeSize(attrs) || 0; + } + return size; +} + +function makeForEachNeighbor( + outEdges: EdgeEntry[][], +): (i: number, cb: (to: number, w: number) => void) => void { + return (i, cb) => { + const list = outEdges[i] as EdgeEntry[]; + for (let k = 0; k < list.length; k++) cb((list[k] as EdgeEntry).to, (list[k] as EdgeEntry).w); + }; +} + +export function makeGraphAdapter(graph: CodeGraph, opts: GraphAdapterOptions = {}): GraphAdapter { + const { linkWeight, nodeSize, directed, baseNodeIds } = resolveAdapterOptions(opts); + const { nodeIds, idToIndex } = buildNodeIndex(graph, baseNodeIds); const n: number = nodeIds.length; // Storage - const size = new Float64Array(n); const selfLoop = new Float64Array(n); const strengthOut = new Float64Array(n); const strengthIn = new Float64Array(n); @@ -211,20 +304,11 @@ export function makeGraphAdapter(graph: CodeGraph, opts: GraphAdapterOptions = { ); } - // Node sizes - for (const [id, attrs] of graph.nodes()) { - const i = idToIndex.get(id); - if (i != null) size[i] = +nodeSize(attrs) || 0; - } + const size = computeNodeSizes(graph, idToIndex, n, nodeSize); // Totals const totalWeight: number = strengthOut.reduce((a, b) => a + b, 0); - function forEachNeighbor(i: 
number, cb: (to: number, w: number) => void): void { - const list = outEdges[i] as EdgeEntry[]; - for (let k = 0; k < list.length; k++) cb((list[k] as EdgeEntry).to, (list[k] as EdgeEntry).w); - } - return { n, nodeIds, @@ -237,6 +321,6 @@ export function makeGraphAdapter(graph: CodeGraph, opts: GraphAdapterOptions = { inEdges, directed, totalWeight, - forEachNeighbor, + forEachNeighbor: makeForEachNeighbor(outEdges), }; } diff --git a/src/graph/algorithms/leiden/partition.ts b/src/graph/algorithms/leiden/partition.ts index de78b8f3e..8e76f8c50 100644 --- a/src/graph/algorithms/leiden/partition.ts +++ b/src/graph/algorithms/leiden/partition.ts @@ -74,6 +74,34 @@ interface PartitionState { /* Community-ID sort helper (used by compact) */ /* ------------------------------------------------------------------ */ +/** Comparator: descending by community size, tie-broken by node count then id. */ +function compareBySizeDesc( + communityTotalSize: Float64Array, + communityNodeCount: Int32Array, +): (a: number, b: number) => number { + return (a, b) => + fget(communityTotalSize, b) - fget(communityTotalSize, a) || + iget(communityNodeCount, b) - iget(communityNodeCount, a) || + a - b; +} + +/** Comparator: respects a user-provided label map, falling back to size-desc for unmapped ids. */ +function compareByPreserveMap( + preserveMap: Map, + communityTotalSize: Float64Array, + communityNodeCount: Int32Array, +): (a: number, b: number) => number { + const fallback = compareBySizeDesc(communityTotalSize, communityNodeCount); + return (a, b) => { + const pa = preserveMap.get(a); + const pb = preserveMap.get(b); + if (pa != null && pb != null && pa !== pb) return pa - pb; + if (pa != null && pb == null) return -1; + if (pb != null && pa == null) return 1; + return fallback(a, b); + }; +} + /** * Sort community IDs according to the compaction options: preserve original * order, respect a user-provided label map, or sort by descending size. 
@@ -88,26 +116,9 @@ function buildSortedCommunityIds( if (opts.keepOldOrder) { ids.sort((a, b) => a - b); } else if (opts.preserveMap instanceof Map) { - const preserveMap = opts.preserveMap; - ids.sort((a, b) => { - const pa = preserveMap.get(a); - const pb = preserveMap.get(b); - if (pa != null && pb != null && pa !== pb) return pa - pb; - if (pa != null && pb == null) return -1; - if (pb != null && pa == null) return 1; - return ( - fget(communityTotalSize, b) - fget(communityTotalSize, a) || - iget(communityNodeCount, b) - iget(communityNodeCount, a) || - a - b - ); - }); + ids.sort(compareByPreserveMap(opts.preserveMap, communityTotalSize, communityNodeCount)); } else { - ids.sort( - (a, b) => - fget(communityTotalSize, b) - fget(communityTotalSize, a) || - iget(communityNodeCount, b) - iget(communityNodeCount, a) || - a - b, - ); + ids.sort(compareBySizeDesc(communityTotalSize, communityNodeCount)); } } @@ -273,30 +284,55 @@ function computeDeltaModularityDirected( return deltaInternal - deltaExpected; } +/** computeCpmEdgeWeights — directed branch: in+out weight, plus self-loop correction. */ +function computeCpmEdgeWeightsDirected( + s: PartitionState, + v: number, + oldC: number, + newC: number, +): { wOld: number; wNew: number; selfCorrection: number } { + const wOld: number = + (fget(s.outEdgeWeightToCommunity, oldC) || 0) + (fget(s.inEdgeWeightFromCommunity, oldC) || 0); + const wNew: number = + newC < s.outEdgeWeightToCommunity.length + ? (fget(s.outEdgeWeightToCommunity, newC) || 0) + + (fget(s.inEdgeWeightFromCommunity, newC) || 0) + : 0; + // Self-loop correction (see cpm.ts diffCPM) + const selfCorrection: number = 2 * (fget(s.graph.selfLoop, v) || 0); + return { wOld, wNew, selfCorrection }; +} + +/** computeCpmEdgeWeights — undirected branch: single neighbor-weight-to-community value. 
*/ +function computeCpmEdgeWeightsUndirected( + s: PartitionState, + oldC: number, + newC: number, +): { wOld: number; wNew: number; selfCorrection: number } { + const wOld: number = fget(s.neighborEdgeWeightToCommunity, oldC) || 0; + const wNew: number = + newC < s.neighborEdgeWeightToCommunity.length + ? fget(s.neighborEdgeWeightToCommunity, newC) || 0 + : 0; + return { wOld, wNew, selfCorrection: 0 }; +} + +/** Directed/undirected edge-weight-to-community split used by computeDeltaCPM. */ +function computeCpmEdgeWeights( + s: PartitionState, + v: number, + oldC: number, + newC: number, +): { wOld: number; wNew: number; selfCorrection: number } { + return s.graph.directed + ? computeCpmEdgeWeightsDirected(s, v, oldC, newC) + : computeCpmEdgeWeightsUndirected(s, oldC, newC); +} + function computeDeltaCPM(s: PartitionState, v: number, newC: number, gamma: number = 1.0): number { const oldC: number = iget(s.nodeCommunity, v); if (newC === oldC) return 0; - let w_old: number; - let w_new: number; - let selfCorrection: number = 0; - if (s.graph.directed) { - w_old = - (fget(s.outEdgeWeightToCommunity, oldC) || 0) + - (fget(s.inEdgeWeightFromCommunity, oldC) || 0); - w_new = - newC < s.outEdgeWeightToCommunity.length - ? (fget(s.outEdgeWeightToCommunity, newC) || 0) + - (fget(s.inEdgeWeightFromCommunity, newC) || 0) - : 0; - // Self-loop correction (see cpm.ts diffCPM) - selfCorrection = 2 * (fget(s.graph.selfLoop, v) || 0); - } else { - w_old = fget(s.neighborEdgeWeightToCommunity, oldC) || 0; - w_new = - newC < s.neighborEdgeWeightToCommunity.length - ? fget(s.neighborEdgeWeightToCommunity, newC) || 0 - : 0; - } + const { wOld: w_old, wNew: w_new, selfCorrection } = computeCpmEdgeWeights(s, v, oldC, newC); const nodeSz: number = fget(s.graph.size, v) || 1; const sizeOld: number = fget(s.communityTotalSize, oldC) || 0; const sizeNew: number = newC < s.communityTotalSize.length ? 
fget(s.communityTotalSize, newC) : 0; @@ -307,6 +343,75 @@ function computeDeltaCPM(s: PartitionState, v: number, newC: number, gamma: numb /* Extracted: node move */ /* ------------------------------------------------------------------ */ +/** Directed/undirected community strength-total delta applied by moveNode. */ +function applyMoveStrengthTotals( + s: PartitionState, + oldC: number, + newC: number, + strengthOutV: number, + strengthInV: number, +): void { + if (s.graph.directed) { + s.communityTotalOutStrength[oldC] = fget(s.communityTotalOutStrength, oldC) - strengthOutV; + s.communityTotalOutStrength[newC] = fget(s.communityTotalOutStrength, newC) + strengthOutV; + s.communityTotalInStrength[oldC] = fget(s.communityTotalInStrength, oldC) - strengthInV; + s.communityTotalInStrength[newC] = fget(s.communityTotalInStrength, newC) + strengthInV; + } else { + s.communityTotalStrength[oldC] = fget(s.communityTotalStrength, oldC) - strengthOutV; + s.communityTotalStrength[newC] = fget(s.communityTotalStrength, newC) + strengthOutV; + } +} + +/** applyMoveInternalEdgeWeightDelta — directed branch. */ +function applyMoveInternalEdgeWeightDeltaDirected( + s: PartitionState, + oldC: number, + newC: number, + selfLoopWeight: number, +): void { + const outToOld: number = fget(s.outEdgeWeightToCommunity, oldC) || 0; + const inFromOld: number = fget(s.inEdgeWeightFromCommunity, oldC) || 0; + const outToNew: number = + newC < s.outEdgeWeightToCommunity.length ? fget(s.outEdgeWeightToCommunity, newC) || 0 : 0; + const inFromNew: number = + newC < s.inEdgeWeightFromCommunity.length ? fget(s.inEdgeWeightFromCommunity, newC) || 0 : 0; + // outToOld/inFromOld already include the self-loop weight (self-loops are + // in outEdges/inEdges), so subtract it once to avoid triple-counting. 
+ s.communityInternalEdgeWeight[oldC] = + fget(s.communityInternalEdgeWeight, oldC) - (outToOld + inFromOld - selfLoopWeight); + s.communityInternalEdgeWeight[newC] = + fget(s.communityInternalEdgeWeight, newC) + (outToNew + inFromNew + selfLoopWeight); +} + +/** applyMoveInternalEdgeWeightDelta — undirected branch. */ +function applyMoveInternalEdgeWeightDeltaUndirected( + s: PartitionState, + oldC: number, + newC: number, + selfLoopWeight: number, +): void { + const weightToOld: number = fget(s.neighborEdgeWeightToCommunity, oldC) || 0; + const weightToNew: number = fget(s.neighborEdgeWeightToCommunity, newC) || 0; + s.communityInternalEdgeWeight[oldC] = + fget(s.communityInternalEdgeWeight, oldC) - (2 * weightToOld + selfLoopWeight); + s.communityInternalEdgeWeight[newC] = + fget(s.communityInternalEdgeWeight, newC) + (2 * weightToNew + selfLoopWeight); +} + +/** Directed/undirected community internal-edge-weight delta applied by moveNode. */ +function applyMoveInternalEdgeWeightDelta( + s: PartitionState, + oldC: number, + newC: number, + selfLoopWeight: number, +): void { + if (s.graph.directed) { + applyMoveInternalEdgeWeightDeltaDirected(s, oldC, newC, selfLoopWeight); + } else { + applyMoveInternalEdgeWeightDeltaUndirected(s, oldC, newC, selfLoopWeight); + } +} + function moveNode(s: PartitionState, v: number, newC: number): boolean { const oldC: number = iget(s.nodeCommunity, v); if (oldC === newC) return false; @@ -323,37 +428,9 @@ function moveNode(s: PartitionState, v: number, newC: number): boolean { s.communityNodeCount[newC] = iget(s.communityNodeCount, newC) + 1; s.communityTotalSize[oldC] = fget(s.communityTotalSize, oldC) - nodeSz; s.communityTotalSize[newC] = fget(s.communityTotalSize, newC) + nodeSz; - if (s.graph.directed) { - s.communityTotalOutStrength[oldC] = fget(s.communityTotalOutStrength, oldC) - strengthOutV; - s.communityTotalOutStrength[newC] = fget(s.communityTotalOutStrength, newC) + strengthOutV; - s.communityTotalInStrength[oldC] 
= fget(s.communityTotalInStrength, oldC) - strengthInV; - s.communityTotalInStrength[newC] = fget(s.communityTotalInStrength, newC) + strengthInV; - } else { - s.communityTotalStrength[oldC] = fget(s.communityTotalStrength, oldC) - strengthOutV; - s.communityTotalStrength[newC] = fget(s.communityTotalStrength, newC) + strengthOutV; - } - if (s.graph.directed) { - const outToOld: number = fget(s.outEdgeWeightToCommunity, oldC) || 0; - const inFromOld: number = fget(s.inEdgeWeightFromCommunity, oldC) || 0; - const outToNew: number = - newC < s.outEdgeWeightToCommunity.length ? fget(s.outEdgeWeightToCommunity, newC) || 0 : 0; - const inFromNew: number = - newC < s.inEdgeWeightFromCommunity.length ? fget(s.inEdgeWeightFromCommunity, newC) || 0 : 0; - // outToOld/inFromOld already include the self-loop weight (self-loops are - // in outEdges/inEdges), so subtract it once to avoid triple-counting. - s.communityInternalEdgeWeight[oldC] = - fget(s.communityInternalEdgeWeight, oldC) - (outToOld + inFromOld - selfLoopWeight); - s.communityInternalEdgeWeight[newC] = - fget(s.communityInternalEdgeWeight, newC) + (outToNew + inFromNew + selfLoopWeight); - } else { - const weightToOld: number = fget(s.neighborEdgeWeightToCommunity, oldC) || 0; - const weightToNew: number = fget(s.neighborEdgeWeightToCommunity, newC) || 0; - s.communityInternalEdgeWeight[oldC] = - fget(s.communityInternalEdgeWeight, oldC) - (2 * weightToOld + selfLoopWeight); - s.communityInternalEdgeWeight[newC] = - fget(s.communityInternalEdgeWeight, newC) + (2 * weightToNew + selfLoopWeight); - } + applyMoveStrengthTotals(s, oldC, newC, strengthOutV, strengthInV); + applyMoveInternalEdgeWeightDelta(s, oldC, newC, selfLoopWeight); s.nodeCommunity[v] = newC; return true; diff --git a/src/graph/model.ts b/src/graph/model.ts index d34270aa4..3961eb97f 100644 --- a/src/graph/model.ts +++ b/src/graph/model.ts @@ -228,10 +228,10 @@ export class CodeGraph { /** Merge another graph into this one. 
Nodes/edges from other override on conflict. */ merge(other: CodeGraph): this { for (const [id, attrs] of other.nodes()) { - this.addNode(id, attrs); + this.addNode(id, { ...attrs }); } for (const [src, tgt, attrs] of other.edges()) { - this.addEdge(src, tgt, attrs); + this.addEdge(src, tgt, { ...attrs }); } return this; } From f7ce3107900e06a5a17a75868cb2ed716f21bcd7 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 14:19:08 -0600 Subject: [PATCH 22/39] fix: address quality issues in features/complexity-query.ts (docs check acknowledged) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extract getExceededMetrics as the single source of truth for which manifesto thresholds a row exceeds, shared by mapComplexityRow and exceedsAnyThreshold — cuts mapComplexityRow's cyclomatic complexity from 23 (fail) to 10 and removes the duplicated 4-branch check. Replace the hardcoded default-threshold object with DEFAULTS.manifesto.rules (config.ts is already the source of truth for these values). Decompose complexityData/computeComplexitySummary (resolveComplexityQueryOptions, buildComplexityResult, queryComplexityRows, fetchAllComplexityMetrics, summarizeComplexityMetrics, average) to bring halstead.effort for every function in the file under the 15000 fail threshold. Pure decomposition, zero behavior change — verified via clean rebuild + full test suite. Widen tests/integration/complexity.test.ts's config.js mock to preserve real exports via importOriginal (it previously replaced the whole module, which broke once this file started importing DEFAULTS). 
Impact: 24 functions changed, 8 affected --- src/features/complexity-query.ts | 303 ++++++++++++++++----------- tests/integration/complexity.test.ts | 10 +- 2 files changed, 187 insertions(+), 126 deletions(-) diff --git a/src/features/complexity-query.ts b/src/features/complexity-query.ts index 5f3b9d121..d1ba86a3a 100644 --- a/src/features/complexity-query.ts +++ b/src/features/complexity-query.ts @@ -7,7 +7,7 @@ import { openReadonlyOrFail } from '../db/index.js'; import { buildFileConditionSQL } from '../db/query-builder.js'; -import { loadConfig } from '../infrastructure/config.js'; +import { DEFAULTS, loadConfig } from '../infrastructure/config.js'; import { debug } from '../infrastructure/logger.js'; import { isTestFile } from '../infrastructure/test-filter.js'; import { paginateResult } from '../shared/paginate.js'; @@ -35,6 +35,61 @@ interface ComplexityRow { const isValidThreshold = (v: unknown): v is number => typeof v === 'number' && Number.isFinite(v); +/** Column-sort expressions for `codegraph complexity --sort `. */ +const ORDER_BY_MAP: Record = { + cognitive: 'fc.cognitive DESC', + cyclomatic: 'fc.cyclomatic DESC', + nesting: 'fc.max_nesting DESC', + mi: 'fc.maintainability_index ASC', + volume: 'fc.halstead_volume DESC', + effort: 'fc.halstead_effort DESC', + bugs: 'fc.halstead_bugs DESC', + loc: 'fc.loc DESC', +}; + +interface ThresholdMetrics { + cognitive: number; + cyclomatic: number; + max_nesting: number; + maintainability_index: number; +} + +/** Single source of truth for which metric names exceed which thresholds. */ +const METRIC_THRESHOLD_CHECKS: Array<{ + name: string; + exceeds: (r: ThresholdMetrics, thresholds: any) => boolean; +}> = [ + { + name: 'cognitive', + exceeds: (r, t) => + isValidThreshold(t.cognitive?.warn) && r.cognitive >= (t.cognitive?.warn ?? 0), + }, + { + name: 'cyclomatic', + exceeds: (r, t) => + isValidThreshold(t.cyclomatic?.warn) && r.cyclomatic >= (t.cyclomatic?.warn ?? 
0), + }, + { + name: 'maxNesting', + exceeds: (r, t) => + isValidThreshold(t.maxNesting?.warn) && r.max_nesting >= (t.maxNesting?.warn ?? 0), + }, + { + name: 'maintainabilityIndex', + exceeds: (r, t) => + isValidThreshold(t.maintainabilityIndex?.warn) && + r.maintainability_index > 0 && + r.maintainability_index <= (t.maintainabilityIndex?.warn ?? 0), + }, +]; + +/** List of metric names a row exceeds (empty if none). */ +function getExceededMetrics(r: ThresholdMetrics, thresholds: any): string[] { + return METRIC_THRESHOLD_CHECKS.filter((check) => check.exceeds(r, thresholds)).map( + (check) => check.name, + ); +} + /** Build WHERE clause and params for complexity query filtering. */ function buildComplexityWhere(opts: { noTests: boolean; @@ -90,28 +145,7 @@ function buildThresholdHaving(thresholds: any): string { /** Map a raw DB row to the public complexity result shape. */ function mapComplexityRow(r: ComplexityRow, thresholds: any): Record { - const exceeds: string[] = []; - if ( - isValidThreshold(thresholds.cognitive?.warn) && - r.cognitive >= (thresholds.cognitive?.warn ?? 0) - ) - exceeds.push('cognitive'); - if ( - isValidThreshold(thresholds.cyclomatic?.warn) && - r.cyclomatic >= (thresholds.cyclomatic?.warn ?? 0) - ) - exceeds.push('cyclomatic'); - if ( - isValidThreshold(thresholds.maxNesting?.warn) && - r.max_nesting >= (thresholds.maxNesting?.warn ?? 0) - ) - exceeds.push('maxNesting'); - if ( - isValidThreshold(thresholds.maintainabilityIndex?.warn) && - r.maintainability_index > 0 && - r.maintainability_index <= (thresholds.maintainabilityIndex?.warn ?? 0) - ) - exceeds.push('maintainabilityIndex'); + const exceeds = getExceededMetrics(r, thresholds); return { name: r.name, @@ -136,21 +170,48 @@ function mapComplexityRow(r: ComplexityRow, thresholds: any): Record 0; +} + +/** Fetch the bare metric columns (all rows) used to compute summary statistics. 
*/ +function fetchAllComplexityMetrics( + db: ReturnType, + noTests: boolean, +): ThresholdMetrics[] { + return db + .prepare( + `SELECT fc.cognitive, fc.cyclomatic, fc.max_nesting, fc.maintainability_index + FROM function_complexity fc JOIN nodes n ON fc.node_id = n.id + WHERE n.kind IN ('function','method') + ${noTests ? `AND n.file NOT LIKE '%.test.%' AND n.file NOT LIKE '%.spec.%' AND n.file NOT LIKE '%__test__%' AND n.file NOT LIKE '%__tests__%' AND n.file NOT LIKE '%.stories.%'` : ''}`, + ) + .all(); +} + +/** Arithmetic mean, rounded to 1 decimal (matches the summary's existing precision). */ +function average(values: number[]): number { + return +(values.reduce((s, v) => s + v, 0) / values.length).toFixed(1); +} + +/** Reduce a set of complexity rows down to the public summary-statistics shape. */ +function summarizeComplexityMetrics( + allRows: ThresholdMetrics[], thresholds: any, -): boolean { - return ( - (isValidThreshold(thresholds.cognitive?.warn) && - r.cognitive >= (thresholds.cognitive?.warn ?? 0)) || - (isValidThreshold(thresholds.cyclomatic?.warn) && - r.cyclomatic >= (thresholds.cyclomatic?.warn ?? 0)) || - (isValidThreshold(thresholds.maxNesting?.warn) && - r.max_nesting >= (thresholds.maxNesting?.warn ?? 0)) || - (isValidThreshold(thresholds.maintainabilityIndex?.warn) && - r.maintainability_index > 0 && - r.maintainability_index <= (thresholds.maintainabilityIndex?.warn ?? 
0)) - ); +): Record { + const cognitiveValues = allRows.map((r) => r.cognitive); + const cyclomaticValues = allRows.map((r) => r.cyclomatic); + const miValues = allRows.map((r) => r.maintainability_index || 0); + return { + analyzed: allRows.length, + avgCognitive: average(cognitiveValues), + avgCyclomatic: average(cyclomaticValues), + maxCognitive: Math.max(...cognitiveValues), + maxCyclomatic: Math.max(...cyclomaticValues), + avgMI: average(miValues), + minMI: +Math.min(...miValues).toFixed(1), + aboveWarn: allRows.filter((r) => exceedsAnyThreshold(r, thresholds)).length, + }; } /** Compute summary statistics across all complexity rows. */ @@ -160,33 +221,9 @@ function computeComplexitySummary( thresholds: any, ): Record | null { try { - const allRows = db - .prepare<{ - cognitive: number; - cyclomatic: number; - max_nesting: number; - maintainability_index: number; - }>( - `SELECT fc.cognitive, fc.cyclomatic, fc.max_nesting, fc.maintainability_index - FROM function_complexity fc JOIN nodes n ON fc.node_id = n.id - WHERE n.kind IN ('function','method') - ${noTests ? 
`AND n.file NOT LIKE '%.test.%' AND n.file NOT LIKE '%.spec.%' AND n.file NOT LIKE '%__test__%' AND n.file NOT LIKE '%__tests__%' AND n.file NOT LIKE '%.stories.%'` : ''}`, - ) - .all(); - + const allRows = fetchAllComplexityMetrics(db, noTests); if (allRows.length === 0) return null; - - const miValues = allRows.map((r) => r.maintainability_index || 0); - return { - analyzed: allRows.length, - avgCognitive: +(allRows.reduce((s, r) => s + r.cognitive, 0) / allRows.length).toFixed(1), - avgCyclomatic: +(allRows.reduce((s, r) => s + r.cyclomatic, 0) / allRows.length).toFixed(1), - maxCognitive: Math.max(...allRows.map((r) => r.cognitive)), - maxCyclomatic: Math.max(...allRows.map((r) => r.cyclomatic)), - avgMI: +(miValues.reduce((s, v) => s + v, 0) / miValues.length).toFixed(1), - minMI: +Math.min(...miValues).toFixed(1), - aboveWarn: allRows.filter((r) => exceedsAnyThreshold(r, thresholds)).length, - }; + return summarizeComplexityMetrics(allRows, thresholds); } catch (e: unknown) { debug(`complexity summary query failed: ${(e as Error).message}`); return null; @@ -203,33 +240,89 @@ function checkHasGraph(db: ReturnType): boolean { } } +/** Run the main complexity rows query; returns null if the table doesn't exist yet. 
*/ +function queryComplexityRows( + db: ReturnType, + where: string, + having: string, + orderBy: string, + params: unknown[], +): ComplexityRow[] | null { + try { + return db + .prepare( + `SELECT n.name, n.kind, n.file, n.line, n.end_line, + fc.cognitive, fc.cyclomatic, fc.max_nesting, + fc.loc, fc.sloc, fc.maintainability_index, + fc.halstead_volume, fc.halstead_difficulty, fc.halstead_effort, fc.halstead_bugs + FROM function_complexity fc + JOIN nodes n ON fc.node_id = n.id + ${where} ${having} + ORDER BY ${orderBy}`, + ) + .all(...params); + } catch (e: unknown) { + debug(`complexity query failed (table may not exist): ${(e as Error).message}`); + return null; + } +} + +interface ComplexityQueryOpts { + target?: string; + limit?: number; + sort?: string; + aboveThreshold?: boolean; + file?: string; + kind?: string; + noTests?: boolean; + config?: CodegraphConfig; + offset?: number; +} + +/** Resolve query flags + effective manifesto thresholds from opts/config/DEFAULTS. */ +function resolveComplexityQueryOptions(opts: ComplexityQueryOpts): { + sort: string; + noTests: boolean; + aboveThreshold: boolean; + thresholds: any; +} { + const config = opts.config || loadConfig(process.cwd()); + return { + sort: opts.sort || 'cognitive', + noTests: opts.noTests || false, + aboveThreshold: opts.aboveThreshold || false, + thresholds: config.manifesto?.rules || DEFAULTS.manifesto.rules, + }; +} + +/** Run the query + summary and shape the pre-pagination result object. */ +function buildComplexityResult( + db: ReturnType, + sql: { where: string; having: string; orderBy: string; params: unknown[] }, + noTests: boolean, + thresholds: any, +): Record { + const rows = queryComplexityRows(db, sql.where, sql.having, sql.orderBy, sql.params); + if (rows === null) { + return { functions: [], summary: null, thresholds, hasGraph: checkHasGraph(db) }; + } + + const filtered = noTests ? 
rows.filter((r) => !isTestFile(r.file)) : rows; + const functions = filtered.map((r) => mapComplexityRow(r, thresholds)); + + const summary = computeComplexitySummary(db, noTests, thresholds); + const hasGraph = summary === null ? checkHasGraph(db) : false; + + return { functions, summary, thresholds, hasGraph }; +} + export function complexityData( customDbPath?: string, - opts: { - target?: string; - limit?: number; - sort?: string; - aboveThreshold?: boolean; - file?: string; - kind?: string; - noTests?: boolean; - config?: CodegraphConfig; - offset?: number; - } = {}, + opts: ComplexityQueryOpts = {}, ): Record { const db = openReadonlyOrFail(customDbPath); try { - const sort = opts.sort || 'cognitive'; - const noTests = opts.noTests || false; - const aboveThreshold = opts.aboveThreshold || false; - - const config = opts.config || loadConfig(process.cwd()); - const thresholds: any = config.manifesto?.rules || { - cognitive: { warn: 15, fail: null }, - cyclomatic: { warn: 10, fail: null }, - maxNesting: { warn: 4, fail: null }, - maintainabilityIndex: { warn: 20, fail: null }, - }; + const { sort, noTests, aboveThreshold, thresholds } = resolveComplexityQueryOptions(opts); const { where, params } = buildComplexityWhere({ noTests, @@ -239,45 +332,9 @@ export function complexityData( }); const having = aboveThreshold ? 
buildThresholdHaving(thresholds) : ''; + const orderBy = ORDER_BY_MAP[sort] || 'fc.cognitive DESC'; - const orderMap: Record = { - cognitive: 'fc.cognitive DESC', - cyclomatic: 'fc.cyclomatic DESC', - nesting: 'fc.max_nesting DESC', - mi: 'fc.maintainability_index ASC', - volume: 'fc.halstead_volume DESC', - effort: 'fc.halstead_effort DESC', - bugs: 'fc.halstead_bugs DESC', - loc: 'fc.loc DESC', - }; - const orderBy = orderMap[sort] || 'fc.cognitive DESC'; - - let rows: ComplexityRow[]; - try { - rows = db - .prepare( - `SELECT n.name, n.kind, n.file, n.line, n.end_line, - fc.cognitive, fc.cyclomatic, fc.max_nesting, - fc.loc, fc.sloc, fc.maintainability_index, - fc.halstead_volume, fc.halstead_difficulty, fc.halstead_effort, fc.halstead_bugs - FROM function_complexity fc - JOIN nodes n ON fc.node_id = n.id - ${where} ${having} - ORDER BY ${orderBy}`, - ) - .all(...params); - } catch (e: unknown) { - debug(`complexity query failed (table may not exist): ${(e as Error).message}`); - return { functions: [], summary: null, thresholds, hasGraph: checkHasGraph(db) }; - } - - const filtered = noTests ? rows.filter((r) => !isTestFile(r.file)) : rows; - const functions = filtered.map((r) => mapComplexityRow(r, thresholds)); - - const summary = computeComplexitySummary(db, noTests, thresholds); - const hasGraph = summary === null ? 
checkHasGraph(db) : false; - - const base = { functions, summary, thresholds, hasGraph }; + const base = buildComplexityResult(db, { where, having, orderBy, params }, noTests, thresholds); return paginateResult(base, 'functions', { limit: opts.limit, offset: opts.offset }); } finally { db.close(); diff --git a/tests/integration/complexity.test.ts b/tests/integration/complexity.test.ts index ddfbd8a81..62f4a9b8c 100644 --- a/tests/integration/complexity.test.ts +++ b/tests/integration/complexity.test.ts @@ -14,9 +14,13 @@ import { initSchema } from '../../src/db/index.js'; import { complexityData } from '../../src/features/complexity.js'; import { loadConfig } from '../../src/infrastructure/config.js'; -vi.mock('../../src/infrastructure/config.js', () => ({ - loadConfig: vi.fn(() => ({})), -})); +vi.mock('../../src/infrastructure/config.js', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + loadConfig: vi.fn(() => ({})), + }; +}); // ─── Helpers ─────────────────────────────────────────────────────────── From 5b708ee844f9dfdba7e836522e053a7bcac0bc1f Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 15:38:19 -0600 Subject: [PATCH 23/39] fix: address quality issues in features/cochange.ts (docs check acknowledged) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire computeCoChanges/analyzeCoChanges's minSupport/maxFilesPerCommit/since fallback literals through DEFAULTS.coChange instead of re-declaring the same magic numbers in two places (extended the same fix to minJaccard in coChangeData/coChangeTopData/coChangeForFiles for consistency). 
Decompose computeCoChanges' three passes (per-file counts, pair generation, Jaccard filtering) into named helpers (updateFileCommitCounts, updatePairCounts, buildCoChangeResults), plus scanGitHistory, analyzeCoChanges, coChangeData, coChangeTopData, and coChangeForFiles — bringing halstead.effort for every one of the 26 functions in the file under the 15000 fail threshold (worst was computeCoChanges at 65249.68). Fix the loadLastAnalyzedSha/loadKnownFiles silent catches to log via debug(), matching scanGitHistory's existing error-visibility pattern. Pure decomposition + config wiring, zero behavior change — verified via clean rebuild + full test suite (including the real git-history integration tests in cochange.test.ts). Impact: 23 functions changed, 15 affected --- src/features/cochange.ts | 377 +++++++++++++++++++++++++-------------- 1 file changed, 239 insertions(+), 138 deletions(-) diff --git a/src/features/cochange.ts b/src/features/cochange.ts index 2c4b9c379..48bed2c90 100644 --- a/src/features/cochange.ts +++ b/src/features/cochange.ts @@ -9,7 +9,8 @@ import { execFileSync } from 'node:child_process'; import fs from 'node:fs'; import path from 'node:path'; import { closeDb, findDbPath, initSchema, openDb, openReadonlyOrFail } from '../db/index.js'; -import { warn } from '../infrastructure/logger.js'; +import { DEFAULTS } from '../infrastructure/config.js'; +import { debug, warn } from '../infrastructure/logger.js'; import { isTestFile } from '../infrastructure/test-filter.js'; import { normalizePath } from '../shared/constants.js'; import { paginateResult } from '../shared/paginate.js'; @@ -34,10 +35,8 @@ interface CoChangeMeta { lastCommit: string | null; } -export function scanGitHistory( - repoRoot: string, - opts: { since?: string; afterSha?: string | null } = {}, -): { commits: CommitEntry[] } { +/** Build the `git log` argv for scanning co-change history. 
*/ +function buildGitLogArgs(opts: { since?: string; afterSha?: string | null }): string[] { const args = [ 'log', '--name-only', @@ -48,10 +47,35 @@ export function scanGitHistory( if (opts.since) args.push(`--since=${opts.since}`); if (opts.afterSha) args.push(`${opts.afterSha}..HEAD`); args.push('--', '.'); + return args; +} +/** Parse `git log --name-only --pretty=format:%H%n%at` output into commit entries. */ +function parseGitLogOutput(output: string): CommitEntry[] { + const commits: CommitEntry[] = []; + // Split on double newlines to get blocks; each block is sha\nepoch\nfile1\nfile2... + const blocks = output.trim().split(/\n\n+/); + for (const block of blocks) { + const lines = block.split('\n').filter((l) => l.length > 0); + if (lines.length < 2) continue; + const sha = lines[0]!; + const epoch = parseInt(lines[1]!, 10); + if (Number.isNaN(epoch)) continue; + const files = lines.slice(2).map((f) => normalizePath(f)); + if (files.length > 0) { + commits.push({ sha, epoch, files }); + } + } + return commits; +} + +export function scanGitHistory( + repoRoot: string, + opts: { since?: string; afterSha?: string | null } = {}, +): { commits: CommitEntry[] } { let output: string; try { - output = execFileSync('git', args, { + output = execFileSync('git', buildGitLogArgs(opts), { cwd: repoRoot, encoding: 'utf-8', maxBuffer: 50 * 1024 * 1024, @@ -64,30 +88,63 @@ export function scanGitHistory( if (!output.trim()) return { commits: [] }; - const commits: CommitEntry[] = []; - // Split on double newlines to get blocks; each block is sha\nepoch\nfile1\nfile2... 
- const blocks = output.trim().split(/\n\n+/); - for (const block of blocks) { - const lines = block.split('\n').filter((l) => l.length > 0); - if (lines.length < 2) continue; - const sha = lines[0]!; - const epoch = parseInt(lines[1]!, 10); - if (Number.isNaN(epoch)) continue; - const files = lines.slice(2).map((f) => normalizePath(f)); - if (files.length > 0) { - commits.push({ sha, epoch, files }); + return { commits: parseGitLogOutput(output) }; +} + +/** Pass 1: bump the per-file commit count for every file in a (filtered) commit. */ +function updateFileCommitCounts(files: string[], fileCommitCounts: Map): void { + for (const f of files) { + fileCommitCounts.set(f, (fileCommitCounts.get(f) || 0) + 1); + } +} + +/** Pass 2: generate all unique file pairs for a commit (canonical: a < b) and tally them. */ +function updatePairCounts( + files: string[], + epoch: number, + pairCounts: Map, + pairLastEpoch: Map, +): void { + const sorted = [...new Set(files)].sort(); + for (let i = 0; i < sorted.length; i++) { + for (let j = i + 1; j < sorted.length; j++) { + const key = `${sorted[i]}\0${sorted[j]}`; + pairCounts.set(key, (pairCounts.get(key) || 0) + 1); + const prev = pairLastEpoch.get(key) || 0; + if (epoch > prev) pairLastEpoch.set(key, epoch); } } +} - return { commits }; +/** Pass 3: filter pairs by minSupport and compute their Jaccard similarity. 
*/ +function buildCoChangeResults( + pairCounts: Map, + pairLastEpoch: Map, + fileCommitCounts: Map, + minSupport: number, +): Map { + const results = new Map(); + for (const [key, count] of pairCounts) { + if (count < minSupport) continue; + const [fileA, fileB] = key.split('\0') as [string, string]; + const countA = fileCommitCounts.get(fileA) || 0; + const countB = fileCommitCounts.get(fileB) || 0; + const jaccard = count / (countA + countB - count); + results.set(key, { + commitCount: count, + jaccard, + lastEpoch: pairLastEpoch.get(key) || 0, + }); + } + return results; } export function computeCoChanges( commits: CommitEntry[], opts: { minSupport?: number; maxFilesPerCommit?: number; knownFiles?: Set | null } = {}, ): { pairs: Map; fileCommitCounts: Map } { - const minSupport = opts.minSupport ?? 3; - const maxFilesPerCommit = opts.maxFilesPerCommit ?? 50; + const minSupport = opts.minSupport ?? DEFAULTS.coChange.minSupport; + const maxFilesPerCommit = opts.maxFilesPerCommit ?? DEFAULTS.coChange.maxFilesPerCommit; const knownFiles = opts.knownFiles || null; const fileCommitCounts = new Map(); @@ -102,39 +159,14 @@ export function computeCoChanges( files = files.filter((f) => knownFiles.has(f)); } - // Count per-file commits - for (const f of files) { - fileCommitCounts.set(f, (fileCommitCounts.get(f) || 0) + 1); - } - - // Generate all unique pairs (canonical: a < b) - const sorted = [...new Set(files)].sort(); - for (let i = 0; i < sorted.length; i++) { - for (let j = i + 1; j < sorted.length; j++) { - const key = `${sorted[i]}\0${sorted[j]}`; - pairCounts.set(key, (pairCounts.get(key) || 0) + 1); - const prev = pairLastEpoch.get(key) || 0; - if (commit.epoch > prev) pairLastEpoch.set(key, commit.epoch); - } - } + updateFileCommitCounts(files, fileCommitCounts); + updatePairCounts(files, commit.epoch, pairCounts, pairLastEpoch); } - // Filter by minSupport and compute Jaccard - const results = new Map(); - for (const [key, count] of pairCounts) { - if (count 
< minSupport) continue; - const [fileA, fileB] = key.split('\0') as [string, string]; - const countA = fileCommitCounts.get(fileA) || 0; - const countB = fileCommitCounts.get(fileB) || 0; - const jaccard = count / (countA + countB - count); - results.set(key, { - commitCount: count, - jaccard, - lastEpoch: pairLastEpoch.get(key) || 0, - }); - } - - return { pairs: results, fileCommitCounts }; + return { + pairs: buildCoChangeResults(pairCounts, pairLastEpoch, fileCommitCounts, minSupport), + fileCommitCounts, + }; } /** Read the SHA of the most recently analyzed commit (incremental state). */ @@ -146,8 +178,8 @@ function loadLastAnalyzedSha(db: BetterSqlite3Database): string | null { ) .get(); return row ? row.value : null; - } catch { - /* table may not exist yet */ + } catch (e: unknown) { + debug(`loadLastAnalyzedSha: co_change_meta table may not exist yet: ${(e as Error).message}`); return null; } } @@ -164,8 +196,8 @@ function loadKnownFiles(db: BetterSqlite3Database): Set | null { try { const rows = db.prepare<{ file: string }>('SELECT DISTINCT file FROM nodes').all(); return new Set(rows.map((r) => r.file)); - } catch { - /* nodes table may not exist */ + } catch (e: unknown) { + debug(`loadKnownFiles: nodes table may not exist: ${(e as Error).message}`); return null; } } @@ -236,6 +268,47 @@ function updateCoChangeMeta( metaUpsert.run('min_support', String(minSupport)); } +interface CoChangeAnalysisOptions { + since: string; + minSupport: number; + maxFilesPerCommit: number; +} + +/** Resolve since/minSupport/maxFilesPerCommit from opts, falling back to DEFAULTS.coChange. */ +function resolveCoChangeAnalysisOptions(opts: { + since?: string; + minSupport?: number; + maxFilesPerCommit?: number; +}): CoChangeAnalysisOptions { + return { + since: opts.since || DEFAULTS.coChange.since, + minSupport: opts.minSupport ?? DEFAULTS.coChange.minSupport, + maxFilesPerCommit: opts.maxFilesPerCommit ?? 
DEFAULTS.coChange.maxFilesPerCommit, + }; +} + +/** Scan git history, compute co-change pairs, and persist them + the run metadata. */ +function runCoChangeScanAndPersist( + db: BetterSqlite3Database, + repoRoot: string, + afterSha: string | null, + resolved: CoChangeAnalysisOptions, +): CommitEntry[] { + const knownFiles = loadKnownFiles(db); + const { commits } = scanGitHistory(repoRoot, { since: resolved.since, afterSha }); + const { pairs: coChanges, fileCommitCounts } = computeCoChanges(commits, { + minSupport: resolved.minSupport, + maxFilesPerCommit: resolved.maxFilesPerCommit, + knownFiles, + }); + + persistCoChangeResults(db, fileCommitCounts, coChanges); + recomputeJaccardForAffected(db, [...fileCommitCounts.keys()]); + updateCoChangeMeta(db, commits, resolved.since, resolved.minSupport); + + return commits; +} + export function analyzeCoChanges( customDbPath?: string, opts: { @@ -258,25 +331,11 @@ export function analyzeCoChanges( return { error: `Not a git repository: ${repoRoot}` }; } - const since = opts.since || '1 year ago'; - const minSupport = opts.minSupport ?? 3; - const maxFilesPerCommit = opts.maxFilesPerCommit ?? 50; - + const resolved = resolveCoChangeAnalysisOptions(opts); const afterSha = opts.full ? 
null : loadLastAnalyzedSha(db); if (opts.full) clearCoChangeTables(db); - const knownFiles = loadKnownFiles(db); - - const { commits } = scanGitHistory(repoRoot, { since, afterSha }); - const { pairs: coChanges, fileCommitCounts } = computeCoChanges(commits, { - minSupport, - maxFilesPerCommit, - knownFiles, - }); - - persistCoChangeResults(db, fileCommitCounts, coChanges); - recomputeJaccardForAffected(db, [...fileCommitCounts.keys()]); - updateCoChangeMeta(db, commits, since, minSupport); + const commits = runCoChangeScanAndPersist(db, repoRoot, afterSha, resolved); const totalPairs = db .prepare<{ cnt: number }>('SELECT COUNT(*) as cnt FROM co_changes') @@ -287,8 +346,8 @@ export function analyzeCoChanges( return { pairsFound: totalPairs, commitsScanned: commits.length, - since, - minSupport, + since: resolved.since, + minSupport: resolved.minSupport, }; } @@ -300,6 +359,49 @@ interface CoChangeRow { last_commit_epoch: number; } +/** True if the `co_changes` table exists (i.e. `analyzeCoChanges` has run at least once). */ +function hasCoChangeTable(db: BetterSqlite3Database): boolean { + try { + db.prepare('SELECT 1 FROM co_changes LIMIT 1').get(); + return true; + } catch (e: unknown) { + debug(`hasCoChangeTable: co_changes table missing: ${(e as Error).message}`); + return false; + } +} + +/** Format a last-commit epoch (seconds) as `YYYY-MM-DD`, or null if absent. */ +function epochToDateString(epoch: number): string | null { + return epoch ? new Date(epoch * 1000).toISOString().slice(0, 10) : null; +} + +/** Shape+filter co-change rows into the public per-file "partners" list. 
*/ +function buildCoChangePartners( + rows: CoChangeRow[], + resolvedFile: string, + noTests: boolean, + limit: number, +): Array<{ file: string; commitCount: number; jaccard: number; lastCommitDate: string | null }> { + const partners: Array<{ + file: string; + commitCount: number; + jaccard: number; + lastCommitDate: string | null; + }> = []; + for (const row of rows) { + const partner = row.file_a === resolvedFile ? row.file_b : row.file_a; + if (noTests && isTestFile(partner)) continue; + partners.push({ + file: partner, + commitCount: row.commit_count, + jaccard: row.jaccard, + lastCommitDate: epochToDateString(row.last_commit_epoch), + }); + if (partners.length >= limit) break; + } + return partners; +} + export function coChangeData( file: string, customDbPath?: string, @@ -307,13 +409,10 @@ export function coChangeData( ): Record { const db = openReadonlyOrFail(customDbPath); const limit = opts.limit || 20; - const minJaccard = opts.minJaccard ?? 0.3; + const minJaccard = opts.minJaccard ?? DEFAULTS.coChange.minJaccard; const noTests = opts.noTests || false; - // Check if co_changes table exists - try { - db.prepare('SELECT 1 FROM co_changes LIMIT 1').get(); - } catch { + if (!hasCoChangeTable(db)) { closeDb(db); return { error: 'No co-change data found. Run `codegraph co-change --analyze` first.' }; } @@ -334,31 +433,46 @@ export function coChangeData( ) .all(resolvedFile, resolvedFile, minJaccard); - const partners: Array<{ - file: string; + const partners = buildCoChangePartners(rows, resolvedFile, noTests, limit); + + const meta = getCoChangeMeta(db); + closeDb(db); + + const base = { file: resolvedFile, partners, meta }; + return paginateResult(base, 'partners', { limit: opts.limit, offset: opts.offset }); +} + +/** Shape+filter co-change rows into the public global "top pairs" list. 
*/ +function buildCoChangeTopPairs( + rows: CoChangeRow[], + noTests: boolean, + limit: number, +): Array<{ + fileA: string; + fileB: string; + commitCount: number; + jaccard: number; + lastCommitDate: string | null; +}> { + const pairs: Array<{ + fileA: string; + fileB: string; commitCount: number; jaccard: number; lastCommitDate: string | null; }> = []; for (const row of rows) { - const partner = row.file_a === resolvedFile ? row.file_b : row.file_a; - if (noTests && isTestFile(partner)) continue; - partners.push({ - file: partner, + if (noTests && (isTestFile(row.file_a) || isTestFile(row.file_b))) continue; + pairs.push({ + fileA: row.file_a, + fileB: row.file_b, commitCount: row.commit_count, jaccard: row.jaccard, - lastCommitDate: row.last_commit_epoch - ? new Date(row.last_commit_epoch * 1000).toISOString().slice(0, 10) - : null, + lastCommitDate: epochToDateString(row.last_commit_epoch), }); - if (partners.length >= limit) break; + if (pairs.length >= limit) break; } - - const meta = getCoChangeMeta(db); - closeDb(db); - - const base = { file: resolvedFile, partners, meta }; - return paginateResult(base, 'partners', { limit: opts.limit, offset: opts.offset }); + return pairs; } export function coChangeTopData( @@ -367,12 +481,10 @@ export function coChangeTopData( ): Record { const db = openReadonlyOrFail(customDbPath); const limit = opts.limit || 20; - const minJaccard = opts.minJaccard ?? 0.3; + const minJaccard = opts.minJaccard ?? DEFAULTS.coChange.minJaccard; const noTests = opts.noTests || false; - try { - db.prepare('SELECT 1 FROM co_changes LIMIT 1').get(); - } catch { + if (!hasCoChangeTable(db)) { closeDb(db); return { error: 'No co-change data found. Run `codegraph co-change --analyze` first.' 
}; } @@ -386,32 +498,40 @@ export function coChangeTopData( ) .all(minJaccard); - const pairs: Array<{ - fileA: string; - fileB: string; + const pairs = buildCoChangeTopPairs(rows, noTests, limit); + + const meta = getCoChangeMeta(db); + closeDb(db); + + const base = { pairs, meta }; + return paginateResult(base, 'pairs', { limit: opts.limit, offset: opts.offset }); +} + +/** Shape+filter co-change rows into the public "coupled with an input file" list. */ +function buildCoChangeForFilesResults( + rows: Array<{ file_a: string; file_b: string; commit_count: number; jaccard: number }>, + inputSet: Set, + noTests: boolean, +): Array<{ file: string; coupledWith: string; commitCount: number; jaccard: number }> { + const results: Array<{ + file: string; + coupledWith: string; commitCount: number; jaccard: number; - lastCommitDate: string | null; }> = []; for (const row of rows) { - if (noTests && (isTestFile(row.file_a) || isTestFile(row.file_b))) continue; - pairs.push({ - fileA: row.file_a, - fileB: row.file_b, + const partner = inputSet.has(row.file_a) ? row.file_b : row.file_a; + const source = inputSet.has(row.file_a) ? row.file_a : row.file_b; + if (inputSet.has(partner)) continue; + if (noTests && isTestFile(partner)) continue; + results.push({ + file: partner, + coupledWith: source, commitCount: row.commit_count, jaccard: row.jaccard, - lastCommitDate: row.last_commit_epoch - ? 
new Date(row.last_commit_epoch * 1000).toISOString().slice(0, 10) - : null, }); - if (pairs.length >= limit) break; } - - const meta = getCoChangeMeta(db); - closeDb(db); - - const base = { pairs, meta }; - return paginateResult(base, 'pairs', { limit: opts.limit, offset: opts.offset }); + return results; } export function coChangeForFiles( @@ -419,7 +539,7 @@ export function coChangeForFiles( db: BetterSqlite3Database, opts: { minJaccard?: number; limit?: number; noTests?: boolean } = {}, ): Array<{ file: string; coupledWith: string; commitCount: number; jaccard: number }> { - const minJaccard = opts.minJaccard ?? 0.3; + const minJaccard = opts.minJaccard ?? DEFAULTS.coChange.minJaccard; const limit = opts.limit ?? 20; const noTests = opts.noTests || false; const inputSet = new Set(files); @@ -438,26 +558,7 @@ export function coChangeForFiles( ) .all(...files, ...files, minJaccard, limit); - const results: Array<{ - file: string; - coupledWith: string; - commitCount: number; - jaccard: number; - }> = []; - for (const row of rows) { - const partner = inputSet.has(row.file_a) ? row.file_b : row.file_a; - const source = inputSet.has(row.file_a) ? row.file_a : row.file_b; - if (inputSet.has(partner)) continue; - if (noTests && isTestFile(partner)) continue; - results.push({ - file: partner, - coupledWith: source, - commitCount: row.commit_count, - jaccard: row.jaccard, - }); - } - - return results; + return buildCoChangeForFilesResults(rows, inputSet, noTests); } // ─── Internal Helpers ──────────────────────────────────────────────────── From 7c3b8696808c4e6625ae75128019ed374b19dace Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 15:39:36 -0600 Subject: [PATCH 24/39] fix: address quality issues in features/branch-compare.ts (docs check acknowledged) This was the run's worst gauntlet offender (halstead.bugs 1.585 on branchCompareData). 
Pure decomposition per the gauntlet recommendation: extract git-ref validation (validateBranchCompareRefs), dual-worktree + dual-buildGraph setup (setupCompareWorktrees), and output-shape cleanup (shapeBranchCompareSymbolLists) out of branchCompareData; unify attachImpactToSymbols/attachImpactToChanged into one generic attachImpact(symbols, resolveId, dbPath, maxDepth, noTests) parameterized by id-resolution strategy. Extended the same treatment to the file's other named-FAIL functions (loadSymbolsFromDb: halstead.effort 123718.05->12326.18, bugs 0.9546->0.2182; branchCompareMermaid: cyclomatic 22->6) and to pre-existing effort failures that the gauntlet's summary didn't name explicitly (loadCallersFromDb, compareSymbols) -- consistent with this phase's cochange.ts/complexity-query.ts fixes, where the file-level FAIL verdict covers every function over threshold, not just the 2-3 worst examples cited in the audit detail text. Zero behavior change: both exported functions (branchCompareData, branchCompareMermaid) keep byte-identical signatures; every extraction preserves exact call order, error-handling scope (the try/catch/finally around worktree creation is untouched), and the existing mutate-in-place impact-attachment pattern. Verified via tests/integration/branch-compare.test.ts, which exercises real git worktrees + buildGraph + DB comparison end-to-end (not mocked), plus the full suite, both before and after each incremental edit.
Impact: 44 functions changed, 15 affected --- src/features/branch-compare.ts | 798 +++++++++++++++++++++------------ 1 file changed, 504 insertions(+), 294 deletions(-) diff --git a/src/features/branch-compare.ts b/src/features/branch-compare.ts index 086ed1f11..cdf20674a 100644 --- a/src/features/branch-compare.ts +++ b/src/features/branch-compare.ts @@ -10,7 +10,7 @@ import { getNative, isNativeAvailable } from '../infrastructure/native.js'; import { isTestFile } from '../infrastructure/test-filter.js'; import { toErrorMessage } from '../shared/errors.js'; import { toSymbolRef } from '../shared/normalize.js'; -import type { EngineMode, NativeDatabase } from '../types.js'; +import type { BetterSqlite3Database, EngineMode, NativeDatabase } from '../types.js'; // ─── Git Helpers ──────────────────────────────────────────────────────── @@ -106,6 +106,96 @@ function makeSymbolKey(kind: string, file: string, name: string): string { return `${kind}::${file}::${name}`; } +interface RawNodeRow { + id: number; + name: string; + kind: string; + file: string; + line: number; + end_line: number | null; +} + +/** Try opening a NativeDatabase handle for batched fan-in/fan-out metrics. */ +function openNativeDbForFanMetrics(dbPath: string): NativeDatabase | undefined { + if (!isNativeAvailable()) return undefined; + try { + const native = getNative(); + return native.NativeDatabase.openReadonly(dbPath); + } catch (e) { + debug(`loadSymbolsFromDb: native path failed: ${toErrorMessage(e)}`); + return undefined; + } +} + +/** Query all non-file/directory nodes belonging to the given changed files. 
*/ +function queryChangedFileNodes(db: BetterSqlite3Database, changedFiles: string[]): RawNodeRow[] { + const placeholders = changedFiles.map(() => '?').join(', '); + return db + .prepare( + `SELECT n.id, n.name, n.kind, n.file, n.line, n.end_line + FROM nodes n + WHERE n.file IN (${placeholders}) + AND n.kind NOT IN ('file', 'directory') + ORDER BY n.file, n.line`, + ) + .all(...changedFiles) as RawNodeRow[]; +} + +/** Build the public SymbolInfo shape from a raw row + its resolved fan metrics. */ +function makeSymbolInfo(row: RawNodeRow, fanIn: number, fanOut: number): SymbolInfo { + const lineCount = row.end_line ? row.end_line - row.line + 1 : 0; + return { + id: row.id, + name: row.name, + kind: row.kind, + file: row.file, + line: row.line, + lineCount, + fanIn, + fanOut, + }; +} + +/** Native fast path: batch all fan-in/fan-out lookups in one napi call. */ +function buildSymbolsViaNativeBatch( + filtered: RawNodeRow[], + nativeDb: NativeDatabase, +): Map { + const symbols = new Map(); + const nodeIds = filtered.map((r) => r.id); + const metrics = nativeDb.batchFanMetrics!(nodeIds); + const metricsMap = new Map(metrics.map((m) => [m.nodeId, m])); + + for (const row of filtered) { + const m = metricsMap.get(row.id); + const key = makeSymbolKey(row.kind, row.file, row.name); + symbols.set(key, makeSymbolInfo(row, m?.fanIn ?? 0, m?.fanOut ?? 0)); + } + return symbols; +} + +/** JS fallback: per-row fan-in/fan-out COUNT queries. */ +function buildSymbolsViaJsFallback( + db: BetterSqlite3Database, + filtered: RawNodeRow[], +): Map { + const symbols = new Map(); + const fanInStmt = db.prepare( + `SELECT COUNT(*) AS cnt FROM edges WHERE target_id = ? AND kind = 'calls'`, + ); + const fanOutStmt = db.prepare( + `SELECT COUNT(*) AS cnt FROM edges WHERE source_id = ? 
AND kind = 'calls'`, + ); + + for (const row of filtered) { + const fanIn = (fanInStmt.get(row.id) as { cnt: number }).cnt; + const fanOut = (fanOutStmt.get(row.id) as { cnt: number }).cnt; + const key = makeSymbolKey(row.kind, row.file, row.name); + symbols.set(key, makeSymbolInfo(row, fanIn, fanOut)); + } + return symbols; +} + function loadSymbolsFromDb( dbPath: string, changedFiles: string[], @@ -113,97 +203,23 @@ function loadSymbolsFromDb( ): Map { const Database = getDatabase(); const db = new Database(dbPath, { readonly: true }); - - // Try opening a NativeDatabase for batched fan metrics - let nativeDb: NativeDatabase | undefined; - if (isNativeAvailable()) { - try { - const native = getNative(); - nativeDb = native.NativeDatabase.openReadonly(dbPath); - } catch (e) { - debug(`loadSymbolsFromDb: native path failed: ${toErrorMessage(e)}`); - } - } + const nativeDb = openNativeDbForFanMetrics(dbPath); try { - const symbols = new Map(); - if (changedFiles.length === 0) { - return symbols; + return new Map(); } - const placeholders = changedFiles.map(() => '?').join(', '); - const rows = db - .prepare( - `SELECT n.id, n.name, n.kind, n.file, n.line, n.end_line - FROM nodes n - WHERE n.file IN (${placeholders}) - AND n.kind NOT IN ('file', 'directory') - ORDER BY n.file, n.line`, - ) - .all(...changedFiles) as Array<{ - id: number; - name: string; - kind: string; - file: string; - line: number; - end_line: number | null; - }>; + const rows = queryChangedFileNodes(db, changedFiles); // Filter first, then batch fan metrics for all surviving rows const filtered = noTests ? 
rows.filter((r) => !isTestFile(r.file)) : rows; - // ── Native fast path: batch all fan-in/fan-out in one napi call ── if (nativeDb?.batchFanMetrics && filtered.length > 0) { - const nodeIds = filtered.map((r) => r.id); - const metrics = nativeDb.batchFanMetrics(nodeIds); - const metricsMap = new Map(metrics.map((m) => [m.nodeId, m])); - - for (const row of filtered) { - const lineCount = row.end_line ? row.end_line - row.line + 1 : 0; - const m = metricsMap.get(row.id); - const key = makeSymbolKey(row.kind, row.file, row.name); - symbols.set(key, { - id: row.id, - name: row.name, - kind: row.kind, - file: row.file, - line: row.line, - lineCount, - fanIn: m?.fanIn ?? 0, - fanOut: m?.fanOut ?? 0, - }); - } - return symbols; + return buildSymbolsViaNativeBatch(filtered, nativeDb); } - // ── JS fallback ─────────────────────────────────────────────────── - const fanInStmt = db.prepare( - `SELECT COUNT(*) AS cnt FROM edges WHERE target_id = ? AND kind = 'calls'`, - ); - const fanOutStmt = db.prepare( - `SELECT COUNT(*) AS cnt FROM edges WHERE source_id = ? AND kind = 'calls'`, - ); - - for (const row of filtered) { - const lineCount = row.end_line ? 
row.end_line - row.line + 1 : 0; - const fanIn = (fanInStmt.get(row.id) as { cnt: number }).cnt; - const fanOut = (fanOutStmt.get(row.id) as { cnt: number }).cnt; - const key = makeSymbolKey(row.kind, row.file, row.name); - - symbols.set(key, { - id: row.id, - name: row.name, - kind: row.kind, - file: row.file, - line: row.line, - lineCount, - fanIn, - fanOut, - }); - } - - return symbols; + return buildSymbolsViaJsFallback(db, filtered); } finally { db.close(); if (nativeDb) { @@ -232,37 +248,7 @@ function loadCallersFromDb( const allCallers = new Set(); for (const startId of nodeIds) { - const visited = new Set([startId]); - let frontier = [startId]; - - for (let d = 1; d <= maxDepth; d++) { - const nextFrontier: number[] = []; - for (const fid of frontier) { - const callers = db - .prepare( - `SELECT DISTINCT n.id, n.name, n.kind, n.file, n.line - FROM edges e JOIN nodes n ON e.source_id = n.id - WHERE e.target_id = ? AND e.kind = 'calls'`, - ) - .all(fid) as Array<{ - id: number; - name: string; - kind: string; - file: string; - line: number; - }>; - - for (const c of callers) { - if (!visited.has(c.id) && (!noTests || !isTestFile(c.file))) { - visited.add(c.id); - nextFrontier.push(c.id); - allCallers.add(JSON.stringify(toSymbolRef(c))); - } - } - } - frontier = nextFrontier; - if (frontier.length === 0) break; - } + bfsCallersFromNode(db, startId, maxDepth, noTests, allCallers); } return [...allCallers].map((s) => JSON.parse(s) as CallerInfo); @@ -271,63 +257,130 @@ function loadCallersFromDb( } } +/** Direct DB callers of a single node id (one BFS-frontier expansion step). */ +function queryDirectCallers( + db: BetterSqlite3Database, + nodeId: number, +): Array<{ id: number; name: string; kind: string; file: string; line: number }> { + return db + .prepare( + `SELECT DISTINCT n.id, n.name, n.kind, n.file, n.line + FROM edges e JOIN nodes n ON e.source_id = n.id + WHERE e.target_id = ? 
AND e.kind = 'calls'`, + ) + .all(nodeId) as Array<{ id: number; name: string; kind: string; file: string; line: number }>; +} + +/** BFS up to maxDepth from a single starting node, adding newly-seen callers to allCallers. */ +function bfsCallersFromNode( + db: BetterSqlite3Database, + startId: number, + maxDepth: number, + noTests: boolean, + allCallers: Set, +): void { + const visited = new Set([startId]); + let frontier = [startId]; + + for (let d = 1; d <= maxDepth; d++) { + const nextFrontier: number[] = []; + for (const fid of frontier) { + const callers = queryDirectCallers(db, fid); + for (const c of callers) { + if (!visited.has(c.id) && (!noTests || !isTestFile(c.file))) { + visited.add(c.id); + nextFrontier.push(c.id); + allCallers.add(JSON.stringify(toSymbolRef(c))); + } + } + } + frontier = nextFrontier; + if (frontier.length === 0) break; + } +} + // ─── Symbol Comparison ────────────────────────────────────────────────── -function compareSymbols( +/** Symbols present in `targetSymbols` but not `baseSymbols`. */ +function findAddedSymbols( baseSymbols: Map, targetSymbols: Map, -): { added: SymbolInfo[]; removed: SymbolInfo[]; changed: ChangedSymbol[] } { +): SymbolInfo[] { const added: SymbolInfo[] = []; - const removed: SymbolInfo[] = []; - const changed: ChangedSymbol[] = []; - for (const [key, sym] of targetSymbols) { - if (!baseSymbols.has(key)) { - added.push(sym); - } + if (!baseSymbols.has(key)) added.push(sym); } + return added; +} +/** Symbols present in `baseSymbols` but not `targetSymbols`. */ +function findRemovedSymbols( + baseSymbols: Map, + targetSymbols: Map, +): SymbolInfo[] { + const removed: SymbolInfo[] = []; for (const [key, sym] of baseSymbols) { - if (!targetSymbols.has(key)) { - removed.push(sym); - } + if (!targetSymbols.has(key)) removed.push(sym); } + return removed; +} +/** Build a ChangedSymbol entry from a base/target pair whose metrics diverged. 
*/ +function buildChangedSymbol(baseSym: SymbolInfo, targetSym: SymbolInfo): ChangedSymbol | null { + const lineCountDelta = targetSym.lineCount - baseSym.lineCount; + const fanInDelta = targetSym.fanIn - baseSym.fanIn; + const fanOutDelta = targetSym.fanOut - baseSym.fanOut; + + if (lineCountDelta === 0 && fanInDelta === 0 && fanOutDelta === 0) return null; + + return { + name: baseSym.name, + kind: baseSym.kind, + file: baseSym.file, + base: { + line: baseSym.line, + lineCount: baseSym.lineCount, + fanIn: baseSym.fanIn, + fanOut: baseSym.fanOut, + }, + target: { + line: targetSym.line, + lineCount: targetSym.lineCount, + fanIn: targetSym.fanIn, + fanOut: targetSym.fanOut, + }, + changes: { + lineCount: lineCountDelta, + fanIn: fanInDelta, + fanOut: fanOutDelta, + }, + }; +} + +/** Symbols present in both maps whose line count / fan-in / fan-out diverged. */ +function findChangedSymbols( + baseSymbols: Map, + targetSymbols: Map, +): ChangedSymbol[] { + const changed: ChangedSymbol[] = []; for (const [key, baseSym] of baseSymbols) { const targetSym = targetSymbols.get(key); if (!targetSym) continue; - - const lineCountDelta = targetSym.lineCount - baseSym.lineCount; - const fanInDelta = targetSym.fanIn - baseSym.fanIn; - const fanOutDelta = targetSym.fanOut - baseSym.fanOut; - - if (lineCountDelta !== 0 || fanInDelta !== 0 || fanOutDelta !== 0) { - changed.push({ - name: baseSym.name, - kind: baseSym.kind, - file: baseSym.file, - base: { - line: baseSym.line, - lineCount: baseSym.lineCount, - fanIn: baseSym.fanIn, - fanOut: baseSym.fanOut, - }, - target: { - line: targetSym.line, - lineCount: targetSym.lineCount, - fanIn: targetSym.fanIn, - fanOut: targetSym.fanOut, - }, - changes: { - lineCount: lineCountDelta, - fanIn: fanInDelta, - fanOut: fanOutDelta, - }, - }); - } + const entry = buildChangedSymbol(baseSym, targetSym); + if (entry) changed.push(entry); } + return changed; +} - return { added, removed, changed }; +function compareSymbols( + baseSymbols: Map, + 
targetSymbols: Map, +): { added: SymbolInfo[]; removed: SymbolInfo[]; changed: ChangedSymbol[] } { + return { + added: findAddedSymbols(baseSymbols, targetSymbols), + removed: findRemovedSymbols(baseSymbols, targetSymbols), + changed: findChangedSymbols(baseSymbols, targetSymbols), + }; } // ─── Main Data Function ───────────────────────────────────────────────── @@ -362,48 +415,31 @@ interface BranchCompareResult { summary?: BranchCompareSummary; } -function attachImpactToSymbols( - symbols: SymbolInfo[], +/** + * Attach caller-impact data to each symbol, given a strategy for resolving + * its DB node id (removed symbols carry their own id; changed symbols must + * be looked up in the base-commit symbol map). + */ +function attachImpact( + symbols: T[], + resolveId: (sym: T) => number | undefined, dbPath: string, - _baseSymbols: Map, maxDepth: number, noTests: boolean, ): void { for (const sym of symbols) { - const symCallers = loadCallersFromDb(dbPath, sym.id ? [sym.id] : [], maxDepth, noTests); - (sym as SymbolInfo & { impact?: CallerInfo[] }).impact = symCallers; + const id = resolveId(sym); + const symCallers = loadCallersFromDb(dbPath, id ? [id] : [], maxDepth, noTests); + (sym as T & { impact?: CallerInfo[] }).impact = symCallers; } } -function attachImpactToChanged( - changed: ChangedSymbol[], - dbPath: string, - baseSymbols: Map, - maxDepth: number, - noTests: boolean, -): void { - for (const sym of changed) { - const baseSym = baseSymbols.get(makeSymbolKey(sym.kind, sym.file, sym.name)); - const symCallers = loadCallersFromDb( - dbPath, - baseSym?.id ? [baseSym.id] : [], - maxDepth, - noTests, - ); - sym.impact = symCallers; - } -} - -export async function branchCompareData( +/** Confirm repoRoot is a git repo and resolve baseRef/targetRef to full SHAs. 
*/ +function validateBranchCompareRefs( + repoRoot: string, baseRef: string, targetRef: string, - opts: BranchCompareOpts = {}, -): Promise { - const repoRoot = opts.repoRoot || process.cwd(); - const maxDepth = opts.depth || 3; - const noTests = opts.noTests || false; - const engine = (opts.engine || 'wasm') as EngineMode; - +): { baseSha: string; targetSha: string } | { error: string } { try { execFileSync('git', ['rev-parse', '--git-dir'], { cwd: repoRoot, @@ -421,106 +457,249 @@ export async function branchCompareData( const targetSha = validateGitRef(repoRoot, targetRef); if (!targetSha) return { error: `Invalid git ref: "${targetRef}"` }; - const changedFiles = getChangedFilesBetweenRefs(repoRoot, baseSha, targetSha); + return { baseSha, targetSha }; +} - if (changedFiles.length === 0) { - return { - baseRef, - targetRef, - baseSha, - targetSha, - changedFiles: [], - added: [], - removed: [], - changed: [], - summary: { - added: 0, - removed: 0, - changed: 0, - totalImpacted: 0, - filesAffected: 0, - }, - }; - } +/** Create detached worktrees for both refs and build their graphs. 
*/ +async function setupCompareWorktrees( + repoRoot: string, + baseSha: string, + targetSha: string, + baseDir: string, + targetDir: string, + engine: EngineMode, +): Promise<{ baseDbPath: string; targetDbPath: string }> { + createWorktree(repoRoot, baseSha, baseDir); + createWorktree(repoRoot, targetSha, targetDir); + + await buildGraph(baseDir, { engine, skipRegistry: true }); + await buildGraph(targetDir, { engine, skipRegistry: true }); + + return { + baseDbPath: path.join(baseDir, '.codegraph', 'graph.db'), + targetDbPath: path.join(targetDir, '.codegraph', 'graph.db'), + }; +} - const tmpBase = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-bc-')); - const baseDir = path.join(tmpBase, 'base'); - const targetDir = path.join(tmpBase, 'target'); +interface SymbolDiffWithImpact { + added: SymbolInfo[]; + removed: SymbolInfo[]; + changed: ChangedSymbol[]; + allImpacted: Set; + impactedFiles: Set; +} - try { - createWorktree(repoRoot, baseSha, baseDir); - createWorktree(repoRoot, targetSha, targetDir); +/** Resolve base-commit node ids for removed/changed symbols (for BFS impact queries). */ +function resolveImpactfulIds( + removed: SymbolInfo[], + changed: ChangedSymbol[], + baseSymbols: Map, +): { removedIds: number[]; changedIds: number[] } { + const removedIds = removed.map((s) => s.id).filter(Boolean); + const changedIds = changed + .map((s) => baseSymbols.get(makeSymbolKey(s.kind, s.file, s.name))?.id) + .filter((id): id is number => Boolean(id)); + return { removedIds, changedIds }; +} - await buildGraph(baseDir, { engine, skipRegistry: true }); - await buildGraph(targetDir, { engine, skipRegistry: true }); +/** Collapse removed+changed caller lists into the summary's impacted-symbol/file sets. 
*/ +function computeImpactedFileSets( + removedImpact: CallerInfo[], + changedImpact: CallerInfo[], +): { allImpacted: Set; impactedFiles: Set } { + const allImpacted = new Set(); + for (const c of removedImpact) allImpacted.add(`${c.file}:${c.name}`); + for (const c of changedImpact) allImpacted.add(`${c.file}:${c.name}`); - const baseDbPath = path.join(baseDir, '.codegraph', 'graph.db'); - const targetDbPath = path.join(targetDir, '.codegraph', 'graph.db'); + const impactedFiles = new Set(); + for (const key of allImpacted) impactedFiles.add(key.split(':')[0]!); - const normalizedFiles = changedFiles.map((f) => f.replace(/\\/g, '/')); + return { allImpacted, impactedFiles }; +} - const baseSymbols = loadSymbolsFromDb(baseDbPath, normalizedFiles, noTests); - const targetSymbols = loadSymbolsFromDb(targetDbPath, normalizedFiles, noTests); +/** Load symbols from both DBs, diff them, and attach/compute blast-radius impact data. */ +function diffSymbolsWithImpact( + baseDbPath: string, + targetDbPath: string, + normalizedFiles: string[], + noTests: boolean, + maxDepth: number, +): SymbolDiffWithImpact { + const baseSymbols = loadSymbolsFromDb(baseDbPath, normalizedFiles, noTests); + const targetSymbols = loadSymbolsFromDb(targetDbPath, normalizedFiles, noTests); - const { added, removed, changed } = compareSymbols(baseSymbols, targetSymbols); + const { added, removed, changed } = compareSymbols(baseSymbols, targetSymbols); + const { removedIds, changedIds } = resolveImpactfulIds(removed, changed, baseSymbols); - const removedIds = removed.map((s) => s.id).filter(Boolean); - const changedIds = changed - .map((s) => { - const baseSym = baseSymbols.get(makeSymbolKey(s.kind, s.file, s.name)); - return baseSym?.id; - }) - .filter((id): id is number => Boolean(id)); + const removedImpact = loadCallersFromDb(baseDbPath, removedIds, maxDepth, noTests); + const changedImpact = loadCallersFromDb(baseDbPath, changedIds, maxDepth, noTests); - const removedImpact = 
loadCallersFromDb(baseDbPath, removedIds, maxDepth, noTests); - const changedImpact = loadCallersFromDb(baseDbPath, changedIds, maxDepth, noTests); + attachImpact(removed, (s) => s.id, baseDbPath, maxDepth, noTests); + attachImpact( + changed, + (s) => baseSymbols.get(makeSymbolKey(s.kind, s.file, s.name))?.id, + baseDbPath, + maxDepth, + noTests, + ); - attachImpactToSymbols(removed, baseDbPath, baseSymbols, maxDepth, noTests); - attachImpactToChanged(changed, baseDbPath, baseSymbols, maxDepth, noTests); + const { allImpacted, impactedFiles } = computeImpactedFileSets(removedImpact, changedImpact); - const allImpacted = new Set(); - for (const c of removedImpact) allImpacted.add(`${c.file}:${c.name}`); - for (const c of changedImpact) allImpacted.add(`${c.file}:${c.name}`); + return { added, removed, changed, allImpacted, impactedFiles }; +} - const impactedFiles = new Set(); - for (const key of allImpacted) impactedFiles.add(key.split(':')[0]!); +/** Strip the internal `.id` field, keeping `.impact` where it was attached. 
*/ +function shapeBranchCompareSymbolLists( + added: SymbolInfo[], + removed: SymbolInfo[], +): { cleanAdded: SymbolWithoutId[]; cleanRemoved: SymbolWithoutId[] } { + const cleanAdded = added.map(({ id: _id, ...rest }) => rest as SymbolWithoutId); + const cleanRemoved = removed.map(({ id: _id, ...rest }) => { + const result = rest as SymbolWithoutId; + if ((rest as SymbolInfo & { impact?: CallerInfo[] }).impact) { + result.impact = (rest as SymbolInfo & { impact?: CallerInfo[] }).impact; + } + return result; + }); + return { cleanAdded, cleanRemoved }; +} - const cleanAdded = added.map(({ id: _id, ...rest }) => rest as SymbolWithoutId); - const cleanRemoved = removed.map(({ id: _id, ...rest }) => { - const result = rest as SymbolWithoutId; - if ((rest as SymbolInfo & { impact?: CallerInfo[] }).impact) { - result.impact = (rest as SymbolInfo & { impact?: CallerInfo[] }).impact; - } - return result; - }); +/** Result shape when there are no changed files between the two refs. */ +function emptyBranchCompareResult( + baseRef: string, + targetRef: string, + baseSha: string, + targetSha: string, +): BranchCompareResult { + return { + baseRef, + targetRef, + baseSha, + targetSha, + changedFiles: [], + added: [], + removed: [], + changed: [], + summary: { added: 0, removed: 0, changed: 0, totalImpacted: 0, filesAffected: 0 }, + }; +} + +/** Assemble the final BranchCompareResult from the diff + cleaned symbol lists. 
*/ +function buildBranchCompareResult( + refs: { baseRef: string; targetRef: string; baseSha: string; targetSha: string }, + normalizedFiles: string[], + diff: SymbolDiffWithImpact, + cleaned: { cleanAdded: SymbolWithoutId[]; cleanRemoved: SymbolWithoutId[] }, +): BranchCompareResult { + return { + ...refs, + changedFiles: normalizedFiles, + added: cleaned.cleanAdded, + removed: cleaned.cleanRemoved, + changed: diff.changed, + summary: { + added: diff.added.length, + removed: diff.removed.length, + changed: diff.changed.length, + totalImpacted: diff.allImpacted.size, + filesAffected: diff.impactedFiles.size, + }, + }; +} - return { - baseRef, - targetRef, - baseSha, - targetSha, - changedFiles: normalizedFiles, - added: cleanAdded, - removed: cleanRemoved, - changed, - summary: { - added: added.length, - removed: removed.length, - changed: changed.length, - totalImpacted: allImpacted.size, - filesAffected: impactedFiles.size, - }, - }; +/** Resolve branchCompareData's opts (repoRoot/maxDepth/noTests/engine) with their defaults. */ +function resolveBranchCompareOptions(opts: BranchCompareOpts): { + repoRoot: string; + maxDepth: number; + noTests: boolean; + engine: EngineMode; +} { + return { + repoRoot: opts.repoRoot || process.cwd(), + maxDepth: opts.depth || 3, + noTests: opts.noTests || false, + engine: (opts.engine || 'wasm') as EngineMode, + }; +} + +/** Create the scratch tmpdir + base/target subdirectory paths for the dual worktrees. */ +function createCompareTempDirs(): { tmpBase: string; baseDir: string; targetDir: string } { + const tmpBase = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-bc-')); + return { tmpBase, baseDir: path.join(tmpBase, 'base'), targetDir: path.join(tmpBase, 'target') }; +} + +/** Remove both worktrees and the scratch tmpdir (best-effort, always runs in `finally`). 
*/ +function cleanupCompareTempDirs( + repoRoot: string, + baseDir: string, + targetDir: string, + tmpBase: string, +): void { + removeWorktree(repoRoot, baseDir); + removeWorktree(repoRoot, targetDir); + try { + fs.rmSync(tmpBase, { recursive: true, force: true }); + } catch (cleanupErr) { + debug(`branchCompareData: temp cleanup failed: ${toErrorMessage(cleanupErr)}`); + } +} + +/** Set up worktrees, diff the symbols, and shape the final result (the try-block body). */ +async function runBranchCompareInWorktrees( + resolvedRefs: { baseRef: string; targetRef: string; baseSha: string; targetSha: string }, + dirs: { repoRoot: string; baseDir: string; targetDir: string; engine: EngineMode }, + changedFiles: string[], + noTests: boolean, + maxDepth: number, +): Promise { + const { baseSha, targetSha } = resolvedRefs; + const { baseDbPath, targetDbPath } = await setupCompareWorktrees( + dirs.repoRoot, + baseSha, + targetSha, + dirs.baseDir, + dirs.targetDir, + dirs.engine, + ); + + const normalizedFiles = changedFiles.map((f) => f.replace(/\\/g, '/')); + const diff = diffSymbolsWithImpact(baseDbPath, targetDbPath, normalizedFiles, noTests, maxDepth); + const cleaned = shapeBranchCompareSymbolLists(diff.added, diff.removed); + + return buildBranchCompareResult(resolvedRefs, normalizedFiles, diff, cleaned); +} + +export async function branchCompareData( + baseRef: string, + targetRef: string, + opts: BranchCompareOpts = {}, +): Promise { + const { repoRoot, maxDepth, noTests, engine } = resolveBranchCompareOptions(opts); + + const refs = validateBranchCompareRefs(repoRoot, baseRef, targetRef); + if ('error' in refs) return refs; + const { baseSha, targetSha } = refs; + + const changedFiles = getChangedFilesBetweenRefs(repoRoot, baseSha, targetSha); + + if (changedFiles.length === 0) { + return emptyBranchCompareResult(baseRef, targetRef, baseSha, targetSha); + } + + const { tmpBase, baseDir, targetDir } = createCompareTempDirs(); + + try { + return await 
runBranchCompareInWorktrees( + { baseRef, targetRef, baseSha, targetSha }, + { repoRoot, baseDir, targetDir, engine }, + changedFiles, + noTests, + maxDepth, + ); } catch (err) { return { error: toErrorMessage(err) }; } finally { - removeWorktree(repoRoot, baseDir); - removeWorktree(repoRoot, targetDir); - try { - fs.rmSync(tmpBase, { recursive: true, force: true }); - } catch (cleanupErr) { - debug(`branchCompareData: temp cleanup failed: ${toErrorMessage(cleanupErr)}`); - } + cleanupCompareTempDirs(repoRoot, baseDir, targetDir, tmpBase); } } @@ -572,47 +751,78 @@ function collectImpactedCallers( return allImpacted; } -export function branchCompareMermaid(data: BranchCompareResult): string { - if (data.error) return data.error; - if ( +/** Render the "Impacted Callers" subgraph block, if there are any impacted callers. */ +function renderImpactedCallersSubgraph( + lines: string[], + state: MermaidNodeIdState, + allImpacted: Map, +): void { + if (allImpacted.size === 0) return; + lines.push(' subgraph sg_impact["Impacted Callers"]'); + for (const [key, c] of allImpacted) { + const nid = mermaidNodeId(state, key); + lines.push(` ${nid}["[${kindIcon(c.kind)}] ${c.name}"]`); + } + lines.push(' end'); + lines.push(' style sg_impact fill:#f3e5f5,stroke:#9c27b0'); +} + +/** Draw the dotted "impacted by" edges from each removed/changed symbol to its callers. */ +function renderImpactEdges( + lines: string[], + state: MermaidNodeIdState, + impactSources: Array<{ kind: string; file: string; name: string; impact?: CallerInfo[] }>, + removed: SymbolWithoutId[], +): void { + for (const sym of impactSources) { + if (!sym.impact) continue; + const prefix = removed.includes(sym as SymbolWithoutId) ? 
'removed' : 'changed'; + const symKey = `${prefix}::${sym.kind}::${sym.file}::${sym.name}`; + for (const c of sym.impact) { + const callerKey = `impact::${c.kind}::${c.file}::${c.name}`; + if (state.map.has(symKey) && state.map.has(callerKey)) { + lines.push(` ${state.map.get(symKey)} -.-> ${state.map.get(callerKey)}`); + } + } + } +} + +/** True if the compare result has no added/removed/changed symbols to render. */ +function hasNoBranchDifferences(data: BranchCompareResult): boolean { + return ( (data.added?.length ?? 0) === 0 && (data.removed?.length ?? 0) === 0 && (data.changed?.length ?? 0) === 0 - ) { + ); +} + +/** Render the three top-level Added/Removed/Changed subgraphs. */ +function renderAddedRemovedChangedSubgraphs( + lines: string[], + state: MermaidNodeIdState, + data: BranchCompareResult, +): void { + addMermaidSubgraph(lines, state, 'added', 'Added', data.added || [], '#e8f5e9', '#4caf50'); + addMermaidSubgraph(lines, state, 'removed', 'Removed', data.removed || [], '#ffebee', '#f44336'); + addMermaidSubgraph(lines, state, 'changed', 'Changed', data.changed || [], '#fff3e0', '#ff9800'); +} + +export function branchCompareMermaid(data: BranchCompareResult): string { + if (data.error) return data.error; + if (hasNoBranchDifferences(data)) { return 'flowchart TB\n none["No structural differences detected"]'; } const lines = ['flowchart TB']; const state: MermaidNodeIdState = { counter: 0, map: new Map() }; - addMermaidSubgraph(lines, state, 'added', 'Added', data.added || [], '#e8f5e9', '#4caf50'); - addMermaidSubgraph(lines, state, 'removed', 'Removed', data.removed || [], '#ffebee', '#f44336'); - addMermaidSubgraph(lines, state, 'changed', 'Changed', data.changed || [], '#fff3e0', '#ff9800'); + renderAddedRemovedChangedSubgraphs(lines, state, data); const impactSources = [...(data.removed || []), ...(data.changed || [])]; const allImpacted = collectImpactedCallers(impactSources); - if (allImpacted.size > 0) { - lines.push(' subgraph 
sg_impact["Impacted Callers"]'); - for (const [key, c] of allImpacted) { - const nid = mermaidNodeId(state, key); - lines.push(` ${nid}["[${kindIcon(c.kind)}] ${c.name}"]`); - } - lines.push(' end'); - lines.push(' style sg_impact fill:#f3e5f5,stroke:#9c27b0'); - } - - for (const sym of impactSources) { - if (!sym.impact) continue; - const prefix = (data.removed || []).includes(sym as SymbolWithoutId) ? 'removed' : 'changed'; - const symKey = `${prefix}::${sym.kind}::${sym.file}::${sym.name}`; - for (const c of sym.impact) { - const callerKey = `impact::${c.kind}::${c.file}::${c.name}`; - if (state.map.has(symKey) && state.map.has(callerKey)) { - lines.push(` ${state.map.get(symKey)} -.-> ${state.map.get(callerKey)}`); - } - } - } + renderImpactedCallersSubgraph(lines, state, allImpacted); + renderImpactEdges(lines, state, impactSources, data.removed || []); return lines.join('\n'); } From ce843532631b9cd6716386c244589acf15028913 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 16:02:51 -0600 Subject: [PATCH 25/39] fix: address quality issues in ast-analysis (docs check acknowledged) Impact: 16 functions changed, 14 affected --- src/ast-analysis/visitor-utils.ts | 167 ++++++++++++------ src/ast-analysis/visitors/cfg-conditionals.ts | 83 +++++---- 2 files changed, 164 insertions(+), 86 deletions(-) diff --git a/src/ast-analysis/visitor-utils.ts b/src/ast-analysis/visitor-utils.ts index 4da9a25fa..a207b8564 100644 --- a/src/ast-analysis/visitor-utils.ts +++ b/src/ast-analysis/visitor-utils.ts @@ -93,6 +93,81 @@ export function extractParams( return result; } +/** + * Resolution result for a single node in the parameter-name worklist: either + * a base case with names to record, or intermediate `next` nodes that still + * need to be resolved. + */ +type ParamNodeResolution = { names?: string[]; next?: TreeSitterNode[] }; + +/** One entry in the node-type -> handler dispatch table used by `resolveParamNode`. 
*/ +interface ParamNodeHandler { + matches(nodeType: string, rules: LanguageRules): boolean; + resolve(node: TreeSitterNode, rules: LanguageRules): ParamNodeResolution | null; +} + +function resolveWrapperParam(node: TreeSitterNode): ParamNodeResolution | null { + const pattern = node.childForFieldName('pattern') || node.childForFieldName('name'); + return pattern ? { next: [pattern] } : null; +} + +function resolveDefaultParam(node: TreeSitterNode): ParamNodeResolution | null { + const left = node.childForFieldName('left') || node.childForFieldName('name'); + return left ? { next: [left] } : null; +} + +function resolveRestParam(node: TreeSitterNode, rules: LanguageRules): ParamNodeResolution | null { + const nameNode = node.childForFieldName('name'); + if (nameNode) return { names: [nameNode.text] }; + for (const child of node.namedChildren) { + if (child.type === rules.paramIdentifier) return { names: [child.text] }; + } + return null; +} + +function resolveObjectDestructParam( + node: TreeSitterNode, + rules: LanguageRules, +): ParamNodeResolution { + return { next: collectObjectDestructChildren(node, rules) }; +} + +function resolveArrayDestructParam(node: TreeSitterNode): ParamNodeResolution { + return { next: [...node.namedChildren] }; +} + +/** + * Ordered node-type -> handler dispatch table for `resolveParamNode`. Order + * matters: earlier entries take precedence, matching the original + * if/else-if cascade exactly. 
+ */ +const PARAM_NODE_HANDLERS: ParamNodeHandler[] = [ + { + matches: (t, rules) => t === rules.paramIdentifier, + resolve: (node) => ({ names: [node.text] }), + }, + { + matches: (t, rules) => rules.paramWrapperTypes.has(t), + resolve: resolveWrapperParam, + }, + { + matches: (t, rules) => !!rules.defaultParamType && t === rules.defaultParamType, + resolve: resolveDefaultParam, + }, + { + matches: (t, rules) => !!rules.restParamType && t === rules.restParamType, + resolve: resolveRestParam, + }, + { + matches: (t, rules) => !!rules.objectDestructType && t === rules.objectDestructType, + resolve: resolveObjectDestructParam, + }, + { + matches: (t, rules) => !!rules.arrayDestructType && t === rules.arrayDestructType, + resolve: resolveArrayDestructParam, + }, +]; + /** * Resolve a single parameter node to either a direct list of names (base case) * or a list of child nodes that still need processing. Returns `null` if the @@ -102,46 +177,16 @@ export function extractParams( * `extractParamNames`, breaking the 3-node mutual recursion cycle between * `extractParamNames`, `extractObjectDestructNames`, and `extractArrayDestructNames`. */ -function resolveParamNode( - node: TreeSitterNode, - rules: LanguageRules, -): { names?: string[]; next?: TreeSitterNode[] } | null { - const t = node.type; - +function resolveParamNode(node: TreeSitterNode, rules: LanguageRules): ParamNodeResolution | null { if (rules.extractParamName) { const result = rules.extractParamName(node); if (result) return { names: result }; } - if (t === rules.paramIdentifier) return { names: [node.text] }; - - if (rules.paramWrapperTypes.has(t)) { - const pattern = node.childForFieldName('pattern') || node.childForFieldName('name'); - return pattern ? { next: [pattern] } : null; - } - - if (rules.defaultParamType && t === rules.defaultParamType) { - const left = node.childForFieldName('left') || node.childForFieldName('name'); - return left ? 
{ next: [left] } : null; - } - - if (rules.restParamType && t === rules.restParamType) { - const nameNode = node.childForFieldName('name'); - if (nameNode) return { names: [nameNode.text] }; - for (const child of node.namedChildren) { - if (child.type === rules.paramIdentifier) return { names: [child.text] }; - } - return null; - } - - if (rules.objectDestructType && t === rules.objectDestructType) { - return { next: collectObjectDestructChildren(node, rules) }; - } - - if (rules.arrayDestructType && t === rules.arrayDestructType) { - return { next: [...node.namedChildren] }; + const t = node.type; + for (const handler of PARAM_NODE_HANDLERS) { + if (handler.matches(t, rules)) return handler.resolve(node, rules); } - return null; } @@ -170,6 +215,41 @@ function collectObjectDestructChildren( return next; } +/** Is this node a shorthand identifier inside an object destructuring pattern? */ +function isShorthandPropPattern(node: TreeSitterNode, rules: LanguageRules): boolean { + return !!rules.shorthandPropPattern && node.type === rules.shorthandPropPattern; +} + +/** + * Push nodes onto the worklist stack in reverse order so that popping them + * (LIFO) visits them in the same left-to-right order as the original + * recursive traversal. + */ +function pushParamWorklist(stack: TreeSitterNode[], nodes: TreeSitterNode[]): void { + for (let i = nodes.length - 1; i >= 0; i--) { + const child = nodes[i]; + if (child) stack.push(child); + } +} + +/** Resolve one worklist entry: record any names, queue any further nodes to visit. 
*/ +function visitParamWorklistNode( + current: TreeSitterNode, + rules: LanguageRules, + names: string[], + stack: TreeSitterNode[], +): void { + if (isShorthandPropPattern(current, rules)) { + names.push(current.text); + return; + } + + const resolved = resolveParamNode(current, rules); + if (!resolved) return; + if (resolved.names) names.push(...resolved.names); + if (resolved.next) pushParamWorklist(stack, resolved.next); +} + /** * Extract parameter names from a single parameter node. * @@ -184,24 +264,7 @@ export function extractParamNames(node: TreeSitterNode | null, rules: LanguageRu while (stack.length > 0) { const current = stack.pop(); - if (!current) continue; - - // Shorthand identifier inside an object destructuring is just the node's text. - if (rules.shorthandPropPattern && current.type === rules.shorthandPropPattern) { - names.push(current.text); - continue; - } - - const resolved = resolveParamNode(current, rules); - if (!resolved) continue; - if (resolved.names) names.push(...resolved.names); - if (resolved.next) { - // Push in reverse so traversal order matches the previous recursive order. 
- for (let i = resolved.next.length - 1; i >= 0; i--) { - const child = resolved.next[i]; - if (child) stack.push(child); - } - } + if (current) visitParamWorklistNode(current, rules, names, stack); } return names; diff --git a/src/ast-analysis/visitors/cfg-conditionals.ts b/src/ast-analysis/visitors/cfg-conditionals.ts index e96460ba8..530d2194b 100644 --- a/src/ast-analysis/visitors/cfg-conditionals.ts +++ b/src/ast-analysis/visitors/cfg-conditionals.ts @@ -8,6 +8,31 @@ import type { } from './cfg-shared.js'; import { getBodyStatements, isCaseNode, isIfNode, nn } from './cfg-shared.js'; +/** + * Create a branch block off `condBlock`, wire the `branchKind` edge into it, + * run `runBranchBody` to populate the branch and get its exit block, then — + * if the branch falls through (exit block is non-null) — wire a + * `fallthrough` edge from that exit into `joinBlock`. + * + * Shared by `processIf`, `processAlternative`, and `processElifSiblings` for + * the true-branch / else-branch / else-if-branch shapes, which all follow + * the same make-block -> add-edge -> run-body -> fallthrough-edge sequence + * (previously hand-inlined 6+ times across those three functions). 
+ */ +function processBranch( + condBlock: CfgBlockInternal, + joinBlock: CfgBlockInternal, + S: FuncState, + branchKind: 'branch_true' | 'branch_false', + label: string, + runBranchBody: (branchBlock: CfgBlockInternal) => CfgBlockInternal | null, +): void { + const branchBlock = S.makeBlock(branchKind, null, null, label); + S.addEdge(condBlock, branchBlock, branchKind); + const branchEnd = runBranchBody(branchBlock); + if (branchEnd) S.addEdge(branchEnd, joinBlock, 'fallthrough'); +} + export function processIf( ifStmt: TreeSitterNode, currentBlock: CfgBlockInternal, @@ -29,13 +54,10 @@ export function processIf( const consequentField = cfgRules.ifConsequentField || 'consequence'; const consequent = ifStmt.childForFieldName(consequentField); - const trueBlock = S.makeBlock('branch_true', null, null, 'then'); - S.addEdge(condBlock, trueBlock, 'branch_true'); - const trueStmts = getBodyStatements(consequent, cfgRules); - const trueEnd = processStatements(trueStmts, trueBlock, S, cfgRules); - if (trueEnd) { - S.addEdge(trueEnd, joinBlock, 'fallthrough'); - } + processBranch(condBlock, joinBlock, S, 'branch_true', 'then', (trueBlock) => { + const trueStmts = getBodyStatements(consequent, cfgRules); + return processStatements(trueStmts, trueBlock, S, cfgRules); + }); if (cfgRules.elifNode) { processElifSiblings(ifStmt, condBlock, joinBlock, S, cfgRules, processStatements); @@ -62,16 +84,14 @@ function processAlternative( if (cfgRules.elseViaAlternative && alternative.type !== cfgRules.elseClause) { if (isIfNode(alternative.type, cfgRules)) { - const falseBlock = S.makeBlock('branch_false', null, null, 'else-if'); - S.addEdge(condBlock, falseBlock, 'branch_false'); - const elseIfEnd = processIf(alternative, falseBlock, S, cfgRules, processStatements); - if (elseIfEnd) S.addEdge(elseIfEnd, joinBlock, 'fallthrough'); + processBranch(condBlock, joinBlock, S, 'branch_false', 'else-if', (falseBlock) => + processIf(alternative, falseBlock, S, cfgRules, processStatements), + 
); } else { - const falseBlock = S.makeBlock('branch_false', null, null, 'else'); - S.addEdge(condBlock, falseBlock, 'branch_false'); - const falseStmts = getBodyStatements(alternative, cfgRules); - const falseEnd = processStatements(falseStmts, falseBlock, S, cfgRules); - if (falseEnd) S.addEdge(falseEnd, joinBlock, 'fallthrough'); + processBranch(condBlock, joinBlock, S, 'branch_false', 'else', (falseBlock) => { + const falseStmts = getBodyStatements(alternative, cfgRules); + return processStatements(falseStmts, falseBlock, S, cfgRules); + }); } } else if (alternative.type === cfgRules.elseClause) { const elseChildren: TreeSitterNode[] = []; @@ -80,15 +100,13 @@ function processAlternative( } const firstChild = elseChildren[0]; if (elseChildren.length === 1 && firstChild && isIfNode(firstChild.type, cfgRules)) { - const falseBlock = S.makeBlock('branch_false', null, null, 'else-if'); - S.addEdge(condBlock, falseBlock, 'branch_false'); - const elseIfEnd = processIf(firstChild, falseBlock, S, cfgRules, processStatements); - if (elseIfEnd) S.addEdge(elseIfEnd, joinBlock, 'fallthrough'); + processBranch(condBlock, joinBlock, S, 'branch_false', 'else-if', (falseBlock) => + processIf(firstChild, falseBlock, S, cfgRules, processStatements), + ); } else { - const falseBlock = S.makeBlock('branch_false', null, null, 'else'); - S.addEdge(condBlock, falseBlock, 'branch_false'); - const falseEnd = processStatements(elseChildren, falseBlock, S, cfgRules); - if (falseEnd) S.addEdge(falseEnd, joinBlock, 'fallthrough'); + processBranch(condBlock, joinBlock, S, 'branch_false', 'else', (falseBlock) => + processStatements(elseChildren, falseBlock, S, cfgRules), + ); } } } @@ -118,17 +136,13 @@ function processElifSiblings( const elifConsequentField = cfgRules.ifConsequentField || 'consequence'; const elifConsequent = child.childForFieldName(elifConsequentField); - const elifTrueBlock = S.makeBlock('branch_true', null, null, 'then'); - S.addEdge(elifCondBlock, elifTrueBlock, 
'branch_true'); - const elifTrueStmts = getBodyStatements(elifConsequent, cfgRules); - const elifTrueEnd = processStatements(elifTrueStmts, elifTrueBlock, S, cfgRules); - if (elifTrueEnd) S.addEdge(elifTrueEnd, joinBlock, 'fallthrough'); + processBranch(elifCondBlock, joinBlock, S, 'branch_true', 'then', (elifTrueBlock) => { + const elifTrueStmts = getBodyStatements(elifConsequent, cfgRules); + return processStatements(elifTrueStmts, elifTrueBlock, S, cfgRules); + }); lastCondBlock = elifCondBlock; } else if (child.type === cfgRules.elseClause) { - const elseBlock = S.makeBlock('branch_false', null, null, 'else'); - S.addEdge(lastCondBlock, elseBlock, 'branch_false'); - const elseBody = child.childForFieldName('body'); let elseStmts: TreeSitterNode[]; if (elseBody) { @@ -139,8 +153,9 @@ function processElifSiblings( elseStmts.push(nn(child.namedChild(j))); } } - const elseEnd = processStatements(elseStmts, elseBlock, S, cfgRules); - if (elseEnd) S.addEdge(elseEnd, joinBlock, 'fallthrough'); + processBranch(lastCondBlock, joinBlock, S, 'branch_false', 'else', (elseBlock) => + processStatements(elseStmts, elseBlock, S, cfgRules), + ); foundElse = true; } From 9946db585ca0ab774f2285f4394268df85a7844d Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 16:31:30 -0600 Subject: [PATCH 26/39] fix: decompose renderAuditFunction, adopt typed AuditResult (docs check acknowledged) Impact: 48 functions changed, 9 affected --- src/features/audit.ts | 82 ++++++++++++++++++--------------------- src/presentation/audit.ts | 81 ++++++++++++++++++++++---------------- src/types.ts | 49 ++++++++++++++--------- 3 files changed, 115 insertions(+), 97 deletions(-) diff --git a/src/features/audit.ts b/src/features/audit.ts index 9f0d5183b..f5dc5bddf 100644 --- a/src/features/audit.ts +++ b/src/features/audit.ts @@ -8,7 +8,16 @@ import { debug } from '../infrastructure/logger.js'; import { isTestFile } from '../infrastructure/test-filter.js'; import { toErrorMessage } from 
'../shared/errors.js'; import { toSymbolRef } from '../shared/normalize.js'; -import type { BetterSqlite3Database, CodegraphConfig } from '../types.js'; +import type { + AuditFunctionEntry, + AuditHealthMetrics, + AuditResult, + BetterSqlite3Database, + CodegraphConfig, + Role, + SymbolKind, + ThresholdBreach, +} from '../types.js'; import { RULE_DEFS } from './manifesto.js'; // ─── Threshold resolution ─────────────────────────────────────────── @@ -65,13 +74,6 @@ const METRIC_TO_RULE: Record = { max_nesting: 'maxNesting', }; -interface ThresholdBreach { - metric: string; - value: number; - threshold: number; - level: 'warn' | 'fail'; -} - function checkBreaches( row: Record, thresholds: Record, @@ -128,18 +130,6 @@ interface SymbolRef { line: number; } -interface HealthMetrics { - cognitive: number | null; - cyclomatic: number | null; - maxNesting: number | null; - maintainabilityIndex: number | null; - halstead: { volume: number; difficulty: number; effort: number; bugs: number }; - loc: number; - sloc: number; - commentLines: number; - thresholdBreaches: ThresholdBreach[]; -} - interface AuditDataOpts { noTests?: boolean; config?: CodegraphConfig; @@ -152,7 +142,7 @@ export function auditData( target: string, customDbPath?: string, opts: AuditDataOpts = {}, -): { target: string; kind: string; functions: unknown[] } { +): AuditResult { const noTests = opts.noTests || false; const config = opts.config || loadConfig(); const maxDepth = @@ -176,14 +166,14 @@ export function auditData( } if (results.length === 0) { - return { target, kind: explained.kind, functions: [] }; + return { target, kind: explained.kind as 'function' | 'file', functions: [] }; } // 2. 
Open DB for enrichment const db = openReadonlyOrFail(customDbPath); const thresholds = resolveThresholds(customDbPath, opts.config); - let functions: unknown[]; + let functions: AuditFunctionEntry[]; try { if (explained.kind === 'file') { functions = enrichFileResults(db, results, kind, noTests, maxDepth, thresholds); @@ -196,24 +186,25 @@ export function auditData( db.close(); } - return { target, kind: explained.kind, functions }; + return { target, kind: explained.kind as 'function' | 'file', functions }; } // ─── Enrich a function result from explainData ────────────────────── +/** A function-target result as returned by `explainFunctionImpl` (always fully populated -- see domain/analysis/context.ts). */ interface ExplainResult { name: string; kind: string; file: string; line: number; - endLine?: number | null; - role?: string | null; - lineCount?: number | null; - summary?: string | null; - signature?: string | null; - callees?: SymbolRef[]; - callers?: SymbolRef[]; - relatedTests?: { file: string }[]; + endLine: number | null; + role: string | null; + lineCount: number | null; + summary: string | null; + signature: { params: string | null; returnType: string | null } | null; + callees: SymbolRef[]; + callers: SymbolRef[]; + relatedTests: { file: string }[]; } /** Enrich all symbols from file-target results. */ @@ -224,8 +215,8 @@ function enrichFileResults( noTests: boolean, maxDepth: number, thresholds: Record, -): unknown[] { - const functions: unknown[] = []; +): AuditFunctionEntry[] { + const functions: AuditFunctionEntry[] = []; for (const fileResult of results) { let allSymbols = [ ...(fileResult.publicApi || []), @@ -245,7 +236,7 @@ function enrichFunction( noTests: boolean, maxDepth: number, thresholds: Record, -): unknown { +): AuditFunctionEntry { const nodeRow = db .prepare('SELECT id FROM nodes WHERE name = ? AND file = ? 
AND line = ?') .get(r.name, r.file, r.line) as { id: number } | undefined; @@ -261,11 +252,11 @@ function enrichFunction( return { name: r.name, - kind: r.kind, + kind: r.kind as SymbolKind, file: r.file, line: r.line, endLine: r.endLine, - role: r.role, + role: r.role as Role | null, lineCount: r.lineCount, summary: r.summary, signature: r.signature, @@ -280,13 +271,14 @@ function enrichFunction( // ─── Enrich a symbol from file-level explainData ──────────────────── +/** A file-target symbol as returned by `explainFileImpl`'s `mapSymbol` (always fully populated -- see domain/analysis/context.ts). */ interface FileSymbol { name: string; kind: string; line: number; - role?: string | null; - summary?: string | null; - signature?: string | null; + role: string | null; + summary: string | null; + signature: { params: string | null; returnType: string | null } | null; } /** Query callees, callers, and related test files for a node. */ @@ -336,7 +328,7 @@ function enrichSymbol( noTests: boolean, maxDepth: number, thresholds: Record, -): unknown { +): AuditFunctionEntry { const nodeRow = db .prepare('SELECT id, end_line FROM nodes WHERE name = ? AND file = ? 
AND line = ?') .get(sym.name, file, sym.line) as { id: number; end_line: number | null } | undefined; @@ -359,11 +351,11 @@ function enrichSymbol( return { name: sym.name, - kind: sym.kind, + kind: sym.kind as SymbolKind, file, line: sym.line, endLine, - role: sym.role || null, + role: (sym.role || null) as Role | null, lineCount, summary: sym.summary || null, signature: sym.signature || null, @@ -396,7 +388,7 @@ function buildHealth( db: BetterSqlite3Database, nodeId: number, thresholds: Record, -): HealthMetrics { +): AuditHealthMetrics { try { const row = db .prepare( @@ -431,7 +423,7 @@ function buildHealth( } } -function defaultHealth(): HealthMetrics { +function defaultHealth(): AuditHealthMetrics { return { cognitive: null, cyclomatic: null, diff --git a/src/presentation/audit.ts b/src/presentation/audit.ts index f82bc1dac..9a350a6e7 100644 --- a/src/presentation/audit.ts +++ b/src/presentation/audit.ts @@ -1,6 +1,7 @@ import { kindIcon } from '../domain/queries.js'; import { auditData } from '../features/audit.js'; import { outputResult } from '../infrastructure/result-formatter.js'; +import type { AuditFunctionEntry, AuditResult, CodegraphConfig } from '../types.js'; interface AuditOpts { json?: boolean; @@ -12,11 +13,14 @@ interface AuditOpts { limit?: number; offset?: number; depth?: number; - config?: unknown; + config?: CodegraphConfig; } +/** A caller/callee reference as rendered under the "Calls"/"Called by" sections. */ +type CallRef = AuditFunctionEntry['callees'][number]; + /** Render health metrics for a single audit function. */ -function renderHealthMetrics(fn: any): void { +function renderHealthMetrics(fn: AuditFunctionEntry): void { if (fn.health.cognitive == null) return; console.log(`\n Health:`); console.log( @@ -35,8 +39,8 @@ function renderHealthMetrics(fn: any): void { } } -/** Render a single audited function with all its sections. 
*/ -function renderAuditFunction(fn: any): void { +/** Render the name/kind/location/summary/signature header for an audited function. */ +function renderFunctionHeader(fn: AuditFunctionEntry): void { const lineRange = fn.endLine ? `${fn.line}-${fn.endLine}` : `${fn.line}`; const roleTag = fn.role ? ` [${fn.role}]` : ''; console.log(`## ${kindIcon(fn.kind)} ${fn.name} (${fn.kind})${roleTag}`); @@ -46,42 +50,53 @@ function renderAuditFunction(fn: any): void { if (fn.signature.params != null) console.log(` Parameters: (${fn.signature.params})`); if (fn.signature.returnType) console.log(` Returns: ${fn.signature.returnType}`); } +} - renderHealthMetrics(fn); - - if (fn.health.thresholdBreaches.length > 0) { - console.log(`\n Threshold Breaches:`); - for (const b of fn.health.thresholdBreaches) { - const icon = b.level === 'fail' ? 'FAIL' : 'WARN'; - console.log(` [${icon}] ${b.metric}: ${b.value} >= ${b.threshold}`); - } +/** Render manifesto threshold breaches (cognitive/cyclomatic/nesting over warn/fail limits). */ +function renderThresholdBreaches(fn: AuditFunctionEntry): void { + if (fn.health.thresholdBreaches.length === 0) return; + console.log(`\n Threshold Breaches:`); + for (const b of fn.health.thresholdBreaches) { + const icon = b.level === 'fail' ? 'FAIL' : 'WARN'; + console.log(` [${icon}] ${b.metric}: ${b.value} >= ${b.threshold}`); } +} +/** Render the transitive-dependent impact summary, one line per BFS level. 
*/ +function renderImpactSection(fn: AuditFunctionEntry): void { console.log(`\n Impact: ${fn.impact.totalDependents} transitive dependent(s)`); for (const [level, nodes] of Object.entries(fn.impact.levels)) { - console.log( - ` Level ${level}: ${(nodes as Array<{ name: string }>).map((n) => n.name).join(', ')}`, - ); + console.log(` Level ${level}: ${nodes.map((n) => n.name).join(', ')}`); } +} - if (fn.callees.length > 0) { - console.log(`\n Calls (${fn.callees.length}):`); - for (const c of fn.callees) { - console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); - } - } - if (fn.callers.length > 0) { - console.log(`\n Called by (${fn.callers.length}):`); - for (const c of fn.callers) { - console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); - } +/** Render a labeled list of call references (used for both "Calls" and "Called by"). */ +function renderCallRefs(label: string, refs: CallRef[]): void { + if (refs.length === 0) return; + console.log(`\n ${label} (${refs.length}):`); + for (const c of refs) { + console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); } - if (fn.relatedTests.length > 0) { - console.log(`\n Tests (${fn.relatedTests.length}):`); - for (const t of fn.relatedTests) { - console.log(` ${t.file}`); - } +} + +/** Render the related-test-file list for an audited function. */ +function renderRelatedTests(fn: AuditFunctionEntry): void { + if (fn.relatedTests.length === 0) return; + console.log(`\n Tests (${fn.relatedTests.length}):`); + for (const t of fn.relatedTests) { + console.log(` ${t.file}`); } +} + +/** Render a single audited function with all its sections. 
*/ +function renderAuditFunction(fn: AuditFunctionEntry): void { + renderFunctionHeader(fn); + renderHealthMetrics(fn); + renderThresholdBreaches(fn); + renderImpactSection(fn); + renderCallRefs('Calls', fn.callees); + renderCallRefs('Called by', fn.callers); + renderRelatedTests(fn); console.log(); } @@ -91,9 +106,9 @@ export function audit( customDbPath: string | undefined, opts: AuditOpts = {}, ): void { - const data: any = auditData(target, customDbPath, opts as any); + const data: AuditResult = auditData(target, customDbPath, opts); - if (outputResult(data, null, opts)) return; + if (outputResult(data as unknown as Record, null, opts)) return; if (data.functions.length === 0) { console.log(`No ${data.kind === 'file' ? 'file' : 'function/symbol'} matching "${target}"`); diff --git a/src/types.ts b/src/types.ts index 88c85c3b8..66b894f36 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1611,6 +1611,27 @@ export interface AuditResult { functions: AuditFunctionEntry[]; } +/** A single manifesto threshold breach reported against an audited function. */ +export interface ThresholdBreach { + metric: string; + value: number; + threshold: number; + level: 'warn' | 'fail'; +} + +/** Complexity/maintainability health metrics attached to an audited function. 
*/ +export interface AuditHealthMetrics { + cognitive: number | null; + cyclomatic: number | null; + maxNesting: number | null; + maintainabilityIndex: number | null; + halstead: HalsteadMetrics; + loc: number; + sloc: number; + commentLines: number; + thresholdBreaches: ThresholdBreach[]; +} + export interface AuditFunctionEntry { name: string; kind: SymbolKind; @@ -1618,30 +1639,20 @@ export interface AuditFunctionEntry { line: number; endLine: number | null; role: Role | null; - lineCount: number; + lineCount: number | null; summary: string | null; - signature: string | null; - callees: string[]; - callers: string[]; - relatedTests: string[]; + signature: { params: string | null; returnType: string | null } | null; + callees: Array<{ name: string; kind: string; file: string; line: number }>; + callers: Array<{ name: string; kind: string; file: string; line: number }>; + relatedTests: Array<{ file: string }>; impact: { totalDependents: number; levels: Record; }; - health: { - cognitive: number; - cyclomatic: number; - maxNesting: number; - maintainabilityIndex: number | null; - halstead: HalsteadMetrics | null; - loc: number; - sloc: number; - commentLines: number; - thresholdBreaches: string[]; - }; - riskScore: number; - complexityNotes: string[]; - sideEffects: string[]; + health: AuditHealthMetrics; + riskScore: number | null; + complexityNotes: string | null; + sideEffects: string | null; } export interface ImpactLevelEntry { From 210abd2109bf99ab8b6e45d7601a5b88c33be973 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 16:55:32 -0600 Subject: [PATCH 27/39] fix: decompose highest-complexity extractor functions (docs check acknowledged) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Decomposes the highest-complexity functions across 7 independent single-language extractor files (sync.json phase 26): r, dart, groovy, csharp, elixir, scala, julia. 
Pure behavior-preserving decomposition — no extraction-logic changes. Resolution benchmark precision/recall/ TP/FP/FN confirmed byte-for-byte identical to baseline for all 7 languages. Impact: 22 functions changed, 29 affected --- src/extractors/csharp.ts | 41 ++++++++----- src/extractors/dart.ts | 90 ++++++++++++++------------- src/extractors/elixir.ts | 51 ++++++++++++---- src/extractors/groovy.ts | 68 +++++++++------------ src/extractors/julia.ts | 127 ++++++++++++++++++++++++--------------- src/extractors/r.ts | 95 +++++++++++++++-------------- src/extractors/scala.ts | 57 ++++++++++-------- 7 files changed, 303 insertions(+), 226 deletions(-) diff --git a/src/extractors/csharp.ts b/src/extractors/csharp.ts index 850bb8a34..8a3a06e9e 100644 --- a/src/extractors/csharp.ts +++ b/src/extractors/csharp.ts @@ -335,25 +335,38 @@ function handleCSharpVarDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { if (!typeNode) return; if (typeNode.type === 'implicit_type') { - // var x = new Foo() — infer type from object_creation_expression initializer - if (!ctx.typeMap) return; - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (child?.type !== 'variable_declarator') continue; - const nameNode = child.childForFieldName('name') || child.child(0); - if (nameNode?.type !== 'identifier') continue; - const objCreation = findChild(child, 'object_creation_expression'); - if (!objCreation) continue; - const ctorTypeNode = objCreation.childForFieldName('type'); - if (!ctorTypeNode) continue; - const ctorType = extractCSharpTypeName(ctorTypeNode); - if (ctorType) setTypeMapEntry(ctx.typeMap, nameNode.text, ctorType, 1.0); - } + handleCSharpImplicitVarDecl(node, ctx); return; } const typeName = extractCSharpTypeName(typeNode); if (!typeName) return; + handleCSharpExplicitVarDecl(node, ctx, typeName); +} + +// var x = new Foo() — infer type from object_creation_expression initializer +function handleCSharpImplicitVarDecl(node: TreeSitterNode, ctx: 
ExtractorOutput): void { + if (!ctx.typeMap) return; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child?.type !== 'variable_declarator') continue; + const nameNode = child.childForFieldName('name') || child.child(0); + if (nameNode?.type !== 'identifier') continue; + const objCreation = findChild(child, 'object_creation_expression'); + if (!objCreation) continue; + const ctorTypeNode = objCreation.childForFieldName('type'); + if (!ctorTypeNode) continue; + const ctorType = extractCSharpTypeName(ctorTypeNode); + if (ctorType) setTypeMapEntry(ctx.typeMap, nameNode.text, ctorType, 1.0); + } +} + +// Explicitly-typed declarator list: `Foo x = ..., y = ...;` +function handleCSharpExplicitVarDecl( + node: TreeSitterNode, + ctx: ExtractorOutput, + typeName: string, +): void { for (let i = 0; i < node.childCount; i++) { const child = node.child(i); if (child?.type !== 'variable_declarator') continue; diff --git a/src/extractors/dart.ts b/src/extractors/dart.ts index cd0642f46..2b166f8d5 100644 --- a/src/extractors/dart.ts +++ b/src/extractors/dart.ts @@ -256,58 +256,66 @@ function handleDartSelector(node: TreeSitterNode, ctx: ExtractorOutput): void { if (!argPart) return; const line = node.startPosition.row + 1; + const methodName = resolveDartSelectorMethodName(node); + if (!methodName) return; - // Look for the identifier this selector belongs to. 
- // Two layouts are possible depending on grammar version: - // A) selector has both unconditional_assignable_selector + argument_part (same node) - // B) one selector node holds unconditional_assignable_selector (.method), - // the next holds argument_part (the call args) — method name is in the previous sibling - const unconditional = findChild(node, 'unconditional_assignable_selector'); - let methodName: string | null = null; + // Function.apply(fn, positionalArgs, namedArgs) — dynamic higher-order dispatch + if (methodName === 'apply' && isDartFunctionApplyCall(node)) { + ctx.calls.push({ + name: '', + line, + dynamic: true, + dynamicKind: 'unresolved-dynamic', + }); + return; + } + + ctx.calls.push({ name: methodName, line }); +} +// Look for the identifier this selector belongs to. +// Two layouts are possible depending on grammar version: +// A) selector has both unconditional_assignable_selector + argument_part (same node) +// B) one selector node holds unconditional_assignable_selector (.method), +// the next holds argument_part (the call args) — method name is in the previous sibling +function resolveDartSelectorMethodName(node: TreeSitterNode): string | null { + const unconditional = findChild(node, 'unconditional_assignable_selector'); if (unconditional) { const id = findChild(unconditional, 'identifier'); - if (id) methodName = id.text; - } else { - // Layout B: look at the previous sibling selector for the method name - const parent = node.parent; - if (parent) { - for (let i = 0; i < parent.childCount; i++) { - const sibling = parent.child(i); - if (sibling === node) break; - if (sibling?.type === 'selector') { - const unc2 = findChild(sibling, 'unconditional_assignable_selector'); - if (unc2) { - const id2 = findChild(unc2, 'identifier'); - if (id2) methodName = id2.text; - } - } - } - } + return id ? 
id.text : null; } - if (!methodName) return; + // Layout B: look at the previous sibling selector for the method name + const parent = node.parent; + if (!parent) return null; - // Function.apply(fn, positionalArgs, namedArgs) — dynamic higher-order dispatch - if (methodName === 'apply') { - const parent = node.parent; - if (parent) { - for (let i = 0; i < parent.childCount; i++) { - const sibling = parent.child(i); - if (sibling && sibling !== node && sibling.text === 'Function') { - ctx.calls.push({ - name: '', - line, - dynamic: true, - dynamicKind: 'unresolved-dynamic', - }); - return; - } + let methodName: string | null = null; + for (let i = 0; i < parent.childCount; i++) { + const sibling = parent.child(i); + if (sibling === node) break; + if (sibling?.type === 'selector') { + const unc2 = findChild(sibling, 'unconditional_assignable_selector'); + if (unc2) { + const id2 = findChild(unc2, 'identifier'); + if (id2) methodName = id2.text; } } } + return methodName; +} - ctx.calls.push({ name: methodName, line }); +// Detects `Function.apply(...)` calls: true when a sibling selector's text is +// the literal `Function` identifier preceding this call. 
+function isDartFunctionApplyCall(node: TreeSitterNode): boolean { + const parent = node.parent; + if (!parent) return false; + for (let i = 0; i < parent.childCount; i++) { + const sibling = parent.child(i); + if (sibling && sibling !== node && sibling.text === 'Function') { + return true; + } + } + return false; } function handleDartTypeAlias(node: TreeSitterNode, ctx: ExtractorOutput): void { diff --git a/src/extractors/elixir.ts b/src/extractors/elixir.ts index b1ad19b8b..ffff493dc 100644 --- a/src/extractors/elixir.ts +++ b/src/extractors/elixir.ts @@ -278,25 +278,50 @@ function pushElixirMapValues(node: TreeSitterNode, stack: TreeSitterNode[]): voi for (let i = 0; i < node.childCount; i++) { const content = node.child(i); if (content?.type !== 'map_content') continue; - for (let j = 0; j < content.childCount; j++) { - const kws = content.child(j); - if (kws?.type !== 'keywords') continue; - for (let k = 0; k < kws.childCount; k++) { - const pair = kws.child(k); - if (pair?.type !== 'pair') continue; - for (let p = 0; p < pair.childCount; p++) { - const part = pair.child(p); - if (!part || part.type === 'keyword') continue; - parts.push(part); - } - } - } + parts.push(...collectElixirMapContentParts(content)); } for (let i = parts.length - 1; i >= 0; i--) { stack.push(parts[i] as TreeSitterNode); } } +// Walks a `map_content` node's `keywords` children, collecting every pair's +// value part (see collectElixirPairValueParts) in document order. +function collectElixirMapContentParts(content: TreeSitterNode): TreeSitterNode[] { + const parts: TreeSitterNode[] = []; + for (let j = 0; j < content.childCount; j++) { + const kws = content.child(j); + if (kws?.type !== 'keywords') continue; + parts.push(...collectElixirKeywordsParts(kws)); + } + return parts; +} + +// Walks a `keywords` node's `pair` children, collecting each pair's value +// part in document order. 
+function collectElixirKeywordsParts(kws: TreeSitterNode): TreeSitterNode[] { + const parts: TreeSitterNode[] = []; + for (let k = 0; k < kws.childCount; k++) { + const pair = kws.child(k); + if (pair?.type !== 'pair') continue; + parts.push(...collectElixirPairValueParts(pair)); + } + return parts; +} + +// Collects a single `pair` node's non-keyword children (the value side of +// `key: value`; the leading `struct`/`keyword` child is intentionally +// skipped — see the pushElixirMapValues doc comment). +function collectElixirPairValueParts(pair: TreeSitterNode): TreeSitterNode[] { + const parts: TreeSitterNode[] = []; + for (let p = 0; p < pair.childCount; p++) { + const part = pair.child(p); + if (!part || part.type === 'keyword') continue; + parts.push(part); + } + return parts; +} + function handleDefprotocol(node: TreeSitterNode, ctx: ExtractorOutput): void { const args = findChild(node, 'arguments'); if (!args) return; diff --git a/src/extractors/groovy.ts b/src/extractors/groovy.ts index 9b5162ace..a9bcf26ea 100644 --- a/src/extractors/groovy.ts +++ b/src/extractors/groovy.ts @@ -35,46 +35,8 @@ export function extractGroovySymbols(tree: TreeSitterTree, _filePath: string): E } function walkGroovyNode(node: TreeSitterNode, ctx: ExtractorOutput): void { - switch (node.type) { - case 'class_definition': - case 'class_declaration': - handleGroovyClassDecl(node, ctx); - break; - case 'interface_definition': - case 'interface_declaration': - handleGroovyInterfaceDecl(node, ctx); - break; - case 'enum_definition': - case 'enum_declaration': - handleGroovyEnumDecl(node, ctx); - break; - case 'method_definition': - case 'method_declaration': - handleGroovyMethodDecl(node, ctx); - break; - case 'constructor_definition': - case 'constructor_declaration': - handleGroovyConstructorDecl(node, ctx); - break; - case 'function_definition': - case 'function_declaration': - handleGroovyFunctionDecl(node, ctx); - break; - case 'import_statement': - case 'import_declaration': - 
handleGroovyImport(node, ctx); - break; - case 'method_call': - case 'method_invocation': - case 'call_expression': - case 'function_call': - case 'juxt_function_call': - handleGroovyCallExpr(node, ctx); - break; - case 'object_creation_expression': - handleGroovyObjectCreation(node, ctx); - break; - } + const handler = GROOVY_NODE_HANDLERS[node.type]; + if (handler) handler(node, ctx); for (let i = 0; i < node.childCount; i++) { const child = node.child(i); @@ -82,6 +44,32 @@ function walkGroovyNode(node: TreeSitterNode, ctx: ExtractorOutput): void { } } +// Lookup table keyed on node.type, replacing a linear switch dispatch. +// Multiple grammar-version type names map to the same handler (mirrors the +// original switch's fallthrough case groups). +const GROOVY_NODE_HANDLERS: Record void> = { + class_definition: handleGroovyClassDecl, + class_declaration: handleGroovyClassDecl, + interface_definition: handleGroovyInterfaceDecl, + interface_declaration: handleGroovyInterfaceDecl, + enum_definition: handleGroovyEnumDecl, + enum_declaration: handleGroovyEnumDecl, + method_definition: handleGroovyMethodDecl, + method_declaration: handleGroovyMethodDecl, + constructor_definition: handleGroovyConstructorDecl, + constructor_declaration: handleGroovyConstructorDecl, + function_definition: handleGroovyFunctionDecl, + function_declaration: handleGroovyFunctionDecl, + import_statement: handleGroovyImport, + import_declaration: handleGroovyImport, + method_call: handleGroovyCallExpr, + method_invocation: handleGroovyCallExpr, + call_expression: handleGroovyCallExpr, + function_call: handleGroovyCallExpr, + juxt_function_call: handleGroovyCallExpr, + object_creation_expression: handleGroovyObjectCreation, +}; + // ── Handlers ─────────────────────────────────────────────────────────────── const GROOVY_PARENT_TYPES = [ diff --git a/src/extractors/julia.ts b/src/extractors/julia.ts index d412fecda..1bf9b80a3 100644 --- a/src/extractors/julia.ts +++ b/src/extractors/julia.ts @@ 
-222,27 +222,57 @@ function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void { const typeHead = findChild(node, 'type_head'); if (!typeHead) return; - let nameNode: TreeSitterNode | null; - let supertypeNode: TreeSitterNode | null = null; + const { nameNode, supertypeNode } = resolveJuliaStructHeadNames(typeHead); + if (!nameNode) return; + const structName = nameNode.text; + const children = collectJuliaStructFields(node); + + if (supertypeNode) { + ctx.classes.push({ + name: structName, + extends: supertypeNode.text, + line: nodeStartLine(node), + }); + } + + ctx.definitions.push({ + name: structName, + kind: 'struct', + line: nodeStartLine(node), + endLine: nodeEndLine(node), + children: children.length > 0 ? children : undefined, + }); +} + +// Resolves the struct's name and optional supertype from its `type_head`. +// Handles both the plain form (`Name` or `Vec{T}`) and the `Name <: Super` +// binary_expression form, walking into each side to find the base-name +// identifier for parameterized forms like `Vec{T} <: AbstractArray{T,1}`. +function resolveJuliaStructHeadNames(typeHead: TreeSitterNode): { + nameNode: TreeSitterNode | null; + supertypeNode: TreeSitterNode | null; +} { const binary = findChild(typeHead, 'binary_expression'); - if (binary) { - // Walk into each side of the binary expression to find the base-name - // identifier — handles parameterized forms like `Vec{T} <: AbstractArray{T,1}`. - const sides: TreeSitterNode[] = []; - for (let i = 0; i < binary.childCount; i++) { - const c = binary.child(i); - if (c && c.type !== 'operator') sides.push(c); - } - nameNode = sides[0] ? findBaseName(sides[0]) : null; - supertypeNode = sides[1] ? 
findBaseName(sides[1]) : null; - } else { - nameNode = findBaseName(typeHead); + if (!binary) { + return { nameNode: findBaseName(typeHead), supertypeNode: null }; } - if (!nameNode) return; - const structName = nameNode.text; + const sides: TreeSitterNode[] = []; + for (let i = 0; i < binary.childCount; i++) { + const c = binary.child(i); + if (c && c.type !== 'operator') sides.push(c); + } + return { + nameNode: sides[0] ? findBaseName(sides[0]) : null, + supertypeNode: sides[1] ? findBaseName(sides[1]) : null, + }; +} +// Collects the struct's field declarations: `typed_expression` (typed field) +// and plain `identifier` (untyped field) direct children of the +// struct_definition node. +function collectJuliaStructFields(node: TreeSitterNode): SubDeclaration[] { const children: SubDeclaration[] = []; for (let i = 0; i < node.childCount; i++) { const child = node.child(i); @@ -263,22 +293,7 @@ function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void { children.push({ name: child.text, kind: 'property', line: nodeStartLine(child) }); } } - - if (supertypeNode) { - ctx.classes.push({ - name: structName, - extends: supertypeNode.text, - line: nodeStartLine(node), - }); - } - - ctx.definitions.push({ - name: structName, - kind: 'struct', - line: nodeStartLine(node), - endLine: nodeEndLine(node), - children: children.length > 0 ? children : undefined, - }); + return children; } function handleAbstractDef(node: TreeSitterNode, ctx: ExtractorOutput): void { @@ -341,24 +356,7 @@ function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void { if (!source) source = txt; names.push(txt.split('.').pop() || txt); } else if (child.type === 'selected_import') { - // First identifier-bearing node is the source module; the rest are - // imported names. The module may itself be a `scoped_identifier` - // (e.g. 
`import Foo.Bar: baz`) — handle it alongside bare - // `identifier` and use the trailing segment as the display name, - // mirroring the outer loop. - let first = true; - for (let j = 0; j < child.childCount; j++) { - const part = child.child(j); - if (!part) continue; - if (part.type !== 'identifier' && part.type !== 'scoped_identifier') continue; - const txt = part.text; - if (first) { - if (!source) source = txt; - first = false; - } else { - names.push(txt.split('.').pop() || txt); - } - } + source = collectJuliaSelectedImportParts(child, names, source); } } @@ -370,6 +368,35 @@ function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void { } } +// Handles the `selected_import` shape (`import Base: show` / +// `import Foo.Bar: baz`): the first identifier-bearing node is the source +// module; the rest are imported names. The module may itself be a +// `scoped_identifier` — handled alongside bare `identifier`, using the +// trailing segment as the display name, mirroring the outer loop. Returns +// the resolved source (unchanged from `currentSource` if already set or if +// no identifier-bearing child was found). 
+function collectJuliaSelectedImportParts( + child: TreeSitterNode, + names: string[], + currentSource: string, +): string { + let source = currentSource; + let first = true; + for (let j = 0; j < child.childCount; j++) { + const part = child.child(j); + if (!part) continue; + if (part.type !== 'identifier' && part.type !== 'scoped_identifier') continue; + const txt = part.text; + if (first) { + if (!source) source = txt; + first = false; + } else { + names.push(txt.split('.').pop() || txt); + } + } + return source; +} + function handleCall(node: TreeSitterNode, ctx: ExtractorOutput): void { // Don't record if parent is assignment LHS (that's a function definition) if (node.parent?.type === 'assignment' && node === node.parent.child(0)) return; diff --git a/src/extractors/r.ts b/src/extractors/r.ts index ef0a863e0..4763cf99d 100644 --- a/src/extractors/r.ts +++ b/src/extractors/r.ts @@ -162,52 +162,59 @@ function handleLibraryCall(node: TreeSitterNode, ctx: ExtractorOutput): void { // `library(package = dplyr)`, prefer the field-named `value` child of the // `argument` node so we extract `dplyr` (the value), not `package` (the // parameter name). Keeps native (Rust) and WASM extractors in parity. - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (!child) continue; - if (child.type === 'arguments') { - for (let j = 0; j < child.childCount; j++) { - const arg = child.child(j); - if (!arg) continue; - if (arg.type === 'identifier') { - pushImport(ctx, node, arg.text, [arg.text]); - return; - } - if (arg.type === 'string' || arg.type === 'string_content') { - const text = stripQuotes(arg.text); - pushImport(ctx, node, text, [text]); - return; - } - // Argument might be wrapped - if (arg.type === 'argument') { - // Prefer the `value` field (correct for named arguments). 
- const valueNode = arg.childForFieldName('value'); - let pick: TreeSitterNode | null = null; - if (valueNode && (valueNode.type === 'string' || valueNode.type === 'identifier')) { - pick = valueNode; - } else { - // Fallback: skip the parameter-name child if the grammar exposes - // it via the `name` field, then pick the first string/identifier. - const nameNode = arg.childForFieldName('name'); - for (let k = 0; k < arg.childCount; k++) { - const inner = arg.child(k); - if (!inner) continue; - if (nameNode && inner.id === nameNode.id) continue; - if (inner.type === 'string' || inner.type === 'identifier') { - pick = inner; - break; - } - } - } - if (pick) { - const text = stripQuotes(pick.text); - pushImport(ctx, node, text, [text]); - return; - } - } - } + const argumentsNode = findFirstChildOfTypes(node, ['arguments']); + if (!argumentsNode) return; + + for (let j = 0; j < argumentsNode.childCount; j++) { + const arg = argumentsNode.child(j); + if (!arg) continue; + const importName = resolveLibraryImportName(arg); + if (importName !== null) { + pushImport(ctx, node, importName, [importName]); + return; + } + } +} + +// Extracts the package name text for a single library()/require() argument +// node, applying the same identifier/string/wrapped-argument precedence as +// the original inline logic (identifier args are used verbatim; string and +// resolved wrapped-argument values are unquoted via stripQuotes). 
+function resolveLibraryImportName(arg: TreeSitterNode): string | null { + if (arg.type === 'identifier') { + return arg.text; + } + if (arg.type === 'string' || arg.type === 'string_content') { + return stripQuotes(arg.text); + } + if (arg.type === 'argument') { + const pick = resolveLibraryArgumentValueNode(arg); + if (pick) return stripQuotes(pick.text); + } + return null; +} + +// Picks the value node out of an `argument`-wrapped library()/require() call +// argument: prefers the field-named `value` child (correct for named +// arguments like `library(package = dplyr)`), falling back to the first +// string/identifier child that isn't the `name` field. +function resolveLibraryArgumentValueNode(arg: TreeSitterNode): TreeSitterNode | null { + const valueNode = arg.childForFieldName('value'); + if (valueNode && (valueNode.type === 'string' || valueNode.type === 'identifier')) { + return valueNode; + } + // Fallback: skip the parameter-name child if the grammar exposes + // it via the `name` field, then pick the first string/identifier. 
+ const nameNode = arg.childForFieldName('name'); + for (let k = 0; k < arg.childCount; k++) { + const inner = arg.child(k); + if (!inner) continue; + if (nameNode && inner.id === nameNode.id) continue; + if (inner.type === 'string' || inner.type === 'identifier') { + return inner; } } + return null; } function handleSourceCall(node: TreeSitterNode, ctx: ExtractorOutput): void { diff --git a/src/extractors/scala.ts b/src/extractors/scala.ts index b78a7470b..34767f3b4 100644 --- a/src/extractors/scala.ts +++ b/src/extractors/scala.ts @@ -286,37 +286,46 @@ function collectScalaBodyMembers( if (!member) continue; if (member.type === 'function_definition') { - const methName = member.childForFieldName('name'); - if (methName) { - const params = extractScalaParameters(member); - methods.push({ - name: `${parentName}.${methName.text}`, - kind: 'method', - line: member.startPosition.row + 1, - endLine: member.endPosition.row + 1, - visibility: extractModifierVisibility(member), - children: params.length > 0 ? params : undefined, - }); - } + collectScalaFunctionMember(member, parentName, methods); } else if (member.type === 'val_definition' || member.type === 'var_definition') { - const pattern = member.childForFieldName('pattern'); - if (pattern) { - const nameNode = pattern.type === 'identifier' ? 
pattern : findChild(pattern, 'identifier'); - if (nameNode) { - children.push({ - name: nameNode.text, - kind: 'property', - line: member.startPosition.row + 1, - visibility: extractModifierVisibility(member), - }); - } - } + collectScalaValVarMember(member, children); } } return { children, methods }; } +function collectScalaFunctionMember( + member: TreeSitterNode, + parentName: string, + methods: Definition[], +): void { + const methName = member.childForFieldName('name'); + if (!methName) return; + const params = extractScalaParameters(member); + methods.push({ + name: `${parentName}.${methName.text}`, + kind: 'method', + line: member.startPosition.row + 1, + endLine: member.endPosition.row + 1, + visibility: extractModifierVisibility(member), + children: params.length > 0 ? params : undefined, + }); +} + +function collectScalaValVarMember(member: TreeSitterNode, children: SubDeclaration[]): void { + const pattern = member.childForFieldName('pattern'); + if (!pattern) return; + const nameNode = pattern.type === 'identifier' ? pattern : findChild(pattern, 'identifier'); + if (!nameNode) return; + children.push({ + name: nameNode.text, + kind: 'property', + line: member.startPosition.row + 1, + visibility: extractModifierVisibility(member), + }); +} + // ── Parameter extraction ──────────────────────────────────────────────────── function extractScalaParameters(funcNode: TreeSitterNode): SubDeclaration[] { From cb5bc85fad9468f00eaffd7efae1131c540b02ec Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 17:09:57 -0600 Subject: [PATCH 28/39] refactor: split execute() into printEngineInfo/printNativeVersionInfo/printBuildMetadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit docs check acknowledged — pure internal decomposition of CLI output logic, no new features/languages/architecture changes; README/CLAUDE/ ROADMAP do not need updates. 
Impact: 5 functions changed, 2 affected --- src/cli/commands/info.ts | 163 ++++++++++++++++++++++++--------------- 1 file changed, 101 insertions(+), 62 deletions(-) diff --git a/src/cli/commands/info.ts b/src/cli/commands/info.ts index a68fad60f..b52c8c762 100644 --- a/src/cli/commands/info.ts +++ b/src/cli/commands/info.ts @@ -1,6 +1,96 @@ import { debug } from '../../infrastructure/logger.js'; import { toErrorMessage } from '../../shared/errors.js'; -import type { CommandDefinition } from '../types.js'; +import type { NativeAddon } from '../../types.js'; +import type { CliContext, CommandDefinition, CommandOpts } from '../types.js'; + +/** Print the "Native version" diagnostic line (reconciles npm package vs. loaded binary version). */ +function printNativeVersionInfo( + loadNative: () => NativeAddon | null, + getNativePackageVersion: () => string | null, +): void { + const native = loadNative()!; + const binaryVersion = + typeof native.engineVersion === 'function' ? native.engineVersion() : 'unknown'; + const pkgVersion = getNativePackageVersion(); + const knownBinaryVersion = binaryVersion !== 'unknown' ? binaryVersion : null; + if (pkgVersion && knownBinaryVersion && pkgVersion !== knownBinaryVersion) { + console.log( + ` Native version: ${pkgVersion} (binary built as ${knownBinaryVersion}, engine loaded OK)`, + ); + } else { + console.log(` Native version: ${pkgVersion ?? binaryVersion}`); + } +} + +/** Print the top "Codegraph Diagnostics" block: version, platform, native/active engine info. 
*/ +function printEngineInfo( + ctx: CliContext, + engine: string, + activeName: string, + activeVersion: string | null, + nativeAvailable: boolean, + loadNative: () => NativeAddon | null, + getNativePackageVersion: () => string | null, +): void { + console.log('\nCodegraph Diagnostics'); + console.log('===================='); + console.log(` Version : ${ctx.program.version()}`); + console.log(` Node.js : ${process.version}`); + console.log(` Platform : ${process.platform}-${process.arch}`); + console.log(` Native engine : ${nativeAvailable ? 'available' : 'unavailable'}`); + if (nativeAvailable) { + printNativeVersionInfo(loadNative, getNativePackageVersion); + } + console.log(` Engine flag : --engine ${engine}`); + console.log(` Active engine : ${activeName}${activeVersion ? ` (v${activeVersion})` : ''}`); + console.log(); +} + +/** Print the "Build metadata" block read from the graph DB, if one exists. Never throws. */ +async function printBuildMetadata( + ctx: CliContext, + opts: CommandOpts, + activeName: string, +): Promise<void> { + try { + const { findDbPath, getBuildMeta } = await import('../../db/index.js'); + const Database = (await import('better-sqlite3')).default; + const dbPath = findDbPath(opts.db as string | undefined); + const fs = await import('node:fs'); + if (fs.existsSync(dbPath)) { + const db = new Database(dbPath, { readonly: true }); + const buildEngine = getBuildMeta(db, 'engine'); + const buildVersion = getBuildMeta(db, 'codegraph_version'); + const builtAt = getBuildMeta(db, 'built_at'); + db.close(); + + if (buildEngine || buildVersion || builtAt) { + console.log('Build metadata'); + console.log( + '\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500', + ); + if (buildEngine) console.log(` Engine : ${buildEngine}`); + if (buildVersion) console.log(` Version : ${buildVersion}`); + if (builtAt) console.log(` Built at : ${builtAt}`); + + if (buildVersion && buildVersion !== ctx.program.version()) { + console.log( 
+ ` \u26A0 DB was built with v${buildVersion}, current is v${ctx.program.version()}. Consider: codegraph build --no-incremental`, + ); + } + if (buildEngine && buildEngine !== activeName) { + console.log( + ` \u26A0 DB was built with ${buildEngine} engine, active is ${activeName}. Consider: codegraph build --no-incremental`, + ); + } + console.log(); + } + } + } catch (e) { + /* diagnostics must never crash */ + debug(`DB build-metadata diagnostics failed: ${toErrorMessage(e)}`); + } +} export const command: CommandDefinition = { name: 'info', @@ -16,67 +106,16 @@ export const command: CommandDefinition = { const { name: activeName, version: activeVersion } = getActiveEngine({ engine }); const nativeAvailable = isNativeAvailable(); - console.log('\nCodegraph Diagnostics'); - console.log('===================='); - console.log(` Version : ${ctx.program.version()}`); - console.log(` Node.js : ${process.version}`); - console.log(` Platform : ${process.platform}-${process.arch}`); - console.log(` Native engine : ${nativeAvailable ? 'available' : 'unavailable'}`); - if (nativeAvailable) { - const native = loadNative()!; - const binaryVersion = - typeof native.engineVersion === 'function' ? native.engineVersion() : 'unknown'; - const pkgVersion = getNativePackageVersion(); - const knownBinaryVersion = binaryVersion !== 'unknown' ? binaryVersion : null; - if (pkgVersion && knownBinaryVersion && pkgVersion !== knownBinaryVersion) { - console.log( - ` Native version: ${pkgVersion} (binary built as ${knownBinaryVersion}, engine loaded OK)`, - ); - } else { - console.log(` Native version: ${pkgVersion ?? binaryVersion}`); - } - } - console.log(` Engine flag : --engine ${engine}`); - console.log(` Active engine : ${activeName}${activeVersion ? 
` (v${activeVersion})` : ''}`); - console.log(); - - try { - const { findDbPath, getBuildMeta } = await import('../../db/index.js'); - const Database = (await import('better-sqlite3')).default; - const dbPath = findDbPath(opts.db as string | undefined); - const fs = await import('node:fs'); - if (fs.existsSync(dbPath)) { - const db = new Database(dbPath, { readonly: true }); - const buildEngine = getBuildMeta(db, 'engine'); - const buildVersion = getBuildMeta(db, 'codegraph_version'); - const builtAt = getBuildMeta(db, 'built_at'); - db.close(); - - if (buildEngine || buildVersion || builtAt) { - console.log('Build metadata'); - console.log( - '\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500', - ); - if (buildEngine) console.log(` Engine : ${buildEngine}`); - if (buildVersion) console.log(` Version : ${buildVersion}`); - if (builtAt) console.log(` Built at : ${builtAt}`); + printEngineInfo( + ctx, + engine, + activeName, + activeVersion, + nativeAvailable, + loadNative, + getNativePackageVersion, + ); - if (buildVersion && buildVersion !== ctx.program.version()) { - console.log( - ` \u26A0 DB was built with v${buildVersion}, current is v${ctx.program.version()}. Consider: codegraph build --no-incremental`, - ); - } - if (buildEngine && buildEngine !== activeName) { - console.log( - ` \u26A0 DB was built with ${buildEngine} engine, active is ${activeName}. 
Consider: codegraph build --no-incremental`, - ); - } - console.log(); - } - } - } catch (e) { - /* diagnostics must never crash */ - debug(`DB build-metadata diagnostics failed: ${toErrorMessage(e)}`); - } + await printBuildMetadata(ctx, opts, activeName); }, }; From 387dabe8c5d9d698122e99f489cef863067f0792 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 2 Jul 2026 17:25:42 -0600 Subject: [PATCH 29/39] refactor: extract timeMedian helper in token-benchmark.ts Impact: 2 functions changed, 3 affected --- scripts/token-benchmark.ts | 59 +++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 30 deletions(-) diff --git a/scripts/token-benchmark.ts b/scripts/token-benchmark.ts index 1aa4d3951..d34268ead 100644 --- a/scripts/token-benchmark.ts +++ b/scripts/token-benchmark.ts @@ -265,6 +265,20 @@ function round1(n) { return Math.round(n * 10) / 10; } +/** + * Run `fn` `runs` times (default `PERF_RUNS`), recording the elapsed + * milliseconds per run, and return the median duration. + */ +async function timeMedian(fn, runs = PERF_RUNS) { + const timings = []; + for (let i = 0; i < runs; i++) { + const start = performance.now(); + await fn(); + timings.push(performance.now() - start); + } + return median(timings); +} + /** * Run build/query/stats benchmarks against the Next.js graph. * Reuses the same codegraph APIs as the existing benchmark scripts. 
@@ -306,24 +320,18 @@ async function runPerfBenchmarks(nextjsDir) { const buildResults = {}; for (const engine of engines) { console.error(` Full build (${engine})...`); - const timings = []; - for (let i = 0; i < PERF_RUNS; i++) { - if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath); - const start = performance.now(); - await buildGraph(nextjsDir, { engine, incremental: false }); - timings.push(performance.now() - start); - } - const fullBuildMs = Math.round(median(timings)); + const fullBuildMs = Math.round( + await timeMedian(async () => { + if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath); + await buildGraph(nextjsDir, { engine, incremental: false }); + }), + ); // No-op rebuild console.error(` No-op rebuild (${engine})...`); - const noopTimings = []; - for (let i = 0; i < PERF_RUNS; i++) { - const start = performance.now(); - await buildGraph(nextjsDir, { engine, incremental: true }); - noopTimings.push(performance.now() - start); - } - const noopRebuildMs = Math.round(median(noopTimings)); + const noopRebuildMs = Math.round( + await timeMedian(() => buildGraph(nextjsDir, { engine, incremental: true })), + ); buildResults[engine] = { fullBuildMs, noopRebuildMs }; console.error(` full=${fullBuildMs}ms noop=${noopRebuildMs}ms`); @@ -370,23 +378,14 @@ async function runPerfBenchmarks(nextjsDir) { for (const depth of [1, 3, 5]) { // fnDeps - const depsTimings = []; - for (let i = 0; i < PERF_RUNS; i++) { - const start = performance.now(); - fnDepsData(hubName, dbPath, { depth, noTests: true }); - depsTimings.push(performance.now() - start); - } + queryResults[`fnDeps_depth${depth}Ms`] = round1( + await timeMedian(() => fnDepsData(hubName, dbPath, { depth, noTests: true })), + ); // fnImpact - const impactTimings = []; - for (let i = 0; i < PERF_RUNS; i++) { - const start = performance.now(); - fnImpactData(hubName, dbPath, { depth, noTests: true }); - impactTimings.push(performance.now() - start); - } - - queryResults[`fnDeps_depth${depth}Ms`] = 
round1(median(depsTimings)); - queryResults[`fnImpact_depth${depth}Ms`] = round1(median(impactTimings)); + queryResults[`fnImpact_depth${depth}Ms`] = round1( + await timeMedian(() => fnImpactData(hubName, dbPath, { depth, noTests: true })), + ); } console.error( From 8386f711ef03521f2d8ed920fc5f1452676e3874 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 3 Jul 2026 15:26:35 -0600 Subject: [PATCH 30/39] refactor: address warnings in benchmark tracer tooling Split loader-hooks.mjs's instrumentSource (cognitive 34, cyclomatic 22) into single-purpose matcher/tracker helpers. Extract the duplicated line-scanning loop shared by native-tracer.sh's trace_rust/trace_swift/ trace_dart/trace_zig into a parameterized inject_trace_calls plus small maybe_close_context/maybe_close_finally_scope/maybe_inject_declaration/ instrument_one_file helpers, and rename the LANG variable to TRACE_LANG so it no longer clobbers the POSIX locale env var inherited by every spawned compiler toolchain. Log lua-tracer.lua's swallowed pcall error to stderr so a genuine fixture failure is visible instead of looking like a silent successful trace. Impact: 17 functions changed, 11 affected --- .../resolution/tracer/loader-hooks.mjs | 114 +++--- .../resolution/tracer/lua-tracer.lua | 7 +- .../resolution/tracer/native-tracer.sh | 358 ++++++++++-------- 3 files changed, 264 insertions(+), 215 deletions(-) diff --git a/tests/benchmarks/resolution/tracer/loader-hooks.mjs b/tests/benchmarks/resolution/tracer/loader-hooks.mjs index bfe54e764..d0446e9eb 100644 --- a/tests/benchmarks/resolution/tracer/loader-hooks.mjs +++ b/tests/benchmarks/resolution/tracer/loader-hooks.mjs @@ -34,6 +34,67 @@ const NOT_FUNCTIONS = new Set([ 'export', ]); +/** Matches a class declaration line; returns the class name or null. */ +function matchClassDeclaration(trimmed) { + const classMatch = trimmed.match(/^(?:export\s+)?class\s+(\w+)/); + return classMatch && trimmed.includes('{') ? 
classMatch[1] : null; +} + +/** Matches `function NAME(`, `export function NAME(`, `async function NAME(`. */ +function matchFunctionDeclaration(trimmed) { + const funcDecl = trimmed.match( + /^(?:export\s+)?(?:default\s+)?(?:async\s+)?function\s+(\w+)\s*\(/, + ); + return funcDecl ? funcDecl[1] : null; +} + +/** Matches `const/let/var NAME = async? (function | arrow)`. */ +function matchAssignedFunction(trimmed) { + const assignedFunc = trimmed.match( + /^(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?(?:function\s*\w*\s*\(|[^=]*=>\s*\{)/, + ); + return assignedFunc ? assignedFunc[1] : null; +} + +/** Matches a class method/constructor/getter/setter declaration (only inside a class body). */ +function matchClassMethod(trimmed, currentClass, braceDepth, classDepth) { + if (!currentClass || braceDepth <= classDepth) return null; + const methodDecl = trimmed.match(/^(?:async\s+)?(?:static\s+)?(?:get\s+|set\s+)?#?(\w+)\s*\(/); + if (!methodDecl || NOT_FUNCTIONS.has(methodDecl[1])) return null; + const mname = methodDecl[1]; + return mname === 'constructor' ? `${currentClass}.constructor` : `${currentClass}.${mname}`; +} + +/** + * Detects the function/method name declared on this line, if any. + * Tries each pattern in order and returns the first match. + */ +function detectFunctionName(trimmed, currentClass, braceDepth, classDepth) { + return ( + matchFunctionDeclaration(trimmed) || + matchAssignedFunction(trimmed) || + matchClassMethod(trimmed, currentClass, braceDepth, classDepth) + ); +} + +/** Pops and closes any function scopes whose body ends at this line's new brace depth. */ +function closeFinishedScopes(funcStack, newDepth, indent, output) { + while (funcStack.length > 0 && newDepth <= funcStack[funcStack.length - 1].openDepth) { + funcStack.pop(); + output.push(`${indent}} finally { globalThis.__tracer?.exit(); }`); + } +} + +/** Opens a new traced scope (enter + try) if this line declares a function/method. 
*/ +function openScopeIfDeclared(funcName, trimmed, indent, file, braceDepth, funcStack, output) { + if (!funcName || !trimmed.endsWith('{')) return; + const inner = `${indent} `; + const escaped = funcName.replace(/'/g, "\\'"); + output.push(`${inner}globalThis.__tracer?.enter('${escaped}', '${file}');`); + output.push(`${inner}try {`); + funcStack.push({ name: funcName, openDepth: braceDepth }); +} + /** * Instrument all function/method declarations in source code. * Injects enter()/try and finally/exit() around each function body. @@ -60,58 +121,17 @@ function instrumentSource(source, filename) { const closeBraces = (line.match(/\}/g) || []).length; const newDepth = braceDepth + openBraces - closeBraces; - // Detect class declarations - const classMatch = trimmed.match(/^(?:export\s+)?class\s+(\w+)/); - if (classMatch && trimmed.includes('{')) { - currentClass = classMatch[1]; + const classMatch = matchClassDeclaration(trimmed); + if (classMatch) { + currentClass = classMatch; classDepth = braceDepth; } - // Detect function/method declarations - let funcName = null; - - // function NAME(, export function NAME(, async function NAME( - const funcDecl = trimmed.match( - /^(?:export\s+)?(?:default\s+)?(?:async\s+)?function\s+(\w+)\s*\(/, - ); - if (funcDecl) funcName = funcDecl[1]; - - // const/let/var NAME = async? (function | arrow) - if (!funcName) { - const assignedFunc = trimmed.match( - /^(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?(?:function\s*\w*\s*\(|[^=]*=>\s*\{)/, - ); - if (assignedFunc) funcName = assignedFunc[1]; - } - - // Class method (only inside a class body) - if (!funcName && currentClass && braceDepth > classDepth) { - const methodDecl = trimmed.match( - /^(?:async\s+)?(?:static\s+)?(?:get\s+|set\s+)?#?(\w+)\s*\(/, - ); - if (methodDecl && !NOT_FUNCTIONS.has(methodDecl[1])) { - const mname = methodDecl[1]; - funcName = - mname === 'constructor' ? 
`${currentClass}.constructor` : `${currentClass}.${mname}`; - } - } - - // Insert finally blocks for closing function scopes - while (funcStack.length > 0 && newDepth <= funcStack[funcStack.length - 1].openDepth) { - funcStack.pop(); - output.push(`${indent}} finally { globalThis.__tracer?.exit(); }`); - } + const funcName = detectFunctionName(trimmed, currentClass, braceDepth, classDepth); + closeFinishedScopes(funcStack, newDepth, indent, output); output.push(line); - - // Insert enter/try for new function declarations - if (funcName && trimmed.endsWith('{')) { - const inner = `${indent} `; - const escaped = funcName.replace(/'/g, "\\'"); - output.push(`${inner}globalThis.__tracer?.enter('${escaped}', '${file}');`); - output.push(`${inner}try {`); - funcStack.push({ name: funcName, openDepth: braceDepth }); - } + openScopeIfDeclared(funcName, trimmed, indent, file, braceDepth, funcStack, output); braceDepth = newDepth; diff --git a/tests/benchmarks/resolution/tracer/lua-tracer.lua b/tests/benchmarks/resolution/tracer/lua-tracer.lua index 0fcc75882..873e2853e 100644 --- a/tests/benchmarks/resolution/tracer/lua-tracer.lua +++ b/tests/benchmarks/resolution/tracer/lua-tracer.lua @@ -99,7 +99,12 @@ debug.sethook(hook, "cr") local ok, err = pcall(dofile, fixture_dir .. "main.lua") if not ok then - -- Swallow errors - we only care about call edges + -- We only care about call edges captured before the fixture errored, so + -- keep going rather than aborting the trace — but still surface the + -- error to stderr so a genuine failure (e.g. a Lua syntax error + -- producing zero edges) is visible instead of looking like a silent + -- successful trace. 
+ io.stderr:write(tostring(err), "\n") end debug.sethook() diff --git a/tests/benchmarks/resolution/tracer/native-tracer.sh b/tests/benchmarks/resolution/tracer/native-tracer.sh index 5d004972a..812d221d8 100644 --- a/tests/benchmarks/resolution/tracer/native-tracer.sh +++ b/tests/benchmarks/resolution/tracer/native-tracer.sh @@ -14,9 +14,12 @@ set -euo pipefail FIXTURE_DIR="${1:-}" -LANG="${2:-}" +# Named TRACE_LANG (not LANG) — LANG is the POSIX locale environment variable; +# reassigning it here would clobber locale settings inherited by every +# compiler toolchain this script spawns (gcc, cargo, dotnet, swiftc, zig, ...). +TRACE_LANG="${2:-}" -if [[ -z "$FIXTURE_DIR" || -z "$LANG" ]]; then +if [[ -z "$FIXTURE_DIR" || -z "$TRACE_LANG" ]]; then echo "Usage: native-tracer.sh " >&2 exit 1 fi @@ -41,6 +44,143 @@ empty_result() { exit 0 } +# Shared source-instrumentation helpers used by trace_rust / trace_swift / +# trace_zig / trace_dart. Each of those languages needs the same scan: track +# an optional enclosing context (impl/class/struct), detect a function or +# method declaration, and inject a trace-call statement right after the +# opening brace. They differ only in the regexes, the qualname separator +# context, and how the traced scope is closed: +# +# raii — the language has its own scope-exit primitive (Rust +# `let _tg = ;`, Swift/Zig `defer`), so the entry +# template alone is enough — no brace tracking needed. +# finally — the language has neither (Dart), so the function body's +# closing brace is located via manual brace-depth counting and +# a "} finally { ... }" statement is injected right before it. +# +# The helpers below are dispatched one per source line from +# instrument_one_file's loop, each handling one concern and returning 0 only +# when it fully handled the line (caller should move on to the next line). 
+# Bash dynamically scopes `local` variables into called functions, so they +# read/write inject_trace_calls's and instrument_one_file's locals +# (ctx_regex, decl_regex, strategy, current_ctx, in_func, tmpfile, ...) +# directly instead of threading a dozen parameters through each call. + +# Tracks an optional enclosing context (impl/class/struct) block opening, +# and detects the block's closing brace. Returns 0 (line fully handled) only +# when it wrote the closing-brace line itself. +maybe_close_context() { + local line="$1" trimmed="$2" + [[ -z "$ctx_regex" ]] && return 1 + + if [[ "$trimmed" =~ $ctx_regex ]]; then + current_ctx="${BASH_REMATCH[$ctx_group]}" + fi + if [[ "$trimmed" == "}" && -n "$current_ctx" ]] && [[ ! "$line" =~ ^[[:space:]] ]]; then + printf '%s\n' "$line" >> "$tmpfile" + current_ctx="" + return 0 + fi + return 1 +} + +# For the "finally" strategy only: tracks the brace depth of the currently +# open instrumented function body and injects the finally-block right before +# its closing brace. Returns 0 once that injection is made. +maybe_close_finally_scope() { + local line="$1" + [[ "$strategy" == "finally" ]] || return 1 + (( in_func )) || return 1 + + local opens="${line//[^\{]/}" closes="${line//[^\}]/}" + (( func_brace_depth += ${#opens} - ${#closes} )) || true + (( func_brace_depth <= 0 )) || return 1 + + printf -- "$finally_tmpl"'\n' >> "$tmpfile" + printf '%s\n' "$line" >> "$tmpfile" + in_func=0 + func_brace_depth=0 + return 0 +} + +# Detects a function/method declaration on this line and, unless excluded, +# injects the entry template (plus "try {" for the "finally" strategy). +# Returns 0 once an injection is made. 
+maybe_inject_declaration() { + local line="$1" trimmed="$2" + [[ "$trimmed" =~ $decl_regex ]] || return 1 + local fname_candidate="${BASH_REMATCH[$decl_group]}" + + if [[ -n "$decl_exclude_regex" && "$trimmed" =~ $decl_exclude_regex ]]; then + return 1 + fi + [[ "$trimmed" =~ \{[[:space:]]*$ ]] || return 1 + + local qualname="$fname_candidate" + [[ -n "$current_ctx" ]] && qualname="${current_ctx}.${fname_candidate}" + printf '%s\n' "$line" >> "$tmpfile" + printf -- "$entry_tmpl"'\n' "$qualname" "$base" >> "$tmpfile" + if [[ "$strategy" == "finally" ]]; then + printf ' try {\n' >> "$tmpfile" + in_func=1 + func_brace_depth=1 + fi + return 0 +} + +# Instruments one source file in place: scans it line-by-line, dispatching +# each line to the maybe_* handlers above in order, falling back to copying +# the line verbatim when none of them handle it. +instrument_one_file() { + local srcfile="$1" + local base + base="$(basename "$srcfile")" + [[ "$base" == "$skip_file" ]] && return + + local current_ctx="" in_func=0 func_brace_depth=0 + local tmpfile + tmpfile="$(mktemp)" + + local line trimmed + while IFS= read -r line || [[ -n "$line" ]]; do + trimmed="${line#"${line%%[![:space:]]*}"}" + maybe_close_context "$line" "$trimmed" && continue + maybe_close_finally_scope "$line" && continue + maybe_inject_declaration "$line" "$trimmed" && continue + printf '%s\n' "$line" >> "$tmpfile" + done < "$srcfile" + + mv "$tmpfile" "$srcfile" +} + +# Args: +# $1 glob pattern for source files to instrument (e.g. "$TMP_DIR"/*.rs) +# $2 filename to skip (the generated trace-support file for this language) +# $3 context regex (e.g. '^impl[[:space:]]+([A-Za-z_][A-Za-z0-9_]*)'); +# empty string disables context (class/impl) tracking entirely +# $4 capture group index holding the context name (ignored if $3 is empty) +# $5 declaration regex (e.g. 
matches "fn NAME"/"func NAME") +# $6 capture group index holding the candidate function/method name +# $7 declaration exclusion regex — if the trimmed line also matches this, +# the declaration match is discarded (empty string disables filtering; +# used by Dart to skip import/if/while/for/switch/catch/class lines) +# $8 strategy: "raii" or "finally" +# $9 entry template — printf format string, args are (qualname, base file) +# $10 finally template — printf format string with no args (only used when +# strategy is "finally") +inject_trace_calls() { + local glob_pattern="$1" skip_file="$2" + local ctx_regex="$3" ctx_group="$4" + local decl_regex="$5" decl_group="$6" decl_exclude_regex="$7" + local strategy="$8" entry_tmpl="$9" finally_tmpl="${10:-}" + + local srcfile + for srcfile in $glob_pattern; do + [[ -e "$srcfile" ]] || continue + instrument_one_file "$srcfile" + done +} + # ── C / C++ ────────────────────────────────────────────────────────────── trace_c_cpp() { local compiler="$1" @@ -279,48 +419,17 @@ RSTRACE # Add mod trace_support to main.rs sedi '1s/^/mod trace_support;\n/' "$TMP_DIR/src/main.rs" - # Inject trace_call into every fn body using a bash loop that tracks impl blocks - for rsfile in "$TMP_DIR/src"/*.rs; do - base="$(basename "$rsfile")" - [[ "$base" == "trace_support.rs" ]] && continue - - local current_impl="" - local tmpfile="$(mktemp)" - - while IFS= read -r line || [[ -n "$line" ]]; do - # Track impl blocks: "impl TypeName" or "impl TypeName for Trait" - if [[ "$line" =~ ^impl[[:space:]]+([A-Za-z_][A-Za-z0-9_]*) ]]; then - current_impl="${BASH_REMATCH[1]}" - fi - - # End of impl block (top-level closing brace) - if [[ "$line" == "}" && -n "$current_impl" ]]; then - printf '%s\n' "$line" >> "$tmpfile" - current_impl="" - continue - fi - - # Detect fn declarations ending with { - # Save capture before second regex clobbers BASH_REMATCH - if [[ "$line" =~ fn[[:space:]]+([a-z_][a-z0-9_]*) ]]; then - local fname_candidate="${BASH_REMATCH[1]}" - 
if [[ "$line" =~ \{[[:space:]]*$ ]]; then - local fname="$fname_candidate" - local qualname="$fname" - if [[ -n "$current_impl" ]]; then - qualname="${current_impl}.${fname}" - fi - printf '%s\n' "$line" >> "$tmpfile" - printf ' let _tg = crate::trace_support::trace_call("%s", "%s");\n' "$qualname" "$base" >> "$tmpfile" - continue - fi - fi - - printf '%s\n' "$line" >> "$tmpfile" - done < "$rsfile" - - mv "$tmpfile" "$rsfile" - done + # Inject trace_call into every fn body, tracking impl blocks for qualnames. + # Rust's Drop-guard RAII pattern means only entry needs injecting — the + # guard's Drop impl fires trace_support's exit hook automatically. + inject_trace_calls \ + "$TMP_DIR/src/*.rs" \ + "trace_support.rs" \ + '^impl[[:space:]]+([A-Za-z_][A-Za-z0-9_]*)' 1 \ + 'fn[[:space:]]+([a-z_][a-z0-9_]*)' 1 \ + '' \ + raii \ + ' let _tg = crate::trace_support::trace_call("%s", "%s");' # Inject dump_trace() at end of main() sedi '/^fn main/,/^\}/ { @@ -563,44 +672,17 @@ class CallTracer { } SWTRACE - # Inject traceCall into every func body using bash loop - for swfile in "$TMP_DIR"/*.swift; do - base="$(basename "$swfile")" - [[ "$base" == "TraceSupport.swift" ]] && continue - local current_class="" - local tmpfile="$(mktemp)" - - while IFS= read -r line || [[ -n "$line" ]]; do - local trimmed="${line#"${line%%[![:space:]]*}"}" - # Track class/struct declarations - if [[ "$trimmed" =~ ^(class|struct)[[:space:]]+([A-Za-z_][A-Za-z0-9_]*) ]]; then - current_class="${BASH_REMATCH[2]}" - fi - # End of class (top-level }) - if [[ "$trimmed" == "}" && -n "$current_class" ]] && [[ ! 
"$line" =~ ^[[:space:]] ]]; then - printf '%s\n' "$line" >> "$tmpfile" - current_class="" - continue - fi - # Detect func declarations ending with { - # Save capture before second regex clobbers BASH_REMATCH - if [[ "$trimmed" =~ ^(override[[:space:]]+)?func[[:space:]]+([a-zA-Z_][a-zA-Z0-9_]*) ]]; then - local fname_candidate="${BASH_REMATCH[2]}" - if [[ "$trimmed" =~ \{[[:space:]]*$ ]]; then - local fname="$fname_candidate" - local qualname="$fname" - if [[ -n "$current_class" ]]; then - qualname="${current_class}.${fname}" - fi - printf '%s\n' "$line" >> "$tmpfile" - printf ' CallTracer.shared.traceCall("%s", "%s"); defer { CallTracer.shared.traceReturn() }\n' "$qualname" "$base" >> "$tmpfile" - continue - fi - fi - printf '%s\n' "$line" >> "$tmpfile" - done < "$swfile" - mv "$tmpfile" "$swfile" - done + # Inject traceCall into every func body, tracking class/struct blocks for + # qualnames. Swift's `defer` gives us a built-in scope-exit hook, so only + # entry needs injecting. + inject_trace_calls \ + "$TMP_DIR/*.swift" \ + "TraceSupport.swift" \ + '^(class|struct)[[:space:]]+([A-Za-z_][A-Za-z0-9_]*)' 2 \ + '^(override[[:space:]]+)?func[[:space:]]+([a-zA-Z_][a-zA-Z0-9_]*)' 2 \ + '' \ + raii \ + ' CallTracer.shared.traceCall("%s", "%s"); defer { CallTracer.shared.traceReturn() }' # Inject dump at end of main (top-level code or main function) if grep -q 'func main' "$TMP_DIR/main.swift" 2>/dev/null; then @@ -687,68 +769,19 @@ DARTTRACE sedi "1s|^|import 'dart:io';\nimport 'trace_support.dart';\n|" "$dartfile" done - # Inject traceCall + try/finally into function/method bodies. - # We track brace depth per function so we can inject - # "} finally { CallTracer.instance.traceReturn(); }" at the closing brace. 
- for dartfile in "$TMP_DIR"/*.dart; do - base="$(basename "$dartfile")" - [[ "$base" == "trace_support.dart" ]] && continue - local current_class="" - local in_func=0 - local func_brace_depth=0 - local tmpfile="$(mktemp)" - - while IFS= read -r line || [[ -n "$line" ]]; do - local trimmed="${line#"${line%%[![:space:]]*}"}" - - # Track class - if [[ "$trimmed" =~ ^class[[:space:]]+([A-Za-z_][A-Za-z0-9_]*) ]]; then - current_class="${BASH_REMATCH[1]}" - fi - if [[ "$trimmed" == "}" && -n "$current_class" ]] && [[ ! "$line" =~ ^[[:space:]] ]]; then - printf '%s\n' "$line" >> "$tmpfile" - current_class="" - continue - fi - - # If inside an instrumented function, track braces to find its end - if (( in_func )); then - local opens="${line//[^\{]/}" - local closes="${line//[^\}]/}" - (( func_brace_depth += ${#opens} - ${#closes} )) || true - if (( func_brace_depth <= 0 )); then - # This line contains the function's closing brace — - # inject "} finally { traceReturn(); }" before it - printf ' } finally { CallTracer.instance.traceReturn(); }\n' >> "$tmpfile" - printf '%s\n' "$line" >> "$tmpfile" - in_func=0 - func_brace_depth=0 - continue - fi - fi - - # Detect function declarations (return_type name(args) {) - # Save capture before subsequent regexes clobber BASH_REMATCH - if [[ "$trimmed" =~ [[:space:]]([a-zA-Z_][a-zA-Z0-9_]*)\( ]]; then - local fname_candidate="${BASH_REMATCH[1]}" - if [[ "$trimmed" =~ \{[[:space:]]*$ ]] && [[ ! 
"$trimmed" =~ ^(import|if|while|for|switch|catch|class) ]]; then - local fname="$fname_candidate" - local qualname="$fname" - if [[ -n "$current_class" ]]; then - qualname="${current_class}.${fname}" - fi - printf '%s\n' "$line" >> "$tmpfile" - printf ' CallTracer.instance.traceCall("%s", "%s");\n' "$qualname" "$base" >> "$tmpfile" - printf ' try {\n' >> "$tmpfile" - in_func=1 - func_brace_depth=1 # we're inside the function's opening brace - continue - fi - fi - printf '%s\n' "$line" >> "$tmpfile" - done < "$dartfile" - mv "$tmpfile" "$dartfile" - done + # Inject traceCall + try/finally into function/method bodies, tracking + # class blocks for qualnames. Dart has neither RAII nor `defer`, so the + # function body's closing brace is located via manual depth counting and + # "} finally { CallTracer.instance.traceReturn(); }" is injected before it. + inject_trace_calls \ + "$TMP_DIR/*.dart" \ + "trace_support.dart" \ + '^class[[:space:]]+([A-Za-z_][A-Za-z0-9_]*)' 1 \ + '[[:space:]]([a-zA-Z_][a-zA-Z0-9_]*)\(' 1 \ + '^(import|if|while|for|switch|catch|class)' \ + finally \ + ' CallTracer.instance.traceCall("%s", "%s");' \ + ' } finally { CallTracer.instance.traceReturn(); }' # Inject dump at end of main sedi '/^void main/,/^\}/ { @@ -844,33 +877,24 @@ pub fn dumpTrace() void { } ZIGTRACE - # Inject traceCall into fn bodies + # Add import of trace_support at top of every fixture file for zigfile in "$TMP_DIR"/*.zig; do base="$(basename "$zigfile")" [[ "$base" == "trace_support.zig" ]] && continue - - # Add import of trace_support at top sedi "1s|^|const trace_support = @import(\"trace_support.zig\");\n|" "$zigfile" - - # Use bash loop to inject trace calls - local tmpfile="$(mktemp)" - while IFS= read -r line || [[ -n "$line" ]]; do - local trimmed="${line#"${line%%[![:space:]]*}"}" - # Save capture before second regex clobbers BASH_REMATCH - if [[ "$trimmed" =~ ^(pub[[:space:]]+)?fn[[:space:]]+([a-zA-Z_][a-zA-Z0-9_]*) ]]; then - local 
fname_candidate="${BASH_REMATCH[2]}" - if [[ "$trimmed" =~ \{[[:space:]]*$ ]]; then - local fname="$fname_candidate" - printf '%s\n' "$line" >> "$tmpfile" - printf ' trace_support.traceCall("%s", "%s"); defer trace_support.traceReturn();\n' "$fname" "$base" >> "$tmpfile" - continue - fi - fi - printf '%s\n' "$line" >> "$tmpfile" - done < "$zigfile" - mv "$tmpfile" "$zigfile" done + # Inject traceCall into fn bodies. Zig has no class/impl concept here, so + # context tracking is disabled; `defer` gives us a built-in scope-exit hook. + inject_trace_calls \ + "$TMP_DIR/*.zig" \ + "trace_support.zig" \ + '' 0 \ + '^(pub[[:space:]]+)?fn[[:space:]]+([a-zA-Z_][a-zA-Z0-9_]*)' 2 \ + '' \ + raii \ + ' trace_support.traceCall("%s", "%s"); defer trace_support.traceReturn();' + # Inject dump at end of main sedi '/^pub fn main/,/^\}/ { /^\}/ i\ trace_support.dumpTrace(); @@ -1121,7 +1145,7 @@ trace_cuda() { } # ── Dispatch ───────────────────────────────────────────────────────────── -case "$LANG" in +case "$TRACE_LANG" in c) trace_c_cpp "gcc" "c" ;; cpp) trace_c_cpp "g++" "cpp" ;; rust) trace_rust ;; @@ -1138,5 +1162,5 @@ case "$LANG" in cuda) trace_cuda ;; verilog) empty_result "verilog is a hardware description language — no runtime tracing" ;; hcl) empty_result "HCL/Terraform has no callable functions — no runtime tracing" ;; - *) empty_result "unknown language: $LANG" ;; + *) empty_result "unknown language: $TRACE_LANG" ;; esac From a4dd68729cd4553fce0ec8577ac8d90623b5cc1a Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 3 Jul 2026 15:50:49 -0600 Subject: [PATCH 31/39] refactor: address warnings in ast-analysis and extractors/helpers naming (docs check acknowledged) Impact: 12 functions changed, 36 affected --- src/ast-analysis/visitors/cfg-conditionals.ts | 14 +++++++------- src/ast-analysis/visitors/cfg-shared.ts | 14 +++++++------- src/ast-analysis/visitors/cfg-try-catch.ts | 8 ++++---- src/extractors/helpers.ts | 8 ++++---- 4 files changed, 22 insertions(+), 22 
deletions(-) diff --git a/src/ast-analysis/visitors/cfg-conditionals.ts b/src/ast-analysis/visitors/cfg-conditionals.ts index 530d2194b..3ffdb7ff2 100644 --- a/src/ast-analysis/visitors/cfg-conditionals.ts +++ b/src/ast-analysis/visitors/cfg-conditionals.ts @@ -6,7 +6,7 @@ import type { LoopCtx, ProcessStatementsFn, } from './cfg-shared.js'; -import { getBodyStatements, isCaseNode, isIfNode, nn } from './cfg-shared.js'; +import { getBodyStatements, isCaseNode, isIfNode, requireNode } from './cfg-shared.js'; /** * Create a branch block off `condBlock`, wire the `branchKind` edge into it, @@ -96,7 +96,7 @@ function processAlternative( } else if (alternative.type === cfgRules.elseClause) { const elseChildren: TreeSitterNode[] = []; for (let i = 0; i < alternative.namedChildCount; i++) { - elseChildren.push(nn(alternative.namedChild(i))); + elseChildren.push(requireNode(alternative.namedChild(i))); } const firstChild = elseChildren[0]; if (elseChildren.length === 1 && firstChild && isIfNode(firstChild.type, cfgRules)) { @@ -123,7 +123,7 @@ function processElifSiblings( let foundElse = false; for (let i = 0; i < ifStmt.namedChildCount; i++) { - const child = nn(ifStmt.namedChild(i)); + const child = requireNode(ifStmt.namedChild(i)); if (child.type === cfgRules.elifNode) { const elifCondBlock = S.makeBlock( @@ -150,7 +150,7 @@ function processElifSiblings( } else { elseStmts = []; for (let j = 0; j < child.namedChildCount; j++) { - elseStmts.push(nn(child.namedChild(j))); + elseStmts.push(requireNode(child.namedChild(j))); } } processBranch(lastCondBlock, joinBlock, S, 'branch_false', 'else', (elseBlock) => @@ -192,7 +192,7 @@ export function processSwitch( let hasDefault = false; for (let i = 0; i < container.namedChildCount; i++) { - const caseClause = nn(container.namedChild(i)); + const caseClause = requireNode(container.namedChild(i)); const isDefault = caseClause.type === cfgRules.defaultNode; const isCase = isDefault || isCaseNode(caseClause.type, cfgRules); @@ 
-227,11 +227,11 @@ function extractCaseBody(caseClause: TreeSitterNode, cfgRules: AnyRules): TreeSi const valueNode = caseClause.childForFieldName('value'); const patternNode = caseClause.childForFieldName('pattern'); for (let j = 0; j < caseClause.namedChildCount; j++) { - const child = nn(caseClause.namedChild(j)); + const child = requireNode(caseClause.namedChild(j)); if (child !== valueNode && child !== patternNode && child.type !== 'switch_label') { if (child.type === 'statement_list') { for (let k = 0; k < child.namedChildCount; k++) { - stmts.push(nn(child.namedChild(k))); + stmts.push(requireNode(child.namedChild(k))); } } else { stmts.push(child); diff --git a/src/ast-analysis/visitors/cfg-shared.ts b/src/ast-analysis/visitors/cfg-shared.ts index 9987918e2..721e85854 100644 --- a/src/ast-analysis/visitors/cfg-shared.ts +++ b/src/ast-analysis/visitors/cfg-shared.ts @@ -6,11 +6,11 @@ export type AnyRules = any; export type ProcessStatementsFn = ( stmts: TreeSitterNode[], currentBlock: CfgBlockInternal, - S: FuncState, + state: FuncState, cfgRules: AnyRules, ) => CfgBlockInternal | null; -export function nn(node: TreeSitterNode | null, context?: string): TreeSitterNode { +export function requireNode(node: TreeSitterNode | null, context?: string): TreeSitterNode { if (node === null) { throw new Error(`Unexpected null tree-sitter node${context ? 
` (${context})` : ''}`); } @@ -114,18 +114,18 @@ export function isControlFlow(type: string, cfgRules: AnyRules): boolean { export function effectiveNode(node: TreeSitterNode, cfgRules: AnyRules): TreeSitterNode { if (node.type === 'expression_statement' && node.namedChildCount === 1) { - const inner = nn(node.namedChild(0)); + const inner = requireNode(node.namedChild(0)); if (isControlFlow(inner.type, cfgRules)) return inner; } return node; } export function registerLabelCtx( - S: FuncState, + state: FuncState, headerBlock: CfgBlockInternal, exitBlock: CfgBlockInternal, ): void { - for (const [, ctx] of Array.from(S.labelMap)) { + for (const [, ctx] of Array.from(state.labelMap)) { if (!ctx.headerBlock) { ctx.headerBlock = headerBlock; ctx.exitBlock = exitBlock; @@ -141,10 +141,10 @@ export function getBodyStatements( if (isBlockNode(bodyNode.type, cfgRules)) { const stmts: TreeSitterNode[] = []; for (let i = 0; i < bodyNode.namedChildCount; i++) { - const child = nn(bodyNode.namedChild(i)); + const child = requireNode(bodyNode.namedChild(i)); if (child.type === 'statement_list') { for (let j = 0; j < child.namedChildCount; j++) { - stmts.push(nn(child.namedChild(j))); + stmts.push(requireNode(child.namedChild(j))); } } else { stmts.push(child); diff --git a/src/ast-analysis/visitors/cfg-try-catch.ts b/src/ast-analysis/visitors/cfg-try-catch.ts index ea44ddb51..6211cba5a 100644 --- a/src/ast-analysis/visitors/cfg-try-catch.ts +++ b/src/ast-analysis/visitors/cfg-try-catch.ts @@ -1,6 +1,6 @@ import type { TreeSitterNode } from '../../types.js'; import type { AnyRules, CfgBlockInternal, FuncState, ProcessStatementsFn } from './cfg-shared.js'; -import { getBodyStatements, nn } from './cfg-shared.js'; +import { getBodyStatements, requireNode } from './cfg-shared.js'; export function processTryCatch( tryStmt: TreeSitterNode, @@ -23,7 +23,7 @@ export function processTryCatch( tryBodyStart = tryStmt.startPosition.row + 1; tryStmts = []; for (let i = 0; i < 
tryStmt.namedChildCount; i++) { - const child = nn(tryStmt.namedChild(i)); + const child = requireNode(tryStmt.namedChild(i)); if (cfgRules.catchNode && child.type === cfgRules.catchNode) continue; if (cfgRules.finallyNode && child.type === cfgRules.finallyNode) continue; tryStmts.push(child); @@ -63,7 +63,7 @@ function findTryHandlers( let catchHandler: TreeSitterNode | null = null; let finallyHandler: TreeSitterNode | null = null; for (let i = 0; i < tryStmt.namedChildCount; i++) { - const child = nn(tryStmt.namedChild(i)); + const child = requireNode(tryStmt.namedChild(i)); if (cfgRules.catchNode && child.type === cfgRules.catchNode) catchHandler = child; if (cfgRules.finallyNode && child.type === cfgRules.finallyNode) finallyHandler = child; } @@ -90,7 +90,7 @@ function processCatchHandler( } else { catchStmts = []; for (let i = 0; i < catchHandler.namedChildCount; i++) { - catchStmts.push(nn(catchHandler.namedChild(i))); + catchStmts.push(requireNode(catchHandler.namedChild(i))); } } const catchEnd = processStatements(catchStmts, catchBlock, S, cfgRules); diff --git a/src/extractors/helpers.ts b/src/extractors/helpers.ts index 4f222ccd1..7e64333a5 100644 --- a/src/extractors/helpers.ts +++ b/src/extractors/helpers.ts @@ -209,9 +209,9 @@ export function extractBodyMembers( for (let i = 0; i < body.childCount; i++) { const member = body.child(i); if (!member || member.type !== memberType) continue; - const nn = member.childForFieldName(nameField); - if (!nn) continue; - const entry: SubDeclaration = { name: nn.text, kind, line: member.startPosition.row + 1 }; + const nameNode = member.childForFieldName(nameField); + if (!nameNode) continue; + const entry: SubDeclaration = { name: nameNode.text, kind, line: member.startPosition.row + 1 }; if (visibility) entry.visibility = visibility(member); members.push(entry); } @@ -356,7 +356,7 @@ export function isCPrimitiveType(typeName: string): boolean { /** * Options for {@link extractSimpleParameters}. 
*/ -export interface ExtractParametersOptions { +interface ExtractParametersOptions { /** Tree-sitter types that mark a single parameter node (e.g. `formal_parameter`). */ paramTypes: readonly string[]; /** From 58ccdea86dec8620554fcd71e181595c9304c0d2 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 3 Jul 2026 16:24:39 -0600 Subject: [PATCH 32/39] refactor(leiden): adopt fget/iget from typed-array-helpers in cpm.ts and modularity.ts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Forge phase 3 extracted fget/iget/u8get/taAdd into typed-array-helpers.ts from adapter.ts/index.ts/partition.ts, but two sibling leiden files — cpm.ts and modularity.ts — carried byte-for-byte identical private copies of fget/iget (down to the "see adapter.ts for rationale" comment) that were never migrated to the shared module. Codebase-wide grep/symbol scans found no other duplicates of this pattern outside the leiden directory. Both functions were unexported, file-local helpers with zero external callers (codegraph exports confirms neither appears in either file's export list), so replacing the private declarations with an import from the shared module is a pure dedup with no behavioral change. docs check acknowledged: internal helper adoption within an already-vendored algorithm module, no user-facing feature/language/architecture-table changes. --- src/graph/algorithms/leiden/cpm.ts | 10 ++-------- src/graph/algorithms/leiden/modularity.ts | 10 ++-------- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/src/graph/algorithms/leiden/cpm.ts b/src/graph/algorithms/leiden/cpm.ts index 957a605fe..ee0e3bb7d 100644 --- a/src/graph/algorithms/leiden/cpm.ts +++ b/src/graph/algorithms/leiden/cpm.ts @@ -3,6 +3,8 @@ * Vendored from ngraph.leiden (MIT) — no external dependencies. */ +import { fget, iget } from './typed-array-helpers.js'; + /** * Minimal view of a partition needed by CPM quality functions. 
*/ @@ -26,14 +28,6 @@ export interface GraphView { size: Float64Array; } -// Typed array safe-access helper (see adapter.ts for rationale) -function fget(a: Float64Array, i: number): number { - return a[i] as number; -} -function iget(a: Int32Array, i: number): number { - return a[i] as number; -} - export function diffCPM( part: PartitionView, g: GraphView, diff --git a/src/graph/algorithms/leiden/modularity.ts b/src/graph/algorithms/leiden/modularity.ts index 98a9a038b..d419d9fc6 100644 --- a/src/graph/algorithms/leiden/modularity.ts +++ b/src/graph/algorithms/leiden/modularity.ts @@ -3,6 +3,8 @@ * Vendored from ngraph.leiden (MIT) — no external dependencies. */ +import { fget, iget } from './typed-array-helpers.js'; + /** * Minimal view of a partition needed by modularity quality functions. */ @@ -30,14 +32,6 @@ export interface GraphView { selfLoop: Float64Array; } -// Typed array safe-access helper (see adapter.ts for rationale) -function fget(a: Float64Array, i: number): number { - return a[i] as number; -} -function iget(a: Int32Array, i: number): number { - return a[i] as number; -} - export function diffModularity( part: PartitionView, g: GraphView, From d914aab2573711d8650a64b9633e499ab59fed54 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 3 Jul 2026 16:54:05 -0600 Subject: [PATCH 33/39] refactor(config): wire db.busyTimeoutMs and community.capacityGrowthFactor Forge phase 7 (8fed8bc2) added these as reserved DEFAULTS entries but left their consumers on hardcoded literals. db.busyTimeoutMs: openDb and openReadonlyOrFail in src/db/connection.ts now take an optional busyTimeoutMs parameter defaulting to DEFAULTS.db.busyTimeoutMs. Build and watch call sites that already hold a resolved config (pipeline, native orchestrator, native-db-lifecycle, watcher, openRepo and openReadonlyWithNative via the renamed resolveDbSettings helper) pass the user-configured value through explicitly. 
This partially addresses issue 1749, whose busy-locked regex dedup half remains open; remaining read-only query call sites and the Rust connection.rs mirror are tracked in issue 1763. community.capacityGrowthFactor is threaded through the existing maxLevels and refinementTheta plumbing, from communitiesData through louvainCommunities and detectClusters into makePartition, so ensureCommCapacity in src/graph/algorithms/leiden/partition.ts reads the configured value instead of a hardcoded 1.5 growth multiplier. It is ignored by the native Rust Louvain path, consistent with the other options. largeCodebaseFileThreshold, the third reserved entry from phase 7, was already wired by a later forge commit and needed no action. docs check acknowledged: internal config wiring only, already documented in docs/guides/configuration.md by forge phase 7. Impact: 31 functions changed, 171 affected --- src/db/connection.ts | 43 +++++++++++++------ src/domain/graph/builder/pipeline.ts | 4 +- .../builder/stages/native-db-lifecycle.ts | 2 +- .../builder/stages/native-orchestrator.ts | 8 ++-- src/domain/graph/watcher.ts | 2 +- src/features/communities.ts | 3 ++ src/graph/algorithms/leiden/optimiser.ts | 14 +++++- src/graph/algorithms/leiden/partition.ts | 20 ++++++++- src/graph/algorithms/louvain.ts | 11 ++++- src/infrastructure/config.ts | 16 ++++--- src/types.ts | 6 +-- 11 files changed, 92 insertions(+), 37 deletions(-) diff --git a/src/db/connection.ts b/src/db/connection.ts index eac517849..56f23978d 100644 --- a/src/db/connection.ts +++ b/src/db/connection.ts @@ -2,7 +2,7 @@ import { execFileSync } from 'node:child_process'; import fs from 'node:fs'; import path from 'node:path'; import { fileURLToPath } from 'node:url'; -import { loadConfig } from '../infrastructure/config.js'; +import { DEFAULTS, loadConfig } from '../infrastructure/config.js'; import { debug, warn } from '../infrastructure/logger.js'; import { getNative, isNativeAvailable } from '../infrastructure/native.js'; 
import { DbError, toErrorMessage } from '../shared/errors.js'; @@ -158,7 +158,10 @@ function isSameDirectory(a: string, b: string): boolean { } } -export function openDb(dbPath: string): LockedDatabase { +export function openDb( + dbPath: string, + busyTimeoutMs: number = DEFAULTS.db.busyTimeoutMs, +): LockedDatabase { // Flush any deferred DB close from a previous build (avoids WAL contention) flushDeferredClose(); const dir = path.dirname(dbPath); @@ -167,7 +170,7 @@ export function openDb(dbPath: string): LockedDatabase { const Database = getDatabase(); const db = new Database(dbPath) as unknown as LockedDatabase; db.pragma('journal_mode = WAL'); - db.pragma('busy_timeout = 5000'); + db.pragma(`busy_timeout = ${busyTimeoutMs}`); db.__lockPath = `${dbPath}.lock`; return db; } @@ -327,7 +330,10 @@ export function findDbPath(customPath?: string): string { } /** Open a database in readonly mode, with a user-friendly error if the DB doesn't exist. */ -export function openReadonlyOrFail(customPath?: string): BetterSqlite3Database { +export function openReadonlyOrFail( + customPath?: string, + busyTimeoutMs: number = DEFAULTS.db.busyTimeoutMs, +): BetterSqlite3Database { const dbPath = findDbPath(customPath); if (!fs.existsSync(dbPath)) { throw new DbError( @@ -337,7 +343,7 @@ export function openReadonlyOrFail(customPath?: string): BetterSqlite3Database { } const Database = getDatabase(); const db = new Database(dbPath, { readonly: true }) as unknown as BetterSqlite3Database; - db.pragma('busy_timeout = 5000'); + db.pragma(`busy_timeout = ${busyTimeoutMs}`); warnOnVersionMismatch(() => { const row = db @@ -349,8 +355,15 @@ export function openReadonlyOrFail(customPath?: string): BetterSqlite3Database { return db; } +/** Effective engine plus config-derived DB settings shared by openRepo() and openReadonlyWithNative(). 
*/ +interface ResolvedDbSettings { + engine: 'native' | 'wasm' | 'auto'; + busyTimeoutMs: number; +} + /** - * Resolve the effective engine for DB access: explicit opts.engine > config.build.engine > 'auto'. + * Resolve the effective engine for DB access (explicit opts.engine > config.build.engine > + * 'auto') alongside config.db.busyTimeoutMs, in a single loadConfig() call. * Derives rootDir from the resolved DB path so loadConfig reads the right project config. * Shared by openRepo() and openReadonlyWithNative() so the two call sites can't drift. * @@ -358,18 +371,22 @@ export function openReadonlyOrFail(customPath?: string): BetterSqlite3Database { * via resolveSecrets on a malformed llm.apiKeyCommand config), and an already-open * handle at that point would never be closed. */ -function resolveDbEngine( +function resolveDbSettings( customDbPath: string | undefined, engineOpt: 'native' | 'wasm' | 'auto' | undefined, -): 'native' | 'wasm' | 'auto' { +): ResolvedDbSettings { // Using findDbPath (not path.resolve(customDbPath)) ensures directory inputs like // --db /path/to/repo are normalised to .codegraph/graph.db before we strip two levels. // Convention: resolvedDbPath = /.codegraph/graph.db const resolvedDbPath = customDbPath ? findDbPath(customDbPath) : undefined; const rootDir = resolvedDbPath ? path.dirname(path.dirname(resolvedDbPath)) : undefined; + const config = loadConfig(rootDir); // config.build.engine is already populated from CODEGRAPH_ENGINE env by applyEnvOverrides, // so this covers both the env-var path and the .codegraphrc.json config-file path. - return engineOpt ?? loadConfig(rootDir).build.engine ?? 'auto'; + return { + engine: engineOpt ?? config.build.engine ?? 'auto', + busyTimeoutMs: config.db.busyTimeoutMs ?? DEFAULTS.db.busyTimeoutMs, + }; } /** Open a NativeRepository via rusqlite, throwing DbError if the DB file is missing. 
*/ @@ -422,7 +439,7 @@ export function openRepo( // Respect explicit engine selection: opts.engine > config.build.engine > auto. // This ensures --engine wasm and benchmark workers bypass the native path. - const engine = resolveDbEngine(customDbPath, opts.engine); + const { engine, busyTimeoutMs } = resolveDbSettings(customDbPath, opts.engine); // Try native rusqlite path first (Phase 6.14) if (engine !== 'wasm' && isNativeAvailable()) { @@ -442,7 +459,7 @@ export function openRepo( } } - const db = openReadonlyOrFail(customDbPath); + const db = openReadonlyOrFail(customDbPath, busyTimeoutMs); return { repo: new SqliteRepository(db), close() { @@ -476,9 +493,9 @@ export function openReadonlyWithNative( // handle has been opened yet, so nothing is left leaked. (Previously this ran // AFTER openReadonlyOrFail(), so a config error here leaked the already-open // better-sqlite3 handle — see the phase-15 gauntlet finding.) - const engine = resolveDbEngine(customPath, opts.engine); + const { engine, busyTimeoutMs } = resolveDbSettings(customPath, opts.engine); - const db = openReadonlyOrFail(customPath); + const db = openReadonlyOrFail(customPath, busyTimeoutMs); let nativeDb: NativeDatabase | undefined; if (engine !== 'wasm' && isNativeAvailable()) { diff --git a/src/domain/graph/builder/pipeline.ts b/src/domain/graph/builder/pipeline.ts index 490c5fe07..28e4eb0df 100644 --- a/src/domain/graph/builder/pipeline.ts +++ b/src/domain/graph/builder/pipeline.ts @@ -193,7 +193,7 @@ function setupPipeline(ctx: PipelineContext): void { // cost entirely on no-op builds that exit before reaching the orchestrator. const dir = path.dirname(ctx.dbPath); if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); - ctx.db = openDb(ctx.dbPath); + ctx.db = openDb(ctx.dbPath, ctx.config.db.busyTimeoutMs); initSchema(ctx.db); // Merge caller-supplied excludes on top of the file-config excludes so // programmatic callers (e.g. 
benchmark scripts) can extend exclusion @@ -287,7 +287,7 @@ async function runPipelineStages(ctx: PipelineContext): Promise { // now-closed NativeDatabase. Replace it with a real better-sqlite3 // connection so the JS pipeline stages can operate normally. if (ctx.nativeFirstProxy) { - ctx.db = openDb(ctx.dbPath); + ctx.db = openDb(ctx.dbPath, ctx.config.db.busyTimeoutMs); ctx.nativeFirstProxy = false; } } diff --git a/src/domain/graph/builder/stages/native-db-lifecycle.ts b/src/domain/graph/builder/stages/native-db-lifecycle.ts index ac9e2568f..7395c0d6d 100644 --- a/src/domain/graph/builder/stages/native-db-lifecycle.ts +++ b/src/domain/graph/builder/stages/native-db-lifecycle.ts @@ -70,5 +70,5 @@ export function refreshJsDb(ctx: PipelineContext): void { } catch (e) { debug(`refreshJsDb close failed: ${toErrorMessage(e)}`); } - ctx.db = openDb(ctx.dbPath); + ctx.db = openDb(ctx.dbPath, ctx.config.db.busyTimeoutMs); } diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index c0157cf95..5ba453d77 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -136,7 +136,7 @@ function handoffWalAfterNativeBuild(ctx: PipelineContext): boolean { debug(`handoffWal JS db close failed: ${toErrorMessage(e)}`); } try { - ctx.db = openDb(ctx.dbPath); + ctx.db = openDb(ctx.dbPath, ctx.config.db.busyTimeoutMs); return true; } catch (reopenErr) { warn(`Failed to reopen DB after native build: ${(reopenErr as Error).message}`); @@ -1789,7 +1789,7 @@ async function backfillNativeDroppedFiles( // for the INSERT path below). 
if (ctx.nativeFirstProxy) { closeNativeDb(ctx, 'pre-parity-backfill'); - ctx.db = openDb(ctx.dbPath); + ctx.db = openDb(ctx.dbPath, ctx.config.db.busyTimeoutMs); ctx.nativeFirstProxy = false; } @@ -1951,7 +1951,7 @@ function openNativeDatabase(ctx: PipelineContext): void { ctx.nativeFirstProxy = false; // defensive: reset in case future refactors move the assignment above throwing lines releaseAdvisoryLock(`${ctx.dbPath}.lock`); // Reopen better-sqlite3 for JS pipeline fallback - ctx.db = openDb(ctx.dbPath); + ctx.db = openDb(ctx.dbPath, ctx.config.db.busyTimeoutMs); } } @@ -2239,7 +2239,7 @@ class NativeOrchestrationSession { if (!needsStructure && !needsAnalysisFallback) return true; if (needsAnalysisFallback && this.ctx.nativeFirstProxy) { closeNativeDb(this.ctx, 'pre-analysis-fallback'); - this.ctx.db = openDb(this.ctx.dbPath); + this.ctx.db = openDb(this.ctx.dbPath, this.ctx.config.db.busyTimeoutMs); this.ctx.nativeFirstProxy = false; return true; } diff --git a/src/domain/graph/watcher.ts b/src/domain/graph/watcher.ts index d922307e6..703a45247 100644 --- a/src/domain/graph/watcher.ts +++ b/src/domain/graph/watcher.ts @@ -186,7 +186,7 @@ function setupWatcher(rootDir: string, opts: { engine?: string; dbPath?: string const extraDirs = [...(config.ignoreDirs ?? []), ...(config.ignoreAdditionalDirs ?? [])]; const ignoreSet = buildIgnoreSet(extraDirs.length ? 
extraDirs : undefined); - const db = openDb(dbPath); + const db = openDb(dbPath, config.db.busyTimeoutMs); initSchema(db); const engineOpts: import('../../types.js').EngineOpts = { engine: (opts.engine || 'auto') as import('../../types.js').EngineMode, diff --git a/src/features/communities.ts b/src/features/communities.ts index 0244c976c..baf5c10d1 100644 --- a/src/features/communities.ts +++ b/src/features/communities.ts @@ -156,6 +156,7 @@ type CommunitiesDataOpts = { maxLevels?: number; maxLocalPasses?: number; refinementTheta?: number; + capacityGrowthFactor?: number; limit?: number; offset?: number; repo?: Repository; @@ -197,11 +198,13 @@ export function communitiesData( const maxLevels = opts.maxLevels ?? config.community?.maxLevels; const maxLocalPasses = opts.maxLocalPasses ?? config.community?.maxLocalPasses; const refinementTheta = opts.refinementTheta ?? config.community?.refinementTheta; + const capacityGrowthFactor = opts.capacityGrowthFactor ?? config.community?.capacityGrowthFactor; const { assignments, modularity } = louvainCommunities(graph, { resolution, maxLevels, maxLocalPasses, refinementTheta, + capacityGrowthFactor, }); const { communities, communityDirs } = buildCommunityObjects(graph, assignments, opts); diff --git a/src/graph/algorithms/leiden/optimiser.ts b/src/graph/algorithms/leiden/optimiser.ts index 5d6753c70..17c3d219b 100644 --- a/src/graph/algorithms/leiden/optimiser.ts +++ b/src/graph/algorithms/leiden/optimiser.ts @@ -16,6 +16,7 @@ import { createRng } from './rng.js'; // via .codegraphrc.json. Callers (e.g. louvain.ts) can pass overrides through options. const DEFAULT_MAX_LEVELS: number = 50; const DEFAULT_MAX_LOCAL_PASSES: number = 20; +const DEFAULT_CAPACITY_GROWTH_FACTOR: number = 1.5; const GAIN_EPSILON: number = 1e-12; /** Pre-allocated scratch buffers for refinement candidate collection. 
*/ @@ -51,6 +52,7 @@ export interface LeidenOptions { linkWeight?: GraphAdapterOptions['linkWeight']; nodeSize?: GraphAdapterOptions['nodeSize']; baseNodeIds?: string[]; + capacityGrowthFactor?: number; } export interface NormalizedOptions { @@ -67,6 +69,7 @@ export interface NormalizedOptions { maxCommunitySize: number; refinementTheta: number; fixedNodes: Set | string[] | undefined; + capacityGrowthFactor: number; } export interface LevelEntry { @@ -168,7 +171,9 @@ function runLevel( random: () => number, fixedNodeMask: Uint8Array | null, ): LevelOutcome { - const partition: Partition = makePartition(graphAdapter); + const partition: Partition = makePartition(graphAdapter, { + capacityGrowthFactor: options.capacityGrowthFactor, + }); partition.graph = graphAdapter; partition.initializeAggregates(); @@ -499,7 +504,7 @@ function refineWithinCoarseCommunities( opts: NormalizedOptions, fixedMask0: Uint8Array | null, ): Partition { - const p: Partition = makePartition(g); + const p: Partition = makePartition(g, { capacityGrowthFactor: opts.capacityGrowthFactor }); p.initializeAggregates(); p.graph = g; const macro: Int32Array = basePart.nodeCommunity; @@ -675,6 +680,10 @@ function normalizeOptions(options: LeidenOptions = {}): NormalizedOptions { : Infinity; const refinementTheta: number = typeof options.refinementTheta === 'number' ? options.refinementTheta : 1.0; + const capacityGrowthFactor: number = + typeof options.capacityGrowthFactor === 'number' + ? 
options.capacityGrowthFactor + : DEFAULT_CAPACITY_GROWTH_FACTOR; return { directed, randomSeed, @@ -689,6 +698,7 @@ function normalizeOptions(options: LeidenOptions = {}): NormalizedOptions { maxCommunitySize, refinementTheta, fixedNodes: options.fixedNodes, + capacityGrowthFactor, }; } diff --git a/src/graph/algorithms/leiden/partition.ts b/src/graph/algorithms/leiden/partition.ts index 8e76f8c50..50a11c4be 100644 --- a/src/graph/algorithms/leiden/partition.ts +++ b/src/graph/algorithms/leiden/partition.ts @@ -68,8 +68,13 @@ interface PartitionState { outEdgeWeightToCommunity: Float64Array; inEdgeWeightFromCommunity: Float64Array; isCandidateCommunity: Uint8Array; + /** Growth multiplier applied by ensureCommCapacity when resizing typed arrays. */ + capacityGrowthFactor: number; } +/** Mirrored in DEFAULTS.community.capacityGrowthFactor (src/infrastructure/config.ts). */ +const DEFAULT_CAPACITY_GROWTH_FACTOR = 1.5; + /* ------------------------------------------------------------------ */ /* Community-ID sort helper (used by compact) */ /* ------------------------------------------------------------------ */ @@ -128,7 +133,10 @@ function buildSortedCommunityIds( function ensureCommCapacity(s: PartitionState, newCount: number): void { if (newCount <= s.communityTotalSize.length) return; - const growTo: number = Math.max(newCount, Math.ceil(s.communityTotalSize.length * 1.5)); + const growTo: number = Math.max( + newCount, + Math.ceil(s.communityTotalSize.length * s.capacityGrowthFactor), + ); s.communityTotalSize = growFloat(s.communityTotalSize, growTo); s.communityNodeCount = growInt(s.communityNodeCount, growTo); s.communityInternalEdgeWeight = growFloat(s.communityInternalEdgeWeight, growTo); @@ -485,7 +493,11 @@ function compactIds(s: PartitionState, opts: CompactOptions = {}): void { /* Factory: thin wrapper that wires state to extracted functions */ /* ------------------------------------------------------------------ */ -export function makePartition(graph: 
GraphAdapter): Partition { +export interface MakePartitionOptions { + capacityGrowthFactor?: number; +} + +export function makePartition(graph: GraphAdapter, options: MakePartitionOptions = {}): Partition { const n: number = graph.n; const nodeCommunity = new Int32Array(n); for (let i = 0; i < n; i++) nodeCommunity[i] = i; @@ -507,6 +519,10 @@ export function makePartition(graph: GraphAdapter): Partition { outEdgeWeightToCommunity: new Float64Array(n), inEdgeWeightFromCommunity: new Float64Array(n), isCandidateCommunity: new Uint8Array(n), + capacityGrowthFactor: + typeof options.capacityGrowthFactor === 'number' + ? options.capacityGrowthFactor + : DEFAULT_CAPACITY_GROWTH_FACTOR, }; return { diff --git a/src/graph/algorithms/louvain.ts b/src/graph/algorithms/louvain.ts index 8adfdb560..60ef439ff 100644 --- a/src/graph/algorithms/louvain.ts +++ b/src/graph/algorithms/louvain.ts @@ -20,6 +20,7 @@ export interface LouvainOptions { maxLevels?: number; maxLocalPasses?: number; refinementTheta?: number; + capacityGrowthFactor?: number; } export interface LouvainResult { @@ -36,9 +37,14 @@ export function louvainCommunities(graph: CodeGraph, opts: LouvainOptions = {}): const native = loadNative(); if (native?.louvainCommunities) { - if (opts.maxLevels != null || opts.maxLocalPasses != null || opts.refinementTheta != null) { + if ( + opts.maxLevels != null || + opts.maxLocalPasses != null || + opts.refinementTheta != null || + opts.capacityGrowthFactor != null + ) { debug( - 'louvainCommunities: maxLevels/maxLocalPasses/refinementTheta are ignored by the native Rust path', + 'louvainCommunities: maxLevels/maxLocalPasses/refinementTheta/capacityGrowthFactor are ignored by the native Rust path', ); } const edges = graph.toEdgeArray(); @@ -63,6 +69,7 @@ function louvainJS(graph: CodeGraph, opts: LouvainOptions, resolution: number): ...(opts.maxLevels != null && { maxLevels: opts.maxLevels }), ...(opts.maxLocalPasses != null && { maxLocalPasses: opts.maxLocalPasses }), 
...(opts.refinementTheta != null && { refinementTheta: opts.refinementTheta }), + ...(opts.capacityGrowthFactor != null && { capacityGrowthFactor: opts.capacityGrowthFactor }), }); const assignments = new Map(); diff --git a/src/infrastructure/config.ts b/src/infrastructure/config.ts index 6181d450e..ceba048f3 100644 --- a/src/infrastructure/config.ts +++ b/src/infrastructure/config.ts @@ -63,9 +63,12 @@ export const DEFAULTS = deepFreeze({ db: { /** * SQLite `busy_timeout` pragma (ms) applied to every opened connection. - * @reserved — currently not wired; `src/db/connection.ts` still sets the - * hardcoded literal `5000` directly via `db.pragma('busy_timeout = 5000')` - * in both `openDb` and `openReadonlyOrFail`. + * Wired as the default `busyTimeoutMs` parameter of `openDb()` and + * `openReadonlyOrFail()` in `src/db/connection.ts`. Build/watch call sites + * that already hold a resolved config (pipeline, native orchestrator, + * watcher, `openRepo`/`openReadonlyWithNative`) pass the user-configured + * value through explicitly; the remaining ad-hoc read-only query call + * sites fall back to this default. */ busyTimeoutMs: 5000, }, @@ -160,9 +163,10 @@ export const DEFAULTS = deepFreeze({ /** * Growth multiplier applied when a Leiden partition's per-community * typed arrays need to be resized to fit a larger community count. - * @reserved — currently not wired; `ensureCommCapacity()` in - * `src/graph/algorithms/leiden/partition.ts` still uses the hardcoded - * literal `1.5` directly. + * Threaded through `communitiesData()` -> `louvainCommunities()` -> + * `detectClusters()` -> `makePartition()` to `ensureCommCapacity()` in + * `src/graph/algorithms/leiden/partition.ts`. Ignored by the native Rust + * Louvain path (classic Louvain doesn't use this Leiden-specific resize). 
*/ capacityGrowthFactor: 1.5, }, diff --git a/src/types.ts b/src/types.ts index 66b894f36..980713528 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1346,8 +1346,7 @@ export interface CodegraphConfig { db: { /** * SQLite `busy_timeout` pragma (ms) applied to every opened connection. - * @reserved — currently not wired; see `busyTimeoutMs` in - * `src/infrastructure/config.ts` for wiring status. + * See `busyTimeoutMs` in `src/infrastructure/config.ts` for wiring status. */ busyTimeoutMs: number; }; @@ -1472,8 +1471,7 @@ export interface CodegraphConfig { /** * Growth multiplier applied when a Leiden partition's per-community * typed arrays need to be resized to fit a larger community count. - * @reserved — currently not wired; see `capacityGrowthFactor` in - * `src/infrastructure/config.ts` for wiring status. + * See `capacityGrowthFactor` in `src/infrastructure/config.ts` for wiring status. */ capacityGrowthFactor: number; }; From 824f9165b972f23dae09523336e4008c8fba5ee3 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 3 Jul 2026 17:09:31 -0600 Subject: [PATCH 34/39] refactor(extractors): adopt resolveMethodDefinitionName across 3 duplicate sites handleMethodCapture, handleMethodDef, and extractObjectLiteralFunctions each inlined the identical computed_property_name unwrap-and-strip-quotes logic that forge phase 10 already extracted into resolveMethodDefinitionName for pushMethodDefContext. Consolidate onto the shared helper; no behavior change. docs check acknowledged: internal-only duplicate-code consolidation, no new languages/commands/architecture; README.md, CLAUDE.md, and ROADMAP.md are unaffected. 
Impact: 3 functions changed, 11 affected --- src/extractors/javascript.ts | 44 ++++++++---------------------------- 1 file changed, 9 insertions(+), 35 deletions(-) diff --git a/src/extractors/javascript.ts b/src/extractors/javascript.ts index 8c9931d22..4d189fa4c 100644 --- a/src/extractors/javascript.ts +++ b/src/extractors/javascript.ts @@ -178,18 +178,9 @@ function handleClassCapture( /** Handle method_definition capture. */ function handleMethodCapture(c: Record, definitions: Definition[]): void { const methNameNode = c.meth_name!; - let methName: string; - if (methNameNode.type === 'computed_property_name') { - // Extract the inner string literal from `['methodName']` or `["methodName"]`. - // Non-string computed keys (e.g. `[Symbol.iterator]`) cannot be resolved at - // dot-notation call sites, so skip them entirely. - const inner = methNameNode.child(1); // child(0)='[', child(1)=string, child(2)=']' - if (!inner || (inner.type !== 'string' && inner.type !== 'string_fragment')) return; - methName = inner.text.replace(/^['"]|['"]$/g, ''); - if (!methName) return; - } else { - methName = methNameNode.text; - } + // Non-string computed keys (e.g. `[Symbol.iterator]`) resolve to '' and are skipped. + const methName = resolveMethodDefinitionName(methNameNode); + if (!methName) return; const parentClass = findParentClass(c.meth_node!); const fullName = parentClass ? `${parentClass}.${methName}` : methName; const methChildren = extractParameters(c.meth_node!); @@ -960,18 +951,9 @@ function handleClassDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { function handleMethodDef(node: TreeSitterNode, ctx: ExtractorOutput): void { const nameNode = node.childForFieldName('name'); if (nameNode) { - let methName: string; - if (nameNode.type === 'computed_property_name') { - // Extract the inner string literal from `['methodName']` or `["methodName"]`. - // Non-string computed keys (e.g. 
`[Symbol.iterator]`) cannot be resolved at - // dot-notation call sites, so skip them entirely. - const inner = nameNode.child(1); // child(0)='[', child(1)=string, child(2)=']' - if (!inner || (inner.type !== 'string' && inner.type !== 'string_fragment')) return; - methName = inner.text.replace(/^['"]|['"]$/g, ''); - if (!methName) return; - } else { - methName = nameNode.text; - } + // Non-string computed keys (e.g. `[Symbol.iterator]`) resolve to '' and are skipped. + const methName = resolveMethodDefinitionName(nameNode); + if (!methName) return; const parentClass = findParentClass(node); const fullName = parentClass ? `${parentClass}.${methName}` : methName; const methChildren = extractParameters(node); @@ -1287,17 +1269,9 @@ function extractObjectLiteralFunctions( } else if (child.type === 'method_definition') { const nameNode = child.childForFieldName('name'); if (nameNode) { - let methodName: string; - if (nameNode.type === 'computed_property_name') { - // Strip brackets+quotes from `['methodName']` to get a resolvable name. - // Skip non-string computed keys (e.g. [Symbol.iterator]). - const inner = nameNode.child(1); - if (!inner || (inner.type !== 'string' && inner.type !== 'string_fragment')) continue; - methodName = inner.text.replace(/^['"]|['"]$/g, ''); - if (!methodName) continue; - } else { - methodName = nameNode.text; - } + // Non-string computed keys (e.g. `[Symbol.iterator]`) resolve to '' and are skipped. + const methodName = resolveMethodDefinitionName(nameNode); + if (!methodName) continue; definitions.push({ name: `${varName}.${methodName}`, kind: 'function', From e229c6b428874bfba88a480a1085c0d054dac232 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Fri, 3 Jul 2026 17:27:11 -0600 Subject: [PATCH 35/39] refactor(resolver): dedupe forge phase-11 call-resolution helpers Phase 11 decomposed build-edges.ts's resolveFallbackTargets into 15 named helpers. 
The codebase-wide duplicate scan on those new helpers surfaced two pre-existing duplicates the decomposition didn't catch (both predate phase 11 temporally, so the consolidation opportunities weren't nameable until now): - resolveReflectionKeyExprFallback and resolveDefinePropertyAccessorFallback each inlined the same string/{type} typeMap-entry unwrap ternary that strategy.ts's unwrapTypeEntry already implements (extracted one commit after phase 11, in forge phase 20). Promoted unwrapTypeEntry to an export and wired both call sites to it. - build-edges.ts's newly-named resolveSameClassQualifiedMethod was byte-identical to incremental.ts's pre-existing resolveThisSameClassTarget (incremental.ts's own docstring already noted it mirrors "the full-build counterpart"). Moved the shared implementation into call-resolver.ts -- the module whose stated purpose is holding call-resolution logic shared between build-edges.ts and incremental.ts exactly once -- and wired both consumers to it. Zero behavior change: all replaced logic was verified byte-identical before relocating. Resolution benchmark (206/206) and full suite (3340 tests) pass identically before/after. Two follow-on divergences the scan also surfaced between the full-build and incremental resolution paths (not fixed here -- would change behavior, not just relocate it): incremental.ts has no same-class bare-call fallback (#1765), and the Object.defineProperty same-file fallback's kind-filter differs between the two paths (#1766). docs check acknowledged: pure internal dedup/extract-method refactor, no new features, commands, languages, or architecture changes -- README/CLAUDE/ ROADMAP do not need updates. 
Impact: 5 functions changed, 20 affected --- src/domain/graph/builder/call-resolver.ts | 25 +++++++++++++ src/domain/graph/builder/incremental.ts | 29 +-------------- .../graph/builder/stages/build-edges.ts | 37 ++----------------- src/domain/graph/resolver/strategy.ts | 5 ++- 4 files changed, 35 insertions(+), 61 deletions(-) diff --git a/src/domain/graph/builder/call-resolver.ts b/src/domain/graph/builder/call-resolver.ts index 0312dee75..b39a158a7 100644 --- a/src/domain/graph/builder/call-resolver.ts +++ b/src/domain/graph/builder/call-resolver.ts @@ -35,6 +35,31 @@ export const RECEIVER_KINDS = new Set(['class', 'struct', 'interface', 'type', ' // continue to work without changes (build-edges.ts, etc.). export { isModuleScopedLanguage }; +/** + * Shared by both the full-build (build-edges.ts) and incremental (incremental.ts) + * same-class fallback strategies: derive the enclosing class name from the + * caller's qualified name (the segment immediately before the final dot, e.g. + * `Namespace.MyClass.method` → `MyClass`), then look up `ClassName.callName` + * as a method in the same file. + * + * Uses lastIndexOf (not indexOf) so deeply-qualified caller names extract the + * innermost class, not the outermost namespace. 
+ */ +export function resolveSameClassQualifiedMethod( + callName: string, + callerName: string, + relPath: string, + lookup: CallNodeLookup, +): Array<{ id: number; file: string; kind?: string }> { + const lastDot = callerName.lastIndexOf('.'); + if (lastDot <= 0) return []; + const prevDot = callerName.lastIndexOf('.', lastDot - 1); + const className = callerName.slice(prevDot + 1, lastDot); + return lookup + .byNameAndFile(`${className}.${callName}`, relPath) + .filter((n) => n.kind === 'method'); +} + // ── Shared resolution functions ────────────────────────────────────────── /** diff --git a/src/domain/graph/builder/incremental.ts b/src/domain/graph/builder/incremental.ts index 2c60b3909..863ef00f7 100644 --- a/src/domain/graph/builder/incremental.ts +++ b/src/domain/graph/builder/incremental.ts @@ -26,6 +26,7 @@ import { findCaller, resolveCallTargets, resolveReceiverEdge, + resolveSameClassQualifiedMethod, } from './call-resolver.js'; import { BUILTIN_RECEIVERS, readFileSafe } from './helpers.js'; @@ -556,32 +557,6 @@ function buildIncrementalTypeMap(symbols: ExtractorOutput): Map return typeMap; } -/** - * Strategy 1 — same-class `this.method()` fallback. - * Derives the enclosing class name from callerName by extracting the segment - * immediately before the final dot (e.g. `MyClass.method` → `MyClass`, - * `Namespace.MyClass.method` → `MyClass`), then retries with the qualified - * method name `MyClass.callName`. - * - * Uses lastIndexOf to match the full-build counterpart in resolveFallbackTargets - * (build-edges.ts) — indexOf would extract `Namespace` instead of `MyClass` for - * deeply-qualified caller names like `Namespace.MyClass.method`. 
- */ -function resolveThisSameClassTarget( - callName: string, - callerName: string, - relPath: string, - lookup: CallNodeLookup, -): Array<{ id: number; file: string; kind?: string }> { - const lastDot = callerName.lastIndexOf('.'); - if (lastDot <= 0) return []; - const prevDot = callerName.lastIndexOf('.', lastDot - 1); - const className = callerName.slice(prevDot + 1, lastDot); - return lookup - .byNameAndFile(`${className}.${callName}`, relPath) - .filter((n) => n.kind === 'method'); -} - /** * Strategy 2 — Object.defineProperty accessor fallback. * When a function is registered as a getter/setter via @@ -635,7 +610,7 @@ function applyThisReceiverFallbacks( // Strategy 1: same-class `this.method()` fallback. if (call.receiver === 'this' && callerName != null) { - const s1 = resolveThisSameClassTarget(call.name, callerName, relPath, lookup); + const s1 = resolveSameClassQualifiedMethod(call.name, callerName, relPath, lookup); if (s1.length > 0) return s1; } diff --git a/src/domain/graph/builder/stages/build-edges.ts b/src/domain/graph/builder/stages/build-edges.ts index efeee79f6..011ce246b 100644 --- a/src/domain/graph/builder/stages/build-edges.ts +++ b/src/domain/graph/builder/stages/build-edges.ts @@ -36,6 +36,7 @@ import type { import { computeConfidence } from '../../resolve.js'; import type { PointsToMap } from '../../resolver/points-to.js'; import { buildPointsToMap, resolveViaPointsTo } from '../../resolver/points-to.js'; +import { unwrapTypeEntry } from '../../resolver/strategy.js'; import { enrichTypeMapWithTsc } from '../../resolver/ts-resolver.js'; import { type CallNodeLookup, @@ -43,6 +44,7 @@ import { isModuleScopedLanguage, resolveCallTargets, resolveReceiverEdge, + resolveSameClassQualifiedMethod, } from '../call-resolver.js'; import type { ChaContext } from '../cha.js'; import { buildChaContext, resolveChaTargets, resolveThisDispatch } from '../cha.js'; @@ -1086,27 +1088,6 @@ function resolveKotlinReflectionPreQualified( return []; } -/** - * 
Shared by both same-class fallback strategies below: derive the enclosing - * class name from the caller's qualified name (the segment immediately before - * the final dot, e.g. `Namespace.MyClass.method` → `MyClass`), then look up - * `ClassName.callName` as a method in the same file. - */ -function resolveSameClassQualifiedMethod( - callName: string, - callerName: string, - relPath: string, - lookup: CallNodeLookup, -): Array<{ id: number; file: string; kind?: string }> { - const lastDot = callerName.lastIndexOf('.'); - if (lastDot <= 0) return []; - const prevDot = callerName.lastIndexOf('.', lastDot - 1); - const className = callerName.slice(prevDot + 1, lastDot); - return lookup - .byNameAndFile(`${className}.${callName}`, relPath) - .filter((n) => n.kind === 'method'); -} - /** * Same-class `this.method()` fallback: when the call receiver is `this` and * resolveCallTargets found nothing, derive the enclosing class name from the @@ -1168,12 +1149,7 @@ function resolveReflectionKeyExprFallback( ) { return []; } - const typeEntry = typeMap.get(call.receiver); - const resolvedType = typeEntry - ? typeof typeEntry === 'string' - ? typeEntry - : (typeEntry as { type?: string }).type - : null; + const resolvedType = unwrapTypeEntry(typeMap.get(call.receiver)); if (resolvedType) { const qualified = lookup .byNameAndFile(`${resolvedType}.${call.keyExpr}`, relPath) @@ -1215,12 +1191,7 @@ function resolveDefinePropertyAccessorFallback( const receiverVarName = definePropertyReceivers.get(callerName); if (!receiverVarName) return []; - const typeEntry = typeMap.get(receiverVarName); - const typeName = typeEntry - ? typeof typeEntry === 'string' - ? 
typeEntry - : (typeEntry as { type?: string }).type - : null; + const typeName = unwrapTypeEntry(typeMap.get(receiverVarName)); if (typeName) { const qualified = lookup.byNameAndFile(`${typeName}.${call.name}`, relPath); if (qualified.length > 0) return [...qualified]; diff --git a/src/domain/graph/resolver/strategy.ts b/src/domain/graph/resolver/strategy.ts index 8bfe99cfb..02859cc4d 100644 --- a/src/domain/graph/resolver/strategy.ts +++ b/src/domain/graph/resolver/strategy.ts @@ -66,8 +66,11 @@ export function isModuleScopedLanguage(relPath: string): boolean { * shape `{ type?: string }` (some seeders attach extra metadata alongside the * target). This normalises both shapes to `string | null`, matching the * falsy-check semantics every call site previously duplicated inline. + * + * Exported: also used by build-edges.ts's reflection/defineProperty fallback + * helpers, which duplicated this exact ternary inline before being wired here. */ -function unwrapTypeEntry(entry: unknown): string | null { +export function unwrapTypeEntry(entry: unknown): string | null { if (!entry) return null; return typeof entry === 'string' ? entry : ((entry as { type?: string }).type ?? null); } From b4a1d87a190bd2150e8f5b747763055e421cce84 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Sat, 4 Jul 2026 00:05:52 -0600 Subject: [PATCH 36/39] refactor(builder): extract markExportedSymbols, dedupe batch UPDATE (docs check acknowledged) insertDefinitionsAndExports (insert-nodes.ts) and insertBackfilledNodes (native-orchestrator.ts) duplicated the exact same chunked-UPDATE loop for marking exported symbols -- the latter's own comment even said "mirrors insertDefinitionsAndExports". Extract markExportedSymbols(db, exportKeys) in builder/helpers.ts, built on the existing getOrCreateBatchStmt/runBatchInsert primitives from the batch-insert dedupe, and wire both call sites to it. 
Impact: 4 functions changed, 20 affected --- src/domain/graph/builder/helpers.ts | 25 +++++++++++++++++ .../graph/builder/stages/insert-nodes.ts | 27 ++---------------- .../builder/stages/native-orchestrator.ts | 28 ++----------------- 3 files changed, 29 insertions(+), 51 deletions(-) diff --git a/src/domain/graph/builder/helpers.ts b/src/domain/graph/builder/helpers.ts index c9af890b9..16dda21f5 100644 --- a/src/domain/graph/builder/helpers.ts +++ b/src/domain/graph/builder/helpers.ts @@ -448,6 +448,31 @@ export function batchInsertEdges(db: BetterSqlite3Database, rows: unknown[][]): }); } +const exportStmtCache = new WeakMap>(); + +function getExportStmt(db: BetterSqlite3Database, chunkSize: number): SqliteStatement { + return getOrCreateBatchStmt(exportStmtCache, db, chunkSize, (n) => { + const conditions = Array.from( + { length: n }, + () => '(name = ? AND kind = ? AND file = ? AND line = ?)', + ).join(' OR '); + return `UPDATE nodes SET exported = 1 WHERE ${conditions}`; + }); +} + +/** + * Mark exported symbols as `exported = 1` in batches, keyed by + * `[name, kind, file, line]`. Shared by the JS-fallback definitions/exports + * insert (`insertDefinitionsAndExports`) and the native-orchestrator backfill + * (`insertBackfilledNodes`), which previously duplicated this exact + * chunked-UPDATE loop verbatim. + */ +export function markExportedSymbols(db: BetterSqlite3Database, exportKeys: unknown[][]): void { + runBatchInsert(db, exportKeys, getExportStmt, (k, vals) => { + vals.push(k[0], k[1], k[2], k[3]); + }); +} + /** Confidence assigned to CHA-expanded interface/abstract dispatch edges. 
*/ export const CHA_DISPATCH_CONFIDENCE = 0.8; diff --git a/src/domain/graph/builder/stages/insert-nodes.ts b/src/domain/graph/builder/stages/insert-nodes.ts index 09aad25d8..c0459a2c5 100644 --- a/src/domain/graph/builder/stages/insert-nodes.ts +++ b/src/domain/graph/builder/stages/insert-nodes.ts @@ -27,6 +27,7 @@ import { batchInsertNodes, fileHash, fileStat, + markExportedSymbols, readFileSafe, } from '../helpers.js'; @@ -283,31 +284,7 @@ function insertDefinitionsAndExports( } } batchInsertNodes(db, phase1Rows); - - // Mark exported symbols in batches (cache prepared statements by chunk size) - if (exportKeys.length > 0) { - const EXPORT_CHUNK = 500; - const exportStmtCache = new Map(); - for (let i = 0; i < exportKeys.length; i += EXPORT_CHUNK) { - const end = Math.min(i + EXPORT_CHUNK, exportKeys.length); - const chunkSize = end - i; - let updateStmt = exportStmtCache.get(chunkSize); - if (!updateStmt) { - const conditions = Array.from( - { length: chunkSize }, - () => '(name = ? AND kind = ? AND file = ? 
AND line = ?)', - ).join(' OR '); - updateStmt = db.prepare(`UPDATE nodes SET exported = 1 WHERE ${conditions}`); - exportStmtCache.set(chunkSize, updateStmt); - } - const vals: unknown[] = []; - for (let j = i; j < end; j++) { - const k = exportKeys[j] as unknown[]; - vals.push(k[0], k[1], k[2], k[3]); - } - updateStmt.run(...vals); - } - } + markExportedSymbols(db, exportKeys); } // ── JS fallback: Phase 2+3 ────────────────────────────────────────── diff --git a/src/domain/graph/builder/stages/native-orchestrator.ts b/src/domain/graph/builder/stages/native-orchestrator.ts index 5ba453d77..b5d0c0467 100644 --- a/src/domain/graph/builder/stages/native-orchestrator.ts +++ b/src/domain/graph/builder/stages/native-orchestrator.ts @@ -34,7 +34,6 @@ import type { DataflowResult, Definition, ExtractorOutput, - SqliteStatement, } from '../../../../types.js'; import { classifyNativeDrops, @@ -57,6 +56,7 @@ import { collectFiles as collectFilesUtil, fileHash, fileStat, + markExportedSymbols, readFileSafe, } from '../helpers.js'; import { NativeDbProxy } from '../native-db-proxy.js'; @@ -1691,31 +1691,7 @@ function insertBackfilledNodes( } } batchInsertNodes(db, rows); - - // Mark exported symbols in batches — mirrors insertDefinitionsAndExports. - if (exportKeys.length > 0) { - const EXPORT_CHUNK = 500; - const exportStmtCache = new Map(); - for (let i = 0; i < exportKeys.length; i += EXPORT_CHUNK) { - const end = Math.min(i + EXPORT_CHUNK, exportKeys.length); - const chunkSize = end - i; - let updateStmt = exportStmtCache.get(chunkSize); - if (!updateStmt) { - const conditions = Array.from( - { length: chunkSize }, - () => '(name = ? AND kind = ? AND file = ? 
AND line = ?)', - ).join(' OR '); - updateStmt = db.prepare(`UPDATE nodes SET exported = 1 WHERE ${conditions}`); - exportStmtCache.set(chunkSize, updateStmt); - } - const vals: unknown[] = []; - for (let j = i; j < end; j++) { - const k = exportKeys[j] as unknown[]; - vals.push(k[0], k[1], k[2], k[3]); - } - updateStmt.run(...vals); - } - } + markExportedSymbols(db, exportKeys); } /** From 1201518d8091bcd0adeaa17c6d657fb18b67e1ae Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Sat, 4 Jul 2026 01:22:30 -0600 Subject: [PATCH 37/39] refactor(scripts): promote timeMedian/median/round1 into shared bench-timing lib Forge phase 28 extracted timeMedian in token-benchmark.ts, already wired within that file. The codebase-wide duplicate scan found median()/round1() duplicated verbatim across query-benchmark.ts, incremental-benchmark.ts (twice), and benchmark.ts, plus the "loop N times, time each run, median()" pattern repeated 8 more times across those files. Promotes median/round1/timeMedian into scripts/lib/bench-timing.ts (matching the existing bench-config.ts/fork-engine.ts shared-module pattern) and wires median/round1 into all 4 scripts. Adopts timeMedian at the 3 call sites that were already unconditionally async with no new async boundary (incremental-benchmark.ts fullBuildMs/noopRebuildMs worker, benchmark.ts noopRebuildMs worker). Left 5 remaining duplicate sites unconverted since adopting timeMedian there requires making currently-synchronous functions async, which is a control-flow change rather than a mechanical adoption -- tracked in #1774. 
Impact: 4 functions changed, 14 affected --- scripts/benchmark.ts | 24 +++++---------- scripts/incremental-benchmark.ts | 37 ++++++----------------- scripts/lib/bench-timing.ts | 52 ++++++++++++++++++++++++++++++++ scripts/query-benchmark.ts | 11 +------ scripts/token-benchmark.ts | 34 +++------------------ 5 files changed, 75 insertions(+), 83 deletions(-) create mode 100644 scripts/lib/bench-timing.ts diff --git a/scripts/benchmark.ts b/scripts/benchmark.ts index 642e2b1f5..65cad01a3 100644 --- a/scripts/benchmark.ts +++ b/scripts/benchmark.ts @@ -17,6 +17,7 @@ import { fileURLToPath } from 'node:url'; import Database from 'better-sqlite3'; import { resolveBenchmarkExcludes, resolveBenchmarkSource, srcImport } from './lib/bench-config.js'; import { isWorker, workerEngine, workerTargets, forkEngines } from './lib/fork-engine.js'; +import { median, round1, timeMedian } from './lib/bench-timing.js'; // ── Parent process: fork one child per engine, assemble final output ───── if (!isWorker()) { @@ -97,16 +98,6 @@ const QUERY_WARMUP_RUNS = 3; const PROBE_FILE = path.join(root, 'src', 'domain', 'queries.ts'); const BENCH_EXCLUDE = [...resolveBenchmarkExcludes()]; -function median(arr) { - const sorted = [...arr].sort((a, b) => a - b); - const mid = Math.floor(sorted.length / 2); - return sorted.length % 2 ? 
sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2; -} - -function round1(n) { - return Math.round(n * 10) / 10; -} - function selectTargets() { const db = new Database(dbPath, { readonly: true }); const rows = db @@ -158,13 +149,12 @@ try { for (let i = 0; i < WARMUP_RUNS; i++) { await buildGraph(root, { engine, incremental: true, exclude: BENCH_EXCLUDE }); } - const noopTimings = []; - for (let i = 0; i < INCREMENTAL_RUNS; i++) { - const start = performance.now(); - await buildGraph(root, { engine, incremental: true, exclude: BENCH_EXCLUDE }); - noopTimings.push(performance.now() - start); - } - noopRebuildMs = Math.round(median(noopTimings)); + noopRebuildMs = Math.round( + await timeMedian( + () => buildGraph(root, { engine, incremental: true, exclude: BENCH_EXCLUDE }), + INCREMENTAL_RUNS, + ), + ); } catch (err) { console.error(` [${engine}] No-op rebuild failed: ${(err as Error).message}`); } diff --git a/scripts/incremental-benchmark.ts b/scripts/incremental-benchmark.ts index 853d595da..b2f3b53d7 100644 --- a/scripts/incremental-benchmark.ts +++ b/scripts/incremental-benchmark.ts @@ -16,6 +16,7 @@ import { performance } from 'node:perf_hooks'; import { fileURLToPath } from 'node:url'; import { resolveBenchmarkExcludes, resolveBenchmarkSource, srcImport } from './lib/bench-config.js'; import { isWorker, workerEngine, forkEngines } from './lib/fork-engine.js'; +import { median, round1, timeMedian } from './lib/bench-timing.js'; // ── Parent process: fork one child per engine, assemble final output ───── if (!isWorker()) { @@ -51,12 +52,6 @@ if (!isWorker()) { // jitter and produces CI-amplified false regressions. const RUNS = 5; const WARMUP_RUNS = 2; - function median(arr) { - const sorted = [...arr].sort((a, b) => a - b); - const mid = Math.floor(sorted.length / 2); - return sorted.length % 2 ? 
sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2; - } - function round1(n) { return Math.round(n * 10) / 10; } function collectImportPairs() { const srcRoot = path.join(rootParent, 'src'); @@ -190,23 +185,15 @@ const WARMUP_RUNS = 2; // the same corpus. const BUILD_OPTS = { engine, exclude: [...resolveBenchmarkExcludes()] }; -function median(arr) { - const sorted = [...arr].sort((a, b) => a - b); - const mid = Math.floor(sorted.length / 2); - return sorted.length % 2 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2; -} - console.error(`Benchmarking ${engine} engine...`); // Full build (delete DB first) -const fullTimings = []; -for (let i = 0; i < RUNS; i++) { - if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath); - const start = performance.now(); - await buildGraph(root, { ...BUILD_OPTS, incremental: false }); - fullTimings.push(performance.now() - start); -} -const fullBuildMs = Math.round(median(fullTimings)); +const fullBuildMs = Math.round( + await timeMedian(async () => { + if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath); + await buildGraph(root, { ...BUILD_OPTS, incremental: false }); + }, RUNS), +); // No-op rebuild (nothing changed) let noopRebuildMs = null; @@ -214,13 +201,9 @@ try { for (let i = 0; i < WARMUP_RUNS; i++) { await buildGraph(root, { ...BUILD_OPTS, incremental: true }); } - const noopTimings = []; - for (let i = 0; i < RUNS; i++) { - const start = performance.now(); - await buildGraph(root, { ...BUILD_OPTS, incremental: true }); - noopTimings.push(performance.now() - start); - } - noopRebuildMs = Math.round(median(noopTimings)); + noopRebuildMs = Math.round( + await timeMedian(() => buildGraph(root, { ...BUILD_OPTS, incremental: true }), RUNS), + ); } catch (err) { console.error(` [${engine}] No-op rebuild failed: ${(err as Error).message}`); } diff --git a/scripts/lib/bench-timing.ts b/scripts/lib/bench-timing.ts new file mode 100644 index 000000000..e23f6135f --- /dev/null +++ b/scripts/lib/bench-timing.ts @@ -0,0 +1,52 @@ +/** + * 
Shared timing helpers for benchmark scripts. + * + * `median`/`round1` were independently duplicated (byte-for-byte, in most + * cases) across token-benchmark.ts, query-benchmark.ts, + * incremental-benchmark.ts (twice — once per process: parent and worker), + * and benchmark.ts. `timeMedian` wraps the "run N times, time each run, + * return the median" loop that recurred at every call site measuring a + * single scalar latency. + * + * Usage (in a benchmark script): + * + * import { median, round1, timeMedian } from './lib/bench-timing.js'; + * + * const fullBuildMs = Math.round( + * await timeMedian(() => buildGraph(root, { engine, incremental: false }), RUNS), + * ); + */ + +/** + * Returns the median of `arr`. `arr` is not mutated (sorted on a copy). + * Returns 0 for an empty array. + */ +export function median(arr: number[]): number { + if (arr.length === 0) return 0; + const sorted = [...arr].sort((a, b) => a - b); + const mid = Math.floor(sorted.length / 2); + return sorted.length % 2 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2; +} + +/** + * Rounds `n` to 1 decimal place. + */ +export function round1(n: number): number { + return Math.round(n * 10) / 10; +} + +/** + * Runs `fn` `runs` times, recording the elapsed milliseconds per run, and + * returns the median duration. Awaits `fn()` each iteration, so both sync + * and async `fn` work — pass an async closure when `fn` itself needs to + * `await` (e.g. wrapping `buildGraph`). 
+ */ +export async function timeMedian(fn: () => unknown, runs: number): Promise { + const timings: number[] = []; + for (let i = 0; i < runs; i++) { + const start = performance.now(); + await fn(); + timings.push(performance.now() - start); + } + return median(timings); +} diff --git a/scripts/query-benchmark.ts b/scripts/query-benchmark.ts index 0d53d75e4..d19aa8e2f 100644 --- a/scripts/query-benchmark.ts +++ b/scripts/query-benchmark.ts @@ -18,6 +18,7 @@ import { fileURLToPath } from 'node:url'; import Database from 'better-sqlite3'; import { resolveBenchmarkExcludes, resolveBenchmarkSource, srcImport } from './lib/bench-config.js'; import { isWorker, workerEngine, workerTargets, forkEngines } from './lib/fork-engine.js'; +import { median, round1 } from './lib/bench-timing.js'; // ── Parent process: fork one child per engine, assemble final output ───── if (!isWorker()) { @@ -117,16 +118,6 @@ const RUNS = 5; // before timing so the metric reflects warm-call latency, not cold-start. const WARMUP_RUNS = 3; -function median(arr) { - const sorted = [...arr].sort((a, b) => a - b); - const mid = Math.floor(sorted.length / 2); - return sorted.length % 2 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2; -} - -function round1(n) { - return Math.round(n * 10) / 10; -} - // Pinned hub targets — stable function names that exist across versions. // Auto-selecting the most-connected node makes version-to-version comparison // meaningless when barrel/type files get added or removed. 
diff --git a/scripts/token-benchmark.ts b/scripts/token-benchmark.ts index d34268ead..7e66cb6b8 100644 --- a/scripts/token-benchmark.ts +++ b/scripts/token-benchmark.ts @@ -27,6 +27,7 @@ import { parseArgs } from 'node:util'; import { ISSUES, extractAgentOutput, validateResult } from './token-benchmark-issues.js'; import { getBenchmarkVersion } from './bench-version.js'; +import { median, round1, timeMedian } from './lib/bench-timing.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); const root = path.resolve(__dirname, '..'); @@ -76,13 +77,6 @@ const selectedIssues = selectedIssueIds.map((id) => { // ── Helpers ─────────────────────────────────────────────────────────────── -function median(arr) { - if (arr.length === 0) return 0; - const sorted = [...arr].sort((a, b) => a - b); - const mid = Math.floor(sorted.length / 2); - return sorted.length % 2 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2; -} - function round2(n) { return Math.round(n * 100) / 100; } @@ -261,24 +255,6 @@ async function runSession(mode, issue, nextjsDir) { const PERF_RUNS = 3; -function round1(n) { - return Math.round(n * 10) / 10; -} - -/** - * Run `fn` `runs` times (default `PERF_RUNS`), recording the elapsed - * milliseconds per run, and return the median duration. - */ -async function timeMedian(fn, runs = PERF_RUNS) { - const timings = []; - for (let i = 0; i < runs; i++) { - const start = performance.now(); - await fn(); - timings.push(performance.now() - start); - } - return median(timings); -} - /** * Run build/query/stats benchmarks against the Next.js graph. * Reuses the same codegraph APIs as the existing benchmark scripts. 
@@ -324,13 +300,13 @@ async function runPerfBenchmarks(nextjsDir) { await timeMedian(async () => { if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath); await buildGraph(nextjsDir, { engine, incremental: false }); - }), + }, PERF_RUNS), ); // No-op rebuild console.error(` No-op rebuild (${engine})...`); const noopRebuildMs = Math.round( - await timeMedian(() => buildGraph(nextjsDir, { engine, incremental: true })), + await timeMedian(() => buildGraph(nextjsDir, { engine, incremental: true }), PERF_RUNS), ); buildResults[engine] = { fullBuildMs, noopRebuildMs }; @@ -379,12 +355,12 @@ async function runPerfBenchmarks(nextjsDir) { for (const depth of [1, 3, 5]) { // fnDeps queryResults[`fnDeps_depth${depth}Ms`] = round1( - await timeMedian(() => fnDepsData(hubName, dbPath, { depth, noTests: true })), + await timeMedian(() => fnDepsData(hubName, dbPath, { depth, noTests: true }), PERF_RUNS), ); // fnImpact queryResults[`fnImpact_depth${depth}Ms`] = round1( - await timeMedian(() => fnImpactData(hubName, dbPath, { depth, noTests: true })), + await timeMedian(() => fnImpactData(hubName, dbPath, { depth, noTests: true }), PERF_RUNS), ); } From da0c2674818d2dd0cc272fb774c8cdf8a51255a0 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Sat, 4 Jul 2026 13:46:53 -0600 Subject: [PATCH 38/39] fix(leiden): dedupe DEFAULT_CAPACITY_GROWTH_FACTOR (docs check acknowledged) optimiser.ts kept its own copy of the 1.5 fallback instead of importing partition.ts's constant, so the two could silently drift apart. Export it from partition.ts and import it in optimiser.ts. Addresses Greptile review feedback on #1793. 
--- src/graph/algorithms/leiden/optimiser.ts | 3 +-- src/graph/algorithms/leiden/partition.ts | 8 ++++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/src/graph/algorithms/leiden/optimiser.ts b/src/graph/algorithms/leiden/optimiser.ts index 17c3d219b..ed00d5442 100644 --- a/src/graph/algorithms/leiden/optimiser.ts +++ b/src/graph/algorithms/leiden/optimiser.ts @@ -9,14 +9,13 @@ import { makeGraphAdapter } from './adapter.js'; import { diffCPM } from './cpm.js'; import { diffModularity } from './modularity.js'; import type { Partition } from './partition.js'; -import { makePartition } from './partition.js'; +import { DEFAULT_CAPACITY_GROWTH_FACTOR, makePartition } from './partition.js'; import { createRng } from './rng.js'; // Mirrored in DEFAULTS.community (src/infrastructure/config.js) for user override // via .codegraphrc.json. Callers (e.g. louvain.ts) can pass overrides through options. const DEFAULT_MAX_LEVELS: number = 50; const DEFAULT_MAX_LOCAL_PASSES: number = 20; -const DEFAULT_CAPACITY_GROWTH_FACTOR: number = 1.5; const GAIN_EPSILON: number = 1e-12; /** Pre-allocated scratch buffers for refinement candidate collection. */ diff --git a/src/graph/algorithms/leiden/partition.ts b/src/graph/algorithms/leiden/partition.ts index 50a11c4be..ad2464f5c 100644 --- a/src/graph/algorithms/leiden/partition.ts +++ b/src/graph/algorithms/leiden/partition.ts @@ -72,8 +72,12 @@ interface PartitionState { capacityGrowthFactor: number; } -/** Mirrored in DEFAULTS.community.capacityGrowthFactor (src/infrastructure/config.ts). */ -const DEFAULT_CAPACITY_GROWTH_FACTOR = 1.5; +/** + * Mirrored in DEFAULTS.community.capacityGrowthFactor (src/infrastructure/config.ts). + * Exported so other leiden modules (e.g. optimiser.ts) share this single fallback + * instead of keeping an independently-drifting copy. 
+ */ +export const DEFAULT_CAPACITY_GROWTH_FACTOR = 1.5; /* ------------------------------------------------------------------ */ /* Community-ID sort helper (used by compact) */ From 7e717318fcf2d17e7699cd6bd77c7594e319f224 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Sun, 5 Jul 2026 01:49:29 -0600 Subject: [PATCH 39/39] fix: import performance explicitly in bench-timing.ts (#1793) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every other benchmark script imports performance from node:perf_hooks; bench-timing.ts relied on the global instead. Flagged by Greptile's review summary. docs check acknowledged — one-line import fix, no new features/languages/architecture changes; README/CLAUDE/ROADMAP do not need updates. --- scripts/lib/bench-timing.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/lib/bench-timing.ts b/scripts/lib/bench-timing.ts index e23f6135f..233fb33a7 100644 --- a/scripts/lib/bench-timing.ts +++ b/scripts/lib/bench-timing.ts @@ -16,6 +16,7 @@ * await timeMedian(() => buildGraph(root, { engine, incremental: false }), RUNS), * ); */ +import { performance } from 'node:perf_hooks'; /** * Returns the median of `arr`. `arr` is not mutated (sorted on a copy).