diff --git a/src/domain/graph/resolver/points-to.ts b/src/domain/graph/resolver/points-to.ts index ffa36ec8e..5573c53ab 100644 --- a/src/domain/graph/resolver/points-to.ts +++ b/src/domain/graph/resolver/points-to.ts @@ -79,106 +79,195 @@ function buildThisAssignmentMap( } /** - * Append parameter-flow and array/spread/forOf/callback constraints (Phases 8.3c and 8.3e). + * Phase 8.3c: parameter-flow constraints. * - * Mutates `pts` (seeds array-element entries) and appends to `constraints`. + * For each call f(x) at argIndex i where f is locally defined, add + * constraint: pts(f::paramName_i) ⊇ pts(x). This makes the pts solver + * inter-procedural within the module so that `fn()` inside `f` resolves + * to the concrete function passed at each call site. + * + * Keys are scoped as "callee::paramName" to prevent name collisions: bare + * parameter names like `fn`, `cb`, and `callback` appear in many functions + * within the same file. Without scoping, pts(fn) from runA and runB would + * merge into a single set, producing spurious call edges. The scoped key is + * resolved in buildFileCallEdges by combining the enclosing caller's name + * with the call's name (see callerName::call.name lookup there). + * + * Scope: intra-module only (definitionParams contains local defs only). + * + * Appends to `constraints`. */ -function buildParamAndArrayConstraints( - pts: PointsToMap, +function buildParamFlowConstraints( constraints: Array<{ lhs: string; rhsKey: string }>, paramBindings?: readonly ParamBinding[], definitionParams?: ReadonlyMap, +): void { + if (!paramBindings || !definitionParams) return; + for (const { callee, argIndex, argName } of paramBindings) { + const params = definitionParams.get(callee); + if (!params || argIndex >= params.length) continue; + const paramName = params[argIndex]; + if (paramName) constraints.push({ lhs: `${callee}::${paramName}`, rhsKey: argName }); + } +} + +/** + * Phase 8.3e: array-element bindings — seed concrete elements and wildcard. + * + * `arr[0]` etc. are seeded from literal arrays; `arr[*]` collects all elements. + * + * Mutates `pts` (seeds per-index entries) and appends to `constraints`. + */ +function buildArrayElemConstraints( + pts: PointsToMap, + constraints: Array<{ lhs: string; rhsKey: string }>, arrayElemBindings?: readonly ArrayElemBinding[], - spreadArgBindings?: readonly SpreadArgBinding[], - forOfBindings?: readonly ForOfBinding[], - arrayCallbackBindings?: readonly ArrayCallbackBinding[], ): void { - // Phase 8.3c: parameter-flow constraints. - // For each call f(x) at argIndex i where f is locally defined, add - // constraint: pts(f::paramName_i) ⊇ pts(x). This makes the pts solver - // inter-procedural within the module so that `fn()` inside `f` resolves - // to the concrete function passed at each call site. - // - // Keys are scoped as "callee::paramName" to prevent name collisions: bare - // parameter names like `fn`, `cb`, and `callback` appear in many functions - // within the same file. Without scoping, pts(fn) from runA and runB would - // merge into a single set, producing spurious call edges. The scoped key is - // resolved in buildFileCallEdges by combining the enclosing caller's name - // with the call's name (see callerName::call.name lookup there). - // - // Scope: intra-module only (definitionParams contains local defs only). - if (paramBindings && definitionParams) { - for (const { callee, argIndex, argName } of paramBindings) { - const params = definitionParams.get(callee); - if (!params || argIndex >= params.length) continue; - const paramName = params[argIndex]; - if (paramName) constraints.push({ lhs: `${callee}::${paramName}`, rhsKey: argName }); - } + if (!arrayElemBindings || arrayElemBindings.length === 0) return; + for (const { arrayName, index, elemName } of arrayElemBindings) { + const elemKey = `${arrayName}[${index}]`; + const wildcardKey = `${arrayName}[*]`; + // Seed the per-index entry if the elemName is a concrete function. + if (!pts.has(elemKey)) pts.set(elemKey, new Set()); + pts.get(elemKey)!.add(elemName); + // Wildcard: array[*] collects all element targets for imprecise spread/for-of. + constraints.push({ lhs: wildcardKey, rhsKey: elemKey }); } +} - // Phase 8.3e: array-element bindings — seed concrete elements and wildcard. - // `arr[0]` etc. are seeded from literal arrays; `arr[*]` collects all elements. - if (arrayElemBindings && arrayElemBindings.length > 0) { - for (const { arrayName, index, elemName } of arrayElemBindings) { - const elemKey = `${arrayName}[${index}]`; - const wildcardKey = `${arrayName}[*]`; - // Seed the per-index entry if the elemName is a concrete function. - if (!pts.has(elemKey)) pts.set(elemKey, new Set()); - pts.get(elemKey)!.add(elemName); - // Wildcard: array[*] collects all element targets for imprecise spread/for-of. - constraints.push({ lhs: wildcardKey, rhsKey: elemKey }); - } +/** + * Build a per-array index count from arrayElemBindings for precise + * per-index spread-argument constraints. + */ +function computeArrayMaxIndex( + arrayElemBindings: readonly ArrayElemBinding[] | undefined, +): Map { + const arrayMaxIndex = new Map(); + for (const { arrayName, index } of arrayElemBindings ?? []) { + const cur = arrayMaxIndex.get(arrayName) ?? -1; + if (index > cur) arrayMaxIndex.set(arrayName, index); } + return arrayMaxIndex; +} - // Phase 8.3e: spread-argument constraints. - // f(...arr) → pts[f::param_i] ⊇ pts[arr[i]] for each known element. - if (spreadArgBindings && spreadArgBindings.length > 0 && definitionParams) { - // Build a per-array index count from arrayElemBindings for precise per-index constraints. - const arrayMaxIndex = new Map(); - for (const { arrayName, index } of arrayElemBindings ?? []) { - const cur = arrayMaxIndex.get(arrayName) ?? -1; - if (index > cur) arrayMaxIndex.set(arrayName, index); +/** + * Push spread-argument constraints for one callee: precise per-element + * constraints when the source array's max index is known, otherwise a + * wildcard constraint for every parameter at/after startIndex. + */ +function pushSpreadArgConstraintsForCallee( + constraints: Array<{ lhs: string; rhsKey: string }>, + callee: string, + params: readonly string[], + arrayName: string, + startIndex: number, + maxIdx: number, +): void { + if (maxIdx >= 0) { + // Precise: per-element constraints. + for (let i = 0; i <= maxIdx; i++) { + const paramIdx = startIndex + i; + if (paramIdx >= params.length) break; + constraints.push({ lhs: `${callee}::${params[paramIdx]}`, rhsKey: `${arrayName}[${i}]` }); } - - for (const { callee, arrayName, startIndex } of spreadArgBindings) { - const params = definitionParams.get(callee); - if (!params) continue; - const maxIdx = arrayMaxIndex.get(arrayName) ?? -1; - if (maxIdx >= 0) { - // Precise: per-element constraints. - for (let i = 0; i <= maxIdx; i++) { - const paramIdx = startIndex + i; - if (paramIdx >= params.length) break; - constraints.push({ lhs: `${callee}::${params[paramIdx]}`, rhsKey: `${arrayName}[${i}]` }); - } - } else { - // Unknown array size: all params at/after startIndex get the wildcard. - for (let j = startIndex; j < params.length; j++) { - constraints.push({ lhs: `${callee}::${params[j]}`, rhsKey: `${arrayName}[*]` }); - } - } + } else { + // Unknown array size: all params at/after startIndex get the wildcard. + for (let j = startIndex; j < params.length; j++) { + constraints.push({ lhs: `${callee}::${params[j]}`, rhsKey: `${arrayName}[*]` }); } } +} - // Phase 8.3e: for-of iteration constraints. - // `for (const x of arr)` inside `outer` → pts[outer::x] ⊇ pts[arr[*]] - if (forOfBindings) { - for (const { varName, sourceName, enclosingFunc } of forOfBindings) { - constraints.push({ lhs: `${enclosingFunc}::${varName}`, rhsKey: `${sourceName}[*]` }); - } +/** + * Phase 8.3e: spread-argument constraints. + * + * f(...arr) → pts[f::param_i] ⊇ pts[arr[i]] for each known element. + * + * Appends to `constraints`. + */ +function buildSpreadArgConstraints( + constraints: Array<{ lhs: string; rhsKey: string }>, + spreadArgBindings?: readonly SpreadArgBinding[], + arrayElemBindings?: readonly ArrayElemBinding[], + definitionParams?: ReadonlyMap, +): void { + if (!spreadArgBindings || spreadArgBindings.length === 0 || !definitionParams) return; + const arrayMaxIndex = computeArrayMaxIndex(arrayElemBindings); + + for (const { callee, arrayName, startIndex } of spreadArgBindings) { + const params = definitionParams.get(callee); + if (!params) continue; + const maxIdx = arrayMaxIndex.get(arrayName) ?? -1; + pushSpreadArgConstraintsForCallee(constraints, callee, params, arrayName, startIndex, maxIdx); } +} - // Phase 8.3e: Array.from / callback constraints. - // Array.from(source, cb) → pts[cb::param0] ⊇ pts[source[*]] - if (arrayCallbackBindings && definitionParams) { - for (const { sourceName, calleeName } of arrayCallbackBindings) { - const params = definitionParams.get(calleeName); - if (!params || params.length === 0) continue; - constraints.push({ lhs: `${calleeName}::${params[0]}`, rhsKey: `${sourceName}[*]` }); - } +/** + * Phase 8.3e: for-of iteration constraints. + * + * `for (const x of arr)` inside `outer` → pts[outer::x] ⊇ pts[arr[*]] + * + * Appends to `constraints`. + */ +function buildForOfConstraints( + constraints: Array<{ lhs: string; rhsKey: string }>, + forOfBindings?: readonly ForOfBinding[], +): void { + if (!forOfBindings) return; + for (const { varName, sourceName, enclosingFunc } of forOfBindings) { + constraints.push({ lhs: `${enclosingFunc}::${varName}`, rhsKey: `${sourceName}[*]` }); } } +/** + * Phase 8.3e: Array.from / callback constraints. + * + * Array.from(source, cb) → pts[cb::param0] ⊇ pts[source[*]] + * + * Appends to `constraints`. + */ +function buildArrayCallbackConstraints( + constraints: Array<{ lhs: string; rhsKey: string }>, + arrayCallbackBindings?: readonly ArrayCallbackBinding[], + definitionParams?: ReadonlyMap, +): void { + if (!arrayCallbackBindings || !definitionParams) return; + for (const { sourceName, calleeName } of arrayCallbackBindings) { + const params = definitionParams.get(calleeName); + if (!params || params.length === 0) continue; + constraints.push({ lhs: `${calleeName}::${params[0]}`, rhsKey: `${sourceName}[*]` }); + } +} + +/** + * Append parameter-flow and array/spread/forOf/callback constraints (Phases 8.3c and 8.3e). + * + * Delegates to one named helper per binding kind (buildParamFlowConstraints, + * buildArrayElemConstraints, buildSpreadArgConstraints, buildForOfConstraints, + * buildArrayCallbackConstraints) — each handler owns exactly one binding kind's + * guard + iteration + constraint-push shape, called in the same order the + * original inline blocks ran in (none of the blocks read state written by an + * earlier one, so extraction does not change solver input order). + * + * Mutates `pts` (seeds array-element entries) and appends to `constraints`. + */ +function buildParamAndArrayConstraints( + pts: PointsToMap, + constraints: Array<{ lhs: string; rhsKey: string }>, + paramBindings?: readonly ParamBinding[], + definitionParams?: ReadonlyMap, + arrayElemBindings?: readonly ArrayElemBinding[], + spreadArgBindings?: readonly SpreadArgBinding[], + forOfBindings?: readonly ForOfBinding[], + arrayCallbackBindings?: readonly ArrayCallbackBinding[], +): void { + buildParamFlowConstraints(constraints, paramBindings, definitionParams); + buildArrayElemConstraints(pts, constraints, arrayElemBindings); + buildSpreadArgConstraints(constraints, spreadArgBindings, arrayElemBindings, definitionParams); + buildForOfConstraints(constraints, forOfBindings); + buildArrayCallbackConstraints(constraints, arrayCallbackBindings, definitionParams); +} + /** * Seed pts entries for object-rest parameter dispatch (Phase 8.3f). * diff --git a/src/domain/graph/resolver/strategy.ts b/src/domain/graph/resolver/strategy.ts index 84726b4a8..8bfe99cfb 100644 --- a/src/domain/graph/resolver/strategy.ts +++ b/src/domain/graph/resolver/strategy.ts @@ -57,41 +57,44 @@ export function isModuleScopedLanguage(relPath: string): boolean { return MODULE_SCOPED_BARE_CALL_EXTENSIONS.has(ext); } +// ── typeMap entry unwrapping ────────────────────────────────────────────────── + +/** + * Unwrap a typeMap entry to its plain string form. + * + * typeMap values are either a bare string (the target name) or an object of + * shape `{ type?: string }` (some seeders attach extra metadata alongside the + * target). This normalises both shapes to `string | null`, matching the + * falsy-check semantics every call site previously duplicated inline. + */ +function unwrapTypeEntry(entry: unknown): string | null { + if (!entry) return null; + return typeof entry === 'string' ? entry : ((entry as { type?: string }).type ?? null); +} + // ── resolveByReceiver ───────────────────────────────────────────────────────── /** - * Resolve a call site whose receiver is a concrete object reference - * (i.e. `receiver` is present and is NOT `this`, `self`, or `super`). + * Steps 1-3 of the resolveByReceiver cascade: resolve the type name for a + * concrete-object receiver. * - * Resolution cascade: * 1. typeMap class-scoped lookup (`ClassName.prop` key) for `this.prop` receivers. * 2. typeMap bare key, full-receiver key, callee-scoped rest-param key. * 3. Inline `new Ctor()` heuristic for un-normalised receiver text. - * 4. Typed method lookup via `TypeName.methodName` in symbol DB. - * 5. Prototype alias: `Foo.prototype.bar = identifier` via typeMap. - * 6. Direct qualified method lookup: `ClassName.staticMethod()`. - * 7. Composite pts key: `obj.prop` → callback target function. */ -export function resolveByReceiver( - lookup: StrategyLookup, - call: { name: string; receiver: string }, - relPath: string, +function resolveReceiverTypeName( typeMap: Map, + receiver: string, + effectiveReceiver: string, callerName?: string | null, -): ReadonlyArray<{ id: number; file: string }> { - // Strip "this." so `this.repo.method()` resolves via typeMap["repo"] - // (or the "this.repo" key seeded directly by the TSC property-declaration enricher). - const effectiveReceiver = call.receiver.startsWith('this.') - ? call.receiver.slice('this.'.length) - : call.receiver; - +): string | null { // For this.prop receivers, prefer the class-scoped key (ClassName.prop) seeded by // handlePropWriteTypeMap / handleFieldDefTypeMap — prevents false edges when multiple // classes define the same property name (issues #1323, #1458). // Class-scoped lookup runs first so bare fallback keys (confidence 0.6) don't shadow // the correct per-class entry when callerName is available. let typeEntry: unknown; - if (call.receiver.startsWith('this.') && callerName) { + if (receiver.startsWith('this.') && callerName) { const dotIdx = callerName.lastIndexOf('.'); if (dotIdx > -1) { const callerClass = callerName.slice(0, dotIdx); @@ -100,16 +103,12 @@ export function resolveByReceiver( } typeEntry ??= typeMap.get(effectiveReceiver) ?? - typeMap.get(call.receiver) ?? + typeMap.get(receiver) ?? // Phase 8.3f: callee-scoped rest-param key (`callee::restName`) to avoid // same-name rest-binding collision across functions in the same file (#1358). (callerName ? typeMap.get(`${callerName}::${effectiveReceiver}`) : undefined); - let typeName = typeEntry - ? typeof typeEntry === 'string' - ? typeEntry - : (typeEntry as { type?: string }).type - : null; + let typeName = unwrapTypeEntry(typeEntry); // Belt-and-suspenders fallback for inline new-expression receivers that // extractReceiverName did not normalise (e.g. raw text leaked from an @@ -120,77 +119,197 @@ export function resolveByReceiver( // The uppercase-initial restriction ([A-Z_$]) is a heuristic to distinguish // constructors (PascalCase) from regular functions and avoids false positives // on `(new xmlParser()).parse()` style calls. - if (!typeName && call.receiver) { - const m = /^\(?\s*new\s+([A-Z_$][A-Za-z0-9_$]*)/.exec(call.receiver); + if (!typeName && receiver) { + const m = /^\(?\s*new\s+([A-Z_$][A-Za-z0-9_$]*)/.exec(receiver); if (m?.[1]) typeName = m[1]; } + return typeName; +} + +/** Step 4: typed method lookup via `TypeName.methodName` in the symbol DB. */ +function resolveViaTypedMethod( + lookup: StrategyLookup, + typeName: string, + call: { name: string }, + relPath: string, +): ReadonlyArray<{ id: number; file: string }> { + return lookup + .byName(`${typeName}.${call.name}`) + .filter((n) => n.kind === 'method' && computeConfidence(relPath, n.file, null) >= 0.5); +} + +/** + * Step 5: prototype alias — `Foo.prototype.bar = identifier` seeds + * typeMap['Foo.bar'] = { type: identifier }. + * Checked after the symbol-DB lookup so an actual method definition always wins. + */ +function resolveViaPrototypeAlias( + lookup: StrategyLookup, + typeMap: Map, + typeName: string, + call: { name: string }, + relPath: string, +): ReadonlyArray<{ id: number; file: string }> { + const protoTarget = unwrapTypeEntry(typeMap.get(`${typeName}.${call.name}`)); + if (!protoTarget) return []; + return lookup.byName(protoTarget).filter((t) => computeConfidence(relPath, t.file, null) >= 0.5); +} + +/** + * Step 6: direct qualified method lookup — `ClassName.staticMethod()` or + * `ClassName.instanceMethod()` when the receiver is a class name with no + * typeMap entry. Handles static method calls like `C6.staticMethod()` or + * `D.d()` where the receiver IS the class. Matches both 'method' and + * 'function' kinds to cover field-initializer synthetic defs. + */ +function resolveViaDirectQualifiedMethod( + lookup: StrategyLookup, + effectiveReceiver: string, + call: { name: string }, + relPath: string, +): ReadonlyArray<{ id: number; file: string }> { + const qualifiedName = `${effectiveReceiver}.${call.name}`; + return lookup + .byName(qualifiedName) + .filter( + (n) => + (n.kind === 'method' || n.kind === 'function') && + computeConfidence(relPath, n.file, null) >= 0.5, + ); +} + +/** + * Step 7: composite pts key — `obj.prop = fn` seeds typeMap['obj.prop'] = { type: 'fn' } + * (Phase 8.3d). When a call site references `obj.prop` as a callback, resolve + * directly to the target fn. + */ +function resolveViaCompositePtsKey( + lookup: StrategyLookup, + typeMap: Map, + call: { name: string; receiver: string }, + relPath: string, +): ReadonlyArray<{ id: number; file: string }> { + const ptsTarget = unwrapTypeEntry(typeMap.get(`${call.receiver}.${call.name}`)); + if (!ptsTarget) return []; + return lookup.byName(ptsTarget).filter((t) => computeConfidence(relPath, t.file, null) >= 0.5); +} + +/** + * Resolve a call site whose receiver is a concrete object reference + * (i.e. `receiver` is present and is NOT `this`, `self`, or `super`). + * + * Resolution cascade (see the per-step helpers above for the numbered steps): + * 1-3. resolveReceiverTypeName — typeMap lookups + `new Ctor()` heuristic. + * 4. resolveViaTypedMethod — typed method lookup in symbol DB. + * 5. resolveViaPrototypeAlias — prototype alias via typeMap. + * 6. resolveViaDirectQualifiedMethod — direct qualified method lookup. + * 7. resolveViaCompositePtsKey — composite pts key → callback target function. + */ +export function resolveByReceiver( + lookup: StrategyLookup, + call: { name: string; receiver: string }, + relPath: string, + typeMap: Map, + callerName?: string | null, +): ReadonlyArray<{ id: number; file: string }> { + // Strip "this." so `this.repo.method()` resolves via typeMap["repo"] + // (or the "this.repo" key seeded directly by the TSC property-declaration enricher). + const effectiveReceiver = call.receiver.startsWith('this.') + ? call.receiver.slice('this.'.length) + : call.receiver; + + const typeName = resolveReceiverTypeName(typeMap, call.receiver, effectiveReceiver, callerName); + if (typeName) { - const typed = lookup - .byName(`${typeName}.${call.name}`) - .filter((n) => n.kind === 'method' && computeConfidence(relPath, n.file, null) >= 0.5); + const typed = resolveViaTypedMethod(lookup, typeName, call, relPath); if (typed.length > 0) return typed; - // Prototype alias: `Foo.prototype.bar = identifier` seeds typeMap['Foo.bar'] = { type: identifier }. - // Checked after the symbol-DB lookup so an actual method definition always wins. - const protoEntry = typeMap.get(`${typeName}.${call.name}`); - const protoTarget = protoEntry - ? typeof protoEntry === 'string' - ? protoEntry - : (protoEntry as { type?: string }).type - : null; - if (protoTarget) { - const resolved = lookup - .byName(protoTarget) - .filter((t) => computeConfidence(relPath, t.file, null) >= 0.5); - if (resolved.length > 0) return resolved; - } - } - - // Direct qualified method lookup: ClassName.staticMethod() or ClassName.instanceMethod() - // when the receiver is a class name with no typeMap entry. Handles static method calls - // like `C6.staticMethod()` or `D.d()` where the receiver IS the class. - // Matches both 'method' and 'function' kinds to cover field-initializer synthetic defs. - if (!typeName) { - const qualifiedName = `${effectiveReceiver}.${call.name}`; - const direct = lookup - .byName(qualifiedName) - .filter( - (n) => - (n.kind === 'method' || n.kind === 'function') && - computeConfidence(relPath, n.file, null) >= 0.5, - ); + const viaPrototype = resolveViaPrototypeAlias(lookup, typeMap, typeName, call, relPath); + if (viaPrototype.length > 0) return viaPrototype; + } else { + const direct = resolveViaDirectQualifiedMethod(lookup, effectiveReceiver, call, relPath); if (direct.length > 0) return direct; } - // Phase 8.3d: composite pts key — `obj.prop = fn` seeds typeMap['obj.prop'] = { type: 'fn' }. - // When a call site references `obj.prop` as a callback, resolve directly to the target fn. - const compositeEntry = typeMap.get(`${call.receiver}.${call.name}`); - const ptsTarget = compositeEntry - ? typeof compositeEntry === 'string' - ? compositeEntry - : (compositeEntry as { type?: string }).type - : null; - if (ptsTarget) { - const resolved = lookup - .byName(ptsTarget) - .filter((t) => computeConfidence(relPath, t.file, null) >= 0.5); - if (resolved.length > 0) return resolved; - } + const viaComposite = resolveViaCompositePtsKey(lookup, typeMap, call, relPath); + if (viaComposite.length > 0) return viaComposite; return []; } // ── resolveByGlobal ─────────────────────────────────────────────────────────── +/** + * Step 1: accessor this-dispatch via Object.defineProperty (Phase 8.3f). + * + * When a plain function (no class prefix) is registered as a get/set accessor + * for `obj` via Object.defineProperty, typeMap seeds 'callerName:this' = 'obj'. + * We then resolve this.method() → typeMap['obj.method'] → the concrete + * definition. Only applies to a bare (non-qualified) callerName + `this` + * receiver; runs before the broad exact-name lookup to avoid false positives + * from unrelated same-file definitions. + */ +function resolveViaAccessorThisDispatch( + lookup: StrategyLookup, + typeMap: Map, + call: { name: string; receiver?: string | null }, + relPath: string, + callerName?: string | null, +): ReadonlyArray<{ id: number; file: string }> { + if (!(call.receiver === 'this' && callerName && !callerName.includes('.'))) return []; + const objName = unwrapTypeEntry(typeMap.get(`${callerName}:this`)); + if (!objName) return []; + const targetFn = unwrapTypeEntry(typeMap.get(`${objName}.${call.name}`)); + if (!targetFn) return []; + return lookup.byName(targetFn).filter((t) => computeConfidence(relPath, t.file, null) >= 0.5); +} + +/** + * Step 3: same-class sibling method fallback via callerName. + * + * e.g. `this.area()` inside `Shape.describe` → try `Shape.area`. Also covers + * no-receiver calls inside class methods, e.g. `IsValidEmail(x)` inside + * `Validators.ValidateUser` → try `Validators.IsValidEmail` (C#/Java static + * siblings). This seeds the initial edge that runChaPostPass later expands to + * subclass overrides. + * + * For JS/TS, bare (no-receiver) calls are module-scoped — there is no + * implicit class binding. Skip the same-class fallback for bare calls in + * those languages to prevent false positives (e.g. `flush()` inside + * `Processor.run` must not resolve to `Processor.flush`). this.method() + * calls are unaffected: they still reach the fallback because + * `call.receiver === 'this'` is truthy, not a bare call. + */ +function resolveViaSameClassSibling( + lookup: StrategyLookup, + call: { name: string; receiver?: string | null }, + relPath: string, + callerName?: string | null, +): ReadonlyArray<{ id: number; file: string }> { + const isBareCall = !call.receiver; + if (!callerName || (isBareCall && isModuleScopedLanguage(relPath))) return []; + const dotIdx = callerName.lastIndexOf('.'); + if (dotIdx <= -1) return []; + // Extract only the segment immediately before the method name so that + // 'Namespace.ClassName.method' yields 'ClassName', not 'Namespace.ClassName'. + // Symbols are stored under their bare class name, not their qualified path. + const prevDot = callerName.lastIndexOf('.', dotIdx - 1); + const callerClass = callerName.slice(prevDot + 1, dotIdx); + const qualifiedName = `${callerClass}.${call.name}`; + return lookup + .byName(qualifiedName) + .filter((t) => t.kind === 'method' && computeConfidence(relPath, t.file, null) >= 0.5); +} + /** * Resolve a call site with no receiver, or whose receiver is `this`, `self`, * or `super`. * * Resolution cascade: - * 1. Accessor this-dispatch via Object.defineProperty (Phase 8.3f). + * 1. resolveViaAccessorThisDispatch — Object.defineProperty this-dispatch (Phase 8.3f). * 2. Exact global name lookup with confidence filter. - * 3. Same-class sibling method fallback (C#/Java static siblings, this.method()). + * 3. resolveViaSameClassSibling — same-class sibling method fallback. */ export function resolveByGlobal( lookup: StrategyLookup, @@ -199,67 +318,16 @@ export function resolveByGlobal( typeMap: Map, callerName?: string | null, ): ReadonlyArray<{ id: number; file: string }> { - // Phase 8.3f: accessor this-dispatch via Object.defineProperty. - // When a plain function (no class prefix) is registered as a get/set accessor for `obj` - // via Object.defineProperty, typeMap seeds 'callerName:this' = 'obj'. - // We then resolve this.method() → typeMap['obj.method'] → the concrete definition. - // This runs before the broad exact-name lookup to avoid false positives from - // unrelated same-file definitions. - if (call.receiver === 'this' && callerName && !callerName.includes('.')) { - const accessorThisEntry = typeMap.get(`${callerName}:this`); - const objName = accessorThisEntry - ? typeof accessorThisEntry === 'string' - ? accessorThisEntry - : (accessorThisEntry as { type?: string }).type - : null; - if (objName) { - const objMethodEntry = typeMap.get(`${objName}.${call.name}`); - const targetFn = objMethodEntry - ? typeof objMethodEntry === 'string' - ? objMethodEntry - : (objMethodEntry as { type?: string }).type - : null; - if (targetFn) { - const resolved = lookup - .byName(targetFn) - .filter((t) => computeConfidence(relPath, t.file, null) >= 0.5); - if (resolved.length > 0) return resolved; - } - } - } + const viaAccessor = resolveViaAccessorThisDispatch(lookup, typeMap, call, relPath, callerName); + if (viaAccessor.length > 0) return viaAccessor; const exact = lookup .byName(call.name) .filter((t) => computeConfidence(relPath, t.file, null) >= 0.5); if (exact.length > 0) return exact; - // Try same-class method lookup via callerName. - // e.g. `this.area()` inside `Shape.describe` → try `Shape.area`. - // Also covers no-receiver calls inside class methods, e.g. `IsValidEmail(x)` inside - // `Validators.ValidateUser` → try `Validators.IsValidEmail` (C#/Java static siblings). - // This seeds the initial edge that runChaPostPass later expands to subclass overrides. - // - // For JS/TS, bare (no-receiver) calls are module-scoped — there is no implicit class - // binding. Skip the same-class fallback for bare calls in those languages to prevent - // false positives (e.g. `flush()` inside `Processor.run` must not resolve to - // `Processor.flush`). this.method() calls are unaffected: they still reach the fallback - // because `call.receiver === 'this'` is truthy, not a bare call. - const isBareCall = !call.receiver; - if (callerName && !(isBareCall && isModuleScopedLanguage(relPath))) { - const dotIdx = callerName.lastIndexOf('.'); - if (dotIdx > -1) { - // Extract only the segment immediately before the method name so that - // 'Namespace.ClassName.method' yields 'ClassName', not 'Namespace.ClassName'. - // Symbols are stored under their bare class name, not their qualified path. - const prevDot = callerName.lastIndexOf('.', dotIdx - 1); - const callerClass = callerName.slice(prevDot + 1, dotIdx); - const qualifiedName = `${callerClass}.${call.name}`; - const sameClass = lookup - .byName(qualifiedName) - .filter((t) => t.kind === 'method' && computeConfidence(relPath, t.file, null) >= 0.5); - if (sameClass.length > 0) return sameClass; - } - } + const sameClass = resolveViaSameClassSibling(lookup, call, relPath, callerName); + if (sameClass.length > 0) return sameClass; return exact; // empty } diff --git a/src/domain/graph/resolver/ts-resolver.ts b/src/domain/graph/resolver/ts-resolver.ts index 2aca2d5bc..4b4e67f43 100644 --- a/src/domain/graph/resolver/ts-resolver.ts +++ b/src/domain/graph/resolver/ts-resolver.ts @@ -159,6 +159,21 @@ function countLowConfidence(typeMap: Map): number { return count; } +/** + * Shared "collect candidates by name → keep only names with a single unique + * value → write" ambiguity-filtering algorithm used by both enrichSourceFile + * (ambiguity check on qualifiedName) and enrichCallAssignments (ambiguity + * check on calleeName). + * + * Returns `entries[0]` if every entry shares exactly one distinct value under + * `keyOf`, or `null` if they disagree (ambiguous) or `entries` is empty. + */ +function resolveUnambiguous(entries: readonly T[], keyOf: (entry: T) => string): T | null { + const uniqueKeys = new Set(entries.map(keyOf)); + if (uniqueKeys.size !== 1) return null; + return entries[0] ?? null; +} + /** * Walk up from rootDir looking for tsconfig.json (up to 4 levels). * Handles monorepo setups where rootDir is a package subdirectory but @@ -239,78 +254,105 @@ function createProgram(ts: TsModule, tsconfigPath: string): import('typescript') * Entries already at confidence 1.0 (e.g., `new Foo()` from tree-sitter) are * left unchanged. New entries from the compiler are added at confidence 1.0. */ -function enrichSourceFile( - ts: TsModule, - sourceFile: import('typescript').SourceFile, - checker: import('typescript').TypeChecker, - typeMap: Map, -): void { - // First pass: collect resolved types keyed by bare identifier name. - // Track both the short name (for typeMap writes) and the fully-qualified name +/** + * Mutable state threaded through the enrichSourceFile visitor. Grouped into + * one object (rather than closed-over locals) so the walk can be a plain + * top-level function, outside the enclosing function's own complexity count. + */ +interface SourceFileVisitContext { + ts: TsModule; + checker: import('typescript').TypeChecker; + // Collects resolved types keyed by bare identifier name. Tracks both the + // short name (for typeMap writes) and the fully-qualified name // (module-path-prefixed) for ambiguity detection. Two classes may share the // same short name (e.g., `OrderService` from two different modules), and // symbol.getName() returns the declared name — not the local alias — so // deduplication on short names alone would incorrectly collapse them. - const nameToEntries = new Map(); - // Track class property declaration names so we can also seed "this.X" entries. - const propertyDeclNames = new Set(); + nameToEntries: Map; + // Class property declaration names so we can also seed "this.X" entries. + propertyDeclNames: Set; +} - function visit(node: import('typescript').Node): void { - let identName: string | null = null; - let nameNode: import('typescript').Identifier | null = null; - - if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name)) { - identName = node.name.text; - nameNode = node.name; - } else if (ts.isParameter(node) && ts.isIdentifier(node.name)) { - identName = node.name.text; - nameNode = node.name; - } else if (ts.isPropertyDeclaration(node) && ts.isIdentifier(node.name)) { - // TypeScript class field: `private repo: Repository` - // Seeds typeMap so `this.repo.method()` can be resolved via receiver type. - identName = node.name.text; - nameNode = node.name; - propertyDeclNames.add(node.name.text); - } +function visitSourceFileNode(ctx: SourceFileVisitContext, node: import('typescript').Node): void { + const { ts, checker, nameToEntries, propertyDeclNames } = ctx; + let identName: string | null = null; + let nameNode: import('typescript').Identifier | null = null; + + if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name)) { + identName = node.name.text; + nameNode = node.name; + } else if (ts.isParameter(node) && ts.isIdentifier(node.name)) { + identName = node.name.text; + nameNode = node.name; + } else if (ts.isPropertyDeclaration(node) && ts.isIdentifier(node.name)) { + // TypeScript class field: `private repo: Repository` + // Seeds typeMap so `this.repo.method()` can be resolved via receiver type. + identName = node.name.text; + nameNode = node.name; + propertyDeclNames.add(node.name.text); + } - if (identName && nameNode) { - const resolved = resolveTypeName(ts, nameNode, checker); - if (resolved) { - const existing = nameToEntries.get(identName); - if (existing) { - existing.push(resolved); - } else { - nameToEntries.set(identName, [resolved]); - } + if (identName && nameNode) { + const resolved = resolveTypeName(ts, nameNode, checker); + if (resolved) { + const existing = nameToEntries.get(identName); + if (existing) { + existing.push(resolved); + } else { + nameToEntries.set(identName, [resolved]); } } + } - ts.forEachChild(node, visit); + ts.forEachChild(node, (child) => visitSourceFileNode(ctx, child)); +} + +/** + * Write one (name → candidate entries) group to typeMap if unambiguous + * (single unique qualified type for the name), plus its "this." + * companion entry when name is a class property. + */ +function writeSourceFileTypeMapEntry( + typeMap: Map, + propertyDeclNames: ReadonlySet, + name: string, + entries: { shortName: string; qualifiedName: string }[], +): void { + const first = resolveUnambiguous(entries, (e) => e.qualifiedName); + if (!first) return; // ambiguous across modules, or no candidates — skip + const shortName = first.shortName; + const existing = typeMap.get(name); + if (!existing || existing.confidence < 1.0) { + typeMap.set(name, { type: shortName, confidence: 1.0 }); } - ts.forEachChild(sourceFile, visit); + // For class property declarations, also seed "this.fieldName" so that + // `this.repo.findById()` call sites resolve to the interface/class type. + if (propertyDeclNames.has(name)) { + const thisKey = `this.${name}`; + const existingThis = typeMap.get(thisKey); + if (!existingThis || existingThis.confidence < 1.0) { + typeMap.set(thisKey, { type: shortName, confidence: 1.0 }); + } + } +} + +function enrichSourceFile( + ts: TsModule, + sourceFile: import('typescript').SourceFile, + checker: import('typescript').TypeChecker, + typeMap: Map, +): void { + const ctx: SourceFileVisitContext = { + ts, + checker, + nameToEntries: new Map(), + propertyDeclNames: new Set(), + }; + ts.forEachChild(sourceFile, (node) => visitSourceFileNode(ctx, node)); // Second pass: only write unambiguous entries (single unique qualified type for a name) - for (const [name, entries] of nameToEntries) { - const uniqueQualified = [...new Set(entries.map((e) => e.qualifiedName))]; - if (uniqueQualified.length !== 1) continue; // ambiguous across modules — skip - // entries is non-empty because we only set() on first occurrence and push() after — - // TypeScript's noUncheckedIndexedAccess can flag [0] access, so assert the type. - const first = entries[0]; - if (!first) continue; - const shortName = first.shortName; - const existing = typeMap.get(name); - if (!existing || existing.confidence < 1.0) { - typeMap.set(name, { type: shortName, confidence: 1.0 }); - } - // For class property declarations, also seed "this.fieldName" so that - // `this.repo.findById()` call sites resolve to the interface/class type. - if (propertyDeclNames.has(name)) { - const thisKey = `this.${name}`; - const existingThis = typeMap.get(thisKey); - if (!existingThis || existingThis.confidence < 1.0) { - typeMap.set(thisKey, { type: shortName, confidence: 1.0 }); - } - } + for (const [name, entries] of ctx.nameToEntries) { + writeSourceFileTypeMapEntry(typeMap, ctx.propertyDeclNames, name, entries); } } @@ -327,98 +369,190 @@ function enrichSourceFile( * Async functions returning Promise are unwrapped: the inner type argument T is * used so that async methods receive a returnTypeMap entry just like sync ones. */ -function enrichReturnTypeMap( - ts: TsModule, - sourceFile: import('typescript').SourceFile, +/** + * Mutable state threaded through the enrichReturnTypeMap visitor. Grouped + * into one object (rather than closed-over locals) so the node-kind handlers + * below can be plain top-level functions, independently testable and outside + * the enclosing function's own complexity count. + */ +interface ReturnTypeVisitContext { + ts: TsModule; + checker: import('typescript').TypeChecker; + returnTypeMap: Map; + currentClass: string | null; +} + +/** + * Resolve the concrete return type name for a signature, unwrapping + * Promise so async functions contribute their inner type. + */ +function resolveReturnTypeName( checker: import('typescript').TypeChecker, - returnTypeMap: Map, -): void { - let currentClass: string | null = null; - - /** - * Resolve the concrete return type name for a signature, unwrapping - * Promise so async functions contribute their inner type. - */ - function resolveReturnTypeName(sig: import('typescript').Signature | undefined): string | null { - if (!sig) return null; - try { - let retType = checker.getReturnTypeOfSignature(sig); - - // Unwrap Promise → T so async functions get a useful returnTypeMap entry. - const outerSym = retType.getSymbol() ?? retType.aliasSymbol; - if (outerSym?.getName() === 'Promise') { - const args = checker.getTypeArguments(retType as import('typescript').TypeReference); - if (args.length > 0) retType = args[0]!; - } + sig: import('typescript').Signature | undefined, +): string | null { + if (!sig) return null; + try { + let retType = checker.getReturnTypeOfSignature(sig); - const sym = retType.getSymbol() ?? retType.aliasSymbol; - if (!sym) return null; - const name = sym.getName(); - if (!name || name === '__type' || name === '__object' || SKIP_TYPE_NAMES.has(name)) - return null; - return name; - } catch { - return null; + // Unwrap Promise → T so async functions get a useful returnTypeMap entry. + const outerSym = retType.getSymbol() ?? retType.aliasSymbol; + if (outerSym?.getName() === 'Promise') { + const args = checker.getTypeArguments(retType as import('typescript').TypeReference); + if (args.length > 0) retType = args[0]!; } + + const sym = retType.getSymbol() ?? retType.aliasSymbol; + if (!sym) return null; + const name = sym.getName(); + if (!name || name === '__type' || name === '__object' || SKIP_TYPE_NAMES.has(name)) return null; + return name; + } catch { + return null; } +} - function writeEntry(fnName: string, sigNode: import('typescript').SignatureDeclaration): void { - const typeName = resolveReturnTypeName(checker.getSignatureFromDeclaration(sigNode)); - if (typeName) { - const existing = returnTypeMap.get(fnName); - if (!existing || existing.confidence < 1.0) - returnTypeMap.set(fnName, { type: typeName, confidence: 1.0 }); - } +function writeReturnTypeEntry( + ctx: ReturnTypeVisitContext, + fnName: string, + sigNode: import('typescript').SignatureDeclaration, +): void { + const typeName = resolveReturnTypeName( + ctx.checker, + ctx.checker.getSignatureFromDeclaration(sigNode), + ); + if (typeName) { + const existing = ctx.returnTypeMap.get(fnName); + if (!existing || existing.confidence < 1.0) + ctx.returnTypeMap.set(fnName, { type: typeName, confidence: 1.0 }); } +} - /** - * Visit nodes at the current lexical scope (module level or class body). - * Does NOT recurse into function/method bodies to avoid capturing local - * helper functions under bare names. - */ - function visit(node: import('typescript').Node): void { - if (ts.isClassDeclaration(node) || ts.isClassExpression(node)) { - // Enter class scope: visit direct children (method/property declarations). - const saved = currentClass; - currentClass = - (node as import('typescript').ClassDeclaration | import('typescript').ClassExpression).name - ?.text ?? null; - ts.forEachChild(node, visit); - currentClass = saved; - return; // class body fully handled — stop here - } +/** + * Enter class scope: visit direct children (method/property declarations), + * then restore the enclosing class name. + */ +function visitClassScopeForReturnType( + ctx: ReturnTypeVisitContext, + node: import('typescript').Node, +): void { + const saved = ctx.currentClass; + ctx.currentClass = + (node as import('typescript').ClassDeclaration | import('typescript').ClassExpression).name + ?.text ?? null; + ctx.ts.forEachChild(node, (child) => visitReturnTypeNode(ctx, child)); + ctx.currentClass = saved; +} - if (ts.isFunctionDeclaration(node) && node.name) { - // Module-level function declaration: record and stop (no body descent). - writeEntry(node.name.text, node); - return; - } +/** Module-level function declaration: record and stop (no body descent). */ +function visitFunctionDeclarationForReturnType( + ctx: ReturnTypeVisitContext, + node: import('typescript').FunctionDeclaration, +): void { + // node.name is guaranteed truthy by the caller's guard. + writeReturnTypeEntry(ctx, node.name!.text, node); +} - if (ts.isMethodDeclaration(node) && ts.isIdentifier(node.name)) { - // Class method: record as ClassName.methodName and stop. - const fnName = currentClass ? `${currentClass}.${node.name.text}` : node.name.text; - writeEntry(fnName, node); - return; - } +/** Class method: record as ClassName.methodName and stop. */ +function visitMethodDeclarationForReturnType( + ctx: ReturnTypeVisitContext, + node: import('typescript').MethodDeclaration, +): void { + // node.name is guaranteed to be an Identifier by the caller's guard. + const name = (node.name as import('typescript').Identifier).text; + const fnName = ctx.currentClass ? `${ctx.currentClass}.${name}` : name; + writeReturnTypeEntry(ctx, fnName, node); +} - if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name) && node.initializer) { - // Arrow/function-expression assigned to a variable at the current scope. - // Because we never recurse into function bodies, any VariableDeclaration - // we see here is guaranteed to be at module scope or inside a class body - // (not inside a method body), making the bare name safe for cross-file use. - const init = node.initializer; - if (ts.isArrowFunction(init) || ts.isFunctionExpression(init)) { - writeEntry(node.name.text, init); - } - return; // variable declaration fully handled — stop here - } +/** + * Arrow/function-expression assigned to a variable at the current scope. + * Because we never recurse into function bodies, any VariableDeclaration + * seen here is guaranteed to be at module scope or inside a class body + * (not inside a method body), making the bare name safe for cross-file use. + */ +function visitVariableInitializerForReturnType( + ctx: ReturnTypeVisitContext, + node: import('typescript').VariableDeclaration, +): void { + // node.name is guaranteed to be an Identifier and node.initializer is + // guaranteed defined by the caller's guard. + const init = node.initializer!; + if (ctx.ts.isArrowFunction(init) || ctx.ts.isFunctionExpression(init)) { + writeReturnTypeEntry(ctx, (node.name as import('typescript').Identifier).text, init); + } +} - // For all other node kinds (VariableStatement, VariableDeclarationList, - // ExportDeclaration, etc.) recurse to reach nested function/class/var nodes. - ts.forEachChild(node, visit); +/** + * Visit nodes at the current lexical scope (module level or class body). + * Does NOT recurse into function/method bodies to avoid capturing local + * helper functions under bare names. + */ +function visitReturnTypeNode(ctx: ReturnTypeVisitContext, node: import('typescript').Node): void { + const { ts } = ctx; + + if (ts.isClassDeclaration(node) || ts.isClassExpression(node)) { + visitClassScopeForReturnType(ctx, node); + return; // class body fully handled — stop here } - ts.forEachChild(sourceFile, visit); + if (ts.isFunctionDeclaration(node) && node.name) { + visitFunctionDeclarationForReturnType(ctx, node); + return; + } + + if (ts.isMethodDeclaration(node) && ts.isIdentifier(node.name)) { + visitMethodDeclarationForReturnType(ctx, node); + return; + } + + if (ts.isVariableDeclaration(node) && ts.isIdentifier(node.name) && node.initializer) { + visitVariableInitializerForReturnType(ctx, node); + return; // variable declaration fully handled — stop here + } + + // For all other node kinds (VariableStatement, VariableDeclarationList, + // ExportDeclaration, etc.) recurse to reach nested function/class/var nodes. + ts.forEachChild(node, (child) => visitReturnTypeNode(ctx, child)); +} + +function enrichReturnTypeMap( + ts: TsModule, + sourceFile: import('typescript').SourceFile, + checker: import('typescript').TypeChecker, + returnTypeMap: Map, +): void { + const ctx: ReturnTypeVisitContext = { ts, checker, returnTypeMap, currentClass: null }; + ts.forEachChild(sourceFile, (node) => visitReturnTypeNode(ctx, node)); +} + +/** + * Resolve the callee name and, for receiver method calls (`obj.method()`), + * the receiver's typeMap-resolved type name, from a call expression's callee. + * + * Handles two callee shapes: a bare identifier (`fn()`) and a property-access + * expression (`obj.method()`); any other callee shape (e.g. a call expression + * itself, as in `getFactory()()`) yields no calleeName. + */ +function resolveCalleeNameAndReceiverType( + ts: TsModule, + call: import('typescript').CallExpression, + typeMap: Map, +): { calleeName: string | null; receiverTypeName: string | undefined } { + if (ts.isIdentifier(call.expression)) { + return { calleeName: call.expression.text, receiverTypeName: undefined }; + } + + if (ts.isPropertyAccessExpression(call.expression)) { + const calleeName = call.expression.name.text; + const obj = call.expression.expression; + let receiverTypeName: string | undefined; + if (ts.isIdentifier(obj)) { + const entry = typeMap.get(obj.text); + if (entry && typeof entry === 'object') receiverTypeName = entry.type; + } + return { calleeName, receiverTypeName }; + } + + return { calleeName: null, receiverTypeName: undefined }; } /** @@ -426,13 +560,14 @@ function enrichReturnTypeMap( * is not yet in typeMap into callAssignments for cross-file propagation. * Phase 8.1 already resolved the common case into typeMap; this captures the rest. * - * Uses the same two-pass "unambiguous names only" strategy as `enrichSourceFile`: - * collect all candidates first, then only push entries where a given `varName` - * maps to exactly one distinct `calleeName`. This prevents multiple methods in the - * same file that each bind a different imported function to a common local name - * (e.g., `const result = getA()` in one method, `const result = getB()` in - * another) from both landing in `callAssignments`, which would cause - * `propagateReturnTypesAcrossFiles` to silently resolve one arbitrarily. + * Uses the same two-pass "unambiguous names only" strategy as `enrichSourceFile` + * (via the shared `resolveUnambiguous` helper): collect all candidates first, + * then only push entries where a given `varName` maps to exactly one distinct + * `calleeName`. This prevents multiple methods in the same file that each bind + * a different imported function to a common local name (e.g., `const result = + * getA()` in one method, `const result = getB()` in another) from both landing + * in `callAssignments`, which would cause `propagateReturnTypesAcrossFiles` to + * silently resolve one arbitrarily. */ function enrichCallAssignments( ts: TsModule, @@ -452,20 +587,11 @@ function enrichCallAssignments( ) { const varName = node.name.text; if (!typeMap.has(varName)) { - const call = node.initializer; - let calleeName: string | null = null; - let receiverTypeName: string | undefined; - - if (ts.isIdentifier(call.expression)) { - calleeName = call.expression.text; - } else if (ts.isPropertyAccessExpression(call.expression)) { - calleeName = call.expression.name.text; - const obj = call.expression.expression; - if (ts.isIdentifier(obj)) { - const entry = typeMap.get(obj.text); - if (entry && typeof entry === 'object') receiverTypeName = entry.type; - } - } + const { calleeName, receiverTypeName } = resolveCalleeNameAndReceiverType( + ts, + node.initializer, + typeMap, + ); if (calleeName) { const ca: CallAssignment = { varName, calleeName, receiverTypeName }; @@ -488,10 +614,8 @@ function enrichCallAssignments( // calleeName. Ambiguous varNames (same name, different callees across scopes) // are excluded to avoid silently resolving the wrong type cross-file. for (const entries of candidates.values()) { - const uniqueCallees = new Set(entries.map((e) => e.calleeName)); - if (uniqueCallees.size === 1) { - callAssignments.push(entries[0] as CallAssignment); - } + const resolved = resolveUnambiguous(entries, (e) => e.calleeName); + if (resolved) callAssignments.push(resolved); } } diff --git a/src/domain/search/search/prepare.ts b/src/domain/search/search/prepare.ts index a28330ced..13cea6cb4 100644 --- a/src/domain/search/search/prepare.ts +++ b/src/domain/search/search/prepare.ts @@ -1,6 +1,7 @@ import { openReadonlyOrFail } from '../../../db/index.js'; -import { escapeLike } from '../../../db/query-builder.js'; +import { buildFileConditionSQL } from '../../../db/query-builder.js'; import { getEmbeddingCount, getEmbeddingMeta } from '../../../db/repository/embeddings.js'; +import { info } from '../../../infrastructure/logger.js'; import type { BetterSqlite3Database } from '../../../types.js'; import { MODELS } from '../models.js'; import { applyFilters } from './filters.js'; @@ -47,7 +48,7 @@ export function prepareSearch( try { const count = getEmbeddingCount(db); if (count === 0) { - console.log('No embeddings found. Run `codegraph embed` first.'); + info('No embeddings found. Run `codegraph embed` first.'); db.close(); return null; } @@ -82,12 +83,13 @@ export function prepareSearch( params.push(opts.kind); } if (fpArr.length > 0 && !isGlob) { - if (fpArr.length === 1) { - conditions.push("n.file LIKE ? ESCAPE '\\'"); - params.push(`%${escapeLike(fpArr[0]!)}%`); - } else { - conditions.push(`(${fpArr.map(() => "n.file LIKE ? ESCAPE '\\'").join(' OR ')})`); - params.push(...fpArr.map((f) => `%${escapeLike(f)}%`)); + const fc = buildFileConditionSQL(fpArr, 'n.file'); + if (fc.sql) { + // buildFileConditionSQL always prefixes its output with ' AND ' (see + // src/db/query-builder.ts); strip it here since we accumulate raw + // fragments in the conditions[] array and join with ' AND ' below. + conditions.push(fc.sql.replace(/^ AND /, '')); + params.push(...fc.params); } } if (conditions.length > 0) { diff --git a/src/features/branch-compare.ts b/src/features/branch-compare.ts index 086ed1f11..11bc25744 100644 --- a/src/features/branch-compare.ts +++ b/src/features/branch-compare.ts @@ -10,7 +10,7 @@ import { getNative, isNativeAvailable } from '../infrastructure/native.js'; import { isTestFile } from '../infrastructure/test-filter.js'; import { toErrorMessage } from '../shared/errors.js'; import { toSymbolRef } from '../shared/normalize.js'; -import type { EngineMode, NativeDatabase } from '../types.js'; +import type { BetterSqlite3Database, EngineMode, NativeDatabase } from '../types.js'; // ─── Git Helpers ──────────────────────────────────────────────────────── @@ -106,6 +106,96 @@ function makeSymbolKey(kind: string, file: string, name: string): string { return `${kind}::${file}::${name}`; } +interface RawNodeRow { + id: number; + name: string; + kind: string; + file: string; + line: number; + end_line: number | null; +} + +/** Try opening a NativeDatabase handle for batched fan-in/fan-out metrics. */ +function openNativeDbForFanMetrics(dbPath: string): NativeDatabase | undefined { + if (!isNativeAvailable()) return undefined; + try { + const native = getNative(); + return native.NativeDatabase.openReadonly(dbPath); + } catch (e) { + debug(`loadSymbolsFromDb: native path failed: ${toErrorMessage(e)}`); + return undefined; + } +} + +/** Query all non-file/directory nodes belonging to the given changed files. */ +function queryChangedFileNodes(db: BetterSqlite3Database, changedFiles: string[]): RawNodeRow[] { + const placeholders = changedFiles.map(() => '?').join(', '); + return db + .prepare( + `SELECT n.id, n.name, n.kind, n.file, n.line, n.end_line + FROM nodes n + WHERE n.file IN (${placeholders}) + AND n.kind NOT IN ('file', 'directory') + ORDER BY n.file, n.line`, + ) + .all(...changedFiles) as RawNodeRow[]; +} + +/** Build the public SymbolInfo shape from a raw row + its resolved fan metrics. */ +function makeSymbolInfo(row: RawNodeRow, fanIn: number, fanOut: number): SymbolInfo { + const lineCount = row.end_line ? row.end_line - row.line + 1 : 0; + return { + id: row.id, + name: row.name, + kind: row.kind, + file: row.file, + line: row.line, + lineCount, + fanIn, + fanOut, + }; +} + +/** Native fast path: batch all fan-in/fan-out lookups in one napi call. */ +function buildSymbolsViaNativeBatch( + filtered: RawNodeRow[], + nativeDb: NativeDatabase, +): Map { + const symbols = new Map(); + const nodeIds = filtered.map((r) => r.id); + const metrics = nativeDb.batchFanMetrics!(nodeIds); + const metricsMap = new Map(metrics.map((m) => [m.nodeId, m])); + + for (const row of filtered) { + const m = metricsMap.get(row.id); + const key = makeSymbolKey(row.kind, row.file, row.name); + symbols.set(key, makeSymbolInfo(row, m?.fanIn ?? 0, m?.fanOut ?? 0)); + } + return symbols; +} + +/** JS fallback: per-row fan-in/fan-out COUNT queries. */ +function buildSymbolsViaJsFallback( + db: BetterSqlite3Database, + filtered: RawNodeRow[], +): Map { + const symbols = new Map(); + const fanInStmt = db.prepare( + `SELECT COUNT(*) AS cnt FROM edges WHERE target_id = ? AND kind = 'calls'`, + ); + const fanOutStmt = db.prepare( + `SELECT COUNT(*) AS cnt FROM edges WHERE source_id = ? AND kind = 'calls'`, + ); + + for (const row of filtered) { + const fanIn = (fanInStmt.get(row.id) as { cnt: number }).cnt; + const fanOut = (fanOutStmt.get(row.id) as { cnt: number }).cnt; + const key = makeSymbolKey(row.kind, row.file, row.name); + symbols.set(key, makeSymbolInfo(row, fanIn, fanOut)); + } + return symbols; +} + function loadSymbolsFromDb( dbPath: string, changedFiles: string[], @@ -113,97 +203,23 @@ function loadSymbolsFromDb( ): Map { const Database = getDatabase(); const db = new Database(dbPath, { readonly: true }); - - // Try opening a NativeDatabase for batched fan metrics - let nativeDb: NativeDatabase | undefined; - if (isNativeAvailable()) { - try { - const native = getNative(); - nativeDb = native.NativeDatabase.openReadonly(dbPath); - } catch (e) { - debug(`loadSymbolsFromDb: native path failed: ${toErrorMessage(e)}`); - } - } + const nativeDb = openNativeDbForFanMetrics(dbPath); try { - const symbols = new Map(); - if (changedFiles.length === 0) { - return symbols; + return new Map(); } - const placeholders = changedFiles.map(() => '?').join(', '); - const rows = db - .prepare( - `SELECT n.id, n.name, n.kind, n.file, n.line, n.end_line - FROM nodes n - WHERE n.file IN (${placeholders}) - AND n.kind NOT IN ('file', 'directory') - ORDER BY n.file, n.line`, - ) - .all(...changedFiles) as Array<{ - id: number; - name: string; - kind: string; - file: string; - line: number; - end_line: number | null; - }>; + const rows = queryChangedFileNodes(db, changedFiles); // Filter first, then batch fan metrics for all surviving rows const filtered = noTests ? rows.filter((r) => !isTestFile(r.file)) : rows; - // ── Native fast path: batch all fan-in/fan-out in one napi call ── if (nativeDb?.batchFanMetrics && filtered.length > 0) { - const nodeIds = filtered.map((r) => r.id); - const metrics = nativeDb.batchFanMetrics(nodeIds); - const metricsMap = new Map(metrics.map((m) => [m.nodeId, m])); - - for (const row of filtered) { - const lineCount = row.end_line ? row.end_line - row.line + 1 : 0; - const m = metricsMap.get(row.id); - const key = makeSymbolKey(row.kind, row.file, row.name); - symbols.set(key, { - id: row.id, - name: row.name, - kind: row.kind, - file: row.file, - line: row.line, - lineCount, - fanIn: m?.fanIn ?? 0, - fanOut: m?.fanOut ?? 0, - }); - } - return symbols; - } - - // ── JS fallback ─────────────────────────────────────────────────── - const fanInStmt = db.prepare( - `SELECT COUNT(*) AS cnt FROM edges WHERE target_id = ? AND kind = 'calls'`, - ); - const fanOutStmt = db.prepare( - `SELECT COUNT(*) AS cnt FROM edges WHERE source_id = ? AND kind = 'calls'`, - ); - - for (const row of filtered) { - const lineCount = row.end_line ? row.end_line - row.line + 1 : 0; - const fanIn = (fanInStmt.get(row.id) as { cnt: number }).cnt; - const fanOut = (fanOutStmt.get(row.id) as { cnt: number }).cnt; - const key = makeSymbolKey(row.kind, row.file, row.name); - - symbols.set(key, { - id: row.id, - name: row.name, - kind: row.kind, - file: row.file, - line: row.line, - lineCount, - fanIn, - fanOut, - }); + return buildSymbolsViaNativeBatch(filtered, nativeDb); } - return symbols; + return buildSymbolsViaJsFallback(db, filtered); } finally { db.close(); if (nativeDb) { @@ -232,37 +248,7 @@ function loadCallersFromDb( const allCallers = new Set(); for (const startId of nodeIds) { - const visited = new Set([startId]); - let frontier = [startId]; - - for (let d = 1; d <= maxDepth; d++) { - const nextFrontier: number[] = []; - for (const fid of frontier) { - const callers = db - .prepare( - `SELECT DISTINCT n.id, n.name, n.kind, n.file, n.line - FROM edges e JOIN nodes n ON e.source_id = n.id - WHERE e.target_id = ? AND e.kind = 'calls'`, - ) - .all(fid) as Array<{ - id: number; - name: string; - kind: string; - file: string; - line: number; - }>; - - for (const c of callers) { - if (!visited.has(c.id) && (!noTests || !isTestFile(c.file))) { - visited.add(c.id); - nextFrontier.push(c.id); - allCallers.add(JSON.stringify(toSymbolRef(c))); - } - } - } - frontier = nextFrontier; - if (frontier.length === 0) break; - } + bfsCallersFromNode(db, startId, maxDepth, noTests, allCallers); } return [...allCallers].map((s) => JSON.parse(s) as CallerInfo); @@ -271,63 +257,130 @@ function loadCallersFromDb( } } +/** Direct DB callers of a single node id (one BFS-frontier expansion step). */ +function queryDirectCallers( + db: BetterSqlite3Database, + nodeId: number, +): Array<{ id: number; name: string; kind: string; file: string; line: number }> { + return db + .prepare( + `SELECT DISTINCT n.id, n.name, n.kind, n.file, n.line + FROM edges e JOIN nodes n ON e.source_id = n.id + WHERE e.target_id = ? AND e.kind = 'calls'`, + ) + .all(nodeId) as Array<{ id: number; name: string; kind: string; file: string; line: number }>; +} + +/** BFS up to maxDepth from a single starting node, adding newly-seen callers to allCallers. */ +function bfsCallersFromNode( + db: BetterSqlite3Database, + startId: number, + maxDepth: number, + noTests: boolean, + allCallers: Set, +): void { + const visited = new Set([startId]); + let frontier = [startId]; + + for (let d = 1; d <= maxDepth; d++) { + const nextFrontier: number[] = []; + for (const fid of frontier) { + const callers = queryDirectCallers(db, fid); + for (const c of callers) { + if (!visited.has(c.id) && (!noTests || !isTestFile(c.file))) { + visited.add(c.id); + nextFrontier.push(c.id); + allCallers.add(JSON.stringify(toSymbolRef(c))); + } + } + } + frontier = nextFrontier; + if (frontier.length === 0) break; + } +} + // ─── Symbol Comparison ────────────────────────────────────────────────── -function compareSymbols( +/** Symbols present in `targetSymbols` but not `baseSymbols`. */ +function findAddedSymbols( baseSymbols: Map, targetSymbols: Map, -): { added: SymbolInfo[]; removed: SymbolInfo[]; changed: ChangedSymbol[] } { +): SymbolInfo[] { const added: SymbolInfo[] = []; - const removed: SymbolInfo[] = []; - const changed: ChangedSymbol[] = []; - for (const [key, sym] of targetSymbols) { - if (!baseSymbols.has(key)) { - added.push(sym); - } + if (!baseSymbols.has(key)) added.push(sym); } + return added; +} +/** Symbols present in `baseSymbols` but not `targetSymbols`. */ +function findRemovedSymbols( + baseSymbols: Map, + targetSymbols: Map, +): SymbolInfo[] { + const removed: SymbolInfo[] = []; for (const [key, sym] of baseSymbols) { - if (!targetSymbols.has(key)) { - removed.push(sym); - } + if (!targetSymbols.has(key)) removed.push(sym); } + return removed; +} + +/** Build a ChangedSymbol entry from a base/target pair whose metrics diverged. */ +function buildChangedSymbol(baseSym: SymbolInfo, targetSym: SymbolInfo): ChangedSymbol | null { + const lineCountDelta = targetSym.lineCount - baseSym.lineCount; + const fanInDelta = targetSym.fanIn - baseSym.fanIn; + const fanOutDelta = targetSym.fanOut - baseSym.fanOut; + + if (lineCountDelta === 0 && fanInDelta === 0 && fanOutDelta === 0) return null; + + return { + name: baseSym.name, + kind: baseSym.kind, + file: baseSym.file, + base: { + line: baseSym.line, + lineCount: baseSym.lineCount, + fanIn: baseSym.fanIn, + fanOut: baseSym.fanOut, + }, + target: { + line: targetSym.line, + lineCount: targetSym.lineCount, + fanIn: targetSym.fanIn, + fanOut: targetSym.fanOut, + }, + changes: { + lineCount: lineCountDelta, + fanIn: fanInDelta, + fanOut: fanOutDelta, + }, + }; +} +/** Symbols present in both maps whose line count / fan-in / fan-out diverged. */ +function findChangedSymbols( + baseSymbols: Map, + targetSymbols: Map, +): ChangedSymbol[] { + const changed: ChangedSymbol[] = []; for (const [key, baseSym] of baseSymbols) { const targetSym = targetSymbols.get(key); if (!targetSym) continue; - - const lineCountDelta = targetSym.lineCount - baseSym.lineCount; - const fanInDelta = targetSym.fanIn - baseSym.fanIn; - const fanOutDelta = targetSym.fanOut - baseSym.fanOut; - - if (lineCountDelta !== 0 || fanInDelta !== 0 || fanOutDelta !== 0) { - changed.push({ - name: baseSym.name, - kind: baseSym.kind, - file: baseSym.file, - base: { - line: baseSym.line, - lineCount: baseSym.lineCount, - fanIn: baseSym.fanIn, - fanOut: baseSym.fanOut, - }, - target: { - line: targetSym.line, - lineCount: targetSym.lineCount, - fanIn: targetSym.fanIn, - fanOut: targetSym.fanOut, - }, - changes: { - lineCount: lineCountDelta, - fanIn: fanInDelta, - fanOut: fanOutDelta, - }, - }); - } + const entry = buildChangedSymbol(baseSym, targetSym); + if (entry) changed.push(entry); } + return changed; +} - return { added, removed, changed }; +function compareSymbols( + baseSymbols: Map, + targetSymbols: Map, +): { added: SymbolInfo[]; removed: SymbolInfo[]; changed: ChangedSymbol[] } { + return { + added: findAddedSymbols(baseSymbols, targetSymbols), + removed: findRemovedSymbols(baseSymbols, targetSymbols), + changed: findChangedSymbols(baseSymbols, targetSymbols), + }; } // ─── Main Data Function ───────────────────────────────────────────────── @@ -362,48 +415,31 @@ interface BranchCompareResult { summary?: BranchCompareSummary; } -function attachImpactToSymbols( - symbols: SymbolInfo[], +/** + * Attach caller-impact data to each symbol, given a strategy for resolving + * its DB node id (removed symbols carry their own id; changed symbols must + * be looked up in the base-commit symbol map). + */ +function attachImpact( + symbols: T[], + resolveId: (sym: T) => number | undefined, dbPath: string, - _baseSymbols: Map, maxDepth: number, noTests: boolean, ): void { for (const sym of symbols) { - const symCallers = loadCallersFromDb(dbPath, sym.id ? [sym.id] : [], maxDepth, noTests); - (sym as SymbolInfo & { impact?: CallerInfo[] }).impact = symCallers; + const id = resolveId(sym); + const symCallers = loadCallersFromDb(dbPath, id ? [id] : [], maxDepth, noTests); + (sym as T & { impact?: CallerInfo[] }).impact = symCallers; } } -function attachImpactToChanged( - changed: ChangedSymbol[], - dbPath: string, - baseSymbols: Map, - maxDepth: number, - noTests: boolean, -): void { - for (const sym of changed) { - const baseSym = baseSymbols.get(makeSymbolKey(sym.kind, sym.file, sym.name)); - const symCallers = loadCallersFromDb( - dbPath, - baseSym?.id ? [baseSym.id] : [], - maxDepth, - noTests, - ); - sym.impact = symCallers; - } -} - -export async function branchCompareData( +/** Confirm repoRoot is a git repo and resolve baseRef/targetRef to full SHAs. */ +function validateBranchCompareRefs( + repoRoot: string, baseRef: string, targetRef: string, - opts: BranchCompareOpts = {}, -): Promise { - const repoRoot = opts.repoRoot || process.cwd(); - const maxDepth = opts.depth || 3; - const noTests = opts.noTests || false; - const engine = (opts.engine || 'wasm') as EngineMode; - +): { baseSha: string; targetSha: string } | { error: string } { try { execFileSync('git', ['rev-parse', '--git-dir'], { cwd: repoRoot, @@ -421,106 +457,251 @@ export async function branchCompareData( const targetSha = validateGitRef(repoRoot, targetRef); if (!targetSha) return { error: `Invalid git ref: "${targetRef}"` }; - const changedFiles = getChangedFilesBetweenRefs(repoRoot, baseSha, targetSha); - - if (changedFiles.length === 0) { - return { - baseRef, - targetRef, - baseSha, - targetSha, - changedFiles: [], - added: [], - removed: [], - changed: [], - summary: { - added: 0, - removed: 0, - changed: 0, - totalImpacted: 0, - filesAffected: 0, - }, - }; - } + return { baseSha, targetSha }; +} - const tmpBase = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-bc-')); - const baseDir = path.join(tmpBase, 'base'); - const targetDir = path.join(tmpBase, 'target'); +/** Create detached worktrees for both refs and build their graphs. */ +async function setupCompareWorktrees( + repoRoot: string, + baseSha: string, + targetSha: string, + baseDir: string, + targetDir: string, + engine: EngineMode, +): Promise<{ baseDbPath: string; targetDbPath: string }> { + createWorktree(repoRoot, baseSha, baseDir); + createWorktree(repoRoot, targetSha, targetDir); + + await buildGraph(baseDir, { engine, skipRegistry: true }); + await buildGraph(targetDir, { engine, skipRegistry: true }); + + return { + baseDbPath: path.join(baseDir, '.codegraph', 'graph.db'), + targetDbPath: path.join(targetDir, '.codegraph', 'graph.db'), + }; +} - try { - createWorktree(repoRoot, baseSha, baseDir); - createWorktree(repoRoot, targetSha, targetDir); +interface SymbolDiffWithImpact { + added: SymbolInfo[]; + removed: SymbolInfo[]; + changed: ChangedSymbol[]; + allImpacted: Set; + impactedFiles: Set; +} - await buildGraph(baseDir, { engine, skipRegistry: true }); - await buildGraph(targetDir, { engine, skipRegistry: true }); +/** Resolve base-commit node ids for removed/changed symbols (for BFS impact queries). */ +function resolveImpactfulIds( + removed: SymbolInfo[], + changed: ChangedSymbol[], + baseSymbols: Map, +): { removedIds: number[]; changedIds: number[] } { + const removedIds = removed.map((s) => s.id).filter(Boolean); + const changedIds = changed + .map((s) => baseSymbols.get(makeSymbolKey(s.kind, s.file, s.name))?.id) + .filter((id): id is number => Boolean(id)); + return { removedIds, changedIds }; +} - const baseDbPath = path.join(baseDir, '.codegraph', 'graph.db'); - const targetDbPath = path.join(targetDir, '.codegraph', 'graph.db'); +/** Collapse removed+changed caller lists into the summary's impacted-symbol/file sets. */ +function computeImpactedFileSets( + removedImpact: CallerInfo[], + changedImpact: CallerInfo[], +): { allImpacted: Set; impactedFiles: Set } { + const allImpacted = new Set(); + for (const c of removedImpact) allImpacted.add(`${c.file}:${c.name}`); + for (const c of changedImpact) allImpacted.add(`${c.file}:${c.name}`); - const normalizedFiles = changedFiles.map((f) => f.replace(/\\/g, '/')); + const impactedFiles = new Set(); + for (const key of allImpacted) impactedFiles.add(key.split(':')[0]!); - const baseSymbols = loadSymbolsFromDb(baseDbPath, normalizedFiles, noTests); - const targetSymbols = loadSymbolsFromDb(targetDbPath, normalizedFiles, noTests); + return { allImpacted, impactedFiles }; +} - const { added, removed, changed } = compareSymbols(baseSymbols, targetSymbols); +/** Load symbols from both DBs, diff them, and attach/compute blast-radius impact data. */ +function diffSymbolsWithImpact( + baseDbPath: string, + targetDbPath: string, + normalizedFiles: string[], + noTests: boolean, + maxDepth: number, +): SymbolDiffWithImpact { + const baseSymbols = loadSymbolsFromDb(baseDbPath, normalizedFiles, noTests); + const targetSymbols = loadSymbolsFromDb(targetDbPath, normalizedFiles, noTests); - const removedIds = removed.map((s) => s.id).filter(Boolean); - const changedIds = changed - .map((s) => { - const baseSym = baseSymbols.get(makeSymbolKey(s.kind, s.file, s.name)); - return baseSym?.id; - }) - .filter((id): id is number => Boolean(id)); + const { added, removed, changed } = compareSymbols(baseSymbols, targetSymbols); + const { removedIds, changedIds } = resolveImpactfulIds(removed, changed, baseSymbols); - const removedImpact = loadCallersFromDb(baseDbPath, removedIds, maxDepth, noTests); - const changedImpact = loadCallersFromDb(baseDbPath, changedIds, maxDepth, noTests); + const removedImpact = loadCallersFromDb(baseDbPath, removedIds, maxDepth, noTests); + const changedImpact = loadCallersFromDb(baseDbPath, changedIds, maxDepth, noTests); - attachImpactToSymbols(removed, baseDbPath, baseSymbols, maxDepth, noTests); - attachImpactToChanged(changed, baseDbPath, baseSymbols, maxDepth, noTests); + attachImpact(removed, (s) => s.id, baseDbPath, maxDepth, noTests); + attachImpact( + changed, + (s) => baseSymbols.get(makeSymbolKey(s.kind, s.file, s.name))?.id, + baseDbPath, + maxDepth, + noTests, + ); - const allImpacted = new Set(); - for (const c of removedImpact) allImpacted.add(`${c.file}:${c.name}`); - for (const c of changedImpact) allImpacted.add(`${c.file}:${c.name}`); + const { allImpacted, impactedFiles } = computeImpactedFileSets(removedImpact, changedImpact); - const impactedFiles = new Set(); - for (const key of allImpacted) impactedFiles.add(key.split(':')[0]!); + return { added, removed, changed, allImpacted, impactedFiles }; +} - const cleanAdded = added.map(({ id: _id, ...rest }) => rest as SymbolWithoutId); - const cleanRemoved = removed.map(({ id: _id, ...rest }) => { - const result = rest as SymbolWithoutId; - if ((rest as SymbolInfo & { impact?: CallerInfo[] }).impact) { - result.impact = (rest as SymbolInfo & { impact?: CallerInfo[] }).impact; - } - return result; - }); +/** Strip the internal `.id` field, keeping `.impact` where it was attached. */ +function shapeBranchCompareSymbolLists( + added: SymbolInfo[], + removed: SymbolInfo[], +): { cleanAdded: SymbolWithoutId[]; cleanRemoved: SymbolWithoutId[] } { + const cleanAdded = added.map(({ id: _id, ...rest }) => rest as SymbolWithoutId); + const cleanRemoved = removed.map(({ id: _id, ...rest }) => { + const result = rest as SymbolWithoutId; + if ((rest as SymbolInfo & { impact?: CallerInfo[] }).impact) { + result.impact = (rest as SymbolInfo & { impact?: CallerInfo[] }).impact; + } + return result; + }); + return { cleanAdded, cleanRemoved }; +} + +/** Result shape when there are no changed files between the two refs. */ +function emptyBranchCompareResult( + baseRef: string, + targetRef: string, + baseSha: string, + targetSha: string, +): BranchCompareResult { + return { + baseRef, + targetRef, + baseSha, + targetSha, + changedFiles: [], + added: [], + removed: [], + changed: [], + summary: { added: 0, removed: 0, changed: 0, totalImpacted: 0, filesAffected: 0 }, + }; +} + +/** Assemble the final BranchCompareResult from the diff + cleaned symbol lists. */ +function buildBranchCompareResult( + refs: { baseRef: string; targetRef: string; baseSha: string; targetSha: string }, + normalizedFiles: string[], + diff: SymbolDiffWithImpact, + cleaned: { cleanAdded: SymbolWithoutId[]; cleanRemoved: SymbolWithoutId[] }, +): BranchCompareResult { + return { + ...refs, + changedFiles: normalizedFiles, + added: cleaned.cleanAdded, + removed: cleaned.cleanRemoved, + changed: diff.changed, + summary: { + added: diff.added.length, + removed: diff.removed.length, + changed: diff.changed.length, + totalImpacted: diff.allImpacted.size, + filesAffected: diff.impactedFiles.size, + }, + }; +} + +/** Resolve branchCompareData's opts (repoRoot/maxDepth/noTests/engine) with their defaults. */ +function resolveBranchCompareOptions(opts: BranchCompareOpts): { + repoRoot: string; + maxDepth: number; + noTests: boolean; + engine: EngineMode; +} { + return { + repoRoot: opts.repoRoot || process.cwd(), + maxDepth: opts.depth || 3, + noTests: opts.noTests || false, + engine: (opts.engine || 'wasm') as EngineMode, + }; +} + +/** Create the scratch tmpdir + base/target subdirectory paths for the dual worktrees. */ +function createCompareTempDirs(): { tmpBase: string; baseDir: string; targetDir: string } { + const tmpBase = fs.mkdtempSync(path.join(os.tmpdir(), 'codegraph-bc-')); + return { tmpBase, baseDir: path.join(tmpBase, 'base'), targetDir: path.join(tmpBase, 'target') }; +} + +/** Remove both worktrees and the scratch tmpdir (best-effort, always runs in `finally`). */ +function cleanupCompareTempDirs( + repoRoot: string, + baseDir: string, + targetDir: string, + tmpBase: string, +): void { + removeWorktree(repoRoot, baseDir); + removeWorktree(repoRoot, targetDir); + try { + fs.rmSync(tmpBase, { recursive: true, force: true }); + } catch (cleanupErr) { + debug(`branchCompareData: temp cleanup failed: ${toErrorMessage(cleanupErr)}`); + } +} + +/** Set up worktrees, diff the symbols, and shape the final result (the try-block body). */ +async function runBranchCompareInWorktrees( + resolvedRefs: { baseRef: string; targetRef: string; baseSha: string; targetSha: string }, + dirs: { repoRoot: string; baseDir: string; targetDir: string; engine: EngineMode }, + changedFiles: string[], + noTests: boolean, + maxDepth: number, +): Promise { + const { baseSha, targetSha } = resolvedRefs; + const { baseDbPath, targetDbPath } = await setupCompareWorktrees( + dirs.repoRoot, + baseSha, + targetSha, + dirs.baseDir, + dirs.targetDir, + dirs.engine, + ); + + const normalizedFiles = changedFiles.map((f) => f.replace(/\\/g, '/')); + const diff = diffSymbolsWithImpact(baseDbPath, targetDbPath, normalizedFiles, noTests, maxDepth); + const cleaned = shapeBranchCompareSymbolLists(diff.added, diff.removed); + + return buildBranchCompareResult(resolvedRefs, normalizedFiles, diff, cleaned); +} + +export async function branchCompareData( + baseRef: string, + targetRef: string, + opts: BranchCompareOpts = {}, +): Promise { + const { repoRoot, maxDepth, noTests, engine } = resolveBranchCompareOptions(opts); + + const refs = validateBranchCompareRefs(repoRoot, baseRef, targetRef); + if ('error' in refs) return refs; + const { baseSha, targetSha } = refs; + + try { + const changedFiles = getChangedFilesBetweenRefs(repoRoot, baseSha, targetSha); + + if (changedFiles.length === 0) { + return emptyBranchCompareResult(baseRef, targetRef, baseSha, targetSha); + } + + const { tmpBase, baseDir, targetDir } = createCompareTempDirs(); - return { - baseRef, - targetRef, - baseSha, - targetSha, - changedFiles: normalizedFiles, - added: cleanAdded, - removed: cleanRemoved, - changed, - summary: { - added: added.length, - removed: removed.length, - changed: changed.length, - totalImpacted: allImpacted.size, - filesAffected: impactedFiles.size, - }, - }; - } catch (err) { - return { error: toErrorMessage(err) }; - } finally { - removeWorktree(repoRoot, baseDir); - removeWorktree(repoRoot, targetDir); try { - fs.rmSync(tmpBase, { recursive: true, force: true }); - } catch (cleanupErr) { - debug(`branchCompareData: temp cleanup failed: ${toErrorMessage(cleanupErr)}`); + return await runBranchCompareInWorktrees( + { baseRef, targetRef, baseSha, targetSha }, + { repoRoot, baseDir, targetDir, engine }, + changedFiles, + noTests, + maxDepth, + ); + } finally { + cleanupCompareTempDirs(repoRoot, baseDir, targetDir, tmpBase); } + } catch (err) { + return { error: toErrorMessage(err) }; } } @@ -572,47 +753,78 @@ function collectImpactedCallers( return allImpacted; } -export function branchCompareMermaid(data: BranchCompareResult): string { - if (data.error) return data.error; - if ( +/** Render the "Impacted Callers" subgraph block, if there are any impacted callers. */ +function renderImpactedCallersSubgraph( + lines: string[], + state: MermaidNodeIdState, + allImpacted: Map, +): void { + if (allImpacted.size === 0) return; + lines.push(' subgraph sg_impact["Impacted Callers"]'); + for (const [key, c] of allImpacted) { + const nid = mermaidNodeId(state, key); + lines.push(` ${nid}["[${kindIcon(c.kind)}] ${c.name}"]`); + } + lines.push(' end'); + lines.push(' style sg_impact fill:#f3e5f5,stroke:#9c27b0'); +} + +/** Draw the dotted "impacted by" edges from each removed/changed symbol to its callers. */ +function renderImpactEdges( + lines: string[], + state: MermaidNodeIdState, + impactSources: Array<{ kind: string; file: string; name: string; impact?: CallerInfo[] }>, + removed: SymbolWithoutId[], +): void { + for (const sym of impactSources) { + if (!sym.impact) continue; + const prefix = removed.includes(sym as SymbolWithoutId) ? 'removed' : 'changed'; + const symKey = `${prefix}::${sym.kind}::${sym.file}::${sym.name}`; + for (const c of sym.impact) { + const callerKey = `impact::${c.kind}::${c.file}::${c.name}`; + if (state.map.has(symKey) && state.map.has(callerKey)) { + lines.push(` ${state.map.get(symKey)} -.-> ${state.map.get(callerKey)}`); + } + } + } +} + +/** True if the compare result has no added/removed/changed symbols to render. */ +function hasNoBranchDifferences(data: BranchCompareResult): boolean { + return ( (data.added?.length ?? 0) === 0 && (data.removed?.length ?? 0) === 0 && (data.changed?.length ?? 0) === 0 - ) { + ); +} + +/** Render the three top-level Added/Removed/Changed subgraphs. */ +function renderAddedRemovedChangedSubgraphs( + lines: string[], + state: MermaidNodeIdState, + data: BranchCompareResult, +): void { + addMermaidSubgraph(lines, state, 'added', 'Added', data.added || [], '#e8f5e9', '#4caf50'); + addMermaidSubgraph(lines, state, 'removed', 'Removed', data.removed || [], '#ffebee', '#f44336'); + addMermaidSubgraph(lines, state, 'changed', 'Changed', data.changed || [], '#fff3e0', '#ff9800'); +} + +export function branchCompareMermaid(data: BranchCompareResult): string { + if (data.error) return data.error; + if (hasNoBranchDifferences(data)) { return 'flowchart TB\n none["No structural differences detected"]'; } const lines = ['flowchart TB']; const state: MermaidNodeIdState = { counter: 0, map: new Map() }; - addMermaidSubgraph(lines, state, 'added', 'Added', data.added || [], '#e8f5e9', '#4caf50'); - addMermaidSubgraph(lines, state, 'removed', 'Removed', data.removed || [], '#ffebee', '#f44336'); - addMermaidSubgraph(lines, state, 'changed', 'Changed', data.changed || [], '#fff3e0', '#ff9800'); + renderAddedRemovedChangedSubgraphs(lines, state, data); const impactSources = [...(data.removed || []), ...(data.changed || [])]; const allImpacted = collectImpactedCallers(impactSources); - if (allImpacted.size > 0) { - lines.push(' subgraph sg_impact["Impacted Callers"]'); - for (const [key, c] of allImpacted) { - const nid = mermaidNodeId(state, key); - lines.push(` ${nid}["[${kindIcon(c.kind)}] ${c.name}"]`); - } - lines.push(' end'); - lines.push(' style sg_impact fill:#f3e5f5,stroke:#9c27b0'); - } - - for (const sym of impactSources) { - if (!sym.impact) continue; - const prefix = (data.removed || []).includes(sym as SymbolWithoutId) ? 'removed' : 'changed'; - const symKey = `${prefix}::${sym.kind}::${sym.file}::${sym.name}`; - for (const c of sym.impact) { - const callerKey = `impact::${c.kind}::${c.file}::${c.name}`; - if (state.map.has(symKey) && state.map.has(callerKey)) { - lines.push(` ${state.map.get(symKey)} -.-> ${state.map.get(callerKey)}`); - } - } - } + renderImpactedCallersSubgraph(lines, state, allImpacted); + renderImpactEdges(lines, state, impactSources, data.removed || []); return lines.join('\n'); } diff --git a/src/features/cochange.ts b/src/features/cochange.ts index 2c4b9c379..48bed2c90 100644 --- a/src/features/cochange.ts +++ b/src/features/cochange.ts @@ -9,7 +9,8 @@ import { execFileSync } from 'node:child_process'; import fs from 'node:fs'; import path from 'node:path'; import { closeDb, findDbPath, initSchema, openDb, openReadonlyOrFail } from '../db/index.js'; -import { warn } from '../infrastructure/logger.js'; +import { DEFAULTS } from '../infrastructure/config.js'; +import { debug, warn } from '../infrastructure/logger.js'; import { isTestFile } from '../infrastructure/test-filter.js'; import { normalizePath } from '../shared/constants.js'; import { paginateResult } from '../shared/paginate.js'; @@ -34,10 +35,8 @@ interface CoChangeMeta { lastCommit: string | null; } -export function scanGitHistory( - repoRoot: string, - opts: { since?: string; afterSha?: string | null } = {}, -): { commits: CommitEntry[] } { +/** Build the `git log` argv for scanning co-change history. */ +function buildGitLogArgs(opts: { since?: string; afterSha?: string | null }): string[] { const args = [ 'log', '--name-only', @@ -48,10 +47,35 @@ export function scanGitHistory( if (opts.since) args.push(`--since=${opts.since}`); if (opts.afterSha) args.push(`${opts.afterSha}..HEAD`); args.push('--', '.'); + return args; +} +/** Parse `git log --name-only --pretty=format:%H%n%at` output into commit entries. */ +function parseGitLogOutput(output: string): CommitEntry[] { + const commits: CommitEntry[] = []; + // Split on double newlines to get blocks; each block is sha\nepoch\nfile1\nfile2... + const blocks = output.trim().split(/\n\n+/); + for (const block of blocks) { + const lines = block.split('\n').filter((l) => l.length > 0); + if (lines.length < 2) continue; + const sha = lines[0]!; + const epoch = parseInt(lines[1]!, 10); + if (Number.isNaN(epoch)) continue; + const files = lines.slice(2).map((f) => normalizePath(f)); + if (files.length > 0) { + commits.push({ sha, epoch, files }); + } + } + return commits; +} + +export function scanGitHistory( + repoRoot: string, + opts: { since?: string; afterSha?: string | null } = {}, +): { commits: CommitEntry[] } { let output: string; try { - output = execFileSync('git', args, { + output = execFileSync('git', buildGitLogArgs(opts), { cwd: repoRoot, encoding: 'utf-8', maxBuffer: 50 * 1024 * 1024, @@ -64,30 +88,63 @@ export function scanGitHistory( if (!output.trim()) return { commits: [] }; - const commits: CommitEntry[] = []; - // Split on double newlines to get blocks; each block is sha\nepoch\nfile1\nfile2... - const blocks = output.trim().split(/\n\n+/); - for (const block of blocks) { - const lines = block.split('\n').filter((l) => l.length > 0); - if (lines.length < 2) continue; - const sha = lines[0]!; - const epoch = parseInt(lines[1]!, 10); - if (Number.isNaN(epoch)) continue; - const files = lines.slice(2).map((f) => normalizePath(f)); - if (files.length > 0) { - commits.push({ sha, epoch, files }); + return { commits: parseGitLogOutput(output) }; +} + +/** Pass 1: bump the per-file commit count for every file in a (filtered) commit. */ +function updateFileCommitCounts(files: string[], fileCommitCounts: Map): void { + for (const f of files) { + fileCommitCounts.set(f, (fileCommitCounts.get(f) || 0) + 1); + } +} + +/** Pass 2: generate all unique file pairs for a commit (canonical: a < b) and tally them. */ +function updatePairCounts( + files: string[], + epoch: number, + pairCounts: Map, + pairLastEpoch: Map, +): void { + const sorted = [...new Set(files)].sort(); + for (let i = 0; i < sorted.length; i++) { + for (let j = i + 1; j < sorted.length; j++) { + const key = `${sorted[i]}\0${sorted[j]}`; + pairCounts.set(key, (pairCounts.get(key) || 0) + 1); + const prev = pairLastEpoch.get(key) || 0; + if (epoch > prev) pairLastEpoch.set(key, epoch); } } +} - return { commits }; +/** Pass 3: filter pairs by minSupport and compute their Jaccard similarity. */ +function buildCoChangeResults( + pairCounts: Map, + pairLastEpoch: Map, + fileCommitCounts: Map, + minSupport: number, +): Map { + const results = new Map(); + for (const [key, count] of pairCounts) { + if (count < minSupport) continue; + const [fileA, fileB] = key.split('\0') as [string, string]; + const countA = fileCommitCounts.get(fileA) || 0; + const countB = fileCommitCounts.get(fileB) || 0; + const jaccard = count / (countA + countB - count); + results.set(key, { + commitCount: count, + jaccard, + lastEpoch: pairLastEpoch.get(key) || 0, + }); + } + return results; } export function computeCoChanges( commits: CommitEntry[], opts: { minSupport?: number; maxFilesPerCommit?: number; knownFiles?: Set | null } = {}, ): { pairs: Map; fileCommitCounts: Map } { - const minSupport = opts.minSupport ?? 3; - const maxFilesPerCommit = opts.maxFilesPerCommit ?? 50; + const minSupport = opts.minSupport ?? DEFAULTS.coChange.minSupport; + const maxFilesPerCommit = opts.maxFilesPerCommit ?? DEFAULTS.coChange.maxFilesPerCommit; const knownFiles = opts.knownFiles || null; const fileCommitCounts = new Map(); @@ -102,39 +159,14 @@ export function computeCoChanges( files = files.filter((f) => knownFiles.has(f)); } - // Count per-file commits - for (const f of files) { - fileCommitCounts.set(f, (fileCommitCounts.get(f) || 0) + 1); - } - - // Generate all unique pairs (canonical: a < b) - const sorted = [...new Set(files)].sort(); - for (let i = 0; i < sorted.length; i++) { - for (let j = i + 1; j < sorted.length; j++) { - const key = `${sorted[i]}\0${sorted[j]}`; - pairCounts.set(key, (pairCounts.get(key) || 0) + 1); - const prev = pairLastEpoch.get(key) || 0; - if (commit.epoch > prev) pairLastEpoch.set(key, commit.epoch); - } - } + updateFileCommitCounts(files, fileCommitCounts); + updatePairCounts(files, commit.epoch, pairCounts, pairLastEpoch); } - // Filter by minSupport and compute Jaccard - const results = new Map(); - for (const [key, count] of pairCounts) { - if (count < minSupport) continue; - const [fileA, fileB] = key.split('\0') as [string, string]; - const countA = fileCommitCounts.get(fileA) || 0; - const countB = fileCommitCounts.get(fileB) || 0; - const jaccard = count / (countA + countB - count); - results.set(key, { - commitCount: count, - jaccard, - lastEpoch: pairLastEpoch.get(key) || 0, - }); - } - - return { pairs: results, fileCommitCounts }; + return { + pairs: buildCoChangeResults(pairCounts, pairLastEpoch, fileCommitCounts, minSupport), + fileCommitCounts, + }; } /** Read the SHA of the most recently analyzed commit (incremental state). */ @@ -146,8 +178,8 @@ function loadLastAnalyzedSha(db: BetterSqlite3Database): string | null { ) .get(); return row ? row.value : null; - } catch { - /* table may not exist yet */ + } catch (e: unknown) { + debug(`loadLastAnalyzedSha: co_change_meta table may not exist yet: ${(e as Error).message}`); return null; } } @@ -164,8 +196,8 @@ function loadKnownFiles(db: BetterSqlite3Database): Set | null { try { const rows = db.prepare<{ file: string }>('SELECT DISTINCT file FROM nodes').all(); return new Set(rows.map((r) => r.file)); - } catch { - /* nodes table may not exist */ + } catch (e: unknown) { + debug(`loadKnownFiles: nodes table may not exist: ${(e as Error).message}`); return null; } } @@ -236,6 +268,47 @@ function updateCoChangeMeta( metaUpsert.run('min_support', String(minSupport)); } +interface CoChangeAnalysisOptions { + since: string; + minSupport: number; + maxFilesPerCommit: number; +} + +/** Resolve since/minSupport/maxFilesPerCommit from opts, falling back to DEFAULTS.coChange. */ +function resolveCoChangeAnalysisOptions(opts: { + since?: string; + minSupport?: number; + maxFilesPerCommit?: number; +}): CoChangeAnalysisOptions { + return { + since: opts.since || DEFAULTS.coChange.since, + minSupport: opts.minSupport ?? DEFAULTS.coChange.minSupport, + maxFilesPerCommit: opts.maxFilesPerCommit ?? DEFAULTS.coChange.maxFilesPerCommit, + }; +} + +/** Scan git history, compute co-change pairs, and persist them + the run metadata. */ +function runCoChangeScanAndPersist( + db: BetterSqlite3Database, + repoRoot: string, + afterSha: string | null, + resolved: CoChangeAnalysisOptions, +): CommitEntry[] { + const knownFiles = loadKnownFiles(db); + const { commits } = scanGitHistory(repoRoot, { since: resolved.since, afterSha }); + const { pairs: coChanges, fileCommitCounts } = computeCoChanges(commits, { + minSupport: resolved.minSupport, + maxFilesPerCommit: resolved.maxFilesPerCommit, + knownFiles, + }); + + persistCoChangeResults(db, fileCommitCounts, coChanges); + recomputeJaccardForAffected(db, [...fileCommitCounts.keys()]); + updateCoChangeMeta(db, commits, resolved.since, resolved.minSupport); + + return commits; +} + export function analyzeCoChanges( customDbPath?: string, opts: { @@ -258,25 +331,11 @@ export function analyzeCoChanges( return { error: `Not a git repository: ${repoRoot}` }; } - const since = opts.since || '1 year ago'; - const minSupport = opts.minSupport ?? 3; - const maxFilesPerCommit = opts.maxFilesPerCommit ?? 50; - + const resolved = resolveCoChangeAnalysisOptions(opts); const afterSha = opts.full ? null : loadLastAnalyzedSha(db); if (opts.full) clearCoChangeTables(db); - const knownFiles = loadKnownFiles(db); - - const { commits } = scanGitHistory(repoRoot, { since, afterSha }); - const { pairs: coChanges, fileCommitCounts } = computeCoChanges(commits, { - minSupport, - maxFilesPerCommit, - knownFiles, - }); - - persistCoChangeResults(db, fileCommitCounts, coChanges); - recomputeJaccardForAffected(db, [...fileCommitCounts.keys()]); - updateCoChangeMeta(db, commits, since, minSupport); + const commits = runCoChangeScanAndPersist(db, repoRoot, afterSha, resolved); const totalPairs = db .prepare<{ cnt: number }>('SELECT COUNT(*) as cnt FROM co_changes') @@ -287,8 +346,8 @@ export function analyzeCoChanges( return { pairsFound: totalPairs, commitsScanned: commits.length, - since, - minSupport, + since: resolved.since, + minSupport: resolved.minSupport, }; } @@ -300,6 +359,49 @@ interface CoChangeRow { last_commit_epoch: number; } +/** True if the `co_changes` table exists (i.e. `analyzeCoChanges` has run at least once). */ +function hasCoChangeTable(db: BetterSqlite3Database): boolean { + try { + db.prepare('SELECT 1 FROM co_changes LIMIT 1').get(); + return true; + } catch (e: unknown) { + debug(`hasCoChangeTable: co_changes table missing: ${(e as Error).message}`); + return false; + } +} + +/** Format a last-commit epoch (seconds) as `YYYY-MM-DD`, or null if absent. */ +function epochToDateString(epoch: number): string | null { + return epoch ? new Date(epoch * 1000).toISOString().slice(0, 10) : null; +} + +/** Shape+filter co-change rows into the public per-file "partners" list. */ +function buildCoChangePartners( + rows: CoChangeRow[], + resolvedFile: string, + noTests: boolean, + limit: number, +): Array<{ file: string; commitCount: number; jaccard: number; lastCommitDate: string | null }> { + const partners: Array<{ + file: string; + commitCount: number; + jaccard: number; + lastCommitDate: string | null; + }> = []; + for (const row of rows) { + const partner = row.file_a === resolvedFile ? row.file_b : row.file_a; + if (noTests && isTestFile(partner)) continue; + partners.push({ + file: partner, + commitCount: row.commit_count, + jaccard: row.jaccard, + lastCommitDate: epochToDateString(row.last_commit_epoch), + }); + if (partners.length >= limit) break; + } + return partners; +} + export function coChangeData( file: string, customDbPath?: string, @@ -307,13 +409,10 @@ export function coChangeData( ): Record { const db = openReadonlyOrFail(customDbPath); const limit = opts.limit || 20; - const minJaccard = opts.minJaccard ?? 0.3; + const minJaccard = opts.minJaccard ?? DEFAULTS.coChange.minJaccard; const noTests = opts.noTests || false; - // Check if co_changes table exists - try { - db.prepare('SELECT 1 FROM co_changes LIMIT 1').get(); - } catch { + if (!hasCoChangeTable(db)) { closeDb(db); return { error: 'No co-change data found. Run `codegraph co-change --analyze` first.' }; } @@ -334,31 +433,46 @@ export function coChangeData( ) .all(resolvedFile, resolvedFile, minJaccard); - const partners: Array<{ - file: string; + const partners = buildCoChangePartners(rows, resolvedFile, noTests, limit); + + const meta = getCoChangeMeta(db); + closeDb(db); + + const base = { file: resolvedFile, partners, meta }; + return paginateResult(base, 'partners', { limit: opts.limit, offset: opts.offset }); +} + +/** Shape+filter co-change rows into the public global "top pairs" list. */ +function buildCoChangeTopPairs( + rows: CoChangeRow[], + noTests: boolean, + limit: number, +): Array<{ + fileA: string; + fileB: string; + commitCount: number; + jaccard: number; + lastCommitDate: string | null; +}> { + const pairs: Array<{ + fileA: string; + fileB: string; commitCount: number; jaccard: number; lastCommitDate: string | null; }> = []; for (const row of rows) { - const partner = row.file_a === resolvedFile ? row.file_b : row.file_a; - if (noTests && isTestFile(partner)) continue; - partners.push({ - file: partner, + if (noTests && (isTestFile(row.file_a) || isTestFile(row.file_b))) continue; + pairs.push({ + fileA: row.file_a, + fileB: row.file_b, commitCount: row.commit_count, jaccard: row.jaccard, - lastCommitDate: row.last_commit_epoch - ? new Date(row.last_commit_epoch * 1000).toISOString().slice(0, 10) - : null, + lastCommitDate: epochToDateString(row.last_commit_epoch), }); - if (partners.length >= limit) break; + if (pairs.length >= limit) break; } - - const meta = getCoChangeMeta(db); - closeDb(db); - - const base = { file: resolvedFile, partners, meta }; - return paginateResult(base, 'partners', { limit: opts.limit, offset: opts.offset }); + return pairs; } export function coChangeTopData( @@ -367,12 +481,10 @@ export function coChangeTopData( ): Record { const db = openReadonlyOrFail(customDbPath); const limit = opts.limit || 20; - const minJaccard = opts.minJaccard ?? 0.3; + const minJaccard = opts.minJaccard ?? DEFAULTS.coChange.minJaccard; const noTests = opts.noTests || false; - try { - db.prepare('SELECT 1 FROM co_changes LIMIT 1').get(); - } catch { + if (!hasCoChangeTable(db)) { closeDb(db); return { error: 'No co-change data found. Run `codegraph co-change --analyze` first.' }; } @@ -386,32 +498,40 @@ export function coChangeTopData( ) .all(minJaccard); - const pairs: Array<{ - fileA: string; - fileB: string; + const pairs = buildCoChangeTopPairs(rows, noTests, limit); + + const meta = getCoChangeMeta(db); + closeDb(db); + + const base = { pairs, meta }; + return paginateResult(base, 'pairs', { limit: opts.limit, offset: opts.offset }); +} + +/** Shape+filter co-change rows into the public "coupled with an input file" list. */ +function buildCoChangeForFilesResults( + rows: Array<{ file_a: string; file_b: string; commit_count: number; jaccard: number }>, + inputSet: Set, + noTests: boolean, +): Array<{ file: string; coupledWith: string; commitCount: number; jaccard: number }> { + const results: Array<{ + file: string; + coupledWith: string; commitCount: number; jaccard: number; - lastCommitDate: string | null; }> = []; for (const row of rows) { - if (noTests && (isTestFile(row.file_a) || isTestFile(row.file_b))) continue; - pairs.push({ - fileA: row.file_a, - fileB: row.file_b, + const partner = inputSet.has(row.file_a) ? row.file_b : row.file_a; + const source = inputSet.has(row.file_a) ? row.file_a : row.file_b; + if (inputSet.has(partner)) continue; + if (noTests && isTestFile(partner)) continue; + results.push({ + file: partner, + coupledWith: source, commitCount: row.commit_count, jaccard: row.jaccard, - lastCommitDate: row.last_commit_epoch - ? new Date(row.last_commit_epoch * 1000).toISOString().slice(0, 10) - : null, }); - if (pairs.length >= limit) break; } - - const meta = getCoChangeMeta(db); - closeDb(db); - - const base = { pairs, meta }; - return paginateResult(base, 'pairs', { limit: opts.limit, offset: opts.offset }); + return results; } export function coChangeForFiles( @@ -419,7 +539,7 @@ export function coChangeForFiles( db: BetterSqlite3Database, opts: { minJaccard?: number; limit?: number; noTests?: boolean } = {}, ): Array<{ file: string; coupledWith: string; commitCount: number; jaccard: number }> { - const minJaccard = opts.minJaccard ?? 0.3; + const minJaccard = opts.minJaccard ?? DEFAULTS.coChange.minJaccard; const limit = opts.limit ?? 20; const noTests = opts.noTests || false; const inputSet = new Set(files); @@ -438,26 +558,7 @@ export function coChangeForFiles( ) .all(...files, ...files, minJaccard, limit); - const results: Array<{ - file: string; - coupledWith: string; - commitCount: number; - jaccard: number; - }> = []; - for (const row of rows) { - const partner = inputSet.has(row.file_a) ? row.file_b : row.file_a; - const source = inputSet.has(row.file_a) ? row.file_a : row.file_b; - if (inputSet.has(partner)) continue; - if (noTests && isTestFile(partner)) continue; - results.push({ - file: partner, - coupledWith: source, - commitCount: row.commit_count, - jaccard: row.jaccard, - }); - } - - return results; + return buildCoChangeForFilesResults(rows, inputSet, noTests); } // ─── Internal Helpers ──────────────────────────────────────────────────── diff --git a/src/features/complexity-query.ts b/src/features/complexity-query.ts index 5f3b9d121..d1ba86a3a 100644 --- a/src/features/complexity-query.ts +++ b/src/features/complexity-query.ts @@ -7,7 +7,7 @@ import { openReadonlyOrFail } from '../db/index.js'; import { buildFileConditionSQL } from '../db/query-builder.js'; -import { loadConfig } from '../infrastructure/config.js'; +import { DEFAULTS, loadConfig } from '../infrastructure/config.js'; import { debug } from '../infrastructure/logger.js'; import { isTestFile } from '../infrastructure/test-filter.js'; import { paginateResult } from '../shared/paginate.js'; @@ -35,6 +35,61 @@ interface ComplexityRow { const isValidThreshold = (v: unknown): v is number => typeof v === 'number' && Number.isFinite(v); +/** Column-sort expressions for `codegraph complexity --sort `. */ +const ORDER_BY_MAP: Record = { + cognitive: 'fc.cognitive DESC', + cyclomatic: 'fc.cyclomatic DESC', + nesting: 'fc.max_nesting DESC', + mi: 'fc.maintainability_index ASC', + volume: 'fc.halstead_volume DESC', + effort: 'fc.halstead_effort DESC', + bugs: 'fc.halstead_bugs DESC', + loc: 'fc.loc DESC', +}; + +interface ThresholdMetrics { + cognitive: number; + cyclomatic: number; + max_nesting: number; + maintainability_index: number; +} + +/** Single source of truth for which metric names exceed which thresholds. */ +const METRIC_THRESHOLD_CHECKS: Array<{ + name: string; + exceeds: (r: ThresholdMetrics, thresholds: any) => boolean; +}> = [ + { + name: 'cognitive', + exceeds: (r, t) => + isValidThreshold(t.cognitive?.warn) && r.cognitive >= (t.cognitive?.warn ?? 0), + }, + { + name: 'cyclomatic', + exceeds: (r, t) => + isValidThreshold(t.cyclomatic?.warn) && r.cyclomatic >= (t.cyclomatic?.warn ?? 0), + }, + { + name: 'maxNesting', + exceeds: (r, t) => + isValidThreshold(t.maxNesting?.warn) && r.max_nesting >= (t.maxNesting?.warn ?? 0), + }, + { + name: 'maintainabilityIndex', + exceeds: (r, t) => + isValidThreshold(t.maintainabilityIndex?.warn) && + r.maintainability_index > 0 && + r.maintainability_index <= (t.maintainabilityIndex?.warn ?? 0), + }, +]; + +/** List of metric names a row exceeds (empty if none). */ +function getExceededMetrics(r: ThresholdMetrics, thresholds: any): string[] { + return METRIC_THRESHOLD_CHECKS.filter((check) => check.exceeds(r, thresholds)).map( + (check) => check.name, + ); +} + /** Build WHERE clause and params for complexity query filtering. */ function buildComplexityWhere(opts: { noTests: boolean; @@ -90,28 +145,7 @@ function buildThresholdHaving(thresholds: any): string { /** Map a raw DB row to the public complexity result shape. */ function mapComplexityRow(r: ComplexityRow, thresholds: any): Record { - const exceeds: string[] = []; - if ( - isValidThreshold(thresholds.cognitive?.warn) && - r.cognitive >= (thresholds.cognitive?.warn ?? 0) - ) - exceeds.push('cognitive'); - if ( - isValidThreshold(thresholds.cyclomatic?.warn) && - r.cyclomatic >= (thresholds.cyclomatic?.warn ?? 0) - ) - exceeds.push('cyclomatic'); - if ( - isValidThreshold(thresholds.maxNesting?.warn) && - r.max_nesting >= (thresholds.maxNesting?.warn ?? 0) - ) - exceeds.push('maxNesting'); - if ( - isValidThreshold(thresholds.maintainabilityIndex?.warn) && - r.maintainability_index > 0 && - r.maintainability_index <= (thresholds.maintainabilityIndex?.warn ?? 0) - ) - exceeds.push('maintainabilityIndex'); + const exceeds = getExceededMetrics(r, thresholds); return { name: r.name, @@ -136,21 +170,48 @@ function mapComplexityRow(r: ComplexityRow, thresholds: any): Record 0; +} + +/** Fetch the bare metric columns (all rows) used to compute summary statistics. */ +function fetchAllComplexityMetrics( + db: ReturnType, + noTests: boolean, +): ThresholdMetrics[] { + return db + .prepare( + `SELECT fc.cognitive, fc.cyclomatic, fc.max_nesting, fc.maintainability_index + FROM function_complexity fc JOIN nodes n ON fc.node_id = n.id + WHERE n.kind IN ('function','method') + ${noTests ? `AND n.file NOT LIKE '%.test.%' AND n.file NOT LIKE '%.spec.%' AND n.file NOT LIKE '%__test__%' AND n.file NOT LIKE '%__tests__%' AND n.file NOT LIKE '%.stories.%'` : ''}`, + ) + .all(); +} + +/** Arithmetic mean, rounded to 1 decimal (matches the summary's existing precision). */ +function average(values: number[]): number { + return +(values.reduce((s, v) => s + v, 0) / values.length).toFixed(1); +} + +/** Reduce a set of complexity rows down to the public summary-statistics shape. */ +function summarizeComplexityMetrics( + allRows: ThresholdMetrics[], thresholds: any, -): boolean { - return ( - (isValidThreshold(thresholds.cognitive?.warn) && - r.cognitive >= (thresholds.cognitive?.warn ?? 0)) || - (isValidThreshold(thresholds.cyclomatic?.warn) && - r.cyclomatic >= (thresholds.cyclomatic?.warn ?? 0)) || - (isValidThreshold(thresholds.maxNesting?.warn) && - r.max_nesting >= (thresholds.maxNesting?.warn ?? 0)) || - (isValidThreshold(thresholds.maintainabilityIndex?.warn) && - r.maintainability_index > 0 && - r.maintainability_index <= (thresholds.maintainabilityIndex?.warn ?? 0)) - ); +): Record { + const cognitiveValues = allRows.map((r) => r.cognitive); + const cyclomaticValues = allRows.map((r) => r.cyclomatic); + const miValues = allRows.map((r) => r.maintainability_index || 0); + return { + analyzed: allRows.length, + avgCognitive: average(cognitiveValues), + avgCyclomatic: average(cyclomaticValues), + maxCognitive: Math.max(...cognitiveValues), + maxCyclomatic: Math.max(...cyclomaticValues), + avgMI: average(miValues), + minMI: +Math.min(...miValues).toFixed(1), + aboveWarn: allRows.filter((r) => exceedsAnyThreshold(r, thresholds)).length, + }; } /** Compute summary statistics across all complexity rows. */ @@ -160,33 +221,9 @@ function computeComplexitySummary( thresholds: any, ): Record | null { try { - const allRows = db - .prepare<{ - cognitive: number; - cyclomatic: number; - max_nesting: number; - maintainability_index: number; - }>( - `SELECT fc.cognitive, fc.cyclomatic, fc.max_nesting, fc.maintainability_index - FROM function_complexity fc JOIN nodes n ON fc.node_id = n.id - WHERE n.kind IN ('function','method') - ${noTests ? `AND n.file NOT LIKE '%.test.%' AND n.file NOT LIKE '%.spec.%' AND n.file NOT LIKE '%__test__%' AND n.file NOT LIKE '%__tests__%' AND n.file NOT LIKE '%.stories.%'` : ''}`, - ) - .all(); - + const allRows = fetchAllComplexityMetrics(db, noTests); if (allRows.length === 0) return null; - - const miValues = allRows.map((r) => r.maintainability_index || 0); - return { - analyzed: allRows.length, - avgCognitive: +(allRows.reduce((s, r) => s + r.cognitive, 0) / allRows.length).toFixed(1), - avgCyclomatic: +(allRows.reduce((s, r) => s + r.cyclomatic, 0) / allRows.length).toFixed(1), - maxCognitive: Math.max(...allRows.map((r) => r.cognitive)), - maxCyclomatic: Math.max(...allRows.map((r) => r.cyclomatic)), - avgMI: +(miValues.reduce((s, v) => s + v, 0) / miValues.length).toFixed(1), - minMI: +Math.min(...miValues).toFixed(1), - aboveWarn: allRows.filter((r) => exceedsAnyThreshold(r, thresholds)).length, - }; + return summarizeComplexityMetrics(allRows, thresholds); } catch (e: unknown) { debug(`complexity summary query failed: ${(e as Error).message}`); return null; @@ -203,33 +240,89 @@ function checkHasGraph(db: ReturnType): boolean { } } +/** Run the main complexity rows query; returns null if the table doesn't exist yet. */ +function queryComplexityRows( + db: ReturnType, + where: string, + having: string, + orderBy: string, + params: unknown[], +): ComplexityRow[] | null { + try { + return db + .prepare( + `SELECT n.name, n.kind, n.file, n.line, n.end_line, + fc.cognitive, fc.cyclomatic, fc.max_nesting, + fc.loc, fc.sloc, fc.maintainability_index, + fc.halstead_volume, fc.halstead_difficulty, fc.halstead_effort, fc.halstead_bugs + FROM function_complexity fc + JOIN nodes n ON fc.node_id = n.id + ${where} ${having} + ORDER BY ${orderBy}`, + ) + .all(...params); + } catch (e: unknown) { + debug(`complexity query failed (table may not exist): ${(e as Error).message}`); + return null; + } +} + +interface ComplexityQueryOpts { + target?: string; + limit?: number; + sort?: string; + aboveThreshold?: boolean; + file?: string; + kind?: string; + noTests?: boolean; + config?: CodegraphConfig; + offset?: number; +} + +/** Resolve query flags + effective manifesto thresholds from opts/config/DEFAULTS. */ +function resolveComplexityQueryOptions(opts: ComplexityQueryOpts): { + sort: string; + noTests: boolean; + aboveThreshold: boolean; + thresholds: any; +} { + const config = opts.config || loadConfig(process.cwd()); + return { + sort: opts.sort || 'cognitive', + noTests: opts.noTests || false, + aboveThreshold: opts.aboveThreshold || false, + thresholds: config.manifesto?.rules || DEFAULTS.manifesto.rules, + }; +} + +/** Run the query + summary and shape the pre-pagination result object. */ +function buildComplexityResult( + db: ReturnType, + sql: { where: string; having: string; orderBy: string; params: unknown[] }, + noTests: boolean, + thresholds: any, +): Record { + const rows = queryComplexityRows(db, sql.where, sql.having, sql.orderBy, sql.params); + if (rows === null) { + return { functions: [], summary: null, thresholds, hasGraph: checkHasGraph(db) }; + } + + const filtered = noTests ? rows.filter((r) => !isTestFile(r.file)) : rows; + const functions = filtered.map((r) => mapComplexityRow(r, thresholds)); + + const summary = computeComplexitySummary(db, noTests, thresholds); + const hasGraph = summary === null ? checkHasGraph(db) : false; + + return { functions, summary, thresholds, hasGraph }; +} + export function complexityData( customDbPath?: string, - opts: { - target?: string; - limit?: number; - sort?: string; - aboveThreshold?: boolean; - file?: string; - kind?: string; - noTests?: boolean; - config?: CodegraphConfig; - offset?: number; - } = {}, + opts: ComplexityQueryOpts = {}, ): Record { const db = openReadonlyOrFail(customDbPath); try { - const sort = opts.sort || 'cognitive'; - const noTests = opts.noTests || false; - const aboveThreshold = opts.aboveThreshold || false; - - const config = opts.config || loadConfig(process.cwd()); - const thresholds: any = config.manifesto?.rules || { - cognitive: { warn: 15, fail: null }, - cyclomatic: { warn: 10, fail: null }, - maxNesting: { warn: 4, fail: null }, - maintainabilityIndex: { warn: 20, fail: null }, - }; + const { sort, noTests, aboveThreshold, thresholds } = resolveComplexityQueryOptions(opts); const { where, params } = buildComplexityWhere({ noTests, @@ -239,45 +332,9 @@ export function complexityData( }); const having = aboveThreshold ? buildThresholdHaving(thresholds) : ''; + const orderBy = ORDER_BY_MAP[sort] || 'fc.cognitive DESC'; - const orderMap: Record = { - cognitive: 'fc.cognitive DESC', - cyclomatic: 'fc.cyclomatic DESC', - nesting: 'fc.max_nesting DESC', - mi: 'fc.maintainability_index ASC', - volume: 'fc.halstead_volume DESC', - effort: 'fc.halstead_effort DESC', - bugs: 'fc.halstead_bugs DESC', - loc: 'fc.loc DESC', - }; - const orderBy = orderMap[sort] || 'fc.cognitive DESC'; - - let rows: ComplexityRow[]; - try { - rows = db - .prepare( - `SELECT n.name, n.kind, n.file, n.line, n.end_line, - fc.cognitive, fc.cyclomatic, fc.max_nesting, - fc.loc, fc.sloc, fc.maintainability_index, - fc.halstead_volume, fc.halstead_difficulty, fc.halstead_effort, fc.halstead_bugs - FROM function_complexity fc - JOIN nodes n ON fc.node_id = n.id - ${where} ${having} - ORDER BY ${orderBy}`, - ) - .all(...params); - } catch (e: unknown) { - debug(`complexity query failed (table may not exist): ${(e as Error).message}`); - return { functions: [], summary: null, thresholds, hasGraph: checkHasGraph(db) }; - } - - const filtered = noTests ? rows.filter((r) => !isTestFile(r.file)) : rows; - const functions = filtered.map((r) => mapComplexityRow(r, thresholds)); - - const summary = computeComplexitySummary(db, noTests, thresholds); - const hasGraph = summary === null ? checkHasGraph(db) : false; - - const base = { functions, summary, thresholds, hasGraph }; + const base = buildComplexityResult(db, { where, having, orderBy, params }, noTests, thresholds); return paginateResult(base, 'functions', { limit: opts.limit, offset: opts.offset }); } finally { db.close(); diff --git a/src/graph/algorithms/leiden/adapter.ts b/src/graph/algorithms/leiden/adapter.ts index 390a15aa3..29efc1d55 100644 --- a/src/graph/algorithms/leiden/adapter.ts +++ b/src/graph/algorithms/leiden/adapter.ts @@ -74,22 +74,33 @@ function populateDirectedEdges( } } -/** - * Populate edge arrays for an undirected graph. Reciprocal pairs are - * symmetrized and averaged to produce a single weight per undirected edge. - * Self-loops use single-w convention (matching modularity.ts formulas). - */ -function populateUndirectedEdges( +/** Fold a single a→b weight into the unordered-pair aggregate, tracking which direction(s) were seen. */ +function recordUndirectedPairWeight( + pairAgg: Map, + a: number, + b: number, + w: number, +): void { + const i = a < b ? a : b; + const j = a < b ? b : a; + const key = `${i}:${j}`; + let rec = pairAgg.get(key); + if (!rec) { + rec = { sum: 0, seenAB: 0, seenBA: 0 }; + pairAgg.set(key, rec); + } + rec.sum += w; + if (a === i) rec.seenAB = 1; + else rec.seenBA = 1; +} + +/** Aggregate raw undirected edges into one weighted record per unordered node pair. */ +function aggregateUndirectedPairs( graph: CodeGraph, idToIndex: Map, linkWeight: (attrs: EdgeAttrs) => number, - n: number, selfLoop: Float64Array, - outEdges: EdgeEntry[][], - inEdges: InEdgeEntry[][], - strengthOut: Float64Array, - strengthIn: Float64Array, -): void { +): Map { const pairAgg = new Map(); for (const [src, tgt, attrs] of graph.edges()) { @@ -101,19 +112,20 @@ function populateUndirectedEdges( taAdd(selfLoop, a, w); continue; } - const i = a < b ? a : b; - const j = a < b ? b : a; - const key = `${i}:${j}`; - let rec = pairAgg.get(key); - if (!rec) { - rec = { sum: 0, seenAB: 0, seenBA: 0 }; - pairAgg.set(key, rec); - } - rec.sum += w; - if (a === i) rec.seenAB = 1; - else rec.seenBA = 1; + recordUndirectedPairWeight(pairAgg, a, b, w); } + return pairAgg; +} + +/** Emit symmetrized undirected edges (averaged over any reciprocal pairs) into the adjacency lists. */ +function emitUndirectedPairs( + pairAgg: Map, + outEdges: EdgeEntry[][], + inEdges: InEdgeEntry[][], + strengthOut: Float64Array, + strengthIn: Float64Array, +): void { for (const [key, rec] of pairAgg.entries()) { const parts = key.split(':'); const i = +(parts[0] as string); @@ -130,10 +142,21 @@ function populateUndirectedEdges( taAdd(strengthIn, i, w); taAdd(strengthIn, j, w); } +} - // Add self-loops into adjacency and strengths. - // Note: uses single-w convention (not standard 2w) — the modularity formulas in - // modularity.ts are written to match this convention, keeping the system self-consistent. +/** + * Add self-loops into adjacency and strengths. + * Note: uses single-w convention (not standard 2w) — the modularity formulas in + * modularity.ts are written to match this convention, keeping the system self-consistent. + */ +function applyUndirectedSelfLoops( + n: number, + selfLoop: Float64Array, + outEdges: EdgeEntry[][], + inEdges: InEdgeEntry[][], + strengthOut: Float64Array, + strengthIn: Float64Array, +): void { for (let v = 0; v < n; v++) { const w: number = fget(selfLoop, v); if (w !== 0) { @@ -145,15 +168,56 @@ function populateUndirectedEdges( } } -export function makeGraphAdapter(graph: CodeGraph, opts: GraphAdapterOptions = {}): GraphAdapter { - const linkWeight: (attrs: EdgeAttrs) => number = - opts.linkWeight || ((attrs) => (attrs && typeof attrs.weight === 'number' ? attrs.weight : 1)); - const nodeSize: (attrs: NodeAttrs) => number = - opts.nodeSize || ((attrs) => (attrs && typeof attrs.size === 'number' ? attrs.size : 1)); - const directed: boolean = !!opts.directed; - const baseNodeIds: string[] | undefined = opts.baseNodeIds; - - // Build dense node index mapping +/** + * Populate edge arrays for an undirected graph. Reciprocal pairs are + * symmetrized and averaged to produce a single weight per undirected edge. + * Self-loops use single-w convention (matching modularity.ts formulas). + */ +function populateUndirectedEdges( + graph: CodeGraph, + idToIndex: Map, + linkWeight: (attrs: EdgeAttrs) => number, + n: number, + selfLoop: Float64Array, + outEdges: EdgeEntry[][], + inEdges: InEdgeEntry[][], + strengthOut: Float64Array, + strengthIn: Float64Array, +): void { + const pairAgg = aggregateUndirectedPairs(graph, idToIndex, linkWeight, selfLoop); + emitUndirectedPairs(pairAgg, outEdges, inEdges, strengthOut, strengthIn); + applyUndirectedSelfLoops(n, selfLoop, outEdges, inEdges, strengthOut, strengthIn); +} + +interface ResolvedAdapterOptions { + linkWeight: (attrs: EdgeAttrs) => number; + nodeSize: (attrs: NodeAttrs) => number; + directed: boolean; + baseNodeIds: string[] | undefined; +} + +/** Apply GraphAdapterOptions defaults (weight=1, size=1, directed=false). */ +function resolveAdapterOptions(opts: GraphAdapterOptions): ResolvedAdapterOptions { + return { + linkWeight: + opts.linkWeight || + ((attrs) => (attrs && typeof attrs.weight === 'number' ? attrs.weight : 1)), + nodeSize: + opts.nodeSize || ((attrs) => (attrs && typeof attrs.size === 'number' ? attrs.size : 1)), + directed: !!opts.directed, + baseNodeIds: opts.baseNodeIds, + }; +} + +/** + * Build the dense node index mapping. When `baseNodeIds` is provided, node + * order/indices are pinned to it (used to align adapters built from related + * graphs); otherwise indices are assigned in CodeGraph iteration order. + */ +function buildNodeIndex( + graph: CodeGraph, + baseNodeIds: string[] | undefined, +): { nodeIds: string[]; idToIndex: Map } { const nodeIds: string[] = []; const idToIndex = new Map(); if (Array.isArray(baseNodeIds) && baseNodeIds.length > 0) { @@ -169,10 +233,39 @@ export function makeGraphAdapter(graph: CodeGraph, opts: GraphAdapterOptions = { nodeIds.push(id); } } + return { nodeIds, idToIndex }; +} + +/** Resolve per-node sizes via the adapter's nodeSize accessor, dense-indexed. */ +function computeNodeSizes( + graph: CodeGraph, + idToIndex: Map, + n: number, + nodeSize: (attrs: NodeAttrs) => number, +): Float64Array { + const size = new Float64Array(n); + for (const [id, attrs] of graph.nodes()) { + const i = idToIndex.get(id); + if (i != null) size[i] = +nodeSize(attrs) || 0; + } + return size; +} + +function makeForEachNeighbor( + outEdges: EdgeEntry[][], +): (i: number, cb: (to: number, w: number) => void) => void { + return (i, cb) => { + const list = outEdges[i] as EdgeEntry[]; + for (let k = 0; k < list.length; k++) cb((list[k] as EdgeEntry).to, (list[k] as EdgeEntry).w); + }; +} + +export function makeGraphAdapter(graph: CodeGraph, opts: GraphAdapterOptions = {}): GraphAdapter { + const { linkWeight, nodeSize, directed, baseNodeIds } = resolveAdapterOptions(opts); + const { nodeIds, idToIndex } = buildNodeIndex(graph, baseNodeIds); const n: number = nodeIds.length; // Storage - const size = new Float64Array(n); const selfLoop = new Float64Array(n); const strengthOut = new Float64Array(n); const strengthIn = new Float64Array(n); @@ -211,20 +304,11 @@ export function makeGraphAdapter(graph: CodeGraph, opts: GraphAdapterOptions = { ); } - // Node sizes - for (const [id, attrs] of graph.nodes()) { - const i = idToIndex.get(id); - if (i != null) size[i] = +nodeSize(attrs) || 0; - } + const size = computeNodeSizes(graph, idToIndex, n, nodeSize); // Totals const totalWeight: number = strengthOut.reduce((a, b) => a + b, 0); - function forEachNeighbor(i: number, cb: (to: number, w: number) => void): void { - const list = outEdges[i] as EdgeEntry[]; - for (let k = 0; k < list.length; k++) cb((list[k] as EdgeEntry).to, (list[k] as EdgeEntry).w); - } - return { n, nodeIds, @@ -237,6 +321,6 @@ export function makeGraphAdapter(graph: CodeGraph, opts: GraphAdapterOptions = { inEdges, directed, totalWeight, - forEachNeighbor, + forEachNeighbor: makeForEachNeighbor(outEdges), }; } diff --git a/src/graph/algorithms/leiden/partition.ts b/src/graph/algorithms/leiden/partition.ts index de78b8f3e..8e76f8c50 100644 --- a/src/graph/algorithms/leiden/partition.ts +++ b/src/graph/algorithms/leiden/partition.ts @@ -74,6 +74,34 @@ interface PartitionState { /* Community-ID sort helper (used by compact) */ /* ------------------------------------------------------------------ */ +/** Comparator: descending by community size, tie-broken by node count then id. */ +function compareBySizeDesc( + communityTotalSize: Float64Array, + communityNodeCount: Int32Array, +): (a: number, b: number) => number { + return (a, b) => + fget(communityTotalSize, b) - fget(communityTotalSize, a) || + iget(communityNodeCount, b) - iget(communityNodeCount, a) || + a - b; +} + +/** Comparator: respects a user-provided label map, falling back to size-desc for unmapped ids. */ +function compareByPreserveMap( + preserveMap: Map, + communityTotalSize: Float64Array, + communityNodeCount: Int32Array, +): (a: number, b: number) => number { + const fallback = compareBySizeDesc(communityTotalSize, communityNodeCount); + return (a, b) => { + const pa = preserveMap.get(a); + const pb = preserveMap.get(b); + if (pa != null && pb != null && pa !== pb) return pa - pb; + if (pa != null && pb == null) return -1; + if (pb != null && pa == null) return 1; + return fallback(a, b); + }; +} + /** * Sort community IDs according to the compaction options: preserve original * order, respect a user-provided label map, or sort by descending size. @@ -88,26 +116,9 @@ function buildSortedCommunityIds( if (opts.keepOldOrder) { ids.sort((a, b) => a - b); } else if (opts.preserveMap instanceof Map) { - const preserveMap = opts.preserveMap; - ids.sort((a, b) => { - const pa = preserveMap.get(a); - const pb = preserveMap.get(b); - if (pa != null && pb != null && pa !== pb) return pa - pb; - if (pa != null && pb == null) return -1; - if (pb != null && pa == null) return 1; - return ( - fget(communityTotalSize, b) - fget(communityTotalSize, a) || - iget(communityNodeCount, b) - iget(communityNodeCount, a) || - a - b - ); - }); + ids.sort(compareByPreserveMap(opts.preserveMap, communityTotalSize, communityNodeCount)); } else { - ids.sort( - (a, b) => - fget(communityTotalSize, b) - fget(communityTotalSize, a) || - iget(communityNodeCount, b) - iget(communityNodeCount, a) || - a - b, - ); + ids.sort(compareBySizeDesc(communityTotalSize, communityNodeCount)); } } @@ -273,30 +284,55 @@ function computeDeltaModularityDirected( return deltaInternal - deltaExpected; } +/** computeCpmEdgeWeights — directed branch: in+out weight, plus self-loop correction. */ +function computeCpmEdgeWeightsDirected( + s: PartitionState, + v: number, + oldC: number, + newC: number, +): { wOld: number; wNew: number; selfCorrection: number } { + const wOld: number = + (fget(s.outEdgeWeightToCommunity, oldC) || 0) + (fget(s.inEdgeWeightFromCommunity, oldC) || 0); + const wNew: number = + newC < s.outEdgeWeightToCommunity.length + ? (fget(s.outEdgeWeightToCommunity, newC) || 0) + + (fget(s.inEdgeWeightFromCommunity, newC) || 0) + : 0; + // Self-loop correction (see cpm.ts diffCPM) + const selfCorrection: number = 2 * (fget(s.graph.selfLoop, v) || 0); + return { wOld, wNew, selfCorrection }; +} + +/** computeCpmEdgeWeights — undirected branch: single neighbor-weight-to-community value. */ +function computeCpmEdgeWeightsUndirected( + s: PartitionState, + oldC: number, + newC: number, +): { wOld: number; wNew: number; selfCorrection: number } { + const wOld: number = fget(s.neighborEdgeWeightToCommunity, oldC) || 0; + const wNew: number = + newC < s.neighborEdgeWeightToCommunity.length + ? fget(s.neighborEdgeWeightToCommunity, newC) || 0 + : 0; + return { wOld, wNew, selfCorrection: 0 }; +} + +/** Directed/undirected edge-weight-to-community split used by computeDeltaCPM. */ +function computeCpmEdgeWeights( + s: PartitionState, + v: number, + oldC: number, + newC: number, +): { wOld: number; wNew: number; selfCorrection: number } { + return s.graph.directed + ? computeCpmEdgeWeightsDirected(s, v, oldC, newC) + : computeCpmEdgeWeightsUndirected(s, oldC, newC); +} + function computeDeltaCPM(s: PartitionState, v: number, newC: number, gamma: number = 1.0): number { const oldC: number = iget(s.nodeCommunity, v); if (newC === oldC) return 0; - let w_old: number; - let w_new: number; - let selfCorrection: number = 0; - if (s.graph.directed) { - w_old = - (fget(s.outEdgeWeightToCommunity, oldC) || 0) + - (fget(s.inEdgeWeightFromCommunity, oldC) || 0); - w_new = - newC < s.outEdgeWeightToCommunity.length - ? (fget(s.outEdgeWeightToCommunity, newC) || 0) + - (fget(s.inEdgeWeightFromCommunity, newC) || 0) - : 0; - // Self-loop correction (see cpm.ts diffCPM) - selfCorrection = 2 * (fget(s.graph.selfLoop, v) || 0); - } else { - w_old = fget(s.neighborEdgeWeightToCommunity, oldC) || 0; - w_new = - newC < s.neighborEdgeWeightToCommunity.length - ? fget(s.neighborEdgeWeightToCommunity, newC) || 0 - : 0; - } + const { wOld: w_old, wNew: w_new, selfCorrection } = computeCpmEdgeWeights(s, v, oldC, newC); const nodeSz: number = fget(s.graph.size, v) || 1; const sizeOld: number = fget(s.communityTotalSize, oldC) || 0; const sizeNew: number = newC < s.communityTotalSize.length ? fget(s.communityTotalSize, newC) : 0; @@ -307,6 +343,75 @@ function computeDeltaCPM(s: PartitionState, v: number, newC: number, gamma: numb /* Extracted: node move */ /* ------------------------------------------------------------------ */ +/** Directed/undirected community strength-total delta applied by moveNode. */ +function applyMoveStrengthTotals( + s: PartitionState, + oldC: number, + newC: number, + strengthOutV: number, + strengthInV: number, +): void { + if (s.graph.directed) { + s.communityTotalOutStrength[oldC] = fget(s.communityTotalOutStrength, oldC) - strengthOutV; + s.communityTotalOutStrength[newC] = fget(s.communityTotalOutStrength, newC) + strengthOutV; + s.communityTotalInStrength[oldC] = fget(s.communityTotalInStrength, oldC) - strengthInV; + s.communityTotalInStrength[newC] = fget(s.communityTotalInStrength, newC) + strengthInV; + } else { + s.communityTotalStrength[oldC] = fget(s.communityTotalStrength, oldC) - strengthOutV; + s.communityTotalStrength[newC] = fget(s.communityTotalStrength, newC) + strengthOutV; + } +} + +/** applyMoveInternalEdgeWeightDelta — directed branch. */ +function applyMoveInternalEdgeWeightDeltaDirected( + s: PartitionState, + oldC: number, + newC: number, + selfLoopWeight: number, +): void { + const outToOld: number = fget(s.outEdgeWeightToCommunity, oldC) || 0; + const inFromOld: number = fget(s.inEdgeWeightFromCommunity, oldC) || 0; + const outToNew: number = + newC < s.outEdgeWeightToCommunity.length ? fget(s.outEdgeWeightToCommunity, newC) || 0 : 0; + const inFromNew: number = + newC < s.inEdgeWeightFromCommunity.length ? fget(s.inEdgeWeightFromCommunity, newC) || 0 : 0; + // outToOld/inFromOld already include the self-loop weight (self-loops are + // in outEdges/inEdges), so subtract it once to avoid triple-counting. + s.communityInternalEdgeWeight[oldC] = + fget(s.communityInternalEdgeWeight, oldC) - (outToOld + inFromOld - selfLoopWeight); + s.communityInternalEdgeWeight[newC] = + fget(s.communityInternalEdgeWeight, newC) + (outToNew + inFromNew + selfLoopWeight); +} + +/** applyMoveInternalEdgeWeightDelta — undirected branch. */ +function applyMoveInternalEdgeWeightDeltaUndirected( + s: PartitionState, + oldC: number, + newC: number, + selfLoopWeight: number, +): void { + const weightToOld: number = fget(s.neighborEdgeWeightToCommunity, oldC) || 0; + const weightToNew: number = fget(s.neighborEdgeWeightToCommunity, newC) || 0; + s.communityInternalEdgeWeight[oldC] = + fget(s.communityInternalEdgeWeight, oldC) - (2 * weightToOld + selfLoopWeight); + s.communityInternalEdgeWeight[newC] = + fget(s.communityInternalEdgeWeight, newC) + (2 * weightToNew + selfLoopWeight); +} + +/** Directed/undirected community internal-edge-weight delta applied by moveNode. */ +function applyMoveInternalEdgeWeightDelta( + s: PartitionState, + oldC: number, + newC: number, + selfLoopWeight: number, +): void { + if (s.graph.directed) { + applyMoveInternalEdgeWeightDeltaDirected(s, oldC, newC, selfLoopWeight); + } else { + applyMoveInternalEdgeWeightDeltaUndirected(s, oldC, newC, selfLoopWeight); + } +} + function moveNode(s: PartitionState, v: number, newC: number): boolean { const oldC: number = iget(s.nodeCommunity, v); if (oldC === newC) return false; @@ -323,37 +428,9 @@ function moveNode(s: PartitionState, v: number, newC: number): boolean { s.communityNodeCount[newC] = iget(s.communityNodeCount, newC) + 1; s.communityTotalSize[oldC] = fget(s.communityTotalSize, oldC) - nodeSz; s.communityTotalSize[newC] = fget(s.communityTotalSize, newC) + nodeSz; - if (s.graph.directed) { - s.communityTotalOutStrength[oldC] = fget(s.communityTotalOutStrength, oldC) - strengthOutV; - s.communityTotalOutStrength[newC] = fget(s.communityTotalOutStrength, newC) + strengthOutV; - s.communityTotalInStrength[oldC] = fget(s.communityTotalInStrength, oldC) - strengthInV; - s.communityTotalInStrength[newC] = fget(s.communityTotalInStrength, newC) + strengthInV; - } else { - s.communityTotalStrength[oldC] = fget(s.communityTotalStrength, oldC) - strengthOutV; - s.communityTotalStrength[newC] = fget(s.communityTotalStrength, newC) + strengthOutV; - } - if (s.graph.directed) { - const outToOld: number = fget(s.outEdgeWeightToCommunity, oldC) || 0; - const inFromOld: number = fget(s.inEdgeWeightFromCommunity, oldC) || 0; - const outToNew: number = - newC < s.outEdgeWeightToCommunity.length ? fget(s.outEdgeWeightToCommunity, newC) || 0 : 0; - const inFromNew: number = - newC < s.inEdgeWeightFromCommunity.length ? fget(s.inEdgeWeightFromCommunity, newC) || 0 : 0; - // outToOld/inFromOld already include the self-loop weight (self-loops are - // in outEdges/inEdges), so subtract it once to avoid triple-counting. - s.communityInternalEdgeWeight[oldC] = - fget(s.communityInternalEdgeWeight, oldC) - (outToOld + inFromOld - selfLoopWeight); - s.communityInternalEdgeWeight[newC] = - fget(s.communityInternalEdgeWeight, newC) + (outToNew + inFromNew + selfLoopWeight); - } else { - const weightToOld: number = fget(s.neighborEdgeWeightToCommunity, oldC) || 0; - const weightToNew: number = fget(s.neighborEdgeWeightToCommunity, newC) || 0; - s.communityInternalEdgeWeight[oldC] = - fget(s.communityInternalEdgeWeight, oldC) - (2 * weightToOld + selfLoopWeight); - s.communityInternalEdgeWeight[newC] = - fget(s.communityInternalEdgeWeight, newC) + (2 * weightToNew + selfLoopWeight); - } + applyMoveStrengthTotals(s, oldC, newC, strengthOutV, strengthInV); + applyMoveInternalEdgeWeightDelta(s, oldC, newC, selfLoopWeight); s.nodeCommunity[v] = newC; return true; diff --git a/src/graph/model.ts b/src/graph/model.ts index d34270aa4..3961eb97f 100644 --- a/src/graph/model.ts +++ b/src/graph/model.ts @@ -228,10 +228,10 @@ export class CodeGraph { /** Merge another graph into this one. Nodes/edges from other override on conflict. */ merge(other: CodeGraph): this { for (const [id, attrs] of other.nodes()) { - this.addNode(id, attrs); + this.addNode(id, { ...attrs }); } for (const [src, tgt, attrs] of other.edges()) { - this.addEdge(src, tgt, attrs); + this.addEdge(src, tgt, { ...attrs }); } return this; } diff --git a/tests/integration/complexity.test.ts b/tests/integration/complexity.test.ts index ddfbd8a81..62f4a9b8c 100644 --- a/tests/integration/complexity.test.ts +++ b/tests/integration/complexity.test.ts @@ -14,9 +14,13 @@ import { initSchema } from '../../src/db/index.js'; import { complexityData } from '../../src/features/complexity.js'; import { loadConfig } from '../../src/infrastructure/config.js'; -vi.mock('../../src/infrastructure/config.js', () => ({ - loadConfig: vi.fn(() => ({})), -})); +vi.mock('../../src/infrastructure/config.js', async (importOriginal) => { + const actual = await importOriginal(); + return { + ...actual, + loadConfig: vi.fn(() => ({})), + }; +}); // ─── Helpers ───────────────────────────────────────────────────────────