diff --git a/scripts/token-benchmark.ts b/scripts/token-benchmark.ts index 1aa4d3951..d34268ead 100644 --- a/scripts/token-benchmark.ts +++ b/scripts/token-benchmark.ts @@ -265,6 +265,20 @@ function round1(n) { return Math.round(n * 10) / 10; } +/** + * Run `fn` `runs` times (default `PERF_RUNS`), recording the elapsed + * milliseconds per run, and return the median duration. + */ +async function timeMedian(fn, runs = PERF_RUNS) { + const timings = []; + for (let i = 0; i < runs; i++) { + const start = performance.now(); + await fn(); + timings.push(performance.now() - start); + } + return median(timings); +} + /** * Run build/query/stats benchmarks against the Next.js graph. * Reuses the same codegraph APIs as the existing benchmark scripts. @@ -306,24 +320,18 @@ async function runPerfBenchmarks(nextjsDir) { const buildResults = {}; for (const engine of engines) { console.error(` Full build (${engine})...`); - const timings = []; - for (let i = 0; i < PERF_RUNS; i++) { - if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath); - const start = performance.now(); - await buildGraph(nextjsDir, { engine, incremental: false }); - timings.push(performance.now() - start); - } - const fullBuildMs = Math.round(median(timings)); + const fullBuildMs = Math.round( + await timeMedian(async () => { + if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath); + await buildGraph(nextjsDir, { engine, incremental: false }); + }), + ); // No-op rebuild console.error(` No-op rebuild (${engine})...`); - const noopTimings = []; - for (let i = 0; i < PERF_RUNS; i++) { - const start = performance.now(); - await buildGraph(nextjsDir, { engine, incremental: true }); - noopTimings.push(performance.now() - start); - } - const noopRebuildMs = Math.round(median(noopTimings)); + const noopRebuildMs = Math.round( + await timeMedian(() => buildGraph(nextjsDir, { engine, incremental: true })), + ); buildResults[engine] = { fullBuildMs, noopRebuildMs }; console.error(` full=${fullBuildMs}ms 
noop=${noopRebuildMs}ms`); @@ -370,23 +378,14 @@ async function runPerfBenchmarks(nextjsDir) { for (const depth of [1, 3, 5]) { // fnDeps - const depsTimings = []; - for (let i = 0; i < PERF_RUNS; i++) { - const start = performance.now(); - fnDepsData(hubName, dbPath, { depth, noTests: true }); - depsTimings.push(performance.now() - start); - } + queryResults[`fnDeps_depth${depth}Ms`] = round1( + await timeMedian(() => fnDepsData(hubName, dbPath, { depth, noTests: true })), + ); // fnImpact - const impactTimings = []; - for (let i = 0; i < PERF_RUNS; i++) { - const start = performance.now(); - fnImpactData(hubName, dbPath, { depth, noTests: true }); - impactTimings.push(performance.now() - start); - } - - queryResults[`fnDeps_depth${depth}Ms`] = round1(median(depsTimings)); - queryResults[`fnImpact_depth${depth}Ms`] = round1(median(impactTimings)); + queryResults[`fnImpact_depth${depth}Ms`] = round1( + await timeMedian(() => fnImpactData(hubName, dbPath, { depth, noTests: true })), + ); } console.error( diff --git a/src/ast-analysis/visitor-utils.ts b/src/ast-analysis/visitor-utils.ts index 4da9a25fa..a207b8564 100644 --- a/src/ast-analysis/visitor-utils.ts +++ b/src/ast-analysis/visitor-utils.ts @@ -93,6 +93,81 @@ export function extractParams( return result; } +/** + * Resolution result for a single node in the parameter-name worklist: either + * a base case with names to record, or intermediate `next` nodes that still + * need to be resolved. + */ +type ParamNodeResolution = { names?: string[]; next?: TreeSitterNode[] }; + +/** One entry in the node-type -> handler dispatch table used by `resolveParamNode`. 
*/ +interface ParamNodeHandler { + matches(nodeType: string, rules: LanguageRules): boolean; + resolve(node: TreeSitterNode, rules: LanguageRules): ParamNodeResolution | null; +} + +function resolveWrapperParam(node: TreeSitterNode): ParamNodeResolution | null { + const pattern = node.childForFieldName('pattern') || node.childForFieldName('name'); + return pattern ? { next: [pattern] } : null; +} + +function resolveDefaultParam(node: TreeSitterNode): ParamNodeResolution | null { + const left = node.childForFieldName('left') || node.childForFieldName('name'); + return left ? { next: [left] } : null; +} + +function resolveRestParam(node: TreeSitterNode, rules: LanguageRules): ParamNodeResolution | null { + const nameNode = node.childForFieldName('name'); + if (nameNode) return { names: [nameNode.text] }; + for (const child of node.namedChildren) { + if (child.type === rules.paramIdentifier) return { names: [child.text] }; + } + return null; +} + +function resolveObjectDestructParam( + node: TreeSitterNode, + rules: LanguageRules, +): ParamNodeResolution { + return { next: collectObjectDestructChildren(node, rules) }; +} + +function resolveArrayDestructParam(node: TreeSitterNode): ParamNodeResolution { + return { next: [...node.namedChildren] }; +} + +/** + * Ordered node-type -> handler dispatch table for `resolveParamNode`. Order + * matters: earlier entries take precedence, matching the original + * if/else-if cascade exactly. 
+ */ +const PARAM_NODE_HANDLERS: ParamNodeHandler[] = [ + { + matches: (t, rules) => t === rules.paramIdentifier, + resolve: (node) => ({ names: [node.text] }), + }, + { + matches: (t, rules) => rules.paramWrapperTypes.has(t), + resolve: resolveWrapperParam, + }, + { + matches: (t, rules) => !!rules.defaultParamType && t === rules.defaultParamType, + resolve: resolveDefaultParam, + }, + { + matches: (t, rules) => !!rules.restParamType && t === rules.restParamType, + resolve: resolveRestParam, + }, + { + matches: (t, rules) => !!rules.objectDestructType && t === rules.objectDestructType, + resolve: resolveObjectDestructParam, + }, + { + matches: (t, rules) => !!rules.arrayDestructType && t === rules.arrayDestructType, + resolve: resolveArrayDestructParam, + }, +]; + /** * Resolve a single parameter node to either a direct list of names (base case) * or a list of child nodes that still need processing. Returns `null` if the @@ -102,46 +177,16 @@ export function extractParams( * `extractParamNames`, breaking the 3-node mutual recursion cycle between * `extractParamNames`, `extractObjectDestructNames`, and `extractArrayDestructNames`. */ -function resolveParamNode( - node: TreeSitterNode, - rules: LanguageRules, -): { names?: string[]; next?: TreeSitterNode[] } | null { - const t = node.type; - +function resolveParamNode(node: TreeSitterNode, rules: LanguageRules): ParamNodeResolution | null { if (rules.extractParamName) { const result = rules.extractParamName(node); if (result) return { names: result }; } - if (t === rules.paramIdentifier) return { names: [node.text] }; - - if (rules.paramWrapperTypes.has(t)) { - const pattern = node.childForFieldName('pattern') || node.childForFieldName('name'); - return pattern ? { next: [pattern] } : null; - } - - if (rules.defaultParamType && t === rules.defaultParamType) { - const left = node.childForFieldName('left') || node.childForFieldName('name'); - return left ? 
{ next: [left] } : null; - } - - if (rules.restParamType && t === rules.restParamType) { - const nameNode = node.childForFieldName('name'); - if (nameNode) return { names: [nameNode.text] }; - for (const child of node.namedChildren) { - if (child.type === rules.paramIdentifier) return { names: [child.text] }; - } - return null; - } - - if (rules.objectDestructType && t === rules.objectDestructType) { - return { next: collectObjectDestructChildren(node, rules) }; - } - - if (rules.arrayDestructType && t === rules.arrayDestructType) { - return { next: [...node.namedChildren] }; + const t = node.type; + for (const handler of PARAM_NODE_HANDLERS) { + if (handler.matches(t, rules)) return handler.resolve(node, rules); } - return null; } @@ -170,6 +215,41 @@ function collectObjectDestructChildren( return next; } +/** Is this node a shorthand identifier inside an object destructuring pattern? */ +function isShorthandPropPattern(node: TreeSitterNode, rules: LanguageRules): boolean { + return !!rules.shorthandPropPattern && node.type === rules.shorthandPropPattern; +} + +/** + * Push nodes onto the worklist stack in reverse order so that popping them + * (LIFO) visits them in the same left-to-right order as the original + * recursive traversal. + */ +function pushParamWorklist(stack: TreeSitterNode[], nodes: TreeSitterNode[]): void { + for (let i = nodes.length - 1; i >= 0; i--) { + const child = nodes[i]; + if (child) stack.push(child); + } +} + +/** Resolve one worklist entry: record any names, queue any further nodes to visit. 
*/ +function visitParamWorklistNode( + current: TreeSitterNode, + rules: LanguageRules, + names: string[], + stack: TreeSitterNode[], +): void { + if (isShorthandPropPattern(current, rules)) { + names.push(current.text); + return; + } + + const resolved = resolveParamNode(current, rules); + if (!resolved) return; + if (resolved.names) names.push(...resolved.names); + if (resolved.next) pushParamWorklist(stack, resolved.next); +} + /** * Extract parameter names from a single parameter node. * @@ -184,24 +264,7 @@ export function extractParamNames(node: TreeSitterNode | null, rules: LanguageRu while (stack.length > 0) { const current = stack.pop(); - if (!current) continue; - - // Shorthand identifier inside an object destructuring is just the node's text. - if (rules.shorthandPropPattern && current.type === rules.shorthandPropPattern) { - names.push(current.text); - continue; - } - - const resolved = resolveParamNode(current, rules); - if (!resolved) continue; - if (resolved.names) names.push(...resolved.names); - if (resolved.next) { - // Push in reverse so traversal order matches the previous recursive order. 
- for (let i = resolved.next.length - 1; i >= 0; i--) { - const child = resolved.next[i]; - if (child) stack.push(child); - } - } + if (current) visitParamWorklistNode(current, rules, names, stack); } return names; diff --git a/src/ast-analysis/visitors/cfg-conditionals.ts b/src/ast-analysis/visitors/cfg-conditionals.ts index e96460ba8..3ffdb7ff2 100644 --- a/src/ast-analysis/visitors/cfg-conditionals.ts +++ b/src/ast-analysis/visitors/cfg-conditionals.ts @@ -6,7 +6,32 @@ import type { LoopCtx, ProcessStatementsFn, } from './cfg-shared.js'; -import { getBodyStatements, isCaseNode, isIfNode, nn } from './cfg-shared.js'; +import { getBodyStatements, isCaseNode, isIfNode, requireNode } from './cfg-shared.js'; + +/** + * Create a branch block off `condBlock`, wire the `branchKind` edge into it, + * run `runBranchBody` to populate the branch and get its exit block, then — + * if the branch falls through (exit block is non-null) — wire a + * `fallthrough` edge from that exit into `joinBlock`. + * + * Shared by `processIf`, `processAlternative`, and `processElifSiblings` for + * the true-branch / else-branch / else-if-branch shapes, which all follow + * the same make-block -> add-edge -> run-body -> fallthrough-edge sequence + * (previously hand-inlined 6+ times across those three functions). 
+ */ +function processBranch( + condBlock: CfgBlockInternal, + joinBlock: CfgBlockInternal, + S: FuncState, + branchKind: 'branch_true' | 'branch_false', + label: string, + runBranchBody: (branchBlock: CfgBlockInternal) => CfgBlockInternal | null, +): void { + const branchBlock = S.makeBlock(branchKind, null, null, label); + S.addEdge(condBlock, branchBlock, branchKind); + const branchEnd = runBranchBody(branchBlock); + if (branchEnd) S.addEdge(branchEnd, joinBlock, 'fallthrough'); +} export function processIf( ifStmt: TreeSitterNode, @@ -29,13 +54,10 @@ export function processIf( const consequentField = cfgRules.ifConsequentField || 'consequence'; const consequent = ifStmt.childForFieldName(consequentField); - const trueBlock = S.makeBlock('branch_true', null, null, 'then'); - S.addEdge(condBlock, trueBlock, 'branch_true'); - const trueStmts = getBodyStatements(consequent, cfgRules); - const trueEnd = processStatements(trueStmts, trueBlock, S, cfgRules); - if (trueEnd) { - S.addEdge(trueEnd, joinBlock, 'fallthrough'); - } + processBranch(condBlock, joinBlock, S, 'branch_true', 'then', (trueBlock) => { + const trueStmts = getBodyStatements(consequent, cfgRules); + return processStatements(trueStmts, trueBlock, S, cfgRules); + }); if (cfgRules.elifNode) { processElifSiblings(ifStmt, condBlock, joinBlock, S, cfgRules, processStatements); @@ -62,33 +84,29 @@ function processAlternative( if (cfgRules.elseViaAlternative && alternative.type !== cfgRules.elseClause) { if (isIfNode(alternative.type, cfgRules)) { - const falseBlock = S.makeBlock('branch_false', null, null, 'else-if'); - S.addEdge(condBlock, falseBlock, 'branch_false'); - const elseIfEnd = processIf(alternative, falseBlock, S, cfgRules, processStatements); - if (elseIfEnd) S.addEdge(elseIfEnd, joinBlock, 'fallthrough'); + processBranch(condBlock, joinBlock, S, 'branch_false', 'else-if', (falseBlock) => + processIf(alternative, falseBlock, S, cfgRules, processStatements), + ); } else { - const falseBlock = 
S.makeBlock('branch_false', null, null, 'else'); - S.addEdge(condBlock, falseBlock, 'branch_false'); - const falseStmts = getBodyStatements(alternative, cfgRules); - const falseEnd = processStatements(falseStmts, falseBlock, S, cfgRules); - if (falseEnd) S.addEdge(falseEnd, joinBlock, 'fallthrough'); + processBranch(condBlock, joinBlock, S, 'branch_false', 'else', (falseBlock) => { + const falseStmts = getBodyStatements(alternative, cfgRules); + return processStatements(falseStmts, falseBlock, S, cfgRules); + }); } } else if (alternative.type === cfgRules.elseClause) { const elseChildren: TreeSitterNode[] = []; for (let i = 0; i < alternative.namedChildCount; i++) { - elseChildren.push(nn(alternative.namedChild(i))); + elseChildren.push(requireNode(alternative.namedChild(i))); } const firstChild = elseChildren[0]; if (elseChildren.length === 1 && firstChild && isIfNode(firstChild.type, cfgRules)) { - const falseBlock = S.makeBlock('branch_false', null, null, 'else-if'); - S.addEdge(condBlock, falseBlock, 'branch_false'); - const elseIfEnd = processIf(firstChild, falseBlock, S, cfgRules, processStatements); - if (elseIfEnd) S.addEdge(elseIfEnd, joinBlock, 'fallthrough'); + processBranch(condBlock, joinBlock, S, 'branch_false', 'else-if', (falseBlock) => + processIf(firstChild, falseBlock, S, cfgRules, processStatements), + ); } else { - const falseBlock = S.makeBlock('branch_false', null, null, 'else'); - S.addEdge(condBlock, falseBlock, 'branch_false'); - const falseEnd = processStatements(elseChildren, falseBlock, S, cfgRules); - if (falseEnd) S.addEdge(falseEnd, joinBlock, 'fallthrough'); + processBranch(condBlock, joinBlock, S, 'branch_false', 'else', (falseBlock) => + processStatements(elseChildren, falseBlock, S, cfgRules), + ); } } } @@ -105,7 +123,7 @@ function processElifSiblings( let foundElse = false; for (let i = 0; i < ifStmt.namedChildCount; i++) { - const child = nn(ifStmt.namedChild(i)); + const child = requireNode(ifStmt.namedChild(i)); if 
(child.type === cfgRules.elifNode) { const elifCondBlock = S.makeBlock( @@ -118,17 +136,13 @@ function processElifSiblings( const elifConsequentField = cfgRules.ifConsequentField || 'consequence'; const elifConsequent = child.childForFieldName(elifConsequentField); - const elifTrueBlock = S.makeBlock('branch_true', null, null, 'then'); - S.addEdge(elifCondBlock, elifTrueBlock, 'branch_true'); - const elifTrueStmts = getBodyStatements(elifConsequent, cfgRules); - const elifTrueEnd = processStatements(elifTrueStmts, elifTrueBlock, S, cfgRules); - if (elifTrueEnd) S.addEdge(elifTrueEnd, joinBlock, 'fallthrough'); + processBranch(elifCondBlock, joinBlock, S, 'branch_true', 'then', (elifTrueBlock) => { + const elifTrueStmts = getBodyStatements(elifConsequent, cfgRules); + return processStatements(elifTrueStmts, elifTrueBlock, S, cfgRules); + }); lastCondBlock = elifCondBlock; } else if (child.type === cfgRules.elseClause) { - const elseBlock = S.makeBlock('branch_false', null, null, 'else'); - S.addEdge(lastCondBlock, elseBlock, 'branch_false'); - const elseBody = child.childForFieldName('body'); let elseStmts: TreeSitterNode[]; if (elseBody) { @@ -136,11 +150,12 @@ function processElifSiblings( } else { elseStmts = []; for (let j = 0; j < child.namedChildCount; j++) { - elseStmts.push(nn(child.namedChild(j))); + elseStmts.push(requireNode(child.namedChild(j))); } } - const elseEnd = processStatements(elseStmts, elseBlock, S, cfgRules); - if (elseEnd) S.addEdge(elseEnd, joinBlock, 'fallthrough'); + processBranch(lastCondBlock, joinBlock, S, 'branch_false', 'else', (elseBlock) => + processStatements(elseStmts, elseBlock, S, cfgRules), + ); foundElse = true; } @@ -177,7 +192,7 @@ export function processSwitch( let hasDefault = false; for (let i = 0; i < container.namedChildCount; i++) { - const caseClause = nn(container.namedChild(i)); + const caseClause = requireNode(container.namedChild(i)); const isDefault = caseClause.type === cfgRules.defaultNode; const isCase = 
isDefault || isCaseNode(caseClause.type, cfgRules); @@ -212,11 +227,11 @@ function extractCaseBody(caseClause: TreeSitterNode, cfgRules: AnyRules): TreeSi const valueNode = caseClause.childForFieldName('value'); const patternNode = caseClause.childForFieldName('pattern'); for (let j = 0; j < caseClause.namedChildCount; j++) { - const child = nn(caseClause.namedChild(j)); + const child = requireNode(caseClause.namedChild(j)); if (child !== valueNode && child !== patternNode && child.type !== 'switch_label') { if (child.type === 'statement_list') { for (let k = 0; k < child.namedChildCount; k++) { - stmts.push(nn(child.namedChild(k))); + stmts.push(requireNode(child.namedChild(k))); } } else { stmts.push(child); diff --git a/src/ast-analysis/visitors/cfg-shared.ts b/src/ast-analysis/visitors/cfg-shared.ts index 9987918e2..721e85854 100644 --- a/src/ast-analysis/visitors/cfg-shared.ts +++ b/src/ast-analysis/visitors/cfg-shared.ts @@ -6,11 +6,11 @@ export type AnyRules = any; export type ProcessStatementsFn = ( stmts: TreeSitterNode[], currentBlock: CfgBlockInternal, - S: FuncState, + state: FuncState, cfgRules: AnyRules, ) => CfgBlockInternal | null; -export function nn(node: TreeSitterNode | null, context?: string): TreeSitterNode { +export function requireNode(node: TreeSitterNode | null, context?: string): TreeSitterNode { if (node === null) { throw new Error(`Unexpected null tree-sitter node${context ? 
` (${context})` : ''}`); } @@ -114,18 +114,18 @@ export function isControlFlow(type: string, cfgRules: AnyRules): boolean { export function effectiveNode(node: TreeSitterNode, cfgRules: AnyRules): TreeSitterNode { if (node.type === 'expression_statement' && node.namedChildCount === 1) { - const inner = nn(node.namedChild(0)); + const inner = requireNode(node.namedChild(0)); if (isControlFlow(inner.type, cfgRules)) return inner; } return node; } export function registerLabelCtx( - S: FuncState, + state: FuncState, headerBlock: CfgBlockInternal, exitBlock: CfgBlockInternal, ): void { - for (const [, ctx] of Array.from(S.labelMap)) { + for (const [, ctx] of Array.from(state.labelMap)) { if (!ctx.headerBlock) { ctx.headerBlock = headerBlock; ctx.exitBlock = exitBlock; @@ -141,10 +141,10 @@ export function getBodyStatements( if (isBlockNode(bodyNode.type, cfgRules)) { const stmts: TreeSitterNode[] = []; for (let i = 0; i < bodyNode.namedChildCount; i++) { - const child = nn(bodyNode.namedChild(i)); + const child = requireNode(bodyNode.namedChild(i)); if (child.type === 'statement_list') { for (let j = 0; j < child.namedChildCount; j++) { - stmts.push(nn(child.namedChild(j))); + stmts.push(requireNode(child.namedChild(j))); } } else { stmts.push(child); diff --git a/src/ast-analysis/visitors/cfg-try-catch.ts b/src/ast-analysis/visitors/cfg-try-catch.ts index ea44ddb51..6211cba5a 100644 --- a/src/ast-analysis/visitors/cfg-try-catch.ts +++ b/src/ast-analysis/visitors/cfg-try-catch.ts @@ -1,6 +1,6 @@ import type { TreeSitterNode } from '../../types.js'; import type { AnyRules, CfgBlockInternal, FuncState, ProcessStatementsFn } from './cfg-shared.js'; -import { getBodyStatements, nn } from './cfg-shared.js'; +import { getBodyStatements, requireNode } from './cfg-shared.js'; export function processTryCatch( tryStmt: TreeSitterNode, @@ -23,7 +23,7 @@ export function processTryCatch( tryBodyStart = tryStmt.startPosition.row + 1; tryStmts = []; for (let i = 0; i < 
tryStmt.namedChildCount; i++) { - const child = nn(tryStmt.namedChild(i)); + const child = requireNode(tryStmt.namedChild(i)); if (cfgRules.catchNode && child.type === cfgRules.catchNode) continue; if (cfgRules.finallyNode && child.type === cfgRules.finallyNode) continue; tryStmts.push(child); @@ -63,7 +63,7 @@ function findTryHandlers( let catchHandler: TreeSitterNode | null = null; let finallyHandler: TreeSitterNode | null = null; for (let i = 0; i < tryStmt.namedChildCount; i++) { - const child = nn(tryStmt.namedChild(i)); + const child = requireNode(tryStmt.namedChild(i)); if (cfgRules.catchNode && child.type === cfgRules.catchNode) catchHandler = child; if (cfgRules.finallyNode && child.type === cfgRules.finallyNode) finallyHandler = child; } @@ -90,7 +90,7 @@ function processCatchHandler( } else { catchStmts = []; for (let i = 0; i < catchHandler.namedChildCount; i++) { - catchStmts.push(nn(catchHandler.namedChild(i))); + catchStmts.push(requireNode(catchHandler.namedChild(i))); } } const catchEnd = processStatements(catchStmts, catchBlock, S, cfgRules); diff --git a/src/cli/commands/info.ts b/src/cli/commands/info.ts index a68fad60f..faf135e60 100644 --- a/src/cli/commands/info.ts +++ b/src/cli/commands/info.ts @@ -1,6 +1,96 @@ import { debug } from '../../infrastructure/logger.js'; import { toErrorMessage } from '../../shared/errors.js'; -import type { CommandDefinition } from '../types.js'; +import type { NativeAddon } from '../../types.js'; +import type { CliContext, CommandDefinition, CommandOpts } from '../types.js'; + +/** Print the "Native version" diagnostic line (reconciles npm package vs. loaded binary version). */ +function printNativeVersionInfo( + native: NativeAddon, + getNativePackageVersion: () => string | null, +): void { + const binaryVersion = + typeof native.engineVersion === 'function' ? native.engineVersion() : 'unknown'; + const pkgVersion = getNativePackageVersion(); + const knownBinaryVersion = binaryVersion !== 'unknown' ? 
binaryVersion : null; + if (pkgVersion && knownBinaryVersion && pkgVersion !== knownBinaryVersion) { + console.log( + ` Native version: ${pkgVersion} (binary built as ${knownBinaryVersion}, engine loaded OK)`, + ); + } else { + console.log(` Native version: ${pkgVersion ?? binaryVersion}`); + } +} + +/** Print the top "Codegraph Diagnostics" block: version, platform, native/active engine info. */ +function printEngineInfo( + ctx: CliContext, + engine: string, + activeName: string, + activeVersion: string | null, + nativeAvailable: boolean, + loadNative: () => NativeAddon | null, + getNativePackageVersion: () => string | null, +): void { + console.log('\nCodegraph Diagnostics'); + console.log('===================='); + console.log(` Version : ${ctx.program.version()}`); + console.log(` Node.js : ${process.version}`); + console.log(` Platform : ${process.platform}-${process.arch}`); + console.log(` Native engine : ${nativeAvailable ? 'available' : 'unavailable'}`); + const native = nativeAvailable ? loadNative() : null; + if (native) { + printNativeVersionInfo(native, getNativePackageVersion); + } + console.log(` Engine flag : --engine ${engine}`); + console.log(` Active engine : ${activeName}${activeVersion ? ` (v${activeVersion})` : ''}`); + console.log(); +} + +/** Print the "Build metadata" block read from the graph DB, if one exists. Never throws. 
*/ +async function printBuildMetadata( + ctx: CliContext, + opts: CommandOpts, + activeName: string, +): Promise<void> { + try { + const { findDbPath, getBuildMeta } = await import('../../db/index.js'); + const Database = (await import('better-sqlite3')).default; + const dbPath = findDbPath(opts.db as string | undefined); + const fs = await import('node:fs'); + if (fs.existsSync(dbPath)) { + const db = new Database(dbPath, { readonly: true }); + const buildEngine = getBuildMeta(db, 'engine'); + const buildVersion = getBuildMeta(db, 'codegraph_version'); + const builtAt = getBuildMeta(db, 'built_at'); + db.close(); + + if (buildEngine || buildVersion || builtAt) { + console.log('Build metadata'); + console.log( + '\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500', + ); + if (buildEngine) console.log(` Engine : ${buildEngine}`); + if (buildVersion) console.log(` Version : ${buildVersion}`); + if (builtAt) console.log(` Built at : ${builtAt}`); + + if (buildVersion && buildVersion !== ctx.program.version()) { + console.log( + ` \u26A0 DB was built with v${buildVersion}, current is v${ctx.program.version()}. Consider: codegraph build --no-incremental`, + ); + } + if (buildEngine && buildEngine !== activeName) { + console.log( + ` \u26A0 DB was built with ${buildEngine} engine, active is ${activeName}. 
Consider: codegraph build --no-incremental`, + ); + } + console.log(); + } + } + } catch (e) { + /* diagnostics must never crash */ + debug(`DB build-metadata diagnostics failed: ${toErrorMessage(e)}`); + } +} export const command: CommandDefinition = { name: 'info', @@ -16,67 +106,16 @@ export const command: CommandDefinition = { const { name: activeName, version: activeVersion } = getActiveEngine({ engine }); const nativeAvailable = isNativeAvailable(); - console.log('\nCodegraph Diagnostics'); - console.log('===================='); - console.log(` Version : ${ctx.program.version()}`); - console.log(` Node.js : ${process.version}`); - console.log(` Platform : ${process.platform}-${process.arch}`); - console.log(` Native engine : ${nativeAvailable ? 'available' : 'unavailable'}`); - if (nativeAvailable) { - const native = loadNative()!; - const binaryVersion = - typeof native.engineVersion === 'function' ? native.engineVersion() : 'unknown'; - const pkgVersion = getNativePackageVersion(); - const knownBinaryVersion = binaryVersion !== 'unknown' ? binaryVersion : null; - if (pkgVersion && knownBinaryVersion && pkgVersion !== knownBinaryVersion) { - console.log( - ` Native version: ${pkgVersion} (binary built as ${knownBinaryVersion}, engine loaded OK)`, - ); - } else { - console.log(` Native version: ${pkgVersion ?? binaryVersion}`); - } - } - console.log(` Engine flag : --engine ${engine}`); - console.log(` Active engine : ${activeName}${activeVersion ? 
` (v${activeVersion})` : ''}`); - console.log(); - - try { - const { findDbPath, getBuildMeta } = await import('../../db/index.js'); - const Database = (await import('better-sqlite3')).default; - const dbPath = findDbPath(opts.db as string | undefined); - const fs = await import('node:fs'); - if (fs.existsSync(dbPath)) { - const db = new Database(dbPath, { readonly: true }); - const buildEngine = getBuildMeta(db, 'engine'); - const buildVersion = getBuildMeta(db, 'codegraph_version'); - const builtAt = getBuildMeta(db, 'built_at'); - db.close(); - - if (buildEngine || buildVersion || builtAt) { - console.log('Build metadata'); - console.log( - '\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500', - ); - if (buildEngine) console.log(` Engine : ${buildEngine}`); - if (buildVersion) console.log(` Version : ${buildVersion}`); - if (builtAt) console.log(` Built at : ${builtAt}`); + printEngineInfo( + ctx, + engine, + activeName, + activeVersion, + nativeAvailable, + loadNative, + getNativePackageVersion, + ); - if (buildVersion && buildVersion !== ctx.program.version()) { - console.log( - ` \u26A0 DB was built with v${buildVersion}, current is v${ctx.program.version()}. Consider: codegraph build --no-incremental`, - ); - } - if (buildEngine && buildEngine !== activeName) { - console.log( - ` \u26A0 DB was built with ${buildEngine} engine, active is ${activeName}. 
Consider: codegraph build --no-incremental`, - ); - } - console.log(); - } - } - } catch (e) { - /* diagnostics must never crash */ - debug(`DB build-metadata diagnostics failed: ${toErrorMessage(e)}`); - } + await printBuildMetadata(ctx, opts, activeName); }, }; diff --git a/src/extractors/csharp.ts b/src/extractors/csharp.ts index 850bb8a34..8a3a06e9e 100644 --- a/src/extractors/csharp.ts +++ b/src/extractors/csharp.ts @@ -335,25 +335,38 @@ function handleCSharpVarDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { if (!typeNode) return; if (typeNode.type === 'implicit_type') { - // var x = new Foo() — infer type from object_creation_expression initializer - if (!ctx.typeMap) return; - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (child?.type !== 'variable_declarator') continue; - const nameNode = child.childForFieldName('name') || child.child(0); - if (nameNode?.type !== 'identifier') continue; - const objCreation = findChild(child, 'object_creation_expression'); - if (!objCreation) continue; - const ctorTypeNode = objCreation.childForFieldName('type'); - if (!ctorTypeNode) continue; - const ctorType = extractCSharpTypeName(ctorTypeNode); - if (ctorType) setTypeMapEntry(ctx.typeMap, nameNode.text, ctorType, 1.0); - } + handleCSharpImplicitVarDecl(node, ctx); return; } const typeName = extractCSharpTypeName(typeNode); if (!typeName) return; + handleCSharpExplicitVarDecl(node, ctx, typeName); +} + +// var x = new Foo() — infer type from object_creation_expression initializer +function handleCSharpImplicitVarDecl(node: TreeSitterNode, ctx: ExtractorOutput): void { + if (!ctx.typeMap) return; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child?.type !== 'variable_declarator') continue; + const nameNode = child.childForFieldName('name') || child.child(0); + if (nameNode?.type !== 'identifier') continue; + const objCreation = findChild(child, 'object_creation_expression'); + if (!objCreation) 
continue; + const ctorTypeNode = objCreation.childForFieldName('type'); + if (!ctorTypeNode) continue; + const ctorType = extractCSharpTypeName(ctorTypeNode); + if (ctorType) setTypeMapEntry(ctx.typeMap, nameNode.text, ctorType, 1.0); + } +} + +// Explicitly-typed declarator list: `Foo x = ..., y = ...;` +function handleCSharpExplicitVarDecl( + node: TreeSitterNode, + ctx: ExtractorOutput, + typeName: string, +): void { for (let i = 0; i < node.childCount; i++) { const child = node.child(i); if (child?.type !== 'variable_declarator') continue; diff --git a/src/extractors/dart.ts b/src/extractors/dart.ts index cd0642f46..2b166f8d5 100644 --- a/src/extractors/dart.ts +++ b/src/extractors/dart.ts @@ -256,58 +256,66 @@ function handleDartSelector(node: TreeSitterNode, ctx: ExtractorOutput): void { if (!argPart) return; const line = node.startPosition.row + 1; + const methodName = resolveDartSelectorMethodName(node); + if (!methodName) return; - // Look for the identifier this selector belongs to. - // Two layouts are possible depending on grammar version: - // A) selector has both unconditional_assignable_selector + argument_part (same node) - // B) one selector node holds unconditional_assignable_selector (.method), - // the next holds argument_part (the call args) — method name is in the previous sibling - const unconditional = findChild(node, 'unconditional_assignable_selector'); - let methodName: string | null = null; + // Function.apply(fn, positionalArgs, namedArgs) — dynamic higher-order dispatch + if (methodName === 'apply' && isDartFunctionApplyCall(node)) { + ctx.calls.push({ + name: '', + line, + dynamic: true, + dynamicKind: 'unresolved-dynamic', + }); + return; + } + + ctx.calls.push({ name: methodName, line }); +} +// Look for the identifier this selector belongs to. 
+// Two layouts are possible depending on grammar version: +// A) selector has both unconditional_assignable_selector + argument_part (same node) +// B) one selector node holds unconditional_assignable_selector (.method), +// the next holds argument_part (the call args) — method name is in the previous sibling +function resolveDartSelectorMethodName(node: TreeSitterNode): string | null { + const unconditional = findChild(node, 'unconditional_assignable_selector'); if (unconditional) { const id = findChild(unconditional, 'identifier'); - if (id) methodName = id.text; - } else { - // Layout B: look at the previous sibling selector for the method name - const parent = node.parent; - if (parent) { - for (let i = 0; i < parent.childCount; i++) { - const sibling = parent.child(i); - if (sibling === node) break; - if (sibling?.type === 'selector') { - const unc2 = findChild(sibling, 'unconditional_assignable_selector'); - if (unc2) { - const id2 = findChild(unc2, 'identifier'); - if (id2) methodName = id2.text; - } - } - } - } + return id ? 
id.text : null; } - if (!methodName) return; + // Layout B: look at the previous sibling selector for the method name + const parent = node.parent; + if (!parent) return null; - // Function.apply(fn, positionalArgs, namedArgs) — dynamic higher-order dispatch - if (methodName === 'apply') { - const parent = node.parent; - if (parent) { - for (let i = 0; i < parent.childCount; i++) { - const sibling = parent.child(i); - if (sibling && sibling !== node && sibling.text === 'Function') { - ctx.calls.push({ - name: '', - line, - dynamic: true, - dynamicKind: 'unresolved-dynamic', - }); - return; - } + let methodName: string | null = null; + for (let i = 0; i < parent.childCount; i++) { + const sibling = parent.child(i); + if (sibling === node) break; + if (sibling?.type === 'selector') { + const unc2 = findChild(sibling, 'unconditional_assignable_selector'); + if (unc2) { + const id2 = findChild(unc2, 'identifier'); + if (id2) methodName = id2.text; } } } + return methodName; +} - ctx.calls.push({ name: methodName, line }); +// Detects `Function.apply(...)` calls: true when a sibling selector's text is +// the literal `Function` identifier preceding this call. 
+function isDartFunctionApplyCall(node: TreeSitterNode): boolean { + const parent = node.parent; + if (!parent) return false; + for (let i = 0; i < parent.childCount; i++) { + const sibling = parent.child(i); + if (sibling && sibling !== node && sibling.text === 'Function') { + return true; + } + } + return false; } function handleDartTypeAlias(node: TreeSitterNode, ctx: ExtractorOutput): void { diff --git a/src/extractors/elixir.ts b/src/extractors/elixir.ts index b1ad19b8b..ffff493dc 100644 --- a/src/extractors/elixir.ts +++ b/src/extractors/elixir.ts @@ -278,25 +278,50 @@ function pushElixirMapValues(node: TreeSitterNode, stack: TreeSitterNode[]): voi for (let i = 0; i < node.childCount; i++) { const content = node.child(i); if (content?.type !== 'map_content') continue; - for (let j = 0; j < content.childCount; j++) { - const kws = content.child(j); - if (kws?.type !== 'keywords') continue; - for (let k = 0; k < kws.childCount; k++) { - const pair = kws.child(k); - if (pair?.type !== 'pair') continue; - for (let p = 0; p < pair.childCount; p++) { - const part = pair.child(p); - if (!part || part.type === 'keyword') continue; - parts.push(part); - } - } - } + parts.push(...collectElixirMapContentParts(content)); } for (let i = parts.length - 1; i >= 0; i--) { stack.push(parts[i] as TreeSitterNode); } } +// Walks a `map_content` node's `keywords` children, collecting every pair's +// value part (see collectElixirPairValueParts) in document order. +function collectElixirMapContentParts(content: TreeSitterNode): TreeSitterNode[] { + const parts: TreeSitterNode[] = []; + for (let j = 0; j < content.childCount; j++) { + const kws = content.child(j); + if (kws?.type !== 'keywords') continue; + parts.push(...collectElixirKeywordsParts(kws)); + } + return parts; +} + +// Walks a `keywords` node's `pair` children, collecting each pair's value +// part in document order. 
+function collectElixirKeywordsParts(kws: TreeSitterNode): TreeSitterNode[] { + const parts: TreeSitterNode[] = []; + for (let k = 0; k < kws.childCount; k++) { + const pair = kws.child(k); + if (pair?.type !== 'pair') continue; + parts.push(...collectElixirPairValueParts(pair)); + } + return parts; +} + +// Collects a single `pair` node's non-keyword children (the value side of +// `key: value`; the leading `struct`/`keyword` child is intentionally +// skipped — see the pushElixirMapValues doc comment). +function collectElixirPairValueParts(pair: TreeSitterNode): TreeSitterNode[] { + const parts: TreeSitterNode[] = []; + for (let p = 0; p < pair.childCount; p++) { + const part = pair.child(p); + if (!part || part.type === 'keyword') continue; + parts.push(part); + } + return parts; +} + function handleDefprotocol(node: TreeSitterNode, ctx: ExtractorOutput): void { const args = findChild(node, 'arguments'); if (!args) return; diff --git a/src/extractors/groovy.ts b/src/extractors/groovy.ts index 9b5162ace..a9bcf26ea 100644 --- a/src/extractors/groovy.ts +++ b/src/extractors/groovy.ts @@ -35,46 +35,8 @@ export function extractGroovySymbols(tree: TreeSitterTree, _filePath: string): E } function walkGroovyNode(node: TreeSitterNode, ctx: ExtractorOutput): void { - switch (node.type) { - case 'class_definition': - case 'class_declaration': - handleGroovyClassDecl(node, ctx); - break; - case 'interface_definition': - case 'interface_declaration': - handleGroovyInterfaceDecl(node, ctx); - break; - case 'enum_definition': - case 'enum_declaration': - handleGroovyEnumDecl(node, ctx); - break; - case 'method_definition': - case 'method_declaration': - handleGroovyMethodDecl(node, ctx); - break; - case 'constructor_definition': - case 'constructor_declaration': - handleGroovyConstructorDecl(node, ctx); - break; - case 'function_definition': - case 'function_declaration': - handleGroovyFunctionDecl(node, ctx); - break; - case 'import_statement': - case 'import_declaration': - 
handleGroovyImport(node, ctx); - break; - case 'method_call': - case 'method_invocation': - case 'call_expression': - case 'function_call': - case 'juxt_function_call': - handleGroovyCallExpr(node, ctx); - break; - case 'object_creation_expression': - handleGroovyObjectCreation(node, ctx); - break; - } + const handler = GROOVY_NODE_HANDLERS[node.type]; + if (handler) handler(node, ctx); for (let i = 0; i < node.childCount; i++) { const child = node.child(i); @@ -82,6 +44,32 @@ function walkGroovyNode(node: TreeSitterNode, ctx: ExtractorOutput): void { } } +// Lookup table keyed on node.type, replacing a linear switch dispatch. +// Multiple grammar-version type names map to the same handler (mirrors the +// original switch's fallthrough case groups). +const GROOVY_NODE_HANDLERS: Record void> = { + class_definition: handleGroovyClassDecl, + class_declaration: handleGroovyClassDecl, + interface_definition: handleGroovyInterfaceDecl, + interface_declaration: handleGroovyInterfaceDecl, + enum_definition: handleGroovyEnumDecl, + enum_declaration: handleGroovyEnumDecl, + method_definition: handleGroovyMethodDecl, + method_declaration: handleGroovyMethodDecl, + constructor_definition: handleGroovyConstructorDecl, + constructor_declaration: handleGroovyConstructorDecl, + function_definition: handleGroovyFunctionDecl, + function_declaration: handleGroovyFunctionDecl, + import_statement: handleGroovyImport, + import_declaration: handleGroovyImport, + method_call: handleGroovyCallExpr, + method_invocation: handleGroovyCallExpr, + call_expression: handleGroovyCallExpr, + function_call: handleGroovyCallExpr, + juxt_function_call: handleGroovyCallExpr, + object_creation_expression: handleGroovyObjectCreation, +}; + // ── Handlers ─────────────────────────────────────────────────────────────── const GROOVY_PARENT_TYPES = [ diff --git a/src/extractors/helpers.ts b/src/extractors/helpers.ts index 4f222ccd1..7e64333a5 100644 --- a/src/extractors/helpers.ts +++ 
b/src/extractors/helpers.ts @@ -209,9 +209,9 @@ export function extractBodyMembers( for (let i = 0; i < body.childCount; i++) { const member = body.child(i); if (!member || member.type !== memberType) continue; - const nn = member.childForFieldName(nameField); - if (!nn) continue; - const entry: SubDeclaration = { name: nn.text, kind, line: member.startPosition.row + 1 }; + const nameNode = member.childForFieldName(nameField); + if (!nameNode) continue; + const entry: SubDeclaration = { name: nameNode.text, kind, line: member.startPosition.row + 1 }; if (visibility) entry.visibility = visibility(member); members.push(entry); } @@ -356,7 +356,7 @@ export function isCPrimitiveType(typeName: string): boolean { /** * Options for {@link extractSimpleParameters}. */ -export interface ExtractParametersOptions { +interface ExtractParametersOptions { /** Tree-sitter types that mark a single parameter node (e.g. `formal_parameter`). */ paramTypes: readonly string[]; /** diff --git a/src/extractors/julia.ts b/src/extractors/julia.ts index d412fecda..1bf9b80a3 100644 --- a/src/extractors/julia.ts +++ b/src/extractors/julia.ts @@ -222,27 +222,57 @@ function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void { const typeHead = findChild(node, 'type_head'); if (!typeHead) return; - let nameNode: TreeSitterNode | null; - let supertypeNode: TreeSitterNode | null = null; + const { nameNode, supertypeNode } = resolveJuliaStructHeadNames(typeHead); + if (!nameNode) return; + const structName = nameNode.text; + const children = collectJuliaStructFields(node); + + if (supertypeNode) { + ctx.classes.push({ + name: structName, + extends: supertypeNode.text, + line: nodeStartLine(node), + }); + } + + ctx.definitions.push({ + name: structName, + kind: 'struct', + line: nodeStartLine(node), + endLine: nodeEndLine(node), + children: children.length > 0 ? children : undefined, + }); +} + +// Resolves the struct's name and optional supertype from its `type_head`. 
+// Handles both the plain form (`Name` or `Vec{T}`) and the `Name <: Super` +// binary_expression form, walking into each side to find the base-name +// identifier for parameterized forms like `Vec{T} <: AbstractArray{T,1}`. +function resolveJuliaStructHeadNames(typeHead: TreeSitterNode): { + nameNode: TreeSitterNode | null; + supertypeNode: TreeSitterNode | null; +} { const binary = findChild(typeHead, 'binary_expression'); - if (binary) { - // Walk into each side of the binary expression to find the base-name - // identifier — handles parameterized forms like `Vec{T} <: AbstractArray{T,1}`. - const sides: TreeSitterNode[] = []; - for (let i = 0; i < binary.childCount; i++) { - const c = binary.child(i); - if (c && c.type !== 'operator') sides.push(c); - } - nameNode = sides[0] ? findBaseName(sides[0]) : null; - supertypeNode = sides[1] ? findBaseName(sides[1]) : null; - } else { - nameNode = findBaseName(typeHead); + if (!binary) { + return { nameNode: findBaseName(typeHead), supertypeNode: null }; } - if (!nameNode) return; - const structName = nameNode.text; + const sides: TreeSitterNode[] = []; + for (let i = 0; i < binary.childCount; i++) { + const c = binary.child(i); + if (c && c.type !== 'operator') sides.push(c); + } + return { + nameNode: sides[0] ? findBaseName(sides[0]) : null, + supertypeNode: sides[1] ? findBaseName(sides[1]) : null, + }; +} +// Collects the struct's field declarations: `typed_expression` (typed field) +// and plain `identifier` (untyped field) direct children of the +// struct_definition node. 
+function collectJuliaStructFields(node: TreeSitterNode): SubDeclaration[] { const children: SubDeclaration[] = []; for (let i = 0; i < node.childCount; i++) { const child = node.child(i); @@ -263,22 +293,7 @@ function handleStructDef(node: TreeSitterNode, ctx: ExtractorOutput): void { children.push({ name: child.text, kind: 'property', line: nodeStartLine(child) }); } } - - if (supertypeNode) { - ctx.classes.push({ - name: structName, - extends: supertypeNode.text, - line: nodeStartLine(node), - }); - } - - ctx.definitions.push({ - name: structName, - kind: 'struct', - line: nodeStartLine(node), - endLine: nodeEndLine(node), - children: children.length > 0 ? children : undefined, - }); + return children; } function handleAbstractDef(node: TreeSitterNode, ctx: ExtractorOutput): void { @@ -341,24 +356,7 @@ function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void { if (!source) source = txt; names.push(txt.split('.').pop() || txt); } else if (child.type === 'selected_import') { - // First identifier-bearing node is the source module; the rest are - // imported names. The module may itself be a `scoped_identifier` - // (e.g. `import Foo.Bar: baz`) — handle it alongside bare - // `identifier` and use the trailing segment as the display name, - // mirroring the outer loop. 
- let first = true; - for (let j = 0; j < child.childCount; j++) { - const part = child.child(j); - if (!part) continue; - if (part.type !== 'identifier' && part.type !== 'scoped_identifier') continue; - const txt = part.text; - if (first) { - if (!source) source = txt; - first = false; - } else { - names.push(txt.split('.').pop() || txt); - } - } + source = collectJuliaSelectedImportParts(child, names, source); } } @@ -370,6 +368,35 @@ function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void { } } +// Handles the `selected_import` shape (`import Base: show` / +// `import Foo.Bar: baz`): the first identifier-bearing node is the source +// module; the rest are imported names. The module may itself be a +// `scoped_identifier` — handled alongside bare `identifier`, using the +// trailing segment as the display name, mirroring the outer loop. Returns +// the resolved source (unchanged from `currentSource` if already set or if +// no identifier-bearing child was found). +function collectJuliaSelectedImportParts( + child: TreeSitterNode, + names: string[], + currentSource: string, +): string { + let source = currentSource; + let first = true; + for (let j = 0; j < child.childCount; j++) { + const part = child.child(j); + if (!part) continue; + if (part.type !== 'identifier' && part.type !== 'scoped_identifier') continue; + const txt = part.text; + if (first) { + if (!source) source = txt; + first = false; + } else { + names.push(txt.split('.').pop() || txt); + } + } + return source; +} + function handleCall(node: TreeSitterNode, ctx: ExtractorOutput): void { // Don't record if parent is assignment LHS (that's a function definition) if (node.parent?.type === 'assignment' && node === node.parent.child(0)) return; diff --git a/src/extractors/r.ts b/src/extractors/r.ts index ef0a863e0..4763cf99d 100644 --- a/src/extractors/r.ts +++ b/src/extractors/r.ts @@ -162,52 +162,59 @@ function handleLibraryCall(node: TreeSitterNode, ctx: ExtractorOutput): void { // 
`library(package = dplyr)`, prefer the field-named `value` child of the // `argument` node so we extract `dplyr` (the value), not `package` (the // parameter name). Keeps native (Rust) and WASM extractors in parity. - for (let i = 0; i < node.childCount; i++) { - const child = node.child(i); - if (!child) continue; - if (child.type === 'arguments') { - for (let j = 0; j < child.childCount; j++) { - const arg = child.child(j); - if (!arg) continue; - if (arg.type === 'identifier') { - pushImport(ctx, node, arg.text, [arg.text]); - return; - } - if (arg.type === 'string' || arg.type === 'string_content') { - const text = stripQuotes(arg.text); - pushImport(ctx, node, text, [text]); - return; - } - // Argument might be wrapped - if (arg.type === 'argument') { - // Prefer the `value` field (correct for named arguments). - const valueNode = arg.childForFieldName('value'); - let pick: TreeSitterNode | null = null; - if (valueNode && (valueNode.type === 'string' || valueNode.type === 'identifier')) { - pick = valueNode; - } else { - // Fallback: skip the parameter-name child if the grammar exposes - // it via the `name` field, then pick the first string/identifier. 
- const nameNode = arg.childForFieldName('name'); - for (let k = 0; k < arg.childCount; k++) { - const inner = arg.child(k); - if (!inner) continue; - if (nameNode && inner.id === nameNode.id) continue; - if (inner.type === 'string' || inner.type === 'identifier') { - pick = inner; - break; - } - } - } - if (pick) { - const text = stripQuotes(pick.text); - pushImport(ctx, node, text, [text]); - return; - } - } - } + const argumentsNode = findFirstChildOfTypes(node, ['arguments']); + if (!argumentsNode) return; + + for (let j = 0; j < argumentsNode.childCount; j++) { + const arg = argumentsNode.child(j); + if (!arg) continue; + const importName = resolveLibraryImportName(arg); + if (importName !== null) { + pushImport(ctx, node, importName, [importName]); + return; + } + } +} + +// Extracts the package name text for a single library()/require() argument +// node, applying the same identifier/string/wrapped-argument precedence as +// the original inline logic (identifier args are used verbatim; string and +// resolved wrapped-argument values are unquoted via stripQuotes). +function resolveLibraryImportName(arg: TreeSitterNode): string | null { + if (arg.type === 'identifier') { + return arg.text; + } + if (arg.type === 'string' || arg.type === 'string_content') { + return stripQuotes(arg.text); + } + if (arg.type === 'argument') { + const pick = resolveLibraryArgumentValueNode(arg); + if (pick) return stripQuotes(pick.text); + } + return null; +} + +// Picks the value node out of an `argument`-wrapped library()/require() call +// argument: prefers the field-named `value` child (correct for named +// arguments like `library(package = dplyr)`), falling back to the first +// string/identifier child that isn't the `name` field. 
+function resolveLibraryArgumentValueNode(arg: TreeSitterNode): TreeSitterNode | null { + const valueNode = arg.childForFieldName('value'); + if (valueNode && (valueNode.type === 'string' || valueNode.type === 'identifier')) { + return valueNode; + } + // Fallback: skip the parameter-name child if the grammar exposes + // it via the `name` field, then pick the first string/identifier. + const nameNode = arg.childForFieldName('name'); + for (let k = 0; k < arg.childCount; k++) { + const inner = arg.child(k); + if (!inner) continue; + if (nameNode && inner.id === nameNode.id) continue; + if (inner.type === 'string' || inner.type === 'identifier') { + return inner; } } + return null; } function handleSourceCall(node: TreeSitterNode, ctx: ExtractorOutput): void { diff --git a/src/extractors/scala.ts b/src/extractors/scala.ts index b78a7470b..34767f3b4 100644 --- a/src/extractors/scala.ts +++ b/src/extractors/scala.ts @@ -286,37 +286,46 @@ function collectScalaBodyMembers( if (!member) continue; if (member.type === 'function_definition') { - const methName = member.childForFieldName('name'); - if (methName) { - const params = extractScalaParameters(member); - methods.push({ - name: `${parentName}.${methName.text}`, - kind: 'method', - line: member.startPosition.row + 1, - endLine: member.endPosition.row + 1, - visibility: extractModifierVisibility(member), - children: params.length > 0 ? params : undefined, - }); - } + collectScalaFunctionMember(member, parentName, methods); } else if (member.type === 'val_definition' || member.type === 'var_definition') { - const pattern = member.childForFieldName('pattern'); - if (pattern) { - const nameNode = pattern.type === 'identifier' ? 
pattern : findChild(pattern, 'identifier'); - if (nameNode) { - children.push({ - name: nameNode.text, - kind: 'property', - line: member.startPosition.row + 1, - visibility: extractModifierVisibility(member), - }); - } - } + collectScalaValVarMember(member, children); } } return { children, methods }; } +function collectScalaFunctionMember( + member: TreeSitterNode, + parentName: string, + methods: Definition[], +): void { + const methName = member.childForFieldName('name'); + if (!methName) return; + const params = extractScalaParameters(member); + methods.push({ + name: `${parentName}.${methName.text}`, + kind: 'method', + line: member.startPosition.row + 1, + endLine: member.endPosition.row + 1, + visibility: extractModifierVisibility(member), + children: params.length > 0 ? params : undefined, + }); +} + +function collectScalaValVarMember(member: TreeSitterNode, children: SubDeclaration[]): void { + const pattern = member.childForFieldName('pattern'); + if (!pattern) return; + const nameNode = pattern.type === 'identifier' ? 
pattern : findChild(pattern, 'identifier'); + if (!nameNode) return; + children.push({ + name: nameNode.text, + kind: 'property', + line: member.startPosition.row + 1, + visibility: extractModifierVisibility(member), + }); +} + // ── Parameter extraction ──────────────────────────────────────────────────── function extractScalaParameters(funcNode: TreeSitterNode): SubDeclaration[] { diff --git a/src/features/audit.ts b/src/features/audit.ts index 9f0d5183b..f5dc5bddf 100644 --- a/src/features/audit.ts +++ b/src/features/audit.ts @@ -8,7 +8,16 @@ import { debug } from '../infrastructure/logger.js'; import { isTestFile } from '../infrastructure/test-filter.js'; import { toErrorMessage } from '../shared/errors.js'; import { toSymbolRef } from '../shared/normalize.js'; -import type { BetterSqlite3Database, CodegraphConfig } from '../types.js'; +import type { + AuditFunctionEntry, + AuditHealthMetrics, + AuditResult, + BetterSqlite3Database, + CodegraphConfig, + Role, + SymbolKind, + ThresholdBreach, +} from '../types.js'; import { RULE_DEFS } from './manifesto.js'; // ─── Threshold resolution ─────────────────────────────────────────── @@ -65,13 +74,6 @@ const METRIC_TO_RULE: Record = { max_nesting: 'maxNesting', }; -interface ThresholdBreach { - metric: string; - value: number; - threshold: number; - level: 'warn' | 'fail'; -} - function checkBreaches( row: Record, thresholds: Record, @@ -128,18 +130,6 @@ interface SymbolRef { line: number; } -interface HealthMetrics { - cognitive: number | null; - cyclomatic: number | null; - maxNesting: number | null; - maintainabilityIndex: number | null; - halstead: { volume: number; difficulty: number; effort: number; bugs: number }; - loc: number; - sloc: number; - commentLines: number; - thresholdBreaches: ThresholdBreach[]; -} - interface AuditDataOpts { noTests?: boolean; config?: CodegraphConfig; @@ -152,7 +142,7 @@ export function auditData( target: string, customDbPath?: string, opts: AuditDataOpts = {}, -): { target: 
string; kind: string; functions: unknown[] } { +): AuditResult { const noTests = opts.noTests || false; const config = opts.config || loadConfig(); const maxDepth = @@ -176,14 +166,14 @@ export function auditData( } if (results.length === 0) { - return { target, kind: explained.kind, functions: [] }; + return { target, kind: explained.kind as 'function' | 'file', functions: [] }; } // 2. Open DB for enrichment const db = openReadonlyOrFail(customDbPath); const thresholds = resolveThresholds(customDbPath, opts.config); - let functions: unknown[]; + let functions: AuditFunctionEntry[]; try { if (explained.kind === 'file') { functions = enrichFileResults(db, results, kind, noTests, maxDepth, thresholds); @@ -196,24 +186,25 @@ export function auditData( db.close(); } - return { target, kind: explained.kind, functions }; + return { target, kind: explained.kind as 'function' | 'file', functions }; } // ─── Enrich a function result from explainData ────────────────────── +/** A function-target result as returned by `explainFunctionImpl` (always fully populated -- see domain/analysis/context.ts). */ interface ExplainResult { name: string; kind: string; file: string; line: number; - endLine?: number | null; - role?: string | null; - lineCount?: number | null; - summary?: string | null; - signature?: string | null; - callees?: SymbolRef[]; - callers?: SymbolRef[]; - relatedTests?: { file: string }[]; + endLine: number | null; + role: string | null; + lineCount: number | null; + summary: string | null; + signature: { params: string | null; returnType: string | null } | null; + callees: SymbolRef[]; + callers: SymbolRef[]; + relatedTests: { file: string }[]; } /** Enrich all symbols from file-target results. 
*/ @@ -224,8 +215,8 @@ function enrichFileResults( noTests: boolean, maxDepth: number, thresholds: Record, -): unknown[] { - const functions: unknown[] = []; +): AuditFunctionEntry[] { + const functions: AuditFunctionEntry[] = []; for (const fileResult of results) { let allSymbols = [ ...(fileResult.publicApi || []), @@ -245,7 +236,7 @@ function enrichFunction( noTests: boolean, maxDepth: number, thresholds: Record, -): unknown { +): AuditFunctionEntry { const nodeRow = db .prepare('SELECT id FROM nodes WHERE name = ? AND file = ? AND line = ?') .get(r.name, r.file, r.line) as { id: number } | undefined; @@ -261,11 +252,11 @@ function enrichFunction( return { name: r.name, - kind: r.kind, + kind: r.kind as SymbolKind, file: r.file, line: r.line, endLine: r.endLine, - role: r.role, + role: r.role as Role | null, lineCount: r.lineCount, summary: r.summary, signature: r.signature, @@ -280,13 +271,14 @@ function enrichFunction( // ─── Enrich a symbol from file-level explainData ──────────────────── +/** A file-target symbol as returned by `explainFileImpl`'s `mapSymbol` (always fully populated -- see domain/analysis/context.ts). */ interface FileSymbol { name: string; kind: string; line: number; - role?: string | null; - summary?: string | null; - signature?: string | null; + role: string | null; + summary: string | null; + signature: { params: string | null; returnType: string | null } | null; } /** Query callees, callers, and related test files for a node. */ @@ -336,7 +328,7 @@ function enrichSymbol( noTests: boolean, maxDepth: number, thresholds: Record, -): unknown { +): AuditFunctionEntry { const nodeRow = db .prepare('SELECT id, end_line FROM nodes WHERE name = ? AND file = ? 
AND line = ?') .get(sym.name, file, sym.line) as { id: number; end_line: number | null } | undefined; @@ -359,11 +351,11 @@ function enrichSymbol( return { name: sym.name, - kind: sym.kind, + kind: sym.kind as SymbolKind, file, line: sym.line, endLine, - role: sym.role || null, + role: (sym.role || null) as Role | null, lineCount, summary: sym.summary || null, signature: sym.signature || null, @@ -396,7 +388,7 @@ function buildHealth( db: BetterSqlite3Database, nodeId: number, thresholds: Record, -): HealthMetrics { +): AuditHealthMetrics { try { const row = db .prepare( @@ -431,7 +423,7 @@ function buildHealth( } } -function defaultHealth(): HealthMetrics { +function defaultHealth(): AuditHealthMetrics { return { cognitive: null, cyclomatic: null, diff --git a/src/presentation/audit.ts b/src/presentation/audit.ts index f82bc1dac..a2d14fa15 100644 --- a/src/presentation/audit.ts +++ b/src/presentation/audit.ts @@ -1,6 +1,7 @@ import { kindIcon } from '../domain/queries.js'; import { auditData } from '../features/audit.js'; import { outputResult } from '../infrastructure/result-formatter.js'; +import type { AuditFunctionEntry, AuditResult, CodegraphConfig } from '../types.js'; interface AuditOpts { json?: boolean; @@ -12,11 +13,14 @@ interface AuditOpts { limit?: number; offset?: number; depth?: number; - config?: unknown; + config?: CodegraphConfig; } +/** A caller/callee reference as rendered under the "Calls"/"Called by" sections. */ +type CallRef = AuditFunctionEntry['callees'][number]; + /** Render health metrics for a single audit function. */ -function renderHealthMetrics(fn: any): void { +function renderHealthMetrics(fn: AuditFunctionEntry): void { if (fn.health.cognitive == null) return; console.log(`\n Health:`); console.log( @@ -35,8 +39,8 @@ function renderHealthMetrics(fn: any): void { } } -/** Render a single audited function with all its sections. 
*/ -function renderAuditFunction(fn: any): void { +/** Render the name/kind/location/summary/signature header for an audited function. */ +function renderFunctionHeader(fn: AuditFunctionEntry): void { const lineRange = fn.endLine ? `${fn.line}-${fn.endLine}` : `${fn.line}`; const roleTag = fn.role ? ` [${fn.role}]` : ''; console.log(`## ${kindIcon(fn.kind)} ${fn.name} (${fn.kind})${roleTag}`); @@ -46,42 +50,53 @@ function renderAuditFunction(fn: any): void { if (fn.signature.params != null) console.log(` Parameters: (${fn.signature.params})`); if (fn.signature.returnType) console.log(` Returns: ${fn.signature.returnType}`); } +} - renderHealthMetrics(fn); - - if (fn.health.thresholdBreaches.length > 0) { - console.log(`\n Threshold Breaches:`); - for (const b of fn.health.thresholdBreaches) { - const icon = b.level === 'fail' ? 'FAIL' : 'WARN'; - console.log(` [${icon}] ${b.metric}: ${b.value} >= ${b.threshold}`); - } +/** Render manifesto threshold breaches (cognitive/cyclomatic/nesting over warn/fail limits). */ +function renderThresholdBreaches(fn: AuditFunctionEntry): void { + if (fn.health.thresholdBreaches.length === 0) return; + console.log(`\n Threshold Breaches:`); + for (const b of fn.health.thresholdBreaches) { + const icon = b.level === 'fail' ? 'FAIL' : 'WARN'; + console.log(` [${icon}] ${b.metric}: ${b.value} >= ${b.threshold}`); } +} +/** Render the transitive-dependent impact summary, one line per BFS level. 
*/ +function renderImpactSection(fn: AuditFunctionEntry): void { console.log(`\n Impact: ${fn.impact.totalDependents} transitive dependent(s)`); for (const [level, nodes] of Object.entries(fn.impact.levels)) { - console.log( - ` Level ${level}: ${(nodes as Array<{ name: string }>).map((n) => n.name).join(', ')}`, - ); + console.log(` Level ${level}: ${nodes.map((n) => n.name).join(', ')}`); } +} - if (fn.callees.length > 0) { - console.log(`\n Calls (${fn.callees.length}):`); - for (const c of fn.callees) { - console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); - } - } - if (fn.callers.length > 0) { - console.log(`\n Called by (${fn.callers.length}):`); - for (const c of fn.callers) { - console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); - } +/** Render a labeled list of call references (used for both "Calls" and "Called by"). */ +function renderCallRefs(label: string, refs: CallRef[]): void { + if (refs.length === 0) return; + console.log(`\n ${label} (${refs.length}):`); + for (const c of refs) { + console.log(` ${kindIcon(c.kind)} ${c.name} ${c.file}:${c.line}`); } - if (fn.relatedTests.length > 0) { - console.log(`\n Tests (${fn.relatedTests.length}):`); - for (const t of fn.relatedTests) { - console.log(` ${t.file}`); - } +} + +/** Render the related-test-file list for an audited function. */ +function renderRelatedTests(fn: AuditFunctionEntry): void { + if (fn.relatedTests.length === 0) return; + console.log(`\n Tests (${fn.relatedTests.length}):`); + for (const t of fn.relatedTests) { + console.log(` ${t.file}`); } +} + +/** Render a single audited function with all its sections. 
*/ +function renderAuditFunction(fn: AuditFunctionEntry): void { + renderFunctionHeader(fn); + renderHealthMetrics(fn); + renderThresholdBreaches(fn); + renderImpactSection(fn); + renderCallRefs('Calls', fn.callees); + renderCallRefs('Called by', fn.callers); + renderRelatedTests(fn); console.log(); } @@ -91,7 +106,7 @@ export function audit( customDbPath: string | undefined, opts: AuditOpts = {}, ): void { - const data: any = auditData(target, customDbPath, opts as any); + const data: AuditResult = auditData(target, customDbPath, opts); if (outputResult(data, null, opts)) return; diff --git a/src/types.ts b/src/types.ts index 88c85c3b8..66b894f36 100644 --- a/src/types.ts +++ b/src/types.ts @@ -1611,6 +1611,27 @@ export interface AuditResult { functions: AuditFunctionEntry[]; } +/** A single manifesto threshold breach reported against an audited function. */ +export interface ThresholdBreach { + metric: string; + value: number; + threshold: number; + level: 'warn' | 'fail'; +} + +/** Complexity/maintainability health metrics attached to an audited function. 
*/ +export interface AuditHealthMetrics { + cognitive: number | null; + cyclomatic: number | null; + maxNesting: number | null; + maintainabilityIndex: number | null; + halstead: HalsteadMetrics; + loc: number; + sloc: number; + commentLines: number; + thresholdBreaches: ThresholdBreach[]; +} + export interface AuditFunctionEntry { name: string; kind: SymbolKind; @@ -1618,30 +1639,20 @@ export interface AuditFunctionEntry { line: number; endLine: number | null; role: Role | null; - lineCount: number; + lineCount: number | null; summary: string | null; - signature: string | null; - callees: string[]; - callers: string[]; - relatedTests: string[]; + signature: { params: string | null; returnType: string | null } | null; + callees: Array<{ name: string; kind: string; file: string; line: number }>; + callers: Array<{ name: string; kind: string; file: string; line: number }>; + relatedTests: Array<{ file: string }>; impact: { totalDependents: number; levels: Record; }; - health: { - cognitive: number; - cyclomatic: number; - maxNesting: number; - maintainabilityIndex: number | null; - halstead: HalsteadMetrics | null; - loc: number; - sloc: number; - commentLines: number; - thresholdBreaches: string[]; - }; - riskScore: number; - complexityNotes: string[]; - sideEffects: string[]; + health: AuditHealthMetrics; + riskScore: number | null; + complexityNotes: string | null; + sideEffects: string | null; } export interface ImpactLevelEntry { diff --git a/tests/benchmarks/resolution/tracer/loader-hooks.mjs b/tests/benchmarks/resolution/tracer/loader-hooks.mjs index bfe54e764..d0446e9eb 100644 --- a/tests/benchmarks/resolution/tracer/loader-hooks.mjs +++ b/tests/benchmarks/resolution/tracer/loader-hooks.mjs @@ -34,6 +34,67 @@ const NOT_FUNCTIONS = new Set([ 'export', ]); +/** Matches a class declaration line; returns the class name or null. 
*/ +function matchClassDeclaration(trimmed) { + const classMatch = trimmed.match(/^(?:export\s+)?class\s+(\w+)/); + return classMatch && trimmed.includes('{') ? classMatch[1] : null; +} + +/** Matches `function NAME(`, `export function NAME(`, `async function NAME(`. */ +function matchFunctionDeclaration(trimmed) { + const funcDecl = trimmed.match( + /^(?:export\s+)?(?:default\s+)?(?:async\s+)?function\s+(\w+)\s*\(/, + ); + return funcDecl ? funcDecl[1] : null; +} + +/** Matches `const/let/var NAME = async? (function | arrow)`. */ +function matchAssignedFunction(trimmed) { + const assignedFunc = trimmed.match( + /^(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?(?:function\s*\w*\s*\(|[^=]*=>\s*\{)/, + ); + return assignedFunc ? assignedFunc[1] : null; +} + +/** Matches a class method/constructor/getter/setter declaration (only inside a class body). */ +function matchClassMethod(trimmed, currentClass, braceDepth, classDepth) { + if (!currentClass || braceDepth <= classDepth) return null; + const methodDecl = trimmed.match(/^(?:async\s+)?(?:static\s+)?(?:get\s+|set\s+)?#?(\w+)\s*\(/); + if (!methodDecl || NOT_FUNCTIONS.has(methodDecl[1])) return null; + const mname = methodDecl[1]; + return mname === 'constructor' ? `${currentClass}.constructor` : `${currentClass}.${mname}`; +} + +/** + * Detects the function/method name declared on this line, if any. + * Tries each pattern in order and returns the first match. + */ +function detectFunctionName(trimmed, currentClass, braceDepth, classDepth) { + return ( + matchFunctionDeclaration(trimmed) || + matchAssignedFunction(trimmed) || + matchClassMethod(trimmed, currentClass, braceDepth, classDepth) + ); +} + +/** Pops and closes any function scopes whose body ends at this line's new brace depth. 
*/ +function closeFinishedScopes(funcStack, newDepth, indent, output) { + while (funcStack.length > 0 && newDepth <= funcStack[funcStack.length - 1].openDepth) { + funcStack.pop(); + output.push(`${indent}} finally { globalThis.__tracer?.exit(); }`); + } +} + +/** Opens a new traced scope (enter + try) if this line declares a function/method. */ +function openScopeIfDeclared(funcName, trimmed, indent, file, braceDepth, funcStack, output) { + if (!funcName || !trimmed.endsWith('{')) return; + const inner = `${indent} `; + const escaped = funcName.replace(/'/g, "\\'"); + output.push(`${inner}globalThis.__tracer?.enter('${escaped}', '${file}');`); + output.push(`${inner}try {`); + funcStack.push({ name: funcName, openDepth: braceDepth }); +} + /** * Instrument all function/method declarations in source code. * Injects enter()/try and finally/exit() around each function body. @@ -60,58 +121,17 @@ function instrumentSource(source, filename) { const closeBraces = (line.match(/\}/g) || []).length; const newDepth = braceDepth + openBraces - closeBraces; - // Detect class declarations - const classMatch = trimmed.match(/^(?:export\s+)?class\s+(\w+)/); - if (classMatch && trimmed.includes('{')) { - currentClass = classMatch[1]; + const classMatch = matchClassDeclaration(trimmed); + if (classMatch) { + currentClass = classMatch; classDepth = braceDepth; } - // Detect function/method declarations - let funcName = null; - - // function NAME(, export function NAME(, async function NAME( - const funcDecl = trimmed.match( - /^(?:export\s+)?(?:default\s+)?(?:async\s+)?function\s+(\w+)\s*\(/, - ); - if (funcDecl) funcName = funcDecl[1]; - - // const/let/var NAME = async? 
(function | arrow) - if (!funcName) { - const assignedFunc = trimmed.match( - /^(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?(?:function\s*\w*\s*\(|[^=]*=>\s*\{)/, - ); - if (assignedFunc) funcName = assignedFunc[1]; - } - - // Class method (only inside a class body) - if (!funcName && currentClass && braceDepth > classDepth) { - const methodDecl = trimmed.match( - /^(?:async\s+)?(?:static\s+)?(?:get\s+|set\s+)?#?(\w+)\s*\(/, - ); - if (methodDecl && !NOT_FUNCTIONS.has(methodDecl[1])) { - const mname = methodDecl[1]; - funcName = - mname === 'constructor' ? `${currentClass}.constructor` : `${currentClass}.${mname}`; - } - } - - // Insert finally blocks for closing function scopes - while (funcStack.length > 0 && newDepth <= funcStack[funcStack.length - 1].openDepth) { - funcStack.pop(); - output.push(`${indent}} finally { globalThis.__tracer?.exit(); }`); - } + const funcName = detectFunctionName(trimmed, currentClass, braceDepth, classDepth); + closeFinishedScopes(funcStack, newDepth, indent, output); output.push(line); - - // Insert enter/try for new function declarations - if (funcName && trimmed.endsWith('{')) { - const inner = `${indent} `; - const escaped = funcName.replace(/'/g, "\\'"); - output.push(`${inner}globalThis.__tracer?.enter('${escaped}', '${file}');`); - output.push(`${inner}try {`); - funcStack.push({ name: funcName, openDepth: braceDepth }); - } + openScopeIfDeclared(funcName, trimmed, indent, file, braceDepth, funcStack, output); braceDepth = newDepth; diff --git a/tests/benchmarks/resolution/tracer/lua-tracer.lua b/tests/benchmarks/resolution/tracer/lua-tracer.lua index 0fcc75882..873e2853e 100644 --- a/tests/benchmarks/resolution/tracer/lua-tracer.lua +++ b/tests/benchmarks/resolution/tracer/lua-tracer.lua @@ -99,7 +99,12 @@ debug.sethook(hook, "cr") local ok, err = pcall(dofile, fixture_dir .. 
"main.lua") if not ok then - -- Swallow errors - we only care about call edges + -- We only care about call edges captured before the fixture errored, so + -- keep going rather than aborting the trace — but still surface the + -- error to stderr so a genuine failure (e.g. a Lua syntax error + -- producing zero edges) is visible instead of looking like a silent + -- successful trace. + io.stderr:write(tostring(err), "\n") end debug.sethook() diff --git a/tests/benchmarks/resolution/tracer/native-tracer.sh b/tests/benchmarks/resolution/tracer/native-tracer.sh index 5d004972a..602b5268e 100644 --- a/tests/benchmarks/resolution/tracer/native-tracer.sh +++ b/tests/benchmarks/resolution/tracer/native-tracer.sh @@ -14,9 +14,12 @@ set -euo pipefail FIXTURE_DIR="${1:-}" -LANG="${2:-}" +# Named TRACE_LANG (not LANG) — LANG is the POSIX locale environment variable; +# reassigning it here would clobber locale settings inherited by every +# compiler toolchain this script spawns (gcc, cargo, dotnet, swiftc, zig, ...). +TRACE_LANG="${2:-}" -if [[ -z "$FIXTURE_DIR" || -z "$LANG" ]]; then +if [[ -z "$FIXTURE_DIR" || -z "$TRACE_LANG" ]]; then echo "Usage: native-tracer.sh " >&2 exit 1 fi @@ -41,6 +44,153 @@ empty_result() { exit 0 } +# Shared source-instrumentation helpers used by trace_rust / trace_swift / +# trace_zig / trace_dart. Each of those languages needs the same scan: track +# an optional enclosing context (impl/class/struct), detect a function or +# method declaration, and inject a trace-call statement right after the +# opening brace. They differ only in the regexes, the qualname separator +# context, and how the traced scope is closed: +# +# raii — the language has its own scope-exit primitive (Rust +# `let _tg = ;`, Swift/Zig `defer`), so the entry +# template alone is enough — no brace tracking needed. +# finally — the language has neither (Dart), so the function body's +# closing brace is located via manual brace-depth counting and +# a "} finally { ... 
}" statement is injected right before it. +# +# The helpers below are dispatched one per source line from +# instrument_one_file's loop, each handling one concern and returning 0 only +# when it fully handled the line (caller should move on to the next line). +# Bash dynamically scopes `local` variables into called functions, so they +# read/write inject_trace_calls's and instrument_one_file's locals +# (ctx_regex, decl_regex, strategy, current_ctx, in_func, tmpfile, ...) +# directly instead of threading a dozen parameters through each call. +# This only works because the call chain is always exactly +# inject_trace_calls -> instrument_one_file -> maybe_*. Calling a maybe_* +# helper outside that chain is silently wrong rather than erroring: with +# decl_regex unset, `maybe_inject_declaration`'s `[[ "$trimmed" =~ $decl_regex ]]` +# matches the empty pattern against every line, injecting a trace call into +# every line of the file instead of doing nothing. + +# Tracks an optional enclosing context (impl/class/struct) block opening, +# and detects the block's closing brace. Returns 0 (line fully handled) only +# when it wrote the closing-brace line itself. +maybe_close_context() { + local line="$1" trimmed="$2" + [[ -z "$ctx_regex" ]] && return 1 + + if [[ "$trimmed" =~ $ctx_regex ]]; then + current_ctx="${BASH_REMATCH[$ctx_group]}" + fi + if [[ "$trimmed" == "}" && -n "$current_ctx" ]] && [[ ! "$line" =~ ^[[:space:]] ]]; then + printf '%s\n' "$line" >> "$tmpfile" + current_ctx="" + return 0 + fi + return 1 +} + +# For the "finally" strategy only: tracks the brace depth of the currently +# open instrumented function body and injects the finally-block right before +# its closing brace. Returns 0 once that injection is made. 
+maybe_close_finally_scope() { + local line="$1" + [[ "$strategy" == "finally" ]] || return 1 + (( in_func )) || return 1 + + local opens="${line//[^\{]/}" closes="${line//[^\}]/}" + (( func_brace_depth += ${#opens} - ${#closes} )) || true + (( func_brace_depth <= 0 )) || return 1 + + printf -- "$finally_tmpl"'\n' >> "$tmpfile" + printf '%s\n' "$line" >> "$tmpfile" + in_func=0 + func_brace_depth=0 + return 0 +} + +# Detects a function/method declaration on this line and, unless excluded, +# injects the entry template (plus "try {" for the "finally" strategy). +# Returns 0 once an injection is made. +maybe_inject_declaration() { + local line="$1" trimmed="$2" + [[ "$trimmed" =~ $decl_regex ]] || return 1 + local fname_candidate="${BASH_REMATCH[$decl_group]}" + + if [[ -n "$decl_exclude_regex" && "$trimmed" =~ $decl_exclude_regex ]]; then + return 1 + fi + [[ "$trimmed" =~ \{[[:space:]]*$ ]] || return 1 + + local qualname="$fname_candidate" + [[ -n "$current_ctx" ]] && qualname="${current_ctx}.${fname_candidate}" + printf '%s\n' "$line" >> "$tmpfile" + printf -- "$entry_tmpl"'\n' "$qualname" "$base" >> "$tmpfile" + if [[ "$strategy" == "finally" ]]; then + printf ' try {\n' >> "$tmpfile" + in_func=1 + func_brace_depth=1 + fi + return 0 +} + +# Instruments one source file in place: scans it line-by-line, dispatching +# each line to the maybe_* handlers above in order, falling back to copying +# the line verbatim when none of them handle it. 
+instrument_one_file() { + local srcfile="$1" + local base + base="$(basename "$srcfile")" + [[ "$base" == "$skip_file" ]] && return + + local current_ctx="" in_func=0 func_brace_depth=0 + local tmpfile + tmpfile="$(mktemp)" + + local line trimmed + while IFS= read -r line || [[ -n "$line" ]]; do + trimmed="${line#"${line%%[![:space:]]*}"}" + maybe_close_context "$line" "$trimmed" && continue + maybe_close_finally_scope "$line" && continue + maybe_inject_declaration "$line" "$trimmed" && continue + printf '%s\n' "$line" >> "$tmpfile" + done < "$srcfile" + + mv "$tmpfile" "$srcfile" +} + +# Args: +# $1 glob pattern for source files to instrument (e.g. "$TMP_DIR"/*.rs) +# $2 filename to skip (the generated trace-support file for this language) +# $3 context regex (e.g. '^impl[[:space:]]+([A-Za-z_][A-Za-z0-9_]*)'); +# empty string disables context (class/impl) tracking entirely +# $4 capture group index holding the context name (ignored if $3 is empty) +# $5 declaration regex (e.g. matches "fn NAME"/"func NAME") +# $6 capture group index holding the candidate function/method name +# $7 declaration exclusion regex — if the trimmed line also matches this, +# the declaration match is discarded (empty string disables filtering; +# used by Dart to skip import/if/while/for/switch/catch/class lines) +# $8 strategy: "raii" or "finally" +# $9 entry template — printf format string, args are (qualname, base file) +# $10 finally template — printf format string with no args (only used when +# strategy is "finally") +inject_trace_calls() { + local glob_pattern="$1" skip_file="$2" + local ctx_regex="$3" ctx_group="$4" + local decl_regex="$5" decl_group="$6" decl_exclude_regex="$7" + local strategy="$8" entry_tmpl="$9" finally_tmpl="${10:-}" + + # `compgen -G` expands the glob itself rather than relying on the shell + # to word-split an unquoted variable — so it still matches correctly even + # if TMP_DIR (from mktemp -d) ever contained a space. 
It exits 1 with no + # output on zero matches; `|| true` keeps that from tripping `set -e`. + local srcfile + while IFS= read -r srcfile; do + [[ -e "$srcfile" ]] || continue + instrument_one_file "$srcfile" + done < <(compgen -G "$glob_pattern" || true) +} + # ── C / C++ ────────────────────────────────────────────────────────────── trace_c_cpp() { local compiler="$1" @@ -279,48 +429,17 @@ RSTRACE # Add mod trace_support to main.rs sedi '1s/^/mod trace_support;\n/' "$TMP_DIR/src/main.rs" - # Inject trace_call into every fn body using a bash loop that tracks impl blocks - for rsfile in "$TMP_DIR/src"/*.rs; do - base="$(basename "$rsfile")" - [[ "$base" == "trace_support.rs" ]] && continue - - local current_impl="" - local tmpfile="$(mktemp)" - - while IFS= read -r line || [[ -n "$line" ]]; do - # Track impl blocks: "impl TypeName" or "impl TypeName for Trait" - if [[ "$line" =~ ^impl[[:space:]]+([A-Za-z_][A-Za-z0-9_]*) ]]; then - current_impl="${BASH_REMATCH[1]}" - fi - - # End of impl block (top-level closing brace) - if [[ "$line" == "}" && -n "$current_impl" ]]; then - printf '%s\n' "$line" >> "$tmpfile" - current_impl="" - continue - fi - - # Detect fn declarations ending with { - # Save capture before second regex clobbers BASH_REMATCH - if [[ "$line" =~ fn[[:space:]]+([a-z_][a-z0-9_]*) ]]; then - local fname_candidate="${BASH_REMATCH[1]}" - if [[ "$line" =~ \{[[:space:]]*$ ]]; then - local fname="$fname_candidate" - local qualname="$fname" - if [[ -n "$current_impl" ]]; then - qualname="${current_impl}.${fname}" - fi - printf '%s\n' "$line" >> "$tmpfile" - printf ' let _tg = crate::trace_support::trace_call("%s", "%s");\n' "$qualname" "$base" >> "$tmpfile" - continue - fi - fi - - printf '%s\n' "$line" >> "$tmpfile" - done < "$rsfile" - - mv "$tmpfile" "$rsfile" - done + # Inject trace_call into every fn body, tracking impl blocks for qualnames. 
+ # Rust's Drop-guard RAII pattern means only entry needs injecting — the + # guard's Drop impl fires trace_support's exit hook automatically. + inject_trace_calls \ + "$TMP_DIR/src/*.rs" \ + "trace_support.rs" \ + '^impl[[:space:]]+([A-Za-z_][A-Za-z0-9_]*)' 1 \ + 'fn[[:space:]]+([a-z_][a-z0-9_]*)' 1 \ + '' \ + raii \ + ' let _tg = crate::trace_support::trace_call("%s", "%s");' # Inject dump_trace() at end of main() sedi '/^fn main/,/^\}/ { @@ -563,44 +682,17 @@ class CallTracer { } SWTRACE - # Inject traceCall into every func body using bash loop - for swfile in "$TMP_DIR"/*.swift; do - base="$(basename "$swfile")" - [[ "$base" == "TraceSupport.swift" ]] && continue - local current_class="" - local tmpfile="$(mktemp)" - - while IFS= read -r line || [[ -n "$line" ]]; do - local trimmed="${line#"${line%%[![:space:]]*}"}" - # Track class/struct declarations - if [[ "$trimmed" =~ ^(class|struct)[[:space:]]+([A-Za-z_][A-Za-z0-9_]*) ]]; then - current_class="${BASH_REMATCH[2]}" - fi - # End of class (top-level }) - if [[ "$trimmed" == "}" && -n "$current_class" ]] && [[ ! "$line" =~ ^[[:space:]] ]]; then - printf '%s\n' "$line" >> "$tmpfile" - current_class="" - continue - fi - # Detect func declarations ending with { - # Save capture before second regex clobbers BASH_REMATCH - if [[ "$trimmed" =~ ^(override[[:space:]]+)?func[[:space:]]+([a-zA-Z_][a-zA-Z0-9_]*) ]]; then - local fname_candidate="${BASH_REMATCH[2]}" - if [[ "$trimmed" =~ \{[[:space:]]*$ ]]; then - local fname="$fname_candidate" - local qualname="$fname" - if [[ -n "$current_class" ]]; then - qualname="${current_class}.${fname}" - fi - printf '%s\n' "$line" >> "$tmpfile" - printf ' CallTracer.shared.traceCall("%s", "%s"); defer { CallTracer.shared.traceReturn() }\n' "$qualname" "$base" >> "$tmpfile" - continue - fi - fi - printf '%s\n' "$line" >> "$tmpfile" - done < "$swfile" - mv "$tmpfile" "$swfile" - done + # Inject traceCall into every func body, tracking class/struct blocks for + # qualnames. 
Swift's `defer` gives us a built-in scope-exit hook, so only + # entry needs injecting. + inject_trace_calls \ + "$TMP_DIR/*.swift" \ + "TraceSupport.swift" \ + '^(class|struct)[[:space:]]+([A-Za-z_][A-Za-z0-9_]*)' 2 \ + '^(override[[:space:]]+)?func[[:space:]]+([a-zA-Z_][a-zA-Z0-9_]*)' 2 \ + '' \ + raii \ + ' CallTracer.shared.traceCall("%s", "%s"); defer { CallTracer.shared.traceReturn() }' # Inject dump at end of main (top-level code or main function) if grep -q 'func main' "$TMP_DIR/main.swift" 2>/dev/null; then @@ -687,68 +779,19 @@ DARTTRACE sedi "1s|^|import 'dart:io';\nimport 'trace_support.dart';\n|" "$dartfile" done - # Inject traceCall + try/finally into function/method bodies. - # We track brace depth per function so we can inject - # "} finally { CallTracer.instance.traceReturn(); }" at the closing brace. - for dartfile in "$TMP_DIR"/*.dart; do - base="$(basename "$dartfile")" - [[ "$base" == "trace_support.dart" ]] && continue - local current_class="" - local in_func=0 - local func_brace_depth=0 - local tmpfile="$(mktemp)" - - while IFS= read -r line || [[ -n "$line" ]]; do - local trimmed="${line#"${line%%[![:space:]]*}"}" - - # Track class - if [[ "$trimmed" =~ ^class[[:space:]]+([A-Za-z_][A-Za-z0-9_]*) ]]; then - current_class="${BASH_REMATCH[1]}" - fi - if [[ "$trimmed" == "}" && -n "$current_class" ]] && [[ ! 
"$line" =~ ^[[:space:]] ]]; then - printf '%s\n' "$line" >> "$tmpfile" - current_class="" - continue - fi - - # If inside an instrumented function, track braces to find its end - if (( in_func )); then - local opens="${line//[^\{]/}" - local closes="${line//[^\}]/}" - (( func_brace_depth += ${#opens} - ${#closes} )) || true - if (( func_brace_depth <= 0 )); then - # This line contains the function's closing brace — - # inject "} finally { traceReturn(); }" before it - printf ' } finally { CallTracer.instance.traceReturn(); }\n' >> "$tmpfile" - printf '%s\n' "$line" >> "$tmpfile" - in_func=0 - func_brace_depth=0 - continue - fi - fi - - # Detect function declarations (return_type name(args) {) - # Save capture before subsequent regexes clobber BASH_REMATCH - if [[ "$trimmed" =~ [[:space:]]([a-zA-Z_][a-zA-Z0-9_]*)\( ]]; then - local fname_candidate="${BASH_REMATCH[1]}" - if [[ "$trimmed" =~ \{[[:space:]]*$ ]] && [[ ! "$trimmed" =~ ^(import|if|while|for|switch|catch|class) ]]; then - local fname="$fname_candidate" - local qualname="$fname" - if [[ -n "$current_class" ]]; then - qualname="${current_class}.${fname}" - fi - printf '%s\n' "$line" >> "$tmpfile" - printf ' CallTracer.instance.traceCall("%s", "%s");\n' "$qualname" "$base" >> "$tmpfile" - printf ' try {\n' >> "$tmpfile" - in_func=1 - func_brace_depth=1 # we're inside the function's opening brace - continue - fi - fi - printf '%s\n' "$line" >> "$tmpfile" - done < "$dartfile" - mv "$tmpfile" "$dartfile" - done + # Inject traceCall + try/finally into function/method bodies, tracking + # class blocks for qualnames. Dart has neither RAII nor `defer`, so the + # function body's closing brace is located via manual depth counting and + # "} finally { CallTracer.instance.traceReturn(); }" is injected before it. 
+ inject_trace_calls \ + "$TMP_DIR/*.dart" \ + "trace_support.dart" \ + '^class[[:space:]]+([A-Za-z_][A-Za-z0-9_]*)' 1 \ + '[[:space:]]([a-zA-Z_][a-zA-Z0-9_]*)\(' 1 \ + '^(import|if|while|for|switch|catch|class)' \ + finally \ + ' CallTracer.instance.traceCall("%s", "%s");' \ + ' } finally { CallTracer.instance.traceReturn(); }' # Inject dump at end of main sedi '/^void main/,/^\}/ { @@ -844,33 +887,24 @@ pub fn dumpTrace() void { } ZIGTRACE - # Inject traceCall into fn bodies + # Add import of trace_support at top of every fixture file for zigfile in "$TMP_DIR"/*.zig; do base="$(basename "$zigfile")" [[ "$base" == "trace_support.zig" ]] && continue - - # Add import of trace_support at top sedi "1s|^|const trace_support = @import(\"trace_support.zig\");\n|" "$zigfile" - - # Use bash loop to inject trace calls - local tmpfile="$(mktemp)" - while IFS= read -r line || [[ -n "$line" ]]; do - local trimmed="${line#"${line%%[![:space:]]*}"}" - # Save capture before second regex clobbers BASH_REMATCH - if [[ "$trimmed" =~ ^(pub[[:space:]]+)?fn[[:space:]]+([a-zA-Z_][a-zA-Z0-9_]*) ]]; then - local fname_candidate="${BASH_REMATCH[2]}" - if [[ "$trimmed" =~ \{[[:space:]]*$ ]]; then - local fname="$fname_candidate" - printf '%s\n' "$line" >> "$tmpfile" - printf ' trace_support.traceCall("%s", "%s"); defer trace_support.traceReturn();\n' "$fname" "$base" >> "$tmpfile" - continue - fi - fi - printf '%s\n' "$line" >> "$tmpfile" - done < "$zigfile" - mv "$tmpfile" "$zigfile" done + # Inject traceCall into fn bodies. Zig has no class/impl concept here, so + # context tracking is disabled; `defer` gives us a built-in scope-exit hook. 
+ inject_trace_calls \ + "$TMP_DIR/*.zig" \ + "trace_support.zig" \ + '' 0 \ + '^(pub[[:space:]]+)?fn[[:space:]]+([a-zA-Z_][a-zA-Z0-9_]*)' 2 \ + '' \ + raii \ + ' trace_support.traceCall("%s", "%s"); defer trace_support.traceReturn();' + # Inject dump at end of main sedi '/^pub fn main/,/^\}/ { /^\}/ i\ trace_support.dumpTrace(); @@ -1121,7 +1155,7 @@ trace_cuda() { } # ── Dispatch ───────────────────────────────────────────────────────────── -case "$LANG" in +case "$TRACE_LANG" in c) trace_c_cpp "gcc" "c" ;; cpp) trace_c_cpp "g++" "cpp" ;; rust) trace_rust ;; @@ -1138,5 +1172,5 @@ case "$LANG" in cuda) trace_cuda ;; verilog) empty_result "verilog is a hardware description language — no runtime tracing" ;; hcl) empty_result "HCL/Terraform has no callable functions — no runtime tracing" ;; - *) empty_result "unknown language: $LANG" ;; + *) empty_result "unknown language: $TRACE_LANG" ;; esac