Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
820ff83
chore: remove dead duplicate type declarations from types.ts (closes …
carlos-alm Jul 2, 2026
9fa4279
chore: remove unused iterComplexity export from complexity-query.ts (…
carlos-alm Jul 2, 2026
0f9bbe6
refactor: extract shared aggregate and typed-array helpers from leide…
carlos-alm Jul 2, 2026
f3e1119
refactor: extract shared name-map scanner into scripts/lib/name-map.mjs
carlos-alm Jul 2, 2026
e945bca
fix: replace event-loop-blocking Atomics.wait with shared sleepSync i…
carlos-alm Jul 2, 2026
4a348db
refactor: extract shared resolveFileTree helper from dataflow.ts and …
carlos-alm Jul 2, 2026
8fed8bc
refactor: extend DEFAULTS with previously-hardcoded config constants
carlos-alm Jul 2, 2026
f0a4882
refactor: move generatePlotHTML from features/graph-enrichment.ts to …
carlos-alm Jul 2, 2026
370f336
refactor: decompose extractDestructuredBindingsWalk/handleVariableDec…
carlos-alm Jul 2, 2026
ad14cf7
refactor: decompose resolveFallbackTargets/buildEdges/buildCallEdgesN…
carlos-alm Jul 2, 2026
51c3816
refactor: extract NativeOrchestrationSession from tryNativeOrchestrat…
carlos-alm Jul 2, 2026
63ab855
refactor: split embedRemote into request-executor and response-valida…
carlos-alm Jul 2, 2026
f31468c
fix: correct in-place mutation bug in applyExcludeTestsShorthand and …
carlos-alm Jul 2, 2026
57d3782
fix: correct connection-leak ordering in openReadonlyWithNative, dedu…
carlos-alm Jul 2, 2026
506e2ce
fix: add debug() logging to silent catch blocks across builder pipeli…
carlos-alm Jul 2, 2026
dbf34b8
refactor: split buildChaContext into three focused builder functions …
carlos-alm Jul 2, 2026
a1946af
refactor: split purgeAndAddReverseDeps and wire fast-skip-diag via co…
carlos-alm Jul 2, 2026
0e83ba0
refactor: extract getOrCreateBatchStmt, dedupe batch-insert helpers (…
carlos-alm Jul 2, 2026
c0c1f7d
fix: address quality issues in domain/graph/resolver (docs check ackn…
carlos-alm Jul 2, 2026
57143a8
fix: adopt buildFileConditionSQL in prepare.ts and move console.log o…
carlos-alm Jul 2, 2026
21db9a9
fix: address quality issues in graph unified model (model.ts merge() …
carlos-alm Jul 2, 2026
f7ce310
fix: address quality issues in features/complexity-query.ts (docs che…
carlos-alm Jul 2, 2026
5b708ee
fix: address quality issues in features/cochange.ts (docs check ackno…
carlos-alm Jul 2, 2026
7c3b869
fix: address quality issues in features/branch-compare.ts (docs check…
carlos-alm Jul 2, 2026
ce84353
fix: address quality issues in ast-analysis (docs check acknowledged)
carlos-alm Jul 2, 2026
9946db5
fix: decompose renderAuditFunction, adopt typed AuditResult (docs che…
carlos-alm Jul 2, 2026
210abd2
fix: decompose highest-complexity extractor functions (docs check ack…
carlos-alm Jul 2, 2026
cb5bc85
refactor: split execute() into printEngineInfo/printNativeVersionInfo…
carlos-alm Jul 2, 2026
387dabe
refactor: extract timeMedian helper in token-benchmark.ts
carlos-alm Jul 2, 2026
8386f71
refactor: address warnings in benchmark tracer tooling
carlos-alm Jul 3, 2026
a4dd687
refactor: address warnings in ast-analysis and extractors/helpers nam…
carlos-alm Jul 3, 2026
58ccdea
refactor(leiden): adopt fget/iget from typed-array-helpers in cpm.ts …
carlos-alm Jul 3, 2026
d914aab
refactor(config): wire db.busyTimeoutMs and community.capacityGrowthF…
carlos-alm Jul 3, 2026
824f916
refactor(extractors): adopt resolveMethodDefinitionName across 3 dupl…
carlos-alm Jul 3, 2026
e229c6b
refactor(resolver): dedupe forge phase-11 call-resolution helpers
carlos-alm Jul 3, 2026
b4a1d87
refactor(builder): extract markExportedSymbols, dedupe batch UPDATE (…
carlos-alm Jul 4, 2026
1201518
refactor(scripts): promote timeMedian/median/round1 into shared bench…
carlos-alm Jul 4, 2026
da0c267
fix(leiden): dedupe DEFAULT_CAPACITY_GROWTH_FACTOR (docs check acknow…
carlos-alm Jul 4, 2026
dc369a0
fix: resolve merge conflicts with main
carlos-alm Jul 5, 2026
7e71731
fix: import performance explicitly in bench-timing.ts (#1793)
carlos-alm Jul 5, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 7 additions & 17 deletions scripts/benchmark.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import { fileURLToPath } from 'node:url';
import Database from 'better-sqlite3';
import { resolveBenchmarkExcludes, resolveBenchmarkSource, srcImport } from './lib/bench-config.js';
import { isWorker, workerEngine, workerTargets, forkEngines } from './lib/fork-engine.js';
import { median, round1, timeMedian } from './lib/bench-timing.js';

// ── Parent process: fork one child per engine, assemble final output ─────
if (!isWorker()) {
Expand Down Expand Up @@ -97,16 +98,6 @@ const QUERY_WARMUP_RUNS = 3;
const PROBE_FILE = path.join(root, 'src', 'domain', 'queries.ts');
const BENCH_EXCLUDE = [...resolveBenchmarkExcludes()];

/**
 * Returns the median of `arr` without mutating it (sorting happens on a copy).
 *
 * An empty array yields 0 instead of NaN, matching the shared `median`
 * helper in scripts/lib/bench-timing.ts.
 *
 * @param arr - Numeric samples, in any order.
 * @returns The middle value, or the mean of the two middle values when the
 *   sample count is even; 0 when there are no samples.
 */
function median(arr: readonly number[]): number {
  // Without this guard the even-length branch reads sorted[-1] and sorted[0],
  // both undefined, and the result is NaN.
  if (arr.length === 0) return 0;
  const sorted = [...arr].sort((a, b) => a - b);
  const mid = Math.floor(sorted.length / 2);
  return sorted.length % 2 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
}

/** Rounds `n` to one decimal place. */
function round1(n) {
  const tenths = Math.round(n * 10);
  return tenths / 10;
}

function selectTargets() {
const db = new Database(dbPath, { readonly: true });
const rows = db
Expand Down Expand Up @@ -158,13 +149,12 @@ try {
for (let i = 0; i < WARMUP_RUNS; i++) {
await buildGraph(root, { engine, incremental: true, exclude: BENCH_EXCLUDE });
}
const noopTimings = [];
for (let i = 0; i < INCREMENTAL_RUNS; i++) {
const start = performance.now();
await buildGraph(root, { engine, incremental: true, exclude: BENCH_EXCLUDE });
noopTimings.push(performance.now() - start);
}
noopRebuildMs = Math.round(median(noopTimings));
noopRebuildMs = Math.round(
await timeMedian(
() => buildGraph(root, { engine, incremental: true, exclude: BENCH_EXCLUDE }),
INCREMENTAL_RUNS,
),
);
} catch (err) {
console.error(` [${engine}] No-op rebuild failed: ${(err as Error).message}`);
}
Expand Down
37 changes: 10 additions & 27 deletions scripts/incremental-benchmark.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import { performance } from 'node:perf_hooks';
import { fileURLToPath } from 'node:url';
import { resolveBenchmarkExcludes, resolveBenchmarkSource, srcImport } from './lib/bench-config.js';
import { isWorker, workerEngine, forkEngines } from './lib/fork-engine.js';
import { median, round1, timeMedian } from './lib/bench-timing.js';

// ── Parent process: fork one child per engine, assemble final output ─────
if (!isWorker()) {
Expand Down Expand Up @@ -51,12 +52,6 @@ if (!isWorker()) {
// jitter and produces CI-amplified false regressions.
const RUNS = 5;
const WARMUP_RUNS = 2;
/**
 * Returns the median of `arr`; the input is copied before sorting, so the
 * caller's array keeps its order.
 *
 * Returns 0 for an empty array (previously NaN), in line with the shared
 * `median` helper in scripts/lib/bench-timing.ts.
 *
 * @param arr - Numeric samples, in any order.
 * @returns The median, or 0 when `arr` is empty.
 */
function median(arr: readonly number[]): number {
  // Guard first: with no samples the even-length branch would average two
  // undefined reads and produce NaN.
  if (arr.length === 0) return 0;
  const sorted = [...arr].sort((a, b) => a - b);
  const mid = Math.floor(sorted.length / 2);
  return sorted.length % 2 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
}
/** Rounds `n` to one decimal place. */
function round1(n) {
  const SCALE = 10;
  return Math.round(n * SCALE) / SCALE;
}

function collectImportPairs() {
const srcRoot = path.join(rootParent, 'src');
Expand Down Expand Up @@ -190,37 +185,25 @@ const WARMUP_RUNS = 2;
// the same corpus.
const BUILD_OPTS = { engine, exclude: [...resolveBenchmarkExcludes()] };

/**
 * Returns the median of `arr`, sorting a copy so the input is left untouched.
 *
 * An empty array returns 0 rather than NaN, consistent with the shared
 * `median` helper in scripts/lib/bench-timing.ts.
 *
 * @param arr - Numeric samples, in any order.
 * @returns The median, or 0 when `arr` is empty.
 */
function median(arr: readonly number[]): number {
  // An empty input would otherwise fall into the even-length branch and
  // average two undefined values (NaN).
  if (arr.length === 0) return 0;
  const sorted = [...arr].sort((a, b) => a - b);
  const mid = Math.floor(sorted.length / 2);
  return sorted.length % 2 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
}

console.error(`Benchmarking ${engine} engine...`);

// Full build (delete DB first)
const fullTimings = [];
for (let i = 0; i < RUNS; i++) {
if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath);
const start = performance.now();
await buildGraph(root, { ...BUILD_OPTS, incremental: false });
fullTimings.push(performance.now() - start);
}
const fullBuildMs = Math.round(median(fullTimings));
const fullBuildMs = Math.round(
await timeMedian(async () => {
if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath);
await buildGraph(root, { ...BUILD_OPTS, incremental: false });
}, RUNS),
);

// No-op rebuild (nothing changed)
let noopRebuildMs = null;
try {
for (let i = 0; i < WARMUP_RUNS; i++) {
await buildGraph(root, { ...BUILD_OPTS, incremental: true });
}
const noopTimings = [];
for (let i = 0; i < RUNS; i++) {
const start = performance.now();
await buildGraph(root, { ...BUILD_OPTS, incremental: true });
noopTimings.push(performance.now() - start);
}
noopRebuildMs = Math.round(median(noopTimings));
noopRebuildMs = Math.round(
await timeMedian(() => buildGraph(root, { ...BUILD_OPTS, incremental: true }), RUNS),
);
} catch (err) {
console.error(` [${engine}] No-op rebuild failed: ${(err as Error).message}`);
}
Expand Down
53 changes: 53 additions & 0 deletions scripts/lib/bench-timing.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/**
* Shared timing helpers for benchmark scripts.
*
* `median`/`round1` were independently duplicated (byte-for-byte, in most
* cases) across token-benchmark.ts, query-benchmark.ts,
* incremental-benchmark.ts (twice — once per process: parent and worker),
* and benchmark.ts. `timeMedian` wraps the "run N times, time each run,
* return the median" loop that recurred at every call site measuring a
* single scalar latency.
*
* Usage (in a benchmark script):
*
* import { median, round1, timeMedian } from './lib/bench-timing.js';
*
* const fullBuildMs = Math.round(
* await timeMedian(() => buildGraph(root, { engine, incremental: false }), RUNS),
* );
*/
import { performance } from 'node:perf_hooks';

/**
 * Returns the median of `arr`.
 *
 * The input is never mutated: sorting happens on a copy, and the parameter is
 * typed `readonly` so frozen arrays, `as const` tuples and other read-only
 * views can be passed without a cast.
 *
 * @param arr - Numeric samples, in any order.
 * @returns The middle value of the sorted samples, the mean of the two middle
 *   values when the count is even, or 0 for an empty array.
 */
export function median(arr: readonly number[]): number {
  if (arr.length === 0) return 0;
  // Numeric comparator: the default sort compares elements as strings.
  const sorted = [...arr].sort((a, b) => a - b);
  const mid = Math.floor(sorted.length / 2);
  return sorted.length % 2 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
}

/**
 * Rounds `n` to a single decimal place.
 *
 * @param n - Value to round.
 * @returns `n` scaled by ten, rounded with `Math.round`, then scaled back.
 */
export function round1(n: number): number {
  const tenths = Math.round(n * 10);
  return tenths / 10;
}

/**
 * Times `fn` over `runs` sequential invocations and reports the median
 * elapsed time in milliseconds.
 *
 * Every call is awaited before the clock is read again, so `fn` may be
 * synchronous or return a promise (for example an async closure around
 * `buildGraph`), and runs never overlap.
 *
 * @param fn - Operation to time; its result is discarded.
 * @param runs - Number of timed invocations.
 * @returns The median of the per-run durations, as computed by `median`.
 */
export async function timeMedian(fn: () => unknown, runs: number): Promise<number> {
  const samples: number[] = [];
  while (samples.length < runs) {
    const startedAt = performance.now();
    await fn();
    const finishedAt = performance.now();
    samples.push(finishedAt - startedAt);
  }
  return median(samples);
}
11 changes: 1 addition & 10 deletions scripts/query-benchmark.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import { fileURLToPath } from 'node:url';
import Database from 'better-sqlite3';
import { resolveBenchmarkExcludes, resolveBenchmarkSource, srcImport } from './lib/bench-config.js';
import { isWorker, workerEngine, workerTargets, forkEngines } from './lib/fork-engine.js';
import { median, round1 } from './lib/bench-timing.js';

// ── Parent process: fork one child per engine, assemble final output ─────
if (!isWorker()) {
Expand Down Expand Up @@ -117,16 +118,6 @@ const RUNS = 5;
// before timing so the metric reflects warm-call latency, not cold-start.
const WARMUP_RUNS = 3;

/**
 * Returns the median of `arr` without reordering the caller's array (a copy
 * is sorted).
 *
 * Empty input returns 0 instead of NaN, agreeing with the shared `median`
 * helper in scripts/lib/bench-timing.ts.
 *
 * @param arr - Numeric samples, in any order.
 * @returns The median, or 0 when `arr` is empty.
 */
function median(arr: readonly number[]): number {
  // With zero samples the even-length branch would read two out-of-range
  // indices and return NaN.
  if (arr.length === 0) return 0;
  const sorted = [...arr].sort((a, b) => a - b);
  const mid = Math.floor(sorted.length / 2);
  return sorted.length % 2 ? sorted[mid] : (sorted[mid - 1] + sorted[mid]) / 2;
}

/** Rounds `n` to one decimal place. */
function round1(n) {
  const scaled = n * 10;
  const rounded = Math.round(scaled);
  return rounded / 10;
}

// Pinned hub targets — stable function names that exist across versions.
// Auto-selecting the most-connected node makes version-to-version comparison
// meaningless when barrel/type files get added or removed.
Expand Down
34 changes: 5 additions & 29 deletions scripts/token-benchmark.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import { parseArgs } from 'node:util';

import { ISSUES, extractAgentOutput, validateResult } from './token-benchmark-issues.js';
import { getBenchmarkVersion } from './bench-version.js';
import { median, round1, timeMedian } from './lib/bench-timing.js';

const __dirname = path.dirname(fileURLToPath(import.meta.url));
const root = path.resolve(__dirname, '..');
Expand Down Expand Up @@ -76,13 +77,6 @@ const selectedIssues = selectedIssueIds.map((id) => {

// ── Helpers ───────────────────────────────────────────────────────────────

/**
 * Returns the median of `arr`, or 0 when it is empty. The input is copied
 * before sorting, so its order is preserved.
 */
function median(arr) {
  if (arr.length === 0) {
    return 0;
  }
  const ordered = Array.from(arr).sort((x, y) => x - y);
  const upper = Math.floor(ordered.length / 2);
  if (ordered.length % 2) {
    return ordered[upper];
  }
  return (ordered[upper - 1] + ordered[upper]) / 2;
}

/** Rounds `n` to two decimal places. */
function round2(n) {
  const hundredths = Math.round(n * 100);
  return hundredths / 100;
}
Expand Down Expand Up @@ -261,24 +255,6 @@ async function runSession(mode, issue, nextjsDir) {

const PERF_RUNS = 3;

/** Rounds `n` to one decimal place. */
function round1(n) {
  const tenths = Math.round(n * 10);
  return tenths / 10;
}

/**
 * Times `fn` over `runs` sequential invocations (`PERF_RUNS` when omitted)
 * and returns the median elapsed time in milliseconds. Each call is awaited
 * before the next one starts.
 */
async function timeMedian(fn, runs = PERF_RUNS) {
  const samples = [];
  while (samples.length < runs) {
    const startedAt = performance.now();
    await fn();
    const finishedAt = performance.now();
    samples.push(finishedAt - startedAt);
  }
  return median(samples);
}

/**
* Run build/query/stats benchmarks against the Next.js graph.
* Reuses the same codegraph APIs as the existing benchmark scripts.
Expand Down Expand Up @@ -324,13 +300,13 @@ async function runPerfBenchmarks(nextjsDir) {
await timeMedian(async () => {
if (fs.existsSync(dbPath)) fs.unlinkSync(dbPath);
await buildGraph(nextjsDir, { engine, incremental: false });
}),
}, PERF_RUNS),
);

// No-op rebuild
console.error(` No-op rebuild (${engine})...`);
const noopRebuildMs = Math.round(
await timeMedian(() => buildGraph(nextjsDir, { engine, incremental: true })),
await timeMedian(() => buildGraph(nextjsDir, { engine, incremental: true }), PERF_RUNS),
);

buildResults[engine] = { fullBuildMs, noopRebuildMs };
Expand Down Expand Up @@ -379,12 +355,12 @@ async function runPerfBenchmarks(nextjsDir) {
for (const depth of [1, 3, 5]) {
// fnDeps
queryResults[`fnDeps_depth${depth}Ms`] = round1(
await timeMedian(() => fnDepsData(hubName, dbPath, { depth, noTests: true })),
await timeMedian(() => fnDepsData(hubName, dbPath, { depth, noTests: true }), PERF_RUNS),
);

// fnImpact
queryResults[`fnImpact_depth${depth}Ms`] = round1(
await timeMedian(() => fnImpactData(hubName, dbPath, { depth, noTests: true })),
await timeMedian(() => fnImpactData(hubName, dbPath, { depth, noTests: true }), PERF_RUNS),
);
}

Expand Down
43 changes: 30 additions & 13 deletions src/db/connection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { execFileSync } from 'node:child_process';
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
import { loadConfig } from '../infrastructure/config.js';
import { DEFAULTS, loadConfig } from '../infrastructure/config.js';
import { debug, warn } from '../infrastructure/logger.js';
import { getNative, isNativeAvailable } from '../infrastructure/native.js';
import { DbError, toErrorMessage } from '../shared/errors.js';
Expand Down Expand Up @@ -158,7 +158,10 @@ function isSameDirectory(a: string, b: string): boolean {
}
}

export function openDb(dbPath: string): LockedDatabase {
export function openDb(
dbPath: string,
busyTimeoutMs: number = DEFAULTS.db.busyTimeoutMs,
): LockedDatabase {
// Flush any deferred DB close from a previous build (avoids WAL contention)
flushDeferredClose();
const dir = path.dirname(dbPath);
Expand All @@ -167,7 +170,7 @@ export function openDb(dbPath: string): LockedDatabase {
const Database = getDatabase();
const db = new Database(dbPath) as unknown as LockedDatabase;
db.pragma('journal_mode = WAL');
db.pragma('busy_timeout = 5000');
db.pragma(`busy_timeout = ${busyTimeoutMs}`);
db.__lockPath = `${dbPath}.lock`;
return db;
}
Expand Down Expand Up @@ -327,7 +330,10 @@ export function findDbPath(customPath?: string): string {
}

/** Open a database in readonly mode, with a user-friendly error if the DB doesn't exist. */
export function openReadonlyOrFail(customPath?: string): BetterSqlite3Database {
export function openReadonlyOrFail(
customPath?: string,
busyTimeoutMs: number = DEFAULTS.db.busyTimeoutMs,
): BetterSqlite3Database {
const dbPath = findDbPath(customPath);
if (!fs.existsSync(dbPath)) {
throw new DbError(
Expand All @@ -337,7 +343,7 @@ export function openReadonlyOrFail(customPath?: string): BetterSqlite3Database {
}
const Database = getDatabase();
const db = new Database(dbPath, { readonly: true }) as unknown as BetterSqlite3Database;
db.pragma('busy_timeout = 5000');
db.pragma(`busy_timeout = ${busyTimeoutMs}`);

warnOnVersionMismatch(() => {
const row = db
Expand All @@ -349,27 +355,38 @@ export function openReadonlyOrFail(customPath?: string): BetterSqlite3Database {
return db;
}

/**
 * Effective engine plus config-derived DB settings shared by openRepo() and
 * openReadonlyWithNative().
 */
interface ResolvedDbSettings {
/** Engine after precedence is applied: explicit opts.engine, then config.build.engine, then 'auto'. */
engine: 'native' | 'wasm' | 'auto';
/** Value for the `busy_timeout` pragma, from config.db.busyTimeoutMs with DEFAULTS.db.busyTimeoutMs as fallback. */
busyTimeoutMs: number;
}

/**
* Resolve the effective engine for DB access: explicit opts.engine > config.build.engine > 'auto'.
* Resolve the effective engine for DB access (explicit opts.engine > config.build.engine >
* 'auto') alongside config.db.busyTimeoutMs, in a single loadConfig() call.
* Derives rootDir from the resolved DB path so loadConfig reads the right project config.
* Shared by openRepo() and openReadonlyWithNative() so the two call sites can't drift.
*
* MUST be called before opening any DB handle: loadConfig can throw (e.g. ConfigError
* via resolveSecrets on a malformed llm.apiKeyCommand config), and an already-open
* handle at that point would never be closed.
*/
function resolveDbEngine(
function resolveDbSettings(
customDbPath: string | undefined,
engineOpt: 'native' | 'wasm' | 'auto' | undefined,
): 'native' | 'wasm' | 'auto' {
): ResolvedDbSettings {
// Using findDbPath (not path.resolve(customDbPath)) ensures directory inputs like
// --db /path/to/repo are normalised to .codegraph/graph.db before we strip two levels.
// Convention: resolvedDbPath = <rootDir>/.codegraph/graph.db
const resolvedDbPath = customDbPath ? findDbPath(customDbPath) : undefined;
const rootDir = resolvedDbPath ? path.dirname(path.dirname(resolvedDbPath)) : undefined;
const config = loadConfig(rootDir);
// config.build.engine is already populated from CODEGRAPH_ENGINE env by applyEnvOverrides,
// so this covers both the env-var path and the .codegraphrc.json config-file path.
return engineOpt ?? loadConfig(rootDir).build.engine ?? 'auto';
return {
engine: engineOpt ?? config.build.engine ?? 'auto',
busyTimeoutMs: config.db.busyTimeoutMs ?? DEFAULTS.db.busyTimeoutMs,
};
}

/** Open a NativeRepository via rusqlite, throwing DbError if the DB file is missing. */
Expand Down Expand Up @@ -422,7 +439,7 @@ export function openRepo(

// Respect explicit engine selection: opts.engine > config.build.engine > auto.
// This ensures --engine wasm and benchmark workers bypass the native path.
const engine = resolveDbEngine(customDbPath, opts.engine);
const { engine, busyTimeoutMs } = resolveDbSettings(customDbPath, opts.engine);

// Try native rusqlite path first (Phase 6.14)
if (engine !== 'wasm' && isNativeAvailable()) {
Expand All @@ -442,7 +459,7 @@ export function openRepo(
}
}

const db = openReadonlyOrFail(customDbPath);
const db = openReadonlyOrFail(customDbPath, busyTimeoutMs);
return {
repo: new SqliteRepository(db),
close() {
Expand Down Expand Up @@ -476,9 +493,9 @@ export function openReadonlyWithNative(
// handle has been opened yet, so nothing is left leaked. (Previously this ran
// AFTER openReadonlyOrFail(), so a config error here leaked the already-open
// better-sqlite3 handle — see the phase-15 gauntlet finding.)
const engine = resolveDbEngine(customPath, opts.engine);
const { engine, busyTimeoutMs } = resolveDbSettings(customPath, opts.engine);

const db = openReadonlyOrFail(customPath);
const db = openReadonlyOrFail(customPath, busyTimeoutMs);

let nativeDb: NativeDatabase | undefined;
if (engine !== 'wasm' && isNativeAvailable()) {
Expand Down
Loading
Loading