From 2b03ea09651738da40fa61935a77574269a1a058 Mon Sep 17 00:00:00 2001
From: archief2910 
Date: Sun, 19 Apr 2026 01:46:27 +0530
Subject: [PATCH 1/4] feat(db): add hot-path indexes and query benchmark (#68)

---
 CONFIGURATION.md | 24 ++
 README.md | 12 +
 .../20260420_120000_add_hot_path_indexes.js | 63 ++++
 package.json | 1 +
 src/scripts/benchmark-queries.ts | 325 ++++++++++++++++++
 5 files changed, 425 insertions(+)
 create mode 100644 migrations/20260420_120000_add_hot_path_indexes.js
 create mode 100644 src/scripts/benchmark-queries.ts

diff --git a/CONFIGURATION.md b/CONFIGURATION.md
index 2826569d..a5bce081 100644
--- a/CONFIGURATION.md
+++ b/CONFIGURATION.md
@@ -78,6 +78,30 @@ The i2pd web console (tunnel status, `.b32.i2p` destinations) is published to th
 
 If you've set READ_REPLICAS to 4, you should configure RR0_ through RR3_.
 
+## Database indexes and benchmarking
+
+The schema ships with a small, query-driven set of indexes. The most important ones for relay hot paths are:
+
+| Index | Covers |
+|----------------------------------------------|----------------------------------------------------------------------------------------------------------|
+| `events_active_pubkey_kind_created_at_idx` | `REQ` with `authors`+`kinds` ordered by `created_at DESC`; `hasActiveRequestToVanish`; by-pubkey deletes. Partial on `deleted_at IS NULL`. |
+| `events_deleted_at_partial_idx` | Retention purge over soft-deleted rows. Partial on `deleted_at IS NOT NULL`. |
+| `invoices_pending_created_at_idx` | `findPendingInvoices` poll. Partial on `status = 'pending'`. |
+| `event_tags (tag_name, tag_value)` | NIP-01 generic tag filters (`#e`, `#p`, …) via the normalized `event_tags` table. |
+| `events_event_created_at_index` | Time-range scans (`since` / `until`). |
+| `events_event_kind_index` | Kind-only filters and purge kind-whitelist logic. |
+
+Run the read-only benchmark against your own database to confirm the planner is using the expected indexes and to record baseline latencies:
+
+```sh
+NODE_OPTIONS="-r dotenv/config" npm run db:benchmark
+NODE_OPTIONS="-r dotenv/config" npm run db:benchmark -- --runs 5 --kind 1 --limit 500
+```
+
+The benchmark issues only `EXPLAIN (ANALYZE, BUFFERS)` and `SELECT` statements — it never writes. Flags: `--runs <n>` (default 3), `--kind <kind>` (default 1 / `TEXT_NOTE`), `--limit <n>` (default 500), `--horizon-days <n>` (default 7), `--help`.
+
+The hot-path index migration (`20260420_120000_add_hot_path_indexes.js`) uses `CREATE INDEX CONCURRENTLY`, so it can be applied to a running relay without taking `ACCESS EXCLUSIVE` locks on the `events` or `invoices` tables.
+
 # Settings
 
 Running `nostream` for the first time creates the settings file in `/.nostr/settings.yaml`. If the file is not created and an error is thrown ensure that the `/.nostr` folder exists. The configuration directory can be changed by setting the `NOSTR_CONFIG_DIR` environment variable. `nostream` will pick up any changes to this settings file without needing to restart.

diff --git a/README.md b/README.md
index 5ddb1b6d..18004547 100644
--- a/README.md
+++ b/README.md
@@ -647,6 +647,18 @@ npm run export -- backup-2024-01-01.jsonl # custom filename
 ```
 The script reads the same `DB_*` environment variables used by the relay (see [CONFIGURATION.md](CONFIGURATION.md)).
+ +## Benchmark Database Queries + +Run the read-only query benchmark to record the planner's choices and timings for the relay's hot-path queries (REQ subscriptions, vanish checks, purge scans, pending-invoice polls): + +``` +npm run db:benchmark +npm run db:benchmark -- --runs 5 --kind 1 --limit 500 +``` + +The benchmark only issues `EXPLAIN (ANALYZE, BUFFERS)` and `SELECT` statements against your configured database — it never writes. Use it to confirm the `events_active_pubkey_kind_created_at_idx`, `events_deleted_at_partial_idx`, and `invoices_pending_created_at_idx` indexes are being picked up. See the *Database indexes and benchmarking* section of [CONFIGURATION.md](CONFIGURATION.md). + ## Relay Maintenance Use `clean-db` to wipe or prune `events` table data. This also removes diff --git a/migrations/20260420_120000_add_hot_path_indexes.js b/migrations/20260420_120000_add_hot_path_indexes.js new file mode 100644 index 00000000..bcc8c4e4 --- /dev/null +++ b/migrations/20260420_120000_add_hot_path_indexes.js @@ -0,0 +1,63 @@ +/** + * Add narrow, query-driven indexes to cover the hottest read paths. + * + * Each index is created with CREATE INDEX CONCURRENTLY so the migration can be + * applied to a running relay without taking an ACCESS EXCLUSIVE lock on the + * events table. CONCURRENTLY is not allowed inside a transaction, so this + * migration opts out of Knex's default transactional wrapper via + * `exports.config.transaction = false`. + * + * Rationale for each index is documented inline. See also: + * https://devcenter.heroku.com/articles/postgresql-indexes + */ + +exports.config = { transaction: false } + +exports.up = async function (knex) { + // Covers the hottest write-adjacent reads: + // + // 1. `EventRepository.hasActiveRequestToVanish(pubkey)` + // WHERE event_pubkey = ? AND event_kind = 62 AND deleted_at IS NULL + // -- invoked on every inbound event via UserRepository.isVanished + // + // 2. `EventRepository.deleteByPubkeyExceptKinds(pubkey, kinds)` + // WHERE event_pubkey = ? AND event_kind NOT IN (...) AND deleted_at IS NULL + // + // 3. NIP-01 REQ with `authors` + `kinds` filters ordered by created_at: + // WHERE event_pubkey IN (...) AND event_kind IN (...) + // ORDER BY event_created_at DESC LIMIT N + // + // Partial on `deleted_at IS NULL` so soft-deleted rows never bloat the index. + // DESC on event_created_at lets the planner satisfy LIMIT N without a sort. + await knex.raw(` + CREATE INDEX CONCURRENTLY IF NOT EXISTS events_active_pubkey_kind_created_at_idx + ON events (event_pubkey, event_kind, event_created_at DESC) + WHERE deleted_at IS NULL + `) + + // Supports the retention/purge scan in `deleteExpiredAndRetained`: + // WHERE deleted_at IS NOT NULL + // Partial index is tiny because well-maintained relays hard-delete these rows + // periodically and most events have deleted_at IS NULL. + await knex.raw(` + CREATE INDEX CONCURRENTLY IF NOT EXISTS events_deleted_at_partial_idx + ON events (deleted_at) + WHERE deleted_at IS NOT NULL + `) + + // Supports `InvoiceRepository.findPendingInvoices` which is polled by the + // maintenance worker: + // WHERE status = 'pending' ORDER BY created_at + // Partial on status='pending' so the index only contains the rows we scan. 
+ await knex.raw(` + CREATE INDEX CONCURRENTLY IF NOT EXISTS invoices_pending_created_at_idx + ON invoices (created_at) + WHERE status = 'pending' + `) +} + +exports.down = async function (knex) { + await knex.raw('DROP INDEX CONCURRENTLY IF EXISTS invoices_pending_created_at_idx') + await knex.raw('DROP INDEX CONCURRENTLY IF EXISTS events_deleted_at_partial_idx') + await knex.raw('DROP INDEX CONCURRENTLY IF EXISTS events_active_pubkey_kind_created_at_idx') +} diff --git a/package.json b/package.json index ac954483..d2edd5bf 100644 --- a/package.json +++ b/package.json @@ -42,6 +42,7 @@ "db:migrate": "knex migrate:latest", "db:migrate:rollback": "knex migrate:rollback", "db:seed": "knex seed:run", + "db:benchmark": "node -r ts-node/register src/scripts/benchmark-queries.ts", "pretest:unit": "node -e \"require('fs').mkdirSync('.test-reports/unit', {recursive: true})\"", "test:unit": "mocha 'test/**/*.spec.ts'", "test:unit:watch": "npm run test:unit -- --min --watch --watch-files src/**/*,test/**/*", diff --git a/src/scripts/benchmark-queries.ts b/src/scripts/benchmark-queries.ts new file mode 100644 index 00000000..e715b3ba --- /dev/null +++ b/src/scripts/benchmark-queries.ts @@ -0,0 +1,325 @@ +/** + * Read-only benchmark for the hot query paths on `events` / `invoices`. + * + * Runs `EXPLAIN (ANALYZE, BUFFERS, VERBOSE, FORMAT JSON)` against canonical + * query shapes used by the relay (REQ subscriptions, vanish checks, purge + * scans, pending invoice polls) and reports the planner's choice and the + * measured execution time so operators can validate index effectiveness + * before and after applying the hot-path-indexes migration. + * + * Usage: + * npm run db:benchmark + * npm run db:benchmark -- --runs 5 --kind 1 --limit 500 + * + * The script is read-only: it only issues EXPLAIN and SELECT statements. 
+ */ + +import { Knex } from 'knex' + +import { EventKinds } from '../constants/base' +import { getMasterDbClient } from '../database/client' + +type ExplainPlanNode = { + 'Node Type'?: string + 'Index Name'?: string + 'Relation Name'?: string + 'Actual Total Time'?: number + 'Actual Rows'?: number + 'Shared Hit Blocks'?: number + 'Shared Read Blocks'?: number + 'Plan Rows'?: number + Plans?: ExplainPlanNode[] +} + +type ExplainResult = { + Plan: ExplainPlanNode + 'Execution Time': number + 'Planning Time': number +} + +type BenchmarkCase = { + name: string + description: string + skipIf?: (ctx: BenchContext) => string | undefined + build: (ctx: BenchContext) => Knex.QueryBuilder | Knex.Raw +} + +type BenchContext = { + db: Knex + samplePubkey?: Buffer + eventCount: number + invoiceCount: number + kind: number + limit: number + horizonSeconds: number +} + +type CliOptions = { + runs: number + kind: number + limit: number + horizonDays: number +} + +function parseArgs(argv: string[]): CliOptions { + const opts: CliOptions = { + runs: 3, + kind: EventKinds.TEXT_NOTE, + limit: 500, + horizonDays: 7, + } + for (let i = 0; i < argv.length; i++) { + const arg = argv[i] + const next = argv[i + 1] + switch (arg) { + case '--runs': + opts.runs = Math.max(1, Number(next) || opts.runs) + i++ + break + case '--kind': + opts.kind = Number(next) || opts.kind + i++ + break + case '--limit': + opts.limit = Math.max(1, Number(next) || opts.limit) + i++ + break + case '--horizon-days': + opts.horizonDays = Math.max(1, Number(next) || opts.horizonDays) + i++ + break + case '--help': + case '-h': + printUsage() + process.exit(0) + } + } + return opts +} + +function printUsage(): void { + console.log( + [ + 'Usage: npm run db:benchmark -- [options]', + '', + 'Options:', + ' --runs Execute each query N times (default 3).', + ' --kind Event kind for kind-based queries (default 1).', + ' --limit LIMIT used in ordered queries (default 500).', + ' --horizon-days Lookback window for time-range queries (default 7).', + ' -h, --help Show this message.', + ].join('\n'), + ) +} + +function walkPlan(node: ExplainPlanNode, visit: (n: ExplainPlanNode) => void): void { + visit(node) + if (node.Plans) { + for (const child of node.Plans) { + walkPlan(child, visit) + } + } +} + +function summarizePlan(plan: ExplainPlanNode): { indexes: string[]; scans: string[] } { + const indexes = new Set() + const scans = new Set() + walkPlan(plan, (node) => { + if (node['Index Name']) { + indexes.add(node['Index Name']) + } + if (node['Node Type']) { + scans.add(node['Node Type']) + } + }) + return { + indexes: Array.from(indexes), + scans: Array.from(scans), + } +} + +async function explain(db: Knex, query: Knex.QueryBuilder | Knex.Raw): Promise { + const { sql, bindings } = query.toSQL().toNative + ? (query as any).toSQL().toNative() + : { sql: query.toString(), bindings: [] as unknown[] } + + const { rows } = await db.raw<{ rows: { 'QUERY PLAN': ExplainResult[] }[] }>( + `EXPLAIN (ANALYZE, BUFFERS, VERBOSE, FORMAT JSON) ${sql}`, + bindings, + ) + return rows[0]['QUERY PLAN'][0] +} + +function formatDuration(ms: number): string { + if (ms < 1) { + return `${(ms * 1000).toFixed(0)} µs` + } + if (ms < 1000) { + return `${ms.toFixed(2)} ms` + } + return `${(ms / 1000).toFixed(2)} s` +} + +function buildCases(): BenchmarkCase[] { + return [ + { + name: 'REQ: authors + kinds ORDER BY created_at DESC', + description: + "NIP-01 REQ with a single pubkey filter + kind=TEXT_NOTE. 
Canonical per-author subscription; should hit events_active_pubkey_kind_created_at_idx.", + skipIf: (ctx) => (ctx.samplePubkey ? undefined : 'no events rows found'), + build: (ctx) => + ctx.db('events') + .select('event_id', 'event_pubkey', 'event_kind', 'event_created_at') + .where('event_pubkey', ctx.samplePubkey as Buffer) + .whereIn('event_kind', [ctx.kind]) + .whereNull('deleted_at') + .orderBy('event_created_at', 'desc') + .orderBy('event_id', 'asc') + .limit(ctx.limit), + }, + { + name: 'REQ: kind + created_at time range', + description: + 'REQ with no authors but a time window and a kind. Exercises the (kind, created_at) access paths.', + build: (ctx) => { + const now = Math.floor(Date.now() / 1000) + const since = now - ctx.horizonSeconds + return ctx.db('events') + .select('event_id') + .where('event_kind', ctx.kind) + .whereBetween('event_created_at', [since, now]) + .whereNull('deleted_at') + .orderBy('event_created_at', 'desc') + .limit(ctx.limit) + }, + }, + { + name: 'hasActiveRequestToVanish (pubkey + kind=62 + not deleted)', + description: + 'Exact query run on every inbound event via UserRepository.isVanished; latency here is a per-message tax.', + skipIf: (ctx) => (ctx.samplePubkey ? undefined : 'no events rows found'), + build: (ctx) => + ctx.db('events') + .select('event_id') + .where('event_pubkey', ctx.samplePubkey as Buffer) + .where('event_kind', EventKinds.REQUEST_TO_VANISH) + .whereNull('deleted_at') + .limit(1), + }, + { + name: 'Purge scan (soft-deleted rows)', + description: + 'MaintenanceWorker retention sweep; hits events_deleted_at_partial_idx when present.', + build: (ctx) => + ctx.db('events').select('event_id').whereNotNull('deleted_at').limit(ctx.limit), + }, + { + name: 'Purge scan (expired events)', + description: + 'Retention sweep by expires_at; already served by the existing expires_at btree.', + build: (ctx) => { + const now = Math.floor(Date.now() / 1000) + return ctx.db('events').select('event_id').where('expires_at', '<', now).limit(ctx.limit) + }, + }, + { + name: 'findPendingInvoices (status=pending ORDER BY created_at)', + description: + 'InvoiceRepository poll; hits invoices_pending_created_at_idx when present.', + skipIf: (ctx) => (ctx.invoiceCount > 0 ? undefined : 'invoices table is empty'), + build: (ctx) => + ctx.db('invoices').select('id').where('status', 'pending').orderBy('created_at', 'asc').limit(ctx.limit), + }, + ] +} + +async function gatherContext(db: Knex, options: CliOptions): Promise { + const [{ count: eventCountText = '0' } = { count: '0' }] = await db('events').count('* as count') + const [{ count: invoiceCountText = '0' } = { count: '0' }] = await db('invoices').count('* as count') + const sample = await db('events').select('event_pubkey').whereNull('deleted_at').limit(1).first() + + return { + db, + samplePubkey: sample?.event_pubkey, + eventCount: Number(eventCountText), + invoiceCount: Number(invoiceCountText), + kind: options.kind, + limit: options.limit, + horizonSeconds: options.horizonDays * 86400, + } +} + +function printHeader(ctx: BenchContext, options: CliOptions): void { + console.log('Nostream query benchmark') + console.log('------------------------') + console.log(`events rows: ${ctx.eventCount.toLocaleString()}`) + console.log(`invoices rows: ${ctx.invoiceCount.toLocaleString()}`) + console.log(`sample pubkey: ${ctx.samplePubkey ? 
+  console.log(`runs per query: ${options.runs}`)
+  console.log(`kind (REQ/time): ${options.kind}`)
+  console.log(`limit: ${options.limit}`)
+  console.log(`time horizon (days): ${options.horizonDays}`)
+  console.log('')
+}
+
+async function runCase(db: Knex, runs: number, testCase: BenchmarkCase, ctx: BenchContext): Promise<void> {
+  const skip = testCase.skipIf?.(ctx)
+  console.log(`• ${testCase.name}`)
+  if (skip) {
+    console.log(`  skipped: ${skip}`)
+    console.log('')
+    return
+  }
+  console.log(`  ${testCase.description}`)
+
+  const timings: number[] = []
+  let planningTime = 0
+  let indexes: string[] = []
+  let scans: string[] = []
+  let rowsReturned = 0
+
+  for (let i = 0; i < runs; i++) {
+    const plan = await explain(db, testCase.build(ctx))
+    timings.push(plan['Execution Time'])
+    planningTime = plan['Planning Time']
+    const summary = summarizePlan(plan.Plan)
+    indexes = summary.indexes
+    scans = summary.scans
+    rowsReturned = plan.Plan['Actual Rows'] ?? 0
+  }
+
+  const min = Math.min(...timings)
+  const max = Math.max(...timings)
+  const avg = timings.reduce((a, b) => a + b, 0) / timings.length
+
+  console.log(
+    [
+      `  exec (min/avg/max): ${formatDuration(min)} / ${formatDuration(avg)} / ${formatDuration(max)}`,
+      `  planning: ${formatDuration(planningTime)}`,
+      `  rows returned: ${rowsReturned.toLocaleString()}`,
+      `  node types: ${scans.join(', ') || '<none>'}`,
+      `  indexes used: ${indexes.length ? indexes.join(', ') : '<none>'}`,
+    ].join('\n'),
+  )
+  console.log('')
+}
+
+async function main(): Promise<void> {
+  const options = parseArgs(process.argv.slice(2))
+  const db = getMasterDbClient()
+
+  try {
+    const ctx = await gatherContext(db, options)
+    printHeader(ctx, options)
+    for (const testCase of buildCases()) {
+      await runCase(db, options.runs, testCase, ctx)
+    }
+  } finally {
+    await db.destroy()
+  }
+}
+
+main().catch((error) => {
+  console.error('Benchmark failed:', error)
+  process.exitCode = 1
+})

From 11f2180c6dfae934e6d3ddf55d71a6c6cc65a274 Mon Sep 17 00:00:00 2001
From: archief2910 
Date: Sun, 19 Apr 2026 20:57:23 +0530
Subject: [PATCH 2/4] docs: added changeset (#68)

---
 .changeset/hot-path-indexes-benchmark.md | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 .changeset/hot-path-indexes-benchmark.md

diff --git a/.changeset/hot-path-indexes-benchmark.md b/.changeset/hot-path-indexes-benchmark.md
new file mode 100644
index 00000000..de1024e9
--- /dev/null
+++ b/.changeset/hot-path-indexes-benchmark.md
@@ -0,0 +1,5 @@
+---
+"nostream": minor
+---
+
+Add hot-path PostgreSQL indexes for subscription, vanish, retention, and invoice queries; add `db:benchmark` and `db:verify-index-impact` tooling; document index rationale and benchmarking. Closes #68.
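Taken together, the first two patches claim that the planner will pick these three indexes on the relay's hot paths. A quick way to sanity-check that claim on a live database, independent of the scripts in this series — a sketch that assumes only the standard PostgreSQL catalog views (`pg_indexes`, `pg_stat_user_indexes`), nothing added by these patches:

```sql
-- Confirm the three hot-path indexes exist with the expected definitions.
SELECT indexname, indexdef
FROM pg_indexes
WHERE indexname IN (
  'events_active_pubkey_kind_created_at_idx',
  'events_deleted_at_partial_idx',
  'invoices_pending_created_at_idx'
);

-- Confirm they accumulate scans once the relay (or the benchmark) has run.
SELECT relname, indexrelname, idx_scan, idx_tup_read
FROM pg_stat_user_indexes
WHERE indexrelname IN (
  'events_active_pubkey_kind_created_at_idx',
  'events_deleted_at_partial_idx',
  'invoices_pending_created_at_idx'
)
ORDER BY idx_scan DESC;
```

If `idx_scan` stays at zero after a `db:benchmark` run, the planner is not choosing the index, and the `EXPLAIN` output printed by the benchmark is the place to look next.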
From 9ca336776f5f17ec66bc301d2e08ca608c151059 Mon Sep 17 00:00:00 2001
From: archief2910 
Date: Sun, 19 Apr 2026 22:10:26 +0530
Subject: [PATCH 3/4] chore: addressed review comments (#68)

---
 CONFIGURATION.md | 16 +-
 README.md | 10 +-
 .../20260420_120000_add_hot_path_indexes.js | 49 ++-
 package.json | 3 +-
 scripts/verify-index-impact.ts | 337 ++++++++++++++++++
 src/repositories/invoice-repository.ts | 4 +
 src/scripts/benchmark-queries.ts | 42 ++-
 .../hot-path-indexes-migration.spec.ts | 17 +
 8 files changed, 441 insertions(+), 37 deletions(-)
 create mode 100644 scripts/verify-index-impact.ts
 create mode 100644 test/unit/migrations/hot-path-indexes-migration.spec.ts

diff --git a/CONFIGURATION.md b/CONFIGURATION.md
index ab9d168e..927da453 100644
--- a/CONFIGURATION.md
+++ b/CONFIGURATION.md
@@ -84,9 +84,9 @@ The schema ships with a small, query-driven set of indexes. The most important o
 
 | Index | Covers |
 |----------------------------------------------|----------------------------------------------------------------------------------------------------------|
-| `events_active_pubkey_kind_created_at_idx` | `REQ` with `authors`+`kinds` ordered by `created_at DESC`; `hasActiveRequestToVanish`; by-pubkey deletes. Partial on `deleted_at IS NULL`. |
+| `events_active_pubkey_kind_created_at_idx` | `REQ` with `authors`+`kinds` ordered by `created_at DESC, event_id ASC`; `hasActiveRequestToVanish`; by-pubkey deletes. Composite key `(event_pubkey, event_kind, event_created_at DESC, event_id)` so the ORDER BY tie-breaker is satisfied from the index without a sort step. |
 | `events_deleted_at_partial_idx` | Retention purge over soft-deleted rows. Partial on `deleted_at IS NOT NULL`. |
-| `invoices_pending_created_at_idx` | `findPendingInvoices` poll. Partial on `status = 'pending'`. |
+| `invoices_pending_created_at_idx` | `findPendingInvoices` poll (`ORDER BY created_at ASC`). Partial on `status = 'pending'`. |
 | `event_tags (tag_name, tag_value)` | NIP-01 generic tag filters (`#e`, `#p`, …) via the normalized `event_tags` table. |
 | `events_event_created_at_index` | Time-range scans (`since` / `until`). |
 | `events_event_kind_index` | Kind-only filters and purge kind-whitelist logic. |
@@ -94,11 +94,17 @@ The schema ships with a small, query-driven set of indexes. The most important o
 
 Run the read-only benchmark against your own database to confirm the planner is using the expected indexes and to record baseline latencies:
 
 ```sh
-NODE_OPTIONS="-r dotenv/config" npm run db:benchmark
-NODE_OPTIONS="-r dotenv/config" npm run db:benchmark -- --runs 5 --kind 1 --limit 500
+npm run db:benchmark
+npm run db:benchmark -- --runs 5 --kind 1 --limit 500
 ```
 
-The benchmark issues only `EXPLAIN (ANALYZE, BUFFERS)` and `SELECT` statements — it never writes. Flags: `--runs <n>` (default 3), `--kind <kind>` (default 1 / `TEXT_NOTE`), `--limit <n>` (default 500), `--horizon-days <n>` (default 7), `--help`.
+The `db:benchmark` script loads the local `.env` file automatically (via `node --env-file-if-exists=.env`), using the same `DB_HOST`/`DB_PORT`/`DB_USER`/`DB_PASSWORD`/`DB_NAME` variables as the relay. The benchmark issues only `EXPLAIN (ANALYZE, BUFFERS)` and `SELECT` statements — it never writes. Flags: `--runs <n>` (default 3), `--kind <kind>` (default 1 / `TEXT_NOTE`; pass `0` for SET_METADATA), `--limit <n>` (default 500), `--horizon-days <n>` (default 7), `--help`.
+ +For a full before/after proof of the index impact (seeds a throwaway dataset, drops and recreates the indexes, and prints a BEFORE/AFTER table), use: + +```sh +npm run db:verify-index-impact +``` The hot-path index migration (`20260420_120000_add_hot_path_indexes.js`) uses `CREATE INDEX CONCURRENTLY`, so it can be applied to a running relay without taking `ACCESS EXCLUSIVE` locks on the `events` or `invoices` tables. diff --git a/README.md b/README.md index 7b30c576..2b53b333 100644 --- a/README.md +++ b/README.md @@ -660,7 +660,15 @@ npm run db:benchmark npm run db:benchmark -- --runs 5 --kind 1 --limit 500 ``` -The benchmark only issues `EXPLAIN (ANALYZE, BUFFERS)` and `SELECT` statements against your configured database — it never writes. Use it to confirm the `events_active_pubkey_kind_created_at_idx`, `events_deleted_at_partial_idx`, and `invoices_pending_created_at_idx` indexes are being picked up. See the *Database indexes and benchmarking* section of [CONFIGURATION.md](CONFIGURATION.md). +The benchmark only issues `EXPLAIN (ANALYZE, BUFFERS)` and `SELECT` statements against your configured database — it never writes. It loads `DB_*` variables from `.env` automatically (via `node --env-file-if-exists=.env`), so no extra setup is required beyond the one you already need to run the relay. Use it to confirm the `events_active_pubkey_kind_created_at_idx`, `events_deleted_at_partial_idx`, and `invoices_pending_created_at_idx` indexes are being picked up. + +For a reproducible before/after proof on a throwaway dataset, run: + +``` +npm run db:verify-index-impact +``` + +It seeds ~200k synthetic events, drops the hot-path indexes, runs EXPLAIN (ANALYZE, BUFFERS) for each hot query, recreates the indexes, and prints a BEFORE/AFTER table. See the *Database indexes and benchmarking* section of [CONFIGURATION.md](CONFIGURATION.md). ## Relay Maintenance diff --git a/migrations/20260420_120000_add_hot_path_indexes.js b/migrations/20260420_120000_add_hot_path_indexes.js index bcc8c4e4..3f9ef575 100644 --- a/migrations/20260420_120000_add_hot_path_indexes.js +++ b/migrations/20260420_120000_add_hot_path_indexes.js @@ -14,41 +14,54 @@ exports.config = { transaction: false } exports.up = async function (knex) { - // Covers the hottest write-adjacent reads: + // Covers the hottest subscription / per-message reads: // - // 1. `EventRepository.hasActiveRequestToVanish(pubkey)` + // 1. NIP-01 REQ with `authors` + `kinds` ordered by created_at DESC + // (see EventRepository.findByFilters): + // WHERE event_pubkey = ? AND event_kind IN (...) + // ORDER BY event_created_at DESC, event_id ASC LIMIT N + // + // 2. `EventRepository.hasActiveRequestToVanish(pubkey)` — invoked on every + // inbound event via UserRepository.isVanished: // WHERE event_pubkey = ? AND event_kind = 62 AND deleted_at IS NULL - // -- invoked on every inbound event via UserRepository.isVanished // - // 2. `EventRepository.deleteByPubkeyExceptKinds(pubkey, kinds)` + // 3. `EventRepository.deleteByPubkeyExceptKinds(pubkey, kinds)`: // WHERE event_pubkey = ? AND event_kind NOT IN (...) AND deleted_at IS NULL // - // 3. NIP-01 REQ with `authors` + `kinds` filters ordered by created_at: - // WHERE event_pubkey IN (...) AND event_kind IN (...) - // ORDER BY event_created_at DESC LIMIT N + // The index is intentionally NOT partial on `deleted_at IS NULL`: the REQ + // subscription path in findByFilters does not currently add that predicate, + // so a partial index would be ineligible for the most important query shape. 
+ // Soft-deleted rows are a small fraction of total rows in practice (they get + // hard-deleted by the retention sweep), so the bloat is negligible compared + // to the benefit of the index being usable by the hot path. // - // Partial on `deleted_at IS NULL` so soft-deleted rows never bloat the index. - // DESC on event_created_at lets the planner satisfy LIMIT N without a sort. + // Including `event_id` as the final column makes the composite key match the + // full ORDER BY (created_at DESC, event_id ASC) used by findByFilters, so the + // planner can satisfy LIMIT N directly from the index without an extra sort + // step for the tie-breaker. await knex.raw(` CREATE INDEX CONCURRENTLY IF NOT EXISTS events_active_pubkey_kind_created_at_idx - ON events (event_pubkey, event_kind, event_created_at DESC) - WHERE deleted_at IS NULL + ON events (event_pubkey, event_kind, event_created_at DESC, event_id) `) - // Supports the retention/purge scan in `deleteExpiredAndRetained`: + // Supports the retention / purge scan in `deleteExpiredAndRetained` and the + // vanish hard-delete follow-up: // WHERE deleted_at IS NOT NULL - // Partial index is tiny because well-maintained relays hard-delete these rows - // periodically and most events have deleted_at IS NULL. + // Partial index is tiny because well-maintained relays hard-delete these + // rows periodically and the vast majority of events have deleted_at IS NULL. await knex.raw(` CREATE INDEX CONCURRENTLY IF NOT EXISTS events_deleted_at_partial_idx ON events (deleted_at) WHERE deleted_at IS NOT NULL `) - // Supports `InvoiceRepository.findPendingInvoices` which is polled by the - // maintenance worker: - // WHERE status = 'pending' ORDER BY created_at - // Partial on status='pending' so the index only contains the rows we scan. + // Supports `InvoiceRepository.findPendingInvoices`, which is polled by the + // maintenance worker to detect settled invoices: + // WHERE status = 'pending' ORDER BY created_at ASC OFFSET ? LIMIT ? + // Partial on status = 'pending' so the index only contains the rows the + // poller actually scans. Keyed on `created_at` so the planner can satisfy + // the ORDER BY straight from the index (FIFO polling, bounded tail latency + // even with large pending backlogs). await knex.raw(` CREATE INDEX CONCURRENTLY IF NOT EXISTS invoices_pending_created_at_idx ON invoices (created_at) diff --git a/package.json b/package.json index f9f304bb..06945be1 100644 --- a/package.json +++ b/package.json @@ -43,7 +43,8 @@ "db:migrate": "knex migrate:latest", "db:migrate:rollback": "knex migrate:rollback", "db:seed": "knex seed:run", - "db:benchmark": "node -r ts-node/register src/scripts/benchmark-queries.ts", + "db:benchmark": "node --env-file-if-exists=.env -r ts-node/register src/scripts/benchmark-queries.ts", + "db:verify-index-impact": "node --env-file-if-exists=.env -r ts-node/register scripts/verify-index-impact.ts", "pretest:unit": "node -e \"require('fs').mkdirSync('.test-reports/unit', {recursive: true})\"", "test:unit": "mocha 'test/**/*.spec.ts'", "test:unit:watch": "npm run test:unit -- --min --watch --watch-files src/**/*,test/**/*", diff --git a/scripts/verify-index-impact.ts b/scripts/verify-index-impact.ts new file mode 100644 index 00000000..29655159 --- /dev/null +++ b/scripts/verify-index-impact.ts @@ -0,0 +1,337 @@ +/** + * End-to-end proof harness for the hot-path-indexes migration. 
+ * + * Seeds ~N realistic event rows into a Postgres instance, drops the three + * indexes added by migration 20260420_120000_add_hot_path_indexes.js, runs + * EXPLAIN (ANALYZE, BUFFERS) for the hot-path queries, recreates the indexes, + * runs the same EXPLAINs again, and prints a BEFORE/AFTER table. + * + * This script is intentionally self-contained so reviewers can reproduce the + * numbers without trusting the main benchmark script. It expects the standard + * DB_HOST/DB_PORT/DB_USER/DB_PASSWORD/DB_NAME env vars (same as the relay). + * + * Usage: + * node -r ts-node/register scripts/verify-index-impact.ts [--events N] [--pubkeys N] [--runs N] + * npm run db:verify-index-impact + */ + +import { randomBytes } from 'node:crypto' +import pg from 'pg' + +const { Client } = pg + +type ExplainPlanNode = { + 'Node Type'?: string + 'Index Name'?: string + Plans?: ExplainPlanNode[] +} + +type ExplainResult = { + Plan: ExplainPlanNode + 'Execution Time': number + 'Planning Time': number +} + +type BenchmarkCase = { + name: string + sql: string + params: unknown[] +} + +const args = process.argv.slice(2) +const getFlag = (name: string, def: number): number => { + const idx = args.indexOf(`--${name}`) + if (idx === -1) { + return def + } + const value = Number(args[idx + 1]) + return Number.isFinite(value) ? value : def +} + +const EVENTS = getFlag('events', 200_000) +const PUBKEYS = getFlag('pubkeys', 500) +const RUNS = getFlag('runs', 5) + +const client = new Client({ + host: process.env.DB_HOST ?? '127.0.0.1', + port: Number(process.env.DB_PORT ?? 5432), + user: process.env.DB_USER ?? 'nostr_ts_relay', + password: process.env.DB_PASSWORD ?? 'nostr_ts_relay', + database: process.env.DB_NAME ?? 'nostr_ts_relay', +}) + +const kinds = [0, 1, 1, 1, 1, 1, 3, 4, 7, 7, 1059, 62] + +function randPubkey(): Buffer { + return randomBytes(32) +} + +async function seed(): Promise { + const { rows } = await client.query<{ count: number }>('SELECT COUNT(*)::int AS count FROM events') + const count = rows[0]?.count ?? 0 + if (count >= EVENTS) { + console.log(`seed: skipping (events=${count} >= target=${EVENTS})`) + return + } + + console.log(`seed: inserting ${EVENTS - count} events across ${PUBKEYS} pubkeys…`) + + const pubkeys = Array.from({ length: PUBKEYS }, randPubkey) + const now = Math.floor(Date.now() / 1000) + const BATCH = 2000 + const toInsert = EVENTS - count + + await client.query('BEGIN') + await client.query('ALTER TABLE events DISABLE TRIGGER insert_event_tags') + try { + for (let i = 0; i < toInsert; i += BATCH) { + const values: string[] = [] + const params: unknown[] = [] + const size = Math.min(BATCH, toInsert - i) + for (let j = 0; j < size; j++) { + const idx = params.length + const pk = pubkeys[(i + j) % PUBKEYS] + const kind = kinds[(i + j) % kinds.length] + const created = now - Math.floor(Math.random() * 60 * 86400) + const deleted = Math.random() < 0.02 ? 
new Date(created * 1000) : null + params.push( + randomBytes(32), + pk, + created, + kind, + '[]', + '', + randomBytes(64), + null, + null, + deleted, + ) + values.push( + `($${idx + 1}, $${idx + 2}, $${idx + 3}, $${idx + 4}, $${idx + 5}::jsonb, $${idx + 6}, $${idx + 7}, $${idx + 8}, $${idx + 9}, $${idx + 10})`, + ) + } + await client.query( + `INSERT INTO events (event_id, event_pubkey, event_created_at, event_kind, event_tags, event_content, event_signature, event_deduplication, expires_at, deleted_at) + VALUES ${values.join(',')} ON CONFLICT DO NOTHING`, + params, + ) + if ((i / BATCH) % 10 === 0) { + process.stdout.write(` inserted ${i + size}/${toInsert}\r`) + } + } + } finally { + await client.query('ALTER TABLE events ENABLE TRIGGER insert_event_tags') + await client.query('COMMIT') + } + + console.log('\nseed: ANALYZE events') + await client.query('ANALYZE events') + + const { rows: invRows } = await client.query<{ count: number }>('SELECT COUNT(*)::int AS count FROM invoices') + const pend = invRows[0]?.count ?? 0 + if (pend === 0) { + console.log('seed: inserting 1000 pending invoices…') + const inv: string[] = [] + const invParams: unknown[] = [] + for (let i = 0; i < 1000; i++) { + const idx = invParams.length + invParams.push(randomBytes(32), `lnbc${i}`, 1000, 'sats', 'pending', 'bench') + inv.push(`(uuid_generate_v4(), $${idx + 1}, $${idx + 2}, $${idx + 3}, $${idx + 4}, $${idx + 5}, $${idx + 6})`) + } + await client.query( + `INSERT INTO invoices (id, pubkey, bolt11, amount_requested, unit, status, description) + VALUES ${inv.join(',')}`, + invParams, + ) + await client.query('ANALYZE invoices') + } +} + +async function dropHotPathIndexes(): Promise { + console.log('before: dropping hot-path indexes') + await client.query('DROP INDEX IF EXISTS events_active_pubkey_kind_created_at_idx') + await client.query('DROP INDEX IF EXISTS events_deleted_at_partial_idx') + await client.query('DROP INDEX IF EXISTS invoices_pending_created_at_idx') +} + +async function createHotPathIndexes(): Promise { + console.log('after: creating hot-path indexes') + // Shape must match migrations/20260420_120000_add_hot_path_indexes.js + // exactly, otherwise the before/after numbers are meaningless. + await client.query(`CREATE INDEX IF NOT EXISTS events_active_pubkey_kind_created_at_idx + ON events (event_pubkey, event_kind, event_created_at DESC, event_id)`) + await client.query(`CREATE INDEX IF NOT EXISTS events_deleted_at_partial_idx + ON events (deleted_at) WHERE deleted_at IS NOT NULL`) + await client.query(`CREATE INDEX IF NOT EXISTS invoices_pending_created_at_idx + ON invoices (created_at) WHERE status = 'pending'`) + await client.query('ANALYZE events') + await client.query('ANALYZE invoices') +} + +async function pickSamplePubkey(): Promise { + // Production REQ does not filter on deleted_at, so pick the densest pubkey + // regardless of soft-delete state — mirrors what EventRepository.findByFilters + // will actually scan. + const { rows } = await client.query<{ event_pubkey: Buffer }>( + 'SELECT event_pubkey FROM events GROUP BY event_pubkey ORDER BY COUNT(*) DESC LIMIT 1', + ) + return rows[0]?.event_pubkey +} + +function cases(samplePubkey: Buffer): BenchmarkCase[] { + const now = Math.floor(Date.now() / 1000) + const sevenDaysAgo = now - 7 * 86400 + return [ + { + // Shape matches EventRepository.findByFilters exactly. 
+      name: 'REQ authors+kind ORDER BY created_at DESC LIMIT 500',
+      sql: `SELECT event_id FROM events
+        WHERE event_pubkey = $1 AND event_kind = ANY($2::int[])
+        ORDER BY event_created_at DESC, event_id ASC LIMIT 500`,
+      params: [samplePubkey, [1]],
+    },
+    {
+      // This is the only hot path that filters on deleted_at in production.
+      name: 'hasActiveRequestToVanish (pubkey + kind=62)',
+      sql: `SELECT event_id FROM events
+        WHERE event_pubkey = $1 AND event_kind = 62 AND deleted_at IS NULL LIMIT 1`,
+      params: [samplePubkey],
+    },
+    {
+      name: 'Purge scan: soft-deleted rows',
+      sql: `SELECT event_id FROM events WHERE deleted_at IS NOT NULL LIMIT 500`,
+      params: [],
+    },
+    {
+      // Shape matches InvoiceRepository.findPendingInvoices exactly.
+      name: 'findPendingInvoices ORDER BY created_at',
+      sql: `SELECT id FROM invoices WHERE status = 'pending' ORDER BY created_at ASC OFFSET 0 LIMIT 500`,
+      params: [],
+    },
+    {
+      name: 'REQ kind + time range ORDER BY created_at DESC LIMIT 500',
+      sql: `SELECT event_id FROM events
+        WHERE event_kind = 1 AND event_created_at BETWEEN $1 AND $2
+        ORDER BY event_created_at DESC, event_id ASC LIMIT 500`,
+      params: [sevenDaysAgo, now],
+    },
+  ]
+}
+
+async function explain(sql: string, params: unknown[]): Promise<ExplainResult> {
+  const { rows } = await client.query<{ 'QUERY PLAN': ExplainResult[] }>(
+    `EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON) ${sql}`,
+    params,
+  )
+  const plan = rows[0]?.['QUERY PLAN']?.[0]
+  if (!plan) {
+    throw new Error('EXPLAIN returned no plan')
+  }
+  return plan
+}
+
+function walk(node: ExplainPlanNode, visit: (n: ExplainPlanNode) => void): void {
+  visit(node)
+  if (node.Plans) {
+    for (const c of node.Plans) {
+      walk(c, visit)
+    }
+  }
+}
+
+function summarize(plan: ExplainResult): {
+  indexes: string[]
+  nodeTypes: string[]
+  execMs: number
+  planMs: number
+} {
+  const indexes = new Set<string>()
+  const nodeTypes = new Set<string>()
+  walk(plan.Plan, (n) => {
+    if (n['Index Name']) {
+      indexes.add(n['Index Name'])
+    }
+    if (n['Node Type']) {
+      nodeTypes.add(n['Node Type'])
+    }
+  })
+  return {
+    indexes: [...indexes],
+    nodeTypes: [...nodeTypes],
+    execMs: plan['Execution Time'],
+    planMs: plan['Planning Time'],
+  }
+}
+
+type MeasureResult = {
+  indexes: string[]
+  nodeTypes: string[]
+  min: number
+  median: number
+  max: number
+}
+
+async function measure(testCase: BenchmarkCase): Promise<MeasureResult> {
+  const runs: ExplainResult[] = []
+  for (let i = 0; i < RUNS; i++) {
+    runs.push(await explain(testCase.sql, testCase.params))
+  }
+  const summaries = runs.map(summarize)
+  const exec = summaries.map((s) => s.execMs).sort((a, b) => a - b)
+  const median = exec[Math.floor(exec.length / 2)]
+  const min = exec[0]
+  const max = exec[exec.length - 1]
+  return {
+    indexes: summaries[0]?.indexes ?? [],
+    nodeTypes: summaries[0]?.nodeTypes ?? [],
+    min,
+    median,
+    max,
+  }
+}
+
+async function main(): Promise<void> {
+  await client.connect()
+  try {
+    await seed()
+    const samplePubkey = await pickSamplePubkey()
+    if (!samplePubkey) {
+      console.error('no pubkey found — seeding failed')
+      process.exit(1)
+    }
+
+    await dropHotPathIndexes()
+    const before: Array<{ name: string } & MeasureResult> = []
+    for (const tc of cases(samplePubkey)) {
+      before.push({ name: tc.name, ...(await measure(tc)) })
+    }
+
+    await createHotPathIndexes()
+    const after: Array<{ name: string } & MeasureResult> = []
+    for (const tc of cases(samplePubkey)) {
+      after.push({ name: tc.name, ...(await measure(tc)) })
+    }
+
+    console.log('\n=== RESULTS (median of %d runs, milliseconds) ===\n', RUNS)
+    for (let i = 0; i < before.length; i++) {
+      const b = before[i]
+      const a = after[i]
+      const speedup = (b.median / a.median).toFixed(2)
+      console.log(`• ${b.name}`)
+      console.log(
+        `  BEFORE: ${b.median.toFixed(2)} ms | nodes=${b.nodeTypes.join(',')} | idx=[${b.indexes.join(', ') || 'none'}]`,
+      )
+      console.log(
+        `  AFTER: ${a.median.toFixed(2)} ms | nodes=${a.nodeTypes.join(',')} | idx=[${a.indexes.join(', ') || 'none'}]`,
+      )
+      console.log(`  SPEEDUP: ${speedup}x\n`)
+    }
+  } finally {
+    await client.end()
+  }
+}
+
+main().catch((err: unknown) => {
+  console.error(err)
+  process.exit(1)
+})
diff --git a/src/repositories/invoice-repository.ts b/src/repositories/invoice-repository.ts
index f002a150..9c984b2a 100644
--- a/src/repositories/invoice-repository.ts
+++ b/src/repositories/invoice-repository.ts
@@ -40,8 +40,12 @@ export class InvoiceRepository implements IInvoiceRepository {
   }
 
   public async findPendingInvoices(offset = 0, limit = 10, client: DatabaseClient = this.dbClient): Promise<Invoice[]> {
+    // Order by created_at ASC for deterministic FIFO polling: oldest pending
+    // invoices are picked up first, and the scan is driven by
+    // invoices_pending_created_at_idx (partial on status = 'pending').
     const dbInvoices = await client('invoices')
       .where('status', InvoiceStatus.PENDING)
+      .orderBy('created_at', 'asc')
       .offset(offset)
       .limit(limit)
       .select()
diff --git a/src/scripts/benchmark-queries.ts b/src/scripts/benchmark-queries.ts
index e715b3ba..cbe2d9af 100644
--- a/src/scripts/benchmark-queries.ts
+++ b/src/scripts/benchmark-queries.ts
@@ -17,6 +17,7 @@ import { Knex } from 'knex'
 
 import { EventKinds } from '../constants/base'
+import { InvoiceStatus } from '../@types/invoice'
 import { getMasterDbClient } from '../database/client'
 
 type ExplainPlanNode = {
@@ -61,6 +62,16 @@ type CliOptions = {
   horizonDays: number
 }
 
+function parseIntArg(raw: string | undefined, fallback: number, { min = Number.NEGATIVE_INFINITY } = {}): number {
+  // Use Number.isFinite rather than falsy-coalescing so `0` is a valid input
+  // (e.g. `--kind 0` selects SET_METADATA, which is a valid Nostr kind).
+  const parsed = Number(raw)
+  if (!Number.isFinite(parsed)) {
+    return fallback
+  }
+  return Math.max(min, parsed)
+}
+
 function parseArgs(argv: string[]): CliOptions {
   const opts: CliOptions = {
     runs: 3,
@@ -73,19 +84,19 @@ function parseArgs(argv: string[]): CliOptions {
     const next = argv[i + 1]
     switch (arg) {
       case '--runs':
-        opts.runs = Math.max(1, Number(next) || opts.runs)
+        opts.runs = parseIntArg(next, opts.runs, { min: 1 })
         i++
         break
       case '--kind':
-        opts.kind = Number(next) || opts.kind
+        opts.kind = parseIntArg(next, opts.kind, { min: 0 })
         i++
         break
       case '--limit':
-        opts.limit = Math.max(1, Number(next) || opts.limit)
+        opts.limit = parseIntArg(next, opts.limit, { min: 1 })
         i++
         break
       case '--horizon-days':
-        opts.horizonDays = Math.max(1, Number(next) || opts.horizonDays)
+        opts.horizonDays = parseIntArg(next, opts.horizonDays, { min: 1 })
         i++
         break
       case '--help':
@@ -165,14 +176,13 @@ function buildCases(): BenchmarkCase[] {
     {
       name: 'REQ: authors + kinds ORDER BY created_at DESC',
       description:
-        "NIP-01 REQ with a single pubkey filter + kind=TEXT_NOTE. Canonical per-author subscription; should hit events_active_pubkey_kind_created_at_idx.",
+        'NIP-01 REQ with a single pubkey filter + kind=TEXT_NOTE. Canonical per-author subscription; shape matches EventRepository.findByFilters and should hit events_active_pubkey_kind_created_at_idx.',
       skipIf: (ctx) => (ctx.samplePubkey ? undefined : 'no events rows found'),
       build: (ctx) =>
         ctx.db('events')
           .select('event_id', 'event_pubkey', 'event_kind', 'event_created_at')
           .where('event_pubkey', ctx.samplePubkey as Buffer)
           .whereIn('event_kind', [ctx.kind])
-          .whereNull('deleted_at')
           .orderBy('event_created_at', 'desc')
           .orderBy('event_id', 'asc')
           .limit(ctx.limit),
@@ -180,7 +190,7 @@ function buildCases(): BenchmarkCase[] {
     {
       name: 'REQ: kind + created_at time range',
       description:
-        'REQ with no authors but a time window and a kind. Exercises the (kind, created_at) access paths.',
+        'REQ with no authors but a time window and a kind. Matches findByFilters for the (kinds, since, until) case; exercises the (kind, created_at) access paths.',
       build: (ctx) => {
         const now = Math.floor(Date.now() / 1000)
         const since = now - ctx.horizonSeconds
         return ctx.db('events')
           .select('event_id')
           .where('event_kind', ctx.kind)
           .whereBetween('event_created_at', [since, now])
-          .whereNull('deleted_at')
           .orderBy('event_created_at', 'desc')
+          .orderBy('event_id', 'asc')
           .limit(ctx.limit)
       },
     },
     {
       name: 'hasActiveRequestToVanish (pubkey + kind=62 + not deleted)',
       description:
-        'Exact query run on every inbound event via UserRepository.isVanished; latency here is a per-message tax.',
+        'Exact query run on every inbound event via UserRepository.isVanished; latency here is a per-message tax. This is the only hot path that filters on deleted_at.',
       skipIf: (ctx) => (ctx.samplePubkey ? undefined : 'no events rows found'),
       build: (ctx) =>
         ctx.db('events')
@@ -225,10 +235,16 @@ function buildCases(): BenchmarkCase[] {
     {
       name: 'findPendingInvoices (status=pending ORDER BY created_at)',
       description:
-        'InvoiceRepository poll; hits invoices_pending_created_at_idx when present.',
+        'Exact shape of InvoiceRepository.findPendingInvoices; hits invoices_pending_created_at_idx when present.',
       skipIf: (ctx) => (ctx.invoiceCount > 0 ? undefined : 'invoices table is empty'),
       build: (ctx) =>
-        ctx.db('invoices').select('id').where('status', 'pending').orderBy('created_at', 'asc').limit(ctx.limit),
+        ctx
+          .db('invoices')
+          .select('id')
+          .where('status', InvoiceStatus.PENDING)
+          .orderBy('created_at', 'asc')
+          .offset(0)
+          .limit(ctx.limit),
     },
   ]
 }
@@ -236,7 +252,9 @@ function buildCases(): BenchmarkCase[] {
 async function gatherContext(db: Knex, options: CliOptions): Promise<BenchContext> {
   const [{ count: eventCountText = '0' } = { count: '0' }] = await db('events').count('* as count')
   const [{ count: invoiceCountText = '0' } = { count: '0' }] = await db('invoices').count('* as count')
-  const sample = await db('events').select('event_pubkey').whereNull('deleted_at').limit(1).first()
+  // Pick any pubkey with rows — production REQ does not filter on deleted_at,
+  // so the benchmark should not either.
+  const sample = await db('events').select('event_pubkey').limit(1).first()
 
   return {
     db,
diff --git a/test/unit/migrations/hot-path-indexes-migration.spec.ts b/test/unit/migrations/hot-path-indexes-migration.spec.ts
new file mode 100644
index 00000000..7786f246
--- /dev/null
+++ b/test/unit/migrations/hot-path-indexes-migration.spec.ts
@@ -0,0 +1,17 @@
+import { createRequire } from 'node:module'
+
+import { expect } from 'chai'
+
+const requireFromHere = createRequire(__filename)
+const migration = requireFromHere('../../../migrations/20260420_120000_add_hot_path_indexes.js')
+
+describe('migrations/20260420_120000_add_hot_path_indexes', () => {
+  it('opts out of knex transaction so CREATE INDEX CONCURRENTLY can run', () => {
+    expect(migration.config).to.deep.equal({ transaction: false })
+  })
+
+  it('exports up and down', () => {
+    expect(migration.up).to.be.a('function')
+    expect(migration.down).to.be.a('function')
+  })
+})

From 1393f47ec575abb97b55e627b40617fd29d9209c Mon Sep 17 00:00:00 2001
From: archief2910 
Date: Sun, 19 Apr 2026 22:39:45 +0530
Subject: [PATCH 4/4] fix: bug fixes in benchmark-queries (#68)

---
 src/scripts/benchmark-queries.ts | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/scripts/benchmark-queries.ts b/src/scripts/benchmark-queries.ts
index cbe2d9af..c3c703d4 100644
--- a/src/scripts/benchmark-queries.ts
+++ b/src/scripts/benchmark-queries.ts
@@ -150,13 +150,14 @@ function summarizePlan(plan: ExplainPlanNode): { indexes: string[]; scans: strin
 }
 
 async function explain(db: Knex, query: Knex.QueryBuilder | Knex.Raw): Promise<ExplainResult> {
-  const { sql, bindings } = query.toSQL().toNative
-    ? (query as any).toSQL().toNative()
-    : { sql: query.toString(), bindings: [] as unknown[] }
+  // Keep placeholders in Knex's `?` form so `db.raw(sql, bindings)` substitutes
+  // them correctly — `.toNative()` rewrites them to `$1, $2, …`, which makes
+  // Knex's binding check fail ("Expected N bindings, saw 0").
+  const { sql, bindings } = query.toSQL()
 
   const { rows } = await db.raw<{ rows: { 'QUERY PLAN': ExplainResult[] }[] }>(
     `EXPLAIN (ANALYZE, BUFFERS, VERBOSE, FORMAT JSON) ${sql}`,
-    bindings,
+    bindings as readonly unknown[],
   )
   return rows[0]['QUERY PLAN'][0]
 }
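After the fix above, the `explain()` helper ultimately ships a plain `EXPLAIN (ANALYZE, BUFFERS, ...)` statement to PostgreSQL. The benchmark's hottest case can also be reproduced by hand in `psql` — a sketch with placeholder values (the pubkey literal below is illustrative, not taken from the patches):

```sql
-- Hand-run equivalent of the benchmark's hottest REQ shape. With the
-- migration applied, the plan should show an Index Scan using
-- events_active_pubkey_kind_created_at_idx instead of a Seq Scan + Sort.
EXPLAIN (ANALYZE, BUFFERS)
SELECT event_id
FROM events
WHERE event_pubkey = '\x0011223344556677889900112233445566778899001122334455667788990011'::bytea
  AND event_kind = ANY (ARRAY[1])
ORDER BY event_created_at DESC, event_id ASC
LIMIT 500;
```

A `Limit` node sitting directly on an `Index Scan` over the composite index, with no separate `Sort` node, is the signal that the `(event_pubkey, event_kind, event_created_at DESC, event_id)` key is doing its job.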