5 changes: 5 additions & 0 deletions .changeset/hot-path-indexes-benchmark.md
@@ -0,0 +1,5 @@
---
"nostream": minor
---

Add hot-path PostgreSQL indexes for subscription, vanish, retention, and invoice queries; add `db:benchmark` and `db:verify-index-impact` tooling; document index rationale and benchmarking. Closes #68.
30 changes: 30 additions & 0 deletions CONFIGURATION.md
@@ -78,6 +78,36 @@ The i2pd web console (tunnel status, `.b32.i2p` destinations) is published to th

If you've set READ_REPLICAS to 4, you should configure RR0_ through RR3_.

## Database indexes and benchmarking

The schema ships with a small, query-driven set of indexes. The most important ones for relay hot paths are:

| Index | Covers |
|----------------------------------------------|----------------------------------------------------------------------------------------------------------|
| `events_active_pubkey_kind_created_at_idx` | `REQ` with `authors`+`kinds` ordered by `created_at DESC, event_id ASC`; `hasActiveRequestToVanish`; by-pubkey deletes. Composite key `(event_pubkey, event_kind, event_created_at DESC, event_id)` so the ORDER BY tie-breaker is satisfied from the index without a sort step. |
| `events_deleted_at_partial_idx` | Retention purge over soft-deleted rows. Partial on `deleted_at IS NOT NULL`. |
| `invoices_pending_created_at_idx` | `findPendingInvoices` poll (`ORDER BY created_at ASC`). Partial on `status = 'pending'`. |
| `event_tags (tag_name, tag_value)` | NIP-01 generic tag filters (`#e`, `#p`, …) via the normalized `event_tags` table. |
| `events_event_created_at_index` | Time-range scans (`since` / `until`). |
| `events_event_kind_index` | Kind-only filters and purge kind-whitelist logic. |
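
As a quick spot-check of the first row above, you can run the REQ shape directly in `psql`. A minimal sketch with placeholder filter values (the pubkey literal below is not a real key):

```sql
-- Spot-check: the composite index should serve a single-kind REQ shape
-- with no separate Sort node. Placeholder values; substitute real ones.
EXPLAIN (ANALYZE, BUFFERS)
SELECT event_id, event_created_at
FROM events
WHERE event_pubkey = '\x0123456789abcdef'  -- placeholder pubkey
  AND event_kind = 1
ORDER BY event_created_at DESC, event_id ASC
LIMIT 500;
-- Expected plan: an Index Scan (or Index Only Scan) using
-- events_active_pubkey_kind_created_at_idx, with the ORDER BY
-- satisfied by the index order itself.
```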

Run the read-only benchmark against your own database to confirm the planner is using the expected indexes and to record baseline latencies:

```sh
npm run db:benchmark
npm run db:benchmark -- --runs 5 --kind 1 --limit 500
```

The `db:benchmark` script loads the local `.env` file automatically (via `node --env-file-if-exists=.env`), using the same `DB_HOST`/`DB_PORT`/`DB_USER`/`DB_PASSWORD`/`DB_NAME` variables as the relay. The benchmark issues only `EXPLAIN (ANALYZE, BUFFERS)` and `SELECT` statements — it never writes. Flags: `--runs <n>` (default 3), `--kind <n>` (default 1 / `TEXT_NOTE`; pass `0` for `SET_METADATA`), `--limit <n>` (default 500), `--horizon-days <n>` (default 7), `--help`.

For a full before/after proof of the index impact (seeds a throwaway dataset, drops and recreates the indexes, and prints a BEFORE/AFTER table), use:

```sh
npm run db:verify-index-impact
```

The hot-path index migration (`20260420_120000_add_hot_path_indexes.js`) uses `CREATE INDEX CONCURRENTLY`, so it can be applied to a running relay without taking `ACCESS EXCLUSIVE` locks on the `events` or `invoices` tables.
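
One operational caveat: if a `CREATE INDEX CONCURRENTLY` build is interrupted, PostgreSQL leaves the index behind in an `INVALID` state. A quick validity check for the three indexes this migration adds:

```sql
-- An interrupted CONCURRENTLY build leaves indisvalid = false; such an
-- index is ignored by the planner and must be dropped and recreated.
SELECT indexrelid::regclass AS index_name, indisvalid
FROM pg_index
WHERE indexrelid::regclass::text IN (
  'events_active_pubkey_kind_created_at_idx',
  'events_deleted_at_partial_idx',
  'invoices_pending_created_at_idx'
);
```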

# Settings

Running `nostream` for the first time creates the settings file in `<project_root>/.nostr/settings.yaml`. If the file is not created and an error is thrown ensure that the `<project_root>/.nostr` folder exists. The configuration directory can be changed by setting the `NOSTR_CONFIG_DIR` environment variable. `nostream` will pick up any changes to this settings file without needing to restart.
20 changes: 20 additions & 0 deletions README.md
@@ -650,6 +650,26 @@ npm run export -- backup-2024-01-01.jsonl # custom filename
```

The script reads the same `DB_*` environment variables used by the relay (see [CONFIGURATION.md](CONFIGURATION.md)).

## Benchmark Database Queries

Run the read-only query benchmark to record the planner's choices and timings for the relay's hot-path queries (REQ subscriptions, vanish checks, purge scans, pending-invoice polls):

```sh
npm run db:benchmark
npm run db:benchmark -- --runs 5 --kind 1 --limit 500
```

The benchmark only issues `EXPLAIN (ANALYZE, BUFFERS)` and `SELECT` statements against your configured database — it never writes. It loads `DB_*` variables from `.env` automatically (via `node --env-file-if-exists=.env`), so no setup is required beyond what you already need to run the relay. Use it to confirm the `events_active_pubkey_kind_created_at_idx`, `events_deleted_at_partial_idx`, and `invoices_pending_created_at_idx` indexes are being picked up.
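
To double-check that the planner actually touched those indexes during a run, you can compare scan counters before and after (a sketch using PostgreSQL's standard statistics view):

```sql
-- idx_scan counts scans initiated on each index; run this before and
-- after `npm run db:benchmark` and compare the counts.
SELECT indexrelname, idx_scan
FROM pg_stat_user_indexes
WHERE indexrelname IN (
  'events_active_pubkey_kind_created_at_idx',
  'events_deleted_at_partial_idx',
  'invoices_pending_created_at_idx'
);
```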

For a reproducible before/after proof on a throwaway dataset, run:

```sh
npm run db:verify-index-impact
```

It seeds ~200k synthetic events, drops the hot-path indexes, runs `EXPLAIN (ANALYZE, BUFFERS)` for each hot query, recreates the indexes, and prints a BEFORE/AFTER table. See the *Database indexes and benchmarking* section of [CONFIGURATION.md](CONFIGURATION.md).

## Relay Maintenance

Use `clean-db` to wipe or prune `events` table data. This also removes
76 changes: 76 additions & 0 deletions migrations/20260420_120000_add_hot_path_indexes.js
@@ -0,0 +1,76 @@
/**
* Add narrow, query-driven indexes to cover the hottest read paths.
*
* Each index is created with CREATE INDEX CONCURRENTLY so the migration can be
* applied to a running relay without taking an ACCESS EXCLUSIVE lock on the
* events table. CONCURRENTLY is not allowed inside a transaction, so this
* migration opts out of Knex's default transactional wrapper via
* `exports.config.transaction = false`.
*
* Rationale for each index is documented inline. See also:
* https://devcenter.heroku.com/articles/postgresql-indexes
*/

exports.config = { transaction: false }

exports.up = async function (knex) {
// Covers the hottest subscription / per-message reads:
//
// 1. NIP-01 REQ with `authors` + `kinds` ordered by created_at DESC
// (see EventRepository.findByFilters):
// WHERE event_pubkey = ? AND event_kind IN (...)
// ORDER BY event_created_at DESC, event_id ASC LIMIT N
//
// 2. `EventRepository.hasActiveRequestToVanish(pubkey)` — invoked on every
// inbound event via UserRepository.isVanished:
// WHERE event_pubkey = ? AND event_kind = 62 AND deleted_at IS NULL
//
// 3. `EventRepository.deleteByPubkeyExceptKinds(pubkey, kinds)`:
// WHERE event_pubkey = ? AND event_kind NOT IN (...) AND deleted_at IS NULL
//
// The index is intentionally NOT partial on `deleted_at IS NULL`: the REQ
// subscription path in findByFilters does not currently add that predicate,
// so a partial index would be ineligible for the most important query shape.
// Soft-deleted rows are a small fraction of total rows in practice (they get
// hard-deleted by the retention sweep), so the bloat is negligible compared
// to the benefit of the index being usable by the hot path.
//
// Including `event_id` as the final column makes the composite key match the
// full ORDER BY (created_at DESC, event_id ASC) used by findByFilters, so the
// planner can satisfy LIMIT N directly from the index without an extra sort
// step for the tie-breaker.
await knex.raw(`
CREATE INDEX CONCURRENTLY IF NOT EXISTS events_active_pubkey_kind_created_at_idx
ON events (event_pubkey, event_kind, event_created_at DESC, event_id)
`)

// Supports the retention / purge scan in `deleteExpiredAndRetained` and the
// vanish hard-delete follow-up:
// WHERE deleted_at IS NOT NULL
// Partial index is tiny because well-maintained relays hard-delete these
// rows periodically and the vast majority of events have deleted_at IS NULL.
await knex.raw(`
CREATE INDEX CONCURRENTLY IF NOT EXISTS events_deleted_at_partial_idx
ON events (deleted_at)
WHERE deleted_at IS NOT NULL
`)

// Supports `InvoiceRepository.findPendingInvoices`, which is polled by the
// maintenance worker to detect settled invoices:
// WHERE status = 'pending' ORDER BY created_at ASC OFFSET ? LIMIT ?
// Partial on status = 'pending' so the index only contains the rows the
// poller actually scans. Keyed on `created_at` so the planner can satisfy
// the ORDER BY straight from the index (FIFO polling, bounded tail latency
// even with large pending backlogs).
await knex.raw(`
CREATE INDEX CONCURRENTLY IF NOT EXISTS invoices_pending_created_at_idx
ON invoices (created_at)
WHERE status = 'pending'
`)
}

exports.down = async function (knex) {
await knex.raw('DROP INDEX CONCURRENTLY IF EXISTS invoices_pending_created_at_idx')
await knex.raw('DROP INDEX CONCURRENTLY IF EXISTS events_deleted_at_partial_idx')
await knex.raw('DROP INDEX CONCURRENTLY IF EXISTS events_active_pubkey_kind_created_at_idx')
}
2 changes: 2 additions & 0 deletions package.json
@@ -43,6 +43,8 @@
"db:migrate": "knex migrate:latest",
"db:migrate:rollback": "knex migrate:rollback",
"db:seed": "knex seed:run",
"db:benchmark": "node --env-file-if-exists=.env -r ts-node/register src/scripts/benchmark-queries.ts",
"db:verify-index-impact": "node --env-file-if-exists=.env -r ts-node/register scripts/verify-index-impact.ts",
"pretest:unit": "node -e \"require('fs').mkdirSync('.test-reports/unit', {recursive: true})\"",
"test:unit": "mocha 'test/**/*.spec.ts'",
"test:unit:watch": "npm run test:unit -- --min --watch --watch-files src/**/*,test/**/*",