From 1a06501d588333f991e77f317100a69e7b08f89d Mon Sep 17 00:00:00 2001
From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com>
Date: Sun, 7 Jun 2026 06:03:25 -0300
Subject: [PATCH 1/3] =?UTF-8?q?bench(atomicassets):=20add=20http-bench.mjs?=
 =?UTF-8?q?=20=E2=80=94=20e2e=20HTTP=20load=20harness=20(WormDB=20vs=20ato?=
 =?UTF-8?q?micassets-api)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

End-to-end served-latency + throughput driver for the AtomicAssets read path. Cycle B
made WormDB serve the identical eosio-contract-api shape + query params as the reference
Postgres atomicassets-api, so the same URL corpus hits both targets.

- Samples a real corpus (asset_ids / owners / collections / (coll,schema) pairs) from a
  source endpoint, then runs a weighted mixed workload (point / collection / owner /
  faceted / browse / account) against each target under C concurrent workers.
- Reports per-query-type + overall p50/p95/p99 latency and sustained req/s; warms caches
  first; runs targets sequentially so the client doesn't self-contend.
- Env-driven: WORMDB / ATOMIC base URLs, N, C, SAMPLE, SAMPLE_FROM. Portable ESM
  (node or bun). Resource use (CPU/RSS) is sampled separately per host while it runs.

Validated against the jungle4 wormdb-aa endpoint (0 errors, full per-type breakdown). The
WAX-232M side-by-side vs the production atomicassets-api is the proving run (remote env).
---
 .../atomicassets/validate/http-bench.mjs      | 117 ++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 benchmark/atomicassets/validate/http-bench.mjs

diff --git a/benchmark/atomicassets/validate/http-bench.mjs b/benchmark/atomicassets/validate/http-bench.mjs
new file mode 100644
index 0000000..0643dc7
--- /dev/null
+++ b/benchmark/atomicassets/validate/http-bench.mjs
@@ -0,0 +1,117 @@
+#!/usr/bin/env node
+// http-bench.mjs — end-to-end HTTP latency + throughput for the AtomicAssets read path, comparing one or
+// more endpoints under the SAME query corpus. Cycle B made WormDB serve the identical eosio-contract-api
+// shape + query params as the reference Postgres atomicassets-api, so the same URLs hit both.
+//
+//   WORMDB=http://127.0.0.1:6390 ATOMIC=https://wax.api.atomicassets.io N=10000 C=50 node http-bench.mjs
+//
+// Env: WORMDB / ATOMIC = base URLs of the targets (set one or both); N = requests per target; C =
+// concurrency; SAMPLE = corpus size; SAMPLE_FROM = base URL to sample the corpus from (default WORMDB).
+//
+// It samples a REAL corpus (asset_ids / owners / collections / (coll,schema) pairs) from a source, then
+// runs a weighted mixed workload (point / collection / owner / faceted / browse / account) against each
+// target and reports per-query-type + overall p50/p95/p99 latency and sustained req/s. Latency is the
+// client-observed served-HTTP time (what a consumer sees) — the apples-to-apples number across both.
+// Resource use (CPU/RSS) is measured separately on each host (e.g. `docker stats`) while this runs.
+
+const env = process.env;
+const N = Number(env.N ?? 10000);
+const C = Number(env.C ?? 50);
+const SAMPLE = Number(env.SAMPLE ?? 400);
+const P = "/atomicassets/v1";
+
+const norm = (u) => u.replace(/\/+$/, "");
+const targets = [];
+if (env.WORMDB) targets.push({ name: "wormdb", base: norm(env.WORMDB) });
+if (env.ATOMIC) targets.push({ name: "atomic", base: norm(env.ATOMIC) });
+if (!targets.length) {
+  console.error("set WORMDB and/or ATOMIC to the target base URLs");
+  process.exit(1);
+}
+const sampleBase = norm(env.SAMPLE_FROM ?? env.WORMDB ?? env.ATOMIC);
+
+const now = () => performance.now();
+const pick = (a) => a[(Math.random() * a.length) | 0];
+async function getJson(url) {
+  try {
+    const r = await fetch(url);
+    return r.ok ? await r.json() : null;
+  } catch {
+    return null;
+  }
+}
+
+// ── 1) sample a real corpus from one endpoint ──
+console.log(`[bench] sampling ${SAMPLE} assets from ${sampleBase} …`);
+const seed = await getJson(`${sampleBase}${P}/assets?limit=${SAMPLE}`);
+const rows = seed?.data ?? [];
+if (!rows.length) {
+  console.error(`[bench] no sample data from ${sampleBase} — is it serving /atomicassets/v1/assets?`);
+  process.exit(1);
+}
+const collOf = (a) => a.collection?.collection_name ?? a.collection_name ?? null;
+const schemaOf = (a) => a.schema?.schema_name ?? a.schema_name ?? null;
+const ids = [...new Set(rows.map((a) => a.asset_id).filter(Boolean))];
+const owners = [...new Set(rows.map((a) => a.owner).filter(Boolean))];
+const colls = [...new Set(rows.map(collOf).filter(Boolean))];
+const csPairs = [...new Map(rows.filter((a) => collOf(a) && schemaOf(a)).map((a) => [`${collOf(a)}|${schemaOf(a)}`, { c: collOf(a), s: schemaOf(a) }])).values()];
+console.log(`[bench] corpus: ${ids.length} ids, ${owners.length} owners, ${colls.length} collections, ${csPairs.length} (coll,schema) pairs`);
+
+// ── 2) weighted query mix (roughly real-API-traffic-shaped; per-type latency is reported separately) ──
+const MIX = [
+  { type: "point", w: 35, url: () => `${P}/assets/${pick(ids)}` },
+  { type: "coll", w: 25, url: () => `${P}/assets?collection_name=${pick(colls)}&limit=100` },
+  { type: "owner", w: 15, url: () => `${P}/assets?owner=${pick(owners)}&limit=100` },
+  { type: "faceted", w: 10, url: () => { const cs = pick(csPairs); return `${P}/assets?collection_name=${cs.c}&schema_name=${cs.s}&limit=100`; } },
+  { type: "browse", w: 8, url: () => `${P}/assets?limit=100` },
+  { type: "account", w: 7, url: () => `${P}/accounts/${pick(owners)}` },
+].filter((m) => m.type === "point" || m.type === "browse" || (m.type === "faceted" ? csPairs.length : m.type === "account" || m.type === "owner" ? owners.length : colls.length));
+const totalW = MIX.reduce((s, m) => s + m.w, 0);
+function pickMix() {
+  let r = Math.random() * totalW;
+  for (const m of MIX) if ((r -= m.w) < 0) return m;
+  return MIX[0];
+}
+
+const pctile = (arr, p) => (arr.length ? arr.sort((a, b) => a - b)[Math.min(arr.length - 1, Math.floor(arr.length * p))] : NaN);
+const f = (x) => (Number.isFinite(x) ? x.toFixed(2) : "—");
+
+async function run(target) {
+  const lat = Object.fromEntries(MIX.map((m) => [m.type, []]));
+  const all = [];
+  let errs = 0;
+  const per = Math.ceil(N / C);
+  // warm up (fill caches / JIT) before measuring
+  await Promise.all(Array.from({ length: Math.min(C, 20) }, async () => { for (let i = 0; i < 5; i++) { try { await fetch(`${target.base}${pickMix().url()}`).then((r) => r.text()); } catch {} } }));
+  const t0 = now();
+  await Promise.all(
+    Array.from({ length: C }, async () => {
+      for (let i = 0; i < per; i++) {
+        const m = pickMix();
+        const s = now();
+        const ok = await fetch(`${target.base}${m.url()}`).then((r) => r.text()).then(() => true).catch(() => false);
+        const d = now() - s;
+        if (ok) { lat[m.type].push(d); all.push(d); } else errs++;
+      }
+    }),
+  );
+  const wall = now() - t0;
+  const done = per * C;
+  console.log(`\n══ ${target.name}  (${target.base}) ══`);
+  console.log(`${done} reqs in ${wall.toFixed(0)}ms → ${((done / wall) * 1000).toFixed(0)} req/s (c=${C})  errors=${errs}`);
+  console.log(`  type      n      p50     p95     p99   (ms)`);
+  for (const m of MIX) {
+    const a = lat[m.type];
+    console.log(`  ${m.type.padEnd(8)} ${String(a.length).padStart(6)}  ${f(pctile(a, 0.5)).padStart(6)}  ${f(pctile(a, 0.95)).padStart(6)}  ${f(pctile(a, 0.99)).padStart(6)}`);
+  }
+  console.log(`  ${"OVERALL".padEnd(8)} ${String(all.length).padStart(6)}  ${f(pctile(all, 0.5)).padStart(6)}  ${f(pctile(all, 0.95)).padStart(6)}  ${f(pctile(all, 0.99)).padStart(6)}`);
+  return { name: target.name, reqps: (done / wall) * 1000, p50: pctile(all, 0.5), p99: pctile(all, 0.99) };
+}
+
+const results = [];
+for (const t of targets) results.push(await run(t)); // sequential so the two targets don't contend on the client
+
+if (results.length > 1) {
+  console.log(`\n══ side-by-side ══`);
+  for (const r of results) console.log(`  ${r.name.padEnd(8)} ${f(r.reqps).padStart(9)} req/s   p50=${f(r.p50)}ms  p99=${f(r.p99)}ms`);
+}

From ccc502ea63127f1f74d11dc0551e2d413a7b6354 Mon Sep 17 00:00:00 2001
From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com>
Date: Sun, 7 Jun 2026 06:08:15 -0300
Subject: [PATCH 2/3] =?UTF-8?q?bench(atomicassets):=20harden=20http-bench?=
 =?UTF-8?q?=20=E2=80=94=20duration=20mode,=20docker-stats,=20results=20fil?=
 =?UTF-8?q?es,=20mix?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Follow-up hardening on the load harness:
- DURATION=<s> steady-state mode (each worker loops to a deadline) alongside N-per-target.
- STATS_WORMDB / STATS_ATOMIC sample container CPU%/RSS via `docker stats` during that
  target's run (self-scheduling --no-stream polls; silently skipped if docker is absent).
- Writes <OUT>.json + <OUT>.md — per-type + overall p50/95/99, min/mean/max, a latency
  histogram, resource use, and a side-by-side table — a committable proving artifact.
- MIX=type=w,… overrides the query weights; corpus now sampled across newest+oldest pages
  for collection/owner variety.
- README: a benchmark section (env table + the proving-run caveat: WAX-232M on native
  Linux is the real test; a Windows-loopback jungle4 run only validates the harness).

Validated on jungle4 wormdb-aa: 8s/c20 -> 6.5k req/s, p50 2.5ms / p99 10ms, RSS ~74MiB,
0 errors; JSON+MD emitted with the histogram + resource sample.
---
 benchmark/atomicassets/validate/README.md     |  32 +++
 .../atomicassets/validate/http-bench.mjs      | 185 ++++++++++++++----
 2 files changed, 179 insertions(+), 38 deletions(-)

diff --git a/benchmark/atomicassets/validate/README.md b/benchmark/atomicassets/validate/README.md
index 61f990d..312c94e 100644
--- a/benchmark/atomicassets/validate/README.md
+++ b/benchmark/atomicassets/validate/README.md
@@ -43,3 +43,35 @@ exact matches vs `test.wax.api.atomicassets.io`. On-disk footprint (WiredTiger-c
 fully indexed ≈ **8 GB**, dominated by the `data.$**` wildcard. (Report on-disk `storageSize`, not the
 uncompressed `size`.) The one parity fix this surfaced: asset `float`/`double` attributes render as
 strings on the live API (templates keep numbers) — handled in `map_asset`.
+
+## HTTP load benchmark (WormDB vs the Postgres atomicassets-api)
+
+`http-bench.mjs` measures **end-to-end served latency + throughput** for the read path. Cycle B made
+WormDB serve the identical eosio-contract-api shape + query params as the reference Postgres
+`atomicassets-api`, so the **same URL corpus hits both** and the comparison is apples-to-apples.
+
+```sh
+# one or both targets; same corpus, run sequentially so the client never self-contends
+WORMDB=http://127.0.0.1:6390 ATOMIC=https://wax.api.atomicassets.io \
+  N=50000 C=100 STATS_WORMDB=aa-wormdb OUT=wax-run node http-bench.mjs
+```
+
+It samples a real corpus (ids / owners / collections / (coll,schema) pairs, newest+oldest pages), runs a
+weighted mix — `point` `/assets/:id`, `coll`, `owner`, `faceted` (coll+schema), `browse`, `account` —
+and reports per-type + overall **p50/p95/p99** (min/mean/max + a latency histogram in the JSON) and
+**req/s**. Writes `<OUT>.json` + `<OUT>.md`.
+
+| env | meaning |
+|---|---|
+| `WORMDB` / `ATOMIC` | target base URLs (set one or both) |
+| `N` / `DURATION` | requests per target, or seconds of steady-state load (`DURATION` wins) |
+| `C` | concurrency |
+| `SAMPLE` / `SAMPLE_FROM` | approximate corpus size / base URL to sample from (default `WORMDB`, else `ATOMIC`) |
+| `MIX` | override weights, e.g. `MIX=point=50,coll=20,owner=10,faceted=10,browse=5,account=5` |
+| `STATS_WORMDB` / `STATS_ATOMIC` | container to sample CPU/RSS via `docker stats` during that run |
+| `OUT` | results-file prefix (default `bench-results`) |
+
+**Proving run = WAX 232M on native Linux**, both targets on the same data. Note that latency on the
+**Windows Docker-Desktop loopback adds ~2–4 ms** and a tiny testnet segment makes postings trivial — so a
+jungle4 run validates the harness but is *not* a proving number. The WSEG micro-bench already shows the
+storage win (~33×) + µs in-process lookups; this harness is the served-HTTP p50/95/99 + throughput half.
diff --git a/benchmark/atomicassets/validate/http-bench.mjs b/benchmark/atomicassets/validate/http-bench.mjs
index 0643dc7..2c9b6a3 100644
--- a/benchmark/atomicassets/validate/http-bench.mjs
+++ b/benchmark/atomicassets/validate/http-bench.mjs
@@ -3,27 +3,38 @@
 // more endpoints under the SAME query corpus. Cycle B made WormDB serve the identical eosio-contract-api
 // shape + query params as the reference Postgres atomicassets-api, so the same URLs hit both.
 //
-//   WORMDB=http://127.0.0.1:6390 ATOMIC=https://wax.api.atomicassets.io N=10000 C=50 node http-bench.mjs
+//   WORMDB=http://127.0.0.1:6390 ATOMIC=https://wax.api.atomicassets.io \
+//   N=50000 C=100 STATS_WORMDB=aa-wormdb OUT=wax-run node http-bench.mjs
 //
-// Env: WORMDB / ATOMIC = base URLs of the targets (set one or both); N = requests per target; C =
-// concurrency; SAMPLE = corpus size; SAMPLE_FROM = base URL to sample the corpus from (default WORMDB).
+// Targets:   WORMDB / ATOMIC = base URLs (set one or both).
+// Load:      N = requests per target (ignored if DURATION set); DURATION = seconds of steady-state load
+//            per target; C = concurrency.
+// Corpus:    SAMPLE = corpus size; SAMPLE_FROM = base URL to sample from (default WORMDB). Sampled across
+//            newest+oldest pages for collection/owner variety.
+// Mix:       MIX = override weights, e.g. MIX=point=50,coll=20,owner=10,faceted=10,browse=5,account=5
+// Resource:  STATS_WORMDB / STATS_ATOMIC = container name to sample CPU/RSS via `docker stats` during
+//            that target's run (skipped if docker is absent / the name is unset).
+// Output:    OUT = results-file prefix (default "bench-results") -> writes <OUT>.json + <OUT>.md.
 //
-// It samples a REAL corpus (asset_ids / owners / collections / (coll,schema) pairs) from a source, then
-// runs a weighted mixed workload (point / collection / owner / faceted / browse / account) against each
-// target and reports per-query-type + overall p50/p95/p99 latency and sustained req/s. Latency is the
-// client-observed served-HTTP time (what a consumer sees) — the apples-to-apples number across both.
-// Resource use (CPU/RSS) is measured separately on each host (e.g. `docker stats`) while this runs.
+// Reports per-query-type + overall p50/p95/p99 (min/mean/max + a latency histogram in the JSON) and
+// sustained req/s. Latency is the client-observed served-HTTP time — the apples-to-apples consumer number.
+// Portable ESM (node or bun). Targets run sequentially so the client never self-contends.
+
+import { spawn } from "node:child_process";
+import { writeFileSync } from "node:fs";
 
 const env = process.env;
 const N = Number(env.N ?? 10000);
 const C = Number(env.C ?? 50);
+const DURATION = env.DURATION ? Number(env.DURATION) : 0; // seconds; >0 => duration-based
 const SAMPLE = Number(env.SAMPLE ?? 400);
+const OUT = env.OUT ?? "bench-results";
 const P = "/atomicassets/v1";
 
 const norm = (u) => u.replace(/\/+$/, "");
 const targets = [];
-if (env.WORMDB) targets.push({ name: "wormdb", base: norm(env.WORMDB) });
-if (env.ATOMIC) targets.push({ name: "atomic", base: norm(env.ATOMIC) });
+if (env.WORMDB) targets.push({ name: "wormdb", base: norm(env.WORMDB), stats: env.STATS_WORMDB });
+if (env.ATOMIC) targets.push({ name: "atomic", base: norm(env.ATOMIC), stats: env.STATS_ATOMIC });
 if (!targets.length) {
   console.error("set WORMDB and/or ATOMIC to the target base URLs");
   process.exit(1);
@@ -32,6 +43,8 @@ const sampleBase = norm(env.SAMPLE_FROM ?? env.WORMDB ?? env.ATOMIC);
 
 const now = () => performance.now();
 const pick = (a) => a[(Math.random() * a.length) | 0];
+const sum = (a) => a.reduce((x, y) => x + y, 0);
+const f = (x) => (Number.isFinite(x) ? x.toFixed(2) : "—");
 async function getJson(url) {
   try {
     const r = await fetch(url);
@@ -41,10 +54,21 @@ async function getJson(url) {
   }
 }
 
-// ── 1) sample a real corpus from one endpoint ──
-console.log(`[bench] sampling ${SAMPLE} assets from ${sampleBase} …`);
-const seed = await getJson(`${sampleBase}${P}/assets?limit=${SAMPLE}`);
-const rows = seed?.data ?? [];
+// ── 1) sample a real corpus (newest + oldest pages for collection/owner variety) ──
+async function sampleCorpus(base, want) {
+  const out = [];
+  for (let page = 1; out.length < want && page <= 25; page++) {
+    const j = await getJson(`${base}${P}/assets?limit=200&order=desc&page=${page}`);
+    const d = j?.data ?? [];
+    if (!d.length) break;
+    out.push(...d);
+  }
+  const asc = await getJson(`${base}${P}/assets?limit=200&order=asc`);
+  if (asc?.data) out.push(...asc.data);
+  return out;
+}
+console.log(`[bench] sampling ~${SAMPLE} assets from ${sampleBase} …`);
+const rows = await sampleCorpus(sampleBase, SAMPLE);
 if (!rows.length) {
   console.error(`[bench] no sample data from ${sampleBase} — is it serving /atomicassets/v1/assets?`);
   process.exit(1);
@@ -55,63 +79,148 @@ const ids = [...new Set(rows.map((a) => a.asset_id).filter(Boolean))];
 const owners = [...new Set(rows.map((a) => a.owner).filter(Boolean))];
 const colls = [...new Set(rows.map(collOf).filter(Boolean))];
 const csPairs = [...new Map(rows.filter((a) => collOf(a) && schemaOf(a)).map((a) => [`${collOf(a)}|${schemaOf(a)}`, { c: collOf(a), s: schemaOf(a) }])).values()];
+const corpus = { ids: ids.length, owners: owners.length, collections: colls.length, csPairs: csPairs.length };
 console.log(`[bench] corpus: ${ids.length} ids, ${owners.length} owners, ${colls.length} collections, ${csPairs.length} (coll,schema) pairs`);
 
-// ── 2) weighted query mix (roughly real-API-traffic-shaped; per-type latency is reported separately) ──
+// ── 2) weighted query mix (default ~real-API-traffic-shaped; override with MIX=type=w,…) ──
+const W = { point: 35, coll: 25, owner: 15, faceted: 10, browse: 8, account: 7 };
+if (env.MIX) for (const part of env.MIX.split(",")) { const [k, v] = part.split("="); if (k in W && Number.isFinite(+v)) W[k] = +v; }
+const have = { point: ids.length, coll: colls.length, owner: owners.length, faceted: csPairs.length, browse: 1, account: owners.length };
 const MIX = [
-  { type: "point", w: 35, url: () => `${P}/assets/${pick(ids)}` },
-  { type: "coll", w: 25, url: () => `${P}/assets?collection_name=${pick(colls)}&limit=100` },
-  { type: "owner", w: 15, url: () => `${P}/assets?owner=${pick(owners)}&limit=100` },
-  { type: "faceted", w: 10, url: () => { const cs = pick(csPairs); return `${P}/assets?collection_name=${cs.c}&schema_name=${cs.s}&limit=100`; } },
-  { type: "browse", w: 8, url: () => `${P}/assets?limit=100` },
-  { type: "account", w: 7, url: () => `${P}/accounts/${pick(owners)}` },
-].filter((m) => m.type === "point" || m.type === "browse" || (m.type === "faceted" ? csPairs.length : m.type === "account" || m.type === "owner" ? owners.length : colls.length));
-const totalW = MIX.reduce((s, m) => s + m.w, 0);
+  { type: "point", url: () => `${P}/assets/${pick(ids)}` },
+  { type: "coll", url: () => `${P}/assets?collection_name=${pick(colls)}&limit=100` },
+  { type: "owner", url: () => `${P}/assets?owner=${pick(owners)}&limit=100` },
+  { type: "faceted", url: () => { const cs = pick(csPairs); return `${P}/assets?collection_name=${cs.c}&schema_name=${cs.s}&limit=100`; } },
+  { type: "browse", url: () => `${P}/assets?limit=100` },
+  { type: "account", url: () => `${P}/accounts/${pick(owners)}` },
+].map((m) => ({ ...m, w: W[m.type] })).filter((m) => m.w > 0 && have[m.type] > 0);
+const totalW = sum(MIX.map((m) => m.w));
 function pickMix() {
   let r = Math.random() * totalW;
   for (const m of MIX) if ((r -= m.w) < 0) return m;
   return MIX[0];
 }
 
-const pctile = (arr, p) => (arr.length ? arr.sort((a, b) => a - b)[Math.min(arr.length - 1, Math.floor(arr.length * p))] : NaN);
-const f = (x) => (Number.isFinite(x) ? x.toFixed(2) : "—");
+// ── stats helpers ──
+const pctile = (a, p) => (a.length ? a[Math.min(a.length - 1, Math.floor(a.length * p))] : NaN);
+const HBINS = [1, 2, 5, 10, 20, 50, 100, 200, 500, 1000];
+function stats(arr) {
+  if (!arr.length) return { n: 0 };
+  const a = [...arr].sort((x, y) => x - y);
+  const histo = {};
+  let lo = 0;
+  for (const hi of HBINS) { histo[`<${hi}`] = a.filter((x) => x >= lo && x < hi).length; lo = hi; }
+  histo[`>=${HBINS[HBINS.length - 1]}`] = a.filter((x) => x >= HBINS[HBINS.length - 1]).length;
+  return { n: a.length, min: a[0], mean: sum(a) / a.length, p50: pctile(a, 0.5), p95: pctile(a, 0.95), p99: pctile(a, 0.99), max: a[a.length - 1], histo };
+}
+
+// ── docker-stats resource sampler (self-scheduling --no-stream polls) ──
+function startStats(container) {
+  if (!container) return null;
+  const samples = [];
+  let stopped = false;
+  (function tick() {
+    if (stopped) return;
+    let out = "";
+    let proc;
+    try {
+      proc = spawn("docker", ["stats", "--no-stream", "--format", "{{.CPUPerc}};{{.MemUsage}}", container], { stdio: ["ignore", "pipe", "ignore"] });
+    } catch {
+      stopped = true;
+      return;
+    }
+    proc.stdout.on("data", (d) => (out += d.toString()));
+    proc.on("error", () => (stopped = true)); // docker not installed
+    proc.on("close", () => {
+      const m = out.trim().match(/([\d.]+)%\s*;\s*([\d.]+)\s*([KMGi]+)/i);
+      if (m) {
+        let mem = parseFloat(m[2]);
+        const u = m[3].toLowerCase();
+        if (u.startsWith("g")) mem *= 1024;
+        else if (u.startsWith("k")) mem /= 1024; // -> MiB
+        samples.push({ cpu: parseFloat(m[1]), mem });
+      }
+      if (!stopped) setTimeout(tick, 250);
+    });
+  })();
+  return {
+    stop() {
+      stopped = true;
+      if (!samples.length) return null;
+      const cpu = samples.map((s) => s.cpu), mem = samples.map((s) => s.mem);
+      return { container, samples: samples.length, cpuAvgPct: sum(cpu) / cpu.length, cpuPeakPct: Math.max(...cpu), memAvgMiB: sum(mem) / mem.length, memPeakMiB: Math.max(...mem) };
+    },
+  };
+}
 
 async function run(target) {
   const lat = Object.fromEntries(MIX.map((m) => [m.type, []]));
   const all = [];
-  let errs = 0;
-  const per = Math.ceil(N / C);
-  // warm up (fill caches / JIT) before measuring
+  let errs = 0, done = 0;
+  // warm up (fill caches / JIT) before measuring + before sampling resources
   await Promise.all(Array.from({ length: Math.min(C, 20) }, async () => { for (let i = 0; i < 5; i++) { try { await fetch(`${target.base}${pickMix().url()}`).then((r) => r.text()); } catch {} } }));
+  const sampler = startStats(target.stats);
   const t0 = now();
+  const deadline = DURATION ? t0 + DURATION * 1000 : 0;
+  const per = DURATION ? Infinity : Math.ceil(N / C);
   await Promise.all(
     Array.from({ length: C }, async () => {
       for (let i = 0; i < per; i++) {
+        if (DURATION && now() >= deadline) break;
         const m = pickMix();
         const s = now();
         const ok = await fetch(`${target.base}${m.url()}`).then((r) => r.text()).then(() => true).catch(() => false);
         const d = now() - s;
+        done++;
         if (ok) { lat[m.type].push(d); all.push(d); } else errs++;
       }
     }),
   );
   const wall = now() - t0;
-  const done = per * C;
+  const res = sampler ? sampler.stop() : null;
+  const perType = Object.fromEntries(MIX.map((m) => [m.type, stats(lat[m.type])]));
+  const overall = stats(all);
+  const reqps = (done / wall) * 1000;
+
   console.log(`\n══ ${target.name}  (${target.base}) ══`);
-  console.log(`${done} reqs in ${wall.toFixed(0)}ms → ${((done / wall) * 1000).toFixed(0)} req/s (c=${C})  errors=${errs}`);
-  console.log(`  type      n      p50     p95     p99   (ms)`);
-  for (const m of MIX) {
-    const a = lat[m.type];
-    console.log(`  ${m.type.padEnd(8)} ${String(a.length).padStart(6)}  ${f(pctile(a, 0.5)).padStart(6)}  ${f(pctile(a, 0.95)).padStart(6)}  ${f(pctile(a, 0.99)).padStart(6)}`);
-  }
-  console.log(`  ${"OVERALL".padEnd(8)} ${String(all.length).padStart(6)}  ${f(pctile(all, 0.5)).padStart(6)}  ${f(pctile(all, 0.95)).padStart(6)}  ${f(pctile(all, 0.99)).padStart(6)}`);
-  return { name: target.name, reqps: (done / wall) * 1000, p50: pctile(all, 0.5), p99: pctile(all, 0.99) };
+  console.log(`${done} reqs in ${wall.toFixed(0)}ms → ${reqps.toFixed(0)} req/s (c=${C}${DURATION ? `, ${DURATION}s` : ""})  errors=${errs}`);
+  console.log(`  type      n      p50     p95     p99     max   (ms)`);
+  for (const m of MIX) { const t = perType[m.type]; console.log(`  ${m.type.padEnd(8)} ${String(t.n).padStart(6)}  ${f(t.p50).padStart(6)}  ${f(t.p95).padStart(6)}  ${f(t.p99).padStart(6)}  ${f(t.max).padStart(6)}`); }
+  console.log(`  ${"OVERALL".padEnd(8)} ${String(overall.n).padStart(6)}  ${f(overall.p50).padStart(6)}  ${f(overall.p95).padStart(6)}  ${f(overall.p99).padStart(6)}  ${f(overall.max).padStart(6)}`);
+  if (res) console.log(`  resource (${res.container}): cpu avg ${f(res.cpuAvgPct)}% peak ${f(res.cpuPeakPct)}%  |  rss avg ${f(res.memAvgMiB)}MiB peak ${f(res.memPeakMiB)}MiB  (${res.samples} samples)`);
+  return { name: target.name, base: target.base, reqps, errors: errs, wallMs: wall, concurrency: C, overall, perType, resource: res };
 }
 
 const results = [];
 for (const t of targets) results.push(await run(t)); // sequential so the two targets don't contend on the client
 
+// ── side-by-side + results files ──
 if (results.length > 1) {
   console.log(`\n══ side-by-side ══`);
-  for (const r of results) console.log(`  ${r.name.padEnd(8)} ${f(r.reqps).padStart(9)} req/s   p50=${f(r.p50)}ms  p99=${f(r.p99)}ms`);
+  for (const r of results) console.log(`  ${r.name.padEnd(8)} ${f(r.reqps).padStart(9)} req/s   p50=${f(r.overall.p50)}ms  p95=${f(r.overall.p95)}ms  p99=${f(r.overall.p99)}ms`);
+}
+
+const report = { generatedBy: "http-bench.mjs", params: { N, C, DURATION, SAMPLE, mix: W, sampleBase }, corpus, targets: results };
+writeFileSync(`${OUT}.json`, JSON.stringify(report, null, 2));
+
+const md = [];
+md.push(`# AtomicAssets HTTP benchmark`);
+md.push("");
+md.push(`Load: ${DURATION ? `${DURATION}s steady-state` : `${N} reqs`} per target, concurrency ${C}. Corpus: ${corpus.ids} ids / ${corpus.owners} owners / ${corpus.collections} collections / ${corpus.csPairs} (coll,schema) pairs sampled from \`${sampleBase}\`. Latency = client-observed served-HTTP time (ms).`);
+md.push("");
+md.push(`| target | req/s | p50 | p95 | p99 | max | errors | cpu avg/peak | rss avg/peak (MiB) |`);
+md.push(`|---|---:|---:|---:|---:|---:|---:|---:|---:|`);
+for (const r of results) {
+  const o = r.overall, res = r.resource;
+  md.push(`| ${r.name} | ${f(r.reqps)} | ${f(o.p50)} | ${f(o.p95)} | ${f(o.p99)} | ${f(o.max)} | ${r.errors} | ${res ? `${f(res.cpuAvgPct)}%/${f(res.cpuPeakPct)}%` : "—"} | ${res ? `${f(res.memAvgMiB)}/${f(res.memPeakMiB)}` : "—"} |`);
+}
+md.push("");
+for (const r of results) {
+  md.push(`## ${r.name} — per query type`);
+  md.push(`| type | n | p50 | p95 | p99 | max |`);
+  md.push(`|---|---:|---:|---:|---:|---:|`);
+  for (const m of MIX) { const t = r.perType[m.type]; md.push(`| ${m.type} | ${t.n} | ${f(t.p50)} | ${f(t.p95)} | ${f(t.p99)} | ${f(t.max)} |`); }
+  md.push("");
 }
+writeFileSync(`${OUT}.md`, md.join("\n"));
+console.log(`\n[bench] wrote ${OUT}.json + ${OUT}.md`);

From a22769aa063a6dbf1623cb3cb6405fc0d99a21fe Mon Sep 17 00:00:00 2001
From: Igor Lins e Silva <4753812+igorls@users.noreply.github.com>
Date: Sun, 7 Jun 2026 06:54:19 -0300
Subject: [PATCH 3/3] =?UTF-8?q?bench(atomicassets):=20address=20PR=20#12?=
 =?UTF-8?q?=20review=20=E2=80=94=20correctness,=20fairness,=20clean=20exit?=
 =?UTF-8?q?s?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bot review (Codex/Gemini/Copilot) + an adversarial multi-lens self-review:

- Count non-2xx/timeout/network failures as errors, never as fast responses: drain the
  body and return r.ok; per-request timeout via AbortController+clearTimeout (fetchT).
- req/s counts SUCCESSFUL requests only, so fast error pages can't inflate throughput;
  a nonzero error count loudly flags the run as suspect.
- Percentile = nearest-rank Math.round(p*(n-1)) — no p99==max collapse on small n.
- docker-stats mem regex handles B/KiB/MiB/GiB/TiB (was MiB/GiB only).
- Empty query mix fails fast with a clear message.
- Wrap execution in main() + process.exitCode (no abrupt process.exit while undici
  sockets are open — that tripped a libuv "handle closing" assertion on Windows).
- Fairness (adversarial review, confirmed): with 2+ targets, reduce the corpus to the
  cross-target INTERSECTION so a divergent dataset (live API vs lagging local) can't make
  targets do different work for the same URL; record dropped counts in JSON `coverage` so
  any divergence is visible. README documents the same-data assumption.

Verified on jungle4: single-target unaffected (intersection gated off); 2-target run
(same endpoint) drops 0, emits the side-by-side + coverage; all fatal paths exit cleanly.
---
 benchmark/atomicassets/validate/README.md     |   6 +
 .../atomicassets/validate/http-bench.mjs      | 318 +++++++++++-------
 2 files changed, 207 insertions(+), 117 deletions(-)

diff --git a/benchmark/atomicassets/validate/README.md b/benchmark/atomicassets/validate/README.md
index 312c94e..87f849d 100644
--- a/benchmark/atomicassets/validate/README.md
+++ b/benchmark/atomicassets/validate/README.md
@@ -71,6 +71,12 @@ and reports per-type + overall **p50/p95/p99** (min/mean/max + a latency histogr
 | `STATS_WORMDB` / `STATS_ATOMIC` | container to sample CPU/RSS via `docker stats` during that run |
 | `OUT` | results-file prefix (default `bench-results`) |
 
+**Same-data assumption.** A fair side-by-side requires both targets on the same chain/head. With 2+
+targets the harness reduces the corpus to the **cross-target intersection** (entities present on every
+target) so a divergent dataset can't make them do different work for the same URL; the dropped counts are
+recorded in the JSON `coverage` block so any divergence is visible. (Point misses already surface as
+errors; the intersection is what protects the list queries, whose misses are HTTP 200 with smaller pages.)
+
 **Proving run = WAX 232M on native Linux**, both targets on the same data. Note that latency on the
 **Windows Docker-Desktop loopback adds ~2–4 ms** and a tiny testnet segment makes postings trivial — so a
 jungle4 run validates the harness but is *not* a proving number. The WSEG micro-bench already shows the
diff --git a/benchmark/atomicassets/validate/http-bench.mjs b/benchmark/atomicassets/validate/http-bench.mjs
index 2c9b6a3..aeb0815 100644
--- a/benchmark/atomicassets/validate/http-bench.mjs
+++ b/benchmark/atomicassets/validate/http-bench.mjs
@@ -8,17 +8,22 @@
 //
 // Targets:   WORMDB / ATOMIC = base URLs (set one or both).
 // Load:      N = requests per target (ignored if DURATION set); DURATION = seconds of steady-state load
-//            per target; C = concurrency.
+//            per target; C = concurrency; TIMEOUT_MS = per-request deadline (a timeout counts as error).
 // Corpus:    SAMPLE = corpus size; SAMPLE_FROM = base URL to sample from (default WORMDB). Sampled across
-//            newest+oldest pages for collection/owner variety.
+//            newest+oldest pages for collection/owner variety. With 2+ targets the corpus is reduced to
+//            the cross-target INTERSECTION (entities present on every target) so a divergent dataset (a
+//            live API vs a lagging local) can't make targets do different work for the same URL; the
+//            dropped counts land in the JSON `coverage` so any divergence is visible. Fair side-by-sides
+//            still assume both targets are on the same chain/head.
 // Mix:       MIX = override weights, e.g. MIX=point=50,coll=20,owner=10,faceted=10,browse=5,account=5
 // Resource:  STATS_WORMDB / STATS_ATOMIC = container name to sample CPU/RSS via `docker stats` during
 //            that target's run (skipped if docker is absent / the name is unset).
 // Output:    OUT = results-file prefix (default "bench-results") -> writes <OUT>.json + <OUT>.md.
 //
 // Reports per-query-type + overall p50/p95/p99 (min/mean/max + a latency histogram in the JSON) and
-// sustained req/s. Latency is the client-observed served-HTTP time — the apples-to-apples consumer number.
-// Portable ESM (node or bun). Targets run sequentially so the client never self-contends.
+// sustained req/s (successful only). Latency is the client-observed served-HTTP time — the
+// apples-to-apples consumer number. Portable ESM (node or bun). Targets run sequentially so the client
+// never self-contends. Non-2xx, timeouts, and network errors are counted as failures, never as fast responses.
 
 import { spawn } from "node:child_process";
 import { writeFileSync } from "node:fs";
@@ -27,34 +32,38 @@ const env = process.env;
 const N = Number(env.N ?? 10000);
 const C = Number(env.C ?? 50);
 const DURATION = env.DURATION ? Number(env.DURATION) : 0; // seconds; >0 => duration-based
+const TIMEOUT_MS = Number(env.TIMEOUT_MS ?? 10000); // per-request deadline; a timeout counts as an error
 const SAMPLE = Number(env.SAMPLE ?? 400);
 const OUT = env.OUT ?? "bench-results";
 const P = "/atomicassets/v1";
 
 const norm = (u) => u.replace(/\/+$/, "");
-const targets = [];
-if (env.WORMDB) targets.push({ name: "wormdb", base: norm(env.WORMDB), stats: env.STATS_WORMDB });
-if (env.ATOMIC) targets.push({ name: "atomic", base: norm(env.ATOMIC), stats: env.STATS_ATOMIC });
-if (!targets.length) {
-  console.error("set WORMDB and/or ATOMIC to the target base URLs");
-  process.exit(1);
-}
-const sampleBase = norm(env.SAMPLE_FROM ?? env.WORMDB ?? env.ATOMIC);
-
 const now = () => performance.now();
 const pick = (a) => a[(Math.random() * a.length) | 0];
 const sum = (a) => a.reduce((x, y) => x + y, 0);
 const f = (x) => (Number.isFinite(x) ? x.toFixed(2) : "—");
+
+// fetch with a per-request deadline that covers the body too, not just the headers: the payload is
+// buffered while the abort timer is still armed, then the timer is cleared so no handle outlives the request.
+async function fetchT(url) {
+  const ac = new AbortController();
+  const timer = setTimeout(() => ac.abort(), TIMEOUT_MS); // manual timer (not AbortSignal.timeout) so it can be cleared early
+  try {
+    const r = await fetch(url, { signal: ac.signal });
+    const buf = await r.arrayBuffer(); // a body that stalls after the headers now aborts instead of hanging a worker
+    return new Response(buf.byteLength ? buf : null, { status: r.status, headers: r.headers }); // same ok/status/text()/json(); null body keeps 204/304 constructible
+  } finally { clearTimeout(timer); }
+}
 async function getJson(url) {
   try {
-    const r = await fetch(url);
+    const r = await fetchT(url);
     return r.ok ? await r.json() : null;
   } catch {
     return null;
   }
 }
 
-// ── 1) sample a real corpus (newest + oldest pages for collection/owner variety) ──
+// sample a real corpus (newest + oldest pages for collection/owner variety)
 async function sampleCorpus(base, want) {
   const out = [];
   for (let page = 1; out.length < want && page <= 25; page++) {
@@ -67,42 +76,33 @@ async function sampleCorpus(base, want) {
   if (asc?.data) out.push(...asc.data);
   return out;
 }
-console.log(`[bench] sampling ~${SAMPLE} assets from ${sampleBase} …`);
-const rows = await sampleCorpus(sampleBase, SAMPLE);
-if (!rows.length) {
-  console.error(`[bench] no sample data from ${sampleBase} — is it serving /atomicassets/v1/assets?`);
-  process.exit(1);
-}
-const collOf = (a) => a.collection?.collection_name ?? a.collection_name ?? null;
-const schemaOf = (a) => a.schema?.schema_name ?? a.schema_name ?? null;
-const ids = [...new Set(rows.map((a) => a.asset_id).filter(Boolean))];
-const owners = [...new Set(rows.map((a) => a.owner).filter(Boolean))];
-const colls = [...new Set(rows.map(collOf).filter(Boolean))];
-const csPairs = [...new Map(rows.filter((a) => collOf(a) && schemaOf(a)).map((a) => [`${collOf(a)}|${schemaOf(a)}`, { c: collOf(a), s: schemaOf(a) }])).values()];
-const corpus = { ids: ids.length, owners: owners.length, collections: colls.length, csPairs: csPairs.length };
-console.log(`[bench] corpus: ${ids.length} ids, ${owners.length} owners, ${colls.length} collections, ${csPairs.length} (coll,schema) pairs`);
-
-// ── 2) weighted query mix (default ~real-API-traffic-shaped; override with MIX=type=w,…) ──
-const W = { point: 35, coll: 25, owner: 15, faceted: 10, browse: 8, account: 7 };
-if (env.MIX) for (const part of env.MIX.split(",")) { const [k, v] = part.split("="); if (k in W && Number.isFinite(+v)) W[k] = +v; }
-const have = { point: ids.length, coll: colls.length, owner: owners.length, faceted: csPairs.length, browse: 1, account: owners.length };
-const MIX = [
-  { type: "point", url: () => `${P}/assets/${pick(ids)}` },
-  { type: "coll", url: () => `${P}/assets?collection_name=${pick(colls)}&limit=100` },
-  { type: "owner", url: () => `${P}/assets?owner=${pick(owners)}&limit=100` },
-  { type: "faceted", url: () => { const cs = pick(csPairs); return `${P}/assets?collection_name=${cs.c}&schema_name=${cs.s}&limit=100`; } },
-  { type: "browse", url: () => `${P}/assets?limit=100` },
-  { type: "account", url: () => `${P}/accounts/${pick(owners)}` },
-].map((m) => ({ ...m, w: W[m.type] })).filter((m) => m.w > 0 && have[m.type] > 0);
-const totalW = sum(MIX.map((m) => m.w));
-function pickMix() {
-  let r = Math.random() * totalW;
-  for (const m of MIX) if ((r -= m.w) < 0) return m;
-  return MIX[0];
+
+// Restrict a candidate list to entries that return data on EVERY target, so divergent datasets (a live
+// API vs a lagging/pruned local WormDB) can't make the two targets do different work for the "same" URL.
+// Bounded concurrency; a getJson failure/timeout counts as absent (conservatively dropped). This is the
+// fairness guard for list queries (owner/collection/faceted), where a miss is an HTTP 200 with a smaller
+// page rather than a flagged 404 — so without it, data-shape divergence would silently skew percentiles.
+async function keepPresentOnAll(items, toUrl, bases) { // toUrl(item) -> path appended to each base; resolves to { kept, dropped }
+  const kept = [];
+  let dropped = 0, idx = 0; // idx = shared cursor: index of the next unclaimed item
+  async function worker() {
+    while (idx < items.length) {
+      const it = items[idx++]; // claimed before the first await, so no two workers probe the same item
+      let present = true;
+      for (const b of bases) {
+        const j = await getJson(`${b}${toUrl(it)}`);
+        const has = j && (Array.isArray(j.data) ? j.data.length > 0 : j.data != null); // list: non-empty page; single entity: non-null data
+        if (!has) { present = false; break; } // one miss settles it — skip the remaining targets
+      }
+      if (present) kept.push(it); else dropped++; // kept ends up in completion order, not input order
+    }
+  }
+  await Promise.all(Array.from({ length: Math.min(12, items.length) }, worker)); // at most 12 probes in flight
+  return { kept, dropped };
 }
 
 // ── stats helpers ──
-const pctile = (a, p) => (a.length ? a[Math.min(a.length - 1, Math.floor(a.length * p))] : NaN);
+const pctile = (a, p) => (a.length ? a[Math.round(p * (a.length - 1))] : NaN); // index = p scaled onto 0..n-1 and rounded (closest-index rule, not the textbook ceil(p*n) nearest-rank); p=1 -> last element; empty -> NaN. Assumes `a` is sorted ascending — TODO confirm in stats()
 const HBINS = [1, 2, 5, 10, 20, 50, 100, 200, 500, 1000];
 function stats(arr) {
   if (!arr.length) return { n: 0 };
@@ -132,12 +132,16 @@ function startStats(container) {
     proc.stdout.on("data", (d) => (out += d.toString()));
     proc.on("error", () => (stopped = true)); // docker not installed
     proc.on("close", () => {
-      const m = out.trim().match(/([\d.]+)%\s*;\s*([\d.]+)\s*([KMGi]+)/i);
+      // docker MemUsage unit is B / KiB / MiB / GiB / TiB (the "used" side, before the " / limit")
+      const m = out.trim().match(/([\d.]+)%\s*;\s*([\d.]+)\s*([A-Za-z]+)/);
       if (m) {
         let mem = parseFloat(m[2]);
         const u = m[3].toLowerCase();
-        if (u.startsWith("g")) mem *= 1024;
-        else if (u.startsWith("k")) mem /= 1024; // -> MiB
+        if (u.startsWith("t")) mem *= 1024 * 1024;
+        else if (u.startsWith("g")) mem *= 1024;
+        else if (u.startsWith("m")) mem *= 1; // already MiB
+        else if (u.startsWith("k")) mem /= 1024;
+        else mem /= 1024 * 1024; // plain bytes -> MiB
         samples.push({ cpu: parseFloat(m[1]), mem });
       }
       if (!stopped) setTimeout(tick, 250);
@@ -153,74 +157,154 @@ function startStats(container) {
   };
 }
 
-async function run(target) {
-  const lat = Object.fromEntries(MIX.map((m) => [m.type, []]));
-  const all = [];
-  let errs = 0, done = 0;
-  // warm up (fill caches / JIT) before measuring + before sampling resources
-  await Promise.all(Array.from({ length: Math.min(C, 20) }, async () => { for (let i = 0; i < 5; i++) { try { await fetch(`${target.base}${pickMix().url()}`).then((r) => r.text()); } catch {} } }));
-  const sampler = startStats(target.stats);
-  const t0 = now();
-  const deadline = DURATION ? t0 + DURATION * 1000 : 0;
-  const per = DURATION ? Infinity : Math.ceil(N / C);
-  await Promise.all(
-    Array.from({ length: C }, async () => {
-      for (let i = 0; i < per; i++) {
-        if (DURATION && now() >= deadline) break;
-        const m = pickMix();
-        const s = now();
-        const ok = await fetch(`${target.base}${m.url()}`).then((r) => r.text()).then(() => true).catch(() => false);
-        const d = now() - s;
-        done++;
-        if (ok) { lat[m.type].push(d); all.push(d); } else errs++;
-      }
-    }),
-  );
-  const wall = now() - t0;
-  const res = sampler ? sampler.stop() : null;
-  const perType = Object.fromEntries(MIX.map((m) => [m.type, stats(lat[m.type])]));
-  const overall = stats(all);
-  const reqps = (done / wall) * 1000;
-
-  console.log(`\n══ ${target.name}  (${target.base}) ══`);
-  console.log(`${done} reqs in ${wall.toFixed(0)}ms → ${reqps.toFixed(0)} req/s (c=${C}${DURATION ? `, ${DURATION}s` : ""})  errors=${errs}`);
-  console.log(`  type      n      p50     p95     p99     max   (ms)`);
-  for (const m of MIX) { const t = perType[m.type]; console.log(`  ${m.type.padEnd(8)} ${String(t.n).padStart(6)}  ${f(t.p50).padStart(6)}  ${f(t.p95).padStart(6)}  ${f(t.p99).padStart(6)}  ${f(t.max).padStart(6)}`); }
-  console.log(`  ${"OVERALL".padEnd(8)} ${String(overall.n).padStart(6)}  ${f(overall.p50).padStart(6)}  ${f(overall.p95).padStart(6)}  ${f(overall.p99).padStart(6)}  ${f(overall.max).padStart(6)}`);
-  if (res) console.log(`  resource (${res.container}): cpu avg ${f(res.cpuAvgPct)}% peak ${f(res.cpuPeakPct)}%  |  rss avg ${f(res.memAvgMiB)}MiB peak ${f(res.memPeakMiB)}MiB  (${res.samples} samples)`);
-  return { name: target.name, base: target.base, reqps, errors: errs, wallMs: wall, concurrency: C, overall, perType, resource: res };
-}
+async function main() {
+  const targets = [];
+  if (env.WORMDB) targets.push({ name: "wormdb", base: norm(env.WORMDB), stats: env.STATS_WORMDB });
+  if (env.ATOMIC) targets.push({ name: "atomic", base: norm(env.ATOMIC), stats: env.STATS_ATOMIC });
+  if (!targets.length) {
+    console.error("set WORMDB and/or ATOMIC to the target base URLs");
+    process.exitCode = 1;
+    return;
+  }
+  const sampleBase = norm(env.SAMPLE_FROM ?? env.WORMDB ?? env.ATOMIC);
 
-const results = [];
-for (const t of targets) results.push(await run(t)); // sequential so the two targets don't contend on the client
+  // ── 1) sample a real corpus ──
+  console.log(`[bench] sampling ~${SAMPLE} assets from ${sampleBase} …`);
+  const rows = await sampleCorpus(sampleBase, SAMPLE);
+  if (!rows.length) {
+    console.error(`[bench] no sample data from ${sampleBase} — is it serving /atomicassets/v1/assets?`);
+    process.exitCode = 1;
+    return;
+  }
+  const collOf = (a) => a.collection?.collection_name ?? a.collection_name ?? null;
+  const schemaOf = (a) => a.schema?.schema_name ?? a.schema_name ?? null;
+  let ids = [...new Set(rows.map((a) => a.asset_id).filter(Boolean))];
+  let owners = [...new Set(rows.map((a) => a.owner).filter(Boolean))];
+  let colls = [...new Set(rows.map(collOf).filter(Boolean))];
+  let csPairs = [...new Map(rows.filter((a) => collOf(a) && schemaOf(a)).map((a) => [`${collOf(a)}|${schemaOf(a)}`, { c: collOf(a), s: schemaOf(a) }])).values()];
+  console.log(`[bench] sampled: ${ids.length} ids, ${owners.length} owners, ${colls.length} collections, ${csPairs.length} (coll,schema) pairs`);
 
-// ── side-by-side + results files ──
-if (results.length > 1) {
-  console.log(`\n══ side-by-side ══`);
-  for (const r of results) console.log(`  ${r.name.padEnd(8)} ${f(r.reqps).padStart(9)} req/s   p50=${f(r.overall.p50)}ms  p95=${f(r.overall.p95)}ms  p99=${f(r.overall.p99)}ms`);
-}
+  // With 2+ targets, restrict the corpus to entities present on ALL of them so each target serves the
+  // same rows even if the datasets diverge (live API vs lagging local). Dropped counts make divergence
+  // visible; residual list-query page-size differences (a present owner with different counts across
+  // out-of-sync targets) are not fully equalized — reflected in `coverage` for the reader to judge.
+  let coverage = null;
+  if (targets.length > 1) {
+    const bases = targets.map((t) => t.base);
+    console.log(`[bench] cross-target intersection over ${bases.length} targets…`);
+    const ri = await keepPresentOnAll(ids, (id) => `${P}/assets/${id}`, bases);
+    const ro = await keepPresentOnAll(owners, (o) => `${P}/assets?owner=${o}&limit=1`, bases);
+    const rc = await keepPresentOnAll(colls, (c) => `${P}/assets?collection_name=${c}&limit=1`, bases);
+    const rcs = await keepPresentOnAll(csPairs, (cs) => `${P}/assets?collection_name=${cs.c}&schema_name=${cs.s}&limit=1`, bases);
+    coverage = { idsDropped: ri.dropped, ownersDropped: ro.dropped, collectionsDropped: rc.dropped, csPairsDropped: rcs.dropped };
+    ids = ri.kept; owners = ro.kept; colls = rc.kept; csPairs = rcs.kept;
+    const totalDropped = ri.dropped + ro.dropped + rc.dropped + rcs.dropped;
+    if (totalDropped) console.log(`[bench] ⚠ dropped ${totalDropped} entities not present on all targets (ids -${ri.dropped}, owners -${ro.dropped}, colls -${rc.dropped}, pairs -${rcs.dropped}) — targets are NOT fully in sync; list-query page sizes may still differ`);
+    if (!ids.length && !owners.length && !colls.length) {
+      console.error("[bench] the targets' datasets do not overlap — nothing common to benchmark; are they on the same chain/head?");
+      process.exitCode = 1;
+      return;
+    }
+  }
+  const corpus = { ids: ids.length, owners: owners.length, collections: colls.length, csPairs: csPairs.length, coverage };
+  console.log(`[bench] corpus: ${ids.length} ids, ${owners.length} owners, ${colls.length} collections, ${csPairs.length} (coll,schema) pairs`);
 
-const report = { generatedBy: "http-bench.mjs", params: { N, C, DURATION, SAMPLE, mix: W, sampleBase }, corpus, targets: results };
-writeFileSync(`${OUT}.json`, JSON.stringify(report, null, 2));
-
-const md = [];
-md.push(`# AtomicAssets HTTP benchmark`);
-md.push("");
-md.push(`Load: ${DURATION ? `${DURATION}s steady-state` : `${N} reqs`} per target, concurrency ${C}. Corpus: ${corpus.ids} ids / ${corpus.owners} owners / ${corpus.collections} collections / ${corpus.csPairs} (coll,schema) pairs sampled from \`${sampleBase}\`. Latency = client-observed served-HTTP time (ms).`);
-md.push("");
-md.push(`| target | req/s | p50 | p95 | p99 | max | errors | cpu avg/peak | rss avg/peak (MiB) |`);
-md.push(`|---|---:|---:|---:|---:|---:|---:|---:|---:|`);
-for (const r of results) {
-  const o = r.overall, res = r.resource;
-  md.push(`| ${r.name} | ${f(r.reqps)} | ${f(o.p50)} | ${f(o.p95)} | ${f(o.p99)} | ${f(o.max)} | ${r.errors} | ${res ? `${f(res.cpuAvgPct)}%/${f(res.cpuPeakPct)}%` : "—"} | ${res ? `${f(res.memAvgMiB)}/${f(res.memPeakMiB)}` : "—"} |`);
-}
-md.push("");
-for (const r of results) {
-  md.push(`## ${r.name} — per query type`);
-  md.push(`| type | n | p50 | p95 | p99 | max |`);
-  md.push(`|---|---:|---:|---:|---:|---:|`);
-  for (const m of MIX) { const t = r.perType[m.type]; md.push(`| ${m.type} | ${t.n} | ${f(t.p50)} | ${f(t.p95)} | ${f(t.p99)} | ${f(t.max)} |`); }
+  // ── 2) weighted query mix (default ~real-API-traffic-shaped; override with MIX=type=w,…) ──
+  const W = { point: 35, coll: 25, owner: 15, faceted: 10, browse: 8, account: 7 };
+  if (env.MIX) for (const part of env.MIX.split(",")) { const [k, v] = part.split("="); if (k in W && Number.isFinite(+v)) W[k] = +v; }
+  const have = { point: ids.length, coll: colls.length, owner: owners.length, faceted: csPairs.length, browse: 1, account: owners.length };
+  const MIX = [
+    { type: "point", url: () => `${P}/assets/${pick(ids)}` },
+    { type: "coll", url: () => `${P}/assets?collection_name=${pick(colls)}&limit=100` },
+    { type: "owner", url: () => `${P}/assets?owner=${pick(owners)}&limit=100` },
+    { type: "faceted", url: () => { const cs = pick(csPairs); return `${P}/assets?collection_name=${cs.c}&schema_name=${cs.s}&limit=100`; } },
+    { type: "browse", url: () => `${P}/assets?limit=100` },
+    { type: "account", url: () => `${P}/accounts/${pick(owners)}` },
+  ].map((m) => ({ ...m, w: W[m.type] })).filter((m) => m.w > 0 && have[m.type] > 0);
+  if (!MIX.length) {
+    console.error("[bench] empty query mix — the sampled corpus has no usable dimensions, or every MIX weight is 0");
+    process.exitCode = 1;
+    return;
+  }
+  const totalW = sum(MIX.map((m) => m.w));
+  const pickMix = () => {
+    let r = Math.random() * totalW;
+    for (const m of MIX) if ((r -= m.w) < 0) return m;
+    return MIX[0];
+  };
+
+  async function run(target) { // benchmark one target: warm up, drive the mix with C workers, print a table, return the result record
+    const lat = Object.fromEntries(MIX.map((m) => [m.type, []]));
+    const all = [];
+    let errs = 0, done = 0; // done = requests sent; errs = non-2xx / timeout / network failures
+    // warm up (fill caches / JIT) before measuring + before sampling resources
+    await Promise.all(Array.from({ length: Math.min(C, 20) }, async () => { for (let i = 0; i < 5; i++) { try { await fetchT(`${target.base}${pickMix().url()}`).then((r) => r.text()); } catch {} } }));
+    const sampler = startStats(target.stats);
+    const t0 = now();
+    const deadline = DURATION ? t0 + DURATION * 1000 : 0; // 0 = count-based run, no time limit
+    let budget = DURATION ? Infinity : N; // shared across workers so exactly N requests are sent (not C * ceil(N / C))
+    await Promise.all(
+      Array.from({ length: C }, async () => {
+        while (budget-- > 0) { // claim one request; Infinity never runs out in DURATION mode
+          if (DURATION && now() >= deadline) break;
+          const m = pickMix();
+          const s = now();
+          // drain the body either way (frees the connection); count non-2xx + timeouts + network errors as failures
+          const ok = await fetchT(`${target.base}${m.url()}`).then((r) => r.text().then(() => r.ok)).catch(() => false);
+          const d = now() - s;
+          done++;
+          if (ok) { lat[m.type].push(d); all.push(d); } else errs++;
+        }
+      }),
+    );
+    const wall = now() - t0;
+    const res = sampler ? sampler.stop() : null;
+    const perType = Object.fromEntries(MIX.map((m) => [m.type, stats(lat[m.type])]));
+    const overall = stats(all);
+    const reqps = (all.length / wall) * 1000; // successful only — fast error pages must not inflate throughput
+
+    console.log(`\n══ ${target.name}  (${target.base}) ══`);
+    console.log(`${all.length} ok / ${done} sent in ${wall.toFixed(0)}ms → ${reqps.toFixed(0)} req/s (c=${C}${DURATION ? `, ${DURATION}s` : ""})  errors=${errs}${errs ? "  ⚠ results suspect — investigate errors" : ""}`);
+    console.log(`  type      n      p50     p95     p99     max   (ms)`);
+    for (const m of MIX) { const t = perType[m.type]; console.log(`  ${m.type.padEnd(8)} ${String(t.n).padStart(6)}  ${f(t.p50).padStart(6)}  ${f(t.p95).padStart(6)}  ${f(t.p99).padStart(6)}  ${f(t.max).padStart(6)}`); }
+    console.log(`  ${"OVERALL".padEnd(8)} ${String(overall.n).padStart(6)}  ${f(overall.p50).padStart(6)}  ${f(overall.p95).padStart(6)}  ${f(overall.p99).padStart(6)}  ${f(overall.max).padStart(6)}`);
+    if (res) console.log(`  resource (${res.container}): cpu avg ${f(res.cpuAvgPct)}% peak ${f(res.cpuPeakPct)}%  |  rss avg ${f(res.memAvgMiB)}MiB peak ${f(res.memPeakMiB)}MiB  (${res.samples} samples)`);
+    return { name: target.name, base: target.base, reqps, sent: done, errors: errs, wallMs: wall, concurrency: C, overall, perType, resource: res };
+  }
+
+  const results = [];
+  for (const t of targets) results.push(await run(t)); // sequential so the two targets don't contend on the client
+
+  // ── side-by-side + results files ──
+  if (results.length > 1) {
+    console.log(`\n══ side-by-side ══`);
+    for (const r of results) console.log(`  ${r.name.padEnd(8)} ${f(r.reqps).padStart(9)} req/s   p50=${f(r.overall.p50)}ms  p95=${f(r.overall.p95)}ms  p99=${f(r.overall.p99)}ms${r.errors ? `  (errors=${r.errors}!)` : ""}`);
+  }
+
+  const report = { generatedBy: "http-bench.mjs", params: { N, C, DURATION, SAMPLE, timeoutMs: TIMEOUT_MS, mix: W, sampleBase }, corpus, targets: results };
+  writeFileSync(`${OUT}.json`, JSON.stringify(report, null, 2));
+
+  const md = [];
+  md.push(`# AtomicAssets HTTP benchmark`);
+  md.push("");
+  md.push(`Load: ${DURATION ? `${DURATION}s steady-state` : `${N} reqs`} per target, concurrency ${C}. Corpus: ${corpus.ids} ids / ${corpus.owners} owners / ${corpus.collections} collections / ${corpus.csPairs} (coll,schema) pairs sampled from \`${sampleBase}\`. Latency = client-observed served-HTTP time (ms); req/s = successful only.`);
   md.push("");
+  md.push(`| target | req/s | p50 | p95 | p99 | max | errors | cpu avg/peak | rss avg/peak (MiB) |`);
+  md.push(`|---|---:|---:|---:|---:|---:|---:|---:|---:|`);
+  for (const r of results) {
+    const o = r.overall, res = r.resource;
+    md.push(`| ${r.name} | ${f(r.reqps)} | ${f(o.p50)} | ${f(o.p95)} | ${f(o.p99)} | ${f(o.max)} | ${r.errors} | ${res ? `${f(res.cpuAvgPct)}%/${f(res.cpuPeakPct)}%` : "—"} | ${res ? `${f(res.memAvgMiB)}/${f(res.memPeakMiB)}` : "—"} |`);
+  }
+  md.push("");
+  for (const r of results) {
+    md.push(`## ${r.name} — per query type`);
+    md.push(`| type | n | p50 | p95 | p99 | max |`);
+    md.push(`|---|---:|---:|---:|---:|---:|`);
+    for (const m of MIX) { const t = r.perType[m.type]; md.push(`| ${m.type} | ${t.n} | ${f(t.p50)} | ${f(t.p95)} | ${f(t.p99)} | ${f(t.max)} |`); }
+    md.push("");
+  }
+  writeFileSync(`${OUT}.md`, md.join("\n"));
+  console.log(`\n[bench] wrote ${OUT}.json + ${OUT}.md`);
 }
-writeFileSync(`${OUT}.md`, md.join("\n"));
-console.log(`\n[bench] wrote ${OUT}.json + ${OUT}.md`);
+
+await main();