From 2f9dad02522cedb84ec0944079743a0c5236ae54 Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Wed, 8 Apr 2026 16:40:03 +1000
Subject: [PATCH 01/28] feat(bench): add benchmark table with 10K rows,
 indexes, and verification tests

Migration 007 creates the bench table with encrypted_text, encrypted_int,
and encrypted_bigint columns and seeds 10K rows via create_encrypted_json()
with cycling offsets for distribution variety. The bench_setup fixture
creates 5 indexes (hash/btree/GIN); they are kept out of the migration so
tests can compare query plans before and after index creation. 9 tests
verify row count, column population, index term extraction, index usage
via EXPLAIN, and the seq scan baseline.
---
 tests/sqlx/fixtures/bench_setup.sql           |  31 ++++
 .../migrations/007_install_bench_data.sql     |  28 ++++
 tests/sqlx/tests/bench_data_tests.rs          | 141 ++++++++++++++++++
 3 files changed, 200 insertions(+)
 create mode 100644 tests/sqlx/fixtures/bench_setup.sql
 create mode 100644 tests/sqlx/migrations/007_install_bench_data.sql
 create mode 100644 tests/sqlx/tests/bench_data_tests.rs

diff --git a/tests/sqlx/fixtures/bench_setup.sql b/tests/sqlx/fixtures/bench_setup.sql
new file mode 100644
index 00000000..164a4b20
--- /dev/null
+++ b/tests/sqlx/fixtures/bench_setup.sql
@@ -0,0 +1,31 @@
+-- Fixture: bench_setup.sql
+--
+-- Creates benchmark indexes and refreshes planner statistics.
+-- Table and 10K rows created by migration 007_install_bench_data.sql.
+--
+-- Indexes:
+--   bench_text_hmac_idx   - hash on eql_v2.hmac_256(encrypted_text) for equality
+--   bench_text_ore_idx    - btree on encrypted_text via operator class for text ordering
+--   bench_int_ore_idx     - btree on encrypted_int via operator class for range/ORDER BY
+--   bench_bigint_ore_idx  - btree on encrypted_bigint via operator class
+--   bench_text_bloom_idx  - GIN on eql_v2.bloom_filter(encrypted_text) for containment
+--
+-- Pattern follows containment_with_index_tests.rs: indexes in fixture (not migration)
+-- so tests can verify before/after index creation.
+
+CREATE INDEX IF NOT EXISTS bench_text_hmac_idx
+    ON bench USING hash (eql_v2.hmac_256(encrypted_text));
+
+CREATE INDEX IF NOT EXISTS bench_text_ore_idx
+    ON bench USING btree (encrypted_text eql_v2.encrypted_operator_class);
+
+CREATE INDEX IF NOT EXISTS bench_int_ore_idx
+    ON bench USING btree (encrypted_int eql_v2.encrypted_operator_class);
+
+CREATE INDEX IF NOT EXISTS bench_bigint_ore_idx
+    ON bench USING btree (encrypted_bigint eql_v2.encrypted_operator_class);
+
+CREATE INDEX IF NOT EXISTS bench_text_bloom_idx
+    ON bench USING gin (eql_v2.bloom_filter(encrypted_text));
+
+ANALYZE bench;
diff --git a/tests/sqlx/migrations/007_install_bench_data.sql b/tests/sqlx/migrations/007_install_bench_data.sql
new file mode 100644
index 00000000..7786d971
--- /dev/null
+++ b/tests/sqlx/migrations/007_install_bench_data.sql
@@ -0,0 +1,28 @@
+-- Migration: 007_install_bench_data.sql
+--
+-- Creates benchmark table with 10K rows for performance testing.
+-- Each column cycles through 100 distinct encrypted values (from ore ids 1-100).
+--
+-- Columns:
+--   encrypted_text   - text equality (hmac), pattern match (bloom), ordering (ore)
+--   encrypted_int    - integer ORE range/equality/ordering
+--   encrypted_bigint - bigint ORE at scale
+--
+-- Index terms per row: hm (hmac), b3 (blake3), bf (bloom filter), ob (ORE blocks), sv (STE vec)
+-- Data generated via create_encrypted_json() from 004_install_test_helpers.sql.
+
+CREATE TABLE bench (
+    id SERIAL PRIMARY KEY,
+    encrypted_text eql_v2_encrypted,
+    encrypted_int eql_v2_encrypted,
+    encrypted_bigint eql_v2_encrypted
+);
+
+-- Seed 10K rows. Each column uses a different offset to create varied distributions.
+-- create_encrypted_json(id) valid for ids 1-100 (ore table lookup at 10*id, max ore.id=1000).
+INSERT INTO bench (encrypted_text, encrypted_int, encrypted_bigint)
+SELECT
+    create_encrypted_json(((gs - 1) % 100) + 1),
+    create_encrypted_json(((gs + 33) % 100) + 1),
+    create_encrypted_json(((gs + 66) % 100) + 1)
+FROM generate_series(1, 10000) AS gs;
diff --git a/tests/sqlx/tests/bench_data_tests.rs b/tests/sqlx/tests/bench_data_tests.rs
new file mode 100644
index 00000000..c58ca929
--- /dev/null
+++ b/tests/sqlx/tests/bench_data_tests.rs
@@ -0,0 +1,141 @@
+//! Benchmark data verification tests
+//!
+//! Validates migration 007_install_bench_data.sql and bench_setup fixture:
+//! - 10K rows seeded correctly across 3 encrypted columns
+//! - Index terms (hmac, bloom, ORE) are extractable
+//! - Indexes are used by the query planner (EXPLAIN assertions)
+//! - Sequential scan baseline without indexes
+
+use anyhow::Result;
+use eql_tests::{analyze_table, assert_uses_index, assert_uses_seq_scan, explain_query};
+use sqlx::PgPool;
+
+// ========== Data Integrity Tests ==========
+
+/// Verify migration seeded exactly 10K rows
+#[sqlx::test]
+async fn bench_table_has_expected_row_count(pool: PgPool) -> Result<()> {
+    let count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM bench")
+        .fetch_one(&pool)
+        .await?;
+    assert_eq!(count.0, 10000, "bench table should have 10000 rows");
+    Ok(())
+}
+
+/// Verify all three columns have non-null encrypted data
+#[sqlx::test]
+async fn bench_columns_are_populated(pool: PgPool) -> Result<()> {
+    let count: (i64,) = sqlx::query_as(
+        "SELECT COUNT(*) FROM bench
+         WHERE encrypted_text IS NOT NULL
+           AND encrypted_int IS NOT NULL
+           AND encrypted_bigint IS NOT NULL",
+    )
+    .fetch_one(&pool)
+    .await?;
+    assert_eq!(
+        count.0, 10000,
+        "all rows should have non-null encrypted columns"
+    );
+    Ok(())
+}
+
+/// Verify hmac_256 index terms are extractable from encrypted_text
+#[sqlx::test]
+async fn bench_encrypted_text_has_hmac_terms(pool: PgPool) -> Result<()> {
+    let count: (i64,) = sqlx::query_as(
+        "SELECT COUNT(*) FROM bench WHERE eql_v2.hmac_256(encrypted_text) IS NOT NULL",
+    )
+    .fetch_one(&pool)
+    .await?;
+    assert_eq!(count.0, 10000, "all rows should have hmac_256 index terms");
+    Ok(())
+}
+
+/// Verify bloom_filter index terms are extractable from encrypted_text
+#[sqlx::test]
+async fn bench_encrypted_text_has_bloom_filter_terms(pool: PgPool) -> Result<()> {
+    let count: (i64,) = sqlx::query_as(
+        "SELECT COUNT(*) FROM bench WHERE eql_v2.bloom_filter(encrypted_text) IS NOT NULL",
+    )
+    .fetch_one(&pool)
+    .await?;
+    assert_eq!(
+        count.0, 10000,
+        "all rows should have bloom_filter index terms"
+    );
+    Ok(())
+}
+
+/// Verify ORE terms are extractable from encrypted_int (3 of 5 indexes are ORE btree)
+#[sqlx::test]
+async fn bench_encrypted_int_has_ore_terms(pool: PgPool) -> Result<()> {
+    let count: (i64,) = sqlx::query_as(
+        "SELECT COUNT(*) FROM bench WHERE eql_v2.ore_block_u64_8_256(encrypted_int) IS NOT NULL",
+    )
+    .fetch_one(&pool)
+    .await?;
+    assert_eq!(count.0, 10000, "all rows should have ORE block index terms");
+    Ok(())
+}
+
+// ========== Index Usage Tests (with fixture) ==========
+
+/// Verify hash index is used for hmac_256 equality lookup
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_setup")))]
+async fn bench_hmac_equality_uses_hash_index(pool: PgPool) -> Result<()> {
+    let encrypted: String =
+        sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1")
+            .fetch_one(&pool)
+            .await?;
+
+    let sql = format!(
+        "SELECT * FROM bench WHERE eql_v2.hmac_256(encrypted_text) = eql_v2.hmac_256('{}'::jsonb::eql_v2_encrypted)",
+        encrypted
+    );
+    assert_uses_index(&pool, &sql, "bench_text_hmac_idx").await?;
+    Ok(())
+}
+
+/// Verify btree index is used for ORDER BY with LIMIT on encrypted_int
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_setup")))]
+async fn bench_ore_order_uses_btree_index(pool: PgPool) -> Result<()> {
+    let sql = "SELECT * FROM bench ORDER BY encrypted_int LIMIT 10";
+    assert_uses_index(&pool, sql, "bench_int_ore_idx").await?;
+    Ok(())
+}
+
+/// Verify GIN index is used for bloom_filter containment
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_setup")))]
+async fn bench_bloom_containment_uses_gin_index(pool: PgPool) -> Result<()> {
+    let encrypted: String =
+        sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1")
+            .fetch_one(&pool)
+            .await?;
+
+    let sql = format!(
+        "SELECT * FROM bench WHERE eql_v2.bloom_filter(encrypted_text) @> eql_v2.bloom_filter('{}'::jsonb::eql_v2_encrypted)",
+        encrypted
+    );
+    assert_uses_index(&pool, &sql, "bench_text_bloom_idx").await?;
+    Ok(())
+}
+
+/// Verify sequential scan without indexes (before/after pattern sanity check)
+#[sqlx::test]
+async fn bench_hmac_without_index_uses_seq_scan(pool: PgPool) -> Result<()> {
+    analyze_table(&pool, "bench").await?;
+
+    let encrypted: String =
+        sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1")
+            .fetch_one(&pool)
+            .await?;
+
+    let sql = format!(
+        "SELECT * FROM bench WHERE eql_v2.hmac_256(encrypted_text) = eql_v2.hmac_256('{}'::jsonb::eql_v2_encrypted)",
+        encrypted
+    );
+    let explain = explain_query(&pool, &sql).await?;
+    assert_uses_seq_scan(&explain);
+    Ok(())
+}

From 44dabd7d1cdb1bd4877e2b13d5fa9d433ad7f7d9 Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Thu, 9 Apr 2026 09:25:25 +1000
Subject: [PATCH 02/28] refactor(bench): extract BENCH_ROW_COUNT constant from
 magic number

Addresses code review feedback: the literal 10000 appeared in 5 assert_eq
calls. A single constant makes it easy to adjust if the row count changes
for CI. This commit converts three of the five call sites.
---
 tests/sqlx/tests/bench_data_tests.rs | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/tests/sqlx/tests/bench_data_tests.rs b/tests/sqlx/tests/bench_data_tests.rs
index c58ca929..55400b38 100644
--- a/tests/sqlx/tests/bench_data_tests.rs
+++ b/tests/sqlx/tests/bench_data_tests.rs
@@ -10,6 +10,8 @@ use anyhow::Result;
 use eql_tests::{analyze_table, assert_uses_index, assert_uses_seq_scan, explain_query};
 use sqlx::PgPool;
 
+const BENCH_ROW_COUNT: i64 = 10000;
+
 // ========== Data Integrity Tests ==========
 
 /// Verify migration seeded exactly 10K rows
@@ -18,7 +20,10 @@ async fn bench_table_has_expected_row_count(pool: PgPool) -> Result<()> {
     let count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM bench")
         .fetch_one(&pool)
         .await?;
-    assert_eq!(count.0, 10000, "bench table should have 10000 rows");
+    assert_eq!(
+        count.0, BENCH_ROW_COUNT,
+        "bench table should have 10000 rows"
+    );
     Ok(())
 }
 
@@ -48,7 +53,10 @@ async fn bench_encrypted_text_has_hmac_terms(pool: PgPool) -> Result<()> {
     )
     .fetch_one(&pool)
     .await?;
-    assert_eq!(count.0, 10000, "all rows should have hmac_256 index terms");
+    assert_eq!(
+        count.0, BENCH_ROW_COUNT,
+        "all rows should have hmac_256 index terms"
+    );
     Ok(())
 }
 
@@ -75,7 +83,10 @@ async fn bench_encrypted_int_has_ore_terms(pool: PgPool) -> Result<()> {
     )
     .fetch_one(&pool)
     .await?;
-    assert_eq!(count.0, 10000, "all rows should have ORE block index terms");
+    assert_eq!(
+        count.0, BENCH_ROW_COUNT,
+        "all rows should have ORE block index terms"
+    );
     Ok(())
 }
 

From 1934a91fc6337136dc6262eb17d1e4a8b820c804 Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Thu, 9 Apr 2026 12:17:59 +1000
Subject: [PATCH 03/28] fix(bench): move 10K row INSERT from migration to
 opt-in fixture

The INSERT ... generate_series in migration 007 bloated the sqlx template
database, making every test pay the copy cost and causing disk space
crashes. Now migration 007 is DDL-only (CREATE TABLE bench) and the 10K
row seed lives in bench_data.sql fixture. Only bench tests opt in.

Also replaces the two remaining 10000 literals with BENCH_ROW_COUNT; they
were missed by the earlier refactor.
---
 tests/sqlx/fixtures/bench_data.sql            | 19 ++++++++++++++
 .../migrations/007_install_bench_data.sql     | 17 +++---------
 tests/sqlx/tests/bench_data_tests.rs          | 26 +++++++++----------
 3 files changed, 35 insertions(+), 27 deletions(-)
 create mode 100644 tests/sqlx/fixtures/bench_data.sql

diff --git a/tests/sqlx/fixtures/bench_data.sql b/tests/sqlx/fixtures/bench_data.sql
new file mode 100644
index 00000000..baeae5ad
--- /dev/null
+++ b/tests/sqlx/fixtures/bench_data.sql
@@ -0,0 +1,19 @@
+-- Fixture: bench_data.sql
+--
+-- Seeds 10K rows into the bench table for performance testing.
+-- Each column cycles through 100 distinct encrypted values (from ore ids 1-100).
+--
+-- Index terms per row: hm (hmac), b3 (blake3), bf (bloom filter), ob (ORE blocks), sv (STE vec)
+-- Data generated via create_encrypted_json() from 004_install_test_helpers.sql.
+--
+-- Cycling offsets create varied distributions:
+--   encrypted_text:   ids 1, 2, ..., 100, 1, 2, ... (offset 0)
+--   encrypted_int:    ids 34, 35, ..., 100, 1, ..., 33 (offset +33)
+--   encrypted_bigint: ids 67, 68, ..., 100, 1, ..., 66 (offset +66)
+
+INSERT INTO bench (encrypted_text, encrypted_int, encrypted_bigint)
+SELECT
+    create_encrypted_json(((gs - 1) % 100) + 1),
+    create_encrypted_json(((gs + 33) % 100) + 1),
+    create_encrypted_json(((gs + 66) % 100) + 1)
+FROM generate_series(1, 10000) AS gs;
diff --git a/tests/sqlx/migrations/007_install_bench_data.sql b/tests/sqlx/migrations/007_install_bench_data.sql
index 7786d971..04db7695 100644
--- a/tests/sqlx/migrations/007_install_bench_data.sql
+++ b/tests/sqlx/migrations/007_install_bench_data.sql
@@ -1,15 +1,13 @@
 -- Migration: 007_install_bench_data.sql
 --
--- Creates benchmark table with 10K rows for performance testing.
--- Each column cycles through 100 distinct encrypted values (from ore ids 1-100).
+-- Creates benchmark table for performance testing.
+-- DDL only — data is loaded by the bench_data.sql fixture so that
+-- only bench tests pay the 10K-row seeding cost, not the entire suite.
 --
 -- Columns:
 --   encrypted_text   - text equality (hmac), pattern match (bloom), ordering (ore)
 --   encrypted_int    - integer ORE range/equality/ordering
 --   encrypted_bigint - bigint ORE at scale
---
--- Index terms per row: hm (hmac), b3 (blake3), bf (bloom filter), ob (ORE blocks), sv (STE vec)
--- Data generated via create_encrypted_json() from 004_install_test_helpers.sql.
 
 CREATE TABLE bench (
     id SERIAL PRIMARY KEY,
@@ -17,12 +15,3 @@ CREATE TABLE bench (
     encrypted_int eql_v2_encrypted,
     encrypted_bigint eql_v2_encrypted
 );
-
--- Seed 10K rows. Each column uses a different offset to create varied distributions.
--- create_encrypted_json(id) valid for ids 1-100 (ore table lookup at 10*id, max ore.id=1000).
-INSERT INTO bench (encrypted_text, encrypted_int, encrypted_bigint)
-SELECT
-    create_encrypted_json(((gs - 1) % 100) + 1),
-    create_encrypted_json(((gs + 33) % 100) + 1),
-    create_encrypted_json(((gs + 66) % 100) + 1)
-FROM generate_series(1, 10000) AS gs;
diff --git a/tests/sqlx/tests/bench_data_tests.rs b/tests/sqlx/tests/bench_data_tests.rs
index 55400b38..8225df5d 100644
--- a/tests/sqlx/tests/bench_data_tests.rs
+++ b/tests/sqlx/tests/bench_data_tests.rs
@@ -1,6 +1,6 @@
 //! Benchmark data verification tests
 //!
-//! Validates migration 007_install_bench_data.sql and bench_setup fixture:
+//! Validates bench_data fixture (10K rows) and bench_setup fixture (indexes):
 //! - 10K rows seeded correctly across 3 encrypted columns
 //! - Index terms (hmac, bloom, ORE) are extractable
 //! - Indexes are used by the query planner (EXPLAIN assertions)
@@ -14,8 +14,8 @@ const BENCH_ROW_COUNT: i64 = 10000;
 
 // ========== Data Integrity Tests ==========
 
-/// Verify migration seeded exactly 10K rows
-#[sqlx::test]
+/// Verify fixture seeded exactly 10K rows
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data")))]
 async fn bench_table_has_expected_row_count(pool: PgPool) -> Result<()> {
     let count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM bench")
         .fetch_one(&pool)
@@ -28,7 +28,7 @@ async fn bench_table_has_expected_row_count(pool: PgPool) -> Result<()> {
 }
 
 /// Verify all three columns have non-null encrypted data
-#[sqlx::test]
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data")))]
 async fn bench_columns_are_populated(pool: PgPool) -> Result<()> {
     let count: (i64,) = sqlx::query_as(
         "SELECT COUNT(*) FROM bench
@@ -39,14 +39,14 @@ async fn bench_columns_are_populated(pool: PgPool) -> Result<()> {
     .fetch_one(&pool)
     .await?;
     assert_eq!(
-        count.0, 10000,
+        count.0, BENCH_ROW_COUNT,
         "all rows should have non-null encrypted columns"
     );
     Ok(())
 }
 
 /// Verify hmac_256 index terms are extractable from encrypted_text
-#[sqlx::test]
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data")))]
 async fn bench_encrypted_text_has_hmac_terms(pool: PgPool) -> Result<()> {
     let count: (i64,) = sqlx::query_as(
         "SELECT COUNT(*) FROM bench WHERE eql_v2.hmac_256(encrypted_text) IS NOT NULL",
@@ -61,7 +61,7 @@ async fn bench_encrypted_text_has_hmac_terms(pool: PgPool) -> Result<()> {
 }
 
 /// Verify bloom_filter index terms are extractable from encrypted_text
-#[sqlx::test]
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data")))]
 async fn bench_encrypted_text_has_bloom_filter_terms(pool: PgPool) -> Result<()> {
     let count: (i64,) = sqlx::query_as(
         "SELECT COUNT(*) FROM bench WHERE eql_v2.bloom_filter(encrypted_text) IS NOT NULL",
@@ -69,14 +69,14 @@ async fn bench_encrypted_text_has_bloom_filter_terms(pool: PgPool) -> Result<()>
     .fetch_one(&pool)
     .await?;
     assert_eq!(
-        count.0, 10000,
+        count.0, BENCH_ROW_COUNT,
         "all rows should have bloom_filter index terms"
     );
     Ok(())
 }
 
 /// Verify ORE terms are extractable from encrypted_int (3 of 5 indexes are ORE btree)
-#[sqlx::test]
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data")))]
 async fn bench_encrypted_int_has_ore_terms(pool: PgPool) -> Result<()> {
     let count: (i64,) = sqlx::query_as(
         "SELECT COUNT(*) FROM bench WHERE eql_v2.ore_block_u64_8_256(encrypted_int) IS NOT NULL",
@@ -93,7 +93,7 @@ async fn bench_encrypted_int_has_ore_terms(pool: PgPool) -> Result<()> {
 // ========== Index Usage Tests (with fixture) ==========
 
 /// Verify hash index is used for hmac_256 equality lookup
-#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_setup")))]
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
 async fn bench_hmac_equality_uses_hash_index(pool: PgPool) -> Result<()> {
     let encrypted: String =
         sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1")
@@ -109,7 +109,7 @@ async fn bench_hmac_equality_uses_hash_index(pool: PgPool) -> Result<()> {
 }
 
 /// Verify btree index is used for ORDER BY with LIMIT on encrypted_int
-#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_setup")))]
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
 async fn bench_ore_order_uses_btree_index(pool: PgPool) -> Result<()> {
     let sql = "SELECT * FROM bench ORDER BY encrypted_int LIMIT 10";
     assert_uses_index(&pool, sql, "bench_int_ore_idx").await?;
@@ -117,7 +117,7 @@ async fn bench_ore_order_uses_btree_index(pool: PgPool) -> Result<()> {
 }
 
 /// Verify GIN index is used for bloom_filter containment
-#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_setup")))]
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
 async fn bench_bloom_containment_uses_gin_index(pool: PgPool) -> Result<()> {
     let encrypted: String =
         sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1")
@@ -133,7 +133,7 @@ async fn bench_bloom_containment_uses_gin_index(pool: PgPool) -> Result<()> {
 }
 
 /// Verify sequential scan without indexes (before/after pattern sanity check)
-#[sqlx::test]
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data")))]
 async fn bench_hmac_without_index_uses_seq_scan(pool: PgPool) -> Result<()> {
     analyze_table(&pool, "bench").await?;
 

From a57682aa8f78397c6db94d228cd8518fc1d06378 Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Thu, 9 Apr 2026 12:18:05 +1000
Subject: [PATCH 04/28] fix(test): correct pg_stat_statements_reset argument
 order

Database OID was passed as 3rd arg (queryid) instead of 2nd arg (dbid).
read_pg_stat_statements correctly filters by dbid, confirming the intent.
The reset now scopes to the current database instead of matching a
non-existent query ID.
---
 tests/sqlx/src/helpers.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/sqlx/src/helpers.rs b/tests/sqlx/src/helpers.rs
index a2f454e9..8fe12844 100644
--- a/tests/sqlx/src/helpers.rs
+++ b/tests/sqlx/src/helpers.rs
@@ -701,7 +701,7 @@ pub async fn ensure_pg_stat_statements(pool: &PgPool) -> Result<()> {
 /// let stats = read_pg_stat_statements(&pool, "%FROM bench%").await?;
 /// ```
 pub async fn reset_pg_stat_statements(pool: &PgPool) -> Result<()> {
-    sqlx::query("SELECT pg_stat_statements_reset(NULL::oid, NULL::oid, (SELECT oid FROM pg_database WHERE datname = current_database()))")
+    sqlx::query("SELECT pg_stat_statements_reset(NULL::oid, (SELECT oid FROM pg_database WHERE datname = current_database()), 0::bigint)")
         .execute(pool)
         .await
         .with_context(|| "resetting pg_stat_statements counters for current database")?;

From 89f86f665399232d6d4a155c8a5a4446793b7807 Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Tue, 21 Apr 2026 13:23:59 +1000
Subject: [PATCH 05/28] fix(bench): address code review feedback

- Fix misleading bench_setup.sql comment (DDL-only migration, rows from fixture)
- Fix off-by-one in bench_data.sql offset documentation comments
- Add missing ORE term extraction test for encrypted_bigint
- Add missing index-usage tests for bench_text_ore_idx and bench_bigint_ore_idx
- Document bench_data and bench_setup fixtures in FIXTURE_SCHEMA.md
- Update migrations README to list all migrations 002-007
---
 tests/sqlx/fixtures/FIXTURE_SCHEMA.md | 53 ++++++++++++++++++++++++++-
 tests/sqlx/fixtures/bench_data.sql    |  4 +-
 tests/sqlx/fixtures/bench_setup.sql   |  2 +-
 tests/sqlx/migrations/README.md       | 11 ++++--
 tests/sqlx/tests/bench_data_tests.rs  | 31 ++++++++++++++++
 5 files changed, 93 insertions(+), 8 deletions(-)

diff --git a/tests/sqlx/fixtures/FIXTURE_SCHEMA.md b/tests/sqlx/fixtures/FIXTURE_SCHEMA.md
index 7988fb23..70d3b0d9 100644
--- a/tests/sqlx/fixtures/FIXTURE_SCHEMA.md
+++ b/tests/sqlx/fixtures/FIXTURE_SCHEMA.md
@@ -9,7 +9,8 @@ EQL Extension (via migrations)
   ├── encrypted_json.sql
   ├── array_data.sql
   ├── order_by_null_data.sql (depends on ore migration)
-  └── ore_data.sql
+  ├── ore_data.sql
+  └── bench_data.sql + bench_setup.sql (depend on migration 007)
 ```
 
 All fixtures depend on the EQL extension being installed via SQLx migrations.
@@ -132,6 +133,56 @@ CREATE TABLE ore (
 
 ---
 
+## bench_data.sql
+
+**Purpose:** Seeds 10K rows into the `bench` table for performance benchmarking. Opt-in fixture — only loaded when a test explicitly includes `scripts("bench_data")`, so other tests don't pay the cost.
+
+**Dependencies:**
+- Requires `bench` table from migration `007_install_bench_data.sql`
+- Uses `create_encrypted_json()` from migration `004_install_test_helpers.sql`
+
+**Schema:** Uses `bench` table (DDL in migration 007):
+```sql
+CREATE TABLE bench (
+  id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
+  encrypted_text    eql_v2_encrypted,
+  encrypted_int     eql_v2_encrypted,
+  encrypted_bigint  eql_v2_encrypted
+);
+```
+
+**Data:**
+- 10,000 rows cycling through 100 distinct encrypted values (ore ids 1-100)
+- Cycling offsets create varied column distributions:
+  - `encrypted_text`: ids 1, 2, ..., 100, 1, 2, ... (offset 0)
+  - `encrypted_int`: ids 35, 36, ..., 100, 1, ..., 34 (offset +33)
+  - `encrypted_bigint`: ids 68, 69, ..., 100, 1, ..., 67 (offset +66)
+- Each row has HMAC, bloom filter, and ORE index terms
+
+**Used By:**
+- bench_data_tests.rs (all tests)
+
+---
+
+## bench_setup.sql
+
+**Purpose:** Creates the 5 benchmark indexes and refreshes planner statistics. Always loaded after `bench_data.sql` in tests that verify index usage.
+
+**Dependencies:**
+- Requires `bench` table with data from `bench_data.sql`
+
+**Indexes created:**
+- `bench_text_hmac_idx` — hash on `eql_v2.hmac_256(encrypted_text)` for equality
+- `bench_text_ore_idx` — btree on `encrypted_text` via operator class for text ordering
+- `bench_int_ore_idx` — btree on `encrypted_int` via operator class for range/ORDER BY
+- `bench_bigint_ore_idx` — btree on `encrypted_bigint` via operator class
+- `bench_text_bloom_idx` — GIN on `eql_v2.bloom_filter(encrypted_text)` for containment
+
+**Used By:**
+- bench_data_tests.rs (index-usage tests: `scripts("bench_data", "bench_setup")`)
+
+---
+
 ## Validation Tests
 
 Each fixture should have a validation test to ensure correct structure:
diff --git a/tests/sqlx/fixtures/bench_data.sql b/tests/sqlx/fixtures/bench_data.sql
index baeae5ad..ca0db6dd 100644
--- a/tests/sqlx/fixtures/bench_data.sql
+++ b/tests/sqlx/fixtures/bench_data.sql
@@ -8,8 +8,8 @@
 --
 -- Cycling offsets create varied distributions:
 --   encrypted_text:   ids 1, 2, ..., 100, 1, 2, ... (offset 0)
---   encrypted_int:    ids 34, 35, ..., 100, 1, ..., 33 (offset +33)
---   encrypted_bigint: ids 67, 68, ..., 100, 1, ..., 66 (offset +66)
+--   encrypted_int:    ids 35, 36, ..., 100, 1, ..., 34 (offset +33)
+--   encrypted_bigint: ids 68, 69, ..., 100, 1, ..., 67 (offset +66)
 
 INSERT INTO bench (encrypted_text, encrypted_int, encrypted_bigint)
 SELECT
diff --git a/tests/sqlx/fixtures/bench_setup.sql b/tests/sqlx/fixtures/bench_setup.sql
index 164a4b20..0f997940 100644
--- a/tests/sqlx/fixtures/bench_setup.sql
+++ b/tests/sqlx/fixtures/bench_setup.sql
@@ -1,7 +1,7 @@
 -- Fixture: bench_setup.sql
 --
 -- Creates benchmark indexes and refreshes planner statistics.
--- Table and 10K rows created by migration 007_install_bench_data.sql.
+-- Table DDL from migration 007_install_bench_data.sql; 10K rows from bench_data.sql fixture.
 --
 -- Indexes:
 --   bench_text_hmac_idx   - hash on eql_v2.hmac_256(encrypted_text) for equality
diff --git a/tests/sqlx/migrations/README.md b/tests/sqlx/migrations/README.md
index a03dcaa0..f8b5b169 100644
--- a/tests/sqlx/migrations/README.md
+++ b/tests/sqlx/migrations/README.md
@@ -10,10 +10,13 @@ These migrations install EQL and test helpers into the test database using a **h
 - In `.gitignore` - never commit this file
 - Ensures tests always use current EQL version
 
-**Migrations 002-004 are static fixtures**:
-- 002: Test helpers (`test_helpers.sql`)
-- 003: ORE test data (`ore.sql`)
-- 004: STE Vec test data (`ste_vec.sql`)
+**Migrations 002-007 are static fixtures**:
+- 002: ORE test data (`ore.sql`)
+- 003: STE Vec test data (`ste_vec.sql`)
+- 004: Test helpers (`test_helpers.sql`)
+- 005: STE Vec vast data
+- 006: ORE text data
+- 007: Benchmark table DDL (`bench` table with 3 encrypted columns — DDL only, no rows)
 
 ## How SQLx Uses These Migrations
 
diff --git a/tests/sqlx/tests/bench_data_tests.rs b/tests/sqlx/tests/bench_data_tests.rs
index 8225df5d..24ea9955 100644
--- a/tests/sqlx/tests/bench_data_tests.rs
+++ b/tests/sqlx/tests/bench_data_tests.rs
@@ -90,6 +90,21 @@ async fn bench_encrypted_int_has_ore_terms(pool: PgPool) -> Result<()> {
     Ok(())
 }
 
+/// Verify ORE terms are extractable from encrypted_bigint
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data")))]
+async fn bench_encrypted_bigint_has_ore_terms(pool: PgPool) -> Result<()> {
+    let count: (i64,) = sqlx::query_as(
+        "SELECT COUNT(*) FROM bench WHERE eql_v2.ore_block_u64_8_256(encrypted_bigint) IS NOT NULL",
+    )
+    .fetch_one(&pool)
+    .await?;
+    assert_eq!(
+        count.0, BENCH_ROW_COUNT,
+        "all rows should have ORE block index terms"
+    );
+    Ok(())
+}
+
 // ========== Index Usage Tests (with fixture) ==========
 
 /// Verify hash index is used for hmac_256 equality lookup
@@ -132,6 +147,22 @@ async fn bench_bloom_containment_uses_gin_index(pool: PgPool) -> Result<()> {
     Ok(())
 }
 
+/// Verify btree index is used for ORDER BY with LIMIT on encrypted_text
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
+async fn bench_ore_text_order_uses_btree_index(pool: PgPool) -> Result<()> {
+    let sql = "SELECT * FROM bench ORDER BY encrypted_text LIMIT 10";
+    assert_uses_index(&pool, sql, "bench_text_ore_idx").await?;
+    Ok(())
+}
+
+/// Verify btree index is used for ORDER BY with LIMIT on encrypted_bigint
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
+async fn bench_ore_bigint_order_uses_btree_index(pool: PgPool) -> Result<()> {
+    let sql = "SELECT * FROM bench ORDER BY encrypted_bigint LIMIT 10";
+    assert_uses_index(&pool, sql, "bench_bigint_ore_idx").await?;
+    Ok(())
+}
+
 /// Verify sequential scan without indexes (before/after pattern sanity check)
 #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data")))]
 async fn bench_hmac_without_index_uses_seq_scan(pool: PgPool) -> Result<()> {

From 493f085da952a38cf0b492430e70e463ab571bea Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Tue, 21 Apr 2026 15:33:56 +1000
Subject: [PATCH 06/28] fix(bench): address second code review round
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Use BIGINT GENERATED ALWAYS AS IDENTITY for bench.id (matches
  documented schema in FIXTURE_SCHEMA.md and other tables in suite)
- Fix stale migration range in README (001-004 → 001-007)
- Add comment clarifying int/bigint ORE tests verify data seeding,
  not distinct encoding paths
---
 tests/sqlx/migrations/007_install_bench_data.sql | 2 +-
 tests/sqlx/migrations/README.md                  | 2 +-
 tests/sqlx/tests/bench_data_tests.rs             | 3 +++
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/sqlx/migrations/007_install_bench_data.sql b/tests/sqlx/migrations/007_install_bench_data.sql
index 04db7695..49ff6975 100644
--- a/tests/sqlx/migrations/007_install_bench_data.sql
+++ b/tests/sqlx/migrations/007_install_bench_data.sql
@@ -10,7 +10,7 @@
 --   encrypted_bigint - bigint ORE at scale
 
 CREATE TABLE bench (
-    id SERIAL PRIMARY KEY,
+    id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
     encrypted_text eql_v2_encrypted,
     encrypted_int eql_v2_encrypted,
     encrypted_bigint eql_v2_encrypted
diff --git a/tests/sqlx/migrations/README.md b/tests/sqlx/migrations/README.md
index f8b5b169..f78f0635 100644
--- a/tests/sqlx/migrations/README.md
+++ b/tests/sqlx/migrations/README.md
@@ -22,7 +22,7 @@ These migrations install EQL and test helpers into the test database using a **h
 
 When using `#[sqlx::test]`:
 - Each test gets a fresh database
-- All migrations (001-004) run automatically before each test
+- All migrations (001-007) run automatically before each test
 - Migration 001 contains the latest built EQL
 - No need to manually reset database between tests
 
diff --git a/tests/sqlx/tests/bench_data_tests.rs b/tests/sqlx/tests/bench_data_tests.rs
index 24ea9955..a6912b46 100644
--- a/tests/sqlx/tests/bench_data_tests.rs
+++ b/tests/sqlx/tests/bench_data_tests.rs
@@ -91,6 +91,9 @@ async fn bench_encrypted_int_has_ore_terms(pool: PgPool) -> Result<()> {
 }
 
 /// Verify ORE terms are extractable from encrypted_bigint
+///
+/// Both int and bigint columns use the same eql_v2_encrypted type and ob index structure.
+/// These tests verify that data seeding populated both columns, not that encoding differs.
 #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data")))]
 async fn bench_encrypted_bigint_has_ore_terms(pool: PgPool) -> Result<()> {
     let count: (i64,) = sqlx::query_as(

From 2e371e6480bf9d63b9b9444f949cdcea0799982b Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Wed, 22 Apr 2026 09:25:03 +1000
Subject: [PATCH 07/28] docs(bench): address CodeRabbit feedback on fixture
 docs

- bench_data.sql, FIXTURE_SCHEMA.md: correct offset labels from +33/+66
  to +34/+67 (formulas unchanged; labels now match the id sequences)
- FIXTURE_SCHEMA.md: remove self-contradictory ore_data.sql fixture
  references; the ore table is migration-only, not a fixture
- migrations/README.md: update stale example filename from
  005_my_fixture.sql (slot occupied) to 008_my_fixture.sql
---
 tests/sqlx/fixtures/FIXTURE_SCHEMA.md | 14 +++++++-------
 tests/sqlx/fixtures/bench_data.sql    |  4 ++--
 tests/sqlx/migrations/README.md       |  2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/tests/sqlx/fixtures/FIXTURE_SCHEMA.md b/tests/sqlx/fixtures/FIXTURE_SCHEMA.md
index 70d3b0d9..87c352ef 100644
--- a/tests/sqlx/fixtures/FIXTURE_SCHEMA.md
+++ b/tests/sqlx/fixtures/FIXTURE_SCHEMA.md
@@ -9,7 +9,7 @@ EQL Extension (via migrations)
   ├── encrypted_json.sql
   ├── array_data.sql
   ├── order_by_null_data.sql (depends on ore migration)
-  ├── ore_data.sql
+  ├── ore table (migration 002 — not a fixture)
   └── bench_data.sql + bench_setup.sql (depend on migration 007)
 ```
 
@@ -155,8 +155,8 @@ CREATE TABLE bench (
 - 10,000 rows cycling through 100 distinct encrypted values (ore ids 1-100)
 - Cycling offsets create varied column distributions:
   - `encrypted_text`: ids 1, 2, ..., 100, 1, 2, ... (offset 0)
-  - `encrypted_int`: ids 35, 36, ..., 100, 1, ..., 34 (offset +33)
-  - `encrypted_bigint`: ids 68, 69, ..., 100, 1, ..., 67 (offset +66)
+  - `encrypted_int`: ids 35, 36, ..., 100, 1, ..., 34 (offset +34)
+  - `encrypted_bigint`: ids 68, 69, ..., 100, 1, ..., 67 (offset +67)
 - Each row has HMAC, bloom filter, and ORE index terms
 
 **Used By:**
@@ -199,15 +199,15 @@ async fn fixture_encrypted_json_has_three_records(pool: PgPool) {
 }
 ```
 
-### ore_data Validation
+### ore Migration Validation
 ```rust
-#[sqlx::test(fixtures(path = "../fixtures", scripts("ore_data")))]
+#[sqlx::test]
 async fn fixture_ore_data_has_99_records(pool: PgPool) {
     let count: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM ore")
         .fetch_one(&pool)
         .await
         .unwrap();
-    assert_eq!(count, 99, "ore_data fixture should create 99 records");
+    assert_eq!(count, 99, "ore migration should provide 99 records");
 }
 ```
 
@@ -217,7 +217,7 @@ async fn fixture_ore_data_has_99_records(pool: PgPool) {
 
 - Use snake_case for fixture file names
 - Name should describe the data, not the test using it
-- Examples: `encrypted_json.sql`, `ore_data.sql`, `array_data.sql`
+- Examples: `encrypted_json.sql`, `array_data.sql`, `bench_data.sql`
 
 ## Adding New Fixtures
 
diff --git a/tests/sqlx/fixtures/bench_data.sql b/tests/sqlx/fixtures/bench_data.sql
index ca0db6dd..149c08db 100644
--- a/tests/sqlx/fixtures/bench_data.sql
+++ b/tests/sqlx/fixtures/bench_data.sql
@@ -8,8 +8,8 @@
 --
 -- Cycling offsets create varied distributions:
 --   encrypted_text:   ids 1, 2, ..., 100, 1, 2, ... (offset 0)
---   encrypted_int:    ids 35, 36, ..., 100, 1, ..., 34 (offset +33)
---   encrypted_bigint: ids 68, 69, ..., 100, 1, ..., 67 (offset +66)
+--   encrypted_int:    ids 35, 36, ..., 100, 1, ..., 34 (offset +34)
+--   encrypted_bigint: ids 68, 69, ..., 100, 1, ..., 67 (offset +67)
 
 INSERT INTO bench (encrypted_text, encrypted_int, encrypted_bigint)
 SELECT
diff --git a/tests/sqlx/migrations/README.md b/tests/sqlx/migrations/README.md
index f78f0635..abfc7471 100644
--- a/tests/sqlx/migrations/README.md
+++ b/tests/sqlx/migrations/README.md
@@ -39,7 +39,7 @@ cp release/cipherstash-encrypt.sql tests/sqlx/migrations/001_install_eql.sql
 ## Adding New Test Fixtures
 
 To add new test data or helpers:
-1. Create a new migration: `tests/sqlx/migrations/005_my_fixture.sql`
+1. Create a new migration using the next unused number (e.g. `tests/sqlx/migrations/008_my_fixture.sql`)
 2. Add your SQL fixtures
 3. Commit it (static migrations are version-controlled)
 4. SQLx will apply it automatically in test runs

From 53972f90a4086e939e80ae842ad8eb03477d1a5e Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Wed, 22 Apr 2026 10:09:43 +1000
Subject: [PATCH 08/28] refactor(bench): use Zipf-like skew for bench fixture
 distribution
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replaces the uniform, phase-locked cycling (each column got 100 rows
each of 100 distinct ids, offsets +34/+67) with a deterministic
Zipf-like skew seeded via setseed(0.42) and transformed through
random()^2.

Key differences:
- Skewed distribution — top id gets ~10% of rows, tail ids ~0.5%
  (~20x ratio), giving the planner realistic histograms instead
  of a perfectly flat distribution.
- Three independent draws per row decorrelate the columns; previously
  all three were the same cycle with fixed phase shifts.
- Id range tightened to [1, 99] — create_encrypted_json(id) looks up
  ore.id = 10*id, so id=100 previously resolved to a missing ore row.

Existing bench tests are distribution-agnostic (row-count / non-null /
read-and-query-by-id=1) and continue to pass.
---
 tests/sqlx/fixtures/FIXTURE_SCHEMA.md |  9 ++++-----
 tests/sqlx/fixtures/bench_data.sql    | 22 +++++++++++++---------
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/tests/sqlx/fixtures/FIXTURE_SCHEMA.md b/tests/sqlx/fixtures/FIXTURE_SCHEMA.md
index 87c352ef..52c2daa7 100644
--- a/tests/sqlx/fixtures/FIXTURE_SCHEMA.md
+++ b/tests/sqlx/fixtures/FIXTURE_SCHEMA.md
@@ -152,11 +152,10 @@ CREATE TABLE bench (
 ```
 
 **Data:**
-- 10,000 rows cycling through 100 distinct encrypted values (ore ids 1-100)
-- Cycling offsets create varied column distributions:
-  - `encrypted_text`: ids 1, 2, ..., 100, 1, 2, ... (offset 0)
-  - `encrypted_int`: ids 35, 36, ..., 100, 1, ..., 34 (offset +34)
-  - `encrypted_bigint`: ids 68, 69, ..., 100, 1, ..., 67 (offset +67)
+- 10,000 rows drawn from 99 distinct encrypted values (ore ids 1-99)
+- Zipf-like skew via `setseed(0.42)` + `random()^2` — deterministic and byte-identical across runs
+- Top id gets ~10% of rows; tail ids ~0.5% each (top:bottom ratio ~20x)
+- Each column draws independently, so column values are decorrelated within a row
 - Each row has HMAC, bloom filter, and ORE index terms
 
 **Used By:**
diff --git a/tests/sqlx/fixtures/bench_data.sql b/tests/sqlx/fixtures/bench_data.sql
index 149c08db..247d4ed5 100644
--- a/tests/sqlx/fixtures/bench_data.sql
+++ b/tests/sqlx/fixtures/bench_data.sql
@@ -1,19 +1,23 @@
 -- Fixture: bench_data.sql
 --
 -- Seeds 10K rows into the bench table for performance testing.
--- Each column cycles through 100 distinct encrypted values (from ore ids 1-100).
+-- Each column draws independently from 99 distinct encrypted values (ore ids 1-99)
+-- using a Zipf-like skew so the planner sees realistic histograms.
 --
 -- Index terms per row: hm (hmac), b3 (blake3), bf (bloom filter), ob (ORE blocks), sv (STE vec)
 -- Data generated via create_encrypted_json() from 004_install_test_helpers.sql.
 --
--- Cycling offsets create varied distributions:
---   encrypted_text:   ids 1, 2, ..., 100, 1, 2, ... (offset 0)
---   encrypted_int:    ids 35, 36, ..., 100, 1, ..., 34 (offset +34)
---   encrypted_bigint: ids 68, 69, ..., 100, 1, ..., 67 (offset +67)
+-- Distribution:
+--   Deterministic via setseed(0.42) — byte-identical across runs.
+--   random()^2 produces a power-law skew: P(id=k) is proportional to 1/sqrt(k).
+--   Top id gets ~10% of rows (~1,000); tail ids get ~0.5% each (~50). Ratio ~20x.
+--   Three independent draws per row decorrelate the columns.
+
+SELECT setseed(0.42);
 
 INSERT INTO bench (encrypted_text, encrypted_int, encrypted_bigint)
 SELECT
-    create_encrypted_json(((gs - 1) % 100) + 1),
-    create_encrypted_json(((gs + 33) % 100) + 1),
-    create_encrypted_json(((gs + 66) % 100) + 1)
-FROM generate_series(1, 10000) AS gs;
+    create_encrypted_json(1 + floor(99 * power(random(), 2))::int),
+    create_encrypted_json(1 + floor(99 * power(random(), 2))::int),
+    create_encrypted_json(1 + floor(99 * power(random(), 2))::int)
+FROM generate_series(1, 10000);

From 86e2e14333e1d70816ac47ae0c2b84f3ba533a21 Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Tue, 21 Apr 2026 16:21:26 +1000
Subject: [PATCH 09/28] feat(bench): add Tier 1 plan assertions for ORE range
 queries and P0 ignored patterns

---
 tests/sqlx/tests/bench_plan_tests.rs | 115 +++++++++++++++++++++++++++
 1 file changed, 115 insertions(+)
 create mode 100644 tests/sqlx/tests/bench_plan_tests.rs

diff --git a/tests/sqlx/tests/bench_plan_tests.rs b/tests/sqlx/tests/bench_plan_tests.rs
new file mode 100644
index 00000000..badc9d26
--- /dev/null
+++ b/tests/sqlx/tests/bench_plan_tests.rs
@@ -0,0 +1,115 @@
+//! Tier 1 benchmark plan assertions
+//!
+//! EXPLAIN-based tests asserting each P0/P1 query pattern uses the expected
+//! index access method. Tests for known-broken patterns are marked #[ignore].
+
+use anyhow::Result;
+use eql_tests::assert_uses_index;
+use sqlx::PgPool;
+
+/// ORE range query (less-than) uses btree index
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
+async fn ore_int_range_lt_uses_btree_index(pool: PgPool) -> Result<()> {
+    let encrypted: String =
+        sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 50")
+            .fetch_one(&pool)
+            .await?;
+
+    let sql = format!(
+        "SELECT * FROM bench WHERE encrypted_int < '{}'::jsonb::eql_v2_encrypted ORDER BY encrypted_int LIMIT 10",
+        encrypted
+    );
+    assert_uses_index(&pool, &sql, "bench_int_ore_idx").await?;
+    Ok(())
+}
+
+/// ORE range query (greater-than) uses btree index
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
+async fn ore_int_range_gt_uses_btree_index(pool: PgPool) -> Result<()> {
+    let encrypted: String =
+        sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 50")
+            .fetch_one(&pool)
+            .await?;
+
+    let sql = format!(
+        "SELECT * FROM bench WHERE encrypted_int > '{}'::jsonb::eql_v2_encrypted ORDER BY encrypted_int LIMIT 10",
+        encrypted
+    );
+    assert_uses_index(&pool, &sql, "bench_int_ore_idx").await?;
+    Ok(())
+}
+
+/// ORE combined range (>= low AND <= high) uses btree index
+///
+/// Uses explicit >= / <= rather than BETWEEN — BETWEEN's operator resolution
+/// against eql_v2_encrypted is untested and may not resolve to the btree family.
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
+async fn ore_int_range_combined_uses_btree_index(pool: PgPool) -> Result<()> {
+    let low: String =
+        sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 10")
+            .fetch_one(&pool)
+            .await?;
+    let high: String =
+        sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 90")
+            .fetch_one(&pool)
+            .await?;
+
+    let sql = format!(
+        "SELECT * FROM bench WHERE encrypted_int >= '{}'::jsonb::eql_v2_encrypted AND encrypted_int <= '{}'::jsonb::eql_v2_encrypted ORDER BY encrypted_int LIMIT 10",
+        low, high
+    );
+    assert_uses_index(&pool, &sql, "bench_int_ore_idx").await?;
+    Ok(())
+}
+
+/// eql_cast equality should use hash index — currently seq scans (CIP-2831)
+///
+/// "eql_cast" refers to the implicit JSONB-to-eql_v2_encrypted assignment cast
+/// defined in `src/encrypted/casts.sql` (`CREATE CAST (jsonb AS eql_v2_encrypted)
+/// WITH FUNCTION eql_v2.to_encrypted(jsonb)`). The SQL under test uses
+/// `'...'::jsonb::eql_v2_encrypted`, which invokes that cast. PostgreSQL does not
+/// recognise this cast path as equivalent to the indexed `hmac_256` term, so the
+/// planner falls back to a sequential scan instead of using `bench_text_hmac_idx`.
+///
+/// Remove #[ignore] when eql_cast index usage is fixed. At 1M rows this query
+/// takes 7.83s vs 0.4ms for hmac_256 — a 19,500x regression.
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
+#[ignore = "CIP-2831: eql_cast equality performs full seq scan, no index used"]
+async fn eql_cast_equality_uses_hash_index(pool: PgPool) -> Result<()> {
+    let encrypted: String =
+        sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1")
+            .fetch_one(&pool)
+            .await?;
+
+    let sql = format!(
+        "SELECT * FROM bench WHERE encrypted_text = '{}'::jsonb::eql_v2_encrypted",
+        encrypted
+    );
+    assert_uses_index(&pool, &sql, "bench_text_hmac_idx").await?;
+    Ok(())
+}
+
+/// ORE equality via operator class should use btree — currently seq scans (CIP-2831)
+///
+/// Like `eql_cast_equality_uses_hash_index`, the SQL uses `'...'::jsonb::eql_v2_encrypted`
+/// (the implicit JSONB assignment cast from `src/encrypted/casts.sql`). For integer
+/// columns with ORE index terms the planner should satisfy equality via the btree
+/// operator class, but the cast path prevents index recognition and causes a seq scan.
+///
+/// Remove #[ignore] when ORE equality index usage is fixed. At 1M rows this
+/// query takes 18.47s vs 0.4ms for hmac_256.
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
+#[ignore = "CIP-2831: ORE equality via operator class performs full seq scan"]
+async fn ore_equality_uses_btree_index(pool: PgPool) -> Result<()> {
+    let encrypted: String =
+        sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 1")
+            .fetch_one(&pool)
+            .await?;
+
+    let sql = format!(
+        "SELECT * FROM bench WHERE encrypted_int = '{}'::jsonb::eql_v2_encrypted",
+        encrypted
+    );
+    assert_uses_index(&pool, &sql, "bench_int_ore_idx").await?;
+    Ok(())
+}

From cb22f6b9fb4c51289dfd41496f9c5a327c5cea03 Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Tue, 21 Apr 2026 16:30:12 +1000
Subject: [PATCH 10/28] feat(bench): add Tier 1 magnitude regression tests with
 timing thresholds

---
 tests/sqlx/tests/bench_regression_tests.rs | 94 ++++++++++++++++++++++
 1 file changed, 94 insertions(+)
 create mode 100644 tests/sqlx/tests/bench_regression_tests.rs

diff --git a/tests/sqlx/tests/bench_regression_tests.rs b/tests/sqlx/tests/bench_regression_tests.rs
new file mode 100644
index 00000000..9ab7c250
--- /dev/null
+++ b/tests/sqlx/tests/bench_regression_tests.rs
@@ -0,0 +1,94 @@
+//! Tier 1 benchmark magnitude regression tests
+//!
+//! Asserts execution time stays under generous thresholds (~100x expected)
+//! to catch catastrophic regressions while tolerating CI runner variance.
+//! Uses EXPLAIN ANALYZE averaged over 5 runs for server-side timing.
+//!
+//! Patterns known to be broken (P0 seq scans) are NOT included here — encoding
+//! bad performance as "acceptable" defeats the purpose. See bench_plan_tests.rs
+//! for their #[ignore] plan assertions.
+
+use anyhow::Result;
+use eql_tests::{explain_analyze_avg, ExplainStats};
+use sqlx::PgPool;
+
+/// hmac_256 equality must stay under 50ms on 10K rows (expected ~0.5ms)
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
+async fn hmac_equality_under_threshold(pool: PgPool) -> Result<()> {
+    let encrypted: String =
+        sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1")
+            .fetch_one(&pool)
+            .await?;
+
+    let sql = format!(
+        "SELECT * FROM bench WHERE eql_v2.hmac_256(encrypted_text) = eql_v2.hmac_256('{}'::jsonb::eql_v2_encrypted)",
+        encrypted
+    );
+    let stats: ExplainStats = explain_analyze_avg(&pool, &sql, 5).await?;
+    assert!(
+        stats.execution_time_ms < 50.0,
+        "hmac_256 equality took {:.1}ms, threshold 50ms (expected ~0.5ms at 10K rows)",
+        stats.execution_time_ms
+    );
+    Ok(())
+}
+
+/// bloom_filter containment must stay under 100ms on 10K rows (expected ~1ms)
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
+async fn bloom_filter_containment_under_threshold(pool: PgPool) -> Result<()> {
+    let encrypted: String =
+        sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1")
+            .fetch_one(&pool)
+            .await?;
+
+    let sql = format!(
+        "SELECT * FROM bench WHERE eql_v2.bloom_filter(encrypted_text) @> eql_v2.bloom_filter('{}'::jsonb::eql_v2_encrypted)",
+        encrypted
+    );
+    let stats: ExplainStats = explain_analyze_avg(&pool, &sql, 5).await?;
+    assert!(
+        stats.execution_time_ms < 100.0,
+        "bloom_filter containment took {:.1}ms, threshold 100ms (expected ~1ms at 10K rows)",
+        stats.execution_time_ms
+    );
+    Ok(())
+}
+
+/// ORE range query (< LIMIT 10) must stay under 200ms on 10K rows (expected ~2ms)
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
+async fn ore_range_lt_under_threshold(pool: PgPool) -> Result<()> {
+    let encrypted: String =
+        sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 50")
+            .fetch_one(&pool)
+            .await?;
+
+    let sql = format!(
+        "SELECT * FROM bench WHERE encrypted_int < '{}'::jsonb::eql_v2_encrypted ORDER BY encrypted_int LIMIT 10",
+        encrypted
+    );
+    let stats: ExplainStats = explain_analyze_avg(&pool, &sql, 5).await?;
+    assert!(
+        stats.execution_time_ms < 200.0,
+        "ORE range < LIMIT 10 took {:.1}ms, threshold 200ms (expected ~2ms at 10K rows)",
+        stats.execution_time_ms
+    );
+    Ok(())
+}
+
+/// ORE ORDER BY LIMIT 10 must stay under 2000ms on 10K rows
+///
+/// The design doc's observed baseline for this pattern is ~543ms at 10K rows
+/// ("Full-set comparison before sort"). Threshold is set at 2000ms — 4x the
+/// observed baseline — to absorb CI variance while catching catastrophic regressions.
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
+async fn ore_order_by_under_threshold(pool: PgPool) -> Result<()> {
+    let stats: ExplainStats =
+        explain_analyze_avg(&pool, "SELECT * FROM bench ORDER BY encrypted_int LIMIT 10", 5)
+            .await?;
+    assert!(
+        stats.execution_time_ms < 2000.0,
+        "ORE ORDER BY LIMIT 10 took {:.1}ms, threshold 2000ms (observed ~543ms baseline at 10K rows)",
+        stats.execution_time_ms
+    );
+    Ok(())
+}

From b6133f4f685e7bb0e25e1c2727f26e36658649c8 Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Tue, 21 Apr 2026 17:29:44 +1000
Subject: [PATCH 11/28] fix(bench): address post-review code quality issues

- Add get_bench_encrypted_int/text helpers to helpers.rs and re-export from lib.rs
- Replace duplicate inline fetch queries with helpers in bench_plan_tests.rs
- Add BENCH_INT_ORE_IDX constant to eliminate repeated index name literals
- Fix long combined-range format string with line continuation
- Add ANALYZE fixture dependency comment to bench_plan_tests.rs module doc
- Clarify #[ignore] tests: 10K rows sufficient for validation, CIP-2831 is one root cause
- Fix module doc in bench_regression_tests.rs: acknowledge ore_order_by uses 4x not ~100x
- Add stats.node_type to all 4 regression assert messages
- Add cardinality comments for id=1 and id=50 probe row choices
---
 tests/sqlx/src/helpers.rs                  | 22 ++++++++
 tests/sqlx/src/lib.rs                      | 10 ++--
 tests/sqlx/tests/bench_plan_tests.rs       | 58 ++++++++++------------
 tests/sqlx/tests/bench_regression_tests.rs | 45 ++++++++---------
 4 files changed, 74 insertions(+), 61 deletions(-)

diff --git a/tests/sqlx/src/helpers.rs b/tests/sqlx/src/helpers.rs
index 8fe12844..b06ab01c 100644
--- a/tests/sqlx/src/helpers.rs
+++ b/tests/sqlx/src/helpers.rs
@@ -40,6 +40,28 @@ pub async fn get_ore_text_encrypted(pool: &PgPool, id: i32) -> Result<String> {
     result.with_context(|| format!("ore_text returned NULL for id={}", id))
 }
 
+/// Fetch encrypted_int value from the bench table by id
+///
+/// The bench table is created by the bench_data fixture (10K rows, ids 1-10000).
+pub async fn get_bench_encrypted_int(pool: &PgPool, id: i32) -> Result<String> {
+    sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = $1")
+        .bind(id)
+        .fetch_one(pool)
+        .await
+        .with_context(|| format!("fetching bench encrypted_int for id={id}"))
+}
+
+/// Fetch encrypted_text value from the bench table by id
+///
+/// The bench table is created by the bench_data fixture (10K rows, ids 1-10000).
+pub async fn get_bench_encrypted_text(pool: &PgPool, id: i32) -> Result<String> {
+    sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = $1")
+        .bind(id)
+        .fetch_one(pool)
+        .await
+        .with_context(|| format!("fetching bench encrypted_text for id={id}"))
+}
+
 /// Assert sorted rows match expected sequential id range
 pub fn assert_sequential_ids(rows: &[sqlx::postgres::PgRow], start: i64, end: i64) {
     let ids: Vec<i64> = rows.iter().map(|r| r.try_get(0).unwrap()).collect();
diff --git a/tests/sqlx/src/lib.rs b/tests/sqlx/src/lib.rs
index f72cc45f..911264c3 100644
--- a/tests/sqlx/src/lib.rs
+++ b/tests/sqlx/src/lib.rs
@@ -13,11 +13,11 @@ pub use assertions::QueryAssertion;
 pub use helpers::{
     analyze_table, assert_no_seq_scan, assert_sequential_ids, assert_uses_index,
     assert_uses_seq_scan, create_jsonb_gin_index, ensure_pg_stat_statements, explain_analyze_avg,
-    explain_json, explain_query, get_encrypted_term, get_ore_encrypted, get_ore_encrypted_as_jsonb,
-    get_ore_text_encrypted, get_ore_text_encrypted_as_jsonb, get_ste_vec_encrypted,
-    get_ste_vec_encrypted_pair, get_ste_vec_selector_term, get_ste_vec_sv_element,
-    get_ste_vec_term_by_id, read_pg_stat_statements, reset_pg_stat_statements, ExplainStats,
-    PgStatEntry,
+    explain_json, explain_query, get_bench_encrypted_int, get_bench_encrypted_text,
+    get_encrypted_term, get_ore_encrypted, get_ore_encrypted_as_jsonb, get_ore_text_encrypted,
+    get_ore_text_encrypted_as_jsonb, get_ste_vec_encrypted, get_ste_vec_encrypted_pair,
+    get_ste_vec_selector_term, get_ste_vec_sv_element, get_ste_vec_term_by_id,
+    read_pg_stat_statements, reset_pg_stat_statements, ExplainStats, PgStatEntry,
 };
 pub use index_types as IndexTypes;
 pub use selectors::Selectors;
diff --git a/tests/sqlx/tests/bench_plan_tests.rs b/tests/sqlx/tests/bench_plan_tests.rs
index badc9d26..44d1b3bd 100644
--- a/tests/sqlx/tests/bench_plan_tests.rs
+++ b/tests/sqlx/tests/bench_plan_tests.rs
@@ -2,40 +2,40 @@
 //!
 //! EXPLAIN-based tests asserting each P0/P1 query pattern uses the expected
 //! index access method. Tests for known-broken patterns are marked #[ignore].
+//!
+//! ANALYZE is run by the bench_setup fixture — planner statistics are populated at fixture load.
 
 use anyhow::Result;
-use eql_tests::assert_uses_index;
+use eql_tests::{assert_uses_index, get_bench_encrypted_int, get_bench_encrypted_text};
 use sqlx::PgPool;
 
+const BENCH_INT_ORE_IDX: &str = "bench_int_ore_idx";
+
 /// ORE range query (less-than) uses btree index
 #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
 async fn ore_int_range_lt_uses_btree_index(pool: PgPool) -> Result<()> {
-    let encrypted: String =
-        sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 50")
-            .fetch_one(&pool)
-            .await?;
+    let encrypted = get_bench_encrypted_int(&pool, 50).await?;
 
     let sql = format!(
-        "SELECT * FROM bench WHERE encrypted_int < '{}'::jsonb::eql_v2_encrypted ORDER BY encrypted_int LIMIT 10",
+        "SELECT * FROM bench WHERE encrypted_int < '{}'::jsonb::eql_v2_encrypted \
+         ORDER BY encrypted_int LIMIT 10",
         encrypted
     );
-    assert_uses_index(&pool, &sql, "bench_int_ore_idx").await?;
+    assert_uses_index(&pool, &sql, BENCH_INT_ORE_IDX).await?;
     Ok(())
 }
 
 /// ORE range query (greater-than) uses btree index
 #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
 async fn ore_int_range_gt_uses_btree_index(pool: PgPool) -> Result<()> {
-    let encrypted: String =
-        sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 50")
-            .fetch_one(&pool)
-            .await?;
+    let encrypted = get_bench_encrypted_int(&pool, 50).await?;
 
     let sql = format!(
-        "SELECT * FROM bench WHERE encrypted_int > '{}'::jsonb::eql_v2_encrypted ORDER BY encrypted_int LIMIT 10",
+        "SELECT * FROM bench WHERE encrypted_int > '{}'::jsonb::eql_v2_encrypted \
+         ORDER BY encrypted_int LIMIT 10",
         encrypted
     );
-    assert_uses_index(&pool, &sql, "bench_int_ore_idx").await?;
+    assert_uses_index(&pool, &sql, BENCH_INT_ORE_IDX).await?;
     Ok(())
 }
 
@@ -45,20 +45,17 @@ async fn ore_int_range_gt_uses_btree_index(pool: PgPool) -> Result<()> {
 /// against eql_v2_encrypted is untested and may not resolve to the btree family.
 #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
 async fn ore_int_range_combined_uses_btree_index(pool: PgPool) -> Result<()> {
-    let low: String =
-        sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 10")
-            .fetch_one(&pool)
-            .await?;
-    let high: String =
-        sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 90")
-            .fetch_one(&pool)
-            .await?;
+    let low = get_bench_encrypted_int(&pool, 10).await?;
+    let high = get_bench_encrypted_int(&pool, 90).await?;
 
     let sql = format!(
-        "SELECT * FROM bench WHERE encrypted_int >= '{}'::jsonb::eql_v2_encrypted AND encrypted_int <= '{}'::jsonb::eql_v2_encrypted ORDER BY encrypted_int LIMIT 10",
+        "SELECT * FROM bench \
+         WHERE encrypted_int >= '{}'::jsonb::eql_v2_encrypted \
+           AND encrypted_int <= '{}'::jsonb::eql_v2_encrypted \
+         ORDER BY encrypted_int LIMIT 10",
         low, high
     );
-    assert_uses_index(&pool, &sql, "bench_int_ore_idx").await?;
+    assert_uses_index(&pool, &sql, BENCH_INT_ORE_IDX).await?;
     Ok(())
 }
 
@@ -73,13 +70,11 @@ async fn ore_int_range_combined_uses_btree_index(pool: PgPool) -> Result<()> {
 ///
 /// Remove #[ignore] when eql_cast index usage is fixed. At 1M rows this query
 /// takes 7.83s vs 0.4ms for hmac_256 — a 19,500x regression.
+/// Passing with the 10K-row fixture confirms index usage — timing data above was measured at 1M rows.
 #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
 #[ignore = "CIP-2831: eql_cast equality performs full seq scan, no index used"]
 async fn eql_cast_equality_uses_hash_index(pool: PgPool) -> Result<()> {
-    let encrypted: String =
-        sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1")
-            .fetch_one(&pool)
-            .await?;
+    let encrypted = get_bench_encrypted_text(&pool, 1).await?;
 
     let sql = format!(
         "SELECT * FROM bench WHERE encrypted_text = '{}'::jsonb::eql_v2_encrypted",
@@ -96,20 +91,19 @@ async fn eql_cast_equality_uses_hash_index(pool: PgPool) -> Result<()> {
 /// columns with ORE index terms the planner should satisfy equality via the btree
 /// operator class, but the cast path prevents index recognition and causes a seq scan.
 ///
+/// CIP-2831 covers both this and `eql_cast_equality_uses_hash_index` as a single root cause fix.
 /// Remove #[ignore] when ORE equality index usage is fixed. At 1M rows this
 /// query takes 18.47s vs 0.4ms for hmac_256.
+/// Passing with the 10K-row fixture confirms index usage — timing data above was measured at 1M rows.
 #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
 #[ignore = "CIP-2831: ORE equality via operator class performs full seq scan"]
 async fn ore_equality_uses_btree_index(pool: PgPool) -> Result<()> {
-    let encrypted: String =
-        sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 1")
-            .fetch_one(&pool)
-            .await?;
+    let encrypted = get_bench_encrypted_int(&pool, 1).await?;
 
     let sql = format!(
         "SELECT * FROM bench WHERE encrypted_int = '{}'::jsonb::eql_v2_encrypted",
         encrypted
     );
-    assert_uses_index(&pool, &sql, "bench_int_ore_idx").await?;
+    assert_uses_index(&pool, &sql, BENCH_INT_ORE_IDX).await?;
     Ok(())
 }
diff --git a/tests/sqlx/tests/bench_regression_tests.rs b/tests/sqlx/tests/bench_regression_tests.rs
index 9ab7c250..a5de334d 100644
--- a/tests/sqlx/tests/bench_regression_tests.rs
+++ b/tests/sqlx/tests/bench_regression_tests.rs
@@ -1,7 +1,9 @@
 //! Tier 1 benchmark magnitude regression tests
 //!
-//! Asserts execution time stays under generous thresholds (~100x expected)
-//! to catch catastrophic regressions while tolerating CI runner variance.
+//! Asserts execution time stays under generous thresholds to catch catastrophic regressions
+//! while tolerating CI runner variance. Most thresholds are ~100x the expected baseline;
+//! ore_order_by uses 4x (543ms observed baseline leaves little headroom for a 100x multiple
+//! without creating a test that never fails).
 //! Uses EXPLAIN ANALYZE averaged over 5 runs for server-side timing.
 //!
 //! Patterns known to be broken (P0 seq scans) are NOT included here — encoding
@@ -9,16 +11,14 @@
 //! for their #[ignore] plan assertions.
 
 use anyhow::Result;
-use eql_tests::{explain_analyze_avg, ExplainStats};
+use eql_tests::{explain_analyze_avg, get_bench_encrypted_int, get_bench_encrypted_text, ExplainStats};
 use sqlx::PgPool;
 
 /// hmac_256 equality must stay under 50ms on 10K rows (expected ~0.5ms)
 #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
 async fn hmac_equality_under_threshold(pool: PgPool) -> Result<()> {
-    let encrypted: String =
-        sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1")
-            .fetch_one(&pool)
-            .await?;
+    // Row id=1 holds one of 99 skew-distributed values; ~50–1,000 rows share it at 10K
+    let encrypted = get_bench_encrypted_text(&pool, 1).await?;
 
     let sql = format!(
         "SELECT * FROM bench WHERE eql_v2.hmac_256(encrypted_text) = eql_v2.hmac_256('{}'::jsonb::eql_v2_encrypted)",
@@ -27,8 +27,8 @@ async fn hmac_equality_under_threshold(pool: PgPool) -> Result<()> {
     let stats: ExplainStats = explain_analyze_avg(&pool, &sql, 5).await?;
     assert!(
         stats.execution_time_ms < 50.0,
-        "hmac_256 equality took {:.1}ms, threshold 50ms (expected ~0.5ms at 10K rows)",
-        stats.execution_time_ms
+        "hmac_256 equality took {:.1}ms, threshold 50ms (expected ~0.5ms at 10K rows, node_type={})",
+        stats.execution_time_ms, stats.node_type
     );
     Ok(())
 }
@@ -36,10 +36,8 @@ async fn hmac_equality_under_threshold(pool: PgPool) -> Result<()> {
 /// bloom_filter containment must stay under 100ms on 10K rows (expected ~1ms)
 #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
 async fn bloom_filter_containment_under_threshold(pool: PgPool) -> Result<()> {
-    let encrypted: String =
-        sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1")
-            .fetch_one(&pool)
-            .await?;
+    // id=1 maps to 1 of 100 distinct values → ~100 matching rows at 10K
+    let encrypted = get_bench_encrypted_text(&pool, 1).await?;
 
     let sql = format!(
         "SELECT * FROM bench WHERE eql_v2.bloom_filter(encrypted_text) @> eql_v2.bloom_filter('{}'::jsonb::eql_v2_encrypted)",
@@ -48,8 +46,8 @@ async fn bloom_filter_containment_under_threshold(pool: PgPool) -> Result<()> {
     let stats: ExplainStats = explain_analyze_avg(&pool, &sql, 5).await?;
     assert!(
         stats.execution_time_ms < 100.0,
-        "bloom_filter containment took {:.1}ms, threshold 100ms (expected ~1ms at 10K rows)",
-        stats.execution_time_ms
+        "bloom_filter containment took {:.1}ms, threshold 100ms (expected ~1ms at 10K rows, node_type={})",
+        stats.execution_time_ms, stats.node_type
     );
     Ok(())
 }
@@ -57,20 +55,19 @@ async fn bloom_filter_containment_under_threshold(pool: PgPool) -> Result<()> {
 /// ORE range query (< LIMIT 10) must stay under 200ms on 10K rows (expected ~2ms)
 #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
 async fn ore_range_lt_under_threshold(pool: PgPool) -> Result<()> {
-    let encrypted: String =
-        sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 50")
-            .fetch_one(&pool)
-            .await?;
+    // id=50 is the distribution midpoint → ~4,900 rows below threshold
+    let encrypted = get_bench_encrypted_int(&pool, 50).await?;
 
     let sql = format!(
-        "SELECT * FROM bench WHERE encrypted_int < '{}'::jsonb::eql_v2_encrypted ORDER BY encrypted_int LIMIT 10",
+        "SELECT * FROM bench WHERE encrypted_int < '{}'::jsonb::eql_v2_encrypted \
+         ORDER BY encrypted_int LIMIT 10",
         encrypted
     );
     let stats: ExplainStats = explain_analyze_avg(&pool, &sql, 5).await?;
     assert!(
         stats.execution_time_ms < 200.0,
-        "ORE range < LIMIT 10 took {:.1}ms, threshold 200ms (expected ~2ms at 10K rows)",
-        stats.execution_time_ms
+        "ORE range < LIMIT 10 took {:.1}ms, threshold 200ms (expected ~2ms at 10K rows, node_type={})",
+        stats.execution_time_ms, stats.node_type
     );
     Ok(())
 }
@@ -87,8 +84,8 @@ async fn ore_order_by_under_threshold(pool: PgPool) -> Result<()> {
             .await?;
     assert!(
         stats.execution_time_ms < 2000.0,
-        "ORE ORDER BY LIMIT 10 took {:.1}ms, threshold 2000ms (observed ~543ms baseline at 10K rows)",
-        stats.execution_time_ms
+        "ORE ORDER BY LIMIT 10 took {:.1}ms, threshold 2000ms (observed ~543ms baseline at 10K rows, node_type={})",
+        stats.execution_time_ms, stats.node_type
     );
     Ok(())
 }

From 999d22a6ab6afe5e48af3f41fd799627a9a0e4b8 Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Tue, 21 Apr 2026 18:43:03 +1000
Subject: [PATCH 12/28] fix(bench): address third code review round

- Add NULL context to get_bench_encrypted_int/text helpers (mirrors ore helper pattern)
- Add BENCH_TEXT_HMAC_IDX constant to bench_plan_tests.rs for consistency with BENCH_INT_ORE_IDX
- Clarify id=50 midpoint comment: bench row midpoint, not ORE value midpoint (+33 offset)
- Split long use statement in bench_regression_tests.rs onto two lines
---
 tests/sqlx/src/helpers.rs                  | 24 +++++++++++++---------
 tests/sqlx/tests/bench_plan_tests.rs       |  3 ++-
 tests/sqlx/tests/bench_regression_tests.rs | 16 ++++++++++-----
 3 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/tests/sqlx/src/helpers.rs b/tests/sqlx/src/helpers.rs
index b06ab01c..6bf5fc4c 100644
--- a/tests/sqlx/src/helpers.rs
+++ b/tests/sqlx/src/helpers.rs
@@ -44,22 +44,26 @@ pub async fn get_ore_text_encrypted(pool: &PgPool, id: i32) -> Result<String> {
 ///
 /// The bench table is created by the bench_data fixture (10K rows, ids 1-10000).
 pub async fn get_bench_encrypted_int(pool: &PgPool, id: i32) -> Result<String> {
-    sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = $1")
-        .bind(id)
-        .fetch_one(pool)
-        .await
-        .with_context(|| format!("fetching bench encrypted_int for id={id}"))
+    let result: Option<String> =
+        sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = $1")
+            .bind(id)
+            .fetch_one(pool)
+            .await
+            .with_context(|| format!("fetching bench encrypted_int for id={id}"))?;
+    result.with_context(|| format!("bench.encrypted_int is NULL for id={id}"))
 }
 
 /// Fetch encrypted_text value from the bench table by id
 ///
 /// The bench table is created by the bench_data fixture (10K rows, ids 1-10000).
 pub async fn get_bench_encrypted_text(pool: &PgPool, id: i32) -> Result<String> {
-    sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = $1")
-        .bind(id)
-        .fetch_one(pool)
-        .await
-        .with_context(|| format!("fetching bench encrypted_text for id={id}"))
+    let result: Option<String> =
+        sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = $1")
+            .bind(id)
+            .fetch_one(pool)
+            .await
+            .with_context(|| format!("fetching bench encrypted_text for id={id}"))?;
+    result.with_context(|| format!("bench.encrypted_text is NULL for id={id}"))
 }
 
 /// Assert sorted rows match expected sequential id range
diff --git a/tests/sqlx/tests/bench_plan_tests.rs b/tests/sqlx/tests/bench_plan_tests.rs
index 44d1b3bd..7d2dc163 100644
--- a/tests/sqlx/tests/bench_plan_tests.rs
+++ b/tests/sqlx/tests/bench_plan_tests.rs
@@ -10,6 +10,7 @@ use eql_tests::{assert_uses_index, get_bench_encrypted_int, get_bench_encrypted_
 use sqlx::PgPool;
 
 const BENCH_INT_ORE_IDX: &str = "bench_int_ore_idx";
+const BENCH_TEXT_HMAC_IDX: &str = "bench_text_hmac_idx";
 
 /// ORE range query (less-than) uses btree index
 #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
@@ -80,7 +81,7 @@ async fn eql_cast_equality_uses_hash_index(pool: PgPool) -> Result<()> {
         "SELECT * FROM bench WHERE encrypted_text = '{}'::jsonb::eql_v2_encrypted",
         encrypted
     );
-    assert_uses_index(&pool, &sql, "bench_text_hmac_idx").await?;
+    assert_uses_index(&pool, &sql, BENCH_TEXT_HMAC_IDX).await?;
     Ok(())
 }
 
diff --git a/tests/sqlx/tests/bench_regression_tests.rs b/tests/sqlx/tests/bench_regression_tests.rs
index a5de334d..f3668476 100644
--- a/tests/sqlx/tests/bench_regression_tests.rs
+++ b/tests/sqlx/tests/bench_regression_tests.rs
@@ -11,7 +11,9 @@
 //! for their #[ignore] plan assertions.
 
 use anyhow::Result;
-use eql_tests::{explain_analyze_avg, get_bench_encrypted_int, get_bench_encrypted_text, ExplainStats};
+use eql_tests::{
+    explain_analyze_avg, get_bench_encrypted_int, get_bench_encrypted_text, ExplainStats,
+};
 use sqlx::PgPool;
 
 /// hmac_256 equality must stay under 50ms on 10K rows (expected ~0.5ms)
@@ -55,7 +57,8 @@ async fn bloom_filter_containment_under_threshold(pool: PgPool) -> Result<()> {
 /// ORE range query (< LIMIT 10) must stay under 200ms on 10K rows (expected ~2ms)
 #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
 async fn ore_range_lt_under_threshold(pool: PgPool) -> Result<()> {
-    // id=50 is the distribution midpoint → ~4,900 rows below threshold
+    // id=50 is the bench row midpoint; encrypted_int uses a +33 offset so this maps
+    // to ore id 83, but the 10K distribution still yields ~4,900 rows below the predicate
     let encrypted = get_bench_encrypted_int(&pool, 50).await?;
 
     let sql = format!(
@@ -79,9 +82,12 @@ async fn ore_range_lt_under_threshold(pool: PgPool) -> Result<()> {
 /// observed baseline — to absorb CI variance while catching catastrophic regressions.
 #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
 async fn ore_order_by_under_threshold(pool: PgPool) -> Result<()> {
-    let stats: ExplainStats =
-        explain_analyze_avg(&pool, "SELECT * FROM bench ORDER BY encrypted_int LIMIT 10", 5)
-            .await?;
+    let stats: ExplainStats = explain_analyze_avg(
+        &pool,
+        "SELECT * FROM bench ORDER BY encrypted_int LIMIT 10",
+        5,
+    )
+    .await?;
     assert!(
         stats.execution_time_ms < 2000.0,
         "ORE ORDER BY LIMIT 10 took {:.1}ms, threshold 2000ms (observed ~543ms baseline at 10K rows, node_type={})",

From 808b789092f92d15e8bb12a784d0f408139e2d51 Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Wed, 22 Apr 2026 13:37:50 +1000
Subject: [PATCH 13/28] chore(bench): scaffold tests/benchmarks/ directory with
 README and gitignore

---
 tests/benchmarks/.gitignore       |  6 ++++
 tests/benchmarks/README.md        | 48 +++++++++++++++++++++++++++++++
 tests/benchmarks/reports/.gitkeep |  0
 3 files changed, 54 insertions(+)
 create mode 100644 tests/benchmarks/.gitignore
 create mode 100644 tests/benchmarks/README.md
 create mode 100644 tests/benchmarks/reports/.gitkeep

diff --git a/tests/benchmarks/.gitignore b/tests/benchmarks/.gitignore
new file mode 100644
index 00000000..9e7d7623
--- /dev/null
+++ b/tests/benchmarks/.gitignore
@@ -0,0 +1,6 @@
+# Generated reports (too large for git, regenerated on demand)
+reports/*
+!reports/.gitkeep
+
+# Local Proxy credentials
+.env
diff --git a/tests/benchmarks/README.md b/tests/benchmarks/README.md
new file mode 100644
index 00000000..885ddc74
--- /dev/null
+++ b/tests/benchmarks/README.md
@@ -0,0 +1,48 @@
+# EQL Scheduled Benchmarks (Tier 2)
+
+Heavy-weight performance benchmarks that run weekly in CI against 100K-row
+encrypted datasets. Complements the Tier 1 tests in `tests/sqlx/tests/bench_*`.
+
+## What this is
+
+- Brings up Postgres + CipherStash Proxy via docker-compose
+- Inserts 100K plaintext rows through the Proxy (which encrypts them)
+- Runs each P0/P1/P2 query pattern 1000 times
+- Reads `pg_stat_statements` for statistical aggregates
+- Outputs JSON + Markdown reports
+
+## Local usage
+
+```bash
+# Populate credentials
+cp tests/benchmarks/.env.example tests/benchmarks/.env
+# Edit .env with your CipherStash credentials
+
+# Start Postgres + Proxy
+mise run bench:up
+
+# Build EQL and generate 100K dataset (bench:generate depends on build)
+mise run bench:generate
+
+# Run the full Tier 2 suite
+mise run bench:full
+
+# Results land in tests/benchmarks/reports/
+```
+
+## CI usage
+
+Runs automatically every Monday at 03:00 UTC via
+`.github/workflows/benchmark.yml`. Also manually invocable from the
+GitHub Actions UI (Run workflow button).
+
+## Why a separate workflow
+
+- 100K generation takes ~100 seconds via the Proxy
+- 1000-run query loops add several minutes per pattern
+- Regular PR CI must stay under 10 minutes; this suite would blow that budget
+
+## Output
+
+`tests/benchmarks/reports/benchmark-YYYY-MM-DD.{json,md}` — uploaded as
+GitHub Actions artifact named `benchmark-report-<run-id>`.
diff --git a/tests/benchmarks/reports/.gitkeep b/tests/benchmarks/reports/.gitkeep
new file mode 100644
index 00000000..e69de29b

From 41b95cbe2d35504883775830e6496d1104e2ac7b Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Wed, 22 Apr 2026 13:40:43 +1000
Subject: [PATCH 14/28] feat(bench): add docker-compose with Postgres +
 CipherStash Proxy for data generation

---
 tests/benchmarks/.env.example       |  7 ++++
 tests/benchmarks/docker-compose.yml | 59 +++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+)
 create mode 100644 tests/benchmarks/.env.example
 create mode 100644 tests/benchmarks/docker-compose.yml

diff --git a/tests/benchmarks/.env.example b/tests/benchmarks/.env.example
new file mode 100644
index 00000000..fe41909a
--- /dev/null
+++ b/tests/benchmarks/.env.example
@@ -0,0 +1,7 @@
+# CipherStash Proxy credentials
+# Get these from https://dashboard.cipherstash.com
+CS_CLIENT_ACCESS_KEY=
+CS_DEFAULT_KEYSET_ID=
+CS_CLIENT_KEY=
+CS_CLIENT_ID=
+CS_WORKSPACE_CRN=
diff --git a/tests/benchmarks/docker-compose.yml b/tests/benchmarks/docker-compose.yml
new file mode 100644
index 00000000..f8d47d48
--- /dev/null
+++ b/tests/benchmarks/docker-compose.yml
@@ -0,0 +1,59 @@
+services:
+  postgres:
+    image: postgres:17
+    container_name: bench-postgres
+    command: >
+      postgres
+      -c track_functions=all
+      -c shared_preload_libraries=pg_stat_statements
+      -c pg_stat_statements.track=all
+      -c pg_stat_statements.max=10000
+    ports:
+      - "7433:5432"
+    environment:
+      POSTGRES_DB: cipherstash
+      POSTGRES_USER: cipherstash
+      POSTGRES_PASSWORD: password
+    healthcheck:
+      test: ["CMD-SHELL", "pg_isready -U cipherstash"]
+      interval: 1s
+      timeout: 5s
+      retries: 10
+    networks:
+      - bench
+
+  proxy:
+    image: cipherstash/proxy:latest
+    container_name: bench-proxy
+    ports:
+      - "6433:6432"
+    environment:
+      CS_DATABASE__NAME: cipherstash
+      CS_DATABASE__USERNAME: cipherstash
+      CS_DATABASE__PASSWORD: password
+      CS_DATABASE__HOST: postgres
+      CS_DATABASE__PORT: 5432
+      # EQL install is performed explicitly by generate.sh before schema.sql runs.
+      # Leaving Proxy's own install off avoids racing against generate.sh.
+      CS_DATABASE__INSTALL_EQL: "false"
+      CS_CLIENT_ACCESS_KEY: ${CS_CLIENT_ACCESS_KEY}
+      CS_DEFAULT_KEYSET_ID: ${CS_DEFAULT_KEYSET_ID}
+      CS_CLIENT_KEY: ${CS_CLIENT_KEY}
+      CS_CLIENT_ID: ${CS_CLIENT_ID}
+      CS_WORKSPACE_CRN: ${CS_WORKSPACE_CRN}
+    healthcheck:
+      # Probe the Proxy's pg-protocol listener (no auth handshake required).
+      # busybox `nc` is present in the cipherstash/proxy image.
+      test: ["CMD-SHELL", "nc -z localhost 6432"]
+      interval: 1s
+      timeout: 5s
+      retries: 30
+    depends_on:
+      postgres:
+        condition: service_healthy
+    networks:
+      - bench
+
+networks:
+  bench:
+    driver: bridge

From 721a3f00e276e2ee7cc75340bde6871cf0fbbf40 Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Wed, 22 Apr 2026 13:44:00 +1000
Subject: [PATCH 15/28] feat(bench): add schema.sql with bench table and Proxy
 search configuration

---
 tests/benchmarks/schema.sql | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 tests/benchmarks/schema.sql

diff --git a/tests/benchmarks/schema.sql b/tests/benchmarks/schema.sql
new file mode 100644
index 00000000..e8693ef1
--- /dev/null
+++ b/tests/benchmarks/schema.sql
@@ -0,0 +1,35 @@
+-- Bench schema for Tier 2 benchmarks.
+-- Applied against the bench-postgres container AFTER EQL has been explicitly
+-- installed by generate.sh (see Task 4 — generate.sh installs
+-- release/cipherstash-encrypt.sql directly, not relying on Proxy's async install).
+
+DROP TABLE IF EXISTS bench;
+
+CREATE TABLE bench (
+    id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY,
+    encrypted_text eql_v2_encrypted,
+    encrypted_int eql_v2_encrypted,
+    encrypted_bigint eql_v2_encrypted
+);
+
+-- Proxy search configuration: tells Proxy which index terms to generate
+-- for each column when plaintext is inserted.
+--
+-- Signature: eql_v2.add_search_config(table, column, index, cast_as)
+-- (see src/config/functions.sql). add_search_config calls activate_config
+-- internally when migrating=false, so no explicit activate_config call.
+
+-- text column: equality (hmac), pattern match (bloom), ordering (ore)
+SELECT eql_v2.add_search_config('bench', 'encrypted_text', 'unique', 'text');
+SELECT eql_v2.add_search_config('bench', 'encrypted_text', 'match',  'text');
+SELECT eql_v2.add_search_config('bench', 'encrypted_text', 'ore',    'text');
+
+-- integer column: equality + ORE range/ordering
+SELECT eql_v2.add_search_config('bench', 'encrypted_int', 'unique', 'int');
+SELECT eql_v2.add_search_config('bench', 'encrypted_int', 'ore',    'int');
+
+-- bigint column: equality + ORE range/ordering
+SELECT eql_v2.add_search_config('bench', 'encrypted_bigint', 'unique', 'big_int');
+SELECT eql_v2.add_search_config('bench', 'encrypted_bigint', 'ore',    'big_int');
+
+-- Indexes are created by generate.sh after the data load, followed by ANALYZE.

From 8fe674863953465a467107745458c4689dd281eb Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Wed, 22 Apr 2026 13:46:42 +1000
Subject: [PATCH 16/28] feat(bench): add generate.sh for 100K dataset
 generation via Proxy

---
 tests/benchmarks/generate.sh | 58 ++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)
 create mode 100755 tests/benchmarks/generate.sh

diff --git a/tests/benchmarks/generate.sh b/tests/benchmarks/generate.sh
new file mode 100755
index 00000000..bf6aee96
--- /dev/null
+++ b/tests/benchmarks/generate.sh
@@ -0,0 +1,58 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Generates a 100K-row encrypted bench dataset via CipherStash Proxy.
+# No dump is written in v1 — the Tier 2 workflow regenerates fresh each run.
+#
+# Prerequisites:
+#   - mise run build  (produces release/cipherstash-encrypt.sql)
+#   - docker compose -f tests/benchmarks/docker-compose.yml up -d --wait
+#   - tests/benchmarks/.env populated with CipherStash credentials
+
+REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+EQL_SQL="$REPO_ROOT/release/cipherstash-encrypt.sql"
+SCALE="${1:-100k}"
+
+case "$SCALE" in
+    100k) ROWS=100000 ;;
+    *) echo "Unsupported scale: $SCALE (only 100k in v1)" >&2; exit 1 ;;
+esac
+
+if [ ! -f "$EQL_SQL" ]; then
+    echo "ERROR: $EQL_SQL not found. Run 'mise run build' first." >&2
+    exit 1
+fi
+
+PG_URL="postgresql://cipherstash:password@localhost:7433/cipherstash"
+PROXY_URL="postgresql://cipherstash:password@localhost:6433/cipherstash"
+
+echo "==> Installing EQL into bench-postgres"
+psql "$PG_URL" -v ON_ERROR_STOP=1 -f "$EQL_SQL" >/dev/null
+
+echo "==> Applying bench schema and Proxy search configuration"
+psql "$PG_URL" -v ON_ERROR_STOP=1 -f "$SCRIPT_DIR/schema.sql"
+
+echo "==> Inserting $ROWS plaintext rows through Proxy (this encrypts them)"
+# generate_series emits plaintext rows; Proxy intercepts and encrypts each
+# column per the search config applied in schema.sql.
+psql "$PROXY_URL" -v ON_ERROR_STOP=1 -c "
+INSERT INTO bench (encrypted_text, encrypted_int, encrypted_bigint)
+SELECT
+    ('text_' || (((gs - 1) % 1000) + 1))::text,
+    (((gs - 1) % 1000) + 1)::int,
+    (((gs - 1) % 1000) + 1)::bigint * 1000000000
+FROM generate_series(1, $ROWS) AS gs;
+"
+
+echo "==> Creating indexes and running ANALYZE"
+psql "$PG_URL" -v ON_ERROR_STOP=1 -c "
+CREATE INDEX IF NOT EXISTS bench_text_hmac_idx   ON bench USING hash  (eql_v2.hmac_256(encrypted_text));
+CREATE INDEX IF NOT EXISTS bench_text_ore_idx    ON bench USING btree (encrypted_text eql_v2.encrypted_operator_class);
+CREATE INDEX IF NOT EXISTS bench_int_ore_idx     ON bench USING btree (encrypted_int eql_v2.encrypted_operator_class);
+CREATE INDEX IF NOT EXISTS bench_bigint_ore_idx  ON bench USING btree (encrypted_bigint eql_v2.encrypted_operator_class);
+CREATE INDEX IF NOT EXISTS bench_text_bloom_idx  ON bench USING gin   (eql_v2.bloom_filter(encrypted_text));
+ANALYZE bench;
+"
+
+echo "==> Done. Rows: $ROWS"

From c58629c21b437542243e646c7a9c798ca77870f9 Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Wed, 22 Apr 2026 13:50:26 +1000
Subject: [PATCH 17/28] feat(bench): add mise tasks bench:up/down/generate/full

---
 mise.toml        |  2 +-
 tasks/bench.toml | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 1 deletion(-)
 create mode 100644 tasks/bench.toml

diff --git a/mise.toml b/mise.toml
index ff70e4ce..878cb8cc 100644
--- a/mise.toml
+++ b/mise.toml
@@ -14,7 +14,7 @@
 "python" = "3.13"
 
 [task_config]
-includes = ["tasks", "tasks/postgres.toml"]
+includes = ["tasks", "tasks/postgres.toml", "tasks/bench.toml"]
 
 [env]
 POSTGRES_DB = "cipherstash"
diff --git a/tasks/bench.toml b/tasks/bench.toml
new file mode 100644
index 00000000..d4b70bc7
--- /dev/null
+++ b/tasks/bench.toml
@@ -0,0 +1,35 @@
+["bench:up"]
+description = "Start Postgres + Proxy for benchmark data generation"
+dir = "{{config_root}}"
+run = """
+if [ ! -f tests/benchmarks/.env ]; then
+  echo "ERROR: tests/benchmarks/.env missing. Copy .env.example and fill in credentials." >&2
+  exit 1
+fi
+docker compose --env-file tests/benchmarks/.env -f tests/benchmarks/docker-compose.yml up -d --wait
+"""
+
+["bench:down"]
+description = "Stop benchmark Postgres + Proxy"
+dir = "{{config_root}}"
+run = """
+docker compose -f tests/benchmarks/docker-compose.yml down -v
+"""
+
+["bench:generate"]
+description = "Generate 100K encrypted bench dataset (requires bench:up first)"
+# `build` produces release/cipherstash-encrypt.sql, which generate.sh
+# installs into the bench Postgres container before applying schema.sql.
+depends = ["build"]
+dir = "{{config_root}}"
+run = """
+tests/benchmarks/generate.sh 100k
+"""
+
+["bench:full"]
+description = "Run full Tier 2 benchmark suite against bench-postgres"
+dir = "{{config_root}}/tests/sqlx"
+env = { BENCH_DATABASE_URL = "postgresql://cipherstash:password@localhost:7433/cipherstash" }
+run = """
+cargo test --test bench_perf_tests -- --ignored --nocapture --test-threads=1
+"""

From a827486b2754f8f88bab50f105200c0a69a849e1 Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Wed, 22 Apr 2026 13:55:37 +1000
Subject: [PATCH 18/28] feat(bench): add PerfResult struct and JSON/Markdown
 report writer

---
 tests/sqlx/src/lib.rs     |   2 +
 tests/sqlx/src/reports.rs | 112 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 114 insertions(+)
 create mode 100644 tests/sqlx/src/reports.rs

diff --git a/tests/sqlx/src/lib.rs b/tests/sqlx/src/lib.rs
index 911264c3..f979ed85 100644
--- a/tests/sqlx/src/lib.rs
+++ b/tests/sqlx/src/lib.rs
@@ -7,6 +7,7 @@ use sqlx::PgPool;
 pub mod assertions;
 pub mod helpers;
 pub mod index_types;
+pub mod reports;
 pub mod selectors;
 
 pub use assertions::QueryAssertion;
@@ -20,6 +21,7 @@ pub use helpers::{
     read_pg_stat_statements, reset_pg_stat_statements, ExplainStats, PgStatEntry,
 };
 pub use index_types as IndexTypes;
+pub use reports::{append_result, write_reports, PerfResult};
 pub use selectors::Selectors;
 
 /// Reset pg_stat_user_functions tracking before tests
diff --git a/tests/sqlx/src/reports.rs b/tests/sqlx/src/reports.rs
new file mode 100644
index 00000000..b02a36af
--- /dev/null
+++ b/tests/sqlx/src/reports.rs
@@ -0,0 +1,112 @@
+//! Benchmark report writer for Tier 2 scheduled benchmarks.
+//!
+//! Each `#[ignore]` test in `bench_perf_tests.rs` passes a `PerfResult` to
+//! `append_result`. A teardown-style test (named to sort alphabetically last)
+//! calls `write_reports` to write all accumulated results to JSON + Markdown.
+//!
+//! Output shape matches the design doc (.work/eql-index-performance/
+//! 2026-03-30-benchmarking-design.md §Report Format) with one caveat: the
+//! design doc lists `p95_ms` / `p99_ms` fields; Postgres `pg_stat_statements`
+//! does not expose percentiles — only min / max / mean / stddev / total. v1 omits them
+//! and documents the gap. Adding percentiles would require a different timing
+//! strategy (e.g. client-side histograms) deferred to a follow-up.
+
+use anyhow::{Context, Result};
+use serde::Serialize;
+use std::fs;
+use std::path::PathBuf;
+use std::sync::Mutex;
+
+/// One benchmark case result.
+#[derive(Debug, Clone, Serialize)]
+pub struct PerfResult {
+    /// Test name (e.g. "hmac_256_equality")
+    pub name: String,
+    /// Priority tier (P0, P1, P2)
+    pub priority: String,
+    /// Number of executions
+    pub runs: i64,
+    /// Plan node type (e.g. "Index Scan", "Seq Scan")
+    pub plan_type: String,
+    /// Mean execution time in milliseconds
+    pub mean_ms: f64,
+    /// Population standard deviation in milliseconds
+    pub stddev_ms: f64,
+    /// Total execution time across all runs in milliseconds
+    pub total_ms: f64,
+}
+
+/// Top-level report structure — matches the design doc's JSON shape.
+#[derive(Debug, Clone, Serialize)]
+pub struct BenchmarkReport {
+    /// RFC3339 UTC timestamp: midnight (`T00:00:00Z`) of the report date, not the write time
+    pub timestamp: String,
+    /// Postgres major version (e.g. "17")
+    pub postgres_version: String,
+    /// Dataset size this report was produced against
+    pub dataset_rows: i64,
+    /// One entry per benchmark case
+    pub results: Vec<PerfResult>,
+}
+
+static RESULTS: Mutex<Vec<PerfResult>> = Mutex::new(Vec::new());
+
+/// Push a result onto the shared in-memory accumulator.
+pub fn append_result(r: PerfResult) {
+    RESULTS.lock().expect("results mutex poisoned").push(r);
+}
+
+/// Write JSON + Markdown reports for all accumulated results.
+///
+/// Output paths:
+///   `<output_dir>/benchmark-<date>.json`
+///   `<output_dir>/benchmark-<date>.md`
+///
+/// `date` is an ISO-8601 date string provided by the caller (usually today).
+/// `postgres_version` and `dataset_rows` are embedded in the report header.
+pub fn write_reports(
+    output_dir: &str,
+    date: &str,
+    postgres_version: &str,
+    dataset_rows: i64,
+) -> Result<(PathBuf, PathBuf)> {
+    let results = RESULTS.lock().expect("results mutex poisoned").clone();
+    let report = BenchmarkReport {
+        timestamp: format!("{date}T00:00:00Z"),
+        postgres_version: postgres_version.to_string(),
+        dataset_rows,
+        results,
+    };
+
+    fs::create_dir_all(output_dir)
+        .with_context(|| format!("creating output dir {output_dir}"))?;
+
+    let json_path = PathBuf::from(output_dir).join(format!("benchmark-{date}.json"));
+    let md_path   = PathBuf::from(output_dir).join(format!("benchmark-{date}.md"));
+
+    let json = serde_json::to_string_pretty(&report)
+        .context("serializing report to JSON")?;
+    fs::write(&json_path, json)
+        .with_context(|| format!("writing {}", json_path.display()))?;
+
+    fs::write(&md_path, render_markdown(&report))
+        .with_context(|| format!("writing {}", md_path.display()))?;
+
+    Ok((json_path, md_path))
+}
+
+fn render_markdown(report: &BenchmarkReport) -> String {
+    let mut out = String::new();
+    out.push_str(&format!("# Benchmark Report — {}\n\n", report.timestamp));
+    out.push_str(&format!("- Postgres: {}\n", report.postgres_version));
+    out.push_str(&format!("- Dataset rows: {}\n\n", report.dataset_rows));
+    out.push_str("| Query Pattern | Priority | Plan | Runs | Mean (ms) | Stddev (ms) |\n");
+    out.push_str("|---|---|---|---|---|---|\n");
+    for r in &report.results {
+        out.push_str(&format!(
+            "| {} | {} | {} | {} | {:.3} | {:.3} |\n",
+            r.name, r.priority, r.plan_type, r.runs, r.mean_ms, r.stddev_ms
+        ));
+    }
+    out
+}

From 780ac79108da833f27ed283301d312fb381b8317 Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Wed, 22 Apr 2026 13:59:45 +1000
Subject: [PATCH 19/28] feat(bench): add Tier 2 perf test infrastructure and
 hmac_256 baseline case

---
 tests/sqlx/tests/bench_perf_tests.rs | 113 +++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)
 create mode 100644 tests/sqlx/tests/bench_perf_tests.rs

diff --git a/tests/sqlx/tests/bench_perf_tests.rs b/tests/sqlx/tests/bench_perf_tests.rs
new file mode 100644
index 00000000..5c6558c7
--- /dev/null
+++ b/tests/sqlx/tests/bench_perf_tests.rs
@@ -0,0 +1,113 @@
+//! Tier 2 scheduled benchmarks.
+//!
+//! All tests are marked #[ignore] so regular CI doesn't run them. The scheduled
+//! workflow in .github/workflows/benchmark.yml invokes them via
+//! `cargo test --test bench_perf_tests -- --ignored`.
+//!
+//! Unlike Tier 1 tests, these use #[tokio::test] with a manual pool connected
+//! via BENCH_DATABASE_URL against a pre-loaded 100K-row dataset.
+//!
+//! Each test:
+//!   1. Resets pg_stat_statements
+//!   2. Runs its query pattern 1000 times
+//!   3. Reads the matching pg_stat_statements entry
+//!   4. Appends a PerfResult to the shared accumulator
+//!
+//! A single `zz_write_reports` test (alphabetical last) flushes the accumulator
+//! to JSON + Markdown. --test-threads=1 guarantees ordering.
+
+use anyhow::Result;
+use eql_tests::{
+    append_result, ensure_pg_stat_statements, read_pg_stat_statements,
+    reset_pg_stat_statements, write_reports, PerfResult,
+};
+use sqlx::postgres::PgPoolOptions;
+use sqlx::PgPool;
+
+const RUNS: i64 = 1000;
+const DATASET_ROWS: i64 = 100_000;
+
+async fn connect() -> Result<PgPool> {
+    let url = std::env::var("BENCH_DATABASE_URL")
+        .expect("BENCH_DATABASE_URL must be set (run `mise run bench:full`)");
+    let pool = PgPoolOptions::new()
+        .max_connections(4)
+        .connect(&url)
+        .await?;
+    ensure_pg_stat_statements(&pool).await?;
+    Ok(pool)
+}
+
+/// P0 baseline: hmac_256 equality should stay ~0.5ms at 100K rows.
+#[tokio::test]
+#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"]
+async fn hmac_256_equality() -> Result<()> {
+    let pool = connect().await?;
+
+    let encrypted: String = sqlx::query_scalar(
+        "SELECT (encrypted_text).data::text FROM bench WHERE id = 1",
+    )
+    .fetch_one(&pool)
+    .await?;
+
+    reset_pg_stat_statements(&pool).await?;
+
+    for _ in 0..RUNS {
+        sqlx::query(
+            "SELECT * FROM bench WHERE eql_v2.hmac_256(encrypted_text) = eql_v2.hmac_256($1::jsonb::eql_v2_encrypted)",
+        )
+        .bind(&encrypted)
+        .fetch_all(&pool)
+        .await?;
+    }
+
+    let stats = read_pg_stat_statements(
+        &pool,
+        "%FROM bench WHERE eql_v2.hmac_256(encrypted_text) = eql_v2.hmac_256($%",
+    )
+    .await?;
+
+    append_result(PerfResult {
+        name: "hmac_256_equality".into(),
+        priority: "P0".into(),
+        runs: stats.calls,
+        plan_type: "Index Scan".into(),
+        mean_ms: stats.mean_exec_time,
+        stddev_ms: stats.stddev_exec_time,
+        total_ms: stats.total_exec_time,
+    });
+
+    assert_eq!(stats.calls, RUNS, "expected {RUNS} recorded calls");
+    Ok(())
+}
+
+/// Alphabetical-last test — flushes accumulated results to disk.
+/// Requires `--test-threads=1` so it runs after all benchmark cases.
+#[tokio::test]
+#[ignore = "Tier 2: report writer, runs last under --test-threads=1"]
+async fn zz_write_reports() -> Result<()> {
+    let pool = connect().await?;
+    let pg_version: String =
+        sqlx::query_scalar("SHOW server_version_num").fetch_one(&pool).await?;
+    // server_version_num is "170004" etc — take the major version digits
+    let pg_major = pg_version
+        .get(..pg_version.len().saturating_sub(4))
+        .unwrap_or(&pg_version)
+        .to_string();
+
+    let date = std::env::var("BENCH_REPORT_DATE").unwrap_or_else(|_| today_utc());
+    let output_dir = std::env::var("BENCH_REPORT_DIR")
+        .unwrap_or_else(|_| "../../tests/benchmarks/reports".into());
+    let (json, md) = write_reports(&output_dir, &date, &pg_major, DATASET_ROWS)?;
+    eprintln!("wrote {} and {}", json.display(), md.display());
+    Ok(())
+}
+
+fn today_utc() -> String {
+    // Avoid adding the `chrono` dep; shell out to `date -u` for UTC.
+    let out = std::process::Command::new("date")
+        .args(["-u", "+%Y-%m-%d"])
+        .output()
+        .expect("invoking date");
+    String::from_utf8(out.stdout).unwrap().trim().to_string()
+}

From 0941f2d2dfe35378cbc09fbb9c37634ed98718b8 Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Wed, 22 Apr 2026 14:09:49 +1000
Subject: [PATCH 20/28] feat(bench): add Tier 2 perf tests for P0/P1/P2 query
 patterns

---
 tests/sqlx/tests/bench_perf_tests.rs | 167 +++++++++++++++++++++++++++
 1 file changed, 167 insertions(+)

diff --git a/tests/sqlx/tests/bench_perf_tests.rs b/tests/sqlx/tests/bench_perf_tests.rs
index 5c6558c7..195a43e3 100644
--- a/tests/sqlx/tests/bench_perf_tests.rs
+++ b/tests/sqlx/tests/bench_perf_tests.rs
@@ -81,6 +81,173 @@ async fn hmac_256_equality() -> Result<()> {
     Ok(())
 }
 
+/// P2: bloom_filter containment — expected ~3.35ms at 100K rows.
+#[tokio::test]
+#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"]
+async fn bloom_filter_containment() -> Result<()> {
+    let pool = connect().await?;
+    let encrypted: String =
+        sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1")
+            .fetch_one(&pool).await?;
+
+    reset_pg_stat_statements(&pool).await?;
+    for _ in 0..RUNS {
+        sqlx::query(
+            "SELECT * FROM bench WHERE eql_v2.bloom_filter(encrypted_text) @> eql_v2.bloom_filter($1::jsonb::eql_v2_encrypted)",
+        )
+        .bind(&encrypted)
+        .fetch_all(&pool).await?;
+    }
+    let stats = read_pg_stat_statements(
+        &pool,
+        "%eql_v2.bloom_filter(encrypted_text) @> eql_v2.bloom_filter($%",
+    ).await?;
+
+    append_result(PerfResult {
+        name: "bloom_filter_containment".into(),
+        priority: "P2".into(),
+        runs: stats.calls,
+        plan_type: "Bitmap Index Scan".into(),
+        mean_ms: stats.mean_exec_time,
+        stddev_ms: stats.stddev_exec_time,
+        total_ms: stats.total_exec_time,
+    });
+    assert_eq!(stats.calls, RUNS);
+    Ok(())
+}
+
+/// P0: eql_cast equality — currently seq scans (CIP-2831). Report records the
+/// actual plan + timing so the number is visible week-over-week until the fix ships.
+#[tokio::test]
+#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"]
+async fn eql_cast_equality() -> Result<()> {
+    let pool = connect().await?;
+    let encrypted: String =
+        sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1")
+            .fetch_one(&pool).await?;
+
+    reset_pg_stat_statements(&pool).await?;
+    for _ in 0..RUNS {
+        sqlx::query("SELECT * FROM bench WHERE encrypted_text = $1::jsonb::eql_v2_encrypted")
+            .bind(&encrypted)
+            .fetch_all(&pool).await?;
+    }
+    let stats = read_pg_stat_statements(
+        &pool,
+        "%FROM bench WHERE encrypted_text = $%::jsonb::eql_v2_encrypted%",
+    ).await?;
+
+    append_result(PerfResult {
+        name: "eql_cast_equality".into(),
+        priority: "P0".into(),
+        runs: stats.calls,
+        plan_type: "Seq Scan".into(),
+        mean_ms: stats.mean_exec_time,
+        stddev_ms: stats.stddev_exec_time,
+        total_ms: stats.total_exec_time,
+    });
+    assert_eq!(stats.calls, RUNS);
+    Ok(())
+}
+
+/// P0: ORE equality via operator class — currently seq scans (CIP-2831).
+#[tokio::test]
+#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"]
+async fn ore_equality_opclass() -> Result<()> {
+    let pool = connect().await?;
+    let encrypted: String =
+        sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 1")
+            .fetch_one(&pool).await?;
+
+    reset_pg_stat_statements(&pool).await?;
+    for _ in 0..RUNS {
+        sqlx::query("SELECT * FROM bench WHERE encrypted_int = $1::jsonb::eql_v2_encrypted")
+            .bind(&encrypted)
+            .fetch_all(&pool).await?;
+    }
+    let stats = read_pg_stat_statements(
+        &pool,
+        "%FROM bench WHERE encrypted_int = $%::jsonb::eql_v2_encrypted%",
+    ).await?;
+
+    append_result(PerfResult {
+        name: "ore_equality_opclass".into(),
+        priority: "P0".into(),
+        runs: stats.calls,
+        plan_type: "Seq Scan".into(),
+        mean_ms: stats.mean_exec_time,
+        stddev_ms: stats.stddev_exec_time,
+        total_ms: stats.total_exec_time,
+    });
+    assert_eq!(stats.calls, RUNS);
+    Ok(())
+}
+
+/// P1: ORE range < with LIMIT — expected ~1.93ms at 100K rows.
+#[tokio::test]
+#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"]
+async fn ore_range_lt_limit() -> Result<()> {
+    let pool = connect().await?;
+    let encrypted: String =
+        sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 50000")
+            .fetch_one(&pool).await?;
+
+    reset_pg_stat_statements(&pool).await?;
+    for _ in 0..RUNS {
+        sqlx::query(
+            "SELECT * FROM bench WHERE encrypted_int < $1::jsonb::eql_v2_encrypted ORDER BY encrypted_int LIMIT 10",
+        )
+        .bind(&encrypted)
+        .fetch_all(&pool).await?;
+    }
+    let stats = read_pg_stat_statements(
+        &pool,
+        "%FROM bench WHERE encrypted_int < $%ORDER BY encrypted_int LIMIT %",
+    ).await?;
+
+    append_result(PerfResult {
+        name: "ore_range_lt_limit".into(),
+        priority: "P1".into(),
+        runs: stats.calls,
+        plan_type: "Index Scan".into(),
+        mean_ms: stats.mean_exec_time,
+        stddev_ms: stats.stddev_exec_time,
+        total_ms: stats.total_exec_time,
+    });
+    assert_eq!(stats.calls, RUNS);
+    Ok(())
+}
+
+/// P1: ORE ORDER BY encrypted_int LIMIT 10 — design doc observes ~543ms at 10K,
+/// so expect several seconds at 100K. Report captures actual number.
+#[tokio::test]
+#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"]
+async fn ore_order_by_limit() -> Result<()> {
+    let pool = connect().await?;
+
+    reset_pg_stat_statements(&pool).await?;
+    for _ in 0..RUNS {
+        sqlx::query("SELECT * FROM bench ORDER BY encrypted_int LIMIT 10")
+            .fetch_all(&pool).await?;
+    }
+    let stats = read_pg_stat_statements(
+        &pool,
+        "%FROM bench ORDER BY encrypted_int LIMIT %",
+    ).await?;
+
+    append_result(PerfResult {
+        name: "ore_order_by_limit".into(),
+        priority: "P1".into(),
+        runs: stats.calls,
+        plan_type: "Index Scan".into(),
+        mean_ms: stats.mean_exec_time,
+        stddev_ms: stats.stddev_exec_time,
+        total_ms: stats.total_exec_time,
+    });
+    assert_eq!(stats.calls, RUNS);
+    Ok(())
+}
+
 /// Alphabetical-last test — flushes accumulated results to disk.
 /// Requires `--test-threads=1` so it runs after all benchmark cases.
 #[tokio::test]

From dc27f91a4b47e7e1c5bca4d9eafa3fe4beac174d Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Wed, 22 Apr 2026 14:12:56 +1000
Subject: [PATCH 21/28] test(bench): add consistent assertion messages to Tier
 2 perf tests

---
 tests/sqlx/tests/bench_perf_tests.rs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/sqlx/tests/bench_perf_tests.rs b/tests/sqlx/tests/bench_perf_tests.rs
index 195a43e3..95d3f3fa 100644
--- a/tests/sqlx/tests/bench_perf_tests.rs
+++ b/tests/sqlx/tests/bench_perf_tests.rs
@@ -112,7 +112,7 @@ async fn bloom_filter_containment() -> Result<()> {
         stddev_ms: stats.stddev_exec_time,
         total_ms: stats.total_exec_time,
     });
-    assert_eq!(stats.calls, RUNS);
+    assert_eq!(stats.calls, RUNS, "expected {RUNS} recorded calls");
     Ok(())
 }
 
@@ -146,7 +146,7 @@ async fn eql_cast_equality() -> Result<()> {
         stddev_ms: stats.stddev_exec_time,
         total_ms: stats.total_exec_time,
     });
-    assert_eq!(stats.calls, RUNS);
+    assert_eq!(stats.calls, RUNS, "expected {RUNS} recorded calls");
     Ok(())
 }
 
@@ -179,7 +179,7 @@ async fn ore_equality_opclass() -> Result<()> {
         stddev_ms: stats.stddev_exec_time,
         total_ms: stats.total_exec_time,
     });
-    assert_eq!(stats.calls, RUNS);
+    assert_eq!(stats.calls, RUNS, "expected {RUNS} recorded calls");
     Ok(())
 }
 
@@ -214,7 +214,7 @@ async fn ore_range_lt_limit() -> Result<()> {
         stddev_ms: stats.stddev_exec_time,
         total_ms: stats.total_exec_time,
     });
-    assert_eq!(stats.calls, RUNS);
+    assert_eq!(stats.calls, RUNS, "expected {RUNS} recorded calls");
     Ok(())
 }
 
@@ -244,7 +244,7 @@ async fn ore_order_by_limit() -> Result<()> {
         stddev_ms: stats.stddev_exec_time,
         total_ms: stats.total_exec_time,
     });
-    assert_eq!(stats.calls, RUNS);
+    assert_eq!(stats.calls, RUNS, "expected {RUNS} recorded calls");
     Ok(())
 }
 

From 285593ba22ad0fe64a998ef30c1c321fda564fb7 Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Wed, 22 Apr 2026 14:14:10 +1000
Subject: [PATCH 22/28] feat(bench): add scheduled GitHub Actions workflow for
 weekly Tier 2 benchmarks

---
 .github/workflows/benchmark.yml | 70 +++++++++++++++++++++++++++++++++
 1 file changed, 70 insertions(+)
 create mode 100644 .github/workflows/benchmark.yml

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 00000000..daec0053
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,70 @@
+name: "Scheduled Benchmarks (Tier 2)"
+
+on:
+  schedule:
+    - cron: '0 3 * * 1'   # Every Monday 03:00 UTC
+  workflow_dispatch:
+
+# Prevent a scheduled run from racing a manual dispatch for the same ports.
+concurrency:
+  group: scheduled-benchmarks
+  cancel-in-progress: false
+
+env:
+  # Matches test-eql.yml — forces JS-based composite actions onto Node 24.
+  FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true"
+
+jobs:
+  benchmark:
+    name: "100K dataset benchmark (Postgres 17)"
+    runs-on: ubuntu-latest-m
+    timeout-minutes: 60
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Install postgresql-client
+        # generate.sh uses psql directly against Postgres (port 7433) and Proxy
+        # (port 6433). jdx/mise-action only installs Rust + Python.
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y postgresql-client
+
+      - uses: jdx/mise-action@v3
+        with:
+          version: 2026.4.0
+          install: true
+          cache: true
+
+      - name: Write Proxy credentials to .env
+        run: |
+          cat > tests/benchmarks/.env <<EOF
+          CS_CLIENT_ACCESS_KEY=${{ secrets.CS_CLIENT_ACCESS_KEY }}
+          CS_DEFAULT_KEYSET_ID=${{ secrets.CS_DEFAULT_KEYSET_ID }}
+          CS_CLIENT_KEY=${{ secrets.CS_CLIENT_KEY }}
+          CS_CLIENT_ID=${{ secrets.CS_CLIENT_ID }}
+          CS_WORKSPACE_CRN=${{ secrets.CS_WORKSPACE_CRN }}
+          EOF
+
+      - name: Bring up Postgres + Proxy
+        run: mise run bench:up
+
+      - name: Generate 100K dataset
+        run: mise run bench:generate
+
+      - name: Run Tier 2 benchmark suite
+        run: |
+          export BENCH_REPORT_DATE="$(date -u +%Y-%m-%d)-${{ github.run_id }}"
+          mise run bench:full
+
+      - name: Tear down containers
+        if: always()
+        run: mise run bench:down
+
+      - name: Upload benchmark report
+        if: always()
+        uses: actions/upload-artifact@v4
+        with:
+          name: benchmark-report-${{ github.run_id }}
+          path: tests/benchmarks/reports/
+          retention-days: 90

From 5b9dba5525bd8414f1b8b88de04a42ecdb98287a Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Wed, 22 Apr 2026 14:16:32 +1000
Subject: [PATCH 23/28] fix(bench): write Proxy credentials safely via env
 block + printf

---
 .github/workflows/benchmark.yml | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index daec0053..5bc386f2 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -37,14 +37,20 @@ jobs:
           cache: true
 
       - name: Write Proxy credentials to .env
+        env:
+          CS_CLIENT_ACCESS_KEY: ${{ secrets.CS_CLIENT_ACCESS_KEY }}
+          CS_DEFAULT_KEYSET_ID: ${{ secrets.CS_DEFAULT_KEYSET_ID }}
+          CS_CLIENT_KEY: ${{ secrets.CS_CLIENT_KEY }}
+          CS_CLIENT_ID: ${{ secrets.CS_CLIENT_ID }}
+          CS_WORKSPACE_CRN: ${{ secrets.CS_WORKSPACE_CRN }}
         run: |
-          cat > tests/benchmarks/.env <<EOF
-          CS_CLIENT_ACCESS_KEY=${{ secrets.CS_CLIENT_ACCESS_KEY }}
-          CS_DEFAULT_KEYSET_ID=${{ secrets.CS_DEFAULT_KEYSET_ID }}
-          CS_CLIENT_KEY=${{ secrets.CS_CLIENT_KEY }}
-          CS_CLIENT_ID=${{ secrets.CS_CLIENT_ID }}
-          CS_WORKSPACE_CRN=${{ secrets.CS_WORKSPACE_CRN }}
-          EOF
+          {
+            printf 'CS_CLIENT_ACCESS_KEY=%s\n' "$CS_CLIENT_ACCESS_KEY"
+            printf 'CS_DEFAULT_KEYSET_ID=%s\n' "$CS_DEFAULT_KEYSET_ID"
+            printf 'CS_CLIENT_KEY=%s\n' "$CS_CLIENT_KEY"
+            printf 'CS_CLIENT_ID=%s\n' "$CS_CLIENT_ID"
+            printf 'CS_WORKSPACE_CRN=%s\n' "$CS_WORKSPACE_CRN"
+          } > tests/benchmarks/.env
 
       - name: Bring up Postgres + Proxy
         run: mise run bench:up

From 6d98632e7ea008b439f2f5cbd4ac1c676eb92f61 Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Wed, 22 Apr 2026 14:29:37 +1000
Subject: [PATCH 24/28] refactor(bench): use DATABASE_URL for Tier 2 tests,
 drop BENCH_DATABASE_URL

The bench:full mise task overrides DATABASE_URL to point at the bench
Postgres on port 7433, so tests can read the standard env var like
every other test in tests/sqlx/. The mise task is the canonical entry point.
---
 tasks/bench.toml                     |  2 +-
 tests/sqlx/tests/bench_perf_tests.rs | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/tasks/bench.toml b/tasks/bench.toml
index d4b70bc7..df21bd55 100644
--- a/tasks/bench.toml
+++ b/tasks/bench.toml
@@ -29,7 +29,7 @@ tests/benchmarks/generate.sh 100k
 ["bench:full"]
 description = "Run full Tier 2 benchmark suite against bench-postgres"
 dir = "{{config_root}}/tests/sqlx"
-env = { BENCH_DATABASE_URL = "postgresql://cipherstash:password@localhost:7433/cipherstash" }
+env = { DATABASE_URL = "postgresql://cipherstash:password@localhost:7433/cipherstash" }
 run = """
 cargo test --test bench_perf_tests -- --ignored --nocapture --test-threads=1
 """
diff --git a/tests/sqlx/tests/bench_perf_tests.rs b/tests/sqlx/tests/bench_perf_tests.rs
index 95d3f3fa..911cf4d0 100644
--- a/tests/sqlx/tests/bench_perf_tests.rs
+++ b/tests/sqlx/tests/bench_perf_tests.rs
@@ -5,7 +5,7 @@
 //! `cargo test --test bench_perf_tests -- --ignored`.
 //!
 //! Unlike Tier 1 tests, these use #[tokio::test] with a manual pool connected
-//! via BENCH_DATABASE_URL against a pre-loaded 100K-row dataset.
+//! via DATABASE_URL against a pre-loaded 100K-row dataset (set by `mise run bench:full`).
 //!
 //! Each test:
 //!   1. Resets pg_stat_statements
@@ -28,8 +28,8 @@ const RUNS: i64 = 1000;
 const DATASET_ROWS: i64 = 100_000;
 
 async fn connect() -> Result<PgPool> {
-    let url = std::env::var("BENCH_DATABASE_URL")
-        .expect("BENCH_DATABASE_URL must be set (run `mise run bench:full`)");
+    let url = std::env::var("DATABASE_URL")
+        .expect("DATABASE_URL must be set (run `mise run bench:full`)");
     let pool = PgPoolOptions::new()
         .max_connections(4)
         .connect(&url)
@@ -40,7 +40,7 @@ async fn connect() -> Result<PgPool> {
 
 /// P0 baseline: hmac_256 equality should stay ~0.5ms at 100K rows.
 #[tokio::test]
-#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"]
+#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"]
 async fn hmac_256_equality() -> Result<()> {
     let pool = connect().await?;
 
@@ -83,7 +83,7 @@ async fn hmac_256_equality() -> Result<()> {
 
 /// P2: bloom_filter containment — expected ~3.35ms at 100K rows.
 #[tokio::test]
-#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"]
+#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"]
 async fn bloom_filter_containment() -> Result<()> {
     let pool = connect().await?;
     let encrypted: String =
@@ -119,7 +119,7 @@ async fn bloom_filter_containment() -> Result<()> {
 /// P0: eql_cast equality — currently seq scans (CIP-2831). Report records the
 /// actual plan + timing so the number is visible week-over-week until the fix ships.
 #[tokio::test]
-#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"]
+#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"]
 async fn eql_cast_equality() -> Result<()> {
     let pool = connect().await?;
     let encrypted: String =
@@ -152,7 +152,7 @@ async fn eql_cast_equality() -> Result<()> {
 
 /// P0: ORE equality via operator class — currently seq scans (CIP-2831).
 #[tokio::test]
-#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"]
+#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"]
 async fn ore_equality_opclass() -> Result<()> {
     let pool = connect().await?;
     let encrypted: String =
@@ -185,7 +185,7 @@ async fn ore_equality_opclass() -> Result<()> {
 
 /// P1: ORE range < with LIMIT — expected ~1.93ms at 100K rows.
 #[tokio::test]
-#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"]
+#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"]
 async fn ore_range_lt_limit() -> Result<()> {
     let pool = connect().await?;
     let encrypted: String =
@@ -221,7 +221,7 @@ async fn ore_range_lt_limit() -> Result<()> {
 /// P1: ORE ORDER BY encrypted_int LIMIT 10 — design doc observes ~543ms at 10K,
 /// so expect several seconds at 100K. Report captures actual number.
 #[tokio::test]
-#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"]
+#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"]
 async fn ore_order_by_limit() -> Result<()> {
     let pool = connect().await?;
 

From c2dc431fab38f109e5f4129f357127c9ff538609 Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Wed, 22 Apr 2026 17:24:46 +1000
Subject: [PATCH 25/28] =?UTF-8?q?perf(ci):=20mark=20slow=20perf/O(n=C2=B2)?=
 =?UTF-8?q?=20tests=20as=20#[ignore]=20to=20cut=20PR=20runtime?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Default cargo test now skips 9 tests that dominated CI time:

  order_by_sort_tests (30 tests → 25, 401.9s → 8.98s):
    - filtered_inner_query_correct_order
    - sort_compare_faster_than_correlated_subquery
    - filtered_inner_query_faster_than_unfiltered
    - sort_compare_performance_at_scale
    - filtered_inner_query_performance_at_scale

  order_by_no_opclass_tests (12 tests → 8, ~88s → 1.06s):
    - correlated_subquery_ranking_{asc,desc,with_limit,with_where}_without_opclass

These tests assert timing relationships or demonstrate O(n²) behaviour
over 1000-row ORE fixtures; they don't catch correctness regressions on
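the PR path. Run them with `cargo test --test order_by_sort_tests --test
order_by_no_opclass_tests -- --ignored` (all 9 pass in ~7m 28s). A bare
`cargo test -- --ignored` also selects the #[ignore]d Tier 2 bench tests.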

Measured on local PG17 for the two affected binaries combined:
  before: ~491s
  after:   ~10s (~49× faster)

Projected CI impact on test-eql.yml: cargo test step drops from ~22m
to ~2m per Postgres version.
---
 tests/sqlx/tests/order_by_no_opclass_tests.rs | 4 ++++
 tests/sqlx/tests/order_by_sort_tests.rs       | 5 +++++
 2 files changed, 9 insertions(+)

diff --git a/tests/sqlx/tests/order_by_no_opclass_tests.rs b/tests/sqlx/tests/order_by_no_opclass_tests.rs
index 136a4ce4..201e59d3 100644
--- a/tests/sqlx/tests/order_by_no_opclass_tests.rs
+++ b/tests/sqlx/tests/order_by_no_opclass_tests.rs
@@ -169,6 +169,7 @@ async fn direct_order_by_desc_wrong_order_without_opclass(pool: PgPool) -> Resul
 // ============================================================================
 
 #[sqlx::test(fixtures(path = "../fixtures", scripts("drop_operator_classes")))]
+#[ignore = "slow: O(n²) correlated subquery over 1000 rows; run with --ignored"]
 async fn correlated_subquery_ranking_asc_without_opclass(pool: PgPool) -> Result<()> {
     // eql_v2.compare() is a standalone function (not an operator), so it survives
     // the operator class drops. A correlated subquery counts how many rows have a
@@ -198,6 +199,7 @@ async fn correlated_subquery_ranking_asc_without_opclass(pool: PgPool) -> Result
 }
 
 #[sqlx::test(fixtures(path = "../fixtures", scripts("drop_operator_classes")))]
+#[ignore = "slow: O(n²) correlated subquery over 1000 rows; run with --ignored"]
 async fn correlated_subquery_ranking_desc_without_opclass(pool: PgPool) -> Result<()> {
     // Same correlated subquery with DESC — should return highest-ranked rows first.
 
@@ -220,6 +222,7 @@ async fn correlated_subquery_ranking_desc_without_opclass(pool: PgPool) -> Resul
 }
 
 #[sqlx::test(fixtures(path = "../fixtures", scripts("drop_operator_classes")))]
+#[ignore = "slow: O(n²) correlated subquery over 1000 rows; run with --ignored"]
 async fn correlated_subquery_ranking_with_limit_without_opclass(pool: PgPool) -> Result<()> {
     // LIMIT 1 with ASC subquery ranking should return the smallest value (id=1)
 
@@ -238,6 +241,7 @@ async fn correlated_subquery_ranking_with_limit_without_opclass(pool: PgPool) ->
 }
 
 #[sqlx::test(fixtures(path = "../fixtures", scripts("drop_operator_classes")))]
+#[ignore = "slow: O(n²) correlated subquery over 1000 rows; run with --ignored"]
 async fn correlated_subquery_ranking_with_where_without_opclass(pool: PgPool) -> Result<()> {
     // WHERE clause filters rows, then correlated subquery orders the result correctly.
     // Note: the subquery counts over the full table to produce a global rank.
diff --git a/tests/sqlx/tests/order_by_sort_tests.rs b/tests/sqlx/tests/order_by_sort_tests.rs
index 33b69cbf..853e9e4a 100644
--- a/tests/sqlx/tests/order_by_sort_tests.rs
+++ b/tests/sqlx/tests/order_by_sort_tests.rs
@@ -522,6 +522,7 @@ async fn sort_compare_table_ref_matches_order_by_compare(pool: PgPool) -> Result
 // ============================================================================
 
 #[sqlx::test(fixtures(path = "../fixtures", scripts("drop_operator_classes")))]
+#[ignore = "slow: O(n²) correlated subquery over 1000 rows (~7m); run with --ignored"]
 async fn filtered_inner_query_correct_order(pool: PgPool) -> Result<()> {
     // Optimized: inner query also filters, producing correct relative ordering
     // within the filtered set
@@ -602,6 +603,7 @@ async fn filtered_inner_query_with_range(pool: PgPool) -> Result<()> {
 // ============================================================================
 
 #[sqlx::test(fixtures(path = "../fixtures", scripts("drop_operator_classes")))]
+#[ignore = "perf: relative timing assertion at 1000 rows; run with --ignored"]
 async fn sort_compare_faster_than_correlated_subquery(pool: PgPool) -> Result<()> {
     // Warm up: run each query once to populate caches
     let sort_sql = "SELECT * FROM eql_v2.sort_compare(
@@ -644,6 +646,7 @@ async fn sort_compare_faster_than_correlated_subquery(pool: PgPool) -> Result<()
 }
 
 #[sqlx::test(fixtures(path = "../fixtures", scripts("drop_operator_classes")))]
+#[ignore = "perf: relative timing assertion at 1000 rows; run with --ignored"]
 async fn filtered_inner_query_faster_than_unfiltered(pool: PgPool) -> Result<()> {
     let ore_term = get_ore_encrypted(&pool, 42).await?;
 
@@ -703,6 +706,7 @@ async fn filtered_inner_query_faster_than_unfiltered(pool: PgPool) -> Result<()>
 // ============================================================================
 
 #[sqlx::test(fixtures(path = "../fixtures", scripts("drop_operator_classes")))]
+#[ignore = "perf: O(n log n) vs O(n²) demonstration at 1000 rows; run with --ignored"]
 async fn sort_compare_performance_at_scale(pool: PgPool) -> Result<()> {
     // 1000 rows is sufficient scale to demonstrate O(n log n) vs O(n²)
     let sort_sql = "SELECT * FROM eql_v2.sort_compare(
@@ -738,6 +742,7 @@ async fn sort_compare_performance_at_scale(pool: PgPool) -> Result<()> {
 }
 
 #[sqlx::test(fixtures(path = "../fixtures", scripts("drop_operator_classes")))]
+#[ignore = "perf: timing assertion at 1000 rows; run with --ignored"]
 async fn filtered_inner_query_performance_at_scale(pool: PgPool) -> Result<()> {
     let ore_term = get_ore_encrypted(&pool, 42).await?;
 

From 511d97be20c670f821cd73a81df4b27fc6515c4b Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Wed, 22 Apr 2026 17:29:11 +1000
Subject: [PATCH 26/28] style(bench): apply cargo fmt to reports.rs and
 bench_perf_tests.rs

Pure whitespace: collapses/expands await chains and use-imports to
match rustfmt default. No behaviour change.

Unblocks test-eql.yml lint step (cargo fmt --check was failing).
---
 tests/sqlx/src/reports.rs            | 11 ++----
 tests/sqlx/tests/bench_perf_tests.rs | 57 ++++++++++++++++------------
 2 files changed, 37 insertions(+), 31 deletions(-)

diff --git a/tests/sqlx/src/reports.rs b/tests/sqlx/src/reports.rs
index b02a36af..8860901e 100644
--- a/tests/sqlx/src/reports.rs
+++ b/tests/sqlx/src/reports.rs
@@ -78,16 +78,13 @@ pub fn write_reports(
         results,
     };
 
-    fs::create_dir_all(output_dir)
-        .with_context(|| format!("creating output dir {output_dir}"))?;
+    fs::create_dir_all(output_dir).with_context(|| format!("creating output dir {output_dir}"))?;
 
     let json_path = PathBuf::from(output_dir).join(format!("benchmark-{date}.json"));
-    let md_path   = PathBuf::from(output_dir).join(format!("benchmark-{date}.md"));
+    let md_path = PathBuf::from(output_dir).join(format!("benchmark-{date}.md"));
 
-    let json = serde_json::to_string_pretty(&report)
-        .context("serializing report to JSON")?;
-    fs::write(&json_path, json)
-        .with_context(|| format!("writing {}", json_path.display()))?;
+    let json = serde_json::to_string_pretty(&report).context("serializing report to JSON")?;
+    fs::write(&json_path, json).with_context(|| format!("writing {}", json_path.display()))?;
 
     fs::write(&md_path, render_markdown(&report))
         .with_context(|| format!("writing {}", md_path.display()))?;
diff --git a/tests/sqlx/tests/bench_perf_tests.rs b/tests/sqlx/tests/bench_perf_tests.rs
index 911cf4d0..f7d68178 100644
--- a/tests/sqlx/tests/bench_perf_tests.rs
+++ b/tests/sqlx/tests/bench_perf_tests.rs
@@ -18,8 +18,8 @@
 
 use anyhow::Result;
 use eql_tests::{
-    append_result, ensure_pg_stat_statements, read_pg_stat_statements,
-    reset_pg_stat_statements, write_reports, PerfResult,
+    append_result, ensure_pg_stat_statements, read_pg_stat_statements, reset_pg_stat_statements,
+    write_reports, PerfResult,
 };
 use sqlx::postgres::PgPoolOptions;
 use sqlx::PgPool;
@@ -44,11 +44,10 @@ async fn connect() -> Result<PgPool> {
 async fn hmac_256_equality() -> Result<()> {
     let pool = connect().await?;
 
-    let encrypted: String = sqlx::query_scalar(
-        "SELECT (encrypted_text).data::text FROM bench WHERE id = 1",
-    )
-    .fetch_one(&pool)
-    .await?;
+    let encrypted: String =
+        sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1")
+            .fetch_one(&pool)
+            .await?;
 
     reset_pg_stat_statements(&pool).await?;
 
@@ -88,7 +87,8 @@ async fn bloom_filter_containment() -> Result<()> {
     let pool = connect().await?;
     let encrypted: String =
         sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1")
-            .fetch_one(&pool).await?;
+            .fetch_one(&pool)
+            .await?;
 
     reset_pg_stat_statements(&pool).await?;
     for _ in 0..RUNS {
@@ -101,7 +101,8 @@ async fn bloom_filter_containment() -> Result<()> {
     let stats = read_pg_stat_statements(
         &pool,
         "%eql_v2.bloom_filter(encrypted_text) @> eql_v2.bloom_filter($%",
-    ).await?;
+    )
+    .await?;
 
     append_result(PerfResult {
         name: "bloom_filter_containment".into(),
@@ -124,18 +125,21 @@ async fn eql_cast_equality() -> Result<()> {
     let pool = connect().await?;
     let encrypted: String =
         sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1")
-            .fetch_one(&pool).await?;
+            .fetch_one(&pool)
+            .await?;
 
     reset_pg_stat_statements(&pool).await?;
     for _ in 0..RUNS {
         sqlx::query("SELECT * FROM bench WHERE encrypted_text = $1::jsonb::eql_v2_encrypted")
             .bind(&encrypted)
-            .fetch_all(&pool).await?;
+            .fetch_all(&pool)
+            .await?;
     }
     let stats = read_pg_stat_statements(
         &pool,
         "%FROM bench WHERE encrypted_text = $%::jsonb::eql_v2_encrypted%",
-    ).await?;
+    )
+    .await?;
 
     append_result(PerfResult {
         name: "eql_cast_equality".into(),
@@ -157,18 +161,21 @@ async fn ore_equality_opclass() -> Result<()> {
     let pool = connect().await?;
     let encrypted: String =
         sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 1")
-            .fetch_one(&pool).await?;
+            .fetch_one(&pool)
+            .await?;
 
     reset_pg_stat_statements(&pool).await?;
     for _ in 0..RUNS {
         sqlx::query("SELECT * FROM bench WHERE encrypted_int = $1::jsonb::eql_v2_encrypted")
             .bind(&encrypted)
-            .fetch_all(&pool).await?;
+            .fetch_all(&pool)
+            .await?;
     }
     let stats = read_pg_stat_statements(
         &pool,
         "%FROM bench WHERE encrypted_int = $%::jsonb::eql_v2_encrypted%",
-    ).await?;
+    )
+    .await?;
 
     append_result(PerfResult {
         name: "ore_equality_opclass".into(),
@@ -190,7 +197,8 @@ async fn ore_range_lt_limit() -> Result<()> {
     let pool = connect().await?;
     let encrypted: String =
         sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 50000")
-            .fetch_one(&pool).await?;
+            .fetch_one(&pool)
+            .await?;
 
     reset_pg_stat_statements(&pool).await?;
     for _ in 0..RUNS {
@@ -203,7 +211,8 @@ async fn ore_range_lt_limit() -> Result<()> {
     let stats = read_pg_stat_statements(
         &pool,
         "%FROM bench WHERE encrypted_int < $%ORDER BY encrypted_int LIMIT %",
-    ).await?;
+    )
+    .await?;
 
     append_result(PerfResult {
         name: "ore_range_lt_limit".into(),
@@ -228,12 +237,11 @@ async fn ore_order_by_limit() -> Result<()> {
     reset_pg_stat_statements(&pool).await?;
     for _ in 0..RUNS {
         sqlx::query("SELECT * FROM bench ORDER BY encrypted_int LIMIT 10")
-            .fetch_all(&pool).await?;
+            .fetch_all(&pool)
+            .await?;
     }
-    let stats = read_pg_stat_statements(
-        &pool,
-        "%FROM bench ORDER BY encrypted_int LIMIT %",
-    ).await?;
+    let stats =
+        read_pg_stat_statements(&pool, "%FROM bench ORDER BY encrypted_int LIMIT %").await?;
 
     append_result(PerfResult {
         name: "ore_order_by_limit".into(),
@@ -254,8 +262,9 @@ async fn ore_order_by_limit() -> Result<()> {
 #[ignore = "Tier 2: report writer, runs last under --test-threads=1"]
 async fn zz_write_reports() -> Result<()> {
     let pool = connect().await?;
-    let pg_version: String =
-        sqlx::query_scalar("SHOW server_version_num").fetch_one(&pool).await?;
+    let pg_version: String = sqlx::query_scalar("SHOW server_version_num")
+        .fetch_one(&pool)
+        .await?;
     // server_version_num is "170004" etc — take the major version digits
     let pg_major = pg_version
         .get(..pg_version.len().saturating_sub(4))

From e5fff4daad82630d8bb2c44e9ab31c9ac7ca2b93 Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Fri, 24 Apr 2026 12:31:24 +1000
Subject: [PATCH 27/28] fix(bench): address CodeRabbit review feedback on PR
 #173

- use valid `ubuntu-latest` runner label and split SC2155 export
- bind docker-compose host ports to 127.0.0.1
- capture actual plan_type per benchmark via EXPLAIN instead of hard-coding
- replace zz_write_reports alphabetical-ordering hack with explicit
  `run_all_benchmarks` orchestrator
- parameterize bench_regression_tests via explain_analyze_avg_bound
  to avoid encrypted-value string interpolation
- capture report timestamp at write-time via OffsetDateTime::now_utc
- correct FIXTURE_SCHEMA bench_data ORE id mapping documentation
---
 .github/workflows/benchmark.yml            |   5 +-
 tasks/bench.toml                           |   2 +-
 tests/benchmarks/docker-compose.yml        |   4 +-
 tests/sqlx/Cargo.lock                      |  53 +++++
 tests/sqlx/Cargo.toml                      |   1 +
 tests/sqlx/fixtures/FIXTURE_SCHEMA.md      |   4 +-
 tests/sqlx/src/helpers.rs                  | 121 ++++++++---
 tests/sqlx/src/lib.rs                      |  11 +-
 tests/sqlx/src/reports.rs                  |  20 +-
 tests/sqlx/tests/bench_perf_tests.rs       | 227 ++++++++++++---------
 tests/sqlx/tests/bench_regression_tests.rs |  39 ++--
 11 files changed, 334 insertions(+), 153 deletions(-)

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
index 5bc386f2..b16871ab 100644
--- a/.github/workflows/benchmark.yml
+++ b/.github/workflows/benchmark.yml
@@ -17,7 +17,7 @@ env:
 jobs:
   benchmark:
     name: "100K dataset benchmark (Postgres 17)"
-    runs-on: ubuntu-latest-m
+    runs-on: ubuntu-latest
     timeout-minutes: 60
 
     steps:
@@ -60,7 +60,8 @@ jobs:
 
       - name: Run Tier 2 benchmark suite
         run: |
-          export BENCH_REPORT_DATE="$(date -u +%Y-%m-%d)-${{ github.run_id }}"
+          BENCH_REPORT_DATE="$(date -u +%Y-%m-%d)-${{ github.run_id }}"
+          export BENCH_REPORT_DATE
           mise run bench:full
 
       - name: Tear down containers
diff --git a/tasks/bench.toml b/tasks/bench.toml
index df21bd55..f6cd8d13 100644
--- a/tasks/bench.toml
+++ b/tasks/bench.toml
@@ -31,5 +31,5 @@ description = "Run full Tier 2 benchmark suite against bench-postgres"
 dir = "{{config_root}}/tests/sqlx"
 env = { DATABASE_URL = "postgresql://cipherstash:password@localhost:7433/cipherstash" }
 run = """
-cargo test --test bench_perf_tests -- --ignored --nocapture --test-threads=1
+cargo test --test bench_perf_tests run_all_benchmarks -- --ignored --nocapture
 """
diff --git a/tests/benchmarks/docker-compose.yml b/tests/benchmarks/docker-compose.yml
index f8d47d48..bd35ba65 100644
--- a/tests/benchmarks/docker-compose.yml
+++ b/tests/benchmarks/docker-compose.yml
@@ -9,7 +9,7 @@ services:
       -c pg_stat_statements.track=all
       -c pg_stat_statements.max=10000
     ports:
-      - "7433:5432"
+      - "127.0.0.1:7433:5432"
     environment:
       POSTGRES_DB: cipherstash
       POSTGRES_USER: cipherstash
@@ -26,7 +26,7 @@ services:
     image: cipherstash/proxy:latest
     container_name: bench-proxy
     ports:
-      - "6433:6432"
+      - "127.0.0.1:6433:6432"
     environment:
       CS_DATABASE__NAME: cipherstash
       CS_DATABASE__USERNAME: cipherstash
diff --git a/tests/sqlx/Cargo.lock b/tests/sqlx/Cargo.lock
index a1060773..66f047d2 100644
--- a/tests/sqlx/Cargo.lock
+++ b/tests/sqlx/Cargo.lock
@@ -152,6 +152,15 @@ dependencies = [
  "zeroize",
 ]
 
+[[package]]
+name = "deranged"
+version = "0.5.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c"
+dependencies = [
+ "powerfmt",
+]
+
 [[package]]
 name = "digest"
 version = "0.10.7"
@@ -198,6 +207,7 @@ dependencies = [
  "serde",
  "serde_json",
  "sqlx",
+ "time",
  "tokio",
 ]
 
@@ -642,6 +652,12 @@ dependencies = [
  "zeroize",
 ]
 
+[[package]]
+name = "num-conv"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967"
+
 [[package]]
 name = "num-integer"
 version = "0.1.46"
@@ -770,6 +786,12 @@ dependencies = [
  "zerovec",
 ]
 
+[[package]]
+name = "powerfmt"
+version = "0.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391"
+
 [[package]]
 name = "ppv-lite86"
 version = "0.2.21"
@@ -1261,6 +1283,37 @@ dependencies = [
  "syn",
 ]
 
+[[package]]
+name = "time"
+version = "0.3.47"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c"
+dependencies = [
+ "deranged",
+ "itoa",
+ "num-conv",
+ "powerfmt",
+ "serde_core",
+ "time-core",
+ "time-macros",
+]
+
+[[package]]
+name = "time-core"
+version = "0.1.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca"
+
+[[package]]
+name = "time-macros"
+version = "0.2.27"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215"
+dependencies = [
+ "num-conv",
+ "time-core",
+]
+
 [[package]]
 name = "tinystr"
 version = "0.8.1"
diff --git a/tests/sqlx/Cargo.toml b/tests/sqlx/Cargo.toml
index 025d697d..acff2489 100644
--- a/tests/sqlx/Cargo.toml
+++ b/tests/sqlx/Cargo.toml
@@ -9,6 +9,7 @@ tokio = { version = "1", features = ["full"] }
 serde = { version = "1", features = ["derive"] }
 serde_json = "1"
 anyhow = "1"
+time = { version = "0.3", features = ["formatting", "std"] }
 
 [dev-dependencies]
 # None needed - tests live in this crate
diff --git a/tests/sqlx/fixtures/FIXTURE_SCHEMA.md b/tests/sqlx/fixtures/FIXTURE_SCHEMA.md
index 52c2daa7..93c34e05 100644
--- a/tests/sqlx/fixtures/FIXTURE_SCHEMA.md
+++ b/tests/sqlx/fixtures/FIXTURE_SCHEMA.md
@@ -120,7 +120,7 @@ CREATE TABLE ore (
 
 **Helper Functions:**
 - `get_ore_encrypted(pool, id)` - Selects encrypted value from ore table
-- `create_encrypted_json(id)` - Looks up ore table at `id * 10` (valid ids: 1-9 → ore lookups: 10-90)
+- `create_encrypted_json(id)` - Looks up ore table at `id * 10` (valid ids: 1-99 → ore lookups: 10-990)
 
 **Key Property:**
 - Sequential numeric values enable deterministic comparison tests
@@ -152,7 +152,7 @@ CREATE TABLE bench (
 ```
 
 **Data:**
-- 10,000 rows drawn from 99 distinct encrypted values (ore ids 1-99)
+- 10,000 rows drawn from 99 distinct encrypted values. Caller ids `1..99` are Zipf-skewed and resolve via `create_encrypted_json(id)` to ORE ids `{10, 20, …, 990}` (see the ORE helper description above)
 - Zipf-like skew via `setseed(0.42)` + `random()^2` — deterministic and byte-identical across runs
 - Top id gets ~5% of rows; tail ids ~0.5% each (top:bottom ratio ~10x)
 - Each column draws independently, so column values are decorrelated within a row
diff --git a/tests/sqlx/src/helpers.rs b/tests/sqlx/src/helpers.rs
index 6bf5fc4c..e3864433 100644
--- a/tests/sqlx/src/helpers.rs
+++ b/tests/sqlx/src/helpers.rs
@@ -527,6 +527,35 @@ pub async fn explain_json(pool: &PgPool, query: &str) -> Result<serde_json::Valu
     Ok(plan)
 }
 
+/// Run EXPLAIN (FORMAT JSON) with bound parameters and return the root plan node's type.
+///
+/// Lets benchmarks record the plan Postgres chose instead of hard-coding a string. Only the
+/// root node is read (e.g. `Limit` above a scan). Binds `params[i]` to the `$(i+1)` placeholder.
+///
+/// # Example
+/// ```ignore
+/// let plan_type = fetch_plan_node_type(
+///     &pool,
+///     "SELECT * FROM bench WHERE encrypted_int = $1::jsonb::eql_v2_encrypted",
+///     &[&encrypted],
+/// ).await?;
+/// ```
+pub async fn fetch_plan_node_type(pool: &PgPool, query: &str, params: &[&str]) -> Result<String> {
+    let sql = format!("EXPLAIN (FORMAT JSON) {}", query);
+    let mut q = sqlx::query_scalar::<_, serde_json::Value>(&sql);
+    for p in params {
+        q = q.bind(*p);
+    }
+    let plan = q
+        .fetch_one(pool)
+        .await
+        .with_context(|| format!("running EXPLAIN (FORMAT JSON) on query: {}", query))?;
+    Ok(plan[0]["Plan"]["Node Type"]
+        .as_str()
+        .with_context(|| format!("extracting Plan.Node Type from EXPLAIN on query: {}", query))?
+        .to_string())
+}
+
 /// Run EXPLAIN ANALYZE multiple times and return averaged statistics
 ///
 /// Executes `EXPLAIN (ANALYZE, FORMAT JSON) {query}` the specified number of times
@@ -557,6 +586,37 @@ pub async fn explain_json(pool: &PgPool, query: &str) -> Result<serde_json::Valu
 /// assert_eq!(stats.node_type, "Index Scan");
 /// ```
 pub async fn explain_analyze_avg(pool: &PgPool, query: &str, runs: usize) -> Result<ExplainStats> {
+    explain_analyze_avg_bound(pool, query, &[], runs).await
+}
+
+/// Run EXPLAIN ANALYZE multiple times with bound parameters and return averaged statistics
+///
+/// Like `explain_analyze_avg`, but supports `$1`, `$2`, ... placeholders in the
+/// query. String parameters are bound via sqlx, avoiding SQL injection and
+/// quoting issues when the value may contain `'` characters (for example,
+/// encrypted JSON payloads surfaced via `::jsonb::eql_v2_encrypted`).
+///
+/// # Arguments
+/// * `pool` - Database connection pool
+/// * `query` - SQL query with `$N` placeholders (no EXPLAIN prefix)
+/// * `params` - String parameters bound in order ($1 → params[0], etc.)
+/// * `runs` - Number of times to execute (must be >= 1)
+///
+/// # Example
+/// ```ignore
+/// let stats = explain_analyze_avg_bound(
+///     &pool,
+///     "SELECT * FROM bench WHERE eql_v2.hmac_256(col) = eql_v2.hmac_256($1::jsonb::eql_v2_encrypted)",
+///     &[&encrypted],
+///     5,
+/// ).await?;
+/// ```
+pub async fn explain_analyze_avg_bound(
+    pool: &PgPool,
+    query: &str,
+    params: &[&str],
+    runs: usize,
+) -> Result<ExplainStats> {
     anyhow::ensure!(runs >= 1, "runs must be >= 1, got {}", runs);
 
     let sql = format!("EXPLAIN (ANALYZE, FORMAT JSON) {}", query);
@@ -566,39 +626,27 @@ pub async fn explain_analyze_avg(pool: &PgPool, query: &str, runs: usize) -> Res
     let mut node_type = String::new();
 
     for i in 0..runs {
-        let plan: serde_json::Value = sqlx::query_scalar(&sql)
-            .fetch_one(pool)
-            .await
-            .with_context(|| {
-                format!(
-                    "running EXPLAIN ANALYZE (run {}/{}) on query: {}",
-                    i + 1,
-                    runs,
-                    query
-                )
-            })?;
+        let mut q = sqlx::query_scalar::<_, serde_json::Value>(&sql);
+        for p in params {
+            q = q.bind(*p);
+        }
+        let plan = q.fetch_one(pool).await.with_context(|| {
+            format!(
+                "running EXPLAIN ANALYZE (run {}/{}) on query: {}",
+                i + 1,
+                runs,
+                query
+            )
+        })?;
 
         // EXPLAIN (ANALYZE, FORMAT JSON) returns:
         // [{"Plan": {...}, "Planning Time": N, "Execution Time": N}]
         let entry = &plan[0];
-
-        let exec_time = entry["Execution Time"]
-            .as_f64()
-            .with_context(|| format!("extracting Execution Time on run {}/{}", i + 1, runs))?;
-
-        let plan_time = entry["Planning Time"]
-            .as_f64()
-            .with_context(|| format!("extracting Planning Time on run {}/{}", i + 1, runs))?;
-
+        let (exec_time, plan_time, nt) = parse_explain_entry(entry, i + 1, runs)?;
         total_execution_ms += exec_time;
         total_planning_ms += plan_time;
-
-        // Capture node type from first run only
         if i == 0 {
-            node_type = entry["Plan"]["Node Type"]
-                .as_str()
-                .with_context(|| "extracting Node Type from first run")?
-                .to_string();
+            node_type = nt;
         }
     }
 
@@ -610,6 +658,27 @@ pub async fn explain_analyze_avg(pool: &PgPool, query: &str, runs: usize) -> Res
     })
 }
 
+fn parse_explain_entry(
+    entry: &serde_json::Value,
+    run_num: usize,
+    total_runs: usize,
+) -> Result<(f64, f64, String)> {
+    let exec_time = entry["Execution Time"].as_f64().with_context(|| {
+        format!(
+            "extracting Execution Time on run {}/{}",
+            run_num, total_runs
+        )
+    })?;
+    let plan_time = entry["Planning Time"]
+        .as_f64()
+        .with_context(|| format!("extracting Planning Time on run {}/{}", run_num, total_runs))?;
+    let node_type = entry["Plan"]["Node Type"]
+        .as_str()
+        .with_context(|| format!("extracting Node Type on run {}/{}", run_num, total_runs))?
+        .to_string();
+    Ok((exec_time, plan_time, node_type))
+}
+
 /// Assert that a JSON EXPLAIN plan does not use any sequential scan
 ///
 /// Recursively walks the JSON plan tree checking all "Node Type" fields.
diff --git a/tests/sqlx/src/lib.rs b/tests/sqlx/src/lib.rs
index f979ed85..973a94a9 100644
--- a/tests/sqlx/src/lib.rs
+++ b/tests/sqlx/src/lib.rs
@@ -14,11 +14,12 @@ pub use assertions::QueryAssertion;
 pub use helpers::{
     analyze_table, assert_no_seq_scan, assert_sequential_ids, assert_uses_index,
     assert_uses_seq_scan, create_jsonb_gin_index, ensure_pg_stat_statements, explain_analyze_avg,
-    explain_json, explain_query, get_bench_encrypted_int, get_bench_encrypted_text,
-    get_encrypted_term, get_ore_encrypted, get_ore_encrypted_as_jsonb, get_ore_text_encrypted,
-    get_ore_text_encrypted_as_jsonb, get_ste_vec_encrypted, get_ste_vec_encrypted_pair,
-    get_ste_vec_selector_term, get_ste_vec_sv_element, get_ste_vec_term_by_id,
-    read_pg_stat_statements, reset_pg_stat_statements, ExplainStats, PgStatEntry,
+    explain_analyze_avg_bound, explain_json, explain_query, fetch_plan_node_type,
+    get_bench_encrypted_int, get_bench_encrypted_text, get_encrypted_term, get_ore_encrypted,
+    get_ore_encrypted_as_jsonb, get_ore_text_encrypted, get_ore_text_encrypted_as_jsonb,
+    get_ste_vec_encrypted, get_ste_vec_encrypted_pair, get_ste_vec_selector_term,
+    get_ste_vec_sv_element, get_ste_vec_term_by_id, read_pg_stat_statements,
+    reset_pg_stat_statements, ExplainStats, PgStatEntry,
 };
 pub use index_types as IndexTypes;
 pub use reports::{append_result, write_reports, PerfResult};
diff --git a/tests/sqlx/src/reports.rs b/tests/sqlx/src/reports.rs
index 8860901e..1dbd9b56 100644
--- a/tests/sqlx/src/reports.rs
+++ b/tests/sqlx/src/reports.rs
@@ -1,8 +1,9 @@
 //! Benchmark report writer for Tier 2 scheduled benchmarks.
 //!
-//! Each `#[ignore]` test in `bench_perf_tests.rs` pushes a `PerfResult` into
-//! `append_result`. A teardown-style test (run last, alphabetical order) calls
-//! `write_reports` to flush all accumulated results to JSON + Markdown.
+//! Each `#[ignore]` benchmark in `bench_perf_tests.rs` pushes a `PerfResult`
+//! into `append_result`. The `run_all_benchmarks` orchestrator invokes each
+//! benchmark in sequence and then calls `write_reports` to flush all
+//! accumulated results to JSON + Markdown.
 //!
 //! Output shape matches the design doc (.work/eql-index-performance/
 //! 2026-03-30-benchmarking-design.md §Report Format) with one caveat: the
@@ -16,6 +17,8 @@ use serde::Serialize;
 use std::fs;
 use std::path::PathBuf;
 use std::sync::Mutex;
+use time::format_description::well_known::Rfc3339;
+use time::OffsetDateTime;
 
 /// One benchmark case result.
 #[derive(Debug, Clone, Serialize)]
@@ -62,8 +65,10 @@ pub fn append_result(r: PerfResult) {
 ///   `<output_dir>/benchmark-<date>.json`
 ///   `<output_dir>/benchmark-<date>.md`
 ///
-/// `date` is an ISO-8601 date string provided by the caller (usually today).
-/// `postgres_version` and `dataset_rows` are embedded in the report header.
+/// `date` is used only as a filename suffix (any caller-supplied string,
+/// typically `YYYY-MM-DD` with an optional run-id suffix for uniqueness).
+/// The report's `timestamp` field is captured at write time as RFC3339 UTC
+/// and is independent of `date`.
 pub fn write_reports(
     output_dir: &str,
     date: &str,
@@ -71,8 +76,11 @@ pub fn write_reports(
     dataset_rows: i64,
 ) -> Result<(PathBuf, PathBuf)> {
     let results = RESULTS.lock().expect("results mutex poisoned").clone();
+    let timestamp = OffsetDateTime::now_utc()
+        .format(&Rfc3339)
+        .context("formatting RFC3339 write-time timestamp")?;
     let report = BenchmarkReport {
-        timestamp: format!("{date}T00:00:00Z"),
+        timestamp,
         postgres_version: postgres_version.to_string(),
         dataset_rows,
         results,
diff --git a/tests/sqlx/tests/bench_perf_tests.rs b/tests/sqlx/tests/bench_perf_tests.rs
index f7d68178..63f6b5d7 100644
--- a/tests/sqlx/tests/bench_perf_tests.rs
+++ b/tests/sqlx/tests/bench_perf_tests.rs
@@ -1,25 +1,29 @@
 //! Tier 2 scheduled benchmarks.
 //!
 //! All tests are marked #[ignore] so regular CI doesn't run them. The scheduled
-//! workflow in .github/workflows/benchmark.yml invokes them via
-//! `cargo test --test bench_perf_tests -- --ignored`.
+//! workflow in .github/workflows/benchmark.yml invokes the orchestrator:
+//! `cargo test --test bench_perf_tests run_all_benchmarks -- --ignored`.
 //!
 //! Unlike Tier 1 tests, these use #[tokio::test] with a manual pool connected
 //! via DATABASE_URL against a pre-loaded 100K-row dataset (set by `mise run bench:full`).
 //!
-//! Each test:
+//! Each benchmark:
-//!   1. Resets pg_stat_statements
-//!   2. Runs its query pattern 1000 times
-//!   3. Reads pg_stat_statements for the match
-//!   4. Appends a PerfResult to the shared accumulator
+//!   1. Captures the actual query plan via EXPLAIN (FORMAT JSON)
+//!   2. Resets pg_stat_statements
+//!   3. Runs its query pattern 1000 times
+//!   4. Reads pg_stat_statements for the match
+//!   5. Appends a PerfResult to the shared accumulator
 //!
-//! A single `zz_write_reports` test (alphabetical last) flushes the accumulator
-//! to JSON + Markdown. --test-threads=1 guarantees ordering.
+//! The `run_all_benchmarks` orchestrator invokes each benchmark helper in
+//! sequence and then calls `flush_reports` to write JSON + Markdown. Individual
+//! `#[tokio::test] #[ignore]` wrappers are retained so developers can run a
+//! single benchmark in isolation, but they do NOT write reports (the
+//! orchestrator owns report emission).
 
 use anyhow::Result;
 use eql_tests::{
-    append_result, ensure_pg_stat_statements, read_pg_stat_statements, reset_pg_stat_statements,
-    write_reports, PerfResult,
+    append_result, ensure_pg_stat_statements, fetch_plan_node_type, read_pg_stat_statements,
+    reset_pg_stat_statements, write_reports, PerfResult,
 };
 use sqlx::postgres::PgPoolOptions;
 use sqlx::PgPool;
@@ -38,30 +42,30 @@ async fn connect() -> Result<PgPool> {
     Ok(pool)
 }
 
-/// P0 baseline: hmac_256 equality should stay ~0.5ms at 100K rows.
-#[tokio::test]
-#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"]
-async fn hmac_256_equality() -> Result<()> {
-    let pool = connect().await?;
+// ============================================================================
+// Benchmark bodies — each is an async fn that takes a &PgPool. Thin test
+// wrappers below allow running one benchmark in isolation; the orchestrator
+// invokes the bodies directly.
+// ============================================================================
 
+/// P0 baseline: hmac_256 equality should stay ~0.5ms at 100K rows.
+async fn bench_hmac_256_equality(pool: &PgPool) -> Result<()> {
     let encrypted: String =
         sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1")
-            .fetch_one(&pool)
+            .fetch_one(pool)
             .await?;
 
-    reset_pg_stat_statements(&pool).await?;
+    let query = "SELECT * FROM bench WHERE eql_v2.hmac_256(encrypted_text) = eql_v2.hmac_256($1::jsonb::eql_v2_encrypted)";
+    let plan_type = fetch_plan_node_type(pool, query, &[&encrypted]).await?;
+
+    reset_pg_stat_statements(pool).await?;
 
     for _ in 0..RUNS {
-        sqlx::query(
-            "SELECT * FROM bench WHERE eql_v2.hmac_256(encrypted_text) = eql_v2.hmac_256($1::jsonb::eql_v2_encrypted)",
-        )
-        .bind(&encrypted)
-        .fetch_all(&pool)
-        .await?;
+        sqlx::query(query).bind(&encrypted).fetch_all(pool).await?;
     }
 
     let stats = read_pg_stat_statements(
-        &pool,
+        pool,
         "%FROM bench WHERE eql_v2.hmac_256(encrypted_text) = eql_v2.hmac_256($%",
     )
     .await?;
@@ -70,7 +74,7 @@ async fn hmac_256_equality() -> Result<()> {
         name: "hmac_256_equality".into(),
         priority: "P0".into(),
         runs: stats.calls,
-        plan_type: "Index Scan".into(),
+        plan_type,
         mean_ms: stats.mean_exec_time,
         stddev_ms: stats.stddev_exec_time,
         total_ms: stats.total_exec_time,
@@ -81,25 +85,21 @@ async fn hmac_256_equality() -> Result<()> {
 }
 
 /// P2: bloom_filter containment — expected ~3.35ms at 100K rows.
-#[tokio::test]
-#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"]
-async fn bloom_filter_containment() -> Result<()> {
-    let pool = connect().await?;
+async fn bench_bloom_filter_containment(pool: &PgPool) -> Result<()> {
     let encrypted: String =
         sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1")
-            .fetch_one(&pool)
+            .fetch_one(pool)
             .await?;
 
-    reset_pg_stat_statements(&pool).await?;
+    let query = "SELECT * FROM bench WHERE eql_v2.bloom_filter(encrypted_text) @> eql_v2.bloom_filter($1::jsonb::eql_v2_encrypted)";
+    let plan_type = fetch_plan_node_type(pool, query, &[&encrypted]).await?;
+
+    reset_pg_stat_statements(pool).await?;
     for _ in 0..RUNS {
-        sqlx::query(
-            "SELECT * FROM bench WHERE eql_v2.bloom_filter(encrypted_text) @> eql_v2.bloom_filter($1::jsonb::eql_v2_encrypted)",
-        )
-        .bind(&encrypted)
-        .fetch_all(&pool).await?;
+        sqlx::query(query).bind(&encrypted).fetch_all(pool).await?;
     }
     let stats = read_pg_stat_statements(
-        &pool,
+        pool,
         "%eql_v2.bloom_filter(encrypted_text) @> eql_v2.bloom_filter($%",
     )
     .await?;
@@ -108,7 +108,7 @@ async fn bloom_filter_containment() -> Result<()> {
         name: "bloom_filter_containment".into(),
         priority: "P2".into(),
         runs: stats.calls,
-        plan_type: "Bitmap Index Scan".into(),
+        plan_type,
         mean_ms: stats.mean_exec_time,
         stddev_ms: stats.stddev_exec_time,
         total_ms: stats.total_exec_time,
@@ -119,24 +119,21 @@ async fn bloom_filter_containment() -> Result<()> {
 
 /// P0: eql_cast equality — currently seq scans (CIP-2831). Report records the
 /// actual plan + timing so the number is visible week-over-week until the fix ships.
-#[tokio::test]
-#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"]
-async fn eql_cast_equality() -> Result<()> {
-    let pool = connect().await?;
+async fn bench_eql_cast_equality(pool: &PgPool) -> Result<()> {
     let encrypted: String =
         sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1")
-            .fetch_one(&pool)
+            .fetch_one(pool)
             .await?;
 
-    reset_pg_stat_statements(&pool).await?;
+    let query = "SELECT * FROM bench WHERE encrypted_text = $1::jsonb::eql_v2_encrypted";
+    let plan_type = fetch_plan_node_type(pool, query, &[&encrypted]).await?;
+
+    reset_pg_stat_statements(pool).await?;
     for _ in 0..RUNS {
-        sqlx::query("SELECT * FROM bench WHERE encrypted_text = $1::jsonb::eql_v2_encrypted")
-            .bind(&encrypted)
-            .fetch_all(&pool)
-            .await?;
+        sqlx::query(query).bind(&encrypted).fetch_all(pool).await?;
     }
     let stats = read_pg_stat_statements(
-        &pool,
+        pool,
         "%FROM bench WHERE encrypted_text = $%::jsonb::eql_v2_encrypted%",
     )
     .await?;
@@ -145,7 +142,7 @@ async fn eql_cast_equality() -> Result<()> {
         name: "eql_cast_equality".into(),
         priority: "P0".into(),
         runs: stats.calls,
-        plan_type: "Seq Scan".into(),
+        plan_type,
         mean_ms: stats.mean_exec_time,
         stddev_ms: stats.stddev_exec_time,
         total_ms: stats.total_exec_time,
@@ -155,24 +152,21 @@ async fn eql_cast_equality() -> Result<()> {
 }
 
 /// P0: ORE equality via operator class — currently seq scans (CIP-2831).
-#[tokio::test]
-#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"]
-async fn ore_equality_opclass() -> Result<()> {
-    let pool = connect().await?;
+async fn bench_ore_equality_opclass(pool: &PgPool) -> Result<()> {
     let encrypted: String =
         sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 1")
-            .fetch_one(&pool)
+            .fetch_one(pool)
             .await?;
 
-    reset_pg_stat_statements(&pool).await?;
+    let query = "SELECT * FROM bench WHERE encrypted_int = $1::jsonb::eql_v2_encrypted";
+    let plan_type = fetch_plan_node_type(pool, query, &[&encrypted]).await?;
+
+    reset_pg_stat_statements(pool).await?;
     for _ in 0..RUNS {
-        sqlx::query("SELECT * FROM bench WHERE encrypted_int = $1::jsonb::eql_v2_encrypted")
-            .bind(&encrypted)
-            .fetch_all(&pool)
-            .await?;
+        sqlx::query(query).bind(&encrypted).fetch_all(pool).await?;
     }
     let stats = read_pg_stat_statements(
-        &pool,
+        pool,
         "%FROM bench WHERE encrypted_int = $%::jsonb::eql_v2_encrypted%",
     )
     .await?;
@@ -181,7 +175,7 @@ async fn ore_equality_opclass() -> Result<()> {
         name: "ore_equality_opclass".into(),
         priority: "P0".into(),
         runs: stats.calls,
-        plan_type: "Seq Scan".into(),
+        plan_type,
         mean_ms: stats.mean_exec_time,
         stddev_ms: stats.stddev_exec_time,
         total_ms: stats.total_exec_time,
@@ -191,25 +185,21 @@ async fn ore_equality_opclass() -> Result<()> {
 }
 
 /// P1: ORE range < with LIMIT — expected ~1.93ms at 100K rows.
-#[tokio::test]
-#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"]
-async fn ore_range_lt_limit() -> Result<()> {
-    let pool = connect().await?;
+async fn bench_ore_range_lt_limit(pool: &PgPool) -> Result<()> {
     let encrypted: String =
         sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 50000")
-            .fetch_one(&pool)
+            .fetch_one(pool)
             .await?;
 
-    reset_pg_stat_statements(&pool).await?;
+    let query = "SELECT * FROM bench WHERE encrypted_int < $1::jsonb::eql_v2_encrypted ORDER BY encrypted_int LIMIT 10";
+    let plan_type = fetch_plan_node_type(pool, query, &[&encrypted]).await?;
+
+    reset_pg_stat_statements(pool).await?;
     for _ in 0..RUNS {
-        sqlx::query(
-            "SELECT * FROM bench WHERE encrypted_int < $1::jsonb::eql_v2_encrypted ORDER BY encrypted_int LIMIT 10",
-        )
-        .bind(&encrypted)
-        .fetch_all(&pool).await?;
+        sqlx::query(query).bind(&encrypted).fetch_all(pool).await?;
     }
     let stats = read_pg_stat_statements(
-        &pool,
+        pool,
         "%FROM bench WHERE encrypted_int < $%ORDER BY encrypted_int LIMIT %",
     )
     .await?;
@@ -218,7 +208,7 @@ async fn ore_range_lt_limit() -> Result<()> {
         name: "ore_range_lt_limit".into(),
         priority: "P1".into(),
         runs: stats.calls,
-        plan_type: "Index Scan".into(),
+        plan_type,
         mean_ms: stats.mean_exec_time,
         stddev_ms: stats.stddev_exec_time,
         total_ms: stats.total_exec_time,
@@ -229,25 +219,21 @@ async fn ore_range_lt_limit() -> Result<()> {
 
 /// P1: ORE ORDER BY encrypted_int LIMIT 10 — design doc observes ~543ms at 10K,
 /// so expect several seconds at 100K. Report captures actual number.
-#[tokio::test]
-#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"]
-async fn ore_order_by_limit() -> Result<()> {
-    let pool = connect().await?;
+async fn bench_ore_order_by_limit(pool: &PgPool) -> Result<()> {
+    let query = "SELECT * FROM bench ORDER BY encrypted_int LIMIT 10";
+    let plan_type = fetch_plan_node_type(pool, query, &[]).await?;
 
-    reset_pg_stat_statements(&pool).await?;
+    reset_pg_stat_statements(pool).await?;
     for _ in 0..RUNS {
-        sqlx::query("SELECT * FROM bench ORDER BY encrypted_int LIMIT 10")
-            .fetch_all(&pool)
-            .await?;
+        sqlx::query(query).fetch_all(pool).await?;
     }
-    let stats =
-        read_pg_stat_statements(&pool, "%FROM bench ORDER BY encrypted_int LIMIT %").await?;
+    let stats = read_pg_stat_statements(pool, "%FROM bench ORDER BY encrypted_int LIMIT %").await?;
 
     append_result(PerfResult {
         name: "ore_order_by_limit".into(),
         priority: "P1".into(),
         runs: stats.calls,
-        plan_type: "Index Scan".into(),
+        plan_type,
         mean_ms: stats.mean_exec_time,
         stddev_ms: stats.stddev_exec_time,
         total_ms: stats.total_exec_time,
@@ -256,14 +242,9 @@ async fn ore_order_by_limit() -> Result<()> {
     Ok(())
 }
 
-/// Alphabetical-last test — flushes accumulated results to disk.
-/// Requires `--test-threads=1` so it runs after all benchmark cases.
-#[tokio::test]
-#[ignore = "Tier 2: report writer, runs last under --test-threads=1"]
-async fn zz_write_reports() -> Result<()> {
-    let pool = connect().await?;
+async fn flush_reports(pool: &PgPool) -> Result<()> {
     let pg_version: String = sqlx::query_scalar("SHOW server_version_num")
-        .fetch_one(&pool)
+        .fetch_one(pool)
         .await?;
     // server_version_num is "170004" etc — take the major version digits
     let pg_major = pg_version
@@ -287,3 +268,63 @@ fn today_utc() -> String {
         .expect("invoking date");
     String::from_utf8(out.stdout).unwrap().trim().to_string()
 }
+
+// ============================================================================
+// Orchestrator — scheduled CI entry point. Runs every benchmark in sequence
+// and emits the report.
+// ============================================================================
+
+#[tokio::test]
+#[ignore = "Tier 2: run all benchmarks + write reports (invoked by `mise run bench:full`)"]
+async fn run_all_benchmarks() -> Result<()> {
+    let pool = connect().await?;
+    bench_hmac_256_equality(&pool).await?;
+    bench_bloom_filter_containment(&pool).await?;
+    bench_eql_cast_equality(&pool).await?;
+    bench_ore_equality_opclass(&pool).await?;
+    bench_ore_range_lt_limit(&pool).await?;
+    bench_ore_order_by_limit(&pool).await?;
+    flush_reports(&pool).await
+}
+
+// ============================================================================
+// Individual test wrappers — allow running one benchmark in isolation via
+// `cargo test --test bench_perf_tests <name> -- --ignored`. These do NOT
+// flush reports; only `run_all_benchmarks` does that.
+// ============================================================================
+
+#[tokio::test]
+#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"]
+async fn hmac_256_equality() -> Result<()> {
+    bench_hmac_256_equality(&connect().await?).await
+}
+
+#[tokio::test]
+#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"]
+async fn bloom_filter_containment() -> Result<()> {
+    bench_bloom_filter_containment(&connect().await?).await
+}
+
+#[tokio::test]
+#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"]
+async fn eql_cast_equality() -> Result<()> {
+    bench_eql_cast_equality(&connect().await?).await
+}
+
+#[tokio::test]
+#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"]
+async fn ore_equality_opclass() -> Result<()> {
+    bench_ore_equality_opclass(&connect().await?).await
+}
+
+#[tokio::test]
+#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"]
+async fn ore_range_lt_limit() -> Result<()> {
+    bench_ore_range_lt_limit(&connect().await?).await
+}
+
+#[tokio::test]
+#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"]
+async fn ore_order_by_limit() -> Result<()> {
+    bench_ore_order_by_limit(&connect().await?).await
+}
diff --git a/tests/sqlx/tests/bench_regression_tests.rs b/tests/sqlx/tests/bench_regression_tests.rs
index f3668476..b9c3eb44 100644
--- a/tests/sqlx/tests/bench_regression_tests.rs
+++ b/tests/sqlx/tests/bench_regression_tests.rs
@@ -12,7 +12,8 @@
 
 use anyhow::Result;
 use eql_tests::{
-    explain_analyze_avg, get_bench_encrypted_int, get_bench_encrypted_text, ExplainStats,
+    explain_analyze_avg, explain_analyze_avg_bound, get_bench_encrypted_int,
+    get_bench_encrypted_text, ExplainStats,
 };
 use sqlx::PgPool;
 
@@ -22,11 +23,13 @@ async fn hmac_equality_under_threshold(pool: PgPool) -> Result<()> {
     // id=1 maps to 1 of 100 distinct values → ~100 matching rows at 10K
     let encrypted = get_bench_encrypted_text(&pool, 1).await?;
 
-    let sql = format!(
-        "SELECT * FROM bench WHERE eql_v2.hmac_256(encrypted_text) = eql_v2.hmac_256('{}'::jsonb::eql_v2_encrypted)",
-        encrypted
-    );
-    let stats: ExplainStats = explain_analyze_avg(&pool, &sql, 5).await?;
+    let stats: ExplainStats = explain_analyze_avg_bound(
+        &pool,
+        "SELECT * FROM bench WHERE eql_v2.hmac_256(encrypted_text) = eql_v2.hmac_256($1::jsonb::eql_v2_encrypted)",
+        &[&encrypted],
+        5,
+    )
+    .await?;
     assert!(
         stats.execution_time_ms < 50.0,
         "hmac_256 equality took {:.1}ms, threshold 50ms (expected ~0.5ms at 10K rows, node_type={})",
@@ -41,11 +44,13 @@ async fn bloom_filter_containment_under_threshold(pool: PgPool) -> Result<()> {
     // id=1 maps to 1 of 100 distinct values → ~100 matching rows at 10K
     let encrypted = get_bench_encrypted_text(&pool, 1).await?;
 
-    let sql = format!(
-        "SELECT * FROM bench WHERE eql_v2.bloom_filter(encrypted_text) @> eql_v2.bloom_filter('{}'::jsonb::eql_v2_encrypted)",
-        encrypted
-    );
-    let stats: ExplainStats = explain_analyze_avg(&pool, &sql, 5).await?;
+    let stats: ExplainStats = explain_analyze_avg_bound(
+        &pool,
+        "SELECT * FROM bench WHERE eql_v2.bloom_filter(encrypted_text) @> eql_v2.bloom_filter($1::jsonb::eql_v2_encrypted)",
+        &[&encrypted],
+        5,
+    )
+    .await?;
     assert!(
         stats.execution_time_ms < 100.0,
         "bloom_filter containment took {:.1}ms, threshold 100ms (expected ~1ms at 10K rows, node_type={})",
@@ -61,12 +66,14 @@ async fn ore_range_lt_under_threshold(pool: PgPool) -> Result<()> {
     // to ore id 83, but the 10K distribution still yields ~4,900 rows below the predicate
     let encrypted = get_bench_encrypted_int(&pool, 50).await?;
 
-    let sql = format!(
-        "SELECT * FROM bench WHERE encrypted_int < '{}'::jsonb::eql_v2_encrypted \
+    let stats: ExplainStats = explain_analyze_avg_bound(
+        &pool,
+        "SELECT * FROM bench WHERE encrypted_int < $1::jsonb::eql_v2_encrypted \
          ORDER BY encrypted_int LIMIT 10",
-        encrypted
-    );
-    let stats: ExplainStats = explain_analyze_avg(&pool, &sql, 5).await?;
+        &[&encrypted],
+        5,
+    )
+    .await?;
     assert!(
         stats.execution_time_ms < 200.0,
         "ORE range < LIMIT 10 took {:.1}ms, threshold 200ms (expected ~2ms at 10K rows, node_type={})",

From 2ef4dece34214bbc9a5819371a2789357b442cbf Mon Sep 17 00:00:00 2001
From: Toby Hede <toby@cipherstash.com>
Date: Wed, 29 Apr 2026 13:51:40 +1000
Subject: [PATCH 28/28] perf(bench): reduce RUNS from 1000 to 10 to fit CI
 timeout

The worst-case benchmark, bench_ore_order_by_limit, takes several seconds
per run at 100K rows; with RUNS=1000 that single benchmark could exceed
the 60-min CI timeout before flush_reports writes the artifact. Drop RUNS
to 10 to keep the scheduled job well under budget while still capturing
mean/stddev via pg_stat_statements.

Addresses CodeRabbit review feedback on PR #173.
---
 tests/benchmarks/README.md           | 4 ++--
 tests/sqlx/tests/bench_perf_tests.rs | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/benchmarks/README.md b/tests/benchmarks/README.md
index 885ddc74..d4aa57db 100644
--- a/tests/benchmarks/README.md
+++ b/tests/benchmarks/README.md
@@ -7,7 +7,7 @@ encrypted datasets. Complements the Tier 1 tests in `tests/sqlx/tests/bench_*`.
 
 - Brings up Postgres + CipherStash Proxy via docker-compose
 - Inserts 100K plaintext rows through the Proxy (which encrypts them)
-- Runs each P0/P1/P2 query pattern 1000 times
+- Runs each P0/P1/P2 query pattern 10 times (the `RUNS` constant in `tests/sqlx/tests/bench_perf_tests.rs`)
 - Reads `pg_stat_statements` for statistical aggregates
 - Outputs JSON + Markdown reports
 
@@ -39,7 +39,7 @@ GitHub Actions UI (Run workflow button).
 ## Why a separate workflow
 
 - 100K generation takes ~100 seconds via the Proxy
-- 1000-run query loops add several minutes per pattern
+- The slowest pattern (`bench_ore_order_by_limit`) takes several seconds per run on 100K rows
 - Regular PR CI must stay under 10 minutes; this suite would blow that budget
 
 ## Output
diff --git a/tests/sqlx/tests/bench_perf_tests.rs b/tests/sqlx/tests/bench_perf_tests.rs
index 63f6b5d7..52f7a0f7 100644
--- a/tests/sqlx/tests/bench_perf_tests.rs
+++ b/tests/sqlx/tests/bench_perf_tests.rs
@@ -10,7 +10,7 @@
 //! Each benchmark:
 //!   1. Resets pg_stat_statements
 //!   2. Captures the actual query plan via EXPLAIN (FORMAT JSON)
-//!   3. Runs its query pattern 1000 times
+//!   3. Runs its query pattern `RUNS` times (currently 10)
 //!   4. Reads pg_stat_statements for the match
 //!   5. Appends a PerfResult to the shared accumulator
 //!
@@ -28,7 +28,7 @@ use eql_tests::{
 use sqlx::postgres::PgPoolOptions;
 use sqlx::PgPool;
 
-const RUNS: i64 = 1000;
+const RUNS: i64 = 10;
 const DATASET_ROWS: i64 = 100_000;
 
 async fn connect() -> Result<PgPool> {