From 2f9dad02522cedb84ec0944079743a0c5236ae54 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 8 Apr 2026 16:40:03 +1000 Subject: [PATCH 01/28] feat(bench): add benchmark table with 10K rows, indexes, and verification tests Migration 007 creates bench table with encrypted_text, encrypted_int, encrypted_bigint columns and seeds 10K rows via create_encrypted_json() with cycling offsets for distribution variety. Fixture creates 5 indexes (hash/btree/GIN) split from migration for before/after testing. 9 tests verify row count, column population, index term extraction, index usage via EXPLAIN, and seq scan baseline. --- tests/sqlx/fixtures/bench_setup.sql | 31 ++++ .../migrations/007_install_bench_data.sql | 28 ++++ tests/sqlx/tests/bench_data_tests.rs | 141 ++++++++++++++++++ 3 files changed, 200 insertions(+) create mode 100644 tests/sqlx/fixtures/bench_setup.sql create mode 100644 tests/sqlx/migrations/007_install_bench_data.sql create mode 100644 tests/sqlx/tests/bench_data_tests.rs diff --git a/tests/sqlx/fixtures/bench_setup.sql b/tests/sqlx/fixtures/bench_setup.sql new file mode 100644 index 00000000..164a4b20 --- /dev/null +++ b/tests/sqlx/fixtures/bench_setup.sql @@ -0,0 +1,31 @@ +-- Fixture: bench_setup.sql +-- +-- Creates benchmark indexes and refreshes planner statistics. +-- Table and 10K rows created by migration 007_install_bench_data.sql. +-- +-- Indexes: +-- bench_text_hmac_idx - hash on eql_v2.hmac_256(encrypted_text) for equality +-- bench_text_ore_idx - btree on encrypted_text via operator class for text ordering +-- bench_int_ore_idx - btree on encrypted_int via operator class for range/ORDER BY +-- bench_bigint_ore_idx - btree on encrypted_bigint via operator class +-- bench_text_bloom_idx - GIN on eql_v2.bloom_filter(encrypted_text) for containment +-- +-- Pattern follows containment_with_index_tests.rs: indexes in fixture (not migration) +-- so tests can verify before/after index creation. 
+ +CREATE INDEX IF NOT EXISTS bench_text_hmac_idx + ON bench USING hash (eql_v2.hmac_256(encrypted_text)); + +CREATE INDEX IF NOT EXISTS bench_text_ore_idx + ON bench USING btree (encrypted_text eql_v2.encrypted_operator_class); + +CREATE INDEX IF NOT EXISTS bench_int_ore_idx + ON bench USING btree (encrypted_int eql_v2.encrypted_operator_class); + +CREATE INDEX IF NOT EXISTS bench_bigint_ore_idx + ON bench USING btree (encrypted_bigint eql_v2.encrypted_operator_class); + +CREATE INDEX IF NOT EXISTS bench_text_bloom_idx + ON bench USING gin (eql_v2.bloom_filter(encrypted_text)); + +ANALYZE bench; diff --git a/tests/sqlx/migrations/007_install_bench_data.sql b/tests/sqlx/migrations/007_install_bench_data.sql new file mode 100644 index 00000000..7786d971 --- /dev/null +++ b/tests/sqlx/migrations/007_install_bench_data.sql @@ -0,0 +1,28 @@ +-- Migration: 007_install_bench_data.sql +-- +-- Creates benchmark table with 10K rows for performance testing. +-- Each column cycles through 100 distinct encrypted values (from ore ids 1-100). +-- +-- Columns: +-- encrypted_text - text equality (hmac), pattern match (bloom), ordering (ore) +-- encrypted_int - integer ORE range/equality/ordering +-- encrypted_bigint - bigint ORE at scale +-- +-- Index terms per row: hm (hmac), b3 (blake3), bf (bloom filter), ob (ORE blocks), sv (STE vec) +-- Data generated via create_encrypted_json() from 004_install_test_helpers.sql. + +CREATE TABLE bench ( + id SERIAL PRIMARY KEY, + encrypted_text eql_v2_encrypted, + encrypted_int eql_v2_encrypted, + encrypted_bigint eql_v2_encrypted +); + +-- Seed 10K rows. Each column uses a different offset to create varied distributions. +-- create_encrypted_json(id) valid for ids 1-100 (ore table lookup at 10*id, max ore.id=1000). 
+INSERT INTO bench (encrypted_text, encrypted_int, encrypted_bigint) +SELECT + create_encrypted_json(((gs - 1) % 100) + 1), + create_encrypted_json(((gs + 33) % 100) + 1), + create_encrypted_json(((gs + 66) % 100) + 1) +FROM generate_series(1, 10000) AS gs; diff --git a/tests/sqlx/tests/bench_data_tests.rs b/tests/sqlx/tests/bench_data_tests.rs new file mode 100644 index 00000000..c58ca929 --- /dev/null +++ b/tests/sqlx/tests/bench_data_tests.rs @@ -0,0 +1,141 @@ +//! Benchmark data verification tests +//! +//! Validates migration 007_install_bench_data.sql and bench_setup fixture: +//! - 10K rows seeded correctly across 3 encrypted columns +//! - Index terms (hmac, bloom, ORE) are extractable +//! - Indexes are used by the query planner (EXPLAIN assertions) +//! - Sequential scan baseline without indexes + +use anyhow::Result; +use eql_tests::{analyze_table, assert_uses_index, assert_uses_seq_scan, explain_query}; +use sqlx::PgPool; + +// ========== Data Integrity Tests ========== + +/// Verify migration seeded exactly 10K rows +#[sqlx::test] +async fn bench_table_has_expected_row_count(pool: PgPool) -> Result<()> { + let count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM bench") + .fetch_one(&pool) + .await?; + assert_eq!(count.0, 10000, "bench table should have 10000 rows"); + Ok(()) +} + +/// Verify all three columns have non-null encrypted data +#[sqlx::test] +async fn bench_columns_are_populated(pool: PgPool) -> Result<()> { + let count: (i64,) = sqlx::query_as( + "SELECT COUNT(*) FROM bench + WHERE encrypted_text IS NOT NULL + AND encrypted_int IS NOT NULL + AND encrypted_bigint IS NOT NULL", + ) + .fetch_one(&pool) + .await?; + assert_eq!( + count.0, 10000, + "all rows should have non-null encrypted columns" + ); + Ok(()) +} + +/// Verify hmac_256 index terms are extractable from encrypted_text +#[sqlx::test] +async fn bench_encrypted_text_has_hmac_terms(pool: PgPool) -> Result<()> { + let count: (i64,) = sqlx::query_as( + "SELECT COUNT(*) FROM bench 
WHERE eql_v2.hmac_256(encrypted_text) IS NOT NULL", + ) + .fetch_one(&pool) + .await?; + assert_eq!(count.0, 10000, "all rows should have hmac_256 index terms"); + Ok(()) +} + +/// Verify bloom_filter index terms are extractable from encrypted_text +#[sqlx::test] +async fn bench_encrypted_text_has_bloom_filter_terms(pool: PgPool) -> Result<()> { + let count: (i64,) = sqlx::query_as( + "SELECT COUNT(*) FROM bench WHERE eql_v2.bloom_filter(encrypted_text) IS NOT NULL", + ) + .fetch_one(&pool) + .await?; + assert_eq!( + count.0, 10000, + "all rows should have bloom_filter index terms" + ); + Ok(()) +} + +/// Verify ORE terms are extractable from encrypted_int (3 of 5 indexes are ORE btree) +#[sqlx::test] +async fn bench_encrypted_int_has_ore_terms(pool: PgPool) -> Result<()> { + let count: (i64,) = sqlx::query_as( + "SELECT COUNT(*) FROM bench WHERE eql_v2.ore_block_u64_8_256(encrypted_int) IS NOT NULL", + ) + .fetch_one(&pool) + .await?; + assert_eq!(count.0, 10000, "all rows should have ORE block index terms"); + Ok(()) +} + +// ========== Index Usage Tests (with fixture) ========== + +/// Verify hash index is used for hmac_256 equality lookup +#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_setup")))] +async fn bench_hmac_equality_uses_hash_index(pool: PgPool) -> Result<()> { + let encrypted: String = + sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1") + .fetch_one(&pool) + .await?; + + let sql = format!( + "SELECT * FROM bench WHERE eql_v2.hmac_256(encrypted_text) = eql_v2.hmac_256('{}'::jsonb::eql_v2_encrypted)", + encrypted + ); + assert_uses_index(&pool, &sql, "bench_text_hmac_idx").await?; + Ok(()) +} + +/// Verify btree index is used for ORDER BY with LIMIT on encrypted_int +#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_setup")))] +async fn bench_ore_order_uses_btree_index(pool: PgPool) -> Result<()> { + let sql = "SELECT * FROM bench ORDER BY encrypted_int LIMIT 10"; + assert_uses_index(&pool, sql, 
"bench_int_ore_idx").await?; + Ok(()) +} + +/// Verify GIN index is used for bloom_filter containment +#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_setup")))] +async fn bench_bloom_containment_uses_gin_index(pool: PgPool) -> Result<()> { + let encrypted: String = + sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1") + .fetch_one(&pool) + .await?; + + let sql = format!( + "SELECT * FROM bench WHERE eql_v2.bloom_filter(encrypted_text) @> eql_v2.bloom_filter('{}'::jsonb::eql_v2_encrypted)", + encrypted + ); + assert_uses_index(&pool, &sql, "bench_text_bloom_idx").await?; + Ok(()) +} + +/// Verify sequential scan without indexes (before/after pattern sanity check) +#[sqlx::test] +async fn bench_hmac_without_index_uses_seq_scan(pool: PgPool) -> Result<()> { + analyze_table(&pool, "bench").await?; + + let encrypted: String = + sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1") + .fetch_one(&pool) + .await?; + + let sql = format!( + "SELECT * FROM bench WHERE eql_v2.hmac_256(encrypted_text) = eql_v2.hmac_256('{}'::jsonb::eql_v2_encrypted)", + encrypted + ); + let explain = explain_query(&pool, &sql).await?; + assert_uses_seq_scan(&explain); + Ok(()) +} From 44dabd7d1cdb1bd4877e2b13d5fa9d433ad7f7d9 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Thu, 9 Apr 2026 09:25:25 +1000 Subject: [PATCH 02/28] refactor(bench): extract BENCH_ROW_COUNT constant from magic number Addresses code review feedback: the literal 10000 appeared in 5 assert_eq calls. Single constant makes it easy to adjust if row count changes for CI. 
--- tests/sqlx/tests/bench_data_tests.rs | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/tests/sqlx/tests/bench_data_tests.rs b/tests/sqlx/tests/bench_data_tests.rs index c58ca929..55400b38 100644 --- a/tests/sqlx/tests/bench_data_tests.rs +++ b/tests/sqlx/tests/bench_data_tests.rs @@ -10,6 +10,8 @@ use anyhow::Result; use eql_tests::{analyze_table, assert_uses_index, assert_uses_seq_scan, explain_query}; use sqlx::PgPool; +const BENCH_ROW_COUNT: i64 = 10000; + // ========== Data Integrity Tests ========== /// Verify migration seeded exactly 10K rows @@ -18,7 +20,10 @@ async fn bench_table_has_expected_row_count(pool: PgPool) -> Result<()> { let count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM bench") .fetch_one(&pool) .await?; - assert_eq!(count.0, 10000, "bench table should have 10000 rows"); + assert_eq!( + count.0, BENCH_ROW_COUNT, + "bench table should have 10000 rows" + ); Ok(()) } @@ -48,7 +53,10 @@ async fn bench_encrypted_text_has_hmac_terms(pool: PgPool) -> Result<()> { ) .fetch_one(&pool) .await?; - assert_eq!(count.0, 10000, "all rows should have hmac_256 index terms"); + assert_eq!( + count.0, BENCH_ROW_COUNT, + "all rows should have hmac_256 index terms" + ); Ok(()) } @@ -75,7 +83,10 @@ async fn bench_encrypted_int_has_ore_terms(pool: PgPool) -> Result<()> { ) .fetch_one(&pool) .await?; - assert_eq!(count.0, 10000, "all rows should have ORE block index terms"); + assert_eq!( + count.0, BENCH_ROW_COUNT, + "all rows should have ORE block index terms" + ); Ok(()) } From 1934a91fc6337136dc6262eb17d1e4a8b820c804 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Thu, 9 Apr 2026 12:17:59 +1000 Subject: [PATCH 03/28] fix(bench): move 10K row INSERT from migration to opt-in fixture The INSERT ... generate_series in migration 007 bloated the sqlx template database, making every test pay the copy cost and causing disk space crashes. 
Now migration 007 is DDL-only (CREATE TABLE bench) and the 10K row seed lives in bench_data.sql fixture. Only bench tests opt in. Also fixes remaining BENCH_ROW_COUNT literals missed by earlier replace. --- tests/sqlx/fixtures/bench_data.sql | 19 ++++++++++++++ .../migrations/007_install_bench_data.sql | 17 +++--------- tests/sqlx/tests/bench_data_tests.rs | 26 +++++++++---------- 3 files changed, 35 insertions(+), 27 deletions(-) create mode 100644 tests/sqlx/fixtures/bench_data.sql diff --git a/tests/sqlx/fixtures/bench_data.sql b/tests/sqlx/fixtures/bench_data.sql new file mode 100644 index 00000000..baeae5ad --- /dev/null +++ b/tests/sqlx/fixtures/bench_data.sql @@ -0,0 +1,19 @@ +-- Fixture: bench_data.sql +-- +-- Seeds 10K rows into the bench table for performance testing. +-- Each column cycles through 100 distinct encrypted values (from ore ids 1-100). +-- +-- Index terms per row: hm (hmac), b3 (blake3), bf (bloom filter), ob (ORE blocks), sv (STE vec) +-- Data generated via create_encrypted_json() from 004_install_test_helpers.sql. +-- +-- Cycling offsets create varied distributions: +-- encrypted_text: ids 1, 2, ..., 100, 1, 2, ... (offset 0) +-- encrypted_int: ids 34, 35, ..., 100, 1, ..., 33 (offset +33) +-- encrypted_bigint: ids 67, 68, ..., 100, 1, ..., 66 (offset +66) + +INSERT INTO bench (encrypted_text, encrypted_int, encrypted_bigint) +SELECT + create_encrypted_json(((gs - 1) % 100) + 1), + create_encrypted_json(((gs + 33) % 100) + 1), + create_encrypted_json(((gs + 66) % 100) + 1) +FROM generate_series(1, 10000) AS gs; diff --git a/tests/sqlx/migrations/007_install_bench_data.sql b/tests/sqlx/migrations/007_install_bench_data.sql index 7786d971..04db7695 100644 --- a/tests/sqlx/migrations/007_install_bench_data.sql +++ b/tests/sqlx/migrations/007_install_bench_data.sql @@ -1,15 +1,13 @@ -- Migration: 007_install_bench_data.sql -- --- Creates benchmark table with 10K rows for performance testing. 
--- Each column cycles through 100 distinct encrypted values (from ore ids 1-100). +-- Creates benchmark table for performance testing. +-- DDL only — data is loaded by the bench_data.sql fixture so that +-- only bench tests pay the 10K-row seeding cost, not the entire suite. -- -- Columns: -- encrypted_text - text equality (hmac), pattern match (bloom), ordering (ore) -- encrypted_int - integer ORE range/equality/ordering -- encrypted_bigint - bigint ORE at scale --- --- Index terms per row: hm (hmac), b3 (blake3), bf (bloom filter), ob (ORE blocks), sv (STE vec) --- Data generated via create_encrypted_json() from 004_install_test_helpers.sql. CREATE TABLE bench ( id SERIAL PRIMARY KEY, @@ -17,12 +15,3 @@ CREATE TABLE bench ( encrypted_int eql_v2_encrypted, encrypted_bigint eql_v2_encrypted ); - --- Seed 10K rows. Each column uses a different offset to create varied distributions. --- create_encrypted_json(id) valid for ids 1-100 (ore table lookup at 10*id, max ore.id=1000). -INSERT INTO bench (encrypted_text, encrypted_int, encrypted_bigint) -SELECT - create_encrypted_json(((gs - 1) % 100) + 1), - create_encrypted_json(((gs + 33) % 100) + 1), - create_encrypted_json(((gs + 66) % 100) + 1) -FROM generate_series(1, 10000) AS gs; diff --git a/tests/sqlx/tests/bench_data_tests.rs b/tests/sqlx/tests/bench_data_tests.rs index 55400b38..8225df5d 100644 --- a/tests/sqlx/tests/bench_data_tests.rs +++ b/tests/sqlx/tests/bench_data_tests.rs @@ -1,6 +1,6 @@ //! Benchmark data verification tests //! -//! Validates migration 007_install_bench_data.sql and bench_setup fixture: +//! Validates bench_data fixture (10K rows) and bench_setup fixture (indexes): //! - 10K rows seeded correctly across 3 encrypted columns //! - Index terms (hmac, bloom, ORE) are extractable //! 
- Indexes are used by the query planner (EXPLAIN assertions) @@ -14,8 +14,8 @@ const BENCH_ROW_COUNT: i64 = 10000; // ========== Data Integrity Tests ========== -/// Verify migration seeded exactly 10K rows -#[sqlx::test] +/// Verify fixture seeded exactly 10K rows +#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data")))] async fn bench_table_has_expected_row_count(pool: PgPool) -> Result<()> { let count: (i64,) = sqlx::query_as("SELECT COUNT(*) FROM bench") .fetch_one(&pool) @@ -28,7 +28,7 @@ async fn bench_table_has_expected_row_count(pool: PgPool) -> Result<()> { } /// Verify all three columns have non-null encrypted data -#[sqlx::test] +#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data")))] async fn bench_columns_are_populated(pool: PgPool) -> Result<()> { let count: (i64,) = sqlx::query_as( "SELECT COUNT(*) FROM bench @@ -39,14 +39,14 @@ async fn bench_columns_are_populated(pool: PgPool) -> Result<()> { .fetch_one(&pool) .await?; assert_eq!( - count.0, 10000, + count.0, BENCH_ROW_COUNT, "all rows should have non-null encrypted columns" ); Ok(()) } /// Verify hmac_256 index terms are extractable from encrypted_text -#[sqlx::test] +#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data")))] async fn bench_encrypted_text_has_hmac_terms(pool: PgPool) -> Result<()> { let count: (i64,) = sqlx::query_as( "SELECT COUNT(*) FROM bench WHERE eql_v2.hmac_256(encrypted_text) IS NOT NULL", @@ -61,7 +61,7 @@ async fn bench_encrypted_text_has_hmac_terms(pool: PgPool) -> Result<()> { } /// Verify bloom_filter index terms are extractable from encrypted_text -#[sqlx::test] +#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data")))] async fn bench_encrypted_text_has_bloom_filter_terms(pool: PgPool) -> Result<()> { let count: (i64,) = sqlx::query_as( "SELECT COUNT(*) FROM bench WHERE eql_v2.bloom_filter(encrypted_text) IS NOT NULL", @@ -69,14 +69,14 @@ async fn bench_encrypted_text_has_bloom_filter_terms(pool: PgPool) -> Result<()> 
.fetch_one(&pool) .await?; assert_eq!( - count.0, 10000, + count.0, BENCH_ROW_COUNT, "all rows should have bloom_filter index terms" ); Ok(()) } /// Verify ORE terms are extractable from encrypted_int (3 of 5 indexes are ORE btree) -#[sqlx::test] +#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data")))] async fn bench_encrypted_int_has_ore_terms(pool: PgPool) -> Result<()> { let count: (i64,) = sqlx::query_as( "SELECT COUNT(*) FROM bench WHERE eql_v2.ore_block_u64_8_256(encrypted_int) IS NOT NULL", @@ -93,7 +93,7 @@ async fn bench_encrypted_int_has_ore_terms(pool: PgPool) -> Result<()> { // ========== Index Usage Tests (with fixture) ========== /// Verify hash index is used for hmac_256 equality lookup -#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_setup")))] +#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] async fn bench_hmac_equality_uses_hash_index(pool: PgPool) -> Result<()> { let encrypted: String = sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1") @@ -109,7 +109,7 @@ async fn bench_hmac_equality_uses_hash_index(pool: PgPool) -> Result<()> { } /// Verify btree index is used for ORDER BY with LIMIT on encrypted_int -#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_setup")))] +#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] async fn bench_ore_order_uses_btree_index(pool: PgPool) -> Result<()> { let sql = "SELECT * FROM bench ORDER BY encrypted_int LIMIT 10"; assert_uses_index(&pool, sql, "bench_int_ore_idx").await?; @@ -117,7 +117,7 @@ async fn bench_ore_order_uses_btree_index(pool: PgPool) -> Result<()> { } /// Verify GIN index is used for bloom_filter containment -#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_setup")))] +#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] async fn bench_bloom_containment_uses_gin_index(pool: PgPool) -> Result<()> { let encrypted: String = 
sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1") @@ -133,7 +133,7 @@ async fn bench_bloom_containment_uses_gin_index(pool: PgPool) -> Result<()> { } /// Verify sequential scan without indexes (before/after pattern sanity check) -#[sqlx::test] +#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data")))] async fn bench_hmac_without_index_uses_seq_scan(pool: PgPool) -> Result<()> { analyze_table(&pool, "bench").await?; From a57682aa8f78397c6db94d228cd8518fc1d06378 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Thu, 9 Apr 2026 12:18:05 +1000 Subject: [PATCH 04/28] fix(test): correct pg_stat_statements_reset argument order Database OID was passed as 3rd arg (queryid) instead of 2nd arg (dbid). read_pg_stat_statements correctly filters by dbid, confirming the intent. The reset now scopes to the current database instead of matching a non-existent query ID. The userid wildcard is passed as 0::oid rather than NULL::oid: pg_stat_statements_reset is declared STRICT, so any NULL argument would make the call return NULL without resetting anything. --- tests/sqlx/src/helpers.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sqlx/src/helpers.rs b/tests/sqlx/src/helpers.rs index a2f454e9..8fe12844 100644 --- a/tests/sqlx/src/helpers.rs +++ b/tests/sqlx/src/helpers.rs @@ -701,7 +701,7 @@ pub async fn ensure_pg_stat_statements(pool: &PgPool) -> Result<()> { /// let stats = read_pg_stat_statements(&pool, "%FROM bench%").await?; /// ``` pub async fn reset_pg_stat_statements(pool: &PgPool) -> Result<()> { - sqlx::query("SELECT pg_stat_statements_reset(NULL::oid, NULL::oid, (SELECT oid FROM pg_database WHERE datname = current_database()))") + sqlx::query("SELECT pg_stat_statements_reset(0::oid, (SELECT oid FROM pg_database WHERE datname = current_database()), 0::bigint)") .execute(pool) .await .with_context(|| "resetting pg_stat_statements counters for current database")?; From 89f86f665399232d6d4a155c8a5a4446793b7807 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 21 Apr 2026 13:23:59 +1000 Subject: [PATCH 05/28] fix(bench): address code review feedback - Fix misleading bench_setup.sql comment 
(DDL-only migration, rows from fixture) - Fix off-by-one in bench_data.sql offset documentation comments - Add missing ORE term extraction test for encrypted_bigint - Add missing index-usage tests for bench_text_ore_idx and bench_bigint_ore_idx - Document bench_data and bench_setup fixtures in FIXTURE_SCHEMA.md - Update migrations README to list all migrations 002-007 --- tests/sqlx/fixtures/FIXTURE_SCHEMA.md | 53 ++++++++++++++++++++++++++- tests/sqlx/fixtures/bench_data.sql | 4 +- tests/sqlx/fixtures/bench_setup.sql | 2 +- tests/sqlx/migrations/README.md | 11 ++++-- tests/sqlx/tests/bench_data_tests.rs | 31 ++++++++++++++++ 5 files changed, 93 insertions(+), 8 deletions(-) diff --git a/tests/sqlx/fixtures/FIXTURE_SCHEMA.md b/tests/sqlx/fixtures/FIXTURE_SCHEMA.md index 7988fb23..70d3b0d9 100644 --- a/tests/sqlx/fixtures/FIXTURE_SCHEMA.md +++ b/tests/sqlx/fixtures/FIXTURE_SCHEMA.md @@ -9,7 +9,8 @@ EQL Extension (via migrations) ├── encrypted_json.sql ├── array_data.sql ├── order_by_null_data.sql (depends on ore migration) - └── ore_data.sql + ├── ore_data.sql + └── bench_data.sql + bench_setup.sql (depend on migration 007) ``` All fixtures depend on the EQL extension being installed via SQLx migrations. @@ -132,6 +133,56 @@ CREATE TABLE ore ( --- +## bench_data.sql + +**Purpose:** Seeds 10K rows into the `bench` table for performance benchmarking. Opt-in fixture — only loaded when a test explicitly includes `scripts("bench_data")`, so other tests don't pay the cost. 
+ +**Dependencies:** +- Requires `bench` table from migration `007_install_bench_data.sql` +- Uses `create_encrypted_json()` from migration `004_install_test_helpers.sql` + +**Schema:** Uses `bench` table (DDL in migration 007): +```sql +CREATE TABLE bench ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + encrypted_text eql_v2_encrypted, + encrypted_int eql_v2_encrypted, + encrypted_bigint eql_v2_encrypted +); +``` + +**Data:** +- 10,000 rows cycling through 100 distinct encrypted values (ore ids 1-100) +- Cycling offsets create varied column distributions: + - `encrypted_text`: ids 1, 2, ..., 100, 1, 2, ... (offset 0) + - `encrypted_int`: ids 35, 36, ..., 100, 1, ..., 34 (offset +33) + - `encrypted_bigint`: ids 68, 69, ..., 100, 1, ..., 67 (offset +66) +- Each row has HMAC, bloom filter, and ORE index terms + +**Used By:** +- bench_data_tests.rs (all tests) + +--- + +## bench_setup.sql + +**Purpose:** Creates the 5 benchmark indexes and refreshes planner statistics. Always loaded after `bench_data.sql` in tests that verify index usage. 
+ +**Dependencies:** +- Requires `bench` table with data from `bench_data.sql` + +**Indexes created:** +- `bench_text_hmac_idx` — hash on `eql_v2.hmac_256(encrypted_text)` for equality +- `bench_text_ore_idx` — btree on `encrypted_text` via operator class for text ordering +- `bench_int_ore_idx` — btree on `encrypted_int` via operator class for range/ORDER BY +- `bench_bigint_ore_idx` — btree on `encrypted_bigint` via operator class +- `bench_text_bloom_idx` — GIN on `eql_v2.bloom_filter(encrypted_text)` for containment + +**Used By:** +- bench_data_tests.rs (index-usage tests: `scripts("bench_data", "bench_setup")`) + +--- + ## Validation Tests Each fixture should have a validation test to ensure correct structure: diff --git a/tests/sqlx/fixtures/bench_data.sql b/tests/sqlx/fixtures/bench_data.sql index baeae5ad..ca0db6dd 100644 --- a/tests/sqlx/fixtures/bench_data.sql +++ b/tests/sqlx/fixtures/bench_data.sql @@ -8,8 +8,8 @@ -- -- Cycling offsets create varied distributions: -- encrypted_text: ids 1, 2, ..., 100, 1, 2, ... (offset 0) --- encrypted_int: ids 34, 35, ..., 100, 1, ..., 33 (offset +33) --- encrypted_bigint: ids 67, 68, ..., 100, 1, ..., 66 (offset +66) +-- encrypted_int: ids 35, 36, ..., 100, 1, ..., 34 (offset +33) +-- encrypted_bigint: ids 68, 69, ..., 100, 1, ..., 67 (offset +66) INSERT INTO bench (encrypted_text, encrypted_int, encrypted_bigint) SELECT diff --git a/tests/sqlx/fixtures/bench_setup.sql b/tests/sqlx/fixtures/bench_setup.sql index 164a4b20..0f997940 100644 --- a/tests/sqlx/fixtures/bench_setup.sql +++ b/tests/sqlx/fixtures/bench_setup.sql @@ -1,7 +1,7 @@ -- Fixture: bench_setup.sql -- -- Creates benchmark indexes and refreshes planner statistics. --- Table and 10K rows created by migration 007_install_bench_data.sql. +-- Table DDL from migration 007_install_bench_data.sql; 10K rows from bench_data.sql fixture. 
-- -- Indexes: -- bench_text_hmac_idx - hash on eql_v2.hmac_256(encrypted_text) for equality diff --git a/tests/sqlx/migrations/README.md b/tests/sqlx/migrations/README.md index a03dcaa0..f8b5b169 100644 --- a/tests/sqlx/migrations/README.md +++ b/tests/sqlx/migrations/README.md @@ -10,10 +10,13 @@ These migrations install EQL and test helpers into the test database using a **h - In `.gitignore` - never commit this file - Ensures tests always use current EQL version -**Migrations 002-004 are static fixtures**: -- 002: Test helpers (`test_helpers.sql`) -- 003: ORE test data (`ore.sql`) -- 004: STE Vec test data (`ste_vec.sql`) +**Migrations 002-007 are static fixtures**: +- 002: ORE test data (`ore.sql`) +- 003: STE Vec test data (`ste_vec.sql`) +- 004: Test helpers (`test_helpers.sql`) +- 005: STE Vec vast data +- 006: ORE text data +- 007: Benchmark table DDL (`bench` table with 3 encrypted columns — DDL only, no rows) ## How SQLx Uses These Migrations diff --git a/tests/sqlx/tests/bench_data_tests.rs b/tests/sqlx/tests/bench_data_tests.rs index 8225df5d..24ea9955 100644 --- a/tests/sqlx/tests/bench_data_tests.rs +++ b/tests/sqlx/tests/bench_data_tests.rs @@ -90,6 +90,21 @@ async fn bench_encrypted_int_has_ore_terms(pool: PgPool) -> Result<()> { Ok(()) } +/// Verify ORE terms are extractable from encrypted_bigint +#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data")))] +async fn bench_encrypted_bigint_has_ore_terms(pool: PgPool) -> Result<()> { + let count: (i64,) = sqlx::query_as( + "SELECT COUNT(*) FROM bench WHERE eql_v2.ore_block_u64_8_256(encrypted_bigint) IS NOT NULL", + ) + .fetch_one(&pool) + .await?; + assert_eq!( + count.0, BENCH_ROW_COUNT, + "all rows should have ORE block index terms" + ); + Ok(()) +} + // ========== Index Usage Tests (with fixture) ========== /// Verify hash index is used for hmac_256 equality lookup @@ -132,6 +147,22 @@ async fn bench_bloom_containment_uses_gin_index(pool: PgPool) -> Result<()> { Ok(()) } +/// Verify 
btree index is used for ORDER BY with LIMIT on encrypted_text +#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] +async fn bench_ore_text_order_uses_btree_index(pool: PgPool) -> Result<()> { + let sql = "SELECT * FROM bench ORDER BY encrypted_text LIMIT 10"; + assert_uses_index(&pool, sql, "bench_text_ore_idx").await?; + Ok(()) +} + +/// Verify btree index is used for ORDER BY with LIMIT on encrypted_bigint +#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] +async fn bench_ore_bigint_order_uses_btree_index(pool: PgPool) -> Result<()> { + let sql = "SELECT * FROM bench ORDER BY encrypted_bigint LIMIT 10"; + assert_uses_index(&pool, sql, "bench_bigint_ore_idx").await?; + Ok(()) +} + /// Verify sequential scan without indexes (before/after pattern sanity check) #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data")))] async fn bench_hmac_without_index_uses_seq_scan(pool: PgPool) -> Result<()> { From 493f085da952a38cf0b492430e70e463ab571bea Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 21 Apr 2026 15:33:56 +1000 Subject: [PATCH 06/28] fix(bench): address second code review round MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use BIGINT GENERATED ALWAYS AS IDENTITY for bench.id (matches documented schema in FIXTURE_SCHEMA.md and other tables in suite) - Fix stale migration range in README (001-004 → 001-007) - Add comment clarifying int/bigint ORE tests verify data seeding, not distinct encoding paths --- tests/sqlx/migrations/007_install_bench_data.sql | 2 +- tests/sqlx/migrations/README.md | 2 +- tests/sqlx/tests/bench_data_tests.rs | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/sqlx/migrations/007_install_bench_data.sql b/tests/sqlx/migrations/007_install_bench_data.sql index 04db7695..49ff6975 100644 --- a/tests/sqlx/migrations/007_install_bench_data.sql +++ b/tests/sqlx/migrations/007_install_bench_data.sql 
@@ -10,7 +10,7 @@ -- encrypted_bigint - bigint ORE at scale CREATE TABLE bench ( - id SERIAL PRIMARY KEY, + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, encrypted_text eql_v2_encrypted, encrypted_int eql_v2_encrypted, encrypted_bigint eql_v2_encrypted diff --git a/tests/sqlx/migrations/README.md b/tests/sqlx/migrations/README.md index f8b5b169..f78f0635 100644 --- a/tests/sqlx/migrations/README.md +++ b/tests/sqlx/migrations/README.md @@ -22,7 +22,7 @@ These migrations install EQL and test helpers into the test database using a **h When using `#[sqlx::test]`: - Each test gets a fresh database -- All migrations (001-004) run automatically before each test +- All migrations (001-007) run automatically before each test - Migration 001 contains the latest built EQL - No need to manually reset database between tests diff --git a/tests/sqlx/tests/bench_data_tests.rs b/tests/sqlx/tests/bench_data_tests.rs index 24ea9955..a6912b46 100644 --- a/tests/sqlx/tests/bench_data_tests.rs +++ b/tests/sqlx/tests/bench_data_tests.rs @@ -91,6 +91,9 @@ async fn bench_encrypted_int_has_ore_terms(pool: PgPool) -> Result<()> { } /// Verify ORE terms are extractable from encrypted_bigint +/// +/// Both int and bigint columns use the same eql_v2_encrypted type and ob index structure. +/// These tests verify that data seeding populated both columns, not that encoding differs. 
#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data")))] async fn bench_encrypted_bigint_has_ore_terms(pool: PgPool) -> Result<()> { let count: (i64,) = sqlx::query_as( From 2e371e6480bf9d63b9b9444f949cdcea0799982b Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 22 Apr 2026 09:25:03 +1000 Subject: [PATCH 07/28] docs(bench): address CodeRabbit feedback on fixture docs - bench_data.sql, FIXTURE_SCHEMA.md: correct offset labels from +33/+66 to +34/+67 (formulas unchanged; labels now match the id sequences) - FIXTURE_SCHEMA.md: remove self-contradictory ore_data.sql fixture references; the ore table is migration-only, not a fixture - migrations/README.md: update stale example filename from 005_my_fixture.sql (slot occupied) to 008_my_fixture.sql --- tests/sqlx/fixtures/FIXTURE_SCHEMA.md | 14 +++++++------- tests/sqlx/fixtures/bench_data.sql | 4 ++-- tests/sqlx/migrations/README.md | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/sqlx/fixtures/FIXTURE_SCHEMA.md b/tests/sqlx/fixtures/FIXTURE_SCHEMA.md index 70d3b0d9..87c352ef 100644 --- a/tests/sqlx/fixtures/FIXTURE_SCHEMA.md +++ b/tests/sqlx/fixtures/FIXTURE_SCHEMA.md @@ -9,7 +9,7 @@ EQL Extension (via migrations) ├── encrypted_json.sql ├── array_data.sql ├── order_by_null_data.sql (depends on ore migration) - ├── ore_data.sql + ├── ore table (migration 002 — not a fixture) └── bench_data.sql + bench_setup.sql (depend on migration 007) ``` @@ -155,8 +155,8 @@ CREATE TABLE bench ( - 10,000 rows cycling through 100 distinct encrypted values (ore ids 1-100) - Cycling offsets create varied column distributions: - `encrypted_text`: ids 1, 2, ..., 100, 1, 2, ... 
(offset 0) - - `encrypted_int`: ids 35, 36, ..., 100, 1, ..., 34 (offset +33) - - `encrypted_bigint`: ids 68, 69, ..., 100, 1, ..., 67 (offset +66) + - `encrypted_int`: ids 35, 36, ..., 100, 1, ..., 34 (offset +34) + - `encrypted_bigint`: ids 68, 69, ..., 100, 1, ..., 67 (offset +67) - Each row has HMAC, bloom filter, and ORE index terms **Used By:** @@ -199,15 +199,15 @@ async fn fixture_encrypted_json_has_three_records(pool: PgPool) { } ``` -### ore_data Validation +### ore Migration Validation ```rust -#[sqlx::test(fixtures(path = "../fixtures", scripts("ore_data")))] +#[sqlx::test] async fn fixture_ore_data_has_99_records(pool: PgPool) { let count: i64 = sqlx::query_scalar("SELECT COUNT(*) FROM ore") .fetch_one(&pool) .await .unwrap(); - assert_eq!(count, 99, "ore_data fixture should create 99 records"); + assert_eq!(count, 99, "ore migration should provide 99 records"); } ``` @@ -217,7 +217,7 @@ async fn fixture_ore_data_has_99_records(pool: PgPool) { - Use snake_case for fixture file names - Name should describe the data, not the test using it -- Examples: `encrypted_json.sql`, `ore_data.sql`, `array_data.sql` +- Examples: `encrypted_json.sql`, `array_data.sql`, `bench_data.sql` ## Adding New Fixtures diff --git a/tests/sqlx/fixtures/bench_data.sql b/tests/sqlx/fixtures/bench_data.sql index ca0db6dd..149c08db 100644 --- a/tests/sqlx/fixtures/bench_data.sql +++ b/tests/sqlx/fixtures/bench_data.sql @@ -8,8 +8,8 @@ -- -- Cycling offsets create varied distributions: -- encrypted_text: ids 1, 2, ..., 100, 1, 2, ... 
(offset 0) --- encrypted_int: ids 35, 36, ..., 100, 1, ..., 34 (offset +33) --- encrypted_bigint: ids 68, 69, ..., 100, 1, ..., 67 (offset +66) +-- encrypted_int: ids 35, 36, ..., 100, 1, ..., 34 (offset +34) +-- encrypted_bigint: ids 68, 69, ..., 100, 1, ..., 67 (offset +67) INSERT INTO bench (encrypted_text, encrypted_int, encrypted_bigint) SELECT diff --git a/tests/sqlx/migrations/README.md b/tests/sqlx/migrations/README.md index f78f0635..abfc7471 100644 --- a/tests/sqlx/migrations/README.md +++ b/tests/sqlx/migrations/README.md @@ -39,7 +39,7 @@ cp release/cipherstash-encrypt.sql tests/sqlx/migrations/001_install_eql.sql ## Adding New Test Fixtures To add new test data or helpers: -1. Create a new migration: `tests/sqlx/migrations/005_my_fixture.sql` +1. Create a new migration using the next unused number (e.g. `tests/sqlx/migrations/008_my_fixture.sql`) 2. Add your SQL fixtures 3. Commit it (static migrations are version-controlled) 4. SQLx will apply it automatically in test runs From 53972f90a4086e939e80ae842ad8eb03477d1a5e Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 22 Apr 2026 10:09:43 +1000 Subject: [PATCH 08/28] refactor(bench): use Zipf-like skew for bench fixture distribution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replaces the uniform, phase-locked cycling (each column got 100 rows each of 100 distinct ids, offsets +34/+67) with a deterministic Zipf-like skew seeded via setseed(0.42) and transformed through random()^2. Key differences: - Skewed distribution — top id gets ~10% of rows, tail ids ~0.5% (~20x ratio), giving the planner realistic histograms instead of a perfectly flat distribution. - Three independent draws per row decorrelate the columns; previously all three were the same cycle with fixed phase shifts. - Id range tightened to [1, 99] — create_encrypted_json(id) looks up ore.id = 10*id, so id=100 previously resolved to a missing ore row.
Existing bench tests are distribution-agnostic (row-count / non-null / read-and-query-by-id=1) and continue to pass. --- tests/sqlx/fixtures/FIXTURE_SCHEMA.md | 9 ++++----- tests/sqlx/fixtures/bench_data.sql | 22 +++++++++++++--------- 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/tests/sqlx/fixtures/FIXTURE_SCHEMA.md b/tests/sqlx/fixtures/FIXTURE_SCHEMA.md index 87c352ef..52c2daa7 100644 --- a/tests/sqlx/fixtures/FIXTURE_SCHEMA.md +++ b/tests/sqlx/fixtures/FIXTURE_SCHEMA.md @@ -152,11 +152,10 @@ CREATE TABLE bench ( ``` **Data:** -- 10,000 rows cycling through 100 distinct encrypted values (ore ids 1-100) -- Cycling offsets create varied column distributions: - - `encrypted_text`: ids 1, 2, ..., 100, 1, 2, ... (offset 0) - - `encrypted_int`: ids 35, 36, ..., 100, 1, ..., 34 (offset +34) - - `encrypted_bigint`: ids 68, 69, ..., 100, 1, ..., 67 (offset +67) +- 10,000 rows drawn from 99 distinct encrypted values (ore ids 1-99) +- Zipf-like skew via `setseed(0.42)` + `random()^2` — deterministic and byte-identical across runs +- Top id gets ~10% of rows; tail ids ~0.5% each (top:bottom ratio ~20x) +- Each column draws independently, so column values are decorrelated within a row - Each row has HMAC, bloom filter, and ORE index terms **Used By:** diff --git a/tests/sqlx/fixtures/bench_data.sql b/tests/sqlx/fixtures/bench_data.sql index 149c08db..247d4ed5 100644 --- a/tests/sqlx/fixtures/bench_data.sql +++ b/tests/sqlx/fixtures/bench_data.sql @@ -1,19 +1,23 @@ -- Fixture: bench_data.sql -- -- Seeds 10K rows into the bench table for performance testing. --- Each column cycles through 100 distinct encrypted values (from ore ids 1-100). +-- Each column draws independently from 99 distinct encrypted values (ore ids 1-99) +-- using a Zipf-like skew so the planner sees realistic histograms.
-- -- Index terms per row: hm (hmac), b3 (blake3), bf (bloom filter), ob (ORE blocks), sv (STE vec) -- Data generated via create_encrypted_json() from 004_install_test_helpers.sql. -- --- Cycling offsets create varied distributions: --- encrypted_text: ids 1, 2, ..., 100, 1, 2, ... (offset 0) --- encrypted_int: ids 35, 36, ..., 100, 1, ..., 34 (offset +34) --- encrypted_bigint: ids 68, 69, ..., 100, 1, ..., 67 (offset +67) +-- Distribution: +-- Deterministic via setseed(0.42) — byte-identical across runs. +-- random()^2 produces a power-law skew: P(id=k) is roughly proportional to 1/sqrt(k). +-- Top id gets ~10% of rows (~1,000); tail ids get ~0.5% each (~50). Ratio ~20x. +-- Three independent draws per row decorrelate the columns. + +SELECT setseed(0.42); INSERT INTO bench (encrypted_text, encrypted_int, encrypted_bigint) SELECT - create_encrypted_json(((gs - 1) % 100) + 1), - create_encrypted_json(((gs + 33) % 100) + 1), - create_encrypted_json(((gs + 66) % 100) + 1) -FROM generate_series(1, 10000) AS gs; + create_encrypted_json(1 + floor(99 * power(random(), 2))::int), + create_encrypted_json(1 + floor(99 * power(random(), 2))::int), + create_encrypted_json(1 + floor(99 * power(random(), 2))::int) +FROM generate_series(1, 10000); From 86e2e14333e1d70816ac47ae0c2b84f3ba533a21 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 21 Apr 2026 16:21:26 +1000 Subject: [PATCH 09/28] feat(bench): add Tier 1 plan assertions for ORE range queries and P0 ignored patterns --- tests/sqlx/tests/bench_plan_tests.rs | 115 +++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100644 tests/sqlx/tests/bench_plan_tests.rs diff --git a/tests/sqlx/tests/bench_plan_tests.rs b/tests/sqlx/tests/bench_plan_tests.rs new file mode 100644 index 00000000..badc9d26 --- /dev/null +++ b/tests/sqlx/tests/bench_plan_tests.rs @@ -0,0 +1,115 @@ +//! Tier 1 benchmark plan assertions +//! +//! EXPLAIN-based tests asserting each P0/P1 query pattern uses the expected +//! index access method.
Tests for known-broken patterns are marked #[ignore]. + +use anyhow::Result; +use eql_tests::assert_uses_index; +use sqlx::PgPool; + +/// ORE range query (less-than) uses btree index +#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] +async fn ore_int_range_lt_uses_btree_index(pool: PgPool) -> Result<()> { + let encrypted: String = + sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 50") + .fetch_one(&pool) + .await?; + + let sql = format!( + "SELECT * FROM bench WHERE encrypted_int < '{}'::jsonb::eql_v2_encrypted ORDER BY encrypted_int LIMIT 10", + encrypted + ); + assert_uses_index(&pool, &sql, "bench_int_ore_idx").await?; + Ok(()) +} + +/// ORE range query (greater-than) uses btree index +#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] +async fn ore_int_range_gt_uses_btree_index(pool: PgPool) -> Result<()> { + let encrypted: String = + sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 50") + .fetch_one(&pool) + .await?; + + let sql = format!( + "SELECT * FROM bench WHERE encrypted_int > '{}'::jsonb::eql_v2_encrypted ORDER BY encrypted_int LIMIT 10", + encrypted + ); + assert_uses_index(&pool, &sql, "bench_int_ore_idx").await?; + Ok(()) +} + +/// ORE combined range (>= low AND <= high) uses btree index +/// +/// Uses explicit >= / <= rather than BETWEEN — BETWEEN's operator resolution +/// against eql_v2_encrypted is untested and may not resolve to the btree family. 
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] +async fn ore_int_range_combined_uses_btree_index(pool: PgPool) -> Result<()> { + let low: String = + sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 10") + .fetch_one(&pool) + .await?; + let high: String = + sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 90") + .fetch_one(&pool) + .await?; + + let sql = format!( + "SELECT * FROM bench WHERE encrypted_int >= '{}'::jsonb::eql_v2_encrypted AND encrypted_int <= '{}'::jsonb::eql_v2_encrypted ORDER BY encrypted_int LIMIT 10", + low, high + ); + assert_uses_index(&pool, &sql, "bench_int_ore_idx").await?; + Ok(()) +} + +/// eql_cast equality should use hash index — currently seq scans (CIP-2831) +/// +/// "eql_cast" refers to the implicit JSONB-to-eql_v2_encrypted assignment cast +/// defined in `src/encrypted/casts.sql` (`CREATE CAST (jsonb AS eql_v2_encrypted) +/// WITH FUNCTION eql_v2.to_encrypted(jsonb)`). The SQL under test uses +/// `'...'::jsonb::eql_v2_encrypted`, which invokes that cast. PostgreSQL does not +/// recognise this cast path as equivalent to the indexed `hmac_256` term, so the +/// planner falls back to a sequential scan instead of using `bench_text_hmac_idx`. +/// +/// Remove #[ignore] when eql_cast index usage is fixed. At 1M rows this query +/// takes 7.83s vs 0.4ms for hmac_256 — a 19,500x regression. 
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] +#[ignore = "CIP-2831: eql_cast equality performs full seq scan, no index used"] +async fn eql_cast_equality_uses_hash_index(pool: PgPool) -> Result<()> { + let encrypted: String = + sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1") + .fetch_one(&pool) + .await?; + + let sql = format!( + "SELECT * FROM bench WHERE encrypted_text = '{}'::jsonb::eql_v2_encrypted", + encrypted + ); + assert_uses_index(&pool, &sql, "bench_text_hmac_idx").await?; + Ok(()) +} + +/// ORE equality via operator class should use btree — currently seq scans (CIP-2831) +/// +/// Like `eql_cast_equality_uses_hash_index`, the SQL uses `'...'::jsonb::eql_v2_encrypted` +/// (the implicit JSONB assignment cast from `src/encrypted/casts.sql`). For integer +/// columns with ORE index terms the planner should satisfy equality via the btree +/// operator class, but the cast path prevents index recognition and causes a seq scan. +/// +/// Remove #[ignore] when ORE equality index usage is fixed. At 1M rows this +/// query takes 18.47s vs 0.4ms for hmac_256. 
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] +#[ignore = "CIP-2831: ORE equality via operator class performs full seq scan"] +async fn ore_equality_uses_btree_index(pool: PgPool) -> Result<()> { + let encrypted: String = + sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 1") + .fetch_one(&pool) + .await?; + + let sql = format!( + "SELECT * FROM bench WHERE encrypted_int = '{}'::jsonb::eql_v2_encrypted", + encrypted + ); + assert_uses_index(&pool, &sql, "bench_int_ore_idx").await?; + Ok(()) +} From cb22f6b9fb4c51289dfd41496f9c5a327c5cea03 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 21 Apr 2026 16:30:12 +1000 Subject: [PATCH 10/28] feat(bench): add Tier 1 magnitude regression tests with timing thresholds --- tests/sqlx/tests/bench_regression_tests.rs | 94 ++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 tests/sqlx/tests/bench_regression_tests.rs diff --git a/tests/sqlx/tests/bench_regression_tests.rs b/tests/sqlx/tests/bench_regression_tests.rs new file mode 100644 index 00000000..9ab7c250 --- /dev/null +++ b/tests/sqlx/tests/bench_regression_tests.rs @@ -0,0 +1,94 @@ +//! Tier 1 benchmark magnitude regression tests +//! +//! Asserts execution time stays under generous thresholds (~100x expected) +//! to catch catastrophic regressions while tolerating CI runner variance. +//! Uses EXPLAIN ANALYZE averaged over 5 runs for server-side timing. +//! +//! Patterns known to be broken (P0 seq scans) are NOT included here — encoding +//! bad performance as "acceptable" defeats the purpose. See bench_plan_tests.rs +//! for their #[ignore] plan assertions. 
+ +use anyhow::Result; +use eql_tests::{explain_analyze_avg, ExplainStats}; +use sqlx::PgPool; + +/// hmac_256 equality must stay under 50ms on 10K rows (expected ~0.5ms) +#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] +async fn hmac_equality_under_threshold(pool: PgPool) -> Result<()> { + let encrypted: String = + sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1") + .fetch_one(&pool) + .await?; + + let sql = format!( + "SELECT * FROM bench WHERE eql_v2.hmac_256(encrypted_text) = eql_v2.hmac_256('{}'::jsonb::eql_v2_encrypted)", + encrypted + ); + let stats: ExplainStats = explain_analyze_avg(&pool, &sql, 5).await?; + assert!( + stats.execution_time_ms < 50.0, + "hmac_256 equality took {:.1}ms, threshold 50ms (expected ~0.5ms at 10K rows)", + stats.execution_time_ms + ); + Ok(()) +} + +/// bloom_filter containment must stay under 100ms on 10K rows (expected ~1ms) +#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] +async fn bloom_filter_containment_under_threshold(pool: PgPool) -> Result<()> { + let encrypted: String = + sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1") + .fetch_one(&pool) + .await?; + + let sql = format!( + "SELECT * FROM bench WHERE eql_v2.bloom_filter(encrypted_text) @> eql_v2.bloom_filter('{}'::jsonb::eql_v2_encrypted)", + encrypted + ); + let stats: ExplainStats = explain_analyze_avg(&pool, &sql, 5).await?; + assert!( + stats.execution_time_ms < 100.0, + "bloom_filter containment took {:.1}ms, threshold 100ms (expected ~1ms at 10K rows)", + stats.execution_time_ms + ); + Ok(()) +} + +/// ORE range query (< LIMIT 10) must stay under 200ms on 10K rows (expected ~2ms) +#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] +async fn ore_range_lt_under_threshold(pool: PgPool) -> Result<()> { + let encrypted: String = + sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id 
= 50") + .fetch_one(&pool) + .await?; + + let sql = format!( + "SELECT * FROM bench WHERE encrypted_int < '{}'::jsonb::eql_v2_encrypted ORDER BY encrypted_int LIMIT 10", + encrypted + ); + let stats: ExplainStats = explain_analyze_avg(&pool, &sql, 5).await?; + assert!( + stats.execution_time_ms < 200.0, + "ORE range < LIMIT 10 took {:.1}ms, threshold 200ms (expected ~2ms at 10K rows)", + stats.execution_time_ms + ); + Ok(()) +} + +/// ORE ORDER BY LIMIT 10 must stay under 2000ms on 10K rows +/// +/// The design doc's observed baseline for this pattern is ~543ms at 10K rows +/// ("Full-set comparison before sort"). Threshold is set at 2000ms — 4x the +/// observed baseline — to absorb CI variance while catching catastrophic regressions. +#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] +async fn ore_order_by_under_threshold(pool: PgPool) -> Result<()> { + let stats: ExplainStats = + explain_analyze_avg(&pool, "SELECT * FROM bench ORDER BY encrypted_int LIMIT 10", 5) + .await?; + assert!( + stats.execution_time_ms < 2000.0, + "ORE ORDER BY LIMIT 10 took {:.1}ms, threshold 2000ms (observed ~543ms baseline at 10K rows)", + stats.execution_time_ms + ); + Ok(()) +} From b6133f4f685e7bb0e25e1c2727f26e36658649c8 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 21 Apr 2026 17:29:44 +1000 Subject: [PATCH 11/28] fix(bench): address post-review code quality issues - Add get_bench_encrypted_int/text helpers to helpers.rs and re-export from lib.rs - Replace duplicate inline fetch queries with helpers in bench_plan_tests.rs - Add BENCH_INT_ORE_IDX constant to eliminate repeated index name literals - Fix long combined-range format string with line continuation - Add ANALYZE fixture dependency comment to bench_plan_tests.rs module doc - Clarify #[ignore] tests: 10K rows sufficient for validation, CIP-2831 is one root cause - Fix module doc in bench_regression_tests.rs: acknowledge ore_order_by uses 4x not ~100x - Add stats.node_type to all 4 
regression assert messages - Add cardinality comments for id=1 and id=50 probe row choices --- tests/sqlx/src/helpers.rs | 22 ++++++++ tests/sqlx/src/lib.rs | 10 ++-- tests/sqlx/tests/bench_plan_tests.rs | 58 ++++++++++------------ tests/sqlx/tests/bench_regression_tests.rs | 45 ++++++++--------- 4 files changed, 74 insertions(+), 61 deletions(-) diff --git a/tests/sqlx/src/helpers.rs b/tests/sqlx/src/helpers.rs index 8fe12844..b06ab01c 100644 --- a/tests/sqlx/src/helpers.rs +++ b/tests/sqlx/src/helpers.rs @@ -40,6 +40,28 @@ pub async fn get_ore_text_encrypted(pool: &PgPool, id: i32) -> Result { result.with_context(|| format!("ore_text returned NULL for id={}", id)) } +/// Fetch encrypted_int value from the bench table by id +/// +/// The bench table is created by the bench_data fixture (10K rows, ids 1-10000). +pub async fn get_bench_encrypted_int(pool: &PgPool, id: i32) -> Result { + sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = $1") + .bind(id) + .fetch_one(pool) + .await + .with_context(|| format!("fetching bench encrypted_int for id={id}")) +} + +/// Fetch encrypted_text value from the bench table by id +/// +/// The bench table is created by the bench_data fixture (10K rows, ids 1-10000). 
+pub async fn get_bench_encrypted_text(pool: &PgPool, id: i32) -> Result { + sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = $1") + .bind(id) + .fetch_one(pool) + .await + .with_context(|| format!("fetching bench encrypted_text for id={id}")) +} + /// Assert sorted rows match expected sequential id range pub fn assert_sequential_ids(rows: &[sqlx::postgres::PgRow], start: i64, end: i64) { let ids: Vec = rows.iter().map(|r| r.try_get(0).unwrap()).collect(); diff --git a/tests/sqlx/src/lib.rs b/tests/sqlx/src/lib.rs index f72cc45f..911264c3 100644 --- a/tests/sqlx/src/lib.rs +++ b/tests/sqlx/src/lib.rs @@ -13,11 +13,11 @@ pub use assertions::QueryAssertion; pub use helpers::{ analyze_table, assert_no_seq_scan, assert_sequential_ids, assert_uses_index, assert_uses_seq_scan, create_jsonb_gin_index, ensure_pg_stat_statements, explain_analyze_avg, - explain_json, explain_query, get_encrypted_term, get_ore_encrypted, get_ore_encrypted_as_jsonb, - get_ore_text_encrypted, get_ore_text_encrypted_as_jsonb, get_ste_vec_encrypted, - get_ste_vec_encrypted_pair, get_ste_vec_selector_term, get_ste_vec_sv_element, - get_ste_vec_term_by_id, read_pg_stat_statements, reset_pg_stat_statements, ExplainStats, - PgStatEntry, + explain_json, explain_query, get_bench_encrypted_int, get_bench_encrypted_text, + get_encrypted_term, get_ore_encrypted, get_ore_encrypted_as_jsonb, get_ore_text_encrypted, + get_ore_text_encrypted_as_jsonb, get_ste_vec_encrypted, get_ste_vec_encrypted_pair, + get_ste_vec_selector_term, get_ste_vec_sv_element, get_ste_vec_term_by_id, + read_pg_stat_statements, reset_pg_stat_statements, ExplainStats, PgStatEntry, }; pub use index_types as IndexTypes; pub use selectors::Selectors; diff --git a/tests/sqlx/tests/bench_plan_tests.rs b/tests/sqlx/tests/bench_plan_tests.rs index badc9d26..44d1b3bd 100644 --- a/tests/sqlx/tests/bench_plan_tests.rs +++ b/tests/sqlx/tests/bench_plan_tests.rs @@ -2,40 +2,40 @@ //! //! 
EXPLAIN-based tests asserting each P0/P1 query pattern uses the expected //! index access method. Tests for known-broken patterns are marked #[ignore]. +//! +//! ANALYZE is run by the bench_setup fixture — planner statistics are populated at fixture load. use anyhow::Result; -use eql_tests::assert_uses_index; +use eql_tests::{assert_uses_index, get_bench_encrypted_int, get_bench_encrypted_text}; use sqlx::PgPool; +const BENCH_INT_ORE_IDX: &str = "bench_int_ore_idx"; + /// ORE range query (less-than) uses btree index #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] async fn ore_int_range_lt_uses_btree_index(pool: PgPool) -> Result<()> { - let encrypted: String = - sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 50") - .fetch_one(&pool) - .await?; + let encrypted = get_bench_encrypted_int(&pool, 50).await?; let sql = format!( - "SELECT * FROM bench WHERE encrypted_int < '{}'::jsonb::eql_v2_encrypted ORDER BY encrypted_int LIMIT 10", + "SELECT * FROM bench WHERE encrypted_int < '{}'::jsonb::eql_v2_encrypted \ + ORDER BY encrypted_int LIMIT 10", encrypted ); - assert_uses_index(&pool, &sql, "bench_int_ore_idx").await?; + assert_uses_index(&pool, &sql, BENCH_INT_ORE_IDX).await?; Ok(()) } /// ORE range query (greater-than) uses btree index #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] async fn ore_int_range_gt_uses_btree_index(pool: PgPool) -> Result<()> { - let encrypted: String = - sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 50") - .fetch_one(&pool) - .await?; + let encrypted = get_bench_encrypted_int(&pool, 50).await?; let sql = format!( - "SELECT * FROM bench WHERE encrypted_int > '{}'::jsonb::eql_v2_encrypted ORDER BY encrypted_int LIMIT 10", + "SELECT * FROM bench WHERE encrypted_int > '{}'::jsonb::eql_v2_encrypted \ + ORDER BY encrypted_int LIMIT 10", encrypted ); - assert_uses_index(&pool, &sql, "bench_int_ore_idx").await?; + 
assert_uses_index(&pool, &sql, BENCH_INT_ORE_IDX).await?; Ok(()) } @@ -45,20 +45,17 @@ async fn ore_int_range_gt_uses_btree_index(pool: PgPool) -> Result<()> { /// against eql_v2_encrypted is untested and may not resolve to the btree family. #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] async fn ore_int_range_combined_uses_btree_index(pool: PgPool) -> Result<()> { - let low: String = - sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 10") - .fetch_one(&pool) - .await?; - let high: String = - sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 90") - .fetch_one(&pool) - .await?; + let low = get_bench_encrypted_int(&pool, 10).await?; + let high = get_bench_encrypted_int(&pool, 90).await?; let sql = format!( - "SELECT * FROM bench WHERE encrypted_int >= '{}'::jsonb::eql_v2_encrypted AND encrypted_int <= '{}'::jsonb::eql_v2_encrypted ORDER BY encrypted_int LIMIT 10", + "SELECT * FROM bench \ + WHERE encrypted_int >= '{}'::jsonb::eql_v2_encrypted \ + AND encrypted_int <= '{}'::jsonb::eql_v2_encrypted \ + ORDER BY encrypted_int LIMIT 10", low, high ); - assert_uses_index(&pool, &sql, "bench_int_ore_idx").await?; + assert_uses_index(&pool, &sql, BENCH_INT_ORE_IDX).await?; Ok(()) } @@ -73,13 +70,11 @@ async fn ore_int_range_combined_uses_btree_index(pool: PgPool) -> Result<()> { /// /// Remove #[ignore] when eql_cast index usage is fixed. At 1M rows this query /// takes 7.83s vs 0.4ms for hmac_256 — a 19,500x regression. +/// Passing with the 10K-row fixture confirms index usage — timing data above was measured at 1M rows. 
#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] #[ignore = "CIP-2831: eql_cast equality performs full seq scan, no index used"] async fn eql_cast_equality_uses_hash_index(pool: PgPool) -> Result<()> { - let encrypted: String = - sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1") - .fetch_one(&pool) - .await?; + let encrypted = get_bench_encrypted_text(&pool, 1).await?; let sql = format!( "SELECT * FROM bench WHERE encrypted_text = '{}'::jsonb::eql_v2_encrypted", @@ -96,20 +91,19 @@ async fn eql_cast_equality_uses_hash_index(pool: PgPool) -> Result<()> { /// columns with ORE index terms the planner should satisfy equality via the btree /// operator class, but the cast path prevents index recognition and causes a seq scan. /// +/// CIP-2831 covers both this and `eql_cast_equality_uses_hash_index` as a single root cause fix. /// Remove #[ignore] when ORE equality index usage is fixed. At 1M rows this /// query takes 18.47s vs 0.4ms for hmac_256. +/// Passing with the 10K-row fixture confirms index usage — timing data above was measured at 1M rows. 
#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] #[ignore = "CIP-2831: ORE equality via operator class performs full seq scan"] async fn ore_equality_uses_btree_index(pool: PgPool) -> Result<()> { - let encrypted: String = - sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 1") - .fetch_one(&pool) - .await?; + let encrypted = get_bench_encrypted_int(&pool, 1).await?; let sql = format!( "SELECT * FROM bench WHERE encrypted_int = '{}'::jsonb::eql_v2_encrypted", encrypted ); - assert_uses_index(&pool, &sql, "bench_int_ore_idx").await?; + assert_uses_index(&pool, &sql, BENCH_INT_ORE_IDX).await?; Ok(()) } diff --git a/tests/sqlx/tests/bench_regression_tests.rs b/tests/sqlx/tests/bench_regression_tests.rs index 9ab7c250..a5de334d 100644 --- a/tests/sqlx/tests/bench_regression_tests.rs +++ b/tests/sqlx/tests/bench_regression_tests.rs @@ -1,7 +1,9 @@ //! Tier 1 benchmark magnitude regression tests //! -//! Asserts execution time stays under generous thresholds (~100x expected) -//! to catch catastrophic regressions while tolerating CI runner variance. +//! Asserts execution time stays under generous thresholds to catch catastrophic regressions +//! while tolerating CI runner variance. Most thresholds are ~100x the expected baseline; +//! ore_order_by uses 4x (543ms observed baseline leaves little headroom for a 100x multiple +//! without creating a test that never fails). //! Uses EXPLAIN ANALYZE averaged over 5 runs for server-side timing. //! //! Patterns known to be broken (P0 seq scans) are NOT included here — encoding @@ -9,16 +11,14 @@ //! for their #[ignore] plan assertions. 
use anyhow::Result; -use eql_tests::{explain_analyze_avg, ExplainStats}; +use eql_tests::{explain_analyze_avg, get_bench_encrypted_int, get_bench_encrypted_text, ExplainStats}; use sqlx::PgPool; /// hmac_256 equality must stay under 50ms on 10K rows (expected ~0.5ms) #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] async fn hmac_equality_under_threshold(pool: PgPool) -> Result<()> { - let encrypted: String = - sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1") - .fetch_one(&pool) - .await?; + // id=1 maps to 1 of 100 distinct values → ~100 matching rows at 10K + let encrypted = get_bench_encrypted_text(&pool, 1).await?; let sql = format!( "SELECT * FROM bench WHERE eql_v2.hmac_256(encrypted_text) = eql_v2.hmac_256('{}'::jsonb::eql_v2_encrypted)", @@ -27,8 +27,8 @@ async fn hmac_equality_under_threshold(pool: PgPool) -> Result<()> { let stats: ExplainStats = explain_analyze_avg(&pool, &sql, 5).await?; assert!( stats.execution_time_ms < 50.0, - "hmac_256 equality took {:.1}ms, threshold 50ms (expected ~0.5ms at 10K rows)", - stats.execution_time_ms + "hmac_256 equality took {:.1}ms, threshold 50ms (expected ~0.5ms at 10K rows, node_type={})", + stats.execution_time_ms, stats.node_type ); Ok(()) } @@ -36,10 +36,8 @@ async fn hmac_equality_under_threshold(pool: PgPool) -> Result<()> { /// bloom_filter containment must stay under 100ms on 10K rows (expected ~1ms) #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] async fn bloom_filter_containment_under_threshold(pool: PgPool) -> Result<()> { - let encrypted: String = - sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1") - .fetch_one(&pool) - .await?; + // id=1 maps to 1 of 100 distinct values → ~100 matching rows at 10K + let encrypted = get_bench_encrypted_text(&pool, 1).await?; let sql = format!( "SELECT * FROM bench WHERE eql_v2.bloom_filter(encrypted_text) @> 
eql_v2.bloom_filter('{}'::jsonb::eql_v2_encrypted)", @@ -48,8 +46,8 @@ async fn bloom_filter_containment_under_threshold(pool: PgPool) -> Result<()> { let stats: ExplainStats = explain_analyze_avg(&pool, &sql, 5).await?; assert!( stats.execution_time_ms < 100.0, - "bloom_filter containment took {:.1}ms, threshold 100ms (expected ~1ms at 10K rows)", - stats.execution_time_ms + "bloom_filter containment took {:.1}ms, threshold 100ms (expected ~1ms at 10K rows, node_type={})", + stats.execution_time_ms, stats.node_type ); Ok(()) } @@ -57,20 +55,19 @@ async fn bloom_filter_containment_under_threshold(pool: PgPool) -> Result<()> { /// ORE range query (< LIMIT 10) must stay under 200ms on 10K rows (expected ~2ms) #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] async fn ore_range_lt_under_threshold(pool: PgPool) -> Result<()> { - let encrypted: String = - sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 50") - .fetch_one(&pool) - .await?; + // id=50 is the distribution midpoint → ~4,900 rows below threshold + let encrypted = get_bench_encrypted_int(&pool, 50).await?; let sql = format!( - "SELECT * FROM bench WHERE encrypted_int < '{}'::jsonb::eql_v2_encrypted ORDER BY encrypted_int LIMIT 10", + "SELECT * FROM bench WHERE encrypted_int < '{}'::jsonb::eql_v2_encrypted \ + ORDER BY encrypted_int LIMIT 10", encrypted ); let stats: ExplainStats = explain_analyze_avg(&pool, &sql, 5).await?; assert!( stats.execution_time_ms < 200.0, - "ORE range < LIMIT 10 took {:.1}ms, threshold 200ms (expected ~2ms at 10K rows)", - stats.execution_time_ms + "ORE range < LIMIT 10 took {:.1}ms, threshold 200ms (expected ~2ms at 10K rows, node_type={})", + stats.execution_time_ms, stats.node_type ); Ok(()) } @@ -87,8 +84,8 @@ async fn ore_order_by_under_threshold(pool: PgPool) -> Result<()> { .await?; assert!( stats.execution_time_ms < 2000.0, - "ORE ORDER BY LIMIT 10 took {:.1}ms, threshold 2000ms (observed ~543ms baseline at 10K 
rows)", - stats.execution_time_ms + "ORE ORDER BY LIMIT 10 took {:.1}ms, threshold 2000ms (observed ~543ms baseline at 10K rows, node_type={})", + stats.execution_time_ms, stats.node_type ); Ok(()) } From 999d22a6ab6afe5e48af3f41fd799627a9a0e4b8 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 21 Apr 2026 18:43:03 +1000 Subject: [PATCH 12/28] fix(bench): address third code review round - Add NULL context to get_bench_encrypted_int/text helpers (mirrors ore helper pattern) - Add BENCH_TEXT_HMAC_IDX constant to bench_plan_tests.rs for consistency with BENCH_INT_ORE_IDX - Clarify id=50 midpoint comment: bench row midpoint, not ORE value midpoint (+33 offset) - Split long use statement in bench_regression_tests.rs onto two lines --- tests/sqlx/src/helpers.rs | 24 +++++++++++++--------- tests/sqlx/tests/bench_plan_tests.rs | 3 ++- tests/sqlx/tests/bench_regression_tests.rs | 16 ++++++++++----- 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/tests/sqlx/src/helpers.rs b/tests/sqlx/src/helpers.rs index b06ab01c..6bf5fc4c 100644 --- a/tests/sqlx/src/helpers.rs +++ b/tests/sqlx/src/helpers.rs @@ -44,22 +44,26 @@ pub async fn get_ore_text_encrypted(pool: &PgPool, id: i32) -> Result { /// /// The bench table is created by the bench_data fixture (10K rows, ids 1-10000). 
pub async fn get_bench_encrypted_int(pool: &PgPool, id: i32) -> Result<String> { - sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = $1") - .bind(id) - .fetch_one(pool) - .await - .with_context(|| format!("fetching bench encrypted_int for id={id}")) + let result: Option<String> = + sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = $1") + .bind(id) + .fetch_one(pool) + .await + .with_context(|| format!("fetching bench encrypted_int for id={id}"))?; + result.with_context(|| format!("bench.encrypted_int is NULL for id={id}")) } /// Fetch encrypted_text value from the bench table by id /// /// The bench table is created by the bench_data fixture (10K rows, ids 1-10000). pub async fn get_bench_encrypted_text(pool: &PgPool, id: i32) -> Result<String> { - sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = $1") - .bind(id) - .fetch_one(pool) - .await - .with_context(|| format!("fetching bench encrypted_text for id={id}")) + let result: Option<String> = + sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = $1") + .bind(id) + .fetch_one(pool) + .await + .with_context(|| format!("fetching bench encrypted_text for id={id}"))?; + result.with_context(|| format!("bench.encrypted_text is NULL for id={id}")) } /// Assert sorted rows match expected sequential id range diff --git a/tests/sqlx/tests/bench_plan_tests.rs b/tests/sqlx/tests/bench_plan_tests.rs index 44d1b3bd..7d2dc163 100644 --- a/tests/sqlx/tests/bench_plan_tests.rs +++ b/tests/sqlx/tests/bench_plan_tests.rs @@ -10,6 +10,7 @@ use eql_tests::{assert_uses_index, get_bench_encrypted_int, get_bench_encrypted_ use sqlx::PgPool; const BENCH_INT_ORE_IDX: &str = "bench_int_ore_idx"; +const BENCH_TEXT_HMAC_IDX: &str = "bench_text_hmac_idx"; /// ORE range query (less-than) uses btree index #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] @@ -80,7 +81,7 @@ async fn eql_cast_equality_uses_hash_index(pool: PgPool) -> Result<()> 
{ "SELECT * FROM bench WHERE encrypted_text = '{}'::jsonb::eql_v2_encrypted", encrypted ); - assert_uses_index(&pool, &sql, "bench_text_hmac_idx").await?; + assert_uses_index(&pool, &sql, BENCH_TEXT_HMAC_IDX).await?; Ok(()) } diff --git a/tests/sqlx/tests/bench_regression_tests.rs b/tests/sqlx/tests/bench_regression_tests.rs index a5de334d..f3668476 100644 --- a/tests/sqlx/tests/bench_regression_tests.rs +++ b/tests/sqlx/tests/bench_regression_tests.rs @@ -11,7 +11,9 @@ //! for their #[ignore] plan assertions. use anyhow::Result; -use eql_tests::{explain_analyze_avg, get_bench_encrypted_int, get_bench_encrypted_text, ExplainStats}; +use eql_tests::{ + explain_analyze_avg, get_bench_encrypted_int, get_bench_encrypted_text, ExplainStats, +}; use sqlx::PgPool; /// hmac_256 equality must stay under 50ms on 10K rows (expected ~0.5ms) @@ -55,7 +57,8 @@ async fn bloom_filter_containment_under_threshold(pool: PgPool) -> Result<()> { /// ORE range query (< LIMIT 10) must stay under 200ms on 10K rows (expected ~2ms) #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] async fn ore_range_lt_under_threshold(pool: PgPool) -> Result<()> { - // id=50 is the distribution midpoint → ~4,900 rows below threshold + // id=50 is the bench row midpoint; encrypted_int uses a +33 offset so this maps + // to ore id 83, but the 10K distribution still yields ~4,900 rows below the predicate let encrypted = get_bench_encrypted_int(&pool, 50).await?; let sql = format!( @@ -79,9 +82,12 @@ async fn ore_range_lt_under_threshold(pool: PgPool) -> Result<()> { /// observed baseline — to absorb CI variance while catching catastrophic regressions. 
#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))] async fn ore_order_by_under_threshold(pool: PgPool) -> Result<()> { - let stats: ExplainStats = - explain_analyze_avg(&pool, "SELECT * FROM bench ORDER BY encrypted_int LIMIT 10", 5) - .await?; + let stats: ExplainStats = explain_analyze_avg( + &pool, + "SELECT * FROM bench ORDER BY encrypted_int LIMIT 10", + 5, + ) + .await?; assert!( stats.execution_time_ms < 2000.0, "ORE ORDER BY LIMIT 10 took {:.1}ms, threshold 2000ms (observed ~543ms baseline at 10K rows, node_type={})", From 808b789092f92d15e8bb12a784d0f408139e2d51 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 22 Apr 2026 13:37:50 +1000 Subject: [PATCH 13/28] chore(bench): scaffold tests/benchmarks/ directory with README and gitignore --- tests/benchmarks/.gitignore | 6 ++++ tests/benchmarks/README.md | 48 +++++++++++++++++++++++++++++++ tests/benchmarks/reports/.gitkeep | 0 3 files changed, 54 insertions(+) create mode 100644 tests/benchmarks/.gitignore create mode 100644 tests/benchmarks/README.md create mode 100644 tests/benchmarks/reports/.gitkeep diff --git a/tests/benchmarks/.gitignore b/tests/benchmarks/.gitignore new file mode 100644 index 00000000..9e7d7623 --- /dev/null +++ b/tests/benchmarks/.gitignore @@ -0,0 +1,6 @@ +# Generated reports (too large for git, regenerated on demand) +reports/* +!reports/.gitkeep + +# Local Proxy credentials +.env diff --git a/tests/benchmarks/README.md b/tests/benchmarks/README.md new file mode 100644 index 00000000..885ddc74 --- /dev/null +++ b/tests/benchmarks/README.md @@ -0,0 +1,48 @@ +# EQL Scheduled Benchmarks (Tier 2) + +Heavy-weight performance benchmarks that run weekly in CI against 100K-row +encrypted datasets. Complements the Tier 1 tests in `tests/sqlx/tests/bench_*`. 
+ +## What this is + +- Brings up Postgres + CipherStash Proxy via docker-compose +- Inserts 100K plaintext rows through the Proxy (which encrypts them) +- Runs each P0/P1/P2 query pattern 1000 times +- Reads `pg_stat_statements` for statistical aggregates +- Outputs JSON + Markdown reports + +## Local usage + +```bash +# Populate credentials +cp tests/benchmarks/.env.example tests/benchmarks/.env +# Edit .env with your CipherStash credentials + +# Start Postgres + Proxy +mise run bench:up + +# Build EQL and generate 100K dataset (bench:generate depends on build) +mise run bench:generate + +# Run the full Tier 2 suite +mise run bench:full + +# Results land in tests/benchmarks/reports/ +``` + +## CI usage + +Runs automatically every Monday at 03:00 UTC via +`.github/workflows/benchmark.yml`. Also manually invocable from the +GitHub Actions UI (Run workflow button). + +## Why a separate workflow + +- 100K generation takes ~100 seconds via the Proxy +- 1000-run query loops add several minutes per pattern +- Regular PR CI must stay under 10 minutes; this suite would blow that budget + +## Output + +`tests/benchmarks/reports/benchmark-YYYY-MM-DD.{json,md}` — uploaded as +GitHub Actions artifact named `benchmark-report-`. 
diff --git a/tests/benchmarks/reports/.gitkeep b/tests/benchmarks/reports/.gitkeep new file mode 100644 index 00000000..e69de29b From 41b95cbe2d35504883775830e6496d1104e2ac7b Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 22 Apr 2026 13:40:43 +1000 Subject: [PATCH 14/28] feat(bench): add docker-compose with Postgres + CipherStash Proxy for data generation --- tests/benchmarks/.env.example | 7 ++++ tests/benchmarks/docker-compose.yml | 59 +++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 tests/benchmarks/.env.example create mode 100644 tests/benchmarks/docker-compose.yml diff --git a/tests/benchmarks/.env.example b/tests/benchmarks/.env.example new file mode 100644 index 00000000..fe41909a --- /dev/null +++ b/tests/benchmarks/.env.example @@ -0,0 +1,7 @@ +# CipherStash Proxy credentials +# Get these from https://dashboard.cipherstash.com +CS_CLIENT_ACCESS_KEY= +CS_DEFAULT_KEYSET_ID= +CS_CLIENT_KEY= +CS_CLIENT_ID= +CS_WORKSPACE_CRN= diff --git a/tests/benchmarks/docker-compose.yml b/tests/benchmarks/docker-compose.yml new file mode 100644 index 00000000..f8d47d48 --- /dev/null +++ b/tests/benchmarks/docker-compose.yml @@ -0,0 +1,59 @@ +services: + postgres: + image: postgres:17 + container_name: bench-postgres + command: > + postgres + -c track_functions=all + -c shared_preload_libraries=pg_stat_statements + -c pg_stat_statements.track=all + -c pg_stat_statements.max=10000 + ports: + - "7433:5432" + environment: + POSTGRES_DB: cipherstash + POSTGRES_USER: cipherstash + POSTGRES_PASSWORD: password + healthcheck: + test: ["CMD-SHELL", "pg_isready -U cipherstash"] + interval: 1s + timeout: 5s + retries: 10 + networks: + - bench + + proxy: + image: cipherstash/proxy:latest + container_name: bench-proxy + ports: + - "6433:6432" + environment: + CS_DATABASE__NAME: cipherstash + CS_DATABASE__USERNAME: cipherstash + CS_DATABASE__PASSWORD: password + CS_DATABASE__HOST: postgres + CS_DATABASE__PORT: 5432 + # EQL install is performed 
explicitly by generate.sh before schema.sql runs. + # Leaving Proxy's own install off avoids racing against generate.sh. + CS_DATABASE__INSTALL_EQL: "false" + CS_CLIENT_ACCESS_KEY: ${CS_CLIENT_ACCESS_KEY} + CS_DEFAULT_KEYSET_ID: ${CS_DEFAULT_KEYSET_ID} + CS_CLIENT_KEY: ${CS_CLIENT_KEY} + CS_CLIENT_ID: ${CS_CLIENT_ID} + CS_WORKSPACE_CRN: ${CS_WORKSPACE_CRN} + healthcheck: + # Probe the Proxy's pg-protocol listener (no auth handshake required). + # busybox `nc` is present in the cipherstash/proxy image. + test: ["CMD-SHELL", "nc -z localhost 6432"] + interval: 1s + timeout: 5s + retries: 30 + depends_on: + postgres: + condition: service_healthy + networks: + - bench + +networks: + bench: + driver: bridge From 721a3f00e276e2ee7cc75340bde6871cf0fbbf40 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 22 Apr 2026 13:44:00 +1000 Subject: [PATCH 15/28] feat(bench): add schema.sql with bench table and Proxy search configuration --- tests/benchmarks/schema.sql | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 tests/benchmarks/schema.sql diff --git a/tests/benchmarks/schema.sql b/tests/benchmarks/schema.sql new file mode 100644 index 00000000..e8693ef1 --- /dev/null +++ b/tests/benchmarks/schema.sql @@ -0,0 +1,35 @@ +-- Bench schema for Tier 2 benchmarks. +-- Applied against the bench-postgres container AFTER EQL has been explicitly +-- installed by generate.sh (see Task 4 — generate.sh installs +-- release/cipherstash-encrypt.sql directly, not relying on Proxy's async install). + +DROP TABLE IF EXISTS bench; + +CREATE TABLE bench ( + id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, + encrypted_text eql_v2_encrypted, + encrypted_int eql_v2_encrypted, + encrypted_bigint eql_v2_encrypted +); + +-- Proxy search configuration: tells Proxy which index terms to generate +-- for each column when plaintext is inserted. +-- +-- Signature: eql_v2.add_search_config(table, column, index, cast_as) +-- (see src/config/functions.sql). 
add_search_config calls activate_config +-- internally when migrating=false, so no explicit activate_config call. + +-- text column: equality (hmac), pattern match (bloom), ordering (ore) +SELECT eql_v2.add_search_config('bench', 'encrypted_text', 'unique', 'text'); +SELECT eql_v2.add_search_config('bench', 'encrypted_text', 'match', 'text'); +SELECT eql_v2.add_search_config('bench', 'encrypted_text', 'ore', 'text'); + +-- integer column: equality + ORE range/ordering +SELECT eql_v2.add_search_config('bench', 'encrypted_int', 'unique', 'int'); +SELECT eql_v2.add_search_config('bench', 'encrypted_int', 'ore', 'int'); + +-- bigint column: equality + ORE range/ordering +SELECT eql_v2.add_search_config('bench', 'encrypted_bigint', 'unique', 'big_int'); +SELECT eql_v2.add_search_config('bench', 'encrypted_bigint', 'ore', 'big_int'); + +-- Indexes (created after data load in generate.sh, after ANALYZE) From 8fe674863953465a467107745458c4689dd281eb Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 22 Apr 2026 13:46:42 +1000 Subject: [PATCH 16/28] feat(bench): add generate.sh for 100K dataset generation via Proxy --- tests/benchmarks/generate.sh | 58 ++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100755 tests/benchmarks/generate.sh diff --git a/tests/benchmarks/generate.sh b/tests/benchmarks/generate.sh new file mode 100755 index 00000000..bf6aee96 --- /dev/null +++ b/tests/benchmarks/generate.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Generates a 100K-row encrypted bench dataset via CipherStash Proxy. +# No dump is written in v1 — the Tier 2 workflow regenerates fresh each run. +# +# Prerequisites: +# - mise run build (produces release/cipherstash-encrypt.sql) +# - docker compose -f tests/benchmarks/docker-compose.yml up -d --wait +# - tests/benchmarks/.env populated with CipherStash credentials + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd)" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +EQL_SQL="$REPO_ROOT/release/cipherstash-encrypt.sql" +SCALE="${1:-100k}" + +case "$SCALE" in + 100k) ROWS=100000 ;; + *) echo "Unsupported scale: $SCALE (only 100k in v1)" >&2; exit 1 ;; +esac + +if [ ! -f "$EQL_SQL" ]; then + echo "ERROR: $EQL_SQL not found. Run 'mise run build' first." >&2 + exit 1 +fi + +PG_URL="postgresql://cipherstash:password@localhost:7433/cipherstash" +PROXY_URL="postgresql://cipherstash:password@localhost:6433/cipherstash" + +echo "==> Installing EQL into bench-postgres" +psql "$PG_URL" -v ON_ERROR_STOP=1 -f "$EQL_SQL" >/dev/null + +echo "==> Applying bench schema and Proxy search configuration" +psql "$PG_URL" -v ON_ERROR_STOP=1 -f "$SCRIPT_DIR/schema.sql" + +echo "==> Inserting $ROWS plaintext rows through Proxy (this encrypts them)" +# generate_series emits plaintext rows; Proxy intercepts and encrypts each +# column per the search config applied in schema.sql. +psql "$PROXY_URL" -v ON_ERROR_STOP=1 -c " +INSERT INTO bench (encrypted_text, encrypted_int, encrypted_bigint) +SELECT + ('text_' || (((gs - 1) % 1000) + 1))::text, + (((gs - 1) % 1000) + 1)::int, + (((gs - 1) % 1000) + 1)::bigint * 1000000000 +FROM generate_series(1, $ROWS) AS gs; +" + +echo "==> Creating indexes and running ANALYZE" +psql "$PG_URL" -v ON_ERROR_STOP=1 -c " +CREATE INDEX IF NOT EXISTS bench_text_hmac_idx ON bench USING hash (eql_v2.hmac_256(encrypted_text)); +CREATE INDEX IF NOT EXISTS bench_text_ore_idx ON bench USING btree (encrypted_text eql_v2.encrypted_operator_class); +CREATE INDEX IF NOT EXISTS bench_int_ore_idx ON bench USING btree (encrypted_int eql_v2.encrypted_operator_class); +CREATE INDEX IF NOT EXISTS bench_bigint_ore_idx ON bench USING btree (encrypted_bigint eql_v2.encrypted_operator_class); +CREATE INDEX IF NOT EXISTS bench_text_bloom_idx ON bench USING gin (eql_v2.bloom_filter(encrypted_text)); +ANALYZE bench; +" + +echo "==> Done. 
Rows: $ROWS" From c58629c21b437542243e646c7a9c798ca77870f9 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 22 Apr 2026 13:50:26 +1000 Subject: [PATCH 17/28] feat(bench): add mise tasks bench:up/down/generate/full --- mise.toml | 2 +- tasks/bench.toml | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) create mode 100644 tasks/bench.toml diff --git a/mise.toml b/mise.toml index ff70e4ce..878cb8cc 100644 --- a/mise.toml +++ b/mise.toml @@ -14,7 +14,7 @@ "python" = "3.13" [task_config] -includes = ["tasks", "tasks/postgres.toml"] +includes = ["tasks", "tasks/postgres.toml", "tasks/bench.toml"] [env] POSTGRES_DB = "cipherstash" diff --git a/tasks/bench.toml b/tasks/bench.toml new file mode 100644 index 00000000..d4b70bc7 --- /dev/null +++ b/tasks/bench.toml @@ -0,0 +1,35 @@ +["bench:up"] +description = "Start Postgres + Proxy for benchmark data generation" +dir = "{{config_root}}" +run = """ +if [ ! -f tests/benchmarks/.env ]; then + echo "ERROR: tests/benchmarks/.env missing. Copy .env.example and fill in credentials." >&2 + exit 1 +fi +docker compose --env-file tests/benchmarks/.env -f tests/benchmarks/docker-compose.yml up -d --wait +""" + +["bench:down"] +description = "Stop benchmark Postgres + Proxy" +dir = "{{config_root}}" +run = """ +docker compose -f tests/benchmarks/docker-compose.yml down -v +""" + +["bench:generate"] +description = "Generate 100K encrypted bench dataset (requires bench:up first)" +# `build` produces release/cipherstash-encrypt.sql, which generate.sh +# installs into the bench Postgres container before applying schema.sql. 
+depends = ["build"] +dir = "{{config_root}}" +run = """ +tests/benchmarks/generate.sh 100k +""" + +["bench:full"] +description = "Run full Tier 2 benchmark suite against bench-postgres" +dir = "{{config_root}}/tests/sqlx" +env = { BENCH_DATABASE_URL = "postgresql://cipherstash:password@localhost:7433/cipherstash" } +run = """ +cargo test --test bench_perf_tests -- --ignored --nocapture --test-threads=1 +""" From a827486b2754f8f88bab50f105200c0a69a849e1 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 22 Apr 2026 13:55:37 +1000 Subject: [PATCH 18/28] feat(bench): add PerfResult struct and JSON/Markdown report writer --- tests/sqlx/src/lib.rs | 2 + tests/sqlx/src/reports.rs | 112 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 114 insertions(+) create mode 100644 tests/sqlx/src/reports.rs diff --git a/tests/sqlx/src/lib.rs b/tests/sqlx/src/lib.rs index 911264c3..f979ed85 100644 --- a/tests/sqlx/src/lib.rs +++ b/tests/sqlx/src/lib.rs @@ -7,6 +7,7 @@ use sqlx::PgPool; pub mod assertions; pub mod helpers; pub mod index_types; +pub mod reports; pub mod selectors; pub use assertions::QueryAssertion; @@ -20,6 +21,7 @@ pub use helpers::{ read_pg_stat_statements, reset_pg_stat_statements, ExplainStats, PgStatEntry, }; pub use index_types as IndexTypes; +pub use reports::{append_result, write_reports, PerfResult}; pub use selectors::Selectors; /// Reset pg_stat_user_functions tracking before tests diff --git a/tests/sqlx/src/reports.rs b/tests/sqlx/src/reports.rs new file mode 100644 index 00000000..b02a36af --- /dev/null +++ b/tests/sqlx/src/reports.rs @@ -0,0 +1,112 @@ +//! Benchmark report writer for Tier 2 scheduled benchmarks. +//! +//! Each `#[ignore]` test in `bench_perf_tests.rs` pushes a `PerfResult` into +//! `append_result`. A teardown-style test (run last, alphabetical order) calls +//! `write_reports` to flush all accumulated results to JSON + Markdown. +//! +//! Output shape matches the design doc (.work/eql-index-performance/ +//! 
2026-03-30-benchmarking-design.md §Report Format) with one caveat: the +//! design doc lists `p95_ms` / `p99_ms` fields; Postgres `pg_stat_statements` +//! does not expose percentiles — only mean / stddev / total. v1 omits them +//! and documents the gap. Adding percentiles would require a different timing +//! strategy (e.g. client-side histograms) deferred to a follow-up. + +use anyhow::{Context, Result}; +use serde::Serialize; +use std::fs; +use std::path::PathBuf; +use std::sync::Mutex; + +/// One benchmark case result. +#[derive(Debug, Clone, Serialize)] +pub struct PerfResult { + /// Test name (e.g. "hmac_256_equality") + pub name: String, + /// Priority tier (P0, P1, P2) + pub priority: String, + /// Number of executions + pub runs: i64, + /// Plan node type (e.g. "Index Scan", "Seq Scan") + pub plan_type: String, + /// Mean execution time in milliseconds + pub mean_ms: f64, + /// Population standard deviation in milliseconds + pub stddev_ms: f64, + /// Total execution time across all runs in milliseconds + pub total_ms: f64, +} + +/// Top-level report structure — matches the design doc's JSON shape. +#[derive(Debug, Clone, Serialize)] +pub struct BenchmarkReport { + /// RFC3339 UTC timestamp: midnight of the report date (`<date>T00:00:00Z`) + pub timestamp: String, + /// Postgres major version (e.g. "17") + pub postgres_version: String, + /// Dataset size this report was produced against + pub dataset_rows: i64, + /// One entry per benchmark case + pub results: Vec<PerfResult>, +} + +static RESULTS: Mutex<Vec<PerfResult>> = Mutex::new(Vec::new()); + +/// Push a result onto the shared in-memory accumulator. +pub fn append_result(r: PerfResult) { + RESULTS.lock().expect("results mutex poisoned").push(r); +} + +/// Write JSON + Markdown reports for all accumulated results. +/// +/// Output paths: +/// `<output_dir>/benchmark-<date>.json` +/// `<output_dir>/benchmark-<date>.md` +/// +/// `date` is an ISO-8601 date string provided by the caller (usually today). +/// `postgres_version` and `dataset_rows` are embedded in the report header. 
+pub fn write_reports( + output_dir: &str, + date: &str, + postgres_version: &str, + dataset_rows: i64, +) -> Result<(PathBuf, PathBuf)> { + let results = RESULTS.lock().expect("results mutex poisoned").clone(); + let report = BenchmarkReport { + timestamp: format!("{date}T00:00:00Z"), + postgres_version: postgres_version.to_string(), + dataset_rows, + results, + }; + + fs::create_dir_all(output_dir) + .with_context(|| format!("creating output dir {output_dir}"))?; + + let json_path = PathBuf::from(output_dir).join(format!("benchmark-{date}.json")); + let md_path = PathBuf::from(output_dir).join(format!("benchmark-{date}.md")); + + let json = serde_json::to_string_pretty(&report) + .context("serializing report to JSON")?; + fs::write(&json_path, json) + .with_context(|| format!("writing {}", json_path.display()))?; + + fs::write(&md_path, render_markdown(&report)) + .with_context(|| format!("writing {}", md_path.display()))?; + + Ok((json_path, md_path)) +} + +fn render_markdown(report: &BenchmarkReport) -> String { + let mut out = String::new(); + out.push_str(&format!("# Benchmark Report — {}\n\n", report.timestamp)); + out.push_str(&format!("- Postgres: {}\n", report.postgres_version)); + out.push_str(&format!("- Dataset rows: {}\n\n", report.dataset_rows)); + out.push_str("| Query Pattern | Priority | Plan | Runs | Mean (ms) | Stddev (ms) |\n"); + out.push_str("|---|---|---|---|---|---|\n"); + for r in &report.results { + out.push_str(&format!( + "| {} | {} | {} | {} | {:.3} | {:.3} |\n", + r.name, r.priority, r.plan_type, r.runs, r.mean_ms, r.stddev_ms + )); + } + out +} From 780ac79108da833f27ed283301d312fb381b8317 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 22 Apr 2026 13:59:45 +1000 Subject: [PATCH 19/28] feat(bench): add Tier 2 perf test infrastructure and hmac_256 baseline case --- tests/sqlx/tests/bench_perf_tests.rs | 113 +++++++++++++++++++++++++++ 1 file changed, 113 insertions(+) create mode 100644 tests/sqlx/tests/bench_perf_tests.rs diff 
--git a/tests/sqlx/tests/bench_perf_tests.rs b/tests/sqlx/tests/bench_perf_tests.rs new file mode 100644 index 00000000..5c6558c7 --- /dev/null +++ b/tests/sqlx/tests/bench_perf_tests.rs @@ -0,0 +1,113 @@ +//! Tier 2 scheduled benchmarks. +//! +//! All tests are marked #[ignore] so regular CI doesn't run them. The scheduled +//! workflow in .github/workflows/benchmark.yml invokes them via +//! `cargo test --test bench_perf_tests -- --ignored`. +//! +//! Unlike Tier 1 tests, these use #[tokio::test] with a manual pool connected +//! via BENCH_DATABASE_URL against a pre-loaded 100K-row dataset. +//! +//! Each test: +//! 1. Resets pg_stat_statements +//! 2. Runs its query pattern 1000 times +//! 3. Reads pg_stat_statements for the match +//! 4. Appends a PerfResult to the shared accumulator +//! +//! A single `zz_write_reports` test (alphabetical last) flushes the accumulator +//! to JSON + Markdown. --test-threads=1 guarantees ordering. + +use anyhow::Result; +use eql_tests::{ + append_result, ensure_pg_stat_statements, read_pg_stat_statements, + reset_pg_stat_statements, write_reports, PerfResult, +}; +use sqlx::postgres::PgPoolOptions; +use sqlx::PgPool; + +const RUNS: i64 = 1000; +const DATASET_ROWS: i64 = 100_000; + +async fn connect() -> Result<PgPool> { + let url = std::env::var("BENCH_DATABASE_URL") + .expect("BENCH_DATABASE_URL must be set (run `mise run bench:full`)"); + let pool = PgPoolOptions::new() + .max_connections(4) + .connect(&url) + .await?; + ensure_pg_stat_statements(&pool).await?; + Ok(pool) +} + +/// P0 baseline: hmac_256 equality should stay ~0.5ms at 100K rows. 
+#[tokio::test] +#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"] +async fn hmac_256_equality() -> Result<()> { + let pool = connect().await?; + + let encrypted: String = sqlx::query_scalar( + "SELECT (encrypted_text).data::text FROM bench WHERE id = 1", + ) + .fetch_one(&pool) + .await?; + + reset_pg_stat_statements(&pool).await?; + + for _ in 0..RUNS { + sqlx::query( + "SELECT * FROM bench WHERE eql_v2.hmac_256(encrypted_text) = eql_v2.hmac_256($1::jsonb::eql_v2_encrypted)", + ) + .bind(&encrypted) + .fetch_all(&pool) + .await?; + } + + let stats = read_pg_stat_statements( + &pool, + "%FROM bench WHERE eql_v2.hmac_256(encrypted_text) = eql_v2.hmac_256($%", + ) + .await?; + + append_result(PerfResult { + name: "hmac_256_equality".into(), + priority: "P0".into(), + runs: stats.calls, + plan_type: "Index Scan".into(), + mean_ms: stats.mean_exec_time, + stddev_ms: stats.stddev_exec_time, + total_ms: stats.total_exec_time, + }); + + assert_eq!(stats.calls, RUNS, "expected {RUNS} recorded calls"); + Ok(()) +} + +/// Alphabetical-last test — flushes accumulated results to disk. +/// Requires `--test-threads=1` so it runs after all benchmark cases. 
+#[tokio::test] +#[ignore = "Tier 2: report writer, runs last under --test-threads=1"] +async fn zz_write_reports() -> Result<()> { + let pool = connect().await?; + let pg_version: String = + sqlx::query_scalar("SHOW server_version_num").fetch_one(&pool).await?; + // server_version_num is "170004" etc — take the major version digits + let pg_major = pg_version + .get(..pg_version.len().saturating_sub(4)) + .unwrap_or(&pg_version) + .to_string(); + + let date = std::env::var("BENCH_REPORT_DATE").unwrap_or_else(|_| today_utc()); + let output_dir = std::env::var("BENCH_REPORT_DIR") + .unwrap_or_else(|_| "../../tests/benchmarks/reports".into()); + let (json, md) = write_reports(&output_dir, &date, &pg_major, DATASET_ROWS)?; + eprintln!("wrote {} and {}", json.display(), md.display()); + Ok(()) +} + +fn today_utc() -> String { + // Avoid adding the `chrono` dep; shell out to `date -u` for UTC. + let out = std::process::Command::new("date") + .args(["-u", "+%Y-%m-%d"]) + .output() + .expect("invoking date"); + String::from_utf8(out.stdout).unwrap().trim().to_string() +} From 0941f2d2dfe35378cbc09fbb9c37634ed98718b8 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 22 Apr 2026 14:09:49 +1000 Subject: [PATCH 20/28] feat(bench): add Tier 2 perf tests for P0/P1/P2 query patterns --- tests/sqlx/tests/bench_perf_tests.rs | 167 +++++++++++++++++++++++++++ 1 file changed, 167 insertions(+) diff --git a/tests/sqlx/tests/bench_perf_tests.rs b/tests/sqlx/tests/bench_perf_tests.rs index 5c6558c7..195a43e3 100644 --- a/tests/sqlx/tests/bench_perf_tests.rs +++ b/tests/sqlx/tests/bench_perf_tests.rs @@ -81,6 +81,173 @@ async fn hmac_256_equality() -> Result<()> { Ok(()) } +/// P2: bloom_filter containment — expected ~3.35ms at 100K rows. 
+#[tokio::test] +#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"] +async fn bloom_filter_containment() -> Result<()> { + let pool = connect().await?; + let encrypted: String = + sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1") + .fetch_one(&pool).await?; + + reset_pg_stat_statements(&pool).await?; + for _ in 0..RUNS { + sqlx::query( + "SELECT * FROM bench WHERE eql_v2.bloom_filter(encrypted_text) @> eql_v2.bloom_filter($1::jsonb::eql_v2_encrypted)", + ) + .bind(&encrypted) + .fetch_all(&pool).await?; + } + let stats = read_pg_stat_statements( + &pool, + "%eql_v2.bloom_filter(encrypted_text) @> eql_v2.bloom_filter($%", + ).await?; + + append_result(PerfResult { + name: "bloom_filter_containment".into(), + priority: "P2".into(), + runs: stats.calls, + plan_type: "Bitmap Index Scan".into(), + mean_ms: stats.mean_exec_time, + stddev_ms: stats.stddev_exec_time, + total_ms: stats.total_exec_time, + }); + assert_eq!(stats.calls, RUNS); + Ok(()) +} + +/// P0: eql_cast equality — currently seq scans (CIP-2831). Report records the +/// actual plan + timing so the number is visible week-over-week until the fix ships. 
+#[tokio::test] +#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"] +async fn eql_cast_equality() -> Result<()> { + let pool = connect().await?; + let encrypted: String = + sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1") + .fetch_one(&pool).await?; + + reset_pg_stat_statements(&pool).await?; + for _ in 0..RUNS { + sqlx::query("SELECT * FROM bench WHERE encrypted_text = $1::jsonb::eql_v2_encrypted") + .bind(&encrypted) + .fetch_all(&pool).await?; + } + let stats = read_pg_stat_statements( + &pool, + "%FROM bench WHERE encrypted_text = $%::jsonb::eql_v2_encrypted%", + ).await?; + + append_result(PerfResult { + name: "eql_cast_equality".into(), + priority: "P0".into(), + runs: stats.calls, + plan_type: "Seq Scan".into(), + mean_ms: stats.mean_exec_time, + stddev_ms: stats.stddev_exec_time, + total_ms: stats.total_exec_time, + }); + assert_eq!(stats.calls, RUNS); + Ok(()) +} + +/// P0: ORE equality via operator class — currently seq scans (CIP-2831). 
+#[tokio::test] +#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"] +async fn ore_equality_opclass() -> Result<()> { + let pool = connect().await?; + let encrypted: String = + sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 1") + .fetch_one(&pool).await?; + + reset_pg_stat_statements(&pool).await?; + for _ in 0..RUNS { + sqlx::query("SELECT * FROM bench WHERE encrypted_int = $1::jsonb::eql_v2_encrypted") + .bind(&encrypted) + .fetch_all(&pool).await?; + } + let stats = read_pg_stat_statements( + &pool, + "%FROM bench WHERE encrypted_int = $%::jsonb::eql_v2_encrypted%", + ).await?; + + append_result(PerfResult { + name: "ore_equality_opclass".into(), + priority: "P0".into(), + runs: stats.calls, + plan_type: "Seq Scan".into(), + mean_ms: stats.mean_exec_time, + stddev_ms: stats.stddev_exec_time, + total_ms: stats.total_exec_time, + }); + assert_eq!(stats.calls, RUNS); + Ok(()) +} + +/// P1: ORE range < with LIMIT — expected ~1.93ms at 100K rows. 
+#[tokio::test] +#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"] +async fn ore_range_lt_limit() -> Result<()> { + let pool = connect().await?; + let encrypted: String = + sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 50000") + .fetch_one(&pool).await?; + + reset_pg_stat_statements(&pool).await?; + for _ in 0..RUNS { + sqlx::query( + "SELECT * FROM bench WHERE encrypted_int < $1::jsonb::eql_v2_encrypted ORDER BY encrypted_int LIMIT 10", + ) + .bind(&encrypted) + .fetch_all(&pool).await?; + } + let stats = read_pg_stat_statements( + &pool, + "%FROM bench WHERE encrypted_int < $%ORDER BY encrypted_int LIMIT %", + ).await?; + + append_result(PerfResult { + name: "ore_range_lt_limit".into(), + priority: "P1".into(), + runs: stats.calls, + plan_type: "Index Scan".into(), + mean_ms: stats.mean_exec_time, + stddev_ms: stats.stddev_exec_time, + total_ms: stats.total_exec_time, + }); + assert_eq!(stats.calls, RUNS); + Ok(()) +} + +/// P1: ORE ORDER BY encrypted_int LIMIT 10 — design doc observes ~543ms at 10K, +/// so expect several seconds at 100K. Report captures actual number. +#[tokio::test] +#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"] +async fn ore_order_by_limit() -> Result<()> { + let pool = connect().await?; + + reset_pg_stat_statements(&pool).await?; + for _ in 0..RUNS { + sqlx::query("SELECT * FROM bench ORDER BY encrypted_int LIMIT 10") + .fetch_all(&pool).await?; + } + let stats = read_pg_stat_statements( + &pool, + "%FROM bench ORDER BY encrypted_int LIMIT %", + ).await?; + + append_result(PerfResult { + name: "ore_order_by_limit".into(), + priority: "P1".into(), + runs: stats.calls, + plan_type: "Index Scan".into(), + mean_ms: stats.mean_exec_time, + stddev_ms: stats.stddev_exec_time, + total_ms: stats.total_exec_time, + }); + assert_eq!(stats.calls, RUNS); + Ok(()) +} + /// Alphabetical-last test — flushes accumulated results to disk. 
/// Requires `--test-threads=1` so it runs after all benchmark cases. #[tokio::test] From dc27f91a4b47e7e1c5bca4d9eafa3fe4beac174d Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 22 Apr 2026 14:12:56 +1000 Subject: [PATCH 21/28] test(bench): add consistent assertion messages to Tier 2 perf tests --- tests/sqlx/tests/bench_perf_tests.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/sqlx/tests/bench_perf_tests.rs b/tests/sqlx/tests/bench_perf_tests.rs index 195a43e3..95d3f3fa 100644 --- a/tests/sqlx/tests/bench_perf_tests.rs +++ b/tests/sqlx/tests/bench_perf_tests.rs @@ -112,7 +112,7 @@ async fn bloom_filter_containment() -> Result<()> { stddev_ms: stats.stddev_exec_time, total_ms: stats.total_exec_time, }); - assert_eq!(stats.calls, RUNS); + assert_eq!(stats.calls, RUNS, "expected {RUNS} recorded calls"); Ok(()) } @@ -146,7 +146,7 @@ async fn eql_cast_equality() -> Result<()> { stddev_ms: stats.stddev_exec_time, total_ms: stats.total_exec_time, }); - assert_eq!(stats.calls, RUNS); + assert_eq!(stats.calls, RUNS, "expected {RUNS} recorded calls"); Ok(()) } @@ -179,7 +179,7 @@ async fn ore_equality_opclass() -> Result<()> { stddev_ms: stats.stddev_exec_time, total_ms: stats.total_exec_time, }); - assert_eq!(stats.calls, RUNS); + assert_eq!(stats.calls, RUNS, "expected {RUNS} recorded calls"); Ok(()) } @@ -214,7 +214,7 @@ async fn ore_range_lt_limit() -> Result<()> { stddev_ms: stats.stddev_exec_time, total_ms: stats.total_exec_time, }); - assert_eq!(stats.calls, RUNS); + assert_eq!(stats.calls, RUNS, "expected {RUNS} recorded calls"); Ok(()) } @@ -244,7 +244,7 @@ async fn ore_order_by_limit() -> Result<()> { stddev_ms: stats.stddev_exec_time, total_ms: stats.total_exec_time, }); - assert_eq!(stats.calls, RUNS); + assert_eq!(stats.calls, RUNS, "expected {RUNS} recorded calls"); Ok(()) } From 285593ba22ad0fe64a998ef30c1c321fda564fb7 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 22 Apr 2026 14:14:10 +1000 Subject: [PATCH 
22/28] feat(bench): add scheduled GitHub Actions workflow for weekly Tier 2 benchmarks --- .github/workflows/benchmark.yml | 70 +++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 .github/workflows/benchmark.yml diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml new file mode 100644 index 00000000..daec0053 --- /dev/null +++ b/.github/workflows/benchmark.yml @@ -0,0 +1,70 @@ +name: "Scheduled Benchmarks (Tier 2)" + +on: + schedule: + - cron: '0 3 * * 1' # Every Monday 03:00 UTC + workflow_dispatch: + +# Prevent a scheduled run from racing a manual dispatch for the same ports. +concurrency: + group: scheduled-benchmarks + cancel-in-progress: false + +env: + # Matches test-eql.yml — forces JS-based composite actions onto Node 24. + FORCE_JAVASCRIPT_ACTIONS_TO_NODE24: "true" + +jobs: + benchmark: + name: "100K dataset benchmark (Postgres 17)" + runs-on: ubuntu-latest-m + timeout-minutes: 60 + + steps: + - uses: actions/checkout@v4 + + - name: Install postgresql-client + # generate.sh uses psql directly against Postgres (port 7433) and Proxy + # (port 6433). jdx/mise-action only installs Rust + Python. 
+ run: | + sudo apt-get update + sudo apt-get install -y postgresql-client + + - uses: jdx/mise-action@v3 + with: + version: 2026.4.0 + install: true + cache: true + + - name: Write Proxy credentials to .env + run: | + cat > tests/benchmarks/.env < Date: Wed, 22 Apr 2026 14:16:32 +1000 Subject: [PATCH 23/28] fix(bench): write Proxy credentials safely via env block + printf --- .github/workflows/benchmark.yml | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index daec0053..5bc386f2 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -37,14 +37,20 @@ jobs: cache: true - name: Write Proxy credentials to .env + env: + CS_CLIENT_ACCESS_KEY: ${{ secrets.CS_CLIENT_ACCESS_KEY }} + CS_DEFAULT_KEYSET_ID: ${{ secrets.CS_DEFAULT_KEYSET_ID }} + CS_CLIENT_KEY: ${{ secrets.CS_CLIENT_KEY }} + CS_CLIENT_ID: ${{ secrets.CS_CLIENT_ID }} + CS_WORKSPACE_CRN: ${{ secrets.CS_WORKSPACE_CRN }} run: | - cat > tests/benchmarks/.env < tests/benchmarks/.env - name: Bring up Postgres + Proxy run: mise run bench:up From 6d98632e7ea008b439f2f5cbd4ac1c676eb92f61 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 22 Apr 2026 14:29:37 +1000 Subject: [PATCH 24/28] refactor(bench): use DATABASE_URL for Tier 2 tests, drop BENCH_DATABASE_URL The bench:full mise task overrides DATABASE_URL to point at the bench Postgres on port 7433, so tests can read the standard env var like every other test in tests/sqlx/. mise task is the canonical entry point. 
--- tasks/bench.toml | 2 +- tests/sqlx/tests/bench_perf_tests.rs | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/tasks/bench.toml b/tasks/bench.toml index d4b70bc7..df21bd55 100644 --- a/tasks/bench.toml +++ b/tasks/bench.toml @@ -29,7 +29,7 @@ tests/benchmarks/generate.sh 100k ["bench:full"] description = "Run full Tier 2 benchmark suite against bench-postgres" dir = "{{config_root}}/tests/sqlx" -env = { BENCH_DATABASE_URL = "postgresql://cipherstash:password@localhost:7433/cipherstash" } +env = { DATABASE_URL = "postgresql://cipherstash:password@localhost:7433/cipherstash" } run = """ cargo test --test bench_perf_tests -- --ignored --nocapture --test-threads=1 """ diff --git a/tests/sqlx/tests/bench_perf_tests.rs b/tests/sqlx/tests/bench_perf_tests.rs index 95d3f3fa..911cf4d0 100644 --- a/tests/sqlx/tests/bench_perf_tests.rs +++ b/tests/sqlx/tests/bench_perf_tests.rs @@ -5,7 +5,7 @@ //! `cargo test --test bench_perf_tests -- --ignored`. //! //! Unlike Tier 1 tests, these use #[tokio::test] with a manual pool connected -//! via BENCH_DATABASE_URL against a pre-loaded 100K-row dataset. +//! via DATABASE_URL against a pre-loaded 100K-row dataset (set by `mise run bench:full`). //! //! Each test: //! 1. Resets pg_stat_statements @@ -28,8 +28,8 @@ const RUNS: i64 = 1000; const DATASET_ROWS: i64 = 100_000; async fn connect() -> Result { - let url = std::env::var("BENCH_DATABASE_URL") - .expect("BENCH_DATABASE_URL must be set (run `mise run bench:full`)"); + let url = std::env::var("DATABASE_URL") + .expect("DATABASE_URL must be set (run `mise run bench:full`)"); let pool = PgPoolOptions::new() .max_connections(4) .connect(&url) @@ -40,7 +40,7 @@ async fn connect() -> Result { /// P0 baseline: hmac_256 equality should stay ~0.5ms at 100K rows. 
#[tokio::test] -#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"] +#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"] async fn hmac_256_equality() -> Result<()> { let pool = connect().await?; @@ -83,7 +83,7 @@ async fn hmac_256_equality() -> Result<()> { /// P2: bloom_filter containment — expected ~3.35ms at 100K rows. #[tokio::test] -#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"] +#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"] async fn bloom_filter_containment() -> Result<()> { let pool = connect().await?; let encrypted: String = @@ -119,7 +119,7 @@ async fn bloom_filter_containment() -> Result<()> { /// P0: eql_cast equality — currently seq scans (CIP-2831). Report records the /// actual plan + timing so the number is visible week-over-week until the fix ships. #[tokio::test] -#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"] +#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"] async fn eql_cast_equality() -> Result<()> { let pool = connect().await?; let encrypted: String = @@ -152,7 +152,7 @@ async fn eql_cast_equality() -> Result<()> { /// P0: ORE equality via operator class — currently seq scans (CIP-2831). #[tokio::test] -#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"] +#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"] async fn ore_equality_opclass() -> Result<()> { let pool = connect().await?; let encrypted: String = @@ -185,7 +185,7 @@ async fn ore_equality_opclass() -> Result<()> { /// P1: ORE range < with LIMIT — expected ~1.93ms at 100K rows. 
#[tokio::test] -#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"] +#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"] async fn ore_range_lt_limit() -> Result<()> { let pool = connect().await?; let encrypted: String = @@ -221,7 +221,7 @@ async fn ore_range_lt_limit() -> Result<()> { /// P1: ORE ORDER BY encrypted_int LIMIT 10 — design doc observes ~543ms at 10K, /// so expect several seconds at 100K. Report captures actual number. #[tokio::test] -#[ignore = "Tier 2: requires BENCH_DATABASE_URL and pre-loaded bench data"] +#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"] async fn ore_order_by_limit() -> Result<()> { let pool = connect().await?; From c2dc431fab38f109e5f4129f357127c9ff538609 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 22 Apr 2026 17:24:46 +1000 Subject: [PATCH 25/28] =?UTF-8?q?perf(ci):=20mark=20slow=20perf/O(n=C2=B2)?= =?UTF-8?q?=20tests=20as=20#[ignore]=20to=20cut=20PR=20runtime?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Default cargo test now skips 9 tests that dominated CI time: order_by_sort_tests (30 tests → 25, 401.9s → 8.98s): - filtered_inner_query_correct_order - sort_compare_faster_than_correlated_subquery - filtered_inner_query_faster_than_unfiltered - sort_compare_performance_at_scale - filtered_inner_query_performance_at_scale order_by_no_opclass_tests (12 tests → 8, ~88s → 1.06s): - correlated_subquery_ranking_{asc,desc,with_limit,with_where}_without_opclass These tests assert timing relationships or demonstrate O(n²) behaviour over 1000-row ORE fixtures; they don't catch correctness regressions on the PR path. Run them with `cargo test -- --ignored` (all 9 pass in ~7m 28s). Measured on local PG17 for the two affected binaries combined: before: ~491s after: ~10s (~49× faster) Projected CI impact on test-eql.yml: cargo test step drops from ~22m to ~2m per Postgres version. 
--- tests/sqlx/tests/order_by_no_opclass_tests.rs | 4 ++++ tests/sqlx/tests/order_by_sort_tests.rs | 5 +++++ 2 files changed, 9 insertions(+) diff --git a/tests/sqlx/tests/order_by_no_opclass_tests.rs b/tests/sqlx/tests/order_by_no_opclass_tests.rs index 136a4ce4..201e59d3 100644 --- a/tests/sqlx/tests/order_by_no_opclass_tests.rs +++ b/tests/sqlx/tests/order_by_no_opclass_tests.rs @@ -169,6 +169,7 @@ async fn direct_order_by_desc_wrong_order_without_opclass(pool: PgPool) -> Resul // ============================================================================ #[sqlx::test(fixtures(path = "../fixtures", scripts("drop_operator_classes")))] +#[ignore = "slow: O(n²) correlated subquery over 1000 rows; run with --ignored"] async fn correlated_subquery_ranking_asc_without_opclass(pool: PgPool) -> Result<()> { // eql_v2.compare() is a standalone function (not an operator), so it survives // the operator class drops. A correlated subquery counts how many rows have a @@ -198,6 +199,7 @@ async fn correlated_subquery_ranking_asc_without_opclass(pool: PgPool) -> Result } #[sqlx::test(fixtures(path = "../fixtures", scripts("drop_operator_classes")))] +#[ignore = "slow: O(n²) correlated subquery over 1000 rows; run with --ignored"] async fn correlated_subquery_ranking_desc_without_opclass(pool: PgPool) -> Result<()> { // Same correlated subquery with DESC — should return highest-ranked rows first. 
@@ -220,6 +222,7 @@ async fn correlated_subquery_ranking_desc_without_opclass(pool: PgPool) -> Resul } #[sqlx::test(fixtures(path = "../fixtures", scripts("drop_operator_classes")))] +#[ignore = "slow: O(n²) correlated subquery over 1000 rows; run with --ignored"] async fn correlated_subquery_ranking_with_limit_without_opclass(pool: PgPool) -> Result<()> { // LIMIT 1 with ASC subquery ranking should return the smallest value (id=1) @@ -238,6 +241,7 @@ async fn correlated_subquery_ranking_with_limit_without_opclass(pool: PgPool) -> } #[sqlx::test(fixtures(path = "../fixtures", scripts("drop_operator_classes")))] +#[ignore = "slow: O(n²) correlated subquery over 1000 rows; run with --ignored"] async fn correlated_subquery_ranking_with_where_without_opclass(pool: PgPool) -> Result<()> { // WHERE clause filters rows, then correlated subquery orders the result correctly. // Note: the subquery counts over the full table to produce a global rank. diff --git a/tests/sqlx/tests/order_by_sort_tests.rs b/tests/sqlx/tests/order_by_sort_tests.rs index 33b69cbf..853e9e4a 100644 --- a/tests/sqlx/tests/order_by_sort_tests.rs +++ b/tests/sqlx/tests/order_by_sort_tests.rs @@ -522,6 +522,7 @@ async fn sort_compare_table_ref_matches_order_by_compare(pool: PgPool) -> Result // ============================================================================ #[sqlx::test(fixtures(path = "../fixtures", scripts("drop_operator_classes")))] +#[ignore = "slow: O(n²) correlated subquery over 1000 rows (~7m); run with --ignored"] async fn filtered_inner_query_correct_order(pool: PgPool) -> Result<()> { // Optimized: inner query also filters, producing correct relative ordering // within the filtered set @@ -602,6 +603,7 @@ async fn filtered_inner_query_with_range(pool: PgPool) -> Result<()> { // ============================================================================ #[sqlx::test(fixtures(path = "../fixtures", scripts("drop_operator_classes")))] +#[ignore = "perf: relative timing assertion at 
1000 rows; run with --ignored"] async fn sort_compare_faster_than_correlated_subquery(pool: PgPool) -> Result<()> { // Warm up: run each query once to populate caches let sort_sql = "SELECT * FROM eql_v2.sort_compare( @@ -644,6 +646,7 @@ async fn sort_compare_faster_than_correlated_subquery(pool: PgPool) -> Result<() } #[sqlx::test(fixtures(path = "../fixtures", scripts("drop_operator_classes")))] +#[ignore = "perf: relative timing assertion at 1000 rows; run with --ignored"] async fn filtered_inner_query_faster_than_unfiltered(pool: PgPool) -> Result<()> { let ore_term = get_ore_encrypted(&pool, 42).await?; @@ -703,6 +706,7 @@ async fn filtered_inner_query_faster_than_unfiltered(pool: PgPool) -> Result<()> // ============================================================================ #[sqlx::test(fixtures(path = "../fixtures", scripts("drop_operator_classes")))] +#[ignore = "perf: O(n log n) vs O(n²) demonstration at 1000 rows; run with --ignored"] async fn sort_compare_performance_at_scale(pool: PgPool) -> Result<()> { // 1000 rows is sufficient scale to demonstrate O(n log n) vs O(n²) let sort_sql = "SELECT * FROM eql_v2.sort_compare( @@ -738,6 +742,7 @@ async fn sort_compare_performance_at_scale(pool: PgPool) -> Result<()> { } #[sqlx::test(fixtures(path = "../fixtures", scripts("drop_operator_classes")))] +#[ignore = "perf: timing assertion at 1000 rows; run with --ignored"] async fn filtered_inner_query_performance_at_scale(pool: PgPool) -> Result<()> { let ore_term = get_ore_encrypted(&pool, 42).await?; From 511d97be20c670f821cd73a81df4b27fc6515c4b Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 22 Apr 2026 17:29:11 +1000 Subject: [PATCH 26/28] style(bench): apply cargo fmt to reports.rs and bench_perf_tests.rs Pure whitespace: collapses/expands await chains and use-imports to match rustfmt default. No behaviour change. Unblocks test-eql.yml lint step (cargo fmt --check was failing). 
--- tests/sqlx/src/reports.rs | 11 ++---- tests/sqlx/tests/bench_perf_tests.rs | 57 ++++++++++++++++------------ 2 files changed, 37 insertions(+), 31 deletions(-) diff --git a/tests/sqlx/src/reports.rs b/tests/sqlx/src/reports.rs index b02a36af..8860901e 100644 --- a/tests/sqlx/src/reports.rs +++ b/tests/sqlx/src/reports.rs @@ -78,16 +78,13 @@ pub fn write_reports( results, }; - fs::create_dir_all(output_dir) - .with_context(|| format!("creating output dir {output_dir}"))?; + fs::create_dir_all(output_dir).with_context(|| format!("creating output dir {output_dir}"))?; let json_path = PathBuf::from(output_dir).join(format!("benchmark-{date}.json")); - let md_path = PathBuf::from(output_dir).join(format!("benchmark-{date}.md")); + let md_path = PathBuf::from(output_dir).join(format!("benchmark-{date}.md")); - let json = serde_json::to_string_pretty(&report) - .context("serializing report to JSON")?; - fs::write(&json_path, json) - .with_context(|| format!("writing {}", json_path.display()))?; + let json = serde_json::to_string_pretty(&report).context("serializing report to JSON")?; + fs::write(&json_path, json).with_context(|| format!("writing {}", json_path.display()))?; fs::write(&md_path, render_markdown(&report)) .with_context(|| format!("writing {}", md_path.display()))?; diff --git a/tests/sqlx/tests/bench_perf_tests.rs b/tests/sqlx/tests/bench_perf_tests.rs index 911cf4d0..f7d68178 100644 --- a/tests/sqlx/tests/bench_perf_tests.rs +++ b/tests/sqlx/tests/bench_perf_tests.rs @@ -18,8 +18,8 @@ use anyhow::Result; use eql_tests::{ - append_result, ensure_pg_stat_statements, read_pg_stat_statements, - reset_pg_stat_statements, write_reports, PerfResult, + append_result, ensure_pg_stat_statements, read_pg_stat_statements, reset_pg_stat_statements, + write_reports, PerfResult, }; use sqlx::postgres::PgPoolOptions; use sqlx::PgPool; @@ -44,11 +44,10 @@ async fn connect() -> Result { async fn hmac_256_equality() -> Result<()> { let pool = connect().await?; - let 
encrypted: String = sqlx::query_scalar( - "SELECT (encrypted_text).data::text FROM bench WHERE id = 1", - ) - .fetch_one(&pool) - .await?; + let encrypted: String = + sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1") + .fetch_one(&pool) + .await?; reset_pg_stat_statements(&pool).await?; @@ -88,7 +87,8 @@ async fn bloom_filter_containment() -> Result<()> { let pool = connect().await?; let encrypted: String = sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1") - .fetch_one(&pool).await?; + .fetch_one(&pool) + .await?; reset_pg_stat_statements(&pool).await?; for _ in 0..RUNS { @@ -101,7 +101,8 @@ async fn bloom_filter_containment() -> Result<()> { let stats = read_pg_stat_statements( &pool, "%eql_v2.bloom_filter(encrypted_text) @> eql_v2.bloom_filter($%", - ).await?; + ) + .await?; append_result(PerfResult { name: "bloom_filter_containment".into(), @@ -124,18 +125,21 @@ async fn eql_cast_equality() -> Result<()> { let pool = connect().await?; let encrypted: String = sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1") - .fetch_one(&pool).await?; + .fetch_one(&pool) + .await?; reset_pg_stat_statements(&pool).await?; for _ in 0..RUNS { sqlx::query("SELECT * FROM bench WHERE encrypted_text = $1::jsonb::eql_v2_encrypted") .bind(&encrypted) - .fetch_all(&pool).await?; + .fetch_all(&pool) + .await?; } let stats = read_pg_stat_statements( &pool, "%FROM bench WHERE encrypted_text = $%::jsonb::eql_v2_encrypted%", - ).await?; + ) + .await?; append_result(PerfResult { name: "eql_cast_equality".into(), @@ -157,18 +161,21 @@ async fn ore_equality_opclass() -> Result<()> { let pool = connect().await?; let encrypted: String = sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 1") - .fetch_one(&pool).await?; + .fetch_one(&pool) + .await?; reset_pg_stat_statements(&pool).await?; for _ in 0..RUNS { sqlx::query("SELECT * FROM bench WHERE encrypted_int = 
$1::jsonb::eql_v2_encrypted") .bind(&encrypted) - .fetch_all(&pool).await?; + .fetch_all(&pool) + .await?; } let stats = read_pg_stat_statements( &pool, "%FROM bench WHERE encrypted_int = $%::jsonb::eql_v2_encrypted%", - ).await?; + ) + .await?; append_result(PerfResult { name: "ore_equality_opclass".into(), @@ -190,7 +197,8 @@ async fn ore_range_lt_limit() -> Result<()> { let pool = connect().await?; let encrypted: String = sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 50000") - .fetch_one(&pool).await?; + .fetch_one(&pool) + .await?; reset_pg_stat_statements(&pool).await?; for _ in 0..RUNS { @@ -203,7 +211,8 @@ async fn ore_range_lt_limit() -> Result<()> { let stats = read_pg_stat_statements( &pool, "%FROM bench WHERE encrypted_int < $%ORDER BY encrypted_int LIMIT %", - ).await?; + ) + .await?; append_result(PerfResult { name: "ore_range_lt_limit".into(), @@ -228,12 +237,11 @@ async fn ore_order_by_limit() -> Result<()> { reset_pg_stat_statements(&pool).await?; for _ in 0..RUNS { sqlx::query("SELECT * FROM bench ORDER BY encrypted_int LIMIT 10") - .fetch_all(&pool).await?; + .fetch_all(&pool) + .await?; } - let stats = read_pg_stat_statements( - &pool, - "%FROM bench ORDER BY encrypted_int LIMIT %", - ).await?; + let stats = + read_pg_stat_statements(&pool, "%FROM bench ORDER BY encrypted_int LIMIT %").await?; append_result(PerfResult { name: "ore_order_by_limit".into(), @@ -254,8 +262,9 @@ async fn ore_order_by_limit() -> Result<()> { #[ignore = "Tier 2: report writer, runs last under --test-threads=1"] async fn zz_write_reports() -> Result<()> { let pool = connect().await?; - let pg_version: String = - sqlx::query_scalar("SHOW server_version_num").fetch_one(&pool).await?; + let pg_version: String = sqlx::query_scalar("SHOW server_version_num") + .fetch_one(&pool) + .await?; // server_version_num is "170004" etc — take the major version digits let pg_major = pg_version .get(..pg_version.len().saturating_sub(4)) From 
e5fff4daad82630d8bb2c44e9ab31c9ac7ca2b93 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Fri, 24 Apr 2026 12:31:24 +1000 Subject: [PATCH 27/28] fix(bench): address CodeRabbit review feedback on PR #173 - use valid `ubuntu-latest` runner label and split SC2155 export - bind docker-compose host ports to 127.0.0.1 - capture actual plan_type per benchmark via EXPLAIN instead of hard-coding - replace zz_write_reports alphabetical-ordering hack with explicit `run_all_benchmarks` orchestrator - parameterize bench_regression_tests via explain_analyze_avg_bound to avoid encrypted-value string interpolation - capture report timestamp at write-time via OffsetDateTime::now_utc - correct FIXTURE_SCHEMA bench_data ORE id mapping documentation --- .github/workflows/benchmark.yml | 5 +- tasks/bench.toml | 2 +- tests/benchmarks/docker-compose.yml | 4 +- tests/sqlx/Cargo.lock | 53 +++++ tests/sqlx/Cargo.toml | 1 + tests/sqlx/fixtures/FIXTURE_SCHEMA.md | 4 +- tests/sqlx/src/helpers.rs | 121 ++++++++--- tests/sqlx/src/lib.rs | 11 +- tests/sqlx/src/reports.rs | 20 +- tests/sqlx/tests/bench_perf_tests.rs | 227 ++++++++++++--------- tests/sqlx/tests/bench_regression_tests.rs | 39 ++-- 11 files changed, 334 insertions(+), 153 deletions(-) diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 5bc386f2..b16871ab 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -17,7 +17,7 @@ env: jobs: benchmark: name: "100K dataset benchmark (Postgres 17)" - runs-on: ubuntu-latest-m + runs-on: ubuntu-latest timeout-minutes: 60 steps: @@ -60,7 +60,8 @@ jobs: - name: Run Tier 2 benchmark suite run: | - export BENCH_REPORT_DATE="$(date -u +%Y-%m-%d)-${{ github.run_id }}" + BENCH_REPORT_DATE="$(date -u +%Y-%m-%d)-${{ github.run_id }}" + export BENCH_REPORT_DATE mise run bench:full - name: Tear down containers diff --git a/tasks/bench.toml b/tasks/bench.toml index df21bd55..f6cd8d13 100644 --- a/tasks/bench.toml +++ b/tasks/bench.toml @@ -31,5 
+31,5 @@ description = "Run full Tier 2 benchmark suite against bench-postgres" dir = "{{config_root}}/tests/sqlx" env = { DATABASE_URL = "postgresql://cipherstash:password@localhost:7433/cipherstash" } run = """ -cargo test --test bench_perf_tests -- --ignored --nocapture --test-threads=1 +cargo test --test bench_perf_tests run_all_benchmarks -- --ignored --nocapture """ diff --git a/tests/benchmarks/docker-compose.yml b/tests/benchmarks/docker-compose.yml index f8d47d48..bd35ba65 100644 --- a/tests/benchmarks/docker-compose.yml +++ b/tests/benchmarks/docker-compose.yml @@ -9,7 +9,7 @@ services: -c pg_stat_statements.track=all -c pg_stat_statements.max=10000 ports: - - "7433:5432" + - "127.0.0.1:7433:5432" environment: POSTGRES_DB: cipherstash POSTGRES_USER: cipherstash @@ -26,7 +26,7 @@ services: image: cipherstash/proxy:latest container_name: bench-proxy ports: - - "6433:6432" + - "127.0.0.1:6433:6432" environment: CS_DATABASE__NAME: cipherstash CS_DATABASE__USERNAME: cipherstash diff --git a/tests/sqlx/Cargo.lock b/tests/sqlx/Cargo.lock index a1060773..66f047d2 100644 --- a/tests/sqlx/Cargo.lock +++ b/tests/sqlx/Cargo.lock @@ -152,6 +152,15 @@ dependencies = [ "zeroize", ] +[[package]] +name = "deranged" +version = "0.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" +dependencies = [ + "powerfmt", +] + [[package]] name = "digest" version = "0.10.7" @@ -198,6 +207,7 @@ dependencies = [ "serde", "serde_json", "sqlx", + "time", "tokio", ] @@ -642,6 +652,12 @@ dependencies = [ "zeroize", ] +[[package]] +name = "num-conv" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" + [[package]] name = "num-integer" version = "0.1.46" @@ -770,6 +786,12 @@ dependencies = [ "zerovec", ] +[[package]] +name = "powerfmt" +version = "0.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" + [[package]] name = "ppv-lite86" version = "0.2.21" @@ -1261,6 +1283,37 @@ dependencies = [ "syn", ] +[[package]] +name = "time" +version = "0.3.47" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" +dependencies = [ + "deranged", + "itoa", + "num-conv", + "powerfmt", + "serde_core", + "time-core", + "time-macros", +] + +[[package]] +name = "time-core" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" + +[[package]] +name = "time-macros" +version = "0.2.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" +dependencies = [ + "num-conv", + "time-core", +] + [[package]] name = "tinystr" version = "0.8.1" diff --git a/tests/sqlx/Cargo.toml b/tests/sqlx/Cargo.toml index 025d697d..acff2489 100644 --- a/tests/sqlx/Cargo.toml +++ b/tests/sqlx/Cargo.toml @@ -9,6 +9,7 @@ tokio = { version = "1", features = ["full"] } serde = { version = "1", features = ["derive"] } serde_json = "1" anyhow = "1" +time = { version = "0.3", features = ["formatting", "std"] } [dev-dependencies] # None needed - tests live in this crate diff --git a/tests/sqlx/fixtures/FIXTURE_SCHEMA.md b/tests/sqlx/fixtures/FIXTURE_SCHEMA.md index 52c2daa7..93c34e05 100644 --- a/tests/sqlx/fixtures/FIXTURE_SCHEMA.md +++ b/tests/sqlx/fixtures/FIXTURE_SCHEMA.md @@ -120,7 +120,7 @@ CREATE TABLE ore ( **Helper Functions:** - `get_ore_encrypted(pool, id)` - Selects encrypted value from ore table -- `create_encrypted_json(id)` - Looks up ore table at `id * 10` (valid ids: 1-9 → ore lookups: 10-90) +- `create_encrypted_json(id)` - Looks up ore table at `id * 10` (valid ids: 1-99 → ore 
lookups: 10-990) **Key Property:** - Sequential numeric values enable deterministic comparison tests @@ -152,7 +152,7 @@ CREATE TABLE bench ( ``` **Data:** -- 10,000 rows drawn from 99 distinct encrypted values (ore ids 1-99) +- 10,000 rows drawn from 99 distinct encrypted values. Caller ids `1..99` are Zipf-skewed and resolve via `create_encrypted_json(id)` to ORE ids `{10, 20, …, 990}` (see the ORE helper description above) - Zipf-like skew via `setseed(0.42)` + `random()^2` — deterministic and byte-identical across runs - Top id gets ~5% of rows; tail ids ~0.5% each (top:bottom ratio ~10x) - Each column draws independently, so column values are decorrelated within a row diff --git a/tests/sqlx/src/helpers.rs b/tests/sqlx/src/helpers.rs index 6bf5fc4c..e3864433 100644 --- a/tests/sqlx/src/helpers.rs +++ b/tests/sqlx/src/helpers.rs @@ -527,6 +527,35 @@ pub async fn explain_json(pool: &PgPool, query: &str) -> Result Result { + let sql = format!("EXPLAIN (FORMAT JSON) {}", query); + let mut q = sqlx::query_scalar::<_, serde_json::Value>(&sql); + for p in params { + q = q.bind(*p); + } + let plan = q + .fetch_one(pool) + .await + .with_context(|| format!("running EXPLAIN (FORMAT JSON) on query: {}", query))?; + Ok(plan[0]["Plan"]["Node Type"] + .as_str() + .with_context(|| format!("extracting Plan.Node Type from EXPLAIN on query: {}", query))? + .to_string()) +} + /// Run EXPLAIN ANALYZE multiple times and return averaged statistics /// /// Executes `EXPLAIN (ANALYZE, FORMAT JSON) {query}` the specified number of times @@ -557,6 +586,37 @@ pub async fn explain_json(pool: &PgPool, query: &str) -> Result Result { + explain_analyze_avg_bound(pool, query, &[], runs).await +} + +/// Run EXPLAIN ANALYZE multiple times with bound parameters and return averaged statistics +/// +/// Like `explain_analyze_avg`, but supports `$1`, `$2`, ... placeholders in the +/// query. 
String parameters are bound via sqlx, avoiding SQL injection and +/// quoting issues when the value may contain `'` characters (for example, +/// encrypted JSON payloads surfaced via `::jsonb::eql_v2_encrypted`). +/// +/// # Arguments +/// * `pool` - Database connection pool +/// * `query` - SQL query with `$N` placeholders (no EXPLAIN prefix) +/// * `params` - String parameters bound in order ($1 → params[0], etc.) +/// * `runs` - Number of times to execute (must be >= 1) +/// +/// # Example +/// ```ignore +/// let stats = explain_analyze_avg_bound( +/// &pool, +/// "SELECT * FROM bench WHERE eql_v2.hmac_256(col) = eql_v2.hmac_256($1::jsonb::eql_v2_encrypted)", +/// &[&encrypted], +/// 5, +/// ).await?; +/// ``` +pub async fn explain_analyze_avg_bound( + pool: &PgPool, + query: &str, + params: &[&str], + runs: usize, +) -> Result { anyhow::ensure!(runs >= 1, "runs must be >= 1, got {}", runs); let sql = format!("EXPLAIN (ANALYZE, FORMAT JSON) {}", query); @@ -566,39 +626,27 @@ pub async fn explain_analyze_avg(pool: &PgPool, query: &str, runs: usize) -> Res let mut node_type = String::new(); for i in 0..runs { - let plan: serde_json::Value = sqlx::query_scalar(&sql) - .fetch_one(pool) - .await - .with_context(|| { - format!( - "running EXPLAIN ANALYZE (run {}/{}) on query: {}", - i + 1, - runs, - query - ) - })?; + let mut q = sqlx::query_scalar::<_, serde_json::Value>(&sql); + for p in params { + q = q.bind(*p); + } + let plan = q.fetch_one(pool).await.with_context(|| { + format!( + "running EXPLAIN ANALYZE (run {}/{}) on query: {}", + i + 1, + runs, + query + ) + })?; // EXPLAIN (ANALYZE, FORMAT JSON) returns: // [{"Plan": {...}, "Planning Time": N, "Execution Time": N}] let entry = &plan[0]; - - let exec_time = entry["Execution Time"] - .as_f64() - .with_context(|| format!("extracting Execution Time on run {}/{}", i + 1, runs))?; - - let plan_time = entry["Planning Time"] - .as_f64() - .with_context(|| format!("extracting Planning Time on run {}/{}", i + 1, 
runs))?; - + let (exec_time, plan_time, nt) = parse_explain_entry(entry, i + 1, runs)?; total_execution_ms += exec_time; total_planning_ms += plan_time; - - // Capture node type from first run only if i == 0 { - node_type = entry["Plan"]["Node Type"] - .as_str() - .with_context(|| "extracting Node Type from first run")? - .to_string(); + node_type = nt; } } @@ -610,6 +658,27 @@ pub async fn explain_analyze_avg(pool: &PgPool, query: &str, runs: usize) -> Res }) } +fn parse_explain_entry( + entry: &serde_json::Value, + run_num: usize, + total_runs: usize, +) -> Result<(f64, f64, String)> { + let exec_time = entry["Execution Time"].as_f64().with_context(|| { + format!( + "extracting Execution Time on run {}/{}", + run_num, total_runs + ) + })?; + let plan_time = entry["Planning Time"] + .as_f64() + .with_context(|| format!("extracting Planning Time on run {}/{}", run_num, total_runs))?; + let node_type = entry["Plan"]["Node Type"] + .as_str() + .with_context(|| format!("extracting Node Type on run {}/{}", run_num, total_runs))? + .to_string(); + Ok((exec_time, plan_time, node_type)) +} + /// Assert that a JSON EXPLAIN plan does not use any sequential scan /// /// Recursively walks the JSON plan tree checking all "Node Type" fields. 
diff --git a/tests/sqlx/src/lib.rs b/tests/sqlx/src/lib.rs index f979ed85..973a94a9 100644 --- a/tests/sqlx/src/lib.rs +++ b/tests/sqlx/src/lib.rs @@ -14,11 +14,12 @@ pub use assertions::QueryAssertion; pub use helpers::{ analyze_table, assert_no_seq_scan, assert_sequential_ids, assert_uses_index, assert_uses_seq_scan, create_jsonb_gin_index, ensure_pg_stat_statements, explain_analyze_avg, - explain_json, explain_query, get_bench_encrypted_int, get_bench_encrypted_text, - get_encrypted_term, get_ore_encrypted, get_ore_encrypted_as_jsonb, get_ore_text_encrypted, - get_ore_text_encrypted_as_jsonb, get_ste_vec_encrypted, get_ste_vec_encrypted_pair, - get_ste_vec_selector_term, get_ste_vec_sv_element, get_ste_vec_term_by_id, - read_pg_stat_statements, reset_pg_stat_statements, ExplainStats, PgStatEntry, + explain_analyze_avg_bound, explain_json, explain_query, fetch_plan_node_type, + get_bench_encrypted_int, get_bench_encrypted_text, get_encrypted_term, get_ore_encrypted, + get_ore_encrypted_as_jsonb, get_ore_text_encrypted, get_ore_text_encrypted_as_jsonb, + get_ste_vec_encrypted, get_ste_vec_encrypted_pair, get_ste_vec_selector_term, + get_ste_vec_sv_element, get_ste_vec_term_by_id, read_pg_stat_statements, + reset_pg_stat_statements, ExplainStats, PgStatEntry, }; pub use index_types as IndexTypes; pub use reports::{append_result, write_reports, PerfResult}; diff --git a/tests/sqlx/src/reports.rs b/tests/sqlx/src/reports.rs index 8860901e..1dbd9b56 100644 --- a/tests/sqlx/src/reports.rs +++ b/tests/sqlx/src/reports.rs @@ -1,8 +1,9 @@ //! Benchmark report writer for Tier 2 scheduled benchmarks. //! -//! Each `#[ignore]` test in `bench_perf_tests.rs` pushes a `PerfResult` into -//! `append_result`. A teardown-style test (run last, alphabetical order) calls -//! `write_reports` to flush all accumulated results to JSON + Markdown. +//! Each `#[ignore]` benchmark in `bench_perf_tests.rs` pushes a `PerfResult` +//! into `append_result`. 
The `run_all_benchmarks` orchestrator invokes each +//! benchmark in sequence and then calls `write_reports` to flush all +//! accumulated results to JSON + Markdown. //! //! Output shape matches the design doc (.work/eql-index-performance/ //! 2026-03-30-benchmarking-design.md §Report Format) with one caveat: the @@ -16,6 +17,8 @@ use serde::Serialize; use std::fs; use std::path::PathBuf; use std::sync::Mutex; +use time::format_description::well_known::Rfc3339; +use time::OffsetDateTime; /// One benchmark case result. #[derive(Debug, Clone, Serialize)] @@ -62,8 +65,10 @@ pub fn append_result(r: PerfResult) { /// `/benchmark-.json` /// `/benchmark-.md` /// -/// `date` is an ISO-8601 date string provided by the caller (usually today). -/// `postgres_version` and `dataset_rows` are embedded in the report header. +/// `date` is used only as a filename suffix (any caller-supplied string, +/// typically `YYYY-MM-DD` with an optional run-id suffix for uniqueness). +/// The report's `timestamp` field is captured at write time as RFC3339 UTC +/// and is independent of `date`. pub fn write_reports( output_dir: &str, date: &str, @@ -71,8 +76,11 @@ pub fn write_reports( dataset_rows: i64, ) -> Result<(PathBuf, PathBuf)> { let results = RESULTS.lock().expect("results mutex poisoned").clone(); + let timestamp = OffsetDateTime::now_utc() + .format(&Rfc3339) + .context("formatting RFC3339 write-time timestamp")?; let report = BenchmarkReport { - timestamp: format!("{date}T00:00:00Z"), + timestamp, postgres_version: postgres_version.to_string(), dataset_rows, results, diff --git a/tests/sqlx/tests/bench_perf_tests.rs b/tests/sqlx/tests/bench_perf_tests.rs index f7d68178..63f6b5d7 100644 --- a/tests/sqlx/tests/bench_perf_tests.rs +++ b/tests/sqlx/tests/bench_perf_tests.rs @@ -1,25 +1,29 @@ //! Tier 2 scheduled benchmarks. //! //! All tests are marked #[ignore] so regular CI doesn't run them. The scheduled -//! workflow in .github/workflows/benchmark.yml invokes them via -//! 
`cargo test --test bench_perf_tests -- --ignored`. +//! workflow in .github/workflows/benchmark.yml invokes the orchestrator: +//! `cargo test --test bench_perf_tests run_all_benchmarks -- --ignored`. //! //! Unlike Tier 1 tests, these use #[tokio::test] with a manual pool connected //! via DATABASE_URL against a pre-loaded 100K-row dataset (set by `mise run bench:full`). //! -//! Each test: +//! Each benchmark: //! 1. Resets pg_stat_statements -//! 2. Runs its query pattern 1000 times -//! 3. Reads pg_stat_statements for the match -//! 4. Appends a PerfResult to the shared accumulator +//! 2. Captures the actual query plan via EXPLAIN (FORMAT JSON) +//! 3. Runs its query pattern 1000 times +//! 4. Reads pg_stat_statements for the match +//! 5. Appends a PerfResult to the shared accumulator //! -//! A single `zz_write_reports` test (alphabetical last) flushes the accumulator -//! to JSON + Markdown. --test-threads=1 guarantees ordering. +//! The `run_all_benchmarks` orchestrator invokes each benchmark helper in +//! sequence and then calls `flush_reports` to write JSON + Markdown. Individual +//! `#[tokio::test] #[ignore]` wrappers are retained so developers can run a +//! single benchmark in isolation, but they do NOT write reports (the +//! orchestrator owns report emission). use anyhow::Result; use eql_tests::{ - append_result, ensure_pg_stat_statements, read_pg_stat_statements, reset_pg_stat_statements, - write_reports, PerfResult, + append_result, ensure_pg_stat_statements, fetch_plan_node_type, read_pg_stat_statements, + reset_pg_stat_statements, write_reports, PerfResult, }; use sqlx::postgres::PgPoolOptions; use sqlx::PgPool; @@ -38,30 +42,30 @@ async fn connect() -> Result { Ok(pool) } -/// P0 baseline: hmac_256 equality should stay ~0.5ms at 100K rows. 
-#[tokio::test] -#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"] -async fn hmac_256_equality() -> Result<()> { - let pool = connect().await?; +// ============================================================================ +// Benchmark bodies — each is an async fn that takes a &PgPool. Thin test +// wrappers below allow running one benchmark in isolation; the orchestrator +// invokes the bodies directly. +// ============================================================================ +/// P0 baseline: hmac_256 equality should stay ~0.5ms at 100K rows. +async fn bench_hmac_256_equality(pool: &PgPool) -> Result<()> { let encrypted: String = sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1") - .fetch_one(&pool) + .fetch_one(pool) .await?; - reset_pg_stat_statements(&pool).await?; + let query = "SELECT * FROM bench WHERE eql_v2.hmac_256(encrypted_text) = eql_v2.hmac_256($1::jsonb::eql_v2_encrypted)"; + let plan_type = fetch_plan_node_type(pool, query, &[&encrypted]).await?; + + reset_pg_stat_statements(pool).await?; for _ in 0..RUNS { - sqlx::query( - "SELECT * FROM bench WHERE eql_v2.hmac_256(encrypted_text) = eql_v2.hmac_256($1::jsonb::eql_v2_encrypted)", - ) - .bind(&encrypted) - .fetch_all(&pool) - .await?; + sqlx::query(query).bind(&encrypted).fetch_all(pool).await?; } let stats = read_pg_stat_statements( - &pool, + pool, "%FROM bench WHERE eql_v2.hmac_256(encrypted_text) = eql_v2.hmac_256($%", ) .await?; @@ -70,7 +74,7 @@ async fn hmac_256_equality() -> Result<()> { name: "hmac_256_equality".into(), priority: "P0".into(), runs: stats.calls, - plan_type: "Index Scan".into(), + plan_type, mean_ms: stats.mean_exec_time, stddev_ms: stats.stddev_exec_time, total_ms: stats.total_exec_time, @@ -81,25 +85,21 @@ async fn hmac_256_equality() -> Result<()> { } /// P2: bloom_filter containment — expected ~3.35ms at 100K rows. 
-#[tokio::test] -#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"] -async fn bloom_filter_containment() -> Result<()> { - let pool = connect().await?; +async fn bench_bloom_filter_containment(pool: &PgPool) -> Result<()> { let encrypted: String = sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1") - .fetch_one(&pool) + .fetch_one(pool) .await?; - reset_pg_stat_statements(&pool).await?; + let query = "SELECT * FROM bench WHERE eql_v2.bloom_filter(encrypted_text) @> eql_v2.bloom_filter($1::jsonb::eql_v2_encrypted)"; + let plan_type = fetch_plan_node_type(pool, query, &[&encrypted]).await?; + + reset_pg_stat_statements(pool).await?; for _ in 0..RUNS { - sqlx::query( - "SELECT * FROM bench WHERE eql_v2.bloom_filter(encrypted_text) @> eql_v2.bloom_filter($1::jsonb::eql_v2_encrypted)", - ) - .bind(&encrypted) - .fetch_all(&pool).await?; + sqlx::query(query).bind(&encrypted).fetch_all(pool).await?; } let stats = read_pg_stat_statements( - &pool, + pool, "%eql_v2.bloom_filter(encrypted_text) @> eql_v2.bloom_filter($%", ) .await?; @@ -108,7 +108,7 @@ async fn bloom_filter_containment() -> Result<()> { name: "bloom_filter_containment".into(), priority: "P2".into(), runs: stats.calls, - plan_type: "Bitmap Index Scan".into(), + plan_type, mean_ms: stats.mean_exec_time, stddev_ms: stats.stddev_exec_time, total_ms: stats.total_exec_time, @@ -119,24 +119,21 @@ async fn bloom_filter_containment() -> Result<()> { /// P0: eql_cast equality — currently seq scans (CIP-2831). Report records the /// actual plan + timing so the number is visible week-over-week until the fix ships. 
-#[tokio::test] -#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"] -async fn eql_cast_equality() -> Result<()> { - let pool = connect().await?; +async fn bench_eql_cast_equality(pool: &PgPool) -> Result<()> { let encrypted: String = sqlx::query_scalar("SELECT (encrypted_text).data::text FROM bench WHERE id = 1") - .fetch_one(&pool) + .fetch_one(pool) .await?; - reset_pg_stat_statements(&pool).await?; + let query = "SELECT * FROM bench WHERE encrypted_text = $1::jsonb::eql_v2_encrypted"; + let plan_type = fetch_plan_node_type(pool, query, &[&encrypted]).await?; + + reset_pg_stat_statements(pool).await?; for _ in 0..RUNS { - sqlx::query("SELECT * FROM bench WHERE encrypted_text = $1::jsonb::eql_v2_encrypted") - .bind(&encrypted) - .fetch_all(&pool) - .await?; + sqlx::query(query).bind(&encrypted).fetch_all(pool).await?; } let stats = read_pg_stat_statements( - &pool, + pool, "%FROM bench WHERE encrypted_text = $%::jsonb::eql_v2_encrypted%", ) .await?; @@ -145,7 +142,7 @@ async fn eql_cast_equality() -> Result<()> { name: "eql_cast_equality".into(), priority: "P0".into(), runs: stats.calls, - plan_type: "Seq Scan".into(), + plan_type, mean_ms: stats.mean_exec_time, stddev_ms: stats.stddev_exec_time, total_ms: stats.total_exec_time, @@ -155,24 +152,21 @@ async fn eql_cast_equality() -> Result<()> { } /// P0: ORE equality via operator class — currently seq scans (CIP-2831). 
-#[tokio::test] -#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"] -async fn ore_equality_opclass() -> Result<()> { - let pool = connect().await?; +async fn bench_ore_equality_opclass(pool: &PgPool) -> Result<()> { let encrypted: String = sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 1") - .fetch_one(&pool) + .fetch_one(pool) .await?; - reset_pg_stat_statements(&pool).await?; + let query = "SELECT * FROM bench WHERE encrypted_int = $1::jsonb::eql_v2_encrypted"; + let plan_type = fetch_plan_node_type(pool, query, &[&encrypted]).await?; + + reset_pg_stat_statements(pool).await?; for _ in 0..RUNS { - sqlx::query("SELECT * FROM bench WHERE encrypted_int = $1::jsonb::eql_v2_encrypted") - .bind(&encrypted) - .fetch_all(&pool) - .await?; + sqlx::query(query).bind(&encrypted).fetch_all(pool).await?; } let stats = read_pg_stat_statements( - &pool, + pool, "%FROM bench WHERE encrypted_int = $%::jsonb::eql_v2_encrypted%", ) .await?; @@ -181,7 +175,7 @@ async fn ore_equality_opclass() -> Result<()> { name: "ore_equality_opclass".into(), priority: "P0".into(), runs: stats.calls, - plan_type: "Seq Scan".into(), + plan_type, mean_ms: stats.mean_exec_time, stddev_ms: stats.stddev_exec_time, total_ms: stats.total_exec_time, @@ -191,25 +185,21 @@ async fn ore_equality_opclass() -> Result<()> { } /// P1: ORE range < with LIMIT — expected ~1.93ms at 100K rows. 
-#[tokio::test] -#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"] -async fn ore_range_lt_limit() -> Result<()> { - let pool = connect().await?; +async fn bench_ore_range_lt_limit(pool: &PgPool) -> Result<()> { let encrypted: String = sqlx::query_scalar("SELECT (encrypted_int).data::text FROM bench WHERE id = 50000") - .fetch_one(&pool) + .fetch_one(pool) .await?; - reset_pg_stat_statements(&pool).await?; + let query = "SELECT * FROM bench WHERE encrypted_int < $1::jsonb::eql_v2_encrypted ORDER BY encrypted_int LIMIT 10"; + let plan_type = fetch_plan_node_type(pool, query, &[&encrypted]).await?; + + reset_pg_stat_statements(pool).await?; for _ in 0..RUNS { - sqlx::query( - "SELECT * FROM bench WHERE encrypted_int < $1::jsonb::eql_v2_encrypted ORDER BY encrypted_int LIMIT 10", - ) - .bind(&encrypted) - .fetch_all(&pool).await?; + sqlx::query(query).bind(&encrypted).fetch_all(pool).await?; } let stats = read_pg_stat_statements( - &pool, + pool, "%FROM bench WHERE encrypted_int < $%ORDER BY encrypted_int LIMIT %", ) .await?; @@ -218,7 +208,7 @@ async fn ore_range_lt_limit() -> Result<()> { name: "ore_range_lt_limit".into(), priority: "P1".into(), runs: stats.calls, - plan_type: "Index Scan".into(), + plan_type, mean_ms: stats.mean_exec_time, stddev_ms: stats.stddev_exec_time, total_ms: stats.total_exec_time, @@ -229,25 +219,21 @@ async fn ore_range_lt_limit() -> Result<()> { /// P1: ORE ORDER BY encrypted_int LIMIT 10 — design doc observes ~543ms at 10K, /// so expect several seconds at 100K. Report captures actual number. 
-#[tokio::test] -#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"] -async fn ore_order_by_limit() -> Result<()> { - let pool = connect().await?; +async fn bench_ore_order_by_limit(pool: &PgPool) -> Result<()> { + let query = "SELECT * FROM bench ORDER BY encrypted_int LIMIT 10"; + let plan_type = fetch_plan_node_type(pool, query, &[]).await?; - reset_pg_stat_statements(&pool).await?; + reset_pg_stat_statements(pool).await?; for _ in 0..RUNS { - sqlx::query("SELECT * FROM bench ORDER BY encrypted_int LIMIT 10") - .fetch_all(&pool) - .await?; + sqlx::query(query).fetch_all(pool).await?; } - let stats = - read_pg_stat_statements(&pool, "%FROM bench ORDER BY encrypted_int LIMIT %").await?; + let stats = read_pg_stat_statements(pool, "%FROM bench ORDER BY encrypted_int LIMIT %").await?; append_result(PerfResult { name: "ore_order_by_limit".into(), priority: "P1".into(), runs: stats.calls, - plan_type: "Index Scan".into(), + plan_type, mean_ms: stats.mean_exec_time, stddev_ms: stats.stddev_exec_time, total_ms: stats.total_exec_time, @@ -256,14 +242,9 @@ async fn ore_order_by_limit() -> Result<()> { Ok(()) } -/// Alphabetical-last test — flushes accumulated results to disk. -/// Requires `--test-threads=1` so it runs after all benchmark cases. -#[tokio::test] -#[ignore = "Tier 2: report writer, runs last under --test-threads=1"] -async fn zz_write_reports() -> Result<()> { - let pool = connect().await?; +async fn flush_reports(pool: &PgPool) -> Result<()> { let pg_version: String = sqlx::query_scalar("SHOW server_version_num") - .fetch_one(&pool) + .fetch_one(pool) .await?; // server_version_num is "170004" etc — take the major version digits let pg_major = pg_version @@ -287,3 +268,63 @@ fn today_utc() -> String { .expect("invoking date"); String::from_utf8(out.stdout).unwrap().trim().to_string() } + +// ============================================================================ +// Orchestrator — scheduled CI entry point. 
Runs every benchmark in sequence +// and emits the report. +// ============================================================================ + +#[tokio::test] +#[ignore = "Tier 2: run all benchmarks + write reports (invoked by `mise run bench:full`)"] +async fn run_all_benchmarks() -> Result<()> { + let pool = connect().await?; + bench_hmac_256_equality(&pool).await?; + bench_bloom_filter_containment(&pool).await?; + bench_eql_cast_equality(&pool).await?; + bench_ore_equality_opclass(&pool).await?; + bench_ore_range_lt_limit(&pool).await?; + bench_ore_order_by_limit(&pool).await?; + flush_reports(&pool).await +} + +// ============================================================================ +// Individual test wrappers — allow running one benchmark in isolation via +// `cargo test --test bench_perf_tests -- --ignored`. These do NOT +// flush reports; only `run_all_benchmarks` does that. +// ============================================================================ + +#[tokio::test] +#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"] +async fn hmac_256_equality() -> Result<()> { + bench_hmac_256_equality(&connect().await?).await +} + +#[tokio::test] +#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"] +async fn bloom_filter_containment() -> Result<()> { + bench_bloom_filter_containment(&connect().await?).await +} + +#[tokio::test] +#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"] +async fn eql_cast_equality() -> Result<()> { + bench_eql_cast_equality(&connect().await?).await +} + +#[tokio::test] +#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"] +async fn ore_equality_opclass() -> Result<()> { + bench_ore_equality_opclass(&connect().await?).await +} + +#[tokio::test] +#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"] +async fn ore_range_lt_limit() -> Result<()> { + 
bench_ore_range_lt_limit(&connect().await?).await +} + +#[tokio::test] +#[ignore = "Tier 2: run via `mise run bench:full` (requires pre-loaded bench data)"] +async fn ore_order_by_limit() -> Result<()> { + bench_ore_order_by_limit(&connect().await?).await +} diff --git a/tests/sqlx/tests/bench_regression_tests.rs b/tests/sqlx/tests/bench_regression_tests.rs index f3668476..b9c3eb44 100644 --- a/tests/sqlx/tests/bench_regression_tests.rs +++ b/tests/sqlx/tests/bench_regression_tests.rs @@ -12,7 +12,8 @@ use anyhow::Result; use eql_tests::{ - explain_analyze_avg, get_bench_encrypted_int, get_bench_encrypted_text, ExplainStats, + explain_analyze_avg, explain_analyze_avg_bound, get_bench_encrypted_int, + get_bench_encrypted_text, ExplainStats, }; use sqlx::PgPool; @@ -22,11 +23,13 @@ async fn hmac_equality_under_threshold(pool: PgPool) -> Result<()> { // id=1 maps to 1 of 100 distinct values → ~100 matching rows at 10K let encrypted = get_bench_encrypted_text(&pool, 1).await?; - let sql = format!( - "SELECT * FROM bench WHERE eql_v2.hmac_256(encrypted_text) = eql_v2.hmac_256('{}'::jsonb::eql_v2_encrypted)", - encrypted - ); - let stats: ExplainStats = explain_analyze_avg(&pool, &sql, 5).await?; + let stats: ExplainStats = explain_analyze_avg_bound( + &pool, + "SELECT * FROM bench WHERE eql_v2.hmac_256(encrypted_text) = eql_v2.hmac_256($1::jsonb::eql_v2_encrypted)", + &[&encrypted], + 5, + ) + .await?; assert!( stats.execution_time_ms < 50.0, "hmac_256 equality took {:.1}ms, threshold 50ms (expected ~0.5ms at 10K rows, node_type={})", @@ -41,11 +44,13 @@ async fn bloom_filter_containment_under_threshold(pool: PgPool) -> Result<()> { // id=1 maps to 1 of 100 distinct values → ~100 matching rows at 10K let encrypted = get_bench_encrypted_text(&pool, 1).await?; - let sql = format!( - "SELECT * FROM bench WHERE eql_v2.bloom_filter(encrypted_text) @> eql_v2.bloom_filter('{}'::jsonb::eql_v2_encrypted)", - encrypted - ); - let stats: ExplainStats = explain_analyze_avg(&pool, 
&sql, 5).await?; + let stats: ExplainStats = explain_analyze_avg_bound( + &pool, + "SELECT * FROM bench WHERE eql_v2.bloom_filter(encrypted_text) @> eql_v2.bloom_filter($1::jsonb::eql_v2_encrypted)", + &[&encrypted], + 5, + ) + .await?; assert!( stats.execution_time_ms < 100.0, "bloom_filter containment took {:.1}ms, threshold 100ms (expected ~1ms at 10K rows, node_type={})", @@ -61,12 +66,14 @@ async fn ore_range_lt_under_threshold(pool: PgPool) -> Result<()> { // to ore id 83, but the 10K distribution still yields ~4,900 rows below the predicate let encrypted = get_bench_encrypted_int(&pool, 50).await?; - let sql = format!( - "SELECT * FROM bench WHERE encrypted_int < '{}'::jsonb::eql_v2_encrypted \ + let stats: ExplainStats = explain_analyze_avg_bound( + &pool, + "SELECT * FROM bench WHERE encrypted_int < $1::jsonb::eql_v2_encrypted \ ORDER BY encrypted_int LIMIT 10", - encrypted - ); - let stats: ExplainStats = explain_analyze_avg(&pool, &sql, 5).await?; + &[&encrypted], + 5, + ) + .await?; assert!( stats.execution_time_ms < 200.0, "ORE range < LIMIT 10 took {:.1}ms, threshold 200ms (expected ~2ms at 10K rows, node_type={})", From 2ef4dece34214bbc9a5819371a2789357b442cbf Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 29 Apr 2026 13:51:40 +1000 Subject: [PATCH 28/28] perf(bench): reduce RUNS from 1000 to 10 to fit CI timeout Worst-case bench_ore_order_by_limit takes several seconds per run at 100K rows; with RUNS=1000 the single benchmark could exceed the 60-min CI timeout before flush_reports writes the artifact. Drop RUNS to 10 to keep the scheduled job well under budget while still capturing mean/stddev via pg_stat_statements. Addresses CodeRabbit review feedback on PR #173. 
--- tests/benchmarks/README.md | 4 ++-- tests/sqlx/tests/bench_perf_tests.rs | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/benchmarks/README.md b/tests/benchmarks/README.md index 885ddc74..d4aa57db 100644 --- a/tests/benchmarks/README.md +++ b/tests/benchmarks/README.md @@ -7,7 +7,7 @@ encrypted datasets. Complements the Tier 1 tests in `tests/sqlx/tests/bench_*`. - Brings up Postgres + CipherStash Proxy via docker-compose - Inserts 100K plaintext rows through the Proxy (which encrypts them) -- Runs each P0/P1/P2 query pattern 1000 times +- Runs each P0/P1/P2 query pattern 10 times - Reads `pg_stat_statements` for statistical aggregates - Outputs JSON + Markdown reports @@ -39,7 +39,7 @@ GitHub Actions UI (Run workflow button). ## Why a separate workflow - 100K generation takes ~100 seconds via the Proxy -- 1000-run query loops add several minutes per pattern +- The slowest pattern (`bench_ore_order_by_limit`) takes several seconds per run on 100K rows - Regular PR CI must stay under 10 minutes; this suite would blow that budget ## Output diff --git a/tests/sqlx/tests/bench_perf_tests.rs b/tests/sqlx/tests/bench_perf_tests.rs index 63f6b5d7..52f7a0f7 100644 --- a/tests/sqlx/tests/bench_perf_tests.rs +++ b/tests/sqlx/tests/bench_perf_tests.rs @@ -10,7 +10,7 @@ //! Each benchmark: //! 1. Resets pg_stat_statements //! 2. Captures the actual query plan via EXPLAIN (FORMAT JSON) -//! 3. Runs its query pattern 1000 times +//! 3. Runs its query pattern `RUNS` times (currently 10) //! 4. Reads pg_stat_statements for the match //! 5. Appends a PerfResult to the shared accumulator //! @@ -28,7 +28,7 @@ use eql_tests::{ use sqlx::postgres::PgPoolOptions; use sqlx::PgPool; -const RUNS: i64 = 1000; +const RUNS: i64 = 10; const DATASET_ROWS: i64 = 100_000; async fn connect() -> Result {