From 411ec599a8c39a73ab377e425a0dc07e625d800f Mon Sep 17 00:00:00 2001
From: Dan Draper <dan@cipherstash.com>
Date: Mon, 11 May 2026 15:24:55 +1000
Subject: [PATCH 1/3] test(bench): cover GROUP BY / JOIN / DISTINCT plan and
 timing on encrypted columns
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds bench coverage for the three hash-strategy access patterns the Phase 1
hash operator class (#196) enabled but the Phase 2 chain inlining (#202) is
yet to make perf-competitive.

Plan assertions in bench_plan_tests.rs (gated on the bench feature, pass
today):

- group_by_encrypted_uses_hash_aggregate — confirms `GROUP BY encrypted_col`
  picks HashAggregate via the hash op class rather than degenerating to
  GroupAggregate-after-Sort or a Nested-Loop self-comparison.
- join_on_encrypted_uses_hmac_index — confirms self-join on encrypted
  equality engages bench_text_hmac_idx (Hash Join or Nested Loop + Memoize +
  Index Scan, both acceptable).
- distinct_encrypted_uses_hash_aggregate — confirms unbounded DISTINCT picks
  HashAggregate (the bounded-LIMIT variant biases toward IndexOnlyScan over
  the ORE btree opclass; that path is fine on full installs but unavailable
  on Supabase).

Regression timing assertions in bench_regression_tests.rs (#[ignore]'d
pending #202; remove the markers when it merges):

- group_by_encrypted_under_threshold — 150ms (current ~309ms via plpgsql
  hash chain, ~70ms with chain inlined; threshold ~2x the inlined target).
- self_join_encrypted_under_threshold — 350ms (current ~308ms, ~182ms with
  chain inlined; cardinality dominates so threshold is generous).
- distinct_encrypted_under_threshold — 200ms (current ~515ms unbounded via
  ORE btree path, expected to drop into HashAggregate-driven territory
  after chain inlining).

Each timing test panic message states the expected post-#202 number and
the current observed number, so the diagnostic remains useful when the
gate flips after the fix lands.
---
 tests/sqlx/tests/bench_plan_tests.rs       | 68 +++++++++++++++++++-
 tests/sqlx/tests/bench_regression_tests.rs | 74 ++++++++++++++++++++++
 2 files changed, 141 insertions(+), 1 deletion(-)

diff --git a/tests/sqlx/tests/bench_plan_tests.rs b/tests/sqlx/tests/bench_plan_tests.rs
index 2cb85ec1..8120f6af 100644
--- a/tests/sqlx/tests/bench_plan_tests.rs
+++ b/tests/sqlx/tests/bench_plan_tests.rs
@@ -6,7 +6,9 @@
 //! ANALYZE is run by the bench_setup fixture — planner statistics are populated at fixture load.
 
 use anyhow::Result;
-use eql_tests::{assert_uses_index, get_bench_encrypted_int, get_bench_encrypted_text};
+use eql_tests::{
+    assert_uses_index, explain_query, get_bench_encrypted_int, get_bench_encrypted_text,
+};
 use sqlx::PgPool;
 
 const BENCH_INT_ORE_IDX: &str = "bench_int_ore_idx";
@@ -157,3 +159,67 @@ async fn bare_ilike_uses_bloom_index(pool: PgPool) -> Result<()> {
     assert_uses_index(&pool, &sql, BENCH_TEXT_BLOOM_IDX).await?;
     Ok(())
 }
+
+// ============================================================================
+// Hash-strategy plans: GROUP BY / JOIN / DISTINCT on encrypted columns engage
+// the hash operator class (#196). The plan-shape assertions below cover the
+// surface PR #196 enabled; the corresponding timing thresholds in
+// bench_regression_tests.rs are #[ignore]'d pending the hash-chain inlining
+// work tracked in #202.
+// ============================================================================
+
+/// `GROUP BY encrypted_col` engages HashAggregate via the hash operator class.
+/// Without the hash op class registered in #196 this would fall back to
+/// GroupAggregate-after-Sort or — worse — degenerate to a Nested-Loop self-comparison.
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
+#[cfg_attr(
+    not(feature = "bench"),
+    ignore = "perf-bench: gated, run via mise test:bench"
+)]
+async fn group_by_encrypted_uses_hash_aggregate(pool: PgPool) -> Result<()> {
+    let sql = "SELECT count(*) FROM bench GROUP BY encrypted_text";
+    let plan = explain_query(&pool, sql).await?;
+    assert!(
+        plan.contains("HashAggregate"),
+        "Expected GROUP BY to use HashAggregate. EXPLAIN output:\n{}",
+        plan
+    );
+    Ok(())
+}
+
+/// JOIN on `a.encrypted_col = b.encrypted_col` engages the hmac functional index.
+/// Acceptable plan shapes: Hash Join (preferred), or Nested Loop + Memoize +
+/// Index Scan via `bench_text_hmac_idx` (current planner choice — fine since
+/// the index lookup remains the per-probe cost).
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
+#[cfg_attr(
+    not(feature = "bench"),
+    ignore = "perf-bench: gated, run via mise test:bench"
+)]
+async fn join_on_encrypted_uses_hmac_index(pool: PgPool) -> Result<()> {
+    let sql = "SELECT count(*) FROM bench a JOIN bench b \
+               ON a.encrypted_text = b.encrypted_text";
+    assert_uses_index(&pool, sql, BENCH_TEXT_HMAC_IDX).await?;
+    Ok(())
+}
+
+/// `SELECT DISTINCT encrypted_col FROM t` (unbounded) engages HashAggregate
+/// via the hash operator class. The bounded variant (`... LIMIT N`) biases
+/// the planner toward Index Only Scan over the ORE btree opclass — that's
+/// fine on full installs but unavailable on Supabase, where this hash path
+/// becomes the only viable one.
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
+#[cfg_attr(
+    not(feature = "bench"),
+    ignore = "perf-bench: gated, run via mise test:bench"
+)]
+async fn distinct_encrypted_uses_hash_aggregate(pool: PgPool) -> Result<()> {
+    let sql = "SELECT DISTINCT encrypted_text FROM bench";
+    let plan = explain_query(&pool, sql).await?;
+    assert!(
+        plan.contains("HashAggregate"),
+        "Expected DISTINCT to use HashAggregate. EXPLAIN output:\n{}",
+        plan
+    );
+    Ok(())
+}
diff --git a/tests/sqlx/tests/bench_regression_tests.rs b/tests/sqlx/tests/bench_regression_tests.rs
index 2d49faa6..cc7921df 100644
--- a/tests/sqlx/tests/bench_regression_tests.rs
+++ b/tests/sqlx/tests/bench_regression_tests.rs
@@ -111,3 +111,77 @@ async fn ore_order_by_under_threshold(pool: PgPool) -> Result<()> {
     );
     Ok(())
 }
+
+// ============================================================================
+// Hash-strategy timing regressions: GROUP BY / JOIN / DISTINCT on encrypted
+// columns. The plan shapes already engage the hash operator class (#196), but
+// per-row cost is dominated by plpgsql call overhead in the
+// `hash_encrypted` → `to_ste_vec_value` → `hmac_256` chain.
+//
+// All three are #[ignore]'d pending the chain inlining tracked in #202.
+// Thresholds are set to the post-inlining target (measured by patching the
+// chain to LANGUAGE sql IMMUTABLE in-place: ~70ms for GROUP BY, ~182ms for
+// the self-join, with consistent plan shapes). Remove the #[ignore] when #202
+// merges and confirm green.
+// ============================================================================
+
+/// `GROUP BY encrypted_text` should be under 150ms at 10K rows.
+/// Measured baseline today: ~309ms (HashAggregate + Seq Scan, plpgsql per-row
+/// cost in hash_encrypted chain). Measured with chain inlined: ~70ms.
+/// Threshold of 150ms is ~2× the inlined baseline to absorb CI variance.
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
+#[ignore = "#202: hash_encrypted chain not yet inlined; remove ignore when #202 merges"]
+async fn group_by_encrypted_under_threshold(pool: PgPool) -> Result<()> {
+    let stats: ExplainStats = explain_analyze_avg(
+        &pool,
+        "SELECT count(*) FROM bench GROUP BY encrypted_text",
+        5,
+    )
+    .await?;
+    assert!(
+        stats.execution_time_ms < 150.0,
+        "GROUP BY encrypted_text took {:.1}ms, threshold 150ms (~70ms expected after #202, currently ~309ms, node_type={})",
+        stats.execution_time_ms, stats.node_type
+    );
+    Ok(())
+}
+
+/// Self-join on `a.encrypted_text = b.encrypted_text` should be under 350ms at
+/// 10K rows (which produces ~1M result rows due to ~99 distinct values × ~100
+/// matches each — most of the time is intrinsic result cardinality, not the
+/// per-probe cost).
+/// Measured baseline today: ~308ms. Measured with chain inlined: ~182ms.
+/// Threshold of 350ms catches a regression to seq scan (>1s) without flapping
+/// on cardinality variance.
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
+#[ignore = "#202: hash_encrypted chain not yet inlined; remove ignore when #202 merges"]
+async fn self_join_encrypted_under_threshold(pool: PgPool) -> Result<()> {
+    let stats: ExplainStats = explain_analyze_avg(
+        &pool,
+        "SELECT count(*) FROM bench a JOIN bench b ON a.encrypted_text = b.encrypted_text",
+        3,
+    )
+    .await?;
+    assert!(
+        stats.execution_time_ms < 350.0,
+        "Self-join on encrypted_text took {:.1}ms, threshold 350ms (~182ms expected after #202, currently ~308ms, node_type={})",
+        stats.execution_time_ms, stats.node_type
+    );
+    Ok(())
+}
+
+/// `SELECT DISTINCT encrypted_text` should be under 200ms at 10K rows once
+/// HashAggregate becomes the planner choice (currently picks Index Only Scan
+/// over the ORE btree opclass at ~132ms — usable but unavailable on Supabase).
+#[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
+#[ignore = "#202: hash_encrypted chain not yet inlined; remove ignore when #202 merges"]
+async fn distinct_encrypted_under_threshold(pool: PgPool) -> Result<()> {
+    let stats: ExplainStats =
+        explain_analyze_avg(&pool, "SELECT DISTINCT encrypted_text FROM bench", 5).await?;
+    assert!(
+        stats.execution_time_ms < 200.0,
+        "DISTINCT encrypted_text took {:.1}ms, threshold 200ms (currently ~132ms via ORE btree on full install; want HashAggregate-driven path post-#202, node_type={})",
+        stats.execution_time_ms, stats.node_type
+    );
+    Ok(())
+}

From 7326fc24c123e10ed835f78debafa170ab3c58d5 Mon Sep 17 00:00:00 2001
From: Dan Draper <dan@cipherstash.com>
Date: Mon, 11 May 2026 16:41:57 +1000
Subject: [PATCH 2/3] =?UTF-8?q?test(bench):=20correct=20hash-strategy=20re?=
 =?UTF-8?q?gression=20numbers=20=E2=80=94=20fast-path,=20not=20naive=20inl?=
 =?UTF-8?q?ining?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After measuring properly, naive plpgsql → LANGUAGE sql conversion of the
hash_encrypted chain does not deliver the speedup originally cited:
`to_ste_vec_value`'s per-row JSONB inspection/reconstruction dominates
even when fully inlined. The actual #202 fix is a fast-path read of
root-level `hm` (`coalesce(val.data ->> 'hm', ...)`) with the
`to_ste_vec_value` unwrap reserved for single-element ste_vec-wrapped
payloads.

Updates the regression test docstrings, panic messages, and section
header to reflect the measured fast-path numbers:
- GROUP BY: expected ~73ms (was ~70ms — close, but framing was wrong).
- Self-join: expected ~185ms (was ~182ms — same).
- DISTINCT (previously TBD): measured ~72ms with fast-path applied.

Thresholds unchanged: 150ms / 350ms / 200ms — all still have ~2x
headroom over the fast-path numbers.
---
 tests/sqlx/tests/bench_regression_tests.rs | 45 ++++++++++++++--------
 1 file changed, 28 insertions(+), 17 deletions(-)

diff --git a/tests/sqlx/tests/bench_regression_tests.rs b/tests/sqlx/tests/bench_regression_tests.rs
index cc7921df..6edcfe63 100644
--- a/tests/sqlx/tests/bench_regression_tests.rs
+++ b/tests/sqlx/tests/bench_regression_tests.rs
@@ -118,17 +118,22 @@ async fn ore_order_by_under_threshold(pool: PgPool) -> Result<()> {
 // per-row cost is dominated by plpgsql call overhead in the
 // `hash_encrypted` → `to_ste_vec_value` → `hmac_256` chain.
 //
-// All three are #[ignore]'d pending the chain inlining tracked in #202.
-// Thresholds are set to the post-inlining target (measured by patching the
-// chain to LANGUAGE sql IMMUTABLE in-place: ~70ms for GROUP BY, ~182ms for
-// the self-join, with consistent plan shapes). Remove the #[ignore] when #202
-// merges and confirm green.
+// All three are #[ignore]'d pending the hash_encrypted fast-path tracked in
+// #202. The dominant cost on these queries isn't plpgsql call overhead (a
+// naive plpgsql → LANGUAGE sql conversion of the existing body leaves them
+// effectively unchanged); it's `to_ste_vec_value`'s per-row JSONB inspection
+// and reconstruction. The #202 fix short-circuits via root-level `hm`
+// (`coalesce(val.data ->> 'hm', ...)`), falling through to `to_ste_vec_value`
+// only for single-element ste_vec-wrapped payloads. Thresholds below reflect
+// measured numbers with that fast-path applied in-place. Remove the
+// `#[ignore]` markers when #202 merges and confirm green.
 // ============================================================================
 
 /// `GROUP BY encrypted_text` should be under 150ms at 10K rows.
-/// Measured baseline today: ~309ms (HashAggregate + Seq Scan, plpgsql per-row
-/// cost in hash_encrypted chain). Measured with chain inlined: ~70ms.
-/// Threshold of 150ms is ~2× the inlined baseline to absorb CI variance.
+/// Measured baseline today: ~309ms (HashAggregate + Seq Scan, dominated by
+/// per-row `to_ste_vec_value` cost in the hash_encrypted chain). Measured
+/// with the #202 fast-path applied: ~73ms. Threshold of 150ms is ~2x the
+/// fast-path number to absorb CI variance.
 #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
 #[ignore = "#202: hash_encrypted chain not yet inlined; remove ignore when #202 merges"]
 async fn group_by_encrypted_under_threshold(pool: PgPool) -> Result<()> {
@@ -140,7 +145,7 @@ async fn group_by_encrypted_under_threshold(pool: PgPool) -> Result<()> {
     .await?;
     assert!(
         stats.execution_time_ms < 150.0,
-        "GROUP BY encrypted_text took {:.1}ms, threshold 150ms (~70ms expected after #202, currently ~309ms, node_type={})",
+        "GROUP BY encrypted_text took {:.1}ms, threshold 150ms (~73ms expected after #202 fast-path, currently ~309ms, node_type={})",
         stats.execution_time_ms, stats.node_type
     );
     Ok(())
@@ -150,9 +155,9 @@ async fn group_by_encrypted_under_threshold(pool: PgPool) -> Result<()> {
 /// 10K rows (which produces ~1M result rows due to ~99 distinct values × ~100
 /// matches each — most of the time is intrinsic result cardinality, not the
 /// per-probe cost).
-/// Measured baseline today: ~308ms. Measured with chain inlined: ~182ms.
-/// Threshold of 350ms catches a regression to seq scan (>1s) without flapping
-/// on cardinality variance.
+/// Measured baseline today: ~308ms. Measured with the #202 fast-path applied:
+/// ~185ms. Threshold of 350ms catches a regression to seq scan (>1s) without
+/// flapping on cardinality variance.
 #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
 #[ignore = "#202: hash_encrypted chain not yet inlined; remove ignore when #202 merges"]
 async fn self_join_encrypted_under_threshold(pool: PgPool) -> Result<()> {
@@ -164,15 +169,21 @@ async fn self_join_encrypted_under_threshold(pool: PgPool) -> Result<()> {
     .await?;
     assert!(
         stats.execution_time_ms < 350.0,
-        "Self-join on encrypted_text took {:.1}ms, threshold 350ms (~182ms expected after #202, currently ~308ms, node_type={})",
+        "Self-join on encrypted_text took {:.1}ms, threshold 350ms (~185ms expected after #202 fast-path, currently ~308ms, node_type={})",
         stats.execution_time_ms, stats.node_type
     );
     Ok(())
 }
 
-/// `SELECT DISTINCT encrypted_text` should be under 200ms at 10K rows once
-/// HashAggregate becomes the planner choice (currently picks Index Only Scan
-/// over the ORE btree opclass at ~132ms — usable but unavailable on Supabase).
+/// Unbounded `SELECT DISTINCT encrypted_text` should be under 200ms at 10K rows.
+/// Measured baseline today: ~515ms (HashAggregate over plpgsql hash_encrypted
+/// chain). Measured with the #202 fast-path applied (`coalesce(val.data ->>
+/// 'hm', ...)`): ~72ms. Threshold of 200ms is ~2.8x the fast-path number to
+/// absorb CI variance.
+///
+/// (The `... LIMIT N` variant biases the planner toward Index Only Scan over
+/// the ORE btree opclass — fine on full installs but unavailable on Supabase.
+/// This test exercises the unbounded path that engages HashAggregate.)
 #[sqlx::test(fixtures(path = "../fixtures", scripts("bench_data", "bench_setup")))]
 #[ignore = "#202: hash_encrypted chain not yet inlined; remove ignore when #202 merges"]
 async fn distinct_encrypted_under_threshold(pool: PgPool) -> Result<()> {
@@ -180,7 +191,7 @@ async fn distinct_encrypted_under_threshold(pool: PgPool) -> Result<()> {
         explain_analyze_avg(&pool, "SELECT DISTINCT encrypted_text FROM bench", 5).await?;
     assert!(
         stats.execution_time_ms < 200.0,
-        "DISTINCT encrypted_text took {:.1}ms, threshold 200ms (currently ~132ms via ORE btree on full install; want HashAggregate-driven path post-#202, node_type={})",
+        "DISTINCT encrypted_text took {:.1}ms, threshold 200ms (~72ms expected after #202 fast-path, currently ~515ms, node_type={})",
         stats.execution_time_ms, stats.node_type
     );
     Ok(())

From 02b539e62b30219b1c60394296295babe88fc395 Mon Sep 17 00:00:00 2001
From: Dan Draper <dan@cipherstash.com>
Date: Mon, 11 May 2026 17:24:39 +1000
Subject: [PATCH 3/3] test(bench): cover GROUP BY on JSON field extracted from
 encrypted column
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds the field-level GROUP BY scenario alongside the root-level coverage.
The pattern — `GROUP BY eql_v2.jsonb_path_query_first(col, '<selector>')`
— is the canonical "tell me how many rows per region" query against
ste_vec encryption where the field has a `unique` index configured.

The current bench fixture's field-level sv elements carry only OPE
terms (`ocv` / `ocf`), no `hm`, which means field-level GROUP BY raises
("Cannot hash eql_v2_encrypted value: no hmac_256 index term found").
The new `bench_json_data.sql` fixture overlays `hm` onto the $.hello sv
element of each bench row — synthesising what `@cipherstash/protect`
produces when the $.hello path is configured with `unique`. Reuses the
existing `bench` rows so the fixture cost stays the same.

Plan assertion in `bench_plan_tests.rs` (gated on bench feature, passes
today): `group_by_jsonb_field_uses_hash_aggregate` — confirms the
planner engages `HashAggregate` (today as `Partial HashAggregate` under
a parallel worker, then Sort + GroupAggregate for the merge — once #204
inlines the extractors, this may flatten to a single HashAggregate).

Regression timing in `bench_regression_tests.rs` (#[ignore]'d pending
#204): `group_by_jsonb_field_under_threshold` — threshold 50ms.
Measured numbers at 10K rows: ~278ms on main, ~234ms with the #202
fast-path patched in, ~7ms achievable via raw JSONB extraction
(`(col).data->'sv'->N->>'hm'`). The 50ms threshold targets the
post-#204-extractor-inlining state.
---
 tests/sqlx/fixtures/bench_json_data.sql    | 49 ++++++++++++++++++++++
 tests/sqlx/tests/bench_plan_tests.rs       | 40 ++++++++++++++++++
 tests/sqlx/tests/bench_regression_tests.rs | 32 ++++++++++++++
 3 files changed, 121 insertions(+)
 create mode 100644 tests/sqlx/fixtures/bench_json_data.sql

diff --git a/tests/sqlx/fixtures/bench_json_data.sql b/tests/sqlx/fixtures/bench_json_data.sql
new file mode 100644
index 00000000..3002a8b1
--- /dev/null
+++ b/tests/sqlx/fixtures/bench_json_data.sql
@@ -0,0 +1,49 @@
+-- Fixture: bench_json_data.sql
+--
+-- Builds the bench_json table by overlaying the existing `bench` rows
+-- (loaded by the bench_data.sql fixture, which must run first) and adding
+-- `hm` to the $.hello sv element of each row. This mirrors what
+-- `@cipherstash/protect` would produce for a JSONB column where the
+-- $.hello path is configured with a `unique` index — without that, the
+-- field-level sv element carries only OPE terms (`ocv`) and field-level
+-- GROUP BY / DISTINCT / hash joins on the extracted value raise:
+--
+--     ERROR:  Cannot hash eql_v2_encrypted value: no hmac_256 index term
+--             found. Configure a `unique` index on the column for hash
+--             operations (GROUP BY, DISTINCT, hash joins).
+--
+-- The synthesised `hm` is the field's existing `ocv` hex string (already
+-- deterministic over the plaintext at that selector) so it serves as a
+-- valid equality token without us inventing a separate one. The shape
+-- matches production: `c`, `s`, `ocv`, `hm` at the sv element level.
+--
+-- Selector cheatsheet (matches Selectors:: in tests/sqlx/src/selectors.rs):
+--   bca213de9ccce676fa849ff9c4807963 → $       (root, has b3 here today)
+--   a7cea93975ed8c01f861ccb6bd082784 → $.hello (we add `hm` here)
+--   2517068c0d1f9d4d41d2c666211f785e → $.n     (left alone)
+
+CREATE TABLE IF NOT EXISTS bench_json (
+    id bigserial PRIMARY KEY,
+    e eql_v2_encrypted
+);
+
+INSERT INTO bench_json (e)
+SELECT (jsonb_build_object(
+    'c',  (encrypted_text).data ->> 'c',
+    'i',  (encrypted_text).data -> 'i',
+    'v',  2,
+    'hm', (encrypted_text).data ->> 'hm',
+    'sv', (
+        SELECT jsonb_agg(
+            CASE
+                WHEN elem ->> 's' = 'a7cea93975ed8c01f861ccb6bd082784'
+                THEN elem || jsonb_build_object('hm', elem ->> 'ocv')
+                ELSE elem
+            END
+        )
+        FROM jsonb_array_elements((encrypted_text).data -> 'sv') elem
+    )
+)) :: eql_v2_encrypted
+FROM bench;
+
+ANALYZE bench_json;
diff --git a/tests/sqlx/tests/bench_plan_tests.rs b/tests/sqlx/tests/bench_plan_tests.rs
index 8120f6af..0be9bab7 100644
--- a/tests/sqlx/tests/bench_plan_tests.rs
+++ b/tests/sqlx/tests/bench_plan_tests.rs
@@ -223,3 +223,43 @@ async fn distinct_encrypted_uses_hash_aggregate(pool: PgPool) -> Result<()> {
     );
     Ok(())
 }
+
+// ============================================================================
+// Field-level hash-strategy: GROUP BY on a JSON path extracted from an
+// encrypted column. This is the "how many users per region" pattern against
+// ste_vec encryption.
+//
+// Uses the bench_json fixture, which overlays `hm` onto the `$.hello` sv
+// element of each bench row — simulating what `@cipherstash/protect` produces
+// for a JSONB column where the `$.hello` path is configured with a `unique`
+// index. Without that overlay, field-level GROUP BY raises today
+// ("Cannot hash eql_v2_encrypted value: no hmac_256 index term found").
+// ============================================================================
+
+/// Documented EQL form for field-level GROUP BY:
+/// `GROUP BY eql_v2.jsonb_path_query_first(col, '<selector>')`.
+/// The planner engages a parallel Partial HashAggregate + Sort + GroupAggregate
+/// merge — i.e., `HashAggregate` appears in the plan even if it's not the top
+/// node. The bare-`->` form (`col -> '<sel>'::text`) currently picks
+/// Sort + GroupAggregate instead; once #204 inlines the extractors, the
+/// planner has the option of flattening to a single HashAggregate.
+#[sqlx::test(fixtures(
+    path = "../fixtures",
+    scripts("bench_data", "bench_setup", "bench_json_data")
+))]
+#[cfg_attr(
+    not(feature = "bench"),
+    ignore = "perf-bench: gated, run via mise test:bench"
+)]
+async fn group_by_jsonb_field_uses_hash_aggregate(pool: PgPool) -> Result<()> {
+    // Selectors::HELLO = $.hello — see tests/sqlx/src/selectors.rs.
+    let sql = "SELECT count(*) FROM bench_json \
+               GROUP BY eql_v2.jsonb_path_query_first(e, 'a7cea93975ed8c01f861ccb6bd082784')";
+    let plan = explain_query(&pool, sql).await?;
+    assert!(
+        plan.contains("HashAggregate"),
+        "Expected field-level GROUP BY plan to include a HashAggregate node. EXPLAIN output:\n{}",
+        plan
+    );
+    Ok(())
+}
diff --git a/tests/sqlx/tests/bench_regression_tests.rs b/tests/sqlx/tests/bench_regression_tests.rs
index 6edcfe63..5335db95 100644
--- a/tests/sqlx/tests/bench_regression_tests.rs
+++ b/tests/sqlx/tests/bench_regression_tests.rs
@@ -196,3 +196,35 @@ async fn distinct_encrypted_under_threshold(pool: PgPool) -> Result<()> {
     );
     Ok(())
 }
+
+/// Field-level `GROUP BY eql_v2.jsonb_path_query_first(col, '<selector>')` —
+/// the canonical pattern for "count rows per JSON field value" against an
+/// encrypted column where the field has a `unique` index configured.
+///
+/// Closing #202 (hash_encrypted fast-path) helps but doesn't fully solve this:
+/// the dominant cost shifts to `jsonb_path_query_first`'s per-row plpgsql
+/// overhead. The threshold below reflects what becomes achievable once #204
+/// inlines the JSONB field extractors.
+///
+/// Measured at 10K rows on a synthesized fixture with `hm` overlaid at
+/// $.hello: current main ~496ms, with #202 fast-path applied ~234ms, with
+/// raw JSONB extraction bypass (proxy-emitted, no eql_v2 plpgsql in the
+/// hot path) ~7ms. Threshold of 50ms is set near the bypass number — when
+/// #204 inlines the extractors, the EQL form should converge on the same
+/// ballpark.
+#[sqlx::test(fixtures(
+    path = "../fixtures",
+    scripts("bench_data", "bench_setup", "bench_json_data")
+))]
+#[ignore = "#204: JSONB field extractors not yet inlined; remove ignore when #204 merges"]
+async fn group_by_jsonb_field_under_threshold(pool: PgPool) -> Result<()> {
+    let sql = "SELECT count(*) FROM bench_json \
+               GROUP BY eql_v2.jsonb_path_query_first(e, 'a7cea93975ed8c01f861ccb6bd082784')";
+    let stats: ExplainStats = explain_analyze_avg(&pool, sql, 5).await?;
+    assert!(
+        stats.execution_time_ms < 50.0,
+        "GROUP BY field-level jsonb_path_query_first took {:.1}ms, threshold 50ms (~7ms achievable with full extractor inlining + #202, currently ~496ms on main, ~234ms with #202 fast-path only, node_type={})",
+        stats.execution_time_ms, stats.node_type
+    );
+    Ok(())
+}