cipherstash · coderdan · May 13, 2026 · May 13, 2026 · May 13, 2026 · May 13, 2026
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -14,7 +14,8 @@ ore-rs = "0.7.0"
 hex = "0.4.3"
 hex-literal = "0.3.4"
 rand = "0.8.5"
-cipherstash-client = { version = "0.32.2", features = ["tokio"] }
+cipherstash-client = { version = "0.34.1-alpha.4", features = ["tokio"] }
+stack-profile = "0.34.1-alpha.4"
 anyhow = "1.0"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
@@ -47,4 +48,8 @@ harness = false
 
 [[bench]]
 name = "exact"
+harness = false
+
+[[bench]]
+name = "group_by"
 harness = false
diff --git a/README_REPORT.md b/README_REPORT.md
@@ -200,12 +200,6 @@ string_encrypted_10000_gin_index
 ON string_encrypted_10000 USING GIN (
     eql_v2.bloom_filter(value)
 );
-
-CREATE INDEX
-string_encrypted_10000_eql_index
-ON string_encrypted_10000 (
-    value eql_v2.encrypted_operator_class
-);
 ```
 
 ## Customization

diff --git a/benches/exact.rs b/benches/exact.rs
@@ -1,8 +1,8 @@
 use cipherstash_client::{
-    credentials::ServiceCredentials,
     encryption::ScopedCipher,
     eql::Identifier,
     schema::{column::Index, ColumnConfig, ColumnType},
+    AutoStrategy,
 };
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use dbbenches::{init_scoped_cipher, EncryptedQuery, EncryptedQueryBuilder};
@@ -16,13 +16,13 @@ static QUERY_TEMPLATES: &[(&str, &str, &str)] = &[
 ];
 
 async fn build_query(
-    cipher: Arc<ScopedCipher<ServiceCredentials>>,
+    cipher: Arc<ScopedCipher<AutoStrategy>>,
     query: &str,
     x: &str,
     table_name: &str,
 ) -> EncryptedQuery {
     let column_config = ColumnConfig::build("value")
-        .casts_as(ColumnType::Utf8Str)
+        .casts_as(ColumnType::Text)
         .add_index(Index::new_unique());
 
     let identifier = Identifier::new(table_name, "value");

diff --git a/benches/group_by.rs b/benches/group_by.rs
@@ -0,0 +1,85 @@
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use sqlx::postgres::PgPoolOptions;
+use sqlx::Row;
+use tokio::runtime::Runtime;
+
+// Two flavours of the same GROUP BY against the string_encrypted_* tables:
+//
+// 1. "eql_cast" — natural form: `GROUP BY value`. The hash discriminator for
+//    aggregation is provided by `eql_v2.hash_encrypted`, which is a plpgsql
+//    function called once per row. Not inlinable.
+//
+// 2. "hmac_extractor" — explicit form: `GROUP BY eql_v2.hmac_256(value)`. The
+//    extractor is an inlinable single-statement SQL function (post 2.3), so the
+//    planner folds the body — `(val).data ->> 'hm'` — into the aggregation.
+//
+// PostgreSQL builds an in-memory hash table for GROUP BY in both cases (the
+// functional hash index on `eql_v2.hmac_256(value)` is only useful for
+// equality lookups, not aggregation), so this is really a comparison of
+// per-row hashing cost: plpgsql function call vs. inlined SQL.
+static QUERY_TEMPLATES: &[(&str, &str)] = &[
+    (
+        "SELECT count(*) FROM {TABLE} GROUP BY value",
+        "eql_cast",
+    ),
+    (
+        "SELECT count(*) FROM {TABLE} GROUP BY eql_v2.hmac_256(value)",
+        "hmac_extractor",
+    ),
+];
+
+fn criterion_benchmark(c: &mut Criterion) {
+    let rt = Runtime::new().unwrap();
+
+    let target_rows = std::env::var("TARGET_ROWS")
+        .unwrap_or_else(|_| "unknown".to_string());
+
+    let table_suffix = match target_rows.as_str() {
+        "10000" | "100000" | "1000000" | "10000000" => format!("_{}", target_rows),
+        _ => String::new(),
+    };
+    let table_name = format!("string_encrypted{}", table_suffix);
+
+    let pool = rt.block_on(async {
+        let database_url =
+            std::env::var("DATABASE_URL").expect("DATABASE_URL environment variable must be set");
+
+        PgPoolOptions::new()
+            .max_connections(5)
+            .connect(&database_url)
+            .await
+            .expect("Failed to connect to database")
+    });
+
+    let mut group = c.benchmark_group("GROUP_BY");
+    group.sample_size(10);
+    // The natural-form `GROUP BY value` scenario calls `eql_v2.hash_encrypted`
+    // (plpgsql, per row) for the hash discriminator. At 10k rows that's
+    // ~3.5 s per iteration, and it grows roughly linearly with row count.
+    // Criterion's default 5 s `measurement_time` fits at most one such
+    // iteration, well short of the criterion-minimum 10 samples. Extend the
+    // window so the 10k-row run (~35 s for 10 samples) completes without an
+    // "Unable to complete 10 samples" warning; larger tables may still warn.
+    group.warm_up_time(std::time::Duration::from_secs(5));
+    group.measurement_time(std::time::Duration::from_secs(60));
+
+    for (query_template, scenario) in QUERY_TEMPLATES {
+        let query_str = query_template.replace("{TABLE}", &table_name);
+
+        group.bench_function(format!("group_by/{}/{}", scenario, target_rows), |b| {
+            b.to_async(&rt).iter(|| async {
+                let rows = sqlx::query(&query_str)
+                    .fetch_all(&pool)
+                    .await
+                    .expect("group_by query failed");
+                // Drain the result to force the aggregation to materialise.
+                black_box(rows.iter().map(|r| r.get::<i64, _>(0)).sum::<i64>())
+            })
+        });
+    }
+
+    group.finish();
+}
+
+criterion_group!(benches, criterion_benchmark);
+criterion_main!(benches);
diff --git a/benches/match.rs b/benches/match.rs
@@ -1,8 +1,8 @@
 use cipherstash_client::{
-    credentials::ServiceCredentials,
     encryption::ScopedCipher,
     eql::Identifier,
     schema::{column::Index, ColumnConfig, ColumnType},
+    AutoStrategy,
 };
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use dbbenches::{init_scoped_cipher, EncryptedQuery, EncryptedQueryBuilder};
@@ -17,13 +17,13 @@ static QUERY_TEMPLATES: &[(&str, &str, &str)] = &[
 ];
 
 async fn build_query(
-    cipher: Arc<ScopedCipher<ServiceCredentials>>,
+    cipher: Arc<ScopedCipher<AutoStrategy>>,
     query: &str,
     x: &str,
     table_name: &str,
 ) -> EncryptedQuery {
     let column_config = ColumnConfig::build("value")
-        .casts_as(ColumnType::Utf8Str)
+        .casts_as(ColumnType::Text)
         .add_index(Index::new_match());
 
     let identifier = Identifier::new(table_name, "value");

diff --git a/benches/ore.rs b/benches/ore.rs
@@ -1,24 +1,49 @@
 use cipherstash_client::{
-    credentials::ServiceCredentials,
     encryption::ScopedCipher,
     eql::Identifier,
     schema::{
         column::{Index, IndexType},
         ColumnConfig, ColumnType,
     },
+    AutoStrategy,
 };
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 use dbbenches::{init_scoped_cipher, EncryptedQuery, EncryptedQueryBuilder};
 use sqlx::postgres::PgPoolOptions;
 use std::sync::Arc;
 use tokio::runtime::Runtime;
 
+// Post-EQL-2.3 (with the `<` / `<=` / `>` / `>=` operator inlining), bare-form
+// range predicates on `eql_v2_encrypted` reduce to
+// `eql_v2.ore_block_u64_8_256(a) <op> eql_v2.ore_block_u64_8_256(b)` and
+// structurally match a functional btree index on
+// `eql_v2.ore_block_u64_8_256(value)` — so the natural-form scenarios below
+// engage the index without rewriting.
+//
+// The ordered scenarios show three plan shapes side-by-side:
+//
+//   range_lt_ordered_10        — natural form: WHERE value < $1 ORDER BY value LIMIT 10
+//                                 → Bitmap Index Scan via the inlined `<`, plus
+//                                   a Top-N sort by `value` (the natural-form sort
+//                                   key doesn't match the index expression
+//                                   syntactically). Each comparison in the Sort
+//                                   step uses the inlined ORE-term path, so the
+//                                   Top-N is fast.
+//
+//   range_lt_hybrid_ordered_10 — natural WHERE, extractor ORDER BY:
+//                                 ORDER BY eql_v2.ore_block_u64_8_256(value).
+//                                 The sort key matches the index expression →
+//                                 plain ordered Index Scan, no Sort node.
+//
+//   range_lt_ore_ordered_10    — fully extractor on both clauses. After the `<`
+//                                 inlining the WHERE reduces to the same shape
+//                                 as the hybrid, so the plan is identical to
+//                                 hybrid. Kept for contrast / regression.
+//
+// The equality scenario from the previous bench (`WHERE value = $1`) is gone:
+// the integer column carries only `ob`, not `hm`, so post-2.3 equality returns
+// NULL → zero rows. See exact.rs for the meaningful equality benches.
 static QUERY_TEMPLATES: &[(&str, i32, &str)] = &[
-    (
-        "SELECT value FROM {TABLE} WHERE value = $1 LIMIT 1",
-        5000,
-        "exact",
-    ),
     (
         "SELECT id,value::jsonb FROM {TABLE} WHERE value > $1 LIMIT 10",
         5000,
@@ -44,10 +69,24 @@ static QUERY_TEMPLATES: &[(&str, i32, &str)] = &[
         5000,
         "range_lt_ordered_10",
     ),
+    (
+        "SELECT id,value::jsonb FROM {TABLE} \
+         WHERE value < $1 \
+         ORDER BY eql_v2.ore_block_u64_8_256(value) LIMIT 10",
+        5000,
+        "range_lt_hybrid_ordered_10",
+    ),
+    (
+        "SELECT id,value::jsonb FROM {TABLE} \
+         WHERE eql_v2.ore_block_u64_8_256(value) < eql_v2.ore_block_u64_8_256($1::jsonb) \
+         ORDER BY eql_v2.ore_block_u64_8_256(value) LIMIT 10",
+        5000,
+        "range_lt_ore_ordered_10",
+    ),
 ];
 
 async fn build_query(
-    cipher: Arc<ScopedCipher<ServiceCredentials>>,
+    cipher: Arc<ScopedCipher<AutoStrategy>>,
     query: &str,
     x: i32,
     table_name: &str,
@@ -108,10 +147,21 @@ fn criterion_benchmark(c: &mut Criterion) {
 
     let mut group = c.benchmark_group("ORE");
     group.sample_size(10);
+    // Some scenarios — notably the natural-form `WHERE val < $1 ORDER BY val
+    // LIMIT 10` — finish a single iteration in several hundred milliseconds
+    // because the Top-N sort runs over the post-WHERE bitmap rather than
+    // streaming from an ordered index (see U-005 in EQL's v2.3 upgrade
+    // notes). Criterion's default 5 s `measurement_time` only fits a few
+    // such samples, yielding very wide confidence intervals and false
+    // "regressed" alerts against any stored baseline. 30 s gives the slow
+    // scenarios room to settle while leaving fast ones (sub-ms to single
+    // ms) plenty of headroom.
+    group.warm_up_time(std::time::Duration::from_secs(5));
+    group.measurement_time(std::time::Duration::from_secs(30));
 
     for (i, query) in queries.into_iter().enumerate() {
         let (_, _, scenario) = QUERY_TEMPLATES[i];
-        
+
         group.bench_function(format!("ore/{}/{}", scenario, target_rows), |b| {
             b.to_async(&rt).iter(|| async {
                 let _: Vec<_> = query.execute(&pool).await.unwrap();

diff --git a/mise.toml b/mise.toml
@@ -2,6 +2,9 @@
 python = "latest"
 rust = "latest"
 
+[env]
+DATABASE_URL = "postgres://postgres:postgres@localhost:5400/postgres"
+
 [tasks.postgres]
 description = "Start PostgreSQL via Docker Compose"
 run = "docker compose up -d postgres"
@@ -201,7 +204,7 @@ echo "Preparing integer_encrypted table with $TARGET_ROWS rows..."
 mise run prepare:integer_encrypted "$TARGET_ROWS"
 
 echo "Cleaning old benchmark data..."
-rm -rf target/criterion/ORE
+rm -rf target/criterion/data/main/ORE target/criterion/reports/ORE
 
 echo "Running ORE query benchmark..."
 mkdir -p results/query
@@ -234,7 +237,7 @@ echo "Preparing string_encrypted table with $TARGET_ROWS rows..."
 mise run prepare:string_encrypted "$TARGET_ROWS"
 
 echo "Cleaning old benchmark data..."
-rm -rf target/criterion/MATCH
+rm -rf target/criterion/data/main/MATCH target/criterion/reports/MATCH
 
 echo "Running MATCH query benchmark..."
 mkdir -p results/query
@@ -267,7 +270,7 @@ echo "Preparing string_encrypted table with $TARGET_ROWS rows..."
 mise run prepare:string_encrypted "$TARGET_ROWS"
 
 echo "Cleaning old benchmark data..."
-rm -rf target/criterion/EXACT
+rm -rf target/criterion/data/main/EXACT target/criterion/reports/EXACT
 
 echo "Running EXACT query benchmark..."
 mkdir -p results/query
@@ -277,6 +280,39 @@ TARGET_ROWS="$TARGET_ROWS" cargo criterion --bench exact --message-format json >
 echo "Benchmark complete! Results written to $OUTPUT_FILE"
 """
 
+[tasks."bench:query:group_by"]
+description = "Run GROUP BY query benchmark"
+run = """
+#!/usr/bin/env bash
+set -e
+
+TARGET_ROWS="$1"
+
+if [ -z "$TARGET_ROWS" ]; then
+  echo "Error: target row count argument required"
+  echo "Usage: mise run bench:query:group_by <target_rows>"
+  exit 1
+fi
+
+if ! [[ "$TARGET_ROWS" =~ ^[0-9]+$ ]]; then
+  echo "Error: target row count must be a positive integer"
+  exit 1
+fi
+
+echo "Preparing string_encrypted table with $TARGET_ROWS rows..."
+mise run prepare:string_encrypted "$TARGET_ROWS"
+
+echo "Cleaning old benchmark data..."
+rm -rf target/criterion/data/main/GROUP_BY target/criterion/reports/GROUP_BY
+
+echo "Running GROUP BY query benchmark..."
+mkdir -p results/query
+OUTPUT_FILE="results/query/group_by_rows_${TARGET_ROWS}.json"
+TARGET_ROWS="$TARGET_ROWS" cargo criterion --bench group_by --message-format json > "$OUTPUT_FILE"
+
+echo "Benchmark complete! Results written to $OUTPUT_FILE"
+"""
+
 [tasks."bench:query:all"]
 description = "Run all query benchmarks with multiple row counts (10k, 100k, 1M, 10M)"
 run = """
@@ -288,7 +324,7 @@ ROW_COUNTS=(10000 100000 1000000 10000000)
 echo "========================================"
 echo "Starting comprehensive benchmark suite"
 echo "Row counts: ${ROW_COUNTS[*]}"
-echo "Benchmarks: exact, match, ore"
+echo "Benchmarks: exact, match, ore, group_by"
 echo "========================================"
 echo ""
 
@@ -299,19 +335,23 @@ for ROWS in "${ROW_COUNTS[@]}"; do
   echo "Running benchmarks with $ROWS rows"
   echo "========================================"
   echo ""
-  
-  echo "[1/3] Running EXACT benchmark with $ROWS rows..."
+
+  echo "[1/4] Running EXACT benchmark with $ROWS rows..."
   mise run bench:query:exact "$ROWS"
   echo ""
-  
-  echo "[2/3] Running MATCH benchmark with $ROWS rows..."
+
+  echo "[2/4] Running MATCH benchmark with $ROWS rows..."
   mise run bench:query:match "$ROWS"
   echo ""
-  
-  echo "[3/3] Running ORE benchmark with $ROWS rows..."
+
+  echo "[3/4] Running ORE benchmark with $ROWS rows..."
   mise run bench:query:ore "$ROWS"
   echo ""
-
+
+  echo "[4/4] Running GROUP BY benchmark with $ROWS rows..."
+  mise run bench:query:group_by "$ROWS"
+  echo ""
+
   echo "Completed benchmarks for $ROWS rows"
   echo ""
 done