Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,469 changes: 1,197 additions & 272 deletions Cargo.lock

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@ ore-rs = "0.7.0"
hex = "0.4.3"
hex-literal = "0.3.4"
rand = "0.8.5"
cipherstash-client = { version = "0.32.2", features = ["tokio"] }
cipherstash-client = { version = "0.34.1-alpha.4", features = ["tokio"] }
stack-profile = "0.34.1-alpha.4"
anyhow = "1.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
Expand Down Expand Up @@ -47,4 +48,8 @@ harness = false

[[bench]]
name = "exact"
harness = false

[[bench]]
name = "group_by"
harness = false
6 changes: 0 additions & 6 deletions README_REPORT.md
Original file line number Diff line number Diff line change
Expand Up @@ -200,12 +200,6 @@ string_encrypted_10000_gin_index
ON string_encrypted_10000 USING GIN (
eql_v2.bloom_filter(value)
);

CREATE INDEX
string_encrypted_10000_eql_index
ON string_encrypted_10000 (
value eql_v2.encrypted_operator_class
);
```

## Customization
Expand Down
6 changes: 3 additions & 3 deletions benches/exact.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use cipherstash_client::{
credentials::ServiceCredentials,
encryption::ScopedCipher,
eql::Identifier,
schema::{column::Index, ColumnConfig, ColumnType},
AutoStrategy,
};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use dbbenches::{init_scoped_cipher, EncryptedQuery, EncryptedQueryBuilder};
Expand All @@ -16,13 +16,13 @@ static QUERY_TEMPLATES: &[(&str, &str, &str)] = &[
];

async fn build_query(
cipher: Arc<ScopedCipher<ServiceCredentials>>,
cipher: Arc<ScopedCipher<AutoStrategy>>,
query: &str,
x: &str,
table_name: &str,
) -> EncryptedQuery {
let column_config = ColumnConfig::build("value")
.casts_as(ColumnType::Utf8Str)
.casts_as(ColumnType::Text)
.add_index(Index::new_unique());

let identifier = Identifier::new(table_name, "value");
Expand Down
85 changes: 85 additions & 0 deletions benches/group_by.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use sqlx::postgres::PgPoolOptions;
use sqlx::Row;
use tokio::runtime::Runtime;

// Two flavours of the same GROUP BY against the string_encrypted_* tables:
//
// 1. "eql_cast" — natural form: `GROUP BY value`. The hash discriminator for
// aggregation is provided by `eql_v2.hash_encrypted`, which is a plpgsql
// function called once per row. Not inlinable.
//
// 2. "hmac_extractor" — explicit form: `GROUP BY eql_v2.hmac_256(value)`. The
// extractor is an inlinable single-statement SQL function (post 2.3), so the
// planner folds the body — `(val).data ->> 'hm'` — into the aggregation.
//
// PostgreSQL builds an in-memory hash table for GROUP BY in both cases (the
// functional hash index on `eql_v2.hmac_256(value)` is only useful for
// equality lookups, not aggregation), so this is really a comparison of
// per-row hashing cost: plpgsql function call vs. inlined SQL.
//
// Each entry is `(sql_template, scenario_label)`. The `{TABLE}` placeholder in
// the template is replaced with the concrete table name before the query is
// run, and the label becomes the middle segment of the Criterion benchmark id
// (`group_by/<scenario_label>/<target_rows>`).
static QUERY_TEMPLATES: &[(&str, &str)] = &[
(
"SELECT count(*) FROM {TABLE} GROUP BY value",
"eql_cast",
),
(
"SELECT count(*) FROM {TABLE} GROUP BY eql_v2.hmac_256(value)",
"hmac_extractor",
),
];

/// Benchmarks every entry of `QUERY_TEMPLATES` against a `string_encrypted*` table.
///
/// Environment:
/// * `TARGET_ROWS` — selects the table and labels the benchmark id. Only the
///   values `10000`, `100000`, `1000000` and `10000000` map to a suffixed table
///   (`string_encrypted_<rows>`); anything else, including an unset variable
///   (reported as `unknown`), falls back to the bare `string_encrypted` table.
/// * `DATABASE_URL` — PostgreSQL connection string; required.
///
/// # Panics
///
/// Panics if the Tokio runtime cannot be created, if `DATABASE_URL` is not set,
/// if the connection pool cannot be established, or if a benchmarked query fails.
fn criterion_benchmark(c: &mut Criterion) {
    let runtime = Runtime::new().unwrap();

    let target_rows = match std::env::var("TARGET_ROWS") {
        Ok(value) => value,
        Err(_) => "unknown".to_string(),
    };

    // Only the row counts that have a dedicated table get a suffix.
    let has_sized_table = matches!(
        target_rows.as_str(),
        "10000" | "100000" | "1000000" | "10000000"
    );
    let table_suffix = if has_sized_table {
        format!("_{}", target_rows)
    } else {
        String::new()
    };
    let table_name = format!("string_encrypted{}", table_suffix);

    let pool = runtime.block_on(async {
        let database_url =
            std::env::var("DATABASE_URL").expect("DATABASE_URL environment variable must be set");

        let connecting = PgPoolOptions::new()
            .max_connections(5)
            .connect(&database_url);

        connecting.await.expect("Failed to connect to database")
    });

    // A single natural-form `GROUP BY value` iteration takes seconds (about
    // 3.5 s at 10k rows was observed when this bench was written, growing
    // roughly linearly with row count), because `eql_v2.hash_encrypted` is a
    // plpgsql function invoked per row. Criterion's default 5 s measurement
    // window therefore cannot hold the 10 samples requested below, so the
    // window is widened to avoid the "Unable to complete 10 samples" warning,
    // with extra headroom for the larger tables.
    let warm_up = std::time::Duration::from_secs(5);
    let measurement = std::time::Duration::from_secs(60);

    let mut group = c.benchmark_group("GROUP_BY");
    group.sample_size(10);
    group.warm_up_time(warm_up);
    group.measurement_time(measurement);

    for &(query_template, scenario) in QUERY_TEMPLATES.iter() {
        let sql = query_template.replace("{TABLE}", &table_name);
        let bench_id = format!("group_by/{}/{}", scenario, target_rows);

        group.bench_function(bench_id, |bencher| {
            bencher.to_async(&runtime).iter(|| async {
                let rows = sqlx::query(&sql)
                    .fetch_all(&pool)
                    .await
                    .expect("group_by query failed");
                // Read every count so the whole result set is actually consumed.
                let total: i64 = rows.iter().map(|row| row.get::<i64, _>(0)).sum();
                black_box(total)
            })
        });
    }

    group.finish();
}

// Register `criterion_benchmark` under the group name `benches` and let
// Criterion generate the `main` function for this benchmark target.
criterion_group!(benches, criterion_benchmark);
criterion_main!(benches);
6 changes: 3 additions & 3 deletions benches/match.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use cipherstash_client::{
credentials::ServiceCredentials,
encryption::ScopedCipher,
eql::Identifier,
schema::{column::Index, ColumnConfig, ColumnType},
AutoStrategy,
};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use dbbenches::{init_scoped_cipher, EncryptedQuery, EncryptedQueryBuilder};
Expand All @@ -17,13 +17,13 @@ static QUERY_TEMPLATES: &[(&str, &str, &str)] = &[
];

async fn build_query(
cipher: Arc<ScopedCipher<ServiceCredentials>>,
cipher: Arc<ScopedCipher<AutoStrategy>>,
query: &str,
x: &str,
table_name: &str,
) -> EncryptedQuery {
let column_config = ColumnConfig::build("value")
.casts_as(ColumnType::Utf8Str)
.casts_as(ColumnType::Text)
.add_index(Index::new_match());

let identifier = Identifier::new(table_name, "value");
Expand Down
66 changes: 58 additions & 8 deletions benches/ore.rs
Original file line number Diff line number Diff line change
@@ -1,24 +1,49 @@
use cipherstash_client::{
credentials::ServiceCredentials,
encryption::ScopedCipher,
eql::Identifier,
schema::{
column::{Index, IndexType},
ColumnConfig, ColumnType,
},
AutoStrategy,
};
use criterion::{black_box, criterion_group, criterion_main, Criterion};
use dbbenches::{init_scoped_cipher, EncryptedQuery, EncryptedQueryBuilder};
use sqlx::postgres::PgPoolOptions;
use std::sync::Arc;
use tokio::runtime::Runtime;

// Post-EQL-2.3 (with the `<` / `<=` / `>` / `>=` operator inlining), bare-form
// range predicates on `eql_v2_encrypted` reduce to
// `eql_v2.ore_block_u64_8_256(a) <op> eql_v2.ore_block_u64_8_256(b)` and
// structurally match a functional btree index on
// `eql_v2.ore_block_u64_8_256(value)` — so the natural-form scenarios below
// engage the index without rewriting.
//
// The ordered scenarios show three plan shapes side-by-side:
//
// range_lt_ordered_10 — natural form: WHERE val < $1 ORDER BY val LIMIT 10
// → Bitmap Index Scan via the inlined `<`, plus
// a Top-N sort by `val` (the natural-form sort
// key doesn't match the index expression
// syntactically). Each comparison in the Sort
// step uses the inlined ORE-term path, so the
// Top-N is fast.
//
// range_lt_hybrid_ordered_10 — natural WHERE, extractor ORDER BY:
// ORDER BY eql_v2.ore_block_u64_8_256(val).
// The sort key matches the index expression →
// plain ordered Index Scan, no Sort node.
//
// range_lt_ore_ordered_10 — extractor form in both clauses. After the `<`
// inlining the WHERE reduces to the same shape
// as the hybrid, so the plan is identical to
// the hybrid's. Kept for contrast / regression.
//
// The equality scenario from the previous bench (`WHERE value = $1`) is gone:
// the integer column carries only `ob`, not `hm`, so post-2.3 equality returns
// NULL → zero rows. See exact.rs for the meaningful equality benches.
static QUERY_TEMPLATES: &[(&str, i32, &str)] = &[
(
"SELECT value FROM {TABLE} WHERE value = $1 LIMIT 1",
5000,
"exact",
),
(
"SELECT id,value::jsonb FROM {TABLE} WHERE value > $1 LIMIT 10",
5000,
Expand All @@ -44,10 +69,24 @@ static QUERY_TEMPLATES: &[(&str, i32, &str)] = &[
5000,
"range_lt_ordered_10",
),
(
"SELECT id,value::jsonb FROM {TABLE} \
WHERE value < $1 \
ORDER BY eql_v2.ore_block_u64_8_256(value) LIMIT 10",
5000,
"range_lt_hybrid_ordered_10",
),
(
"SELECT id,value::jsonb FROM {TABLE} \
WHERE eql_v2.ore_block_u64_8_256(value) < eql_v2.ore_block_u64_8_256($1::jsonb) \
ORDER BY eql_v2.ore_block_u64_8_256(value) LIMIT 10",
5000,
"range_lt_ore_ordered_10",
),
];

async fn build_query(
cipher: Arc<ScopedCipher<ServiceCredentials>>,
cipher: Arc<ScopedCipher<AutoStrategy>>,
query: &str,
x: i32,
table_name: &str,
Expand Down Expand Up @@ -108,10 +147,21 @@ fn criterion_benchmark(c: &mut Criterion) {

let mut group = c.benchmark_group("ORE");
group.sample_size(10);
// Some scenarios — notably the natural-form `WHERE val < $1 ORDER BY val
// LIMIT 10` — finish a single iteration in several hundred milliseconds
// because the Top-N sort runs over the post-WHERE bitmap rather than
// streaming from an ordered index (see U-005 in EQL's v2.3 upgrade
// notes). Criterion's default 5 s `measurement_time` only fits a few
// such samples, yielding very wide confidence intervals and false
// "regressed" alerts against any stored baseline. 30 s gives the slow
// scenarios room to settle while leaving fast ones (sub-ms to single
// ms) plenty of headroom.
group.warm_up_time(std::time::Duration::from_secs(5));
group.measurement_time(std::time::Duration::from_secs(30));

for (i, query) in queries.into_iter().enumerate() {
let (_, _, scenario) = QUERY_TEMPLATES[i];

group.bench_function(format!("ore/{}/{}", scenario, target_rows), |b| {
b.to_async(&rt).iter(|| async {
let _: Vec<_> = query.execute(&pool).await.unwrap();
Expand Down
62 changes: 51 additions & 11 deletions mise.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@
python = "latest"
rust = "latest"

[env]
DATABASE_URL = "postgres://postgres:postgres@localhost:5400/postgres"

[tasks.postgres]
description = "Start PostgreSQL via Docker Compose"
run = "docker compose up -d postgres"
Expand Down Expand Up @@ -201,7 +204,7 @@ echo "Preparing integer_encrypted table with $TARGET_ROWS rows..."
mise run prepare:integer_encrypted "$TARGET_ROWS"

echo "Cleaning old benchmark data..."
rm -rf target/criterion/ORE
rm -rf target/criterion/data/main/ORE target/criterion/reports/ORE

echo "Running ORE query benchmark..."
mkdir -p results/query
Expand Down Expand Up @@ -234,7 +237,7 @@ echo "Preparing string_encrypted table with $TARGET_ROWS rows..."
mise run prepare:string_encrypted "$TARGET_ROWS"

echo "Cleaning old benchmark data..."
rm -rf target/criterion/MATCH
rm -rf target/criterion/data/main/MATCH target/criterion/reports/MATCH

echo "Running MATCH query benchmark..."
mkdir -p results/query
Expand Down Expand Up @@ -267,7 +270,7 @@ echo "Preparing string_encrypted table with $TARGET_ROWS rows..."
mise run prepare:string_encrypted "$TARGET_ROWS"

echo "Cleaning old benchmark data..."
rm -rf target/criterion/EXACT
rm -rf target/criterion/data/main/EXACT target/criterion/reports/EXACT

echo "Running EXACT query benchmark..."
mkdir -p results/query
Expand All @@ -277,6 +280,39 @@ TARGET_ROWS="$TARGET_ROWS" cargo criterion --bench exact --message-format json >
echo "Benchmark complete! Results written to $OUTPUT_FILE"
"""

# GROUP BY query benchmark task.
#
# Usage: mise run bench:query:group_by <target_rows>
#
# Steps performed by the script:
#   1. Require a single all-digits row-count argument.
#      NOTE(review): the pattern also accepts 0 although the error message
#      says "positive integer" — confirm whether 0 should be rejected.
#   2. Run `prepare:string_encrypted` for that row count.
#   3. Remove the stored Criterion data and reports for the GROUP_BY group.
#   4. Run `cargo criterion --bench group_by` with TARGET_ROWS exported and
#      write its JSON messages to results/query/group_by_rows_<target_rows>.json.
[tasks."bench:query:group_by"]
description = "Run GROUP BY query benchmark"
run = """
#!/usr/bin/env bash
set -e

TARGET_ROWS="$1"

if [ -z "$TARGET_ROWS" ]; then
echo "Error: target row count argument required"
echo "Usage: mise run bench:query:group_by <target_rows>"
exit 1
fi

if ! [[ "$TARGET_ROWS" =~ ^[0-9]+$ ]]; then
echo "Error: target row count must be a positive integer"
exit 1
fi

echo "Preparing string_encrypted table with $TARGET_ROWS rows..."
mise run prepare:string_encrypted "$TARGET_ROWS"

echo "Cleaning old benchmark data..."
rm -rf target/criterion/data/main/GROUP_BY target/criterion/reports/GROUP_BY

echo "Running GROUP BY query benchmark..."
mkdir -p results/query
OUTPUT_FILE="results/query/group_by_rows_${TARGET_ROWS}.json"
TARGET_ROWS="$TARGET_ROWS" cargo criterion --bench group_by --message-format json > "$OUTPUT_FILE"

echo "Benchmark complete! Results written to $OUTPUT_FILE"
"""

[tasks."bench:query:all"]
description = "Run all query benchmarks with multiple row counts (10k, 100k, 1M, 10M)"
run = """
Expand All @@ -288,7 +324,7 @@ ROW_COUNTS=(10000 100000 1000000 10000000)
echo "========================================"
echo "Starting comprehensive benchmark suite"
echo "Row counts: ${ROW_COUNTS[*]}"
echo "Benchmarks: exact, match, ore"
echo "Benchmarks: exact, match, ore, group_by"
echo "========================================"
echo ""

Expand All @@ -299,19 +335,23 @@ for ROWS in "${ROW_COUNTS[@]}"; do
echo "Running benchmarks with $ROWS rows"
echo "========================================"
echo ""
echo "[1/3] Running EXACT benchmark with $ROWS rows..."

echo "[1/4] Running EXACT benchmark with $ROWS rows..."
mise run bench:query:exact "$ROWS"
echo ""
echo "[2/3] Running MATCH benchmark with $ROWS rows..."

echo "[2/4] Running MATCH benchmark with $ROWS rows..."
mise run bench:query:match "$ROWS"
echo ""
echo "[3/3] Running ORE benchmark with $ROWS rows..."

echo "[3/4] Running ORE benchmark with $ROWS rows..."
mise run bench:query:ore "$ROWS"
echo ""


echo "[4/4] Running GROUP BY benchmark with $ROWS rows..."
mise run bench:query:group_by "$ROWS"
echo ""

echo "Completed benchmarks for $ROWS rows"
echo ""
done
Expand Down
Loading