From 0dc8363f9ed4619e8c7b55cfa9ef1236a3370974 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 2 Jun 2026 15:09:09 +1000 Subject: [PATCH 01/19] build(test): path-dep eql-scalars from tests/sqlx for catalog-driven fixtures --- Cargo.lock | 1 + tests/sqlx/Cargo.toml | 1 + 2 files changed, 2 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index ebc39165..2d902e93 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1174,6 +1174,7 @@ version = "0.1.0" dependencies = [ "anyhow", "cipherstash-client", + "eql-scalars", "hex", "jsonschema", "paste", diff --git a/tests/sqlx/Cargo.toml b/tests/sqlx/Cargo.toml index 50f7d035..c7a8525a 100644 --- a/tests/sqlx/Cargo.toml +++ b/tests/sqlx/Cargo.toml @@ -13,6 +13,7 @@ hex = "0.4" jsonschema = { version = "0.46.4", default-features = false } cipherstash-client = { version = "0.35", features = ["tokio"] } paste = "1" +eql-scalars = { path = "../../crates/eql-scalars" } [dev-dependencies] # None needed - tests live in this crate From 21e0c9eb2aff93d48e5939b47430f1e0992969d8 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 2 Jun 2026 15:10:21 +1000 Subject: [PATCH 02/19] build: generate encrypted-domain SQL via eql-codegen (Rust), not Python --- tasks/build.sh | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/tasks/build.sh b/tasks/build.sh index cef25521..621b0b72 100755 --- a/tasks/build.sh +++ b/tasks/build.sh @@ -1,7 +1,7 @@ #!/usr/bin/env bash #MISE description="Build SQL into single release file" #MISE alias="b" -#MISE sources=["src/**/*.sql", "tasks/pin_search_path.sql", "tasks/uninstall.sql", "tasks/uninstall-protect.sql", "tasks/codegen/types/*.toml", "tasks/codegen/*.py"] +#MISE sources=["src/**/*.sql", "tasks/pin_search_path.sql", "tasks/uninstall.sql", "tasks/uninstall-protect.sql", "crates/eql-scalars/src/**/*.rs", "crates/eql-codegen/src/**/*.rs"] #MISE 
outputs=["release/cipherstash-encrypt.sql","release/cipherstash-encrypt-uninstall.sql","release/cipherstash-encrypt-protect.sql","release/cipherstash-encrypt-protect-uninstall.sql"] #USAGE flag "--version " help="Specify release version of EQL" default="DEV" @@ -9,25 +9,27 @@ set -euo pipefail -# Regenerate encrypted-domain SQL from TOML specs before building. +# Regenerate encrypted-domain SQL from the Rust catalog before building. # Generated files (src/encrypted_domain//_*.sql) are gitignored; the -# manifest at tasks/codegen/types/.toml is the source of truth. +# catalog at crates/eql-scalars/src (eql-scalars::CATALOG) is the source of +# truth, rendered by the eql-codegen binary. # -# Nuke every generated file first so a deleted or renamed manifest can't +# Nuke every generated file first so a type removed from the catalog can't # leave orphans in src/ that the `src/**/*.sql` build glob would silently -# pick up. writer.py cleans within a directory it's regenerating, but it -# never runs for a type whose manifest no longer exists. Hand-written -# *_extensions.sql is preserved by the name patterns; -mindepth 2 keeps -# the type-agnostic src/encrypted_domain/functions.sql safe. +# pick up. eql-codegen cleans within a directory it regenerates, but never +# runs for a type no longer in the catalog. Hand-written *_extensions.sql is +# preserved by the name patterns; -mindepth 2 keeps the type-agnostic +# src/encrypted_domain/functions.sql safe. find src/encrypted_domain -mindepth 2 -type f \ \( -name '*_types.sql' -o -name '*_functions.sql' -o -name '*_operators.sql' \ -o -name '*_aggregates.sql' \) \ -delete 2>/dev/null || true -# Regenerate every type — single source of truth for the enumeration lives in -# tasks/codegen/generate.py (sorted, deterministic, aggregate exit code). The -# orphan sweep above still handles the manifest-deleted case --all cannot. 
-mise exec python -- python -m tasks.codegen.generate --all +# Regenerate every type — the catalog (eql-scalars::CATALOG) is the single +# source of truth for the enumeration; eql-codegen renders all SQL and all +# tests/sqlx/src/fixtures/_values.rs in one deterministic run. The orphan +# sweep above still handles the catalog-removed case the generator cannot. +cargo run -p eql-codegen # Fail loudly if any file referenced in a tsorted dep list doesn't exist. # Without this, `xargs cat` would print `cat: foo.sql: No such file or directory` From a3cc631d0eca3963505ae0ea8b1593b700dc0511 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 2 Jun 2026 15:11:44 +1000 Subject: [PATCH 03/19] test(fixtures): add catalog-driven generate-all-fixtures entry point --- tests/sqlx/tests/generate_all_fixtures.rs | 46 +++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 tests/sqlx/tests/generate_all_fixtures.rs diff --git a/tests/sqlx/tests/generate_all_fixtures.rs b/tests/sqlx/tests/generate_all_fixtures.rs new file mode 100644 index 00000000..bbc1f595 --- /dev/null +++ b/tests/sqlx/tests/generate_all_fixtures.rs @@ -0,0 +1,46 @@ +//! Catalog-driven "generate every encrypted fixture" entry point. +//! +//! Replaces the Python-era `fixture:generate ` per-type scripts and the +//! `fixture:generate:all` TOML-glob loop (which spawned a separate `cargo test` +//! per type). This runs ALL scalar fixture generators in ONE process, iterating +//! `eql_scalars::CATALOG` for the authoritative token set. +//! +//! The encrypted-fixture logic itself is unchanged — each type's +//! `fixtures::eql_v2_::spec().run()` still produces +//! `tests/sqlx/fixtures/eql_v2_.sql` exactly as before. +//! +//! Gated behind `fixture-gen` (needs a live Postgres + CS_* creds). Run via: +//! mise run fixture:generate:all +#![cfg(feature = "fixture-gen")] + +use eql_scalars::CATALOG; +use eql_tests::fixtures; + +/// Map a catalog token to its fixture generator and run it. 
A token present in +/// the catalog but missing here is a wiring gap — fail loudly so a new scalar +/// type cannot silently skip fixture generation. +async fn generate_for_token(token: &str) -> anyhow::Result<()> { + match token { + "int2" => fixtures::eql_v2_int2::spec().run().await, + "int4" => fixtures::eql_v2_int4::spec().run().await, + other => anyhow::bail!( + "no fixture generator wired for catalog token '{other}'. \ + Add an arm to generate_for_token in tests/sqlx/tests/generate_all_fixtures.rs \ + (and the eql_v2_{other} fixture module). See the encrypted-domain spec §9." + ), + } +} + +#[tokio::test] +#[ignore = "generator — run via `mise run fixture:generate:all`"] +async fn generate_all() -> anyhow::Result<()> { + let mut generated = 0usize; + for spec in CATALOG { + eprintln!("Generating fixture eql_v2_{}...", spec.token); + generate_for_token(spec.token).await?; + generated += 1; + } + assert!(generated > 0, "CATALOG is empty — nothing to generate"); + eprintln!("Regenerated {generated} scalar fixture(s)."); + Ok(()) +} From 76e1e0681ee6563acd8dec6a2357e6843d19a432 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 2 Jun 2026 15:13:58 +1000 Subject: [PATCH 04/19] build(fixtures): collapse fixture:generate{,:all} into one catalog-driven task --- tasks/fixtures.toml | 61 +++++++++++---------------------------------- 1 file changed, 14 insertions(+), 47 deletions(-) diff --git a/tasks/fixtures.toml b/tasks/fixtures.toml index ecfe3ad9..f24bc684 100644 --- a/tasks/fixtures.toml +++ b/tasks/fixtures.toml @@ -1,8 +1,13 @@ -["fixture:generate"] -description = "Generate a SQLx fixture script via cipherstash-client" -# Runs the gated generator for the named fixture. Writes -# tests/sqlx/fixtures/.sql. Must run inside the crate — there is no -# root Cargo.toml — matching test:schema / test:sqlx:watch. 
+["fixture:generate:all"] +description = "Regenerate every scalar SQLx fixture in one process, driven by eql-scalars::CATALOG" +# Replaces the Python-era per-type `fixture:generate ` script and the +# TOML-glob `fixture:generate:all` loop (one `cargo test` per type). The +# generate_all_fixtures test iterates eql-scalars::CATALOG and runs every +# eql_v2_ fixture generator in a SINGLE process. The encrypted-fixture logic +# is unchanged; only enumeration + entry point changed. +# +# Writes tests/sqlx/fixtures/eql_v2_.sql (gitignored — regenerated on every +# `mise run test:sqlx`). # # Prerequisites: # - mise run postgres:up (Postgres with EQL installed) @@ -11,48 +16,10 @@ description = "Generate a SQLx fixture script via cipherstash-client" # CS_CLIENT_ACCESS_KEY + CS_WORKSPACE_CRN ZeroKMS auth (AutoStrategy) # CS_CLIENT_ID + CS_CLIENT_KEY client key (EnvKeyProvider) # -# Usage: mise run fixture:generate eql_v2_int4 +# Must run inside the crate — a workspace member still builds from its own dir. dir = "{{config_root}}/tests/sqlx" run = """ -fixture="{{arg(name="fixture")}}" -# Match the Rust `FixtureIdentifier` rule: `^[a-z][a-z0-9_]*$`. Reject -# empty, leading-digit, and any non-lowercase-alphanumeric-underscore -# input here so the failure mode is a clear shell error rather than a -# Rust panic during the cargo test invocation. -case "$fixture" in - (''|[0-9]*|*[!a-z0-9_]*) echo "Invalid fixture name: $fixture (expected ^[a-z][a-z0-9_]*$)" >&2; exit 1 ;; -esac - -cargo test --features fixture-gen --lib \ - "fixtures::${fixture}::generate" \ - -- --ignored --exact --nocapture -""" - -["fixture:generate:all"] -description = "Regenerate every scalar SQLx fixture declared by a type manifest" -# Enumerates tasks/codegen/types/*.toml — the SAME manifests that -# `codegen:domain:all` drives — and regenerates the SQLx fixture for each type -# whose manifest declares a [fixture] table. 
This keeps the test fixtures in -# lockstep with the declared scalar types: adding a new scalar type (a new -# .toml with a [fixture] table) is picked up automatically, so the test -# task never has to hand-list each fixture. Same prerequisites as -# `fixture:generate` (Postgres up + CS_* credentials). -dir = "{{config_root}}" -run = """ -generated=0 -for manifest in tasks/codegen/types/*.toml; do - # Guard the no-match case (glob stays literal under POSIX sh). - [ -e "$manifest" ] || continue - # Only types that declare a [fixture] table have a SQLx fixture generator. - grep -qE '^\\[fixture\\]' "$manifest" || continue - token=$(basename "$manifest" .toml) - echo "Generating fixture eql_v2_${token}..." - mise run fixture:generate "eql_v2_${token}" - generated=$((generated + 1)) -done -if [ "$generated" -eq 0 ]; then - echo "No scalar manifests with a [fixture] table found in tasks/codegen/types/" >&2 - exit 1 -fi -echo "Regenerated ${generated} scalar fixture(s)." +set -euo pipefail +cargo test --features fixture-gen --test generate_all_fixtures \ + generate_all -- --ignored --exact --nocapture """ From b1abf23f488f7c792e23a8a0e01ac2bd9acc5e6f Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 2 Jun 2026 15:14:23 +1000 Subject: [PATCH 05/19] docs(mise): describe catalog-driven fixture regeneration in test:sqlx --- mise.toml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/mise.toml b/mise.toml index e627b45d..b17117b8 100644 --- a/mise.toml +++ b/mise.toml @@ -58,9 +58,10 @@ cd tests/sqlx sqlx migrate run # Regenerate fixtures every run — they are not committed (see .gitignore). -# fixture:generate:all enumerates every scalar manifest in -# tasks/codegen/types/ that declares a [fixture] table, so new scalar types -# are picked up automatically without editing this task. 
+# fixture:generate:all iterates eql-scalars::CATALOG and generates every +# scalar fixture in one process, so new scalar types are picked up +# automatically (add the catalog row + the fixture wiring) without editing +# this task. # Generator encrypts via cipherstash-client directly, which needs BOTH a # ZeroKMS auth credential (CS_CLIENT_ACCESS_KEY + CS_WORKSPACE_CRN, via # AutoStrategy) AND a client key (CS_CLIENT_ID + CS_CLIENT_KEY, via From de08b28d2b5935aba44d134eaaf63f83e00a8f2a Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 2 Jun 2026 15:32:03 +1000 Subject: [PATCH 06/19] test(matrix): collapse per-type inventory to one catalog-reconciled snapshot --- mise.toml | 76 ++++++-- tests/sqlx/snapshots/int2_matrix_tests.txt | 211 --------------------- tests/sqlx/snapshots/int4_matrix_tests.txt | 211 --------------------- tests/sqlx/snapshots/matrix_tests.txt | 211 +++++++++++++++++++++ 4 files changed, 267 insertions(+), 442 deletions(-) delete mode 100644 tests/sqlx/snapshots/int2_matrix_tests.txt delete mode 100644 tests/sqlx/snapshots/int4_matrix_tests.txt create mode 100644 tests/sqlx/snapshots/matrix_tests.txt diff --git a/mise.toml b/mise.toml index b17117b8..c92d222b 100644 --- a/mise.toml +++ b/mise.toml @@ -129,29 +129,65 @@ cargo test -p eql-scalars -p eql-codegen """ [tasks."test:matrix:inventory"] -description = "Regenerate the int4/int2 matrix test-name inventory snapshots (no database required)" +description = "Verify the matrix test-name set against the single canonical snapshot, catalog-cross-checked (no database required)" dir = "{{config_root}}/tests/sqlx" run = """ -# Pin an explicit feature set so the inventory is deterministic regardless of -# the caller's local flags. `--no-default-features` keeps the `scale` arm -# (`#[cfg(feature = "scale")]`) excluded — its add/delete is a known blind spot -# of this default-feature inventory, covered instead by the scale gate + the -# family::mutations negative controls. 
`--list` enumerates the whole -# encrypted_domain binary (family::support, family::inlinability, -# family::mutations, scalars::int4, scalars::int2); the per-scalar `grep` -# scopes each snapshot to that matrix only, so landing other family tests -# never dirties it. `LC_ALL=C sort` makes ordering byte-stable across locales -# (a bare `sort` is locale-dependent and yields spurious CI diffs). +# ONE canonical, token-normalized snapshot (snapshots/matrix_tests.txt) pins the +# set of macro-emitted matrix test names. The two per-type snapshots are gone: +# they were byte-identical modulo the type token, so one canonical set plus a +# per-type normalize+compare carries the same signal at 1/N the committed surface. +# +# Steps: +# 1. List the encrypted_domain binary ONCE (deterministic; reused below). +# 2. Discover the set of scalar types present FROM THE BINARY'S OWN OUTPUT +# (scalars:::: prefixes) — never a directory glob. +# 3. For each discovered type, normalize its token to and assert its set +# equals the canonical snapshot. Assert at least one type is present. +# 4. Completeness cross-check: assert the discovered type set equals +# `eql-codegen list-types`. A catalog type added without its matrix wiring +# (no scalars:::: tests in the binary) fails here. +# +# `--no-default-features` excludes the `scale` arm (a known, documented blind +# spot, covered by the scale gate + family::mutations negative controls). +# `LC_ALL=C sort` makes ordering byte-stable across locales. No database needed. 
set -euo pipefail -mkdir -p snapshots -cargo test --no-default-features --test encrypted_domain -- --list | - sed -n 's/: test$//p' | - grep '^scalars::int4' | - LC_ALL=C sort > snapshots/int4_matrix_tests.txt -cargo test --no-default-features --test encrypted_domain -- --list | - sed -n 's/: test$//p' | - grep '^scalars::int2' | - LC_ALL=C sort > snapshots/int2_matrix_tests.txt + +test -f snapshots/matrix_tests.txt || { echo "snapshots/matrix_tests.txt missing — regenerate (see snapshots/README.md)." >&2; exit 1; } + +listing=$(cargo test --no-default-features --test encrypted_domain -- --list | sed -n 's/: test$//p') + +# Types present in the binary, from scalars:::: prefixes. +discovered=$(printf '%s\\n' "$listing" \ + | sed -n 's/^scalars::\\([a-z0-9_]*\\)::.*/\\1/p' \ + | LC_ALL=C sort -u) +[ -n "$discovered" ] || { echo "No scalars:::: tests found in the encrypted_domain binary." >&2; exit 1; } + +# Per-type normalize + compare against the canonical snapshot. +checked=0 +while IFS= read -r t; do + [ -n "$t" ] || continue + printf '%s\\n' "$listing" | grep "^scalars::${t}::" \ + | sed -e "s/^scalars::${t}::/scalars::::/" -e "s/_${t}_/__/g" | LC_ALL=C sort > "/tmp/matrix-norm-${t}.txt" + if ! cmp -s "/tmp/matrix-norm-${t}.txt" snapshots/matrix_tests.txt; then + echo "Matrix test-name set for '${t}' differs from snapshots/matrix_tests.txt:" >&2 + diff snapshots/matrix_tests.txt "/tmp/matrix-norm-${t}.txt" >&2 || true + exit 1 + fi + checked=$((checked + 1)) +done <<< "$discovered" +[ "$checked" -gt 0 ] || { echo "No scalar type matched the canonical snapshot." >&2; exit 1; } + +# Completeness cross-check against the catalog (the single source of truth). +catalog=$(cd "{{config_root}}" && cargo run -p eql-codegen -- list-types | LC_ALL=C sort -u) +if [ "$discovered" != "$catalog" ]; then + echo "Catalog types and matrix-wired types disagree." 
>&2 + echo " catalog (eql-codegen list-types): $(echo "$catalog" | tr '\\n' ' ')" >&2 + echo " matrix-wired (binary --list): $(echo "$discovered" | tr '\\n' ' ')" >&2 + echo "A catalog type missing its matrix wiring (or a wired type not in the catalog) trips this." >&2 + exit 1 +fi + +echo "Matrix inventory OK: ${checked} type(s) match the canonical snapshot; catalog reconciled." """ [tasks."test:matrix:expand"] diff --git a/tests/sqlx/snapshots/int2_matrix_tests.txt b/tests/sqlx/snapshots/int2_matrix_tests.txt deleted file mode 100644 index 3b6ed674..00000000 --- a/tests/sqlx/snapshots/int2_matrix_tests.txt +++ /dev/null @@ -1,211 +0,0 @@ -scalars::int2::matrix_int2_eq_aggregate_typecheck_max -scalars::int2::matrix_int2_eq_aggregate_typecheck_min -scalars::int2::matrix_int2_eq_contained_by_blocker -scalars::int2::matrix_int2_eq_contains_blocker -scalars::int2::matrix_int2_eq_count_distinct_extractor -scalars::int2::matrix_int2_eq_count_path_cast -scalars::int2::matrix_int2_eq_count_typed_column -scalars::int2::matrix_int2_eq_eq_pivot_max_correctness -scalars::int2::matrix_int2_eq_eq_pivot_max_cross_shape -scalars::int2::matrix_int2_eq_eq_pivot_min_correctness -scalars::int2::matrix_int2_eq_eq_pivot_min_cross_shape -scalars::int2::matrix_int2_eq_eq_pivot_zero_correctness -scalars::int2::matrix_int2_eq_eq_pivot_zero_cross_shape -scalars::int2::matrix_int2_eq_eq_supported_null -scalars::int2::matrix_int2_eq_gt_blocker -scalars::int2::matrix_int2_eq_gte_blocker -scalars::int2::matrix_int2_eq_index_engages_btree -scalars::int2::matrix_int2_eq_index_engages_hash -scalars::int2::matrix_int2_eq_lt_blocker -scalars::int2::matrix_int2_eq_lte_blocker -scalars::int2::matrix_int2_eq_native_absent_ops -scalars::int2::matrix_int2_eq_neq_pivot_max_correctness -scalars::int2::matrix_int2_eq_neq_pivot_max_cross_shape -scalars::int2::matrix_int2_eq_neq_pivot_min_correctness -scalars::int2::matrix_int2_eq_neq_pivot_min_cross_shape 
-scalars::int2::matrix_int2_eq_neq_pivot_zero_correctness -scalars::int2::matrix_int2_eq_neq_pivot_zero_cross_shape -scalars::int2::matrix_int2_eq_neq_supported_null -scalars::int2::matrix_int2_eq_path_op_blockers -scalars::int2::matrix_int2_eq_payload_check -scalars::int2::matrix_int2_eq_planner_metadata_eq -scalars::int2::matrix_int2_eq_sanity -scalars::int2::matrix_int2_eq_typed_column_blocker -scalars::int2::matrix_int2_fixture_shape -scalars::int2::matrix_int2_ord_aggregate_group_by_max -scalars::int2::matrix_int2_ord_aggregate_group_by_min -scalars::int2::matrix_int2_ord_aggregate_max -scalars::int2::matrix_int2_ord_aggregate_max_all_null -scalars::int2::matrix_int2_ord_aggregate_max_empty -scalars::int2::matrix_int2_ord_aggregate_max_mixed_null -scalars::int2::matrix_int2_ord_aggregate_min -scalars::int2::matrix_int2_ord_aggregate_min_all_null -scalars::int2::matrix_int2_ord_aggregate_min_empty -scalars::int2::matrix_int2_ord_aggregate_min_mixed_null -scalars::int2::matrix_int2_ord_aggregate_parallel_safe -scalars::int2::matrix_int2_ord_contained_by_blocker -scalars::int2::matrix_int2_ord_contains_blocker -scalars::int2::matrix_int2_ord_count_distinct_extractor -scalars::int2::matrix_int2_ord_count_path_cast -scalars::int2::matrix_int2_ord_count_typed_column -scalars::int2::matrix_int2_ord_eq_pivot_max_correctness -scalars::int2::matrix_int2_ord_eq_pivot_max_cross_shape -scalars::int2::matrix_int2_ord_eq_pivot_min_correctness -scalars::int2::matrix_int2_ord_eq_pivot_min_cross_shape -scalars::int2::matrix_int2_ord_eq_pivot_zero_correctness -scalars::int2::matrix_int2_ord_eq_pivot_zero_cross_shape -scalars::int2::matrix_int2_ord_eq_supported_null -scalars::int2::matrix_int2_ord_gt_pivot_max_correctness -scalars::int2::matrix_int2_ord_gt_pivot_max_cross_shape -scalars::int2::matrix_int2_ord_gt_pivot_min_correctness -scalars::int2::matrix_int2_ord_gt_pivot_min_cross_shape -scalars::int2::matrix_int2_ord_gt_pivot_zero_correctness 
-scalars::int2::matrix_int2_ord_gt_pivot_zero_cross_shape -scalars::int2::matrix_int2_ord_gt_supported_null -scalars::int2::matrix_int2_ord_gte_pivot_max_correctness -scalars::int2::matrix_int2_ord_gte_pivot_max_cross_shape -scalars::int2::matrix_int2_ord_gte_pivot_min_correctness -scalars::int2::matrix_int2_ord_gte_pivot_min_cross_shape -scalars::int2::matrix_int2_ord_gte_pivot_zero_correctness -scalars::int2::matrix_int2_ord_gte_pivot_zero_cross_shape -scalars::int2::matrix_int2_ord_gte_supported_null -scalars::int2::matrix_int2_ord_index_engages_btree -scalars::int2::matrix_int2_ord_lt_pivot_max_correctness -scalars::int2::matrix_int2_ord_lt_pivot_max_cross_shape -scalars::int2::matrix_int2_ord_lt_pivot_min_correctness -scalars::int2::matrix_int2_ord_lt_pivot_min_cross_shape -scalars::int2::matrix_int2_ord_lt_pivot_zero_correctness -scalars::int2::matrix_int2_ord_lt_pivot_zero_cross_shape -scalars::int2::matrix_int2_ord_lt_supported_null -scalars::int2::matrix_int2_ord_lte_pivot_max_correctness -scalars::int2::matrix_int2_ord_lte_pivot_max_cross_shape -scalars::int2::matrix_int2_ord_lte_pivot_min_correctness -scalars::int2::matrix_int2_ord_lte_pivot_min_cross_shape -scalars::int2::matrix_int2_ord_lte_pivot_zero_correctness -scalars::int2::matrix_int2_ord_lte_pivot_zero_cross_shape -scalars::int2::matrix_int2_ord_lte_supported_null -scalars::int2::matrix_int2_ord_native_absent_ops -scalars::int2::matrix_int2_ord_neq_pivot_max_correctness -scalars::int2::matrix_int2_ord_neq_pivot_max_cross_shape -scalars::int2::matrix_int2_ord_neq_pivot_min_correctness -scalars::int2::matrix_int2_ord_neq_pivot_min_cross_shape -scalars::int2::matrix_int2_ord_neq_pivot_zero_correctness -scalars::int2::matrix_int2_ord_neq_pivot_zero_cross_shape -scalars::int2::matrix_int2_ord_neq_supported_null -scalars::int2::matrix_int2_ord_ord_routes_through_ob -scalars::int2::matrix_int2_ord_order_by_asc_no_where -scalars::int2::matrix_int2_ord_order_by_asc_nulls_first 
-scalars::int2::matrix_int2_ord_order_by_asc_nulls_last -scalars::int2::matrix_int2_ord_order_by_asc_with_where -scalars::int2::matrix_int2_ord_order_by_desc_no_where -scalars::int2::matrix_int2_ord_order_by_desc_nulls_first -scalars::int2::matrix_int2_ord_order_by_desc_nulls_last -scalars::int2::matrix_int2_ord_order_by_desc_with_where -scalars::int2::matrix_int2_ord_order_by_using_gt_rejects -scalars::int2::matrix_int2_ord_order_by_using_gte_rejects -scalars::int2::matrix_int2_ord_order_by_using_lt_rejects -scalars::int2::matrix_int2_ord_order_by_using_lte_rejects -scalars::int2::matrix_int2_ord_ore_aggregate_group_by_max -scalars::int2::matrix_int2_ord_ore_aggregate_group_by_min -scalars::int2::matrix_int2_ord_ore_aggregate_max -scalars::int2::matrix_int2_ord_ore_aggregate_max_all_null -scalars::int2::matrix_int2_ord_ore_aggregate_max_empty -scalars::int2::matrix_int2_ord_ore_aggregate_max_mixed_null -scalars::int2::matrix_int2_ord_ore_aggregate_min -scalars::int2::matrix_int2_ord_ore_aggregate_min_all_null -scalars::int2::matrix_int2_ord_ore_aggregate_min_empty -scalars::int2::matrix_int2_ord_ore_aggregate_min_mixed_null -scalars::int2::matrix_int2_ord_ore_aggregate_parallel_safe -scalars::int2::matrix_int2_ord_ore_contained_by_blocker -scalars::int2::matrix_int2_ord_ore_contains_blocker -scalars::int2::matrix_int2_ord_ore_count_distinct_extractor -scalars::int2::matrix_int2_ord_ore_count_path_cast -scalars::int2::matrix_int2_ord_ore_count_typed_column -scalars::int2::matrix_int2_ord_ore_eq_pivot_max_correctness -scalars::int2::matrix_int2_ord_ore_eq_pivot_max_cross_shape -scalars::int2::matrix_int2_ord_ore_eq_pivot_min_correctness -scalars::int2::matrix_int2_ord_ore_eq_pivot_min_cross_shape -scalars::int2::matrix_int2_ord_ore_eq_pivot_zero_correctness -scalars::int2::matrix_int2_ord_ore_eq_pivot_zero_cross_shape -scalars::int2::matrix_int2_ord_ore_eq_supported_null -scalars::int2::matrix_int2_ord_ore_gt_pivot_max_correctness 
-scalars::int2::matrix_int2_ord_ore_gt_pivot_max_cross_shape -scalars::int2::matrix_int2_ord_ore_gt_pivot_min_correctness -scalars::int2::matrix_int2_ord_ore_gt_pivot_min_cross_shape -scalars::int2::matrix_int2_ord_ore_gt_pivot_zero_correctness -scalars::int2::matrix_int2_ord_ore_gt_pivot_zero_cross_shape -scalars::int2::matrix_int2_ord_ore_gt_supported_null -scalars::int2::matrix_int2_ord_ore_gte_pivot_max_correctness -scalars::int2::matrix_int2_ord_ore_gte_pivot_max_cross_shape -scalars::int2::matrix_int2_ord_ore_gte_pivot_min_correctness -scalars::int2::matrix_int2_ord_ore_gte_pivot_min_cross_shape -scalars::int2::matrix_int2_ord_ore_gte_pivot_zero_correctness -scalars::int2::matrix_int2_ord_ore_gte_pivot_zero_cross_shape -scalars::int2::matrix_int2_ord_ore_gte_supported_null -scalars::int2::matrix_int2_ord_ore_index_engages_btree -scalars::int2::matrix_int2_ord_ore_lt_pivot_max_correctness -scalars::int2::matrix_int2_ord_ore_lt_pivot_max_cross_shape -scalars::int2::matrix_int2_ord_ore_lt_pivot_min_correctness -scalars::int2::matrix_int2_ord_ore_lt_pivot_min_cross_shape -scalars::int2::matrix_int2_ord_ore_lt_pivot_zero_correctness -scalars::int2::matrix_int2_ord_ore_lt_pivot_zero_cross_shape -scalars::int2::matrix_int2_ord_ore_lt_supported_null -scalars::int2::matrix_int2_ord_ore_lte_pivot_max_correctness -scalars::int2::matrix_int2_ord_ore_lte_pivot_max_cross_shape -scalars::int2::matrix_int2_ord_ore_lte_pivot_min_correctness -scalars::int2::matrix_int2_ord_ore_lte_pivot_min_cross_shape -scalars::int2::matrix_int2_ord_ore_lte_pivot_zero_correctness -scalars::int2::matrix_int2_ord_ore_lte_pivot_zero_cross_shape -scalars::int2::matrix_int2_ord_ore_lte_supported_null -scalars::int2::matrix_int2_ord_ore_native_absent_ops -scalars::int2::matrix_int2_ord_ore_neq_pivot_max_correctness -scalars::int2::matrix_int2_ord_ore_neq_pivot_max_cross_shape -scalars::int2::matrix_int2_ord_ore_neq_pivot_min_correctness -scalars::int2::matrix_int2_ord_ore_neq_pivot_min_cross_shape 
-scalars::int2::matrix_int2_ord_ore_neq_pivot_zero_correctness -scalars::int2::matrix_int2_ord_ore_neq_pivot_zero_cross_shape -scalars::int2::matrix_int2_ord_ore_neq_supported_null -scalars::int2::matrix_int2_ord_ore_ord_routes_through_ob -scalars::int2::matrix_int2_ord_ore_order_by_asc_no_where -scalars::int2::matrix_int2_ord_ore_order_by_asc_nulls_first -scalars::int2::matrix_int2_ord_ore_order_by_asc_nulls_last -scalars::int2::matrix_int2_ord_ore_order_by_asc_with_where -scalars::int2::matrix_int2_ord_ore_order_by_desc_no_where -scalars::int2::matrix_int2_ord_ore_order_by_desc_nulls_first -scalars::int2::matrix_int2_ord_ore_order_by_desc_nulls_last -scalars::int2::matrix_int2_ord_ore_order_by_desc_with_where -scalars::int2::matrix_int2_ord_ore_order_by_using_gt_rejects -scalars::int2::matrix_int2_ord_ore_order_by_using_gte_rejects -scalars::int2::matrix_int2_ord_ore_order_by_using_lt_rejects -scalars::int2::matrix_int2_ord_ore_order_by_using_lte_rejects -scalars::int2::matrix_int2_ord_ore_ore_injectivity -scalars::int2::matrix_int2_ord_ore_path_op_blockers -scalars::int2::matrix_int2_ord_ore_payload_check -scalars::int2::matrix_int2_ord_ore_planner_metadata_eq -scalars::int2::matrix_int2_ord_ore_planner_metadata_ord -scalars::int2::matrix_int2_ord_ore_sanity -scalars::int2::matrix_int2_ord_ore_typed_column_blocker -scalars::int2::matrix_int2_ord_path_op_blockers -scalars::int2::matrix_int2_ord_payload_check -scalars::int2::matrix_int2_ord_planner_metadata_eq -scalars::int2::matrix_int2_ord_planner_metadata_ord -scalars::int2::matrix_int2_ord_sanity -scalars::int2::matrix_int2_ord_scale_preference_default_btree -scalars::int2::matrix_int2_ord_typed_column_blocker -scalars::int2::matrix_int2_storage_aggregate_typecheck_max -scalars::int2::matrix_int2_storage_aggregate_typecheck_min -scalars::int2::matrix_int2_storage_contained_by_blocker -scalars::int2::matrix_int2_storage_contains_blocker -scalars::int2::matrix_int2_storage_count_path_cast 
-scalars::int2::matrix_int2_storage_count_typed_column -scalars::int2::matrix_int2_storage_eq_blocker -scalars::int2::matrix_int2_storage_gt_blocker -scalars::int2::matrix_int2_storage_gte_blocker -scalars::int2::matrix_int2_storage_lt_blocker -scalars::int2::matrix_int2_storage_lte_blocker -scalars::int2::matrix_int2_storage_native_absent_ops -scalars::int2::matrix_int2_storage_neq_blocker -scalars::int2::matrix_int2_storage_path_op_blockers -scalars::int2::matrix_int2_storage_payload_check -scalars::int2::matrix_int2_storage_sanity -scalars::int2::matrix_int2_storage_typed_column_blocker diff --git a/tests/sqlx/snapshots/int4_matrix_tests.txt b/tests/sqlx/snapshots/int4_matrix_tests.txt deleted file mode 100644 index 1fab59bd..00000000 --- a/tests/sqlx/snapshots/int4_matrix_tests.txt +++ /dev/null @@ -1,211 +0,0 @@ -scalars::int4::matrix_int4_eq_aggregate_typecheck_max -scalars::int4::matrix_int4_eq_aggregate_typecheck_min -scalars::int4::matrix_int4_eq_contained_by_blocker -scalars::int4::matrix_int4_eq_contains_blocker -scalars::int4::matrix_int4_eq_count_distinct_extractor -scalars::int4::matrix_int4_eq_count_path_cast -scalars::int4::matrix_int4_eq_count_typed_column -scalars::int4::matrix_int4_eq_eq_pivot_max_correctness -scalars::int4::matrix_int4_eq_eq_pivot_max_cross_shape -scalars::int4::matrix_int4_eq_eq_pivot_min_correctness -scalars::int4::matrix_int4_eq_eq_pivot_min_cross_shape -scalars::int4::matrix_int4_eq_eq_pivot_zero_correctness -scalars::int4::matrix_int4_eq_eq_pivot_zero_cross_shape -scalars::int4::matrix_int4_eq_eq_supported_null -scalars::int4::matrix_int4_eq_gt_blocker -scalars::int4::matrix_int4_eq_gte_blocker -scalars::int4::matrix_int4_eq_index_engages_btree -scalars::int4::matrix_int4_eq_index_engages_hash -scalars::int4::matrix_int4_eq_lt_blocker -scalars::int4::matrix_int4_eq_lte_blocker -scalars::int4::matrix_int4_eq_native_absent_ops -scalars::int4::matrix_int4_eq_neq_pivot_max_correctness 
-scalars::int4::matrix_int4_eq_neq_pivot_max_cross_shape -scalars::int4::matrix_int4_eq_neq_pivot_min_correctness -scalars::int4::matrix_int4_eq_neq_pivot_min_cross_shape -scalars::int4::matrix_int4_eq_neq_pivot_zero_correctness -scalars::int4::matrix_int4_eq_neq_pivot_zero_cross_shape -scalars::int4::matrix_int4_eq_neq_supported_null -scalars::int4::matrix_int4_eq_path_op_blockers -scalars::int4::matrix_int4_eq_payload_check -scalars::int4::matrix_int4_eq_planner_metadata_eq -scalars::int4::matrix_int4_eq_sanity -scalars::int4::matrix_int4_eq_typed_column_blocker -scalars::int4::matrix_int4_fixture_shape -scalars::int4::matrix_int4_ord_aggregate_group_by_max -scalars::int4::matrix_int4_ord_aggregate_group_by_min -scalars::int4::matrix_int4_ord_aggregate_max -scalars::int4::matrix_int4_ord_aggregate_max_all_null -scalars::int4::matrix_int4_ord_aggregate_max_empty -scalars::int4::matrix_int4_ord_aggregate_max_mixed_null -scalars::int4::matrix_int4_ord_aggregate_min -scalars::int4::matrix_int4_ord_aggregate_min_all_null -scalars::int4::matrix_int4_ord_aggregate_min_empty -scalars::int4::matrix_int4_ord_aggregate_min_mixed_null -scalars::int4::matrix_int4_ord_aggregate_parallel_safe -scalars::int4::matrix_int4_ord_contained_by_blocker -scalars::int4::matrix_int4_ord_contains_blocker -scalars::int4::matrix_int4_ord_count_distinct_extractor -scalars::int4::matrix_int4_ord_count_path_cast -scalars::int4::matrix_int4_ord_count_typed_column -scalars::int4::matrix_int4_ord_eq_pivot_max_correctness -scalars::int4::matrix_int4_ord_eq_pivot_max_cross_shape -scalars::int4::matrix_int4_ord_eq_pivot_min_correctness -scalars::int4::matrix_int4_ord_eq_pivot_min_cross_shape -scalars::int4::matrix_int4_ord_eq_pivot_zero_correctness -scalars::int4::matrix_int4_ord_eq_pivot_zero_cross_shape -scalars::int4::matrix_int4_ord_eq_supported_null -scalars::int4::matrix_int4_ord_gt_pivot_max_correctness -scalars::int4::matrix_int4_ord_gt_pivot_max_cross_shape 
-scalars::int4::matrix_int4_ord_gt_pivot_min_correctness -scalars::int4::matrix_int4_ord_gt_pivot_min_cross_shape -scalars::int4::matrix_int4_ord_gt_pivot_zero_correctness -scalars::int4::matrix_int4_ord_gt_pivot_zero_cross_shape -scalars::int4::matrix_int4_ord_gt_supported_null -scalars::int4::matrix_int4_ord_gte_pivot_max_correctness -scalars::int4::matrix_int4_ord_gte_pivot_max_cross_shape -scalars::int4::matrix_int4_ord_gte_pivot_min_correctness -scalars::int4::matrix_int4_ord_gte_pivot_min_cross_shape -scalars::int4::matrix_int4_ord_gte_pivot_zero_correctness -scalars::int4::matrix_int4_ord_gte_pivot_zero_cross_shape -scalars::int4::matrix_int4_ord_gte_supported_null -scalars::int4::matrix_int4_ord_index_engages_btree -scalars::int4::matrix_int4_ord_lt_pivot_max_correctness -scalars::int4::matrix_int4_ord_lt_pivot_max_cross_shape -scalars::int4::matrix_int4_ord_lt_pivot_min_correctness -scalars::int4::matrix_int4_ord_lt_pivot_min_cross_shape -scalars::int4::matrix_int4_ord_lt_pivot_zero_correctness -scalars::int4::matrix_int4_ord_lt_pivot_zero_cross_shape -scalars::int4::matrix_int4_ord_lt_supported_null -scalars::int4::matrix_int4_ord_lte_pivot_max_correctness -scalars::int4::matrix_int4_ord_lte_pivot_max_cross_shape -scalars::int4::matrix_int4_ord_lte_pivot_min_correctness -scalars::int4::matrix_int4_ord_lte_pivot_min_cross_shape -scalars::int4::matrix_int4_ord_lte_pivot_zero_correctness -scalars::int4::matrix_int4_ord_lte_pivot_zero_cross_shape -scalars::int4::matrix_int4_ord_lte_supported_null -scalars::int4::matrix_int4_ord_native_absent_ops -scalars::int4::matrix_int4_ord_neq_pivot_max_correctness -scalars::int4::matrix_int4_ord_neq_pivot_max_cross_shape -scalars::int4::matrix_int4_ord_neq_pivot_min_correctness -scalars::int4::matrix_int4_ord_neq_pivot_min_cross_shape -scalars::int4::matrix_int4_ord_neq_pivot_zero_correctness -scalars::int4::matrix_int4_ord_neq_pivot_zero_cross_shape -scalars::int4::matrix_int4_ord_neq_supported_null 
-scalars::int4::matrix_int4_ord_ord_routes_through_ob -scalars::int4::matrix_int4_ord_order_by_asc_no_where -scalars::int4::matrix_int4_ord_order_by_asc_nulls_first -scalars::int4::matrix_int4_ord_order_by_asc_nulls_last -scalars::int4::matrix_int4_ord_order_by_asc_with_where -scalars::int4::matrix_int4_ord_order_by_desc_no_where -scalars::int4::matrix_int4_ord_order_by_desc_nulls_first -scalars::int4::matrix_int4_ord_order_by_desc_nulls_last -scalars::int4::matrix_int4_ord_order_by_desc_with_where -scalars::int4::matrix_int4_ord_order_by_using_gt_rejects -scalars::int4::matrix_int4_ord_order_by_using_gte_rejects -scalars::int4::matrix_int4_ord_order_by_using_lt_rejects -scalars::int4::matrix_int4_ord_order_by_using_lte_rejects -scalars::int4::matrix_int4_ord_ore_aggregate_group_by_max -scalars::int4::matrix_int4_ord_ore_aggregate_group_by_min -scalars::int4::matrix_int4_ord_ore_aggregate_max -scalars::int4::matrix_int4_ord_ore_aggregate_max_all_null -scalars::int4::matrix_int4_ord_ore_aggregate_max_empty -scalars::int4::matrix_int4_ord_ore_aggregate_max_mixed_null -scalars::int4::matrix_int4_ord_ore_aggregate_min -scalars::int4::matrix_int4_ord_ore_aggregate_min_all_null -scalars::int4::matrix_int4_ord_ore_aggregate_min_empty -scalars::int4::matrix_int4_ord_ore_aggregate_min_mixed_null -scalars::int4::matrix_int4_ord_ore_aggregate_parallel_safe -scalars::int4::matrix_int4_ord_ore_contained_by_blocker -scalars::int4::matrix_int4_ord_ore_contains_blocker -scalars::int4::matrix_int4_ord_ore_count_distinct_extractor -scalars::int4::matrix_int4_ord_ore_count_path_cast -scalars::int4::matrix_int4_ord_ore_count_typed_column -scalars::int4::matrix_int4_ord_ore_eq_pivot_max_correctness -scalars::int4::matrix_int4_ord_ore_eq_pivot_max_cross_shape -scalars::int4::matrix_int4_ord_ore_eq_pivot_min_correctness -scalars::int4::matrix_int4_ord_ore_eq_pivot_min_cross_shape -scalars::int4::matrix_int4_ord_ore_eq_pivot_zero_correctness 
-scalars::int4::matrix_int4_ord_ore_eq_pivot_zero_cross_shape -scalars::int4::matrix_int4_ord_ore_eq_supported_null -scalars::int4::matrix_int4_ord_ore_gt_pivot_max_correctness -scalars::int4::matrix_int4_ord_ore_gt_pivot_max_cross_shape -scalars::int4::matrix_int4_ord_ore_gt_pivot_min_correctness -scalars::int4::matrix_int4_ord_ore_gt_pivot_min_cross_shape -scalars::int4::matrix_int4_ord_ore_gt_pivot_zero_correctness -scalars::int4::matrix_int4_ord_ore_gt_pivot_zero_cross_shape -scalars::int4::matrix_int4_ord_ore_gt_supported_null -scalars::int4::matrix_int4_ord_ore_gte_pivot_max_correctness -scalars::int4::matrix_int4_ord_ore_gte_pivot_max_cross_shape -scalars::int4::matrix_int4_ord_ore_gte_pivot_min_correctness -scalars::int4::matrix_int4_ord_ore_gte_pivot_min_cross_shape -scalars::int4::matrix_int4_ord_ore_gte_pivot_zero_correctness -scalars::int4::matrix_int4_ord_ore_gte_pivot_zero_cross_shape -scalars::int4::matrix_int4_ord_ore_gte_supported_null -scalars::int4::matrix_int4_ord_ore_index_engages_btree -scalars::int4::matrix_int4_ord_ore_lt_pivot_max_correctness -scalars::int4::matrix_int4_ord_ore_lt_pivot_max_cross_shape -scalars::int4::matrix_int4_ord_ore_lt_pivot_min_correctness -scalars::int4::matrix_int4_ord_ore_lt_pivot_min_cross_shape -scalars::int4::matrix_int4_ord_ore_lt_pivot_zero_correctness -scalars::int4::matrix_int4_ord_ore_lt_pivot_zero_cross_shape -scalars::int4::matrix_int4_ord_ore_lt_supported_null -scalars::int4::matrix_int4_ord_ore_lte_pivot_max_correctness -scalars::int4::matrix_int4_ord_ore_lte_pivot_max_cross_shape -scalars::int4::matrix_int4_ord_ore_lte_pivot_min_correctness -scalars::int4::matrix_int4_ord_ore_lte_pivot_min_cross_shape -scalars::int4::matrix_int4_ord_ore_lte_pivot_zero_correctness -scalars::int4::matrix_int4_ord_ore_lte_pivot_zero_cross_shape -scalars::int4::matrix_int4_ord_ore_lte_supported_null -scalars::int4::matrix_int4_ord_ore_native_absent_ops -scalars::int4::matrix_int4_ord_ore_neq_pivot_max_correctness 
-scalars::int4::matrix_int4_ord_ore_neq_pivot_max_cross_shape -scalars::int4::matrix_int4_ord_ore_neq_pivot_min_correctness -scalars::int4::matrix_int4_ord_ore_neq_pivot_min_cross_shape -scalars::int4::matrix_int4_ord_ore_neq_pivot_zero_correctness -scalars::int4::matrix_int4_ord_ore_neq_pivot_zero_cross_shape -scalars::int4::matrix_int4_ord_ore_neq_supported_null -scalars::int4::matrix_int4_ord_ore_ord_routes_through_ob -scalars::int4::matrix_int4_ord_ore_order_by_asc_no_where -scalars::int4::matrix_int4_ord_ore_order_by_asc_nulls_first -scalars::int4::matrix_int4_ord_ore_order_by_asc_nulls_last -scalars::int4::matrix_int4_ord_ore_order_by_asc_with_where -scalars::int4::matrix_int4_ord_ore_order_by_desc_no_where -scalars::int4::matrix_int4_ord_ore_order_by_desc_nulls_first -scalars::int4::matrix_int4_ord_ore_order_by_desc_nulls_last -scalars::int4::matrix_int4_ord_ore_order_by_desc_with_where -scalars::int4::matrix_int4_ord_ore_order_by_using_gt_rejects -scalars::int4::matrix_int4_ord_ore_order_by_using_gte_rejects -scalars::int4::matrix_int4_ord_ore_order_by_using_lt_rejects -scalars::int4::matrix_int4_ord_ore_order_by_using_lte_rejects -scalars::int4::matrix_int4_ord_ore_ore_injectivity -scalars::int4::matrix_int4_ord_ore_path_op_blockers -scalars::int4::matrix_int4_ord_ore_payload_check -scalars::int4::matrix_int4_ord_ore_planner_metadata_eq -scalars::int4::matrix_int4_ord_ore_planner_metadata_ord -scalars::int4::matrix_int4_ord_ore_sanity -scalars::int4::matrix_int4_ord_ore_typed_column_blocker -scalars::int4::matrix_int4_ord_path_op_blockers -scalars::int4::matrix_int4_ord_payload_check -scalars::int4::matrix_int4_ord_planner_metadata_eq -scalars::int4::matrix_int4_ord_planner_metadata_ord -scalars::int4::matrix_int4_ord_sanity -scalars::int4::matrix_int4_ord_scale_preference_default_btree -scalars::int4::matrix_int4_ord_typed_column_blocker -scalars::int4::matrix_int4_storage_aggregate_typecheck_max -scalars::int4::matrix_int4_storage_aggregate_typecheck_min 
-scalars::int4::matrix_int4_storage_contained_by_blocker -scalars::int4::matrix_int4_storage_contains_blocker -scalars::int4::matrix_int4_storage_count_path_cast -scalars::int4::matrix_int4_storage_count_typed_column -scalars::int4::matrix_int4_storage_eq_blocker -scalars::int4::matrix_int4_storage_gt_blocker -scalars::int4::matrix_int4_storage_gte_blocker -scalars::int4::matrix_int4_storage_lt_blocker -scalars::int4::matrix_int4_storage_lte_blocker -scalars::int4::matrix_int4_storage_native_absent_ops -scalars::int4::matrix_int4_storage_neq_blocker -scalars::int4::matrix_int4_storage_path_op_blockers -scalars::int4::matrix_int4_storage_payload_check -scalars::int4::matrix_int4_storage_sanity -scalars::int4::matrix_int4_storage_typed_column_blocker diff --git a/tests/sqlx/snapshots/matrix_tests.txt b/tests/sqlx/snapshots/matrix_tests.txt new file mode 100644 index 00000000..2cdfc22b --- /dev/null +++ b/tests/sqlx/snapshots/matrix_tests.txt @@ -0,0 +1,211 @@ +scalars::::matrix__eq_aggregate_typecheck_max +scalars::::matrix__eq_aggregate_typecheck_min +scalars::::matrix__eq_contained_by_blocker +scalars::::matrix__eq_contains_blocker +scalars::::matrix__eq_count_distinct_extractor +scalars::::matrix__eq_count_path_cast +scalars::::matrix__eq_count_typed_column +scalars::::matrix__eq_eq_pivot_max_correctness +scalars::::matrix__eq_eq_pivot_max_cross_shape +scalars::::matrix__eq_eq_pivot_min_correctness +scalars::::matrix__eq_eq_pivot_min_cross_shape +scalars::::matrix__eq_eq_pivot_zero_correctness +scalars::::matrix__eq_eq_pivot_zero_cross_shape +scalars::::matrix__eq_eq_supported_null +scalars::::matrix__eq_gt_blocker +scalars::::matrix__eq_gte_blocker +scalars::::matrix__eq_index_engages_btree +scalars::::matrix__eq_index_engages_hash +scalars::::matrix__eq_lt_blocker +scalars::::matrix__eq_lte_blocker +scalars::::matrix__eq_native_absent_ops +scalars::::matrix__eq_neq_pivot_max_correctness +scalars::::matrix__eq_neq_pivot_max_cross_shape 
+scalars::::matrix__eq_neq_pivot_min_correctness +scalars::::matrix__eq_neq_pivot_min_cross_shape +scalars::::matrix__eq_neq_pivot_zero_correctness +scalars::::matrix__eq_neq_pivot_zero_cross_shape +scalars::::matrix__eq_neq_supported_null +scalars::::matrix__eq_path_op_blockers +scalars::::matrix__eq_payload_check +scalars::::matrix__eq_planner_metadata_eq +scalars::::matrix__eq_sanity +scalars::::matrix__eq_typed_column_blocker +scalars::::matrix__fixture_shape +scalars::::matrix__ord_aggregate_group_by_max +scalars::::matrix__ord_aggregate_group_by_min +scalars::::matrix__ord_aggregate_max +scalars::::matrix__ord_aggregate_max_all_null +scalars::::matrix__ord_aggregate_max_empty +scalars::::matrix__ord_aggregate_max_mixed_null +scalars::::matrix__ord_aggregate_min +scalars::::matrix__ord_aggregate_min_all_null +scalars::::matrix__ord_aggregate_min_empty +scalars::::matrix__ord_aggregate_min_mixed_null +scalars::::matrix__ord_aggregate_parallel_safe +scalars::::matrix__ord_contained_by_blocker +scalars::::matrix__ord_contains_blocker +scalars::::matrix__ord_count_distinct_extractor +scalars::::matrix__ord_count_path_cast +scalars::::matrix__ord_count_typed_column +scalars::::matrix__ord_eq_pivot_max_correctness +scalars::::matrix__ord_eq_pivot_max_cross_shape +scalars::::matrix__ord_eq_pivot_min_correctness +scalars::::matrix__ord_eq_pivot_min_cross_shape +scalars::::matrix__ord_eq_pivot_zero_correctness +scalars::::matrix__ord_eq_pivot_zero_cross_shape +scalars::::matrix__ord_eq_supported_null +scalars::::matrix__ord_gt_pivot_max_correctness +scalars::::matrix__ord_gt_pivot_max_cross_shape +scalars::::matrix__ord_gt_pivot_min_correctness +scalars::::matrix__ord_gt_pivot_min_cross_shape +scalars::::matrix__ord_gt_pivot_zero_correctness +scalars::::matrix__ord_gt_pivot_zero_cross_shape +scalars::::matrix__ord_gt_supported_null +scalars::::matrix__ord_gte_pivot_max_correctness +scalars::::matrix__ord_gte_pivot_max_cross_shape 
+scalars::::matrix__ord_gte_pivot_min_correctness +scalars::::matrix__ord_gte_pivot_min_cross_shape +scalars::::matrix__ord_gte_pivot_zero_correctness +scalars::::matrix__ord_gte_pivot_zero_cross_shape +scalars::::matrix__ord_gte_supported_null +scalars::::matrix__ord_index_engages_btree +scalars::::matrix__ord_lt_pivot_max_correctness +scalars::::matrix__ord_lt_pivot_max_cross_shape +scalars::::matrix__ord_lt_pivot_min_correctness +scalars::::matrix__ord_lt_pivot_min_cross_shape +scalars::::matrix__ord_lt_pivot_zero_correctness +scalars::::matrix__ord_lt_pivot_zero_cross_shape +scalars::::matrix__ord_lt_supported_null +scalars::::matrix__ord_lte_pivot_max_correctness +scalars::::matrix__ord_lte_pivot_max_cross_shape +scalars::::matrix__ord_lte_pivot_min_correctness +scalars::::matrix__ord_lte_pivot_min_cross_shape +scalars::::matrix__ord_lte_pivot_zero_correctness +scalars::::matrix__ord_lte_pivot_zero_cross_shape +scalars::::matrix__ord_lte_supported_null +scalars::::matrix__ord_native_absent_ops +scalars::::matrix__ord_neq_pivot_max_correctness +scalars::::matrix__ord_neq_pivot_max_cross_shape +scalars::::matrix__ord_neq_pivot_min_correctness +scalars::::matrix__ord_neq_pivot_min_cross_shape +scalars::::matrix__ord_neq_pivot_zero_correctness +scalars::::matrix__ord_neq_pivot_zero_cross_shape +scalars::::matrix__ord_neq_supported_null +scalars::::matrix__ord_ord_routes_through_ob +scalars::::matrix__ord_order_by_asc_no_where +scalars::::matrix__ord_order_by_asc_nulls_first +scalars::::matrix__ord_order_by_asc_nulls_last +scalars::::matrix__ord_order_by_asc_with_where +scalars::::matrix__ord_order_by_desc_no_where +scalars::::matrix__ord_order_by_desc_nulls_first +scalars::::matrix__ord_order_by_desc_nulls_last +scalars::::matrix__ord_order_by_desc_with_where +scalars::::matrix__ord_order_by_using_gt_rejects +scalars::::matrix__ord_order_by_using_gte_rejects +scalars::::matrix__ord_order_by_using_lt_rejects +scalars::::matrix__ord_order_by_using_lte_rejects 
+scalars::::matrix__ord_ore_aggregate_group_by_max +scalars::::matrix__ord_ore_aggregate_group_by_min +scalars::::matrix__ord_ore_aggregate_max +scalars::::matrix__ord_ore_aggregate_max_all_null +scalars::::matrix__ord_ore_aggregate_max_empty +scalars::::matrix__ord_ore_aggregate_max_mixed_null +scalars::::matrix__ord_ore_aggregate_min +scalars::::matrix__ord_ore_aggregate_min_all_null +scalars::::matrix__ord_ore_aggregate_min_empty +scalars::::matrix__ord_ore_aggregate_min_mixed_null +scalars::::matrix__ord_ore_aggregate_parallel_safe +scalars::::matrix__ord_ore_contained_by_blocker +scalars::::matrix__ord_ore_contains_blocker +scalars::::matrix__ord_ore_count_distinct_extractor +scalars::::matrix__ord_ore_count_path_cast +scalars::::matrix__ord_ore_count_typed_column +scalars::::matrix__ord_ore_eq_pivot_max_correctness +scalars::::matrix__ord_ore_eq_pivot_max_cross_shape +scalars::::matrix__ord_ore_eq_pivot_min_correctness +scalars::::matrix__ord_ore_eq_pivot_min_cross_shape +scalars::::matrix__ord_ore_eq_pivot_zero_correctness +scalars::::matrix__ord_ore_eq_pivot_zero_cross_shape +scalars::::matrix__ord_ore_eq_supported_null +scalars::::matrix__ord_ore_gt_pivot_max_correctness +scalars::::matrix__ord_ore_gt_pivot_max_cross_shape +scalars::::matrix__ord_ore_gt_pivot_min_correctness +scalars::::matrix__ord_ore_gt_pivot_min_cross_shape +scalars::::matrix__ord_ore_gt_pivot_zero_correctness +scalars::::matrix__ord_ore_gt_pivot_zero_cross_shape +scalars::::matrix__ord_ore_gt_supported_null +scalars::::matrix__ord_ore_gte_pivot_max_correctness +scalars::::matrix__ord_ore_gte_pivot_max_cross_shape +scalars::::matrix__ord_ore_gte_pivot_min_correctness +scalars::::matrix__ord_ore_gte_pivot_min_cross_shape +scalars::::matrix__ord_ore_gte_pivot_zero_correctness +scalars::::matrix__ord_ore_gte_pivot_zero_cross_shape +scalars::::matrix__ord_ore_gte_supported_null +scalars::::matrix__ord_ore_index_engages_btree +scalars::::matrix__ord_ore_lt_pivot_max_correctness 
+scalars::::matrix__ord_ore_lt_pivot_max_cross_shape +scalars::::matrix__ord_ore_lt_pivot_min_correctness +scalars::::matrix__ord_ore_lt_pivot_min_cross_shape +scalars::::matrix__ord_ore_lt_pivot_zero_correctness +scalars::::matrix__ord_ore_lt_pivot_zero_cross_shape +scalars::::matrix__ord_ore_lt_supported_null +scalars::::matrix__ord_ore_lte_pivot_max_correctness +scalars::::matrix__ord_ore_lte_pivot_max_cross_shape +scalars::::matrix__ord_ore_lte_pivot_min_correctness +scalars::::matrix__ord_ore_lte_pivot_min_cross_shape +scalars::::matrix__ord_ore_lte_pivot_zero_correctness +scalars::::matrix__ord_ore_lte_pivot_zero_cross_shape +scalars::::matrix__ord_ore_lte_supported_null +scalars::::matrix__ord_ore_native_absent_ops +scalars::::matrix__ord_ore_neq_pivot_max_correctness +scalars::::matrix__ord_ore_neq_pivot_max_cross_shape +scalars::::matrix__ord_ore_neq_pivot_min_correctness +scalars::::matrix__ord_ore_neq_pivot_min_cross_shape +scalars::::matrix__ord_ore_neq_pivot_zero_correctness +scalars::::matrix__ord_ore_neq_pivot_zero_cross_shape +scalars::::matrix__ord_ore_neq_supported_null +scalars::::matrix__ord_ore_ord_routes_through_ob +scalars::::matrix__ord_ore_order_by_asc_no_where +scalars::::matrix__ord_ore_order_by_asc_nulls_first +scalars::::matrix__ord_ore_order_by_asc_nulls_last +scalars::::matrix__ord_ore_order_by_asc_with_where +scalars::::matrix__ord_ore_order_by_desc_no_where +scalars::::matrix__ord_ore_order_by_desc_nulls_first +scalars::::matrix__ord_ore_order_by_desc_nulls_last +scalars::::matrix__ord_ore_order_by_desc_with_where +scalars::::matrix__ord_ore_order_by_using_gt_rejects +scalars::::matrix__ord_ore_order_by_using_gte_rejects +scalars::::matrix__ord_ore_order_by_using_lt_rejects +scalars::::matrix__ord_ore_order_by_using_lte_rejects +scalars::::matrix__ord_ore_ore_injectivity +scalars::::matrix__ord_ore_path_op_blockers +scalars::::matrix__ord_ore_payload_check +scalars::::matrix__ord_ore_planner_metadata_eq 
+scalars::::matrix__ord_ore_planner_metadata_ord +scalars::::matrix__ord_ore_sanity +scalars::::matrix__ord_ore_typed_column_blocker +scalars::::matrix__ord_path_op_blockers +scalars::::matrix__ord_payload_check +scalars::::matrix__ord_planner_metadata_eq +scalars::::matrix__ord_planner_metadata_ord +scalars::::matrix__ord_sanity +scalars::::matrix__ord_scale_preference_default_btree +scalars::::matrix__ord_typed_column_blocker +scalars::::matrix__storage_aggregate_typecheck_max +scalars::::matrix__storage_aggregate_typecheck_min +scalars::::matrix__storage_contained_by_blocker +scalars::::matrix__storage_contains_blocker +scalars::::matrix__storage_count_path_cast +scalars::::matrix__storage_count_typed_column +scalars::::matrix__storage_eq_blocker +scalars::::matrix__storage_gt_blocker +scalars::::matrix__storage_gte_blocker +scalars::::matrix__storage_lt_blocker +scalars::::matrix__storage_lte_blocker +scalars::::matrix__storage_native_absent_ops +scalars::::matrix__storage_neq_blocker +scalars::::matrix__storage_path_op_blockers +scalars::::matrix__storage_payload_check +scalars::::matrix__storage_sanity +scalars::::matrix__storage_typed_column_blocker From 8d77e6e05dc41d3ac489a8c4a7c25eb150b805ba Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 2 Jun 2026 15:33:56 +1000 Subject: [PATCH 07/19] ci(codegen): Rust catalog/generator tests + golden parity, drop pytest --- .github/workflows/test-eql.yml | 35 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/.github/workflows/test-eql.yml b/.github/workflows/test-eql.yml index 22a61d1c..46206296 100644 --- a/.github/workflows/test-eql.yml +++ b/.github/workflows/test-eql.yml @@ -113,31 +113,30 @@ jobs: workspaces: . shared-key: sqlx-tests - # The Python `test:codegen` drift suite is intentionally not run here: the - # Python generator is deprecated and removed in the following PR. 
The - # hand-written reference (tests/codegen/reference/) is now gated against - # the Rust generator by the `mise run codegen:parity` step below. + # Crate compile/lint/test (cargo test -p eql-scalars -p eql-codegen) runs + # in the dedicated `test:crates` job; this job covers the codegen-specific + # gates only — fixture-value regeneration and golden/values parity. # Regenerate the committed Rust fixture-value consts for EVERY type from - # their manifests and fail if any differ from / are missing in the tree. - # The value lists are rendered deterministically (unlike the encrypted - # .sql fixtures, whose ciphertext is non-deterministic and gitignored), so - # a plain diff is the right guard — it catches a manifest edit that wasn't - # regenerated. `git add -N` registers any brand-new untracked const so a - # forgotten-to-commit file also trips the diff. No Postgres needed: this - # only runs the Python generator. + # the catalog and fail if any differ from / are missing in the tree. + # eql-codegen renders all _values.rs deterministically (unlike the + # encrypted .sql fixtures, whose ciphertext is non-deterministic and + # gitignored), so a plain diff is the right guard — it catches a catalog + # edit that wasn't regenerated. `git add -N` registers any brand-new + # untracked const so a forgotten-to-commit file also trips the diff. No + # Postgres needed: the generator is std-only. 
- name: Regenerate and verify fixture-value consts (all types) run: | - mise run codegen:domain:all + cargo run -p eql-codegen git add -N tests/sqlx/src/fixtures git diff --exit-code -- tests/sqlx/src/fixtures \ - || { echo "Fixture value const(s) stale or uncommitted — run 'mise run codegen:domain:all' and commit tests/sqlx/src/fixtures."; exit 1; } + || { echo "Fixture value const(s) stale or uncommitted — run 'cargo run -p eql-codegen' and commit tests/sqlx/src/fixtures."; exit 1; } - # Cross-generator parity: assert the Rust eql-codegen output is byte- - # identical to the Python oracle across all types, the committed - # _values.rs, and the int4 golden reference. No Postgres needed — both - # generators are deterministic and run offline. - - name: Verify Rust↔Python generator parity + # Parity gate: assert the Rust eql-codegen output is line-normalized-equal + # to the int4 golden reference and the committed _values.rs are byte- + # identical (git-clean) after regeneration. Python is no longer an oracle + # (retired in P2). No Postgres needed — the generator runs offline. + - name: Verify generator parity (golden + values) run: | mise run codegen:parity From a8e9ab77a16de062958be86903aed59e7e4bd0e2 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 2 Jun 2026 15:34:54 +1000 Subject: [PATCH 08/19] ci(matrix): verify single canonical snapshot, catalog-reconciled, cache root workspace --- .github/workflows/test-eql.yml | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/.github/workflows/test-eql.yml b/.github/workflows/test-eql.yml index 46206296..feacd2c6 100644 --- a/.github/workflows/test-eql.yml +++ b/.github/workflows/test-eql.yml @@ -160,17 +160,21 @@ jobs: workspaces: . shared-key: sqlx-tests - # Regenerate the matrix test-name inventory with the SAME pinned feature - # set the local task uses (`--no-default-features`, scale excluded), then - # fail if it differs from the committed snapshot. 
A coverage change shows - # up as added/removed names in the PR diff — e.g. emptying `ord_domains` - # drops ~140 names, impossible to miss in review. No Postgres needed: - # `--list` only enumerates, the suite uses runtime queries. - - name: Regenerate and verify the matrix test-name inventory + # Verify the matrix test-name set against the SINGLE canonical snapshot + # (snapshots/matrix_tests.txt) with the SAME pinned feature set the local + # task uses (`--no-default-features`, scale excluded), and cross-check the + # binary's discovered type set against `eql-codegen list-types`. A coverage + # change shows up as a diff in the snapshot; a catalog type missing its + # matrix wiring fails the cross-check. No Postgres needed: `--list` only + # enumerates, the suite uses runtime queries. + - name: Verify the matrix test-name inventory run: | mise run test:matrix:inventory - git diff --exit-code -- tests/sqlx/snapshots/int4_matrix_tests.txt \ - tests/sqlx/snapshots/int2_matrix_tests.txt \ + # Diff the whole snapshots/ directory so the single canonical file + # isn't hardcoded here; the mise task discovers the type set from the + # binary and reconciles it against `eql-codegen list-types`. 
+ git add -N tests/sqlx/snapshots + git diff --exit-code -- tests/sqlx/snapshots \ || { echo "Coverage inventory stale — run 'mise run test:matrix:inventory' and commit."; exit 1; } test: From 86e54ef07e45a418c6807c1a855c1c6b0b2f918c Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 2 Jun 2026 15:56:38 +1000 Subject: [PATCH 09/19] build: remove Python codegen toolchain (catalog/generator is Rust) --- mise.toml | 18 +- tasks/codegen/__init__.py | 17 - tasks/codegen/conftest.py | 6 - tasks/codegen/domain.sh | 15 - tasks/codegen/generate.py | 333 ---------------- tasks/codegen/operator_surface.py | 72 ---- tasks/codegen/scalars.py | 102 ----- tasks/codegen/spec.py | 141 ------- tasks/codegen/templates.py | 495 ----------------------- tasks/codegen/terms.py | 107 ----- tasks/codegen/test_against_reference.py | 117 ------ tasks/codegen/test_generate.py | 351 ----------------- tasks/codegen/test_operator_surface.py | 126 ------ tasks/codegen/test_scalars.py | 82 ---- tasks/codegen/test_spec.py | 208 ---------- tasks/codegen/test_templates.py | 500 ------------------------ tasks/codegen/test_terms.py | 96 ----- tasks/codegen/test_writer.py | 157 -------- tasks/codegen/types/.gitkeep | 0 tasks/codegen/types/int2.toml | 19 - tasks/codegen/types/int4.toml | 19 - tasks/codegen/writer.py | 89 ----- 22 files changed, 6 insertions(+), 3064 deletions(-) delete mode 100644 tasks/codegen/__init__.py delete mode 100644 tasks/codegen/conftest.py delete mode 100755 tasks/codegen/domain.sh delete mode 100644 tasks/codegen/generate.py delete mode 100644 tasks/codegen/operator_surface.py delete mode 100644 tasks/codegen/scalars.py delete mode 100644 tasks/codegen/spec.py delete mode 100644 tasks/codegen/templates.py delete mode 100644 tasks/codegen/terms.py delete mode 100644 tasks/codegen/test_against_reference.py delete mode 100644 tasks/codegen/test_generate.py delete mode 100644 tasks/codegen/test_operator_surface.py delete mode 100644 tasks/codegen/test_scalars.py delete mode 
100644 tasks/codegen/test_spec.py delete mode 100644 tasks/codegen/test_templates.py delete mode 100644 tasks/codegen/test_terms.py delete mode 100644 tasks/codegen/test_writer.py delete mode 100644 tasks/codegen/types/.gitkeep delete mode 100644 tasks/codegen/types/int2.toml delete mode 100644 tasks/codegen/types/int4.toml delete mode 100644 tasks/codegen/writer.py diff --git a/mise.toml b/mise.toml index c92d222b..d4fabf59 100644 --- a/mise.toml +++ b/mise.toml @@ -18,6 +18,9 @@ # macro-expand-eql.yml workflow's mise-action step), so there is no separate # hardcoded version to keep in lockstep. "cargo:cargo-expand" = "1.0.122" +# Still required by the documentation tooling (`tasks/docs/generate/*.py`, run +# by `docs:generate:markdown` in the release workflow). The encrypted-domain +# codegen toolchain is now Rust (eql-scalars/eql-codegen) and needs no Python. "python" = "3.13" [task_config] @@ -90,25 +93,16 @@ run = """ cargo test --test payload_schema_tests """ -[tasks."codegen:domain:all"] -description = "Regenerate every encrypted-domain type from its TOML manifest" -dir = "{{config_root}}" -run = """ -mise exec python -- python -m tasks.codegen.generate --all -""" - [tasks."codegen:parity"] -description = "Parity gate: Rust eql-codegen output byte-identical to the Python oracle" +description = "Parity gate: Rust eql-codegen output matches the int4 golden (normalized) + committed values.rs" dir = "{{config_root}}" run = "bash tasks/codegen-parity.sh" [tasks."test:codegen"] -description = "Run the encrypted-domain codegen generator tests (no database required)" +description = "Run the encrypted-domain catalog + generator tests (no database required)" dir = "{{config_root}}" run = """ -# pytest is the only non-stdlib dependency; the install is a fast no-op once satisfied. 
-mise exec python -- python -m pip install --quiet --disable-pip-version-check pytest -mise exec python -- python -m pytest tasks/codegen -q +cargo test -p eql-scalars -p eql-codegen """ [tasks."test:crates"] diff --git a/tasks/codegen/__init__.py b/tasks/codegen/__init__.py deleted file mode 100644 index 4443af87..00000000 --- a/tasks/codegen/__init__.py +++ /dev/null @@ -1,17 +0,0 @@ -"""Encrypted-domain SQL code generator for EQL scalar domain families.""" - -from .generate import generate_type, main -from .spec import DomainSpec, SpecError, TypeSpec, load_spec -from .terms import TERM_CATALOG, Term, TermError - -__all__ = [ - "DomainSpec", - "SpecError", - "TERM_CATALOG", - "Term", - "TermError", - "TypeSpec", - "generate_type", - "load_spec", - "main", -] diff --git a/tasks/codegen/conftest.py b/tasks/codegen/conftest.py deleted file mode 100644 index f03b0e8b..00000000 --- a/tasks/codegen/conftest.py +++ /dev/null @@ -1,6 +0,0 @@ -"""pytest discovery anchor for the codegen package. - -Tests import via `from tasks.codegen. import ...`; pytest runs -from the repo root (where `tasks/__init__.py` exists), so no `sys.path` -manipulation is needed. -""" diff --git a/tasks/codegen/domain.sh b/tasks/codegen/domain.sh deleted file mode 100755 index ae279a12..00000000 --- a/tasks/codegen/domain.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env bash -#MISE description="Regenerate an encrypted-domain type from its TOML spec" -#USAGE arg "type" help="Type token, e.g. int4 (matches tasks/codegen/types/.toml)" - -set -euo pipefail - -TYPE=${usage_type:?type argument required} - -echo "Regenerating encrypted-domain type: ${TYPE}" -mise exec python -- python -m tasks.codegen.generate "${TYPE}" -echo "" -echo "✓ Regenerated src/encrypted_domain/${TYPE}/ (gitignored)" -echo " Note: 'mise run build' regenerates every type automatically;" -echo " this task is for refreshing one type while iterating on its manifest." 
-echo " When ready, run 'mise run clean && mise run build' then 'mise run test'." diff --git a/tasks/codegen/generate.py b/tasks/codegen/generate.py deleted file mode 100644 index 03ac6be7..00000000 --- a/tasks/codegen/generate.py +++ /dev/null @@ -1,333 +0,0 @@ -"""Top-level scalar encrypted-domain materializer.""" - -import sys -from collections.abc import Iterator -from pathlib import Path - -from .operator_surface import ( - BLOCKER_ONLY_OPERATORS, - PATH_OPERATORS, - SYMMETRIC_OPERATORS, - backing_function, -) -from .spec import DomainSpec, SpecError, TypeSpec, load_spec -from .templates import ( - AGGREGATE_OPS, - domain_name, - extractor_for_operator, - is_ord_capable, - render_aggregate, - render_blocker_bool, - render_blocker_native, - render_blocker_path, - render_domain_block, - render_extractor, - render_fixture_values_rs, - render_operator, - render_wrapper, - role_phrase, - supported_operators, -) -from .terms import TERM_CATALOG, Term, term_requires -from .writer import ( - clean_generated_files, - ensure_generated_paths_writable, - write_generated_file, - write_generated_rs, -) - -REPO_ROOT = Path(__file__).resolve().parents[2] - - -def _symmetric_shapes(dom: str) -> list[tuple[str, str]]: - return [(dom, dom), (dom, "jsonb"), ("jsonb", dom)] - - -def _path_shapes(dom: str) -> list[tuple[str, str]]: - return [(dom, "text"), (dom, "integer"), ("jsonb", dom)] - - -def _blocker_only_shapes(dom: str, op: str) -> list[tuple[str, str, str]]: - if op in {"?", "?|", "?&"}: - rhs = "text[]" if op in {"?|", "?&"} else "text" - return [(dom, rhs, "boolean")] - if op in {"@?", "@@"}: - return [(dom, "jsonpath", "boolean")] - if op == "#>": - return [(dom, "text[]", "jsonb")] - if op == "#>>": - return [(dom, "text[]", "text")] - if op == "-": - return [(dom, "text", "jsonb"), (dom, "integer", "jsonb"), (dom, "text[]", "jsonb")] - if op == "#-": - return [(dom, "text[]", "jsonb")] - if op == "||": - return [(dom, dom, "jsonb"), (dom, "jsonb", "jsonb"), ("jsonb", 
dom, "jsonb")] - raise ValueError(f"unhandled blocker-only operator: {op}") - - -def _types_path(token: str) -> str: - return f"src/encrypted_domain/{token}/{token}_types.sql" - - -def fixture_values_rs_path(out_root: Path, token: str) -> Path: - """Committed Rust fixture-value const for a type. Outside the gitignored - src/encrypted_domain/ SQL tree because it is consumed (and committed) by - the Rust test crate.""" - return ( - out_root / "tests" / "sqlx" / "src" / "fixtures" / f"{token}_values.rs" - ) - - -def render_types_file(spec: TypeSpec) -> str: - """Body for _types.sql: every domain in one idempotent DO block. - - Iteration order follows the manifest's declared order — the TOML file is - the source of truth for emit order. - """ - blocks = [render_domain_block(domain, spec.token) for domain in spec.domains] - return ( - "-- REQUIRE: src/schema-v3.sql\n\n" - f"--! @file encrypted_domain/{spec.token}/{spec.token}_types.sql\n" - f"--! @brief Encrypted-domain type family for {spec.token}.\n\n" - "DO $$\nBEGIN\n" - + "\n".join(blocks) - + "END\n$$;\n" - ) - - -def _functions_requires(spec: TypeSpec, domain: DomainSpec) -> list[str]: - reqs = [ - "src/schema.sql", - "src/schema-v3.sql", - _types_path(spec.token), - "src/encrypted_domain/functions.sql", - ] - for extra in term_requires(domain.terms): - if extra not in reqs: - reqs.append(extra) - return reqs - - -def _extractor_terms(domain: DomainSpec) -> Iterator[Term]: - seen: set[str] = set() - for term_name in domain.terms: - term = TERM_CATALOG[term_name] - if term.extractor not in seen: - seen.add(term.extractor) - yield term - - -def render_functions_file(spec: TypeSpec, domain: DomainSpec) -> str: - """Body for a domain's _functions.sql.""" - dom = domain_name(domain.name) - supported = set(supported_operators(domain)) - parts: list[str] = [] - - for term in _extractor_terms(domain): - parts.append(render_extractor(domain, term)) - - for op in SYMMETRIC_OPERATORS: - extractor = 
extractor_for_operator(domain, op) - for arg_a, arg_b in _symmetric_shapes(dom): - if op in supported and extractor is not None: - parts.append(render_wrapper(domain, op, arg_a, arg_b, extractor)) - else: - parts.append(render_blocker_bool(domain, op, arg_a, arg_b)) - - for op in PATH_OPERATORS: - for arg_a, arg_b in _path_shapes(dom): - parts.append(render_blocker_path(domain, op, arg_a, arg_b)) - - for op in BLOCKER_ONLY_OPERATORS: - for arg_a, arg_b, returns in _blocker_only_shapes(dom, op): - parts.append(render_blocker_native(domain, op, arg_a, arg_b, returns)) - - requires = "\n".join(f"-- REQUIRE: {r}" for r in _functions_requires(spec, domain)) - header = ( - requires + "\n\n" - f"--! @file encrypted_domain/{spec.token}/{domain.name}_functions.sql\n" - f"--! @brief {role_phrase(domain.terms)} domain of the {spec.token} " - f"encrypted-domain family — comparison/path functions.\n\n" - ) - return header + "\n".join(parts) - - -def render_operators_file(spec: TypeSpec, domain: DomainSpec) -> str: - """Body for a domain's _operators.sql: 44 CREATE OPERATOR statements.""" - dom = domain_name(domain.name) - supported = set(supported_operators(domain)) - parts: list[str] = [] - - for op in SYMMETRIC_OPERATORS: - backing = backing_function(op) - for leftarg, rightarg in _symmetric_shapes(dom): - parts.append( - render_operator( - op, backing, leftarg, rightarg, - supported=op in supported, - ) - ) - for op in PATH_OPERATORS: - backing = backing_function(op) - for leftarg, rightarg in _path_shapes(dom): - parts.append( - render_operator(op, backing, leftarg, rightarg, supported=False) - ) - for op in BLOCKER_ONLY_OPERATORS: - backing = backing_function(op) - for leftarg, rightarg, _returns in _blocker_only_shapes(dom, op): - parts.append( - render_operator(op, backing, leftarg, rightarg, supported=False) - ) - - requires = ( - "-- REQUIRE: src/schema-v3.sql\n" - f"-- REQUIRE: {_types_path(spec.token)}\n" - f"-- REQUIRE: src/encrypted_domain/{spec.token}/" - 
f"{domain.name}_functions.sql\n" - ) - header = ( - requires + "\n" - f"--! @file encrypted_domain/{spec.token}/{domain.name}_operators.sql\n" - f"--! @brief {role_phrase(domain.terms)} domain of the {spec.token} " - f"encrypted-domain family — operator declarations.\n\n" - ) - return header + "\n".join(parts) - - -def render_aggregates_file(spec: TypeSpec, domain: DomainSpec) -> str | None: - """Body for a domain's _aggregates.sql, or None if the domain has no - ordering comparator (storage/eq variants have no MIN/MAX semantics).""" - if not is_ord_capable(domain): - return None - parts = [render_aggregate(domain, AGGREGATE_OPS[name]) for name in ("min", "max")] - requires = ( - "-- REQUIRE: src/schema-v3.sql\n" - f"-- REQUIRE: {_types_path(spec.token)}\n" - f"-- REQUIRE: src/encrypted_domain/{spec.token}/" - f"{domain.name}_functions.sql\n" - f"-- REQUIRE: src/encrypted_domain/{spec.token}/" - f"{domain.name}_operators.sql\n" - ) - header = ( - requires + "\n" - f"--! @file encrypted_domain/{spec.token}/{domain.name}_aggregates.sql\n" - f"--! 
@brief {role_phrase(domain.terms)} domain of the {spec.token} " - f"encrypted-domain family — MIN/MAX aggregates.\n\n" - ) - return header + "\n".join(parts) - - -def generate_type(spec: TypeSpec, out_dir: Path) -> list[Path]: - """Regenerate every generated file for a type.""" - out_dir = Path(out_dir) - target_paths = [out_dir / f"{spec.token}_types.sql"] - for domain in spec.domains: - target_paths.append(out_dir / f"{domain.name}_functions.sql") - target_paths.append(out_dir / f"{domain.name}_operators.sql") - if is_ord_capable(domain): - target_paths.append(out_dir / f"{domain.name}_aggregates.sql") - ensure_generated_paths_writable(target_paths) - clean_generated_files(out_dir) - - written: list[Path] = [] - - types_path = out_dir / f"{spec.token}_types.sql" - write_generated_file(types_path, render_types_file(spec)) - written.append(types_path) - - for domain in spec.domains: - fn_path = out_dir / f"{domain.name}_functions.sql" - write_generated_file(fn_path, render_functions_file(spec, domain)) - written.append(fn_path) - - op_path = out_dir / f"{domain.name}_operators.sql" - write_generated_file(op_path, render_operators_file(spec, domain)) - written.append(op_path) - - agg_body = render_aggregates_file(spec, domain) - if agg_body is not None: - agg_path = out_dir / f"{domain.name}_aggregates.sql" - write_generated_file(agg_path, agg_body) - written.append(agg_path) - - return written - - -DEFAULT_TYPES_DIR = Path(__file__).parent / "types" - - -def generate_one(token: str, *, types_dir: Path, out_root: Path) -> int: - """Regenerate one type from types_dir/.toml. - - Returns 0 on success, 1 when the manifest is missing or its inferred token - does not match. 
A malformed manifest raises SpecError — the caller decides - whether to surface it (single-type CLI) or aggregate it (--all).""" - toml_path = types_dir / f"{token}.toml" - if not toml_path.is_file(): - print(f"error: no manifest at {toml_path}", file=sys.stderr) - return 1 - spec = load_spec(toml_path) - if spec.token != token: - print( - f"error: manifest token '{spec.token}' does not match '{token}'", - file=sys.stderr, - ) - return 1 - out_dir = out_root / "src" / "encrypted_domain" / token - written = generate_type(spec, out_dir) - - if spec.fixture_values is not None: - rs_path = fixture_values_rs_path(out_root, token) - write_generated_rs(rs_path, render_fixture_values_rs(spec)) - written.append(rs_path) - - for path in written: - print(f"generated {path.relative_to(out_root)}") - print(f"generated {len(written)} files for {token}") - return 0 - - -def generate_all(*, types_dir: Path, out_root: Path) -> int: - """Regenerate every type whose manifest lives in types_dir. - - Iterates sorted(types_dir.glob('*.toml')) for deterministic order and - aggregates return codes: a missing/mismatched/malformed manifest is - reported and counted as a failure without aborting the remaining types.""" - tokens = [p.stem for p in sorted(types_dir.glob("*.toml"))] - if not tokens: - print(f"error: no manifests found in {types_dir}", file=sys.stderr) - return 1 - rc = 0 - for token in tokens: - try: - if generate_one(token, types_dir=types_dir, out_root=out_root) != 0: - rc = 1 - except SpecError as exc: - print(f"error: {token}: {exc}", file=sys.stderr) - rc = 1 - status = "ok" if rc == 0 else "FAILED" - print(f"codegen --all: {status} ({len(tokens)} types: {', '.join(tokens)})") - return rc - - -def main( - argv: list[str], - *, - types_dir: Path | None = None, - out_root: Path | None = None, -) -> int: - """CLI entrypoint: generate , or --all for every manifest.""" - types_dir = types_dir or DEFAULT_TYPES_DIR - out_root = out_root or REPO_ROOT - if len(argv) == 2 and 
argv[1] == "--all": - return generate_all(types_dir=types_dir, out_root=out_root) - if len(argv) != 2: - print("Usage: generate.py | generate.py --all", file=sys.stderr) - return 2 - return generate_one(argv[1], types_dir=types_dir, out_root=out_root) - - -if __name__ == "__main__": - sys.exit(main(sys.argv)) diff --git a/tasks/codegen/operator_surface.py b/tasks/codegen/operator_surface.py deleted file mode 100644 index 355e5751..00000000 --- a/tasks/codegen/operator_surface.py +++ /dev/null @@ -1,72 +0,0 @@ -"""The generated operator surface for a scalar encrypted-domain type. - -Supported comparison operators route to inlinable wrappers when the domain -has the required term. Unsupported comparisons, path operators, and native -jsonb fallback operators route to blockers. -""" - -from dataclasses import dataclass -from typing import Literal - - -@dataclass(frozen=True) -class Operator: - """One operator in the generated surface.""" - - symbol: str - backing: str # eql_v2 backing function name (bare or quoted) - kind: Literal["symmetric", "path", "blocker_only"] - restrict: str | None # selectivity estimator, symmetric ops only - join: str | None # join selectivity estimator, symmetric ops only - commutator: str | None - negator: str | None - - -SYMMETRIC_OPERATORS = ["=", "<>", "<", "<=", ">", ">=", "@>", "<@"] -PATH_OPERATORS = ["->", "->>"] -BLOCKER_ONLY_OPERATORS = ["?", "?|", "?&", "@?", "@@", "#>", "#>>", "-", "#-", "||"] - - -OPERATORS: dict[str, Operator] = { - "=": Operator("=", "eq", "symmetric", "eqsel", "eqjoinsel", "=", "<>"), - "<>": Operator("<>", "neq", "symmetric", "neqsel", "neqjoinsel", "<>", "="), - "<": Operator("<", "lt", "symmetric", "scalarltsel", "scalarltjoinsel", ">", ">="), - "<=": Operator("<=", "lte", "symmetric", "scalarlesel", "scalarlejoinsel", ">=", ">"), - ">": Operator(">", "gt", "symmetric", "scalargtsel", "scalargtjoinsel", "<", "<="), - ">=": Operator(">=", "gte", "symmetric", "scalargesel", "scalargejoinsel", "<=", "<"), - 
"@>": Operator("@>", "contains", "symmetric", None, None, None, None), - "<@": Operator("<@", "contained_by", "symmetric", None, None, None, None), - "->": Operator("->", '"->"', "path", None, None, None, None), - "->>": Operator("->>", '"->>"', "path", None, None, None, None), - "?": Operator("?", '"?"', "blocker_only", None, None, None, None), - "?|": Operator("?|", '"?|"', "blocker_only", None, None, None, None), - "?&": Operator("?&", '"?&"', "blocker_only", None, None, None, None), - "@?": Operator("@?", '"@?"', "blocker_only", None, None, None, None), - "@@": Operator("@@", '"@@"', "blocker_only", None, None, None, None), - "#>": Operator("#>", '"#>"', "blocker_only", None, None, None, None), - "#>>": Operator("#>>", '"#>>"', "blocker_only", None, None, None, None), - "-": Operator("-", '"-"', "blocker_only", None, None, None, None), - "#-": Operator("#-", '"#-"', "blocker_only", None, None, None, None), - "||": Operator("||", '"||"', "blocker_only", None, None, None, None), -} - - -def backing_function(symbol: str) -> str: - """Return the eql_v2 backing function name for an operator symbol.""" - return OPERATORS[symbol].backing - - -# The full union of operator symbols the generator knows about: supported -# wrappers, path operators, and explicit blockers. Together these are exactly -# the native jsonb operator surface for PG 14-17, so this set is the basis of -# the storage-only "every native jsonb operator is blocked" guarantee. -# -# A live-DB structural guard (tests/sqlx/.../family/jsonb_operator_surface.rs) -# queries pg_operator for every operator with a jsonb argument and asserts the -# set is a subset of this union — if a future PG version adds a jsonb operator -# not enumerated here, that test fails rather than silently letting native -# plaintext-jsonb semantics through on an encrypted column. Keep that test's -# hardcoded expectation in sync with this set. 
-KNOWN_JSONB_OPERATORS: frozenset[str] = frozenset( - SYMMETRIC_OPERATORS + PATH_OPERATORS + BLOCKER_ONLY_OPERATORS -) diff --git a/tasks/codegen/scalars.py b/tasks/codegen/scalars.py deleted file mode 100644 index eee01dc4..00000000 --- a/tasks/codegen/scalars.py +++ /dev/null @@ -1,102 +0,0 @@ -"""Fixed scalar-kind catalog for fixture-value emission. - -A `ScalarKind` knows how to turn a manifest fixture-value token into a Rust -literal of the type's native Rust scalar, and how to resolve it to a numeric -value for the MIN/MAX/zero invariant check. The manifest carries only the -list of value tokens; the per-type behaviour lives here (mirroring terms.py), -not in free-form TOML fields. - -Recognised sentinels are ``MIN`` / ``MAX`` / ``ZERO``; every other token is a -numeric literal validated against the type's representable range. -""" - -from dataclasses import dataclass - - -class ScalarError(Exception): - """Raised for an unknown scalar token or an invalid fixture value.""" - - -_SENTINELS = ("MIN", "MAX", "ZERO") - - -@dataclass(frozen=True) -class ScalarKind: - """One scalar type's Rust rendering rules for fixture values.""" - - token: str - rust_type: str - min_symbol: str - max_symbol: str - zero_symbol: str - min_value: int - max_value: int - - def _parse(self, value: str) -> int: - if value == "MIN": - return self.min_value - if value == "MAX": - return self.max_value - if value == "ZERO": - return 0 - try: - n = int(value) - except ValueError as exc: - raise ScalarError( - f"{self.token}: {value!r} is not a valid {self.rust_type} " - f"literal or sentinel ({'/'.join(_SENTINELS)})" - ) from exc - if not (self.min_value <= n <= self.max_value): - raise ScalarError( - f"{self.token}: {value!r} out of range for {self.rust_type} " - f"[{self.min_value}, {self.max_value}]" - ) - return n - - def numeric_value(self, value: str) -> int: - """Resolve a fixture token to its numeric value (validates range).""" - return self._parse(value) - - def 
render_literal(self, value: str) -> str: - """Render a fixture token as a Rust literal of this scalar type.""" - symbols = { - "MIN": self.min_symbol, - "MAX": self.max_symbol, - "ZERO": self.zero_symbol, - } - if value in symbols: - return symbols[value] - return str(self._parse(value)) - - -SCALAR_KINDS: dict[str, ScalarKind] = { - "int4": ScalarKind( - token="int4", - rust_type="i32", - min_symbol="i32::MIN", - max_symbol="i32::MAX", - zero_symbol="0", - min_value=-2147483648, - max_value=2147483647, - ), - "int2": ScalarKind( - token="int2", - rust_type="i16", - min_symbol="i16::MIN", - max_symbol="i16::MAX", - zero_symbol="0", - min_value=-32768, - max_value=32767, - ), -} - - -def require_scalar(token: str) -> ScalarKind: - """Return the catalog kind for `token`, or raise ScalarError.""" - try: - return SCALAR_KINDS[token] - except KeyError as exc: - raise ScalarError( - f"unknown scalar token '{token}' " - f"(expected one of {sorted(SCALAR_KINDS)})" - ) from exc diff --git a/tasks/codegen/spec.py b/tasks/codegen/spec.py deleted file mode 100644 index 40e28cac..00000000 --- a/tasks/codegen/spec.py +++ /dev/null @@ -1,141 +0,0 @@ -"""Minimal TOML manifest loader for scalar encrypted-domain codegen.""" - -import re -import tomllib -from dataclasses import dataclass -from pathlib import Path - -from .scalars import ScalarError, require_scalar -from .terms import TermError, require_terms - - -_SQL_IDENTIFIER = re.compile(r"^[a-z][a-z0-9_]*$") - - -class SpecError(Exception): - """Raised when a TOML manifest is missing or invalid.""" - - -@dataclass(frozen=True) -class DomainSpec: - """One generated public domain and the fixed terms it carries.""" - - name: str - terms: list[str] - - -@dataclass(frozen=True) -class TypeSpec: - """A scalar encrypted-domain manifest loaded from one TOML file.""" - - token: str - domains: list[DomainSpec] - fixture_values: list[str] | None = None - - -def _load_fixture_values(raw: dict, token: str) -> list[str] | None: - """Parse and 
validate the optional [fixture] table. - - Returns the ordered list of value tokens, or None when no [fixture] table - is present. The tokens are the manifest source of truth for the generated - Rust fixture-value const; the scalar kind validates each one and the set - must include MIN, MAX, and zero (the matrix comparison pivots).""" - if "fixture" not in raw: - return None - - fixture_table = raw["fixture"] - if not isinstance(fixture_table, dict) or "values" not in fixture_table: - raise SpecError("[fixture]: missing required key 'values'") - - values = fixture_table["values"] - if not isinstance(values, list): - raise SpecError("[fixture] values: must be a list of value tokens") - if not values: - raise SpecError("[fixture] values: must not be empty") - if any(not isinstance(v, str) for v in values): - raise SpecError("[fixture] values: must be strings") - - try: - kind = require_scalar(token) - resolved = [(v, kind.numeric_value(v)) for v in values] - for v in values: - kind.render_literal(v) - except ScalarError as exc: - raise SpecError(f"[fixture] values: {exc}") from exc - - # Distinct-plaintext contract: the matrix oracle treats each fixture value - # as a distinct plaintext, and the generated Rust const must not repeat a - # literal. Detect duplicates against the *resolved numeric* value so that - # both copy-paste token dups ("1", "1") and sentinel/literal aliases - # (e.g. "MIN" alongside the same number as a literal) are rejected. 
- seen: dict[int, str] = {} - duplicates: list[str] = [] - for token_value, number in resolved: - if number in seen: - duplicates.append( - f"{token_value!r} duplicates {seen[number]!r} (both resolve to {number})" - if token_value != seen[number] - else f"{token_value!r}" - ) - else: - seen[number] = token_value - if duplicates: - raise SpecError( - "[fixture] values: must be distinct, but found duplicate values: " - + ", ".join(duplicates) - ) - - numbers = set(seen) - if not ({kind.min_value, kind.max_value, 0} <= numbers): - raise SpecError( - "[fixture] values: must include MIN, MAX, and zero " - "(the matrix comparison pivots)" - ) - - return list(values) - - -def load_spec(path: Path | str) -> TypeSpec: - """Load and validate a per-type scalar-domain manifest.""" - path = Path(path) - with path.open("rb") as fh: - raw = tomllib.load(fh) - - if "domain" not in raw: - raise SpecError("spec: missing required table '[domain]'") - - domain_table = raw["domain"] - if not isinstance(domain_table, dict) or not domain_table: - raise SpecError("[domain]: at least one domain is required") - - token = path.stem - if not _SQL_IDENTIFIER.match(token): - raise SpecError( - f"spec: token {token!r} must match {_SQL_IDENTIFIER.pattern}" - ) - domains: list[DomainSpec] = [] - for name, terms in domain_table.items(): - if not isinstance(name, str) or not _SQL_IDENTIFIER.match(name): - raise SpecError( - f"[domain] {name}: domain name {name!r} must match " - f"{_SQL_IDENTIFIER.pattern}" - ) - if name != token and not name.startswith(f"{token}_"): - raise SpecError( - f"[domain] {name}: domain name must start with '{token}'" - ) - if not isinstance(terms, list): - raise SpecError( - f"[domain] {name}: value must be a list of term names" - ) - if any(not isinstance(term, str) for term in terms): - raise SpecError(f"[domain] {name}: term names must be strings") - try: - require_terms(list(terms)) - except TermError as exc: - raise SpecError(f"[domain] {name}: {exc}") from exc - 
domains.append(DomainSpec(name=name, terms=list(terms))) - - fixture_values = _load_fixture_values(raw, token) - - return TypeSpec(token=token, domains=domains, fixture_values=fixture_values) diff --git a/tasks/codegen/templates.py b/tasks/codegen/templates.py deleted file mode 100644 index 14fefbc6..00000000 --- a/tasks/codegen/templates.py +++ /dev/null @@ -1,495 +0,0 @@ -"""Per-construct SQL template functions for scalar encrypted-domain codegen.""" - -from dataclasses import dataclass - -from .operator_surface import OPERATORS -from .scalars import require_scalar -from .spec import DomainSpec, TypeSpec -from .terms import ( - Term, - extractor_for_operator as _catalog_extractor_for_operator, - operators_for_terms, - role_for_terms, - term_json_keys, -) - -# SQL generated-file marker, emitted as the first line of every generated SQL -# file. Must stay byte-identical to the Rust generator's AUTO_GENERATED_HEADER -# (crates/eql-codegen/src/consts.rs) so the two generators are at byte parity -# (mise run codegen:parity). The `^-- AUTOMATICALLY GENERATED FILE` first line -# is also what tasks/docs/validate/{coverage,required-tags}.sh grep on to skip -# generated SQL — keep this and that grep in lockstep. -AUTO_GENERATED_HEADER = "-- AUTOMATICALLY GENERATED FILE.\n" - -# Rust counterpart, prepended to the committed `_values.rs` (which has no -# template). Rust comment syntax (`//`) so the `.rs` file stays valid; must stay -# byte-identical to the Rust generator's AUTO_GENERATED_HEADER_RS. -AUTO_GENERATED_HEADER_RS = "// AUTOMATICALLY GENERATED FILE.\n" - -ENVELOPE_KEYS = ["v", "i"] -CIPHERTEXT_KEY = "c" -# EQL payload-format version. The domain CHECK pins the 'v' envelope key to -# this value, matching EQL's repo-wide rule (eql_v2._encrypted_check_v, -# src/encrypted/constraints.sql). Presence of 'v' is enforced via -# ENVELOPE_KEYS; this pins its value so a stale/foreign-version payload is -# rejected on insert or cast rather than surfacing later at query time. 
-VERSION_KEY = "v" -ENVELOPE_VERSION = 2 - - -def _sql_str(s: str) -> str: - """Escape a Python string for use *inside* a single-quoted SQL string - literal by doubling embedded single quotes. - - Use this at every `'{...}'` interpolation boundary in the render_* - helpers — payload keys, operator symbols, domain names rendered into - RAISE messages, etc. NOT for schema-qualified identifiers like - ``eql_v3.foo``: those are emitted unquoted and must not be doubled. - - Today every catalog string (term keys, operator symbols) is quote-free, - so this is a no-op on real input and output stays byte-identical. It - exists so a future quote-bearing catalog string can never break out of - its SQL literal — nothing else enforces the quote-free invariant.""" - return s.replace("'", "''") - - -# Schema housing the encrypted-domain families: the domains themselves plus -# their index-term extractors, comparison wrappers, blockers, and aggregates. -# New in v3 and distinct from the core eql_v2 schema, which still owns the -# shared index-term types the extractors return and construct -# (eql_v2.hmac_256, eql_v2.ore_block_u64_8_256). -DOMAIN_SCHEMA = "eql_v3" -# Schema owning the core index-term types/constructors the extractors reuse. -CORE_SCHEMA = "eql_v2" - - -def render_fixture_values_rs(spec: TypeSpec) -> str: - """Body for tests/sqlx/src/fixtures/_values.rs. - - Emits one `pub const VALUES: &[]` from the manifest's - `[fixture] values`, preserving declaration order. The writer prepends the - AUTO-GENERATED Rust header, so the body carries none.""" - kind = require_scalar(spec.token) - values = spec.fixture_values or [] - literals = "".join(f" {kind.render_literal(v)},\n" for v in values) - return ( - f"//! Fixture plaintext values for the {spec.token} " - "encrypted-domain family.\n" - "//!\n" - f"//! Generated from the `{spec.token}` row in `eql-scalars::CATALOG` " - "(`fixtures`) —\n" - "//! the single source of truth shared by the fixture generator\n" - f"//! 
(`fixtures::eql_v2_{spec.token}`) and the matrix oracle\n" - "//! (`ScalarType::FIXTURE_VALUES`).\n\n" - f"/// Distinct plaintext values present in the `eql_v2_{spec.token}` " - "fixture.\n" - f"pub const VALUES: &[{kind.rust_type}] = &[\n" - f"{literals}" - "];\n" - ) - -OPERATOR_PHRASES: dict[str, str] = { - "=": "Equality", - "<>": "Inequality", - "<": "Less-than", - "<=": "Less-than-or-equal", - ">": "Greater-than", - ">=": "Greater-than-or-equal", - "@>": "Contains", - "<@": "Contained-by", -} - -DOMAIN_ROLE_PHRASES: dict[str, str] = { - "storage": "Storage-only", - "eq": "Equality-only", - "ord": "Ordered", -} - - -def role_phrase(terms: list[str]) -> str: - """Proper-cased prose label for a domain with these terms — the single - source of truth for role → human prose. Every renderer that wants to - describe a domain's role in @brief lines reaches for this, so a rename - in DOMAIN_ROLE_PHRASES propagates to every generated file.""" - return DOMAIN_ROLE_PHRASES[role_for_terms(terms)] - - -def _scheme_suffix(name: str, token: str, role: str) -> str | None: - """The scheme tag of a domain name, or None for the converged name. - - The naming convention is ``_`` for the recommended converged - domain and ``__`` for a scheme-explicit twin that - pins the same role to one concrete index scheme. ``storage`` has no role - segment, so its converged name is the bare ````. - - Generic by construction: it reads ``token`` and ``role`` rather than any - hard-coded type or scheme string, so it works for int8/date/etc. and for - schemes other than ``ore``. Returns the scheme segment (e.g. 
``"ore"``) - for a twin, or None when ``name`` is the converged name (or doesn't match - the convention at all).""" - converged = token if role == "storage" else f"{token}_{role}" - if name == converged: - return None - prefix = converged + "_" - if name.startswith(prefix): - scheme = name[len(prefix):] - if scheme: - return scheme - return None - - -# Roles that come in converged + scheme-explicit-twin pairs and therefore need -# a disambiguating @brief clause. Ordered domains are the case the reviewer -# flagged: int4_ord and int4_ord_ore carry identical terms (["ore"]) and would -# otherwise render an identical brief. Driven by role (generic across int8, -# date, etc.), never by a literal type/scheme name. eq and storage have a -# single name each, so no disambiguation is needed (or wanted — it'd be noise). -_TWINNABLE_ROLES = frozenset({"ord"}) - - -def brief_role_clause(domain: DomainSpec, token: str) -> str: - """The trailing clause distinguishing the recommended converged domain - from a scheme-explicit twin, for use in a per-domain @brief. - - Two domains that carry identical terms (e.g. ``int4_ord`` and - ``int4_ord_ore``, both ``["ore"]``) would otherwise render an identical - brief. The converged name is the recommended one to reach for; the twin - names the concrete scheme explicitly. Returns "" for roles that don't come - in converged/twin pairs (eq, storage) and for names that match no pattern. - - Generic by construction: keyed on the term-derived role and the - ``_[_]`` name shape, never on a literal type or scheme - string, so int8/date/etc. and non-ore schemes work unchanged.""" - role = role_for_terms(domain.terms) - if role not in _TWINNABLE_ROLES: - return "" - scheme = _scheme_suffix(domain.name, token, role) - if scheme is not None: - return ( - f" Scheme-explicit twin pinning the {scheme} scheme; " - f"prefer the converged {token}_{role} name." - ) - if domain.name == f"{token}_{role}": - return " Recommended converged name for this role." 
- return "" - - -def domain_name(domain: str) -> str: - """The schema-qualified SQL domain type name, e.g. ``eql_v3.int4_eq``.""" - return f"{DOMAIN_SCHEMA}.{domain}" - - -def _arg_label(dom: str, arg_type: str) -> str: - """Doxygen brief shape qualifier for one operand: 'domain' if it's - the encrypted-domain type, otherwise the literal SQL type.""" - return "domain" if arg_type == dom else arg_type - - -def _shape_qualifier(dom: str, arg_a: str, arg_b: str) -> str: - """Doxygen brief parenthetical. Empty for the canonical (dom, dom) shape.""" - if arg_a == dom and arg_b == dom: - return "" - return f" ({_arg_label(dom, arg_a)}, {_arg_label(dom, arg_b)})" - - -def render_domain_block(domain: DomainSpec, token: str) -> str: - """One idempotent IF NOT EXISTS CREATE DOMAIN block, prefixed by a - per-domain --! @brief derived from role + token.""" - dom = domain_name(domain.name) - keys = ENVELOPE_KEYS + [CIPHERTEXT_KEY] + term_json_keys(domain.terms) - presence = "\n AND ".join(f"VALUE ? '{_sql_str(key)}'" for key in keys) - checks = ( - presence - + f"\n AND VALUE->>'{_sql_str(VERSION_KEY)}' = '{ENVELOPE_VERSION}'" - ) - phrase = role_phrase(domain.terms) - clause = brief_role_clause(domain, token) - return ( - f" --! @brief {phrase} encrypted {token} domain.{clause}\n" - f" IF NOT EXISTS (\n" - f" SELECT 1 FROM pg_type\n" - f" WHERE typname = '{_sql_str(domain.name)}' " - f"AND typnamespace = '{DOMAIN_SCHEMA}'::regnamespace\n" - f" ) THEN\n" - f" CREATE DOMAIN {dom} AS jsonb\n" - f" CHECK (\n" - f" jsonb_typeof(VALUE) = 'object'\n" - f" AND {checks}\n" - f" );\n" - f" END IF;\n" - ) - - -def render_extractor(domain: DomainSpec, term: Term) -> str: - """The inlinable index-term extractor for a domain term.""" - dom = domain_name(domain.name) - doxy = ( - f"--! @brief Index extractor for the {dom} variant.\n" - f"--! @param a {dom}\n" - f"--! 
@return {term.returns}\n" - ) - return doxy + ( - f"CREATE FUNCTION {DOMAIN_SCHEMA}.{term.extractor}(a {dom})\n" - f"RETURNS {term.returns}\n" - f"LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE\n" - f"AS $$ SELECT {CORE_SCHEMA}.{term.ctor}(a::jsonb) $$;\n" - ) - - -def _extract_arg(arg_type: str, extractor: str, domain: str, arg: str) -> str: - """The extractor-call SQL for one operand, casting jsonb to the domain first.""" - if arg_type == "jsonb": - return f"{DOMAIN_SCHEMA}.{extractor}({arg}::{domain})" - return f"{DOMAIN_SCHEMA}.{extractor}({arg})" - - -def render_wrapper( - domain: DomainSpec, op: str, arg_a: str, arg_b: str, extractor: str -) -> str: - """An inlinable comparison wrapper for a supported operator.""" - dom = domain_name(domain.name) - backing = OPERATORS[op].backing - call_a = _extract_arg(arg_a, extractor, dom, "a") - call_b = _extract_arg(arg_b, extractor, dom, "b") - doxy = ( - f"--! @brief {OPERATOR_PHRASES[op]} wrapper for {dom}" - f"{_shape_qualifier(dom, arg_a, arg_b)}.\n" - f"--! @param a {arg_a}\n" - f"--! @param b {arg_b}\n" - f"--! @return boolean\n" - ) - return doxy + ( - f"CREATE FUNCTION {DOMAIN_SCHEMA}.{backing}(a {arg_a}, b {arg_b})\n" - f"RETURNS boolean LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE\n" - f"AS $$ SELECT {call_a} {op} {call_b} $$;\n" - ) - - -def render_blocker_bool( - domain: DomainSpec, op: str, arg_a: str, arg_b: str -) -> str: - """A boolean-returning blocker. NEVER STRICT, ALWAYS LANGUAGE plpgsql - so the RAISE survives inlining and planner-time elision; see CLAUDE.md - footguns and the encrypted-domain spec §4.""" - dom = domain_name(domain.name) - backing = OPERATORS[op].backing - doxy = ( - f"--! @brief Blocker for {op} on {dom}" - f"{_shape_qualifier(dom, arg_a, arg_b)}.\n" - f"--! @param a {arg_a}\n" - f"--! @param b {arg_b}\n" - f"--! 
@return boolean (never returns; always raises)\n" - ) - return doxy + ( - f"CREATE FUNCTION {DOMAIN_SCHEMA}.{backing}(a {arg_a}, b {arg_b})\n" - f"RETURNS boolean IMMUTABLE PARALLEL SAFE\n" - f"AS $$ BEGIN RETURN {DOMAIN_SCHEMA}.encrypted_domain_unsupported_bool(" - f"'{_sql_str(dom)}', '{_sql_str(op)}'); END; $$\n" - f"LANGUAGE plpgsql;\n" - ) - - -def render_blocker_path( - domain: DomainSpec, op: str, arg_a: str, arg_b: str -) -> str: - """A path-operator blocker. NEVER STRICT, ALWAYS LANGUAGE plpgsql - so the RAISE survives inlining and planner-time elision; see CLAUDE.md - footguns and the encrypted-domain spec §4.""" - dom = domain_name(domain.name) - backing = OPERATORS[op].backing - returns = "text" if op == "->>" else dom - doxy = ( - f"--! @brief Blocker for {op} on {dom} " - f"({_arg_label(dom, arg_a)}, {_arg_label(dom, arg_b)}).\n" - f"--! @param a {arg_a}\n" - f"--! @param selector {arg_b}\n" - f"--! @return {returns} (never returns; always raises)\n" - ) - return doxy + ( - f"CREATE FUNCTION {DOMAIN_SCHEMA}.{backing}(a {arg_a}, selector {arg_b})\n" - f"RETURNS {returns} IMMUTABLE PARALLEL SAFE\n" - f"AS $$ BEGIN RAISE EXCEPTION " - f"'operator % is not supported for %', '{_sql_str(op)}', " - f"'{_sql_str(dom)}'; END; $$\n" - f"LANGUAGE plpgsql;\n" - ) - - -def render_blocker_native( - domain: DomainSpec, op: str, arg_a: str, arg_b: str, returns: str -) -> str: - """A blocker for a native jsonb fallback operator. NEVER STRICT, ALWAYS - LANGUAGE plpgsql. Boolean blockers delegate to the shared helper so lint - recognition and messages stay uniform; other return types raise directly. - """ - dom = domain_name(domain.name) - backing = OPERATORS[op].backing - doxy = ( - f"--! @brief Blocker for {op} on {dom}" - f"{_shape_qualifier(dom, arg_a, arg_b)}.\n" - f"--! @param a {arg_a}\n" - f"--! @param b {arg_b}\n" - f"--! 
@return {returns} (never returns; always raises)\n" - ) - if returns == "boolean": - body = ( - f"BEGIN RETURN {DOMAIN_SCHEMA}.encrypted_domain_unsupported_bool(" - f"'{_sql_str(dom)}', '{_sql_str(op)}'); END;" - ) - else: - body = ( - "BEGIN RAISE EXCEPTION " - f"'operator % is not supported for %', '{_sql_str(op)}', " - f"'{_sql_str(dom)}'; END;" - ) - return doxy + ( - f"CREATE FUNCTION {DOMAIN_SCHEMA}.{backing}(a {arg_a}, b {arg_b})\n" - f"RETURNS {returns} IMMUTABLE PARALLEL SAFE\n" - f"AS $$ {body} $$\n" - f"LANGUAGE plpgsql;\n" - ) - - -def extractor_for_operator(domain: DomainSpec, op: str) -> str | None: - """Return the catalog extractor that supports op for this domain.""" - return _catalog_extractor_for_operator(domain.terms, op) - - -def supported_operators(domain: DomainSpec) -> list[str]: - """Supported operators for this domain.""" - return operators_for_terms(domain.terms) - - -@dataclass(frozen=True) -class AggregateOp: - """One aggregate operator definition (min or max).""" - - name: str # public function name, e.g. "min" - sfunc_name: str # state function name, e.g. "min_sfunc" - comparator: str # SQL comparator used to choose the new state: "<" or ">" - phrase: str # short prose label used in --! @brief lines - - -AGGREGATE_OPS: dict[str, AggregateOp] = { - "min": AggregateOp("min", "min_sfunc", "<", "minimum"), - "max": AggregateOp("max", "max_sfunc", ">", "maximum"), -} - - -def is_ord_capable(domain: DomainSpec) -> bool: - """True if the domain carries a comparator term (i.e. supports `<`).""" - return role_for_terms(domain.terms) == "ord" - - -def render_aggregate(domain: DomainSpec, op: AggregateOp) -> str: - """Render state function + CREATE AGGREGATE for one aggregate op on one - domain. The ord-capability gate lives at the file-level renderer - (`render_aggregates_file`); callers may legitimately render a single - aggregate without re-asserting that precondition. 
MIN/MAX on a non-ord - domain is structurally well-formed text but semantically meaningless — - the file-level gate is what stops it ever reaching disk.""" - dom = domain_name(domain.name) - sfunc_doxy = ( - f"--! @brief State function for {op.name} aggregate on {dom}.\n" - f"--! @internal\n" - f"--!\n" - f"--! @param state {dom} running extremum\n" - f"--! @param value {dom} next non-NULL value\n" - f"--! @return {dom} the {op.phrase} of state and value\n" - ) - # plpgsql + STRICT: PG seeds the state with the first non-NULL value and - # skips NULL inputs. plpgsql (not sql) because aggregate state functions - # aren't index expressions — opacity to the planner is fine — and a - # multi-statement BEGIN/IF/END body is the natural shape. - # - # The same rationale is mirrored into the emitted SQL below so a reader of - # the generated file (who never sees this Python) understands why it isn't - # an inlinable LANGUAGE sql CASE. - sfunc_rationale = ( - "-- LANGUAGE plpgsql, not sql: aggregate state functions are not index\n" - "-- expressions, so opacity to the planner is fine, and a multi-statement\n" - "-- BEGIN/IF/END body is the natural shape. (A LANGUAGE sql CASE would\n" - "-- also work, but the procedural form mirrors the blocker convention.)\n" - ) - sfunc = sfunc_rationale + ( - f"CREATE FUNCTION {DOMAIN_SCHEMA}.{op.sfunc_name}(state {dom}, value {dom})\n" - f"RETURNS {dom}\n" - f"LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE\n" - f"SET search_path = pg_catalog, extensions, public\n" - f"AS $$\n" - f"BEGIN\n" - f" IF value {op.comparator} state THEN\n" - f" RETURN value;\n" - f" END IF;\n" - f" RETURN state;\n" - f"END;\n" - f"$$;\n" - ) - agg_doxy = ( - f"--! @brief Find the {op.phrase} encrypted value in a group of " - f"{dom} values.\n" - f"--!\n" - f"--! Comparison routes through the domain's `{op.comparator}` " - f"operator, which uses the ORE block term — no decryption.\n" - f"--!\n" - f"--! @param input {dom} encrypted values to aggregate\n" - f"--! 
@return {dom} {op.phrase} of the group, or NULL if all " - f"inputs are NULL\n" - ) - # min/max are associative, so the state function doubles as the combine - # function: merging two partial extrema is the same comparison. With a - # PARALLEL SAFE sfunc/combinefunc and `parallel = safe`, PG can use partial - # and parallel aggregation on the large GROUP BY workloads these ORE - # aggregates exist to serve — still with no decryption. The combinefunc is - # STRICT (it is the sfunc), so PG carries a null partial state through as - # "no value yet", matching the serial seed-and-skip semantics. - aggregate = ( - "-- combinefunc = sfunc: min/max are associative, so merging two partial\n" - "-- extrema is the same comparison. PARALLEL SAFE enables partial and\n" - "-- parallel aggregation on large GROUP BY workloads, with no decryption.\n" - f"CREATE AGGREGATE {DOMAIN_SCHEMA}.{op.name}({dom}) (\n" - f" sfunc = {DOMAIN_SCHEMA}.{op.sfunc_name},\n" - f" stype = {dom},\n" - f" combinefunc = {DOMAIN_SCHEMA}.{op.sfunc_name},\n" - f" parallel = safe\n" - f");\n" - ) - return sfunc_doxy + sfunc + "\n" + agg_doxy + aggregate - - -def render_operator( - op: str, backing: str, leftarg: str, rightarg: str, supported: bool -) -> str: - """A CREATE OPERATOR declaration. - - Unsupported operators are still declared, but their backing function is a - blocker that always raises. We emit them so the operator resolves on the - domain (rather than silently falling through to a native jsonb operator), - and a leading SQL comment explains the placeholder to future readers.""" - meta = OPERATORS[op] - lines = [] - if not supported: - lines.append( - f"-- Placeholder: this domain's term set does not support {op}; " - f"the backing function always raises." 
- ) - lines += [ - f"CREATE OPERATOR {op} (", - f" FUNCTION = {DOMAIN_SCHEMA}.{backing},", - f" LEFTARG = {leftarg}, RIGHTARG = {rightarg}", - ] - if supported and meta.kind == "symmetric": - extras = [] - if meta.commutator: - extras.append(f"COMMUTATOR = {meta.commutator}") - if meta.negator: - extras.append(f"NEGATOR = {meta.negator}") - if meta.restrict: - extras.append(f"RESTRICT = {meta.restrict}") - if meta.join: - extras.append(f"JOIN = {meta.join}") - if extras: - lines[-1] += "," - lines.append(" " + ", ".join(extras)) - lines.append(");") - return "\n".join(lines) + "\n" diff --git a/tasks/codegen/terms.py b/tasks/codegen/terms.py deleted file mode 100644 index 32a7c788..00000000 --- a/tasks/codegen/terms.py +++ /dev/null @@ -1,107 +0,0 @@ -"""Fixed index-term catalog for scalar encrypted-domain codegen.""" - -from collections.abc import Iterable -from dataclasses import dataclass - - -class TermError(Exception): - """Raised when a manifest references an unknown term.""" - - -@dataclass(frozen=True) -class Term: - """One fixed index term known to the scalar materializer.""" - - name: str - json_key: str - extractor: str - returns: str - ctor: str - role: str - operators: tuple[str, ...] - requires: tuple[str, ...] - - -TERM_CATALOG: dict[str, Term] = { - "hm": Term( - name="hm", - json_key="hm", - extractor="eq_term", - returns="eql_v2.hmac_256", - ctor="hmac_256", - role="eq", - operators=("=", "<>"), - requires=("src/hmac_256/functions.sql",), - ), - "ore": Term( - name="ore", - json_key="ob", - extractor="ord_term", - returns="eql_v2.ore_block_u64_8_256", - ctor="ore_block_u64_8_256", - role="ord", - operators=("=", "<>", "<", "<=", ">", ">="), - requires=( - "src/ore_block_u64_8_256/functions.sql", - "src/ore_block_u64_8_256/operators.sql", - ), - ), -} - - -def _dedupe_preserving_order(values: Iterable[str]) -> list[str]: - """Stable dedupe — first occurrence wins. 
`dict.fromkeys` preserves insert order.""" - return list(dict.fromkeys(values)) - - -def require_terms(names: list[str]) -> list[Term]: - """Return catalog terms for manifest names, preserving input order.""" - terms: list[Term] = [] - for name in names: - try: - terms.append(TERM_CATALOG[name]) - except KeyError as exc: - raise TermError( - f"unknown term '{name}' (expected one of {sorted(TERM_CATALOG)})" - ) from exc - return terms - - -def operators_for_terms(names: list[str]) -> list[str]: - """Supported operators for the union of a domain's terms.""" - return _dedupe_preserving_order( - op for term in require_terms(names) for op in term.operators - ) - - -def term_json_keys(names: list[str]) -> list[str]: - """JSON payload keys required by these terms.""" - return _dedupe_preserving_order( - term.json_key for term in require_terms(names) - ) - - -def term_requires(names: list[str]) -> list[str]: - """SQL REQUIRE edges needed by these terms.""" - return _dedupe_preserving_order( - req for term in require_terms(names) for req in term.requires - ) - - -def extractor_for_operator(names: list[str], op: str) -> str | None: - """The catalog extractor that supports `op` for a domain carrying `names`.""" - for term in require_terms(names): - if op in term.operators: - return term.extractor - return None - - -def role_for_terms(names: list[str]) -> str: - """Generated-file role label for a domain with these terms. - - A domain with no terms is `storage`; otherwise the role comes from - the first term's catalog role (e.g. `hm` -> `eq`, `ore` -> `ord`). - """ - if not names: - return "storage" - return require_terms(names)[0].role diff --git a/tasks/codegen/test_against_reference.py b/tasks/codegen/test_against_reference.py deleted file mode 100644 index e7ea62e9..00000000 --- a/tasks/codegen/test_against_reference.py +++ /dev/null @@ -1,117 +0,0 @@ -"""Identity guard: the generator must reproduce the frozen manual -reference under tests/codegen/reference// byte-for-byte. 
- -The reference is the reviewed manual implementation. If the generator's -output diverges from the reference, either the generator regressed (fix -it) or the reference is being deliberately updated (commit the new -reference in this PR). - -Compares in-memory `render_*_file` output directly against the reference, -so it runs anywhere regardless of whether the build has materialised -src/encrypted_domain// (those files are gitignored — `tasks/build.sh` -regenerates them on each build). -""" -from pathlib import Path - -import pytest - -from tasks.codegen.generate import ( - REPO_ROOT, - render_aggregates_file, - render_functions_file, - render_operators_file, - render_types_file, -) -from tasks.codegen.spec import load_spec -from tasks.codegen.templates import render_fixture_values_rs - -_REFERENCE_ROOT = REPO_ROOT / "tests" / "codegen" / "reference" -_TYPES_DIR = REPO_ROOT / "tasks" / "codegen" / "types" - - -def _strip_reference_marker(text: str) -> str: - """Drop any leading `-- REFERENCE:` / `// REFERENCE:` lines. They label the - file as the parity baseline (see tests/codegen/reference/README.md) and are - not part of the generator's output. 
Both comment styles are recognised so - the same helper serves SQL and Rust reference files.""" - lines = text.splitlines(keepends=True) - while lines and lines[0].startswith(("-- REFERENCE:", "// REFERENCE:")): - lines.pop(0) - return "".join(lines) - - -def _reference_files() -> list[Path]: - """Every SQL file under tests/codegen/reference//.""" - if not _REFERENCE_ROOT.is_dir(): - return [] - return sorted(_REFERENCE_ROOT.glob("*/*.sql")) - - -def _render(reference_path: Path) -> str: - """Render the corresponding generator output for a reference file.""" - token = reference_path.parent.name - name = reference_path.name - spec = load_spec(_TYPES_DIR / f"{token}.toml") - - if name == f"{token}_types.sql": - return render_types_file(spec) - - for domain in spec.domains: - if name == f"{domain.name}_functions.sql": - return render_functions_file(spec, domain) - if name == f"{domain.name}_operators.sql": - return render_operators_file(spec, domain) - if name == f"{domain.name}_aggregates.sql": - body = render_aggregates_file(spec, domain) - if body is None: - pytest.fail( - f"reference {reference_path.relative_to(REPO_ROOT)} exists " - f"but the generator skipped this variant (not ord-capable). " - f"Remove the reference file or update the manifest." - ) - return body - - pytest.fail(f"unrecognised reference filename: {name}") - - -@pytest.mark.parametrize( - "reference_path", - _reference_files(), - ids=lambda p: f"{p.parent.name}/{p.name}", -) -def test_generator_matches_manual_reference(reference_path: Path): - """Generator render output must equal the reviewed reference.""" - token = reference_path.parent.name - fix = ( - f"either the generator regressed (fix tasks/codegen/) or the " - f"manual reference is being updated deliberately — commit the " - f"new reference at {reference_path.relative_to(REPO_ROOT)} in " - f"this PR. 
Regenerate via: mise run codegen:domain {token}" - ) - - expected = _strip_reference_marker(reference_path.read_text(encoding="utf-8")) - actual = _render(reference_path) - - assert actual == expected, f"{reference_path.name}: {fix}" - - -def test_generator_matches_rust_fixture_values_reference(): - """The generated Rust fixture-value const must match the reviewed reference. - - Guards the committed tests/sqlx/src/fixtures/int4_values.rs against drift - from the manifest (the same property the CI staleness guard enforces, but - runnable without a checkout diff).""" - reference_path = _REFERENCE_ROOT / "int4" / "int4_values.rs" - spec = load_spec(_TYPES_DIR / "int4.toml") - - expected = _strip_reference_marker( - reference_path.read_text(encoding="utf-8") - ) - actual = render_fixture_values_rs(spec) - - assert actual == expected, ( - "int4_values.rs: either the generator regressed (fix tasks/codegen/) " - "or the reference is being updated deliberately — commit the new " - f"reference at {reference_path.relative_to(REPO_ROOT)} in this PR. 
" - "Regenerate via: mise run codegen:domain int4" - ) diff --git a/tasks/codegen/test_generate.py b/tasks/codegen/test_generate.py deleted file mode 100644 index db93b989..00000000 --- a/tasks/codegen/test_generate.py +++ /dev/null @@ -1,351 +0,0 @@ -"""Tests for composing scalar encrypted-domain files from a manifest.""" - -import textwrap - -import pytest - -from tasks.codegen.generate import ( - generate_type, - main, - render_aggregates_file, - render_functions_file, - render_operators_file, - render_types_file, -) -from tasks.codegen.spec import load_spec -from tasks.codegen.templates import AUTO_GENERATED_HEADER, AUTO_GENERATED_HEADER_RS -from tasks.codegen.writer import OwnershipError - - -INT4_TOML = textwrap.dedent(""" - [domain] - int4 = [] - int4_eq = ["hm"] - int4_ord_ore = ["ore"] - int4_ord = ["ore"] -""") - -INT4_FIXTURE_TOML = INT4_TOML + textwrap.dedent(""" - [fixture] - values = ["MIN", "-1", "ZERO", "1", "MAX"] -""") - -# A second, synthetic type for multi-type (--all) coverage. No [fixture] table, -# so it never touches scalars.py (which only registers int4) — it exercises the -# enumeration, not fixture rendering. 
-INT4X_TOML = textwrap.dedent(""" - [domain] - int4x = [] - int4x_eq = ["hm"] - int4x_ord = ["ore"] -""") - - -def _fixture_values_rs(out_root): - return out_root / "tests" / "sqlx" / "src" / "fixtures" / "int4_values.rs" - - -def load(tmp_path): - p = tmp_path / "int4.toml" - p.write_text(INT4_TOML) - return load_spec(p) - - -def test_types_file_has_all_four_domains(tmp_path): - spec = load(tmp_path) - sql = render_types_file(spec) - assert "-- REQUIRE: src/schema-v3.sql" in sql - for dom in ("int4", "int4_eq", - "int4_ord", "int4_ord_ore"): - assert f"CREATE DOMAIN eql_v3.{dom} AS jsonb" in sql - - -def test_storage_functions_file_is_all_blockers(tmp_path): - spec = load(tmp_path) - storage = next(d for d in spec.domains if d.name == "int4") - sql = render_functions_file(spec, storage) - assert sql.count("CREATE FUNCTION") == 44 - assert "SET search_path" not in sql - assert sql.count("LANGUAGE plpgsql") == 44 - assert sql.count("LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE") == 0 - - -def test_eq_functions_file_counts_and_extractor(tmp_path): - spec = load(tmp_path) - eq = next(d for d in spec.domains if d.name == "int4_eq") - sql = render_functions_file(spec, eq) - assert sql.count("CREATE FUNCTION") == 45 - assert "CREATE FUNCTION eql_v3.eq_term(a eql_v3.int4_eq)" in sql - assert "RETURNS eql_v2.hmac_256" in sql - # 1 extractor + 6 wrappers (=, <> across 3 arg-shapes) inlined as SQL; - # 38 blockers across the remaining native jsonb surface as plpgsql. 
- assert sql.count("LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE") == 7 - assert sql.count("LANGUAGE plpgsql") == 38 - assert "SET search_path" not in sql - - -def test_ore_functions_file_counts_and_extractor(tmp_path): - spec = load(tmp_path) - ordered = next(d for d in spec.domains if d.name == "int4_ord") - sql = render_functions_file(spec, ordered) - assert sql.count("CREATE FUNCTION") == 45 - assert "CREATE FUNCTION eql_v3.ord_term(a eql_v3.int4_ord)" in sql - assert "RETURNS eql_v2.ore_block_u64_8_256" in sql - # 1 extractor + 18 wrappers (=, <>, <, <=, >, >= across 3 shapes); - # 26 blockers across containment/path/native-jsonb fallback ops. - assert sql.count("LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE") == 19 - assert sql.count("LANGUAGE plpgsql") == 26 - assert "SET search_path" not in sql - - -def test_operators_file_has_forty_four(tmp_path): - spec = load(tmp_path) - eq = next(d for d in spec.domains if d.name == "int4_eq") - sql = render_operators_file(spec, eq) - assert sql.count("CREATE OPERATOR") == 44 - - -def test_generate_type_writes_expected_files(tmp_path): - spec = load(tmp_path) - out_dir = tmp_path / "int4" - written = generate_type(spec, out_dir) - names = {p.name for p in written} - assert "int4_types.sql" in names - for domain in ("int4", "int4_eq", "int4_ord", "int4_ord_ore"): - assert f"{domain}_functions.sql" in names - assert f"{domain}_operators.sql" in names - # Aggregates only emitted for ord-capable variants — storage and eq skip. 
- assert "int4_aggregates.sql" not in names - assert "int4_eq_aggregates.sql" not in names - assert "int4_ord_aggregates.sql" in names - assert "int4_ord_ore_aggregates.sql" in names - # 1 types + 4 functions + 4 operators + 2 aggregates = 11 - assert len(written) == 11 - for p in written: - assert p.read_text().startswith(AUTO_GENERATED_HEADER) - - -def test_generate_type_cleans_stale_files(tmp_path): - spec = load(tmp_path) - out_dir = tmp_path / "int4" - out_dir.mkdir() - stale = out_dir / "int4_removed_functions.sql" - stale.write_text(AUTO_GENERATED_HEADER + "-- orphan\n") - generate_type(spec, out_dir) - assert not stale.exists() - - -def test_generate_type_preserves_hand_written_extension_file(tmp_path): - spec = load(tmp_path) - out_dir = tmp_path / "int4" - out_dir.mkdir() - extension = out_dir / "int4_extensions.sql" - body = ( - "-- REQUIRE: src/encrypted_domain/int4/int4_types.sql\n" - "-- hand-written extension SQL\n" - ) - extension.write_text(body) - generate_type(spec, out_dir) - assert extension.read_text() == body - - -def test_generate_type_preflights_hand_written_target_before_cleanup(tmp_path): - spec = load(tmp_path) - out_dir = tmp_path / "int4" - out_dir.mkdir() - generated = out_dir / "int4_types.sql" - protected = out_dir / "int4_eq_functions.sql" - original_generated = AUTO_GENERATED_HEADER + "-- old generated\n" - original_protected = "-- REQUIRE: src/schema.sql\n-- hand-written\n" - generated.write_text(original_generated) - protected.write_text(original_protected) - - with pytest.raises(OwnershipError, match="hand-written"): - generate_type(spec, out_dir) - - assert generated.read_text() == original_generated - assert protected.read_text() == original_protected - assert not (out_dir / "int4_eq_operators.sql").exists() - - -def _seed_types_dir(tmp_path, name: str = "int4.toml", body: str = INT4_TOML): - types_dir = tmp_path / "types" - types_dir.mkdir() - (types_dir / name).write_text(body) - return types_dir - - -def 
test_main_rejects_wrong_argv_length(capsys): - rc = main(["generate.py"]) - assert rc == 2 - err = capsys.readouterr().err - assert "Usage: generate.py " in err - - -def test_main_errors_on_missing_manifest(tmp_path, capsys): - types_dir = tmp_path / "types" - types_dir.mkdir() - rc = main( - ["generate.py", "int4"], - types_dir=types_dir, - out_root=tmp_path, - ) - assert rc == 1 - err = capsys.readouterr().err - assert "no manifest at" in err - assert "int4.toml" in err - - -def test_main_errors_on_token_mismatch(tmp_path, capsys): - """Manifest stem must equal argv token — guards against a copy/rename.""" - types_dir = _seed_types_dir(tmp_path, name="int4.toml") - rc = main( - ["generate.py", "int8"], - types_dir=types_dir, - out_root=tmp_path, - ) - # int8.toml doesn't exist — first failure is missing manifest, not mismatch. - # To exercise the mismatch branch we need a manifest at int8.toml that - # declares int4 domains (impossible — the loader infers token from stem). - # The branch is therefore unreachable via the normal types/.toml - # convention; the assertion below just confirms the missing-manifest - # error path fires when the names diverge. 
- assert rc == 1 - err = capsys.readouterr().err - assert "no manifest at" in err - assert "int8.toml" in err - - -def test_main_happy_path_writes_files(tmp_path, capsys): - types_dir = _seed_types_dir(tmp_path) - rc = main( - ["generate.py", "int4"], - types_dir=types_dir, - out_root=tmp_path, - ) - assert rc == 0 - out_dir = tmp_path / "src" / "encrypted_domain" / "int4" - assert (out_dir / "int4_types.sql").is_file() - assert (out_dir / "int4_eq_functions.sql").is_file() - assert (out_dir / "int4_ord_operators.sql").is_file() - assert (out_dir / "int4_ord_aggregates.sql").is_file() - assert (out_dir / "int4_ord_ore_aggregates.sql").is_file() - assert not (out_dir / "int4_aggregates.sql").exists() - assert not (out_dir / "int4_eq_aggregates.sql").exists() - stdout = capsys.readouterr().out - assert "generated 11 files for int4" in stdout - - -def test_main_emits_fixture_values_rs_when_manifest_has_fixture(tmp_path, capsys): - types_dir = _seed_types_dir(tmp_path, body=INT4_FIXTURE_TOML) - rc = main(["generate.py", "int4"], types_dir=types_dir, out_root=tmp_path) - assert rc == 0 - rs = _fixture_values_rs(tmp_path) - assert rs.is_file() - text = rs.read_text() - assert text.startswith(AUTO_GENERATED_HEADER_RS) - assert "pub const VALUES: &[i32] = &[" in text - assert "i32::MIN," in text and "i32::MAX," in text - stdout = capsys.readouterr().out - assert "int4_values.rs" in stdout - - -def test_main_omits_fixture_values_rs_when_no_fixture_table(tmp_path, capsys): - types_dir = _seed_types_dir(tmp_path, body=INT4_TOML) - rc = main(["generate.py", "int4"], types_dir=types_dir, out_root=tmp_path) - assert rc == 0 - assert not _fixture_values_rs(tmp_path).exists() - - -def _seed_two_types(tmp_path): - types_dir = _seed_types_dir(tmp_path, name="int4.toml", body=INT4_TOML) - (types_dir / "int4x.toml").write_text(INT4X_TOML) - return types_dir - - -def test_main_all_generates_every_type(tmp_path, capsys): - types_dir = _seed_two_types(tmp_path) - rc = 
main(["generate.py", "--all"], types_dir=types_dir, out_root=tmp_path) - assert rc == 0 - assert (tmp_path / "src/encrypted_domain/int4/int4_types.sql").is_file() - assert (tmp_path / "src/encrypted_domain/int4x/int4x_types.sql").is_file() - out = capsys.readouterr().out - assert "generated 11 files for int4" in out - assert "codegen --all: ok (2 types: int4, int4x)" in out - - -def test_main_all_generates_in_sorted_order(tmp_path, capsys): - types_dir = _seed_two_types(tmp_path) - main(["generate.py", "--all"], types_dir=types_dir, out_root=tmp_path) - out = capsys.readouterr().out - assert out.index("for int4\n") < out.index("for int4x\n") - - -def test_main_all_errors_when_no_manifests(tmp_path, capsys): - types_dir = tmp_path / "types" - types_dir.mkdir() - rc = main(["generate.py", "--all"], types_dir=types_dir, out_root=tmp_path) - assert rc == 1 - assert "no manifests found" in capsys.readouterr().err - - -def test_main_all_aggregates_nonzero_on_bad_manifest(tmp_path, capsys): - types_dir = _seed_types_dir(tmp_path, name="int4.toml", body=INT4_TOML) - # 'broken' sorts before 'int4', so it is processed first; its domain name - # does not start with the token, so load_spec raises SpecError. - (types_dir / "broken.toml").write_text("[domain]\nwrongprefix = []\n") - rc = main(["generate.py", "--all"], types_dir=types_dir, out_root=tmp_path) - assert rc == 1 - captured = capsys.readouterr() - assert "broken" in captured.err - assert "codegen --all: FAILED" in captured.out - # The good type still generated despite the broken sibling. 
- assert (tmp_path / "src/encrypted_domain/int4/int4_types.sql").is_file() - - -def test_ordered_files_are_byte_identical_modulo_typename(tmp_path): - spec = load(tmp_path) - ord_domain = next(d for d in spec.domains if d.name == "int4_ord") - ore_domain = next(d for d in spec.domains if d.name == "int4_ord_ore") - - for renderer in (render_functions_file, render_operators_file, render_aggregates_file): - ord_sql = renderer(spec, ord_domain) - ore_sql = renderer(spec, ore_domain) - normalised_ord = ord_sql.replace("int4_ord_ore", "T").replace( - "int4_ord", "T" - ) - normalised_ore = ore_sql.replace("int4_ord_ore", "T").replace( - "int4_ord", "T" - ) - assert normalised_ord == normalised_ore, ( - f"{renderer.__name__}: int4_ord and int4_ord_ore must produce " - f"byte-identical SQL modulo their typenames" - ) - - -def test_render_aggregates_file_only_for_ord_variants(tmp_path): - spec = load(tmp_path) - storage = next(d for d in spec.domains if d.name == "int4") - eq = next(d for d in spec.domains if d.name == "int4_eq") - ordered = next(d for d in spec.domains if d.name == "int4_ord") - ore = next(d for d in spec.domains if d.name == "int4_ord_ore") - - assert render_aggregates_file(spec, storage) is None - assert render_aggregates_file(spec, eq) is None - assert render_aggregates_file(spec, ordered) is not None - assert render_aggregates_file(spec, ore) is not None - - -def test_render_aggregates_file_carries_both_min_and_max(tmp_path): - spec = load(tmp_path) - ordered = next(d for d in spec.domains if d.name == "int4_ord") - sql = render_aggregates_file(spec, ordered) - assert sql is not None - assert sql.count("CREATE FUNCTION") == 2 - assert sql.count("CREATE AGGREGATE") == 2 - assert "eql_v3.min_sfunc" in sql - assert "eql_v3.max_sfunc" in sql - # REQUIRE edges: types + functions + operators must all be declared. 
- assert "-- REQUIRE: src/encrypted_domain/int4/int4_ord_operators.sql" in sql - assert "-- REQUIRE: src/encrypted_domain/int4/int4_ord_functions.sql" in sql - assert "-- REQUIRE: src/encrypted_domain/int4/int4_types.sql" in sql diff --git a/tasks/codegen/test_operator_surface.py b/tasks/codegen/test_operator_surface.py deleted file mode 100644 index a513ce82..00000000 --- a/tasks/codegen/test_operator_surface.py +++ /dev/null @@ -1,126 +0,0 @@ -"""Tests for the scalar operator surface definition.""" -from tasks.codegen.operator_surface import ( - BLOCKER_ONLY_OPERATORS, - KNOWN_JSONB_OPERATORS, - OPERATORS, - PATH_OPERATORS, - SYMMETRIC_OPERATORS, - backing_function, -) - - -def test_twenty_operators_total(): - """The surface covers supported wrappers plus native jsonb fallbacks.""" - assert len(OPERATORS) == 20 - - -def test_eight_symmetric_operators(): - """8 symmetric boolean operators.""" - assert SYMMETRIC_OPERATORS == ["=", "<>", "<", "<=", ">", ">=", "@>", "<@"] - - -def test_two_path_operators(): - """2 path operators.""" - assert PATH_OPERATORS == ["->", "->>"] - - -def test_ten_blocker_only_jsonb_fallback_operators(): - """Native jsonb operators not otherwise supported are blocker-only.""" - assert BLOCKER_ONLY_OPERATORS == [ - "?", - "?|", - "?&", - "@?", - "@@", - "#>", - "#>>", - "-", - "#-", - "||", - ] - - -def test_no_like_operators(): - """The surface excludes ~~ and ~~* (int4 has no LIKE support).""" - assert "~~" not in OPERATORS - assert "~~*" not in OPERATORS - - -def test_backing_function_names(): - """Each operator maps to its eql_v2 backing function name.""" - assert backing_function("=") == "eq" - assert backing_function("<>") == "neq" - assert backing_function("<") == "lt" - assert backing_function("<=") == "lte" - assert backing_function(">") == "gt" - assert backing_function(">=") == "gte" - assert backing_function("@>") == "contains" - assert backing_function("<@") == "contained_by" - assert backing_function("->") == '"->"' - assert 
backing_function("->>") == '"->>"' - assert backing_function("?") == '"?"' - assert backing_function("?|") == '"?|"' - assert backing_function("?&") == '"?&"' - assert backing_function("@?") == '"@?"' - assert backing_function("@@") == '"@@"' - assert backing_function("#>") == '"#>"' - assert backing_function("#>>") == '"#>>"' - assert backing_function("-") == '"-"' - assert backing_function("#-") == '"#-"' - assert backing_function("||") == '"||"' - - -def test_selectivity_estimators(): - """Symmetric ops carry RESTRICT/JOIN selectivity estimators.""" - assert OPERATORS["="].restrict == "eqsel" - assert OPERATORS["="].join == "eqjoinsel" - assert OPERATORS["<>"].restrict == "neqsel" - assert OPERATORS["<"].restrict == "scalarltsel" - assert OPERATORS["<="].restrict == "scalarlesel" - assert OPERATORS[">"].restrict == "scalargtsel" - assert OPERATORS[">="].restrict == "scalargesel" - - -def test_negators_and_commutators(): - """= / <> are negators; range ops commute as documented.""" - assert OPERATORS["="].negator == "<>" - assert OPERATORS["<>"].negator == "=" - assert OPERATORS["<"].commutator == ">" - assert OPERATORS["<"].negator == ">=" - assert OPERATORS[">="].commutator == "<=" - - -def test_known_jsonb_operators_is_union_of_the_three_lists(): - """The exported union is exactly the three enumerated lists, deduped.""" - assert KNOWN_JSONB_OPERATORS == frozenset( - SYMMETRIC_OPERATORS + PATH_OPERATORS + BLOCKER_ONLY_OPERATORS - ) - - -def test_known_jsonb_operators_matches_operators_keys(): - """The union must stay in lockstep with the OPERATORS table itself, so a - new operator added to one but not the other is caught here rather than - leaving a hole in the storage-only blocker guarantee.""" - assert KNOWN_JSONB_OPERATORS == frozenset(OPERATORS) - - -def test_known_jsonb_operators_full_native_surface(): - """Pin the full native jsonb operator surface for PG 14-17. 
This is the - source-of-truth the live-DB structural guard - (tests/sqlx/.../family/jsonb_operator_surface.rs) asserts pg_operator is a - subset of. If PG adds a jsonb operator, that DB test fails; if this list is - edited, both must move together. The three lists are disjoint, so the union - size equals their combined length.""" - assert KNOWN_JSONB_OPERATORS == frozenset( - { - # symmetric (supported wrappers) - "=", "<>", "<", "<=", ">", ">=", "@>", "<@", - # path - "->", "->>", - # blocker-only native jsonb fallbacks - "?", "?|", "?&", "@?", "@@", "#>", "#>>", "-", "#-", "||", - } - ) - assert len(KNOWN_JSONB_OPERATORS) == ( - len(SYMMETRIC_OPERATORS) + len(PATH_OPERATORS) + len(BLOCKER_ONLY_OPERATORS) - ) diff --git a/tasks/codegen/test_scalars.py b/tasks/codegen/test_scalars.py deleted file mode 100644 index 1f15f1c3..00000000 --- a/tasks/codegen/test_scalars.py +++ /dev/null @@ -1,82 +0,0 @@ -"""Tests for the scalar-kind catalog driving fixture-value emission.""" - -import pytest - -from tasks.codegen.scalars import ( - ScalarError, - require_scalar, - SCALAR_KINDS, -) - - -def test_int4_kind_fields(): - kind = require_scalar("int4") - assert kind.token == "int4" - assert kind.rust_type == "i32" - assert kind.min_symbol == "i32::MIN" - assert kind.max_symbol == "i32::MAX" - assert kind.zero_symbol == "0" - assert kind.min_value == -2147483648 - assert kind.max_value == 2147483647 - - -def test_render_literal_maps_sentinels(): - kind = require_scalar("int4") - assert kind.render_literal("MIN") == "i32::MIN" - assert kind.render_literal("MAX") == "i32::MAX" - assert kind.render_literal("ZERO") == "0" - - -def test_render_literal_passes_through_numeric(): - kind = require_scalar("int4") - assert kind.render_literal("-100") == "-100" - assert kind.render_literal("0") == "0" - assert kind.render_literal("9999") == "9999" - - -def test_render_literal_rejects_non_numeric(): - kind = require_scalar("int4") - with pytest.raises(ScalarError, match="not a valid i32 
literal or sentinel"): - kind.render_literal("oops") - - -def test_render_literal_rejects_out_of_range(): - kind = require_scalar("int4") - with pytest.raises(ScalarError, match="out of range"): - kind.render_literal("2147483648") # i32::MAX + 1 - - -def test_numeric_value_resolves_sentinels_and_literals(): - kind = require_scalar("int4") - assert kind.numeric_value("MIN") == -2147483648 - assert kind.numeric_value("MAX") == 2147483647 - assert kind.numeric_value("ZERO") == 0 - assert kind.numeric_value("42") == 42 - assert kind.numeric_value("-1") == -1 - - -def test_require_scalar_unknown_raises(): - with pytest.raises(ScalarError, match="unknown scalar token 'bogus'"): - require_scalar("bogus") - - -def test_int4_registered_in_catalog(): - assert "int4" in SCALAR_KINDS - - -def test_int2_kind_resolves_and_renders(): - kind = require_scalar("int2") - assert kind.rust_type == "i16" - assert kind.numeric_value("MIN") == -32768 - assert kind.numeric_value("MAX") == 32767 - assert kind.numeric_value("ZERO") == 0 - assert kind.render_literal("MIN") == "i16::MIN" - assert kind.render_literal("MAX") == "i16::MAX" - assert kind.render_literal("ZERO") == "0" - assert kind.render_literal("30000") == "30000" - - -def test_int2_kind_rejects_out_of_range(): - kind = require_scalar("int2") - with pytest.raises(ScalarError, match="out of range"): - kind.numeric_value("40000") diff --git a/tasks/codegen/test_spec.py b/tasks/codegen/test_spec.py deleted file mode 100644 index 151a03cb..00000000 --- a/tasks/codegen/test_spec.py +++ /dev/null @@ -1,208 +0,0 @@ -"""Tests for the scalar-domain manifest loader.""" - -import textwrap - -import pytest - -from tasks.codegen.spec import DomainSpec, SpecError, TypeSpec, load_spec - - -VALID_TOML = textwrap.dedent(""" - [domain] - int4 = [] - int4_eq = ["hm"] - int4_ord_ore = ["ore"] - int4_ord = ["ore"] -""") - - -def write(tmp_path, name, text): - p = tmp_path / name - p.write_text(text) - return p - - -def 
test_loads_valid_manifest_and_infers_token_from_filename(tmp_path): - spec = load_spec(write(tmp_path, "int4.toml", VALID_TOML)) - assert isinstance(spec, TypeSpec) - assert spec.token == "int4" - assert spec.domains == [ - DomainSpec(name="int4", terms=[]), - DomainSpec(name="int4_eq", terms=["hm"]), - DomainSpec(name="int4_ord_ore", terms=["ore"]), - DomainSpec(name="int4_ord", terms=["ore"]), - ] - - -def test_missing_domain_table_raises(tmp_path): - with pytest.raises(SpecError, match="missing required table '\\[domain\\]'"): - load_spec(write(tmp_path, "int4.toml", "")) - - -def test_empty_domain_table_raises(tmp_path): - with pytest.raises(SpecError, match="at least one domain"): - load_spec(write(tmp_path, "int4.toml", "[domain]\n")) - - -def test_domain_value_must_be_list(tmp_path): - bad = textwrap.dedent(""" - [domain] - int4_eq = "hm" - """) - with pytest.raises(SpecError, match="must be a list of term names"): - load_spec(write(tmp_path, "int4.toml", bad)) - - -def test_domain_term_must_be_string(tmp_path): - bad = textwrap.dedent(""" - [domain] - int4_eq = [1] - """) - with pytest.raises(SpecError, match="term names must be strings"): - load_spec(write(tmp_path, "int4.toml", bad)) - - -def test_unknown_term_raises_with_domain_context(tmp_path): - bad = textwrap.dedent(""" - [domain] - int4_eq = ["bogus"] - """) - with pytest.raises(SpecError, match="\\[domain\\] int4_eq: unknown term 'bogus'"): - load_spec(write(tmp_path, "int4.toml", bad)) - - -def test_domain_name_must_start_with_type_token(tmp_path): - bad = textwrap.dedent(""" - [domain] - text = [] - """) - with pytest.raises(SpecError, match="domain name must start with 'int4'"): - load_spec(write(tmp_path, "int4.toml", bad)) - - -def test_domain_name_must_be_token_or_token_underscore(tmp_path): - bad = textwrap.dedent(""" - [domain] - int4xfoo = [] - """) - with pytest.raises(SpecError, match="domain name must start with 'int4'"): - load_spec(write(tmp_path, "int4.toml", bad)) - - 
-@pytest.mark.parametrize("filename", [ - "Int4.toml", - "int-4.toml", - "int 4.toml", - "4int.toml", - "int4;drop.toml", -]) -def test_token_must_be_sql_identifier(tmp_path, filename): - with pytest.raises(SpecError, match=r"token .* must match"): - load_spec(write(tmp_path, filename, VALID_TOML)) - - -@pytest.mark.parametrize("bad_name", [ - "int4-eq", - "int4 eq", - "INT4_eq", - "int4;drop", -]) -def test_domain_name_must_be_sql_identifier(tmp_path, bad_name): - bad = textwrap.dedent(f""" - [domain] - "{bad_name}" = [] - """) - with pytest.raises(SpecError, match=r"domain name .* must match"): - load_spec(write(tmp_path, "int4.toml", bad)) - - -FIXTURE_TOML = VALID_TOML + textwrap.dedent(""" - [fixture] - values = ["MIN", "-100", "-1", "ZERO", "1", "9999", "MAX"] -""") - - -def test_fixture_values_default_to_none_when_absent(tmp_path): - spec = load_spec(write(tmp_path, "int4.toml", VALID_TOML)) - assert spec.fixture_values is None - - -def test_loads_fixture_values_when_present(tmp_path): - spec = load_spec(write(tmp_path, "int4.toml", FIXTURE_TOML)) - assert spec.fixture_values == [ - "MIN", "-100", "-1", "ZERO", "1", "9999", "MAX", - ] - - -def test_fixture_values_must_be_a_list(tmp_path): - bad = VALID_TOML + '\n[fixture]\nvalues = "MIN"\n' - with pytest.raises(SpecError, match=r"\[fixture\] values: must be a list"): - load_spec(write(tmp_path, "int4.toml", bad)) - - -def test_fixture_table_requires_values_key(tmp_path): - bad = VALID_TOML + "\n[fixture]\nother = 1\n" - with pytest.raises(SpecError, match=r"\[fixture\]: missing required key 'values'"): - load_spec(write(tmp_path, "int4.toml", bad)) - - -def test_fixture_values_must_be_non_empty(tmp_path): - bad = VALID_TOML + "\n[fixture]\nvalues = []\n" - with pytest.raises(SpecError, match=r"\[fixture\] values: must not be empty"): - load_spec(write(tmp_path, "int4.toml", bad)) - - -def test_fixture_values_must_be_strings(tmp_path): - bad = VALID_TOML + "\n[fixture]\nvalues = [1, 2]\n" - with 
pytest.raises(SpecError, match=r"\[fixture\] values: must be strings"): - load_spec(write(tmp_path, "int4.toml", bad)) - - -def test_fixture_values_reject_invalid_literal(tmp_path): - bad = VALID_TOML + '\n[fixture]\nvalues = ["MIN", "oops", "ZERO", "MAX"]\n' - with pytest.raises(SpecError, match="not a valid i32 literal"): - load_spec(write(tmp_path, "int4.toml", bad)) - - -def test_fixture_values_require_min_max_zero(tmp_path): - bad = VALID_TOML + '\n[fixture]\nvalues = ["1", "2", "3"]\n' - with pytest.raises(SpecError, match="must include MIN, MAX, and zero"): - load_spec(write(tmp_path, "int4.toml", bad)) - - -def test_fixture_values_require_max_even_if_min_and_zero_present(tmp_path): - bad = VALID_TOML + '\n[fixture]\nvalues = ["MIN", "ZERO", "1"]\n' - with pytest.raises(SpecError, match="must include MIN, MAX, and zero"): - load_spec(write(tmp_path, "int4.toml", bad)) - - -def test_fixture_values_reject_duplicate_literal(tmp_path): - bad = VALID_TOML + '\n[fixture]\nvalues = ["MIN", "1", "ZERO", "1", "MAX"]\n' - with pytest.raises(SpecError, match=r"must be distinct.*duplicate values.*'1'"): - load_spec(write(tmp_path, "int4.toml", bad)) - - -def test_fixture_values_reject_sentinel_literal_alias(tmp_path): - # "MIN" and the i32::MIN literal resolve to the same plaintext value; - # the distinct-plaintext contract must reject the pair. 
- bad = ( - VALID_TOML - + '\n[fixture]\nvalues = ["MIN", "-2147483648", "ZERO", "MAX"]\n' - ) - with pytest.raises( - SpecError, - match=r"must be distinct.*'-2147483648' duplicates 'MIN' \(both resolve to -2147483648\)", - ): - load_spec(write(tmp_path, "int4.toml", bad)) - - -def test_fixture_for_unknown_scalar_token_raises(tmp_path): - bad = textwrap.dedent(""" - [domain] - int8 = [] - - [fixture] - values = ["1"] - """) - with pytest.raises(SpecError, match="unknown scalar token 'int8'"): - load_spec(write(tmp_path, "int8.toml", bad)) diff --git a/tasks/codegen/test_templates.py b/tasks/codegen/test_templates.py deleted file mode 100644 index 4a24f923..00000000 --- a/tasks/codegen/test_templates.py +++ /dev/null @@ -1,500 +0,0 @@ -"""Tests for per-construct SQL template functions.""" - -from tasks.codegen.spec import DomainSpec, TypeSpec -from tasks.codegen.templates import ( - AGGREGATE_OPS, - AUTO_GENERATED_HEADER, - AUTO_GENERATED_HEADER_RS, - _sql_str, - brief_role_clause, - domain_name, - extractor_for_operator, - is_ord_capable, - render_aggregate, - render_blocker_bool, - render_blocker_native, - render_blocker_path, - render_domain_block, - render_extractor, - render_fixture_values_rs, - render_operator, - render_wrapper, -) -from tasks.codegen.terms import TERM_CATALOG - - -def test_auto_generated_header_present(): - # Byte-identical to the Rust generator's marker - # (crates/eql-codegen/src/consts.rs) and to the `^-- AUTOMATICALLY GENERATED - # FILE` prefix that tasks/docs/validate/*.sh grep on to skip generated SQL. - assert AUTO_GENERATED_HEADER == "-- AUTOMATICALLY GENERATED FILE.\n" - assert "AUTOMATICALLY GENERATED FILE" in AUTO_GENERATED_HEADER - - -def test_rust_header_is_a_rust_comment(): - # Rust uses // comments, not SQL's --. Byte-identical to the Rust - # generator's AUTO_GENERATED_HEADER_RS (crates/eql-codegen/src/consts.rs). 
- assert AUTO_GENERATED_HEADER_RS == "// AUTOMATICALLY GENERATED FILE.\n" - # No line is an SQL-style (`--`) comment — this is Rust, not SQL. - assert not any( - line.startswith("--") for line in AUTO_GENERATED_HEADER_RS.splitlines() - ) - - -def test_render_fixture_values_rs_emits_typed_const(): - spec = TypeSpec( - token="int4", - domains=[], - fixture_values=["MIN", "-1", "ZERO", "1", "MAX"], - ) - body = render_fixture_values_rs(spec) - assert "pub const VALUES: &[i32] = &[" in body - assert "`int4` row in `eql-scalars::CATALOG`" in body - # Sentinels map to named consts; numeric tokens pass through. - assert "i32::MIN," in body - assert "i32::MAX," in body - assert " -1,\n" in body - assert " 0,\n" in body # ZERO and "1" both literal - assert " 1,\n" in body - # No generated-file marker in the body — the writer prepends it. - assert "AUTOMATICALLY GENERATED FILE" not in body - - -def test_render_fixture_values_rs_preserves_manifest_order(): - spec = TypeSpec( - token="int4", - domains=[], - fixture_values=["MIN", "ZERO", "MAX"], - ) - body = render_fixture_values_rs(spec) - assert body.index("i32::MIN") < body.index("0,") < body.index("i32::MAX") - - -def test_domain_block_storage_uses_fixed_envelope_only(): - domain = DomainSpec(name="int4", terms=[]) - sql = render_domain_block(domain, "int4") - assert "CREATE DOMAIN eql_v3.int4 AS jsonb" in sql - assert "VALUE ? 'v'" in sql - assert "VALUE ? 'i'" in sql - assert "VALUE ? 'c'" in sql - assert "VALUE ? 'hm'" not in sql - assert "VALUE ? 'ob'" not in sql - - -def test_domain_block_uses_catalog_json_keys(): - domain = DomainSpec(name="int4_ord", terms=["ore"]) - sql = render_domain_block(domain, "int4") - assert "CREATE DOMAIN eql_v3.int4_ord AS jsonb" in sql - assert "VALUE ? 'ob'" in sql - assert "VALUE ? 
'ore'" not in sql - - -def test_domain_block_check_pins_envelope_version(): - """Thread D: the CHECK both verifies the envelope `v` key is PRESENT and - pins its value to the EQL payload-format version (2), matching the - repo-wide eql_v2._encrypted_check_v rule. The v=1 payloads in - tests/sqlx/fixtures/aggregate_minmax_data.sql belong to the separate - composite-type (eql_v2_encrypted) aggregate stream, not these domains, so - pinning the value here rejects stale/foreign-version payloads without - affecting that fixture.""" - for domain in ( - DomainSpec(name="int4", terms=[]), - DomainSpec(name="int4_eq", terms=["hm"]), - DomainSpec(name="int4_ord", terms=["ore"]), - ): - sql = render_domain_block(domain, "int4") - assert "VALUE ? 'v'" in sql # presence checked - assert "VALUE->>'v' = '2'" in sql # value pinned to version 2 - - -def test_extractor_is_catalog_derived_and_inlinable(): - domain = DomainSpec(name="int4_eq", terms=["hm"]) - sql = render_extractor(domain, TERM_CATALOG["hm"]) - assert "CREATE FUNCTION eql_v3.eq_term(a eql_v3.int4_eq)" in sql - assert "RETURNS eql_v2.hmac_256" in sql - assert "LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE" in sql - assert "SELECT eql_v2.hmac_256(a::jsonb)" in sql - assert "SET search_path" not in sql - - -def test_wrapper_uses_term_extractor_for_supported_operator(): - domain = DomainSpec(name="int4_ord", terms=["ore"]) - sql = render_wrapper( - domain, - op="<", - arg_a="eql_v3.int4_ord", - arg_b="jsonb", - extractor="ord_term", - ) - assert "CREATE FUNCTION eql_v3.lt(a eql_v3.int4_ord, b jsonb)" in sql - assert "SELECT eql_v3.ord_term(a) < eql_v3.ord_term(b::eql_v3.int4_ord)" in sql - - -def test_wrapper_is_inlinable_sql(): - """Wrappers must be single-statement LANGUAGE sql with no search_path pin.""" - domain = DomainSpec(name="int4_eq", terms=["hm"]) - sql = render_wrapper( - domain, - op="=", - arg_a="eql_v3.int4_eq", - arg_b="eql_v3.int4_eq", - extractor="eq_term", - ) - assert "LANGUAGE sql IMMUTABLE STRICT 
PARALLEL SAFE" in sql - assert "SET search_path" not in sql - assert "LANGUAGE plpgsql" not in sql - - -def test_extractor_for_operator_selects_catalog_term(): - domain = DomainSpec(name="int4_ord", terms=["ore"]) - assert extractor_for_operator(domain, "=") == "ord_term" - assert extractor_for_operator(domain, "<") == "ord_term" - - -def test_extractor_for_operator_returns_none_for_unsupported_operator(): - domain = DomainSpec(name="int4_eq", terms=["hm"]) - assert extractor_for_operator(domain, "<") is None - - -def test_blocker_bool_is_not_strict(): - """Footgun: a STRICT blocker lets Postgres skip the body on NULL input, - silently bypassing the 'operator not supported' raise. Assert the exact - attribute line so any future refactor that re-adds STRICT fails loudly.""" - domain = DomainSpec(name="int4", terms=[]) - sql = render_blocker_bool( - domain, op="<", arg_a="eql_v3.int4", arg_b="eql_v3.int4", - ) - assert "CREATE FUNCTION eql_v3.lt(a eql_v3.int4, b eql_v3.int4)" in sql - assert "encrypted_domain_unsupported_bool('eql_v3.int4', '<')" in sql - assert "RETURNS boolean IMMUTABLE PARALLEL SAFE\n" in sql - assert "LANGUAGE plpgsql" in sql - assert "STRICT" not in sql - - -def test_blocker_path_is_not_strict(): - """Mirror of test_blocker_bool_is_not_strict for path blockers.""" - domain = DomainSpec(name="int4", terms=[]) - sql = render_blocker_path( - domain, op="->", arg_a="eql_v3.int4", arg_b="text", - ) - assert "RETURNS eql_v3.int4 IMMUTABLE PARALLEL SAFE\n" in sql - assert "LANGUAGE plpgsql" in sql - assert "STRICT" not in sql - - -def test_blocker_path_returns_domain_or_text(): - domain = DomainSpec(name="int4", terms=[]) - arrow = render_blocker_path( - domain, op="->", arg_a="eql_v3.int4", arg_b="text", - ) - assert 'CREATE FUNCTION eql_v3."->"(a eql_v3.int4, selector text)' in arrow - assert "RETURNS eql_v3.int4" in arrow - arrow2 = render_blocker_path( - domain, op="->>", arg_a="eql_v3.int4", arg_b="text", - ) - assert "RETURNS text" in arrow2 - - 
-def test_blocker_path_for_jsonb_left_arg_returns_domain(): - """The (jsonb, dom) shape from _path_shapes still routes to the domain - return type for `->` (only `->>` returns text).""" - domain = DomainSpec(name="int4", terms=[]) - sql = render_blocker_path( - domain, op="->", arg_a="jsonb", arg_b="eql_v3.int4", - ) - assert 'CREATE FUNCTION eql_v3."->"(a jsonb, selector eql_v3.int4)' in sql - assert "RETURNS eql_v3.int4" in sql - - -def test_blocker_native_bool_uses_helper_and_is_not_strict(): - domain = DomainSpec(name="int4", terms=[]) - sql = render_blocker_native( - domain, op="?", arg_a="eql_v3.int4", arg_b="text", returns="boolean", - ) - assert 'CREATE FUNCTION eql_v3."?"(a eql_v3.int4, b text)' in sql - assert "encrypted_domain_unsupported_bool('eql_v3.int4', '?')" in sql - assert "RETURNS boolean IMMUTABLE PARALLEL SAFE\n" in sql - assert "LANGUAGE plpgsql" in sql - assert "STRICT" not in sql - - -def test_blocker_native_jsonb_result_raises_and_is_not_strict(): - domain = DomainSpec(name="int4", terms=[]) - sql = render_blocker_native( - domain, op="#>", arg_a="eql_v3.int4", arg_b="text[]", returns="jsonb", - ) - assert 'CREATE FUNCTION eql_v3."#>"(a eql_v3.int4, b text[])' in sql - assert "RETURNS jsonb IMMUTABLE PARALLEL SAFE\n" in sql - assert "RAISE EXCEPTION 'operator % is not supported for %', '#>', 'eql_v3.int4'" in sql - assert "LANGUAGE plpgsql" in sql - assert "STRICT" not in sql - - -def test_blocker_native_text_result_raises_and_is_not_strict(): - domain = DomainSpec(name="int4", terms=[]) - sql = render_blocker_native( - domain, op="#>>", arg_a="eql_v3.int4", arg_b="text[]", returns="text", - ) - assert 'CREATE FUNCTION eql_v3."#>>"(a eql_v3.int4, b text[])' in sql - assert "RETURNS text IMMUTABLE PARALLEL SAFE\n" in sql - assert "LANGUAGE plpgsql" in sql - assert "STRICT" not in sql - - -def test_blocker_native_concat_cross_shape(): - domain = DomainSpec(name="int4", terms=[]) - sql = render_blocker_native( - domain, op="||", arg_a="jsonb", 
arg_b="eql_v3.int4", returns="jsonb", - ) - assert 'CREATE FUNCTION eql_v3."||"(a jsonb, b eql_v3.int4)' in sql - assert "RETURNS jsonb" in sql - - -def test_operator_symmetric_metadata(): - sql = render_operator( - op="=", backing="eq", - leftarg="eql_v3.int4_eq", rightarg="eql_v3.int4_eq", - supported=True, - ) - assert "CREATE OPERATOR = (" in sql - assert "FUNCTION = eql_v3.eq" in sql - assert "LEFTARG = eql_v3.int4_eq, RIGHTARG = eql_v3.int4_eq" in sql - assert "NEGATOR = <>" in sql - assert "RESTRICT = eqsel" in sql - - -def test_render_operator_unsupported_emits_only_function_and_args(): - """Unsupported routing must not emit NEGATOR / RESTRICT / JOIN / COMMUTATOR - (those would lie about selectivity for a function that always raises).""" - sql = render_operator( - op="=", backing="eq", - leftarg="eql_v3.int4", rightarg="eql_v3.int4", - supported=False, - ) - assert "CREATE OPERATOR = (" in sql - assert "FUNCTION = eql_v3.eq" in sql - assert "LEFTARG = eql_v3.int4, RIGHTARG = eql_v3.int4" in sql - assert "NEGATOR" not in sql - assert "RESTRICT" not in sql - assert "JOIN" not in sql - assert "COMMUTATOR" not in sql - - -def test_render_aggregate_min_int4_ord_emits_state_function_and_aggregate(): - """Pin the rendered shape for the canonical (int4_ord, min) case.""" - domain = DomainSpec(name="int4_ord", terms=["ore"]) - sql = render_aggregate(domain, AGGREGATE_OPS["min"]) - assert "CREATE FUNCTION eql_v3.min_sfunc(state eql_v3.int4_ord, value eql_v3.int4_ord)" in sql - assert "RETURNS eql_v3.int4_ord" in sql - assert "LANGUAGE plpgsql IMMUTABLE STRICT" in sql - assert "SET search_path = pg_catalog, extensions, public" in sql - assert "IF value < state THEN" in sql - assert "CREATE AGGREGATE eql_v3.min(eql_v3.int4_ord) (" in sql - assert "sfunc = eql_v3.min_sfunc" in sql - assert "stype = eql_v3.int4_ord" in sql - - -def test_render_aggregate_max_uses_greater_than_comparator(): - """Symmetric pin: max uses `>` not `<`.""" - domain = 
DomainSpec(name="int4_ord_ore", terms=["ore"]) - sql = render_aggregate(domain, AGGREGATE_OPS["max"]) - assert "CREATE FUNCTION eql_v3.max_sfunc(state eql_v3.int4_ord_ore, value eql_v3.int4_ord_ore)" in sql - assert "IF value > state THEN" in sql - assert "CREATE AGGREGATE eql_v3.max(eql_v3.int4_ord_ore) (" in sql - - -def test_render_aggregate_state_function_is_not_inlinable(): - """Footgun mirror: blockers must be LANGUAGE plpgsql; the state function - deliberately is too, so the planner can't elide an IMMUTABLE STRICT - aggregate state call away. STRICT + plpgsql + SET search_path together.""" - domain = DomainSpec(name="int4_ord", terms=["ore"]) - sql = render_aggregate(domain, AGGREGATE_OPS["min"]) - assert "LANGUAGE plpgsql" in sql - assert "STRICT" in sql - # Inlinable-SQL shape — explicitly absent. - assert "LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE" not in sql - - -def test_is_ord_capable_matches_role(): - assert is_ord_capable(DomainSpec(name="int4_ord", terms=["ore"])) is True - assert is_ord_capable(DomainSpec(name="int4_ord_ore", terms=["ore"])) is True - assert is_ord_capable(DomainSpec(name="int4_eq", terms=["hm"])) is False - assert is_ord_capable(DomainSpec(name="int4", terms=[])) is False - - -def test_render_operator_for_containment_omits_commutator(): - """@> has no commutator / negator / selectivity in OPERATORS; supported=True - must still omit those clauses.""" - sql = render_operator( - op="@>", backing="contains", - leftarg="eql_v3.int4_ord", rightarg="eql_v3.int4_ord", - supported=True, - ) - assert "CREATE OPERATOR @> (" in sql - assert "FUNCTION = eql_v3.contains" in sql - assert "COMMUTATOR" not in sql - assert "NEGATOR" not in sql - assert "RESTRICT" not in sql - assert "JOIN" not in sql - - -# --- ITEM A: placeholder/blocker operator comment ------------------------- - - -def test_render_operator_unsupported_emits_placeholder_comment(): - """Thread A: a blocker-backed (unsupported) operator must carry a leading - SQL comment 
explaining it is a placeholder that raises, so a future - reviewer doesn't wonder why an ordering op is declared on an eq-only - domain.""" - sql = render_operator( - op="<", backing="lt", - leftarg="eql_v3.int4_eq", rightarg="eql_v3.int4_eq", - supported=False, - ) - assert sql.startswith("-- Placeholder:") - assert "does not support <" in sql - assert "always raises" in sql - # The comment precedes the CREATE OPERATOR. - assert sql.index("-- Placeholder:") < sql.index("CREATE OPERATOR") - - -def test_render_operator_supported_has_no_placeholder_comment(): - """Supported operators route to real wrappers — no placeholder comment.""" - sql = render_operator( - op="=", backing="eq", - leftarg="eql_v3.int4_eq", rightarg="eql_v3.int4_eq", - supported=True, - ) - assert "Placeholder" not in sql - - -# --- ITEM B & J: aggregate SQL rationale comments ------------------------- - - -def test_render_aggregate_state_function_emits_plpgsql_rationale_comment(): - """Thread B: the plpgsql rationale must appear in the emitted SQL (not just - as a Python comment) so a SQL reader sees why it isn't an inlinable - LANGUAGE sql CASE.""" - domain = DomainSpec(name="int4_ord", terms=["ore"]) - sql = render_aggregate(domain, AGGREGATE_OPS["min"]) - assert "-- LANGUAGE plpgsql, not sql:" in sql - assert "not index" in sql - # The rationale precedes the state-function definition. - assert sql.index("-- LANGUAGE plpgsql, not sql:") < sql.index( - "CREATE FUNCTION eql_v3.min_sfunc" - ) - - -def test_render_aggregate_enables_parallel_and_combinefunc(): - """Thread #22: MIN/MAX aggregates declare a combine function (the state - function itself — min/max are associative) and PARALLEL = SAFE, so PG can - use partial/parallel aggregation on the large GROUP BY workloads these ORE - aggregates exist to serve. 
The sfunc is likewise PARALLEL SAFE.""" - for op_name, sfunc in (("min", "min_sfunc"), ("max", "max_sfunc")): - domain = DomainSpec(name="int4_ord", terms=["ore"]) - sql = render_aggregate(domain, AGGREGATE_OPS[op_name]) - # The state function must be parallel-safe... - assert "LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE" in sql - # ...and the aggregate must declare the combinefunc + parallel safety - # inside the CREATE AGGREGATE option list (not merely in prose). - aggregate_body = sql[sql.index(f"CREATE AGGREGATE eql_v3.{op_name}"):] - assert f"combinefunc = eql_v3.{sfunc}" in aggregate_body - assert "parallel = safe" in aggregate_body - # The stale "intentionally disabled" omission note must be gone. - assert "intentionally disabled" not in sql - assert "-- No COMBINEFUNC" not in sql - - -# --- ITEM K: differentiated @brief for converged vs scheme-explicit ------- - - -def test_domain_brief_distinguishes_converged_from_scheme_twin(): - """Thread K: int4_ord (converged) and int4_ord_ore (scheme twin) carry the - same terms but must render distinct, sensible briefs.""" - ord_dom = DomainSpec(name="int4_ord", terms=["ore"]) - ore_dom = DomainSpec(name="int4_ord_ore", terms=["ore"]) - ord_sql = render_domain_block(ord_dom, "int4") - ore_sql = render_domain_block(ore_dom, "int4") - - ord_brief = next( - line for line in ord_sql.splitlines() if "@brief" in line - ) - ore_brief = next( - line for line in ore_sql.splitlines() if "@brief" in line - ) - # Both still lead with the role phrase... - assert "Ordered encrypted int4 domain." in ord_brief - assert "Ordered encrypted int4 domain." in ore_brief - # ...but the trailing clause differs and reads sensibly. 
- assert ord_brief != ore_brief - assert "Recommended converged name" in ord_brief - assert "Scheme-explicit twin" in ore_brief - assert "ore scheme" in ore_brief - assert "int4_ord" in ore_brief # points back at the converged name - - -def test_brief_role_clause_is_generic_over_token_and_scheme(): - """The disambiguation reads token/role/scheme from the name, not a - hard-coded literal — so it works for other types (int8) and schemes.""" - # Converged ordered name for a different token. - assert "Recommended converged name" in brief_role_clause( - DomainSpec(name="int8_ord", terms=["ore"]), "int8" - ) - # Scheme-explicit twin with a hypothetical non-ore scheme label. - clause = brief_role_clause( - DomainSpec(name="date_ord_lex", terms=["ore"]), "date" - ) - assert "Scheme-explicit twin" in clause - assert "lex scheme" in clause - assert "date_ord" in clause - - -def test_brief_role_clause_empty_for_storage_and_eq(): - """Storage and eq domains have no converged/twin ambiguity (only one name - each), so they get no disambiguating clause — brief stays unchanged.""" - assert brief_role_clause(DomainSpec(name="int4", terms=[]), "int4") == "" - assert brief_role_clause( - DomainSpec(name="int4_eq", terms=["hm"]), "int4" - ) == "" - - -# --- THREAD 1: SQL-string interpolation hardening ------------------------- - - -def test_sql_str_doubles_single_quotes(): - """_sql_str doubles embedded single quotes so a value can't break out of - its SQL string literal.""" - assert _sql_str("o'brien") == "o''brien" - assert _sql_str("a'b'c") == "a''b''c" - # Quote-free input is unchanged — current catalog strings stay byte-stable. - assert _sql_str("int4_eq") == "int4_eq" - assert _sql_str("<=") == "<=" - - -def test_blocker_escapes_quote_bearing_domain_in_rendered_sql(): - """A hypothetical quote-bearing domain name must be doubled inside the - helper-call string literal in the rendered blocker, not interpolated raw. 
- - (op can't carry a quote in practice — it's looked up in the operator - catalog — so the domain name is the live escaping path through the blocker - string literals.)""" - domain = DomainSpec(name="o'dom", terms=[]) - sql = render_blocker_bool( - domain, op="<", arg_a="eql_v3.o'dom", arg_b="eql_v3.o'dom", - ) - # The dom flows into encrypted_domain_unsupported_bool('', '') - # as a single-quoted literal — the quote must be doubled. - assert "encrypted_domain_unsupported_bool('eql_v3.o''dom', '<')" in sql - # The raw, unescaped single-quoted form must not appear. - assert "'eql_v3.o'dom'" not in sql - - -def test_domain_block_escapes_quote_bearing_key_in_check(): - """A hypothetical quote-bearing payload key must be doubled inside the - VALUE ? '' check rather than interpolated raw.""" - # A term-free domain whose name carries a quote exercises the typname - # literal escaping in the IF NOT EXISTS guard. - quoted = DomainSpec(name="we'ird", terms=[]) - sql = render_domain_block(quoted, "int4") - assert "typname = 'we''ird'" in sql - assert "typname = 'we'ird'" not in sql diff --git a/tasks/codegen/test_terms.py b/tasks/codegen/test_terms.py deleted file mode 100644 index 8ac7aa4b..00000000 --- a/tasks/codegen/test_terms.py +++ /dev/null @@ -1,96 +0,0 @@ -"""Tests for the fixed scalar-domain term catalog.""" - -import pytest - -from tasks.codegen.terms import ( - TermError, - extractor_for_operator, - operators_for_terms, - require_terms, - role_for_terms, - term_json_keys, - term_requires, -) - - -def test_hm_term_provides_equality(): - terms = require_terms(["hm"]) - hm = terms[0] - assert hm.name == "hm" - assert hm.json_key == "hm" - assert hm.extractor == "eq_term" - assert hm.returns == "eql_v2.hmac_256" - assert hm.ctor == "hmac_256" - assert hm.role == "eq" - assert hm.operators == ("=", "<>") - assert hm.requires == ("src/hmac_256/functions.sql",) - - -def test_ore_term_preserves_existing_int4_sql_contract(): - terms = require_terms(["ore"]) - ore = 
terms[0] - assert ore.name == "ore" - assert ore.json_key == "ob" - assert ore.extractor == "ord_term" - assert ore.returns == "eql_v2.ore_block_u64_8_256" - assert ore.ctor == "ore_block_u64_8_256" - assert ore.role == "ord" - assert ore.operators == ("=", "<>", "<", "<=", ">", ">=") - assert ore.requires == ( - "src/ore_block_u64_8_256/functions.sql", - "src/ore_block_u64_8_256/operators.sql", - ) - - -def test_unknown_term_raises(): - with pytest.raises(TermError, match="unknown term 'bogus'"): - require_terms(["bogus"]) - - -def test_operators_are_union_in_catalog_order(): - assert operators_for_terms(["ore", "hm"]) == [ - "=", "<>", "<", "<=", ">", ">=", - ] - - -def test_json_keys_come_from_catalog_not_manifest_names(): - assert term_json_keys(["hm", "ore"]) == ["hm", "ob"] - - -def test_term_requires_are_deduplicated(): - assert term_requires(["ore", "ore", "hm"]) == [ - "src/ore_block_u64_8_256/functions.sql", - "src/ore_block_u64_8_256/operators.sql", - "src/hmac_256/functions.sql", - ] - - -def test_role_for_terms_handles_storage_eq_ord(): - assert role_for_terms([]) == "storage" - assert role_for_terms(["hm"]) == "eq" - assert role_for_terms(["ore"]) == "ord" - - -def test_operators_for_terms_handles_empty_list(): - assert operators_for_terms([]) == [] - - -def test_term_json_keys_handles_empty_list(): - assert term_json_keys([]) == [] - - -def test_term_requires_handles_empty_list(): - assert term_requires([]) == [] - - -def test_extractor_for_operator_picks_first_term_supporting_op(): - assert extractor_for_operator(["hm"], "=") == "eq_term" - assert extractor_for_operator(["ore"], "<") == "ord_term" - # Multi-term domains: first supporting term wins. 
- assert extractor_for_operator(["hm", "ore"], "=") == "eq_term" - assert extractor_for_operator(["hm", "ore"], "<") == "ord_term" - - -def test_extractor_for_operator_returns_none_when_no_term_supports_op(): - assert extractor_for_operator(["hm"], "<") is None - assert extractor_for_operator([], "=") is None diff --git a/tasks/codegen/test_writer.py b/tasks/codegen/test_writer.py deleted file mode 100644 index 81805cb1..00000000 --- a/tasks/codegen/test_writer.py +++ /dev/null @@ -1,157 +0,0 @@ -"""Tests for the ownership / overwrite-refusal / stale-cleanup rules.""" -import pytest -from tasks.codegen.generate import REPO_ROOT -from tasks.codegen.templates import AUTO_GENERATED_HEADER, AUTO_GENERATED_HEADER_RS -from tasks.codegen.writer import ( - _MARKER, - OwnershipError, - is_generated, - is_generated_rs, - clean_generated_files, - ensure_generated_paths_writable, - write_generated_file, - write_generated_rs, -) - - -_EXPECTED_SUFFIXES = ( - "_types.sql", - "_functions.sql", - "_operators.sql", - "_aggregates.sql", - "_extensions.sql", -) - - -def test_is_generated_true_for_header(tmp_path): - p = tmp_path / "x.sql" - p.write_text(AUTO_GENERATED_HEADER + "SELECT 1;\n") - assert is_generated(p) is True - - -def test_is_generated_false_for_handwritten(tmp_path): - p = tmp_path / "x.sql" - p.write_text("-- REQUIRE: src/schema.sql\nSELECT 1;\n") - assert is_generated(p) is False - - -def test_is_generated_true_for_crlf_header(tmp_path): - p = tmp_path / "x.sql" - p.write_bytes((_MARKER + "\r\n" + "SELECT 1;\n").encode("utf-8")) - assert is_generated(p) is True - - -def test_write_generated_file_creates_with_header(tmp_path): - p = tmp_path / "int4_types.sql" - write_generated_file(p, "DO $$ BEGIN END $$;\n") - text = p.read_text() - assert text.startswith(AUTO_GENERATED_HEADER) - assert "DO $$ BEGIN END $$;" in text - - -def test_write_refuses_to_overwrite_handwritten(tmp_path): - """Refuse to clobber a hand-written file at a generated path.""" - p = tmp_path / 
"int4_types.sql" - p.write_text("-- REQUIRE: src/schema.sql\n-- hand-written\n") - with pytest.raises(OwnershipError, match="hand-written"): - write_generated_file(p, "DO $$ BEGIN END $$;\n") - - -def test_preflight_refuses_handwritten_target_before_cleanup(tmp_path): - generated = tmp_path / "int4_types.sql" - hand = tmp_path / "int4_eq_functions.sql" - generated.write_text(AUTO_GENERATED_HEADER + "-- old generated\n") - hand.write_text("-- REQUIRE: src/schema.sql\n-- hand-written\n") - - with pytest.raises(OwnershipError, match=r"int4_eq_functions\.sql"): - ensure_generated_paths_writable([generated, hand]) - - assert generated.exists() - assert hand.exists() - - -def test_write_overwrites_existing_generated_file(tmp_path): - """A file that already carries the header may be overwritten.""" - p = tmp_path / "int4_types.sql" - p.write_text(AUTO_GENERATED_HEADER + "-- old content\n") - write_generated_file(p, "-- new content\n") - text = p.read_text() - assert "-- new content" in text - assert "-- old content" not in text - - -def test_clean_removes_only_generated_files(tmp_path): - """Clean deletes every generated file, keeps the rest.""" - gen1 = tmp_path / "int4_eq_functions.sql" - gen2 = tmp_path / "int4_old_domain_functions.sql" # stale orphan - hand = tmp_path / "int4_jsonb_extra.sql" - gen1.write_text(AUTO_GENERATED_HEADER + "SELECT 1;\n") - gen2.write_text(AUTO_GENERATED_HEADER + "SELECT 2;\n") - hand.write_text("-- REQUIRE: src/schema.sql\n-- hand-written\n") - - removed = clean_generated_files(tmp_path) - - assert not gen1.exists() - assert not gen2.exists() # stale orphan cleaned up - assert hand.exists() # hand-written file untouched - assert set(removed) == {gen1, gen2} - - -def test_clean_on_empty_directory(tmp_path): - """Clean on a greenfield directory removes nothing and does not error.""" - removed = clean_generated_files(tmp_path) - assert removed == [] - - -def test_write_generated_rs_creates_with_rust_header(tmp_path): - p = tmp_path / 
"int4_values.rs" - write_generated_rs(p, "pub const VALUES: &[i32] = &[];\n") - text = p.read_text() - assert text.startswith(AUTO_GENERATED_HEADER_RS) - assert "pub const VALUES" in text - - -def test_is_generated_rs_true_for_rust_header(tmp_path): - p = tmp_path / "int4_values.rs" - p.write_text(AUTO_GENERATED_HEADER_RS + "pub const VALUES: &[i32] = &[];\n") - assert is_generated_rs(p) is True - - -def test_is_generated_rs_false_for_handwritten(tmp_path): - p = tmp_path / "int4_values.rs" - p.write_text("//! hand-written\npub const VALUES: &[i32] = &[];\n") - assert is_generated_rs(p) is False - - -def test_write_generated_rs_refuses_to_overwrite_handwritten(tmp_path): - p = tmp_path / "int4_values.rs" - p.write_text("//! hand-written\n") - with pytest.raises(OwnershipError, match="hand-written"): - write_generated_rs(p, "pub const VALUES: &[i32] = &[];\n") - - -def test_write_generated_rs_overwrites_existing_generated(tmp_path): - p = tmp_path / "int4_values.rs" - p.write_text(AUTO_GENERATED_HEADER_RS + "// old\n") - write_generated_rs(p, "// new\n") - text = p.read_text() - assert "// new" in text - assert "// old" not in text - - -def test_no_misnamed_sql_files_in_generated_dirs(): - """Files under src/encrypted_domain// must end in one of the four - documented suffixes — catches mistakes like `int4_extension.sql` - (singular), which the build would silently include despite violating - the documented convention.""" - root = REPO_ROOT / "src" / "encrypted_domain" - misnamed = [ - path.relative_to(REPO_ROOT) - for type_dir in root.iterdir() if type_dir.is_dir() - for path in sorted(type_dir.glob("*.sql")) - if not path.name.endswith(_EXPECTED_SUFFIXES) - ] if root.is_dir() else [] - assert not misnamed, ( - f"misnamed SQL files in src/encrypted_domain/ — expected suffix in " - f"{_EXPECTED_SUFFIXES}: {misnamed}" - ) diff --git a/tasks/codegen/types/.gitkeep b/tasks/codegen/types/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git 
a/tasks/codegen/types/int2.toml b/tasks/codegen/types/int2.toml deleted file mode 100644 index 314bc698..00000000 --- a/tasks/codegen/types/int2.toml +++ /dev/null @@ -1,19 +0,0 @@ -# Encrypted-domain scalar manifest for int2. -# The filename supplies the type token. Each domain lists the index terms -# it carries; term capabilities are fixed in tasks/codegen/terms.py. - -[domain] -int2 = [] -int2_eq = ["hm"] -int2_ord_ore = ["ore"] -int2_ord = ["ore"] - -# Single source of truth for the int2 fixture plaintext list. Drives the -# generated tests/sqlx/src/fixtures/int2_values.rs const, shared by the fixture -# generator and the matrix oracle. Sentinels MIN/MAX/ZERO map to i16 named -# consts; the set MUST include MIN, MAX, and zero (matrix comparison pivots). -[fixture] -values = [ - "MIN", "-30000", "-100", "-1", "ZERO", "1", "2", "5", "10", "17", "25", - "42", "50", "100", "250", "1000", "9999", "30000", "MAX", -] diff --git a/tasks/codegen/types/int4.toml b/tasks/codegen/types/int4.toml deleted file mode 100644 index 606d80ee..00000000 --- a/tasks/codegen/types/int4.toml +++ /dev/null @@ -1,19 +0,0 @@ -# Encrypted-domain scalar manifest for int4. -# The filename supplies the type token. Each domain lists the index terms -# it carries; term capabilities are fixed in tasks/codegen/terms.py. - -[domain] -int4 = [] -int4_eq = ["hm"] -int4_ord_ore = ["ore"] -int4_ord = ["ore"] - -# Single source of truth for the int4 fixture plaintext list. Drives the -# generated tests/sqlx/src/fixtures/int4_values.rs const, shared by the fixture -# generator and the matrix oracle. Sentinels MIN/MAX/ZERO map to i32 named -# consts; the set MUST include MIN, MAX, and zero (matrix comparison pivots). 
-[fixture] -values = [ - "MIN", "-100", "-1", "ZERO", "1", "2", "5", "10", "17", "25", - "42", "50", "100", "250", "1000", "9999", "MAX", -] diff --git a/tasks/codegen/writer.py b/tasks/codegen/writer.py deleted file mode 100644 index aa0cdd99..00000000 --- a/tasks/codegen/writer.py +++ /dev/null @@ -1,89 +0,0 @@ -"""File writer enforcing the AUTO-GENERATED-header ownership rule. - -The generator owns only files carrying the AUTO-GENERATED header. It -preflights expected output paths, deletes generated files to clear stale -orphans, and refuses to overwrite a hand-written file at a generated path. -""" - -from pathlib import Path - -from .templates import AUTO_GENERATED_HEADER, AUTO_GENERATED_HEADER_RS - -# The first line of each header is the ownership marker. -_MARKER = AUTO_GENERATED_HEADER.splitlines()[0] -_RS_MARKER = AUTO_GENERATED_HEADER_RS.splitlines()[0] - - -class OwnershipError(Exception): - """Raised when the generator would clobber a hand-written file.""" - - -def _first_line(path: Path) -> str: - with path.open("r", encoding="utf-8") as fh: - return fh.readline().rstrip("\r\n") - - -def is_generated(path: Path) -> bool: - """True if the file at `path` carries the SQL AUTO-GENERATED marker.""" - if not path.is_file(): - return False - return _first_line(path) == _MARKER - - -def is_generated_rs(path: Path) -> bool: - """True if the file at `path` carries the Rust AUTO-GENERATED marker.""" - if not path.is_file(): - return False - return _first_line(path) == _RS_MARKER - - -def clean_generated_files(directory: Path) -> list[Path]: - """Delete every generated .sql file in `directory`. Returns the list - of removed paths. Hand-written files are left untouched. 
A no-op on a - directory that does not exist or holds no generated files.""" - directory = Path(directory) - if not directory.is_dir(): - return [] - removed: list[Path] = [] - for path in sorted(directory.glob("*.sql")): - if is_generated(path): - path.unlink() - removed.append(path) - return removed - - -def ensure_generated_paths_writable(paths: list[Path]) -> None: - """Refuse a generation run before cleanup if any target is hand-written.""" - for path in paths: - path = Path(path) - if path.exists() and not is_generated(path): - raise OwnershipError( - f"refusing to overwrite hand-written file: {path} " - f"(no AUTO-GENERATED header). Remove it by hand if it is a " - f"one-time generator-adoption target." - ) - - -def write_generated_file(path: Path, body: str) -> None: - """Write `body` to `path`, prefixed with the SQL AUTO-GENERATED header. - - Refuses (OwnershipError) if `path` exists and is hand-written — a file - at a generated path that lacks the header is never clobbered.""" - path = Path(path) - ensure_generated_paths_writable([path]) - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(AUTO_GENERATED_HEADER + body, encoding="utf-8") - - -def write_generated_rs(path: Path, body: str) -> None: - """Write `body` to a Rust file, prefixed with the Rust AUTO-GENERATED - header. Unlike the SQL surface this file is committed; the header still - guards against clobbering a hand-written file at the same path.""" - path = Path(path) - if path.exists() and not is_generated_rs(path): - raise OwnershipError( - f"refusing to overwrite hand-written file: {path} " - f"(no AUTO-GENERATED header)." 
- ) - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(AUTO_GENERATED_HEADER_RS + body, encoding="utf-8") From af6eb8f1181f65591bafd904bf02ae5213341b5f Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 2 Jun 2026 15:58:23 +1000 Subject: [PATCH 10/19] docs(CLAUDE): describe Rust catalog codegen, single matrix snapshot, no TOML/Python --- CLAUDE.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 10477062..65da42bd 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -29,7 +29,7 @@ This project uses `mise` for task management. Common commands: - Run SQLx tests directly: `mise run test:sqlx` - Run SQLx tests in watch mode: `mise run test:sqlx:watch` - Tests are located in `tests/sqlx/` using Rust and SQLx framework -- Regenerate the scalar matrix coverage snapshots: `mise run test:matrix:inventory` (no database required). These committed `tests/sqlx/snapshots/_matrix_tests.txt` baselines pin the set of `scalars::::*` test names so a silently dropped/renamed/`#[cfg]`-gated test fails CI's `matrix-coverage` job. When you add or remove matrix tests (or add a scalar type), regenerate and commit the affected snapshot in the same change. See `tests/sqlx/snapshots/README.md`. +- Verify the scalar matrix coverage snapshot: `mise run test:matrix:inventory` (no database required). ONE committed `tests/sqlx/snapshots/matrix_tests.txt` baseline pins the token-normalized set of `scalars::::*` test names so a silently dropped/renamed/`#[cfg]`-gated test fails CI's `matrix-coverage` job. The task discovers the present scalar types from the test binary's `--list` and cross-checks them against `cargo run -p eql-codegen -- list-types`, so a catalog type missing its matrix wiring also fails. When you change which matrix tests the macro emits, regenerate and commit the single snapshot in the same change. See `tests/sqlx/snapshots/README.md`. 
### Build System - Dependencies are resolved using `-- REQUIRE:` comments in SQL files @@ -78,11 +78,11 @@ This is the **Encrypt Query Language (EQL)** - a PostgreSQL extension for search `src/encrypted_domain/` holds **encrypted-domain type families** — jsonb-backed PostgreSQL domains in the **`eql_v3` schema**, one domain per operator/index capability (`eql_v3.` storage-only, `eql_v3._eq`, `eql_v3._ord`). The schema qualifier replaces the old version-prefixed name, so the domains are `eql_v3.int4`, `eql_v3.int4_eq`, `eql_v3.int4_ord`, `eql_v3.int4_ord_ore` — created in `eql_v3`, not `public`. Their extractors/wrappers/aggregates (`eql_v3.eq_term`, `eql_v3.ord_term`, `eql_v3.eq`/`lt`/…, `eql_v3.min`/`max`) also live in `eql_v3`, but the index-term types they return and construct (`eql_v2.hmac_256`, `eql_v2.ore_block_u64_8_256`) stay in `eql_v2` and are referenced cross-schema. `eql_v3.int4` (PR #239, supersedes #225) is the reference scalar implementation; future scalar types such as `int8`, `bool`, `date`, `float`, `numeric`, `timestamp`, `text`, and `jsonb` follow this materializer pattern. `text`, `numeric`, and `jsonb` are planned but have no generated SQL surface yet — `jsonb` in particular needs a separate SQL design beyond the ordered-scalar materializer. The `eql-scalars` fixture catalog (`crates/eql-scalars`) already models their fixture values ahead of the SQL surface. -Adding a scalar encrypted-domain type is generated from a minimal manifest at `tasks/codegen/types/.toml`: the filename supplies ``, and the `[domain]` table maps each generated domain name to the fixed index terms it carries. Example: `int4_eq = ["hm"]`, `int4_ord = ["ore"]`. Term capabilities are fixed in `tasks/codegen/terms.py`: `hm` provides equality, and `ore` provides equality plus ordering. 
`mise run build` regenerates the scalar SQL surface into `src/encrypted_domain//` from every manifest at the start of every build; that surface includes supported comparison wrappers plus blockers for native `jsonb` operators that would otherwise be reachable through domain fallback. Use `mise run codegen:domain ` to refresh a single type manually while iterating on its manifest, or `mise run codegen:domain:all` to regenerate every type at once (the same enumeration `mise run build` uses). The generated `*_types.sql` / `*_functions.sql` / `*_operators.sql` files are gitignored and never committed — the TOML manifest plus `tasks/codegen/terms.py` are the source of truth. Generated files carry an `AUTOMATICALLY GENERATED FILE — DO NOT EDIT` header (the project-wide marker that `docs:validate` greps on to skip generated SQL); change the manifest or term catalog and rebuild, never hand-edit. Hand-written SQL beyond the fixed surface goes in `src/encrypted_domain//_extensions.sql` with no auto-generated header and explicit `-- REQUIRE:` edges — that file IS committed. `text` and `jsonb` have no generated SQL surface yet (they are planned, not out of scope); `jsonb` needs a separate SQL design beyond this ordered-scalar materializer. +Adding a scalar encrypted-domain type is one row in the Rust catalog `eql-scalars::CATALOG` (`crates/eql-scalars/src/lib.rs`): a `ScalarSpec` giving the type `token` (e.g. `int8`), its `ScalarKind` (the `kind` field), the `DomainSpec`s mapping each generated domain suffix to its fixed index `Term`s (`_eq => [Hm]`, `_ord`/`_ord_ore => [Ore]`), and the `Fixture` value list. Term capabilities are fixed in the `Term` enum's `impl` methods (with unit tests): `Hm` provides equality, and `Ore` provides equality plus ordering. There is no TOML manifest and no Python — the catalog is the source of truth, validated by the compiler (an undefined term or unknown scalar is a compile error) plus catalog `#[test]`s. 
`mise run build` runs `cargo run -p eql-codegen`, which regenerates the scalar SQL surface into `src/encrypted_domain//` from `CATALOG` at the start of every build; that surface includes supported comparison wrappers plus blockers for native `jsonb` operators that would otherwise be reachable through domain fallback. `cargo run -p eql-codegen` regenerates every type at once (the same call `mise run build` uses; there is no per-type codegen task). The generated `*_types.sql` / `*_functions.sql` / `*_operators.sql` / `*_aggregates.sql` files are gitignored and never committed; the committed `tests/sqlx/src/fixtures/_values.rs` consts are also generated (CI diffs them). Generated SQL carries an `AUTOMATICALLY GENERATED FILE — DO NOT EDIT` header (the project-wide marker `docs:validate` greps on) and the committed `_values.rs` carries an `AUTO-GENERATED — DO NOT EDIT` header; change the catalog and rebuild, never hand-edit. Hand-written SQL beyond the fixed surface goes in `src/encrypted_domain//_extensions.sql` with no auto-generated header and explicit `-- REQUIRE:` edges — that file IS committed. `text` and `jsonb` are out of scope for this scalar materializer. -**Adding a new encrypted-domain type: follow `docs/reference/encrypted-domain-implementation-spec.md`.** The mechanics are fixed for ordered scalar domains; the manifest only declares domain names and terms. New term behavior belongs in `tasks/codegen/terms.py` with tests, not in free-form TOML fields. +**Adding a new encrypted-domain type: follow `docs/reference/encrypted-domain-implementation-spec.md`.** The mechanics are fixed for ordered scalar domains; the catalog row only declares the token, kind, domain suffixes, and terms. New term behavior belongs in the `Term` enum's `impl` methods in `crates/eql-scalars/src` with tests, not in free-form catalog data. -Regeneration is deterministic: identical manifest + term catalog produce byte-identical SQL. 
If `mise run build` produces unexpected output, the change is in the manifest, `tasks/codegen/terms.py`, or `tasks/codegen/templates.py` — not in random run-to-run variation. +Regeneration is deterministic: an identical `CATALOG` produces byte-identical SQL. If `mise run build` produces unexpected output, the change is in `crates/eql-scalars/src` (the catalog/terms) or `crates/eql-codegen/src` (the renderers) — not in random run-to-run variation. Footguns the spec exists to prevent: @@ -90,7 +90,7 @@ Footguns the spec exists to prevent: - **No domain-over-domain** (`CREATE DOMAIN a AS b`). Operators resolve against the ultimate base type (`jsonb`), so a derived domain does not inherit the base domain's operator surface — blockers stop engaging. - **No operator class on a domain.** Index through a functional index on the extractor (`eq_term` / `ord_term`), whose return type already carries a default opclass. - **Inlinable functions** (extractors, comparison wrappers) need `LANGUAGE sql`, a single-statement `SELECT`, `IMMUTABLE`, and **no `SET` clause** — a pinned `search_path` disables inlining. No per-type allowlist edit: the `pin_search_path.sql` structural rule recognises encrypted-domain functions intrinsically and `tasks/test/splinter.sh` covers the converged extractor/wrapper names. -- **Blockers must be `LANGUAGE plpgsql`, not `LANGUAGE sql`.** The inverse of the rule above. A blocker exists to always raise, but a `LANGUAGE sql` body is inlinable and the planner can elide the call when the result is provably unused (dead `CASE` branch, folded predicate). `LANGUAGE plpgsql` is opaque to the planner, so the call — and its `RAISE` — survives. The generator in `tasks/codegen/templates.py` enforces this; don't "simplify" the rendered blockers to `LANGUAGE sql` even though the body is a single expression. +- **Blockers must be `LANGUAGE plpgsql`, not `LANGUAGE sql`.** The inverse of the rule above. 
A blocker exists to always raise, but a `LANGUAGE sql` body is inlinable and the planner can elide the call when the result is provably unused (dead `CASE` branch, folded predicate). `LANGUAGE plpgsql` is opaque to the planner, so the call — and its `RAISE` — survives. The blocker renderers in `crates/eql-codegen/src` enforce this; don't "simplify" the rendered blockers to `LANGUAGE sql` even though the body is a single expression. - **Build with `mise run clean && mise run build`** — a bare build can leave stale `release/*.sql`. ### Testing Infrastructure @@ -220,7 +220,7 @@ Prefer `LANGUAGE SQL` over `LANGUAGE plpgsql` unless you need procedural feature - Exception handling (`BEGIN...EXCEPTION...END`) - Complex control flow (loops, early returns) - Dynamic SQL (`EXECUTE`) -- Functions that must remain opaque to the planner — typically blockers whose only job is to `RAISE`. `LANGUAGE sql` would be inlined and may be elided when the result is provably unused; `LANGUAGE plpgsql` is never inlined, so the body always runs. See the encrypted-domain footgun list above and the blocker renderers in `tasks/codegen/templates.py`. +- Functions that must remain opaque to the planner — typically blockers whose only job is to `RAISE`. `LANGUAGE sql` would be inlined and may be elided when the result is provably unused; `LANGUAGE plpgsql` is never inlined, so the body always runs. See the encrypted-domain footgun list above and the blocker renderers in `crates/eql-codegen/src`. 
## Release & changelog discipline From f5ca6333876f27c3fa519119f0fad6bd28657642 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 2 Jun 2026 16:01:44 +1000 Subject: [PATCH 11/19] docs(spec): rewrite encrypted-domain spec for Rust catalog, single matrix snapshot --- .../encrypted-domain-implementation-spec.md | 264 ++++++++++-------- 1 file changed, 146 insertions(+), 118 deletions(-) diff --git a/docs/reference/encrypted-domain-implementation-spec.md b/docs/reference/encrypted-domain-implementation-spec.md index e21c8d6a..838f870c 100644 --- a/docs/reference/encrypted-domain-implementation-spec.md +++ b/docs/reference/encrypted-domain-implementation-spec.md @@ -2,7 +2,7 @@ This is the scalar encrypted-domain generator contract used by `int4`. It applies to scalar domains whose searchable payloads are represented by -the fixed term catalog in `tasks/codegen/terms.py`. +the fixed `Term` catalog in `crates/eql-scalars/src`. `text` and `jsonb` are outside this scalar materializer. @@ -10,33 +10,41 @@ the fixed term catalog in `tasks/codegen/terms.py`. Each generated domain is a concrete `jsonb` domain in the `eql_v3` schema named `eql_v3.` (dropped by `DROP SCHEMA eql_v3 CASCADE`; -survives an `eql_v2` uninstall). The manifest is intentionally small: - -```toml -[domain] -int4 = [] -int4_eq = ["hm"] -int4_ord_ore = ["ore"] -int4_ord = ["ore"] +survives an `eql_v2` uninstall). A type's catalog row is intentionally +small — a `ScalarSpec` whose `domains` field lists each generated domain +as a `DomainSpec` (a `suffix` plus the fixed terms it carries): + +```rust +ScalarSpec { + token: "int4", + kind: ScalarKind::I32, + domains: &[ + DomainSpec { suffix: "", terms: &[] }, + DomainSpec { suffix: "_eq", terms: &[Term::Hm] }, + DomainSpec { suffix: "_ord_ore", terms: &[Term::Ore] }, + DomainSpec { suffix: "_ord", terms: &[Term::Ore] }, + ], + fixtures: &[/* see §9 */], +} ``` -The TOML filename supplies the type token. 
The `[domain]` table maps each -generated domain name to the fixed terms it carries. The generator -emits files in the manifest's declared order, so order keys in the TOML -in the order you want them to appear in generated output. Term capabilities -come only from `tasks/codegen/terms.py`: +The `token` supplies the type token; each domain's full name is `token` ++ `suffix`. The generator emits domains in the order the `domains` slice +declares them, so order the slice the way you want the generated output to +read. Term capabilities are fixed by the `Term` enum +(`crates/eql-scalars/src`): | Term | JSON key | Extractor | Return type | Supported operators | |---|---|---|---|---| -| `hm` | `hm` | `eq_term` | `eql_v2.hmac_256` | `=` / `<>` | -| `ore` | `ob` | `ord_term` | `eql_v2.ore_block_u64_8_256` | `=` / `<>` / `<` / `<=` / `>` / `>=` | +| `Hm` | `hm` | `eq_term` | `eql_v2.hmac_256` | `=` / `<>` | +| `Ore` | `ob` | `ord_term` | `eql_v2.ore_block_u64_8_256` | `=` / `<>` / `<` / `<=` / `>` / `>=` | -For current `int4`, domains carrying `ore` use JSON key `ob`, extractor +For current `int4`, domains carrying `Ore` use JSON key `ob`, extractor `ord_term`, and the ORE block supports equality plus ordering. A type that needs a non-ORE equality term on an ordered domain needs a new -catalog term design, not a manifest flag. +`Term` design, not a catalog flag. -The manifest above declares two ordered domains, `int4_ord` and +The row above declares two ordered domains, `int4_ord` and `int4_ord_ore`, carrying the same term. They are intentional twins: the generator emits byte-identical SQL (modulo type name) so callers can pick a name that documents intent without committing to a term family in a @@ -44,42 +52,38 @@ future migration. ## 2. Checklist -- [ ] Author `tasks/codegen/types/.toml`. The filename supplies ``. 
- The `[domain]` table maps generated domain names to fixed terms: - - ```toml - [domain] - int4 = [] - int4_eq = ["hm"] - int4_ord_ore = ["ore"] - int4_ord = ["ore"] - ``` - - Terms determine operator support: `hm` provides `=` / `<>`; `ore` - provides `=` / `<>` / `<` / `<=` / `>` / `>=`. -- [ ] Add or update catalog terms in `tasks/codegen/terms.py` with tests. -- [ ] **If `` is a new scalar kind, register a `ScalarKind` in - `tasks/codegen/scalars.py`** (use the `int4` entry as the template): its - `token`, `rust_type`, the `MIN` / `MAX` / `ZERO` Rust symbols, and the - numeric `min_value` / `max_value` bounds. This is a code change with - tests, exactly like a new catalog term in `terms.py` — not a manifest - field. `load_spec` resolves the scalar before it validates anything, so - without this entry `mise run codegen:domain ` raises - `ScalarError: unknown scalar token ''` and emits nothing. Then search - the codegen tests for any fixture using `` as a negative "unknown - scalar" example (e.g. `test_spec.py`) and update it — registering the - kind makes that token valid. -- [ ] Declare the fixture plaintext list once in the manifest's `[fixture]` - table (see §9). The list MUST include `MIN`, `MAX`, and zero. -- [ ] Run `mise run codegen:domain ` to materialise generated SQL and the - committed `tests/sqlx/src/fixtures/_values.rs` while iterating, or - just `mise run build` — every build regenerates from the manifest first. - Commit the regenerated `_values.rs` (CI diffs it). +- [ ] Add a row to the Rust catalog `eql-scalars::CATALOG` + (`crates/eql-scalars/src/lib.rs`). A `ScalarSpec` declares: + + - `token` — the type token (e.g. `int8`); supplies `` everywhere. + - `kind` — the `ScalarKind` (`I16` / `I32` / `I64`), which carries the + Rust type name, the `MIN`/`MAX`/zero symbols, and the numeric bounds. + - `domains` — a `&[DomainSpec]`, each a `suffix` + the fixed `Term`s it + carries. 
The storage domain is suffix `""` with no terms; `_eq => [Hm]`; + `_ord` and `_ord_ore => [Ore]`. + - `fixtures` — the `Fixture` value list (see §9). It MUST include `Min`, + `Max`, and zero. + + Terms determine operator support: `Hm` provides `=` / `<>`; `Ore` + provides `=` / `<>` / `<` / `<=` / `>` / `>=`. There is no TOML manifest + and no Python: the catalog is the source of truth, validated by the + compiler (an undefined `Term` or unknown `ScalarKind` is a compile error) + plus catalog `#[test]`s over `CATALOG`. +- [ ] **If `` needs a new scalar width**, add a `ScalarKind` enum variant in + `crates/eql-scalars/src/lib.rs` with its rust-type name, `MIN`/`MAX`/zero + symbols, and numeric bounds, and unit-test its `impl` methods. New term + behaviour likewise belongs in the `Term` enum's `impl` methods with tests + — not in free-form catalog data. +- [ ] Run `cargo run -p eql-codegen` to materialise the generated SQL + (`src/encrypted_domain//_{types,functions,operators,aggregates}.sql`, + gitignored) and the committed `tests/sqlx/src/fixtures/_values.rs` + const, or just `mise run build` — every build runs the generator first. + Commit the regenerated `_values.rs` (CI diffs it). There is no per-type + codegen task: one run generates every type from `CATALOG`. - [ ] Generated `*_types.sql` / `*_functions.sql` / `*_operators.sql` / - `*_aggregates.sql` are gitignored and never committed. The TOML - manifest plus `tasks/codegen/terms.py` are the source of truth. - Change the manifest or catalog and rebuild; do not hand-edit - generated SQL. + `*_aggregates.sql` are gitignored and never committed. The catalog + (`eql-scalars::CATALOG`) plus the `eql-codegen` renderers are the source + of truth. Change the catalog and rebuild; do not hand-edit generated SQL. - [ ] Put optional hand-written SQL in `src/encrypted_domain//_extensions.sql` with explicit `-- REQUIRE:` edges. This file IS committed. @@ -87,25 +91,47 @@ future migration. 
single golden master for the type-generic generator: the SQL templates are pure token substitution and the only type-specific rendering is `_values.rs`, so a per-type baseline can only fail when `int4`'s already - would. Drift protection for the new type comes from the `int4` reference - (shared templates + `terms.py`), the committed `_values.rs` const guarded - by the CI staleness check (`mise run codegen:domain ` + `git diff - --exit-code`) and the `` cases in `tasks/codegen/test_scalars.py`, and - the `ordered_numeric_matrix!` SQLx suite (behaviour, not bytes). -- [ ] Run `mise run test:matrix:inventory` and commit the regenerated - `tests/sqlx/snapshots/_matrix_tests.txt` — the sorted inventory of every - `scalars::::*` test name in the `encrypted_domain` binary. CI diffs it - (same as `_values.rs`); a stale snapshot fails the `matrix-coverage` - job with "Coverage inventory stale". This baseline is what catches a - silently dropped, renamed, or `#[cfg]`-gated matrix test. See §8. -- [ ] Run `mise run test:codegen`, the relevant SQLx suites, and the - PostgreSQL matrix before merging. + would. Drift protection for the new type comes from the `int4` reference, + the committed `_values.rs` const guarded by the CI staleness check + (`cargo run -p eql-codegen` + `git diff --exit-code`) and the catalog/ + generator `#[test]`s (`cargo test -p eql-scalars -p eql-codegen`), and the + `ordered_numeric_matrix!` SQLx suite (behaviour, not bytes). +- [ ] Wire the SQLx matrix oracle. The generated SQL is enough to install the + domains, but the `ordered_numeric_matrix!` suite only runs once the Rust + harness knows about the scalar. 
Copy each piece from the `int4` + reference — these are hand-maintained registration lists (the Phase-4 + `scalar_types!` registry, a separate plan, will collapse them): + + | File | Add | + |------|-----| + | `tests/sqlx/src/fixtures/eql_plaintext.rs` | A sealed `EqlPlaintext` impl for the scalar's Rust type: `impl Sealed for {}`, a `PlaintextSqlType` const for its base column type, `impl EqlPlaintext for ` (`CAST`, `PLAINTEXT_SQL_TYPE`, `to_plaintext` → the right `Plaintext` variant), plus the two `#[test]` casts. | + | `tests/sqlx/src/fixtures/eql_v2_.rs` | `crate::scalar_fixture!("eql_v2_", , VALUES);` (pulls `super::_values::VALUES`). | + | `tests/sqlx/src/fixtures/mod.rs` | `pub mod _values;` and `pub mod eql_v2_;`. | + | `tests/sqlx/tests/generate_all_fixtures.rs` | An arm in `generate_for_token`: `"" => fixtures::eql_v2_::spec().run().await,`. The match is exhaustive over the catalog — a catalog token with no arm fails the generator loudly. | + | `tests/sqlx/src/scalar_domains.rs` | `impl ScalarType for ` — `PG_TYPE` (the base PG type, e.g. `"int8"`) and `FIXTURE_VALUES = crate::fixtures::_values::VALUES`. | + | `tests/sqlx/tests/encrypted_domain/scalars/.rs` | `ordered_numeric_matrix! { suite = , scalar = , eql_type = "eql_v2_" }`. | + | `tests/sqlx/tests/encrypted_domain/scalars/mod.rs` | `pub mod ;`. | + + `` is the scalar's Rust type (`i32` for `int4`, `i16` for `int2`). + Forget one and the matrix simply does not run for the type — the matrix + inventory cross-check (next step) surfaces it, because the catalog has the + type but the binary has no `scalars::::` tests. +- [ ] Run `mise run test:matrix:inventory`. It verifies every present type's + token-normalized `scalars::::*` name set equals the single canonical + `tests/sqlx/snapshots/matrix_tests.txt`, and cross-checks the present type + set against `cargo run -p eql-codegen -- list-types`. 
You do **not** edit a + per-type snapshot — there is one canonical snapshot; you only regenerate it + when the macro's emitted name set itself changes. A catalog type missing + its matrix wiring fails the cross-check. See §8 and + `tests/sqlx/snapshots/README.md`. +- [ ] Run `mise run test:codegen` (`cargo test -p eql-scalars -p eql-codegen`), + the relevant SQLx suites, and the PostgreSQL matrix before merging. ## 3. Domain Generation The generator emits `src/encrypted_domain//_types.sql` (gitignored; -materialised on every `mise run build` and on `mise run codegen:domain -`) with one idempotent `DO $$ ... $$` block. Domain `CHECK` +materialised on every `mise run build` and every `cargo run -p eql-codegen`) +with one idempotent `DO $$ ... $$` block. Domain `CHECK` constraints always require: - fixed envelope keys `v` and `i`; @@ -126,9 +152,9 @@ that bypass the fixed operator surface. ## 4. Extractors And Wrappers -Extractor names and return types come from `tasks/codegen/terms.py`, not -from TOML. Generated extractors and supported comparison wrappers are -inline-friendly SQL functions: +Extractor names and return types come from the `Term` enum +(`crates/eql-scalars/src`), not from catalog data. Generated extractors and +supported comparison wrappers are inline-friendly SQL functions: ```sql LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE @@ -225,7 +251,7 @@ Optional hand-written SQL beyond the fixed scalar surface belongs in: src/encrypted_domain//_extensions.sql ``` -The generator must not create this file, list it in TOML, add an +The generator must not create this file, list it in the catalog, add an auto-generated header, or clean it during regeneration. The file must declare its own `-- REQUIRE:` edges, usually to `_types.sql` and whichever generated function or operator file it extends. 
Unlike the @@ -261,7 +287,7 @@ Cover each generated domain with SQLx tests appropriate to its terms: - real typed columns are tested, not only cast literals; - generated ordered-domain twins remain byte-identical modulo type name (the shared generator is anchored by the `int4` golden master in - `tests/codegen/reference/int4/` via `tasks/codegen/test_against_reference.py`; + `tests/codegen/reference/int4/` via the eql-codegen parity test; new types add no baseline of their own — see §2). For ordered numeric scalars this coverage is generated by the @@ -283,21 +309,24 @@ the catalog does not promise. ### Matrix coverage inventory snapshot -The *set of test names* the matrix emits is itself guarded. `mise run -test:matrix:inventory` lists every test in the `encrypted_domain` binary -under a pinned feature set (`--no-default-features`, which deliberately -excludes the `scale` arm — see the task comment in `mise.toml`), greps it to -each `scalars::::*` matrix, `LC_ALL=C sort`s for byte-stable ordering, and -writes one committed snapshot per scalar at -`tests/sqlx/snapshots/_matrix_tests.txt`. The CI `matrix-coverage` job -regenerates with the same feature set and `git diff --exit-code`s every -snapshot; a divergence fails with "Coverage inventory stale". This is the -guard that catches a silently dropped, renamed, or `#[cfg]`-gated matrix -test — a behaviour the SQLx assertions above cannot see, because a deleted -test simply stops running. When you add a scalar you add a new snapshot; -when you add or remove matrix tests you regenerate and commit the affected -snapshot in the same change. The files are a committed test baseline, **not** -gitignored generated SQL. See `tests/sqlx/snapshots/README.md`. +The *set of test names* the matrix emits is guarded by ONE committed, +token-normalized snapshot at `tests/sqlx/snapshots/matrix_tests.txt` — the +sorted inventory of every `scalars::::*` test name with the type token +replaced by the literal ``. 
(The per-type `_matrix_tests.txt` files are +gone: they were byte-identical modulo the token, so one canonical set plus a +per-type normalize-and-compare carries the same signal at a fraction of the +committed surface.) This is the guard that catches a silently dropped, renamed, +or `#[cfg]`-gated matrix test, a behaviour the SQLx assertions above cannot see. +The snapshot is a committed test baseline, **not** gitignored generated SQL. + +`mise run test:matrix:inventory` discovers the present scalar types from the +`encrypted_domain` binary's `--list`, normalizes each type's token to ``, +asserts every type's set equals the canonical snapshot, and cross-checks the +discovered type set against `cargo run -p eql-codegen -- list-types` (the catalog +is the single source). The CI `matrix-coverage` job gates it. **`tests/sqlx/snapshots/README.md` +is the source of truth** for the mechanics (pinned feature set, the catalog +cross-check, the CI diff, and when to regenerate); see it rather than +duplicating the detail here. ## 9. Fixtures @@ -317,47 +346,46 @@ absent. ### Single-sourcing the value list -The plaintext value list is declared **once**, in the manifest's optional -`[fixture]` table, and generated into Rust — never hand-maintained in two -places: +The plaintext value list is declared **once**, in the catalog row's `fixtures` +field, and generated into Rust — never hand-maintained in two places: -```toml -[fixture] -values = [ - "MIN", "-100", "-1", "ZERO", "1", "2", "5", "10", "17", "25", - "42", "50", "100", "250", "1000", "9999", "MAX", -] +```rust +fixtures: &[Fixture::Min, Fixture::N(-100), Fixture::N(-1), Fixture::Zero, + Fixture::N(1), Fixture::N(2), Fixture::N(5), Fixture::N(10), + Fixture::N(17), Fixture::N(25), Fixture::N(42), Fixture::N(50), + Fixture::N(100), Fixture::N(250), Fixture::N(1000), + Fixture::N(9999), Fixture::Max], ``` -Values are strings so the convention is type-agnostic. 
The sentinels `MIN`, -`MAX`, and `ZERO` map to the scalar's Rust named consts (for `int4`: -`i32::MIN`, `i32::MAX`, `0`); every other token is a numeric literal -validated against the type's representable range. The per-type rendering -rules live in `tasks/codegen/scalars.py` (mirroring `terms.py`), not in -free-form TOML fields. `load_spec` enforces the matrix invariant: the set -**must** include `MIN`, `MAX`, and zero, or the build fails. +`Fixture::Min` / `Fixture::Max` / `Fixture::Zero` resolve to the scalar's Rust +named consts (for `int4`: `i32::MIN`, `i32::MAX`, `0`); every `Fixture::N(_)` is +a numeric literal validated against the `ScalarKind`'s representable range by a +catalog `#[test]` (`numeric_value` is infallible, so the range check is the +explicit invariant `every_fixture_value_is_within_kind_bounds`). The same test +enforces the matrix invariant: the set **must** include `Min`, `Max`, and zero, +or the test fails (the compile-time analogue of the old `load_spec` validation). -The generator emits `tests/sqlx/src/fixtures/<T>_values.rs` exposing one +`eql-codegen` emits `tests/sqlx/src/fixtures/<T>_values.rs` exposing one `pub const VALUES: &[<Rust>]`. Both consumers reference that single symbol — the fixture generator (`fixtures::eql_v2_<T>::spec`) and the matrix -oracle (`impl ScalarType for <Rust> { const FIXTURE_VALUES }`) — so the -oracle cannot drift from the values the generator encrypts. +oracle (`impl ScalarType for <Rust> { const FIXTURE_VALUES }`) — so the oracle +cannot drift from the values the generator encrypts. Unlike the gitignored `*_*.sql` surface and the gitignored encrypted `tests/sqlx/fixtures/eql_v2_<T>.sql` (whose ciphertext is non-deterministic -per-encrypt), `<T>_values.rs` **is committed**: its rendering is -deterministic, so the CI `codegen` job regenerates it and runs -`git diff --exit-code` to catch a manifest edit that wasn't regenerated. -Regenerate with `mise run codegen:domain <T>` and commit the result; never -hand-edit it.
+per-encrypt), `_values.rs` **is committed**: its rendering is deterministic, +so the CI `codegen` job regenerates it (`cargo run -p eql-codegen`) and runs +`git diff --exit-code` to catch a catalog edit that wasn't regenerated. +Regenerate with `cargo run -p eql-codegen` (or `mise run build`) and commit the +result; never hand-edit it. ## 10. Build And Verification -- `mise run codegen:domain ` (optional; refreshes one type while - iterating on its manifest before a full build) -- `mise run test:codegen` -- `mise run clean && mise run build` (regenerates every type's SQL - from its manifest first, then builds the release artefacts) +- `cargo run -p eql-codegen` (optional; refreshes all generated SQL + + `_values.rs` from the catalog before a full build) +- `mise run test:codegen` (`cargo test -p eql-scalars -p eql-codegen`) +- `mise run clean && mise run build` (regenerates every type's SQL from + the catalog first, then builds the release artefacts) - relevant SQLx suites - `mise run test` across supported PostgreSQL versions - `mise run --output prefix test:splinter --postgres 17` after a From f60386aa628a81195c9e7e18f707a8201b88caac Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 2 Jun 2026 16:02:34 +1000 Subject: [PATCH 12/19] docs(snapshots): rewrite README for single catalog-driven matrix snapshot --- tests/sqlx/snapshots/README.md | 80 ++++++++++++++++++++++------------ 1 file changed, 53 insertions(+), 27 deletions(-) diff --git a/tests/sqlx/snapshots/README.md b/tests/sqlx/snapshots/README.md index a4ce5ae9..213b034f 100644 --- a/tests/sqlx/snapshots/README.md +++ b/tests/sqlx/snapshots/README.md @@ -1,23 +1,26 @@ -# Matrix coverage inventory snapshots +# Matrix coverage inventory snapshot -This directory holds one committed snapshot per scalar encrypted-domain type: +This directory holds ONE committed snapshot, `matrix_tests.txt` — the canonical, +token-normalized list of every `scalars::::*` test name in the +`encrypted_domain` SQLx binary, with 
each type token replaced by the literal +``. It is a **committed test baseline**, not gitignored generated SQL — keep +it in version control. -- `int4_matrix_tests.txt` -- `int2_matrix_tests.txt` +The per-type `_matrix_tests.txt` files are gone. They were byte-identical +modulo the type token (the matrix tests are macro-generated from one +`ordered_numeric_matrix!` invocation per type with no per-type variation), so a +single canonical set plus a per-type normalize-and-compare carries the same +signal at a fraction of the committed surface. -Each file is a sorted, byte-stable list of every `scalars::::*` test name in -the `encrypted_domain` SQLx binary. They are a **committed test baseline**, not -gitignored generated SQL — keep them in version control. - -## What they guard +## What it guards The SQLx assertions verify that the tests which run produce the right results. They cannot see a test that *stops running* — a matrix test that is deleted, renamed, or hidden behind a `#[cfg]` gate simply vanishes silently, quietly -shrinking coverage. These snapshots close that gap: they pin the *set of test +shrinking coverage. This snapshot closes that gap: it pins the *set of test names* so any such change shows up as an added/removed line in the PR diff. -## How they are generated +## How it is generated / checked Run: @@ -25,11 +28,21 @@ Run: mise run test:matrix:inventory ``` -The task (`mise.toml`, `[tasks."test:matrix:inventory"]`) enumerates the binary -with `cargo test --test encrypted_domain -- --list`, greps each -`scalars::` matrix into its own file, and `LC_ALL=C sort`s for ordering -that is byte-stable across locales. No database is required — `--list` only -enumerates; the suite uses runtime queries. +The task (`mise.toml`, `[tasks."test:matrix:inventory"]`): + +1. Lists the `encrypted_domain` binary ONCE with + `cargo test --no-default-features --test encrypted_domain -- --list`. +2. 
Discovers the set of scalar types present **from the binary's own output** + (the `scalars::::` prefixes) — never a directory glob. +3. Normalizes each type's token to `` and asserts that type's set equals the + canonical `matrix_tests.txt`. Asserts at least one type is present. +4. **Completeness cross-check:** asserts the discovered type set equals + `cargo run -p eql-codegen -- list-types` (the catalog is the single source). + A catalog type added without its matrix wiring — no `scalars::::` tests in + the binary — fails here. + +`LC_ALL=C sort` makes ordering byte-stable across locales. No database is +required — `--list` only enumerates; the suite uses runtime queries. It pins `--no-default-features` so the inventory is deterministic regardless of the caller's local flags. That deliberately excludes the `scale` feature arm @@ -38,16 +51,29 @@ instead by the scale gate plus the `family::mutations` negative controls. ## CI enforcement -The `matrix-coverage` job in `.github/workflows/test-eql.yml` regenerates with -the same pinned feature set and runs `git diff --exit-code` against every -snapshot in this directory. A divergence fails the job with: - -> Coverage inventory stale — run 'mise run test:matrix:inventory' and commit. - -## When you must update these - -- **Adding a new scalar type** → a new `_matrix_tests.txt` appears; commit it. -- **Adding / removing / renaming matrix tests** → regenerate and commit the - affected snapshot in the same change. +The `matrix-coverage` job in `.github/workflows/test-eql.yml` runs the same +task, then `git add -N tests/sqlx/snapshots` and +`git diff --exit-code -- tests/sqlx/snapshots`. The `git add -N` makes a +brand-new, never-committed snapshot trip the diff too. A divergence (or a failed +catalog cross-check) fails the job. 
+ +## When you must update this + +- **Adding a new scalar type** → add the catalog row in + `eql-scalars::CATALOG`, wire the SQLx matrix oracle (see the implementation + spec §2), then run `mise run test:matrix:inventory`. If the new type's + normalized name set matches the canonical snapshot (it will, for a standard + `ordered_numeric_matrix!` type), no snapshot edit is needed — the cross-check + just confirms the type is wired. +- **Removing a scalar type** → remove the catalog row and its matrix wiring; the + cross-check then sees the type gone from both sides. +- **Changing which matrix tests the macro emits** → regenerate and commit + `matrix_tests.txt` in the same change: + ```bash + cd tests/sqlx + cargo test --no-default-features --test encrypted_domain -- --list \ + | sed -n 's/: test$//p' | grep '^scalars::int4::' \ + | sed -e 's/^scalars::int4::/scalars::::/' -e 's/_int4_/__/g' | LC_ALL=C sort > snapshots/matrix_tests.txt + ``` See `docs/reference/encrypted-domain-implementation-spec.md` §2 and §8. From 7feb83d693bd66b56d3548cf9a43d2479a9b2dd1 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 2 Jun 2026 16:04:16 +1000 Subject: [PATCH 13/19] docs(changelog): record Rust catalog codegen + Python toolchain removal --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c75b70db..1a280a71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -23,9 +23,13 @@ Each entry that ships in a published release links to the PR that introduced it. ### Added - **`eql_v3` encrypted-domain schema, with the `int4` family as its first member.** Encrypted-domain type families now live in a new, additional `eql_v3` schema (the existing `eql_v2` schema is unchanged — it keeps the core types/operators and stays the documented public API). 
Four jsonb-backed domains for encrypted `int4` columns: `eql_v3.int4` (storage-only), `eql_v3.int4_eq` (`=` / `<>` via HMAC), and `eql_v3.int4_ord` / `eql_v3.int4_ord_ore` (also `<` `<=` `>` `>=` via ORE block terms). Supported comparisons resolve to inlinable wrappers; the native `jsonb` operator surface reachable through domain fallback is blocked (raises rather than silently mis-resolving). Each domain's `CHECK` requires the EQL envelope (`v`, `i`), the ciphertext (`c`), and the variant's index term(s), and pins the payload version (`VALUE->>'v' = '2'`, matching `eql_v2._encrypted_check_v`) — so a missing key or wrong-version payload is rejected on insert or cast rather than surfacing later at query time. Index via a functional index on the `eql_v3.eq_term` / `eql_v3.ord_term` extractors, not an operator class on the domain. The extractors still return the core `eql_v2.hmac_256` / `eql_v2.ore_block_u64_8_256` index-term types, which remain in `eql_v2` and are referenced cross-schema. Why: a type-safe, per-capability encrypted integer column instead of the untyped `eql_v2_encrypted`, namespaced under its own schema. This is the reference scalar implementation for the generated domain family. ([#239](https://github.com/cipherstash/encrypt-query-language/pull/239), supersedes [#225](https://github.com/cipherstash/encrypt-query-language/pull/225)) -- **`eql_v3.int2` encrypted-domain type family.** Four jsonb-backed domains for encrypted `int2` columns — `eql_v3.int2` (storage-only), `eql_v3.int2_eq` (`=` / `<>` via HMAC), and `eql_v3.int2_ord` / `eql_v3.int2_ord_ore` (also `<` `<=` `>` `>=` via ORE block terms, with `MIN` / `MAX` aggregates) — generated from `tasks/codegen/types/int2.toml` by the same materializer as the `eql_v3.int4` reference. Index via a functional index on the `eql_v3.eq_term` / `eql_v3.ord_term` extractors, not an operator class on the domain. 
Why: a type-safe, per-capability encrypted `smallint` column, proving the scalar generator generalizes beyond the `int4` reference. ([#243](https://github.com/cipherstash/encrypt-query-language/pull/243)) +- **`eql_v3.int2` encrypted-domain type family.** Four jsonb-backed domains for encrypted `int2` columns — `eql_v3.int2` (storage-only), `eql_v3.int2_eq` (`=` / `<>` via HMAC), and `eql_v3.int2_ord` / `eql_v3.int2_ord_ore` (also `<` `<=` `>` `>=` via ORE block terms, with `MIN` / `MAX` aggregates) — generated from the `int2` row in `eql-scalars::CATALOG` by the same materializer as the `eql_v3.int4` reference. Index via a functional index on the `eql_v3.eq_term` / `eql_v3.ord_term` extractors, not an operator class on the domain. Why: a type-safe, per-capability encrypted `smallint` column, proving the scalar generator generalizes beyond the `int4` reference. ([#243](https://github.com/cipherstash/encrypt-query-language/pull/243)) - **Per-domain `MIN` / `MAX` aggregates for the encrypted-domain family.** `eql_v3.min(eql_v3._ord)` / `eql_v3.max(eql_v3._ord)` (and the `_ord_ore` twin) are generated for every ord-capable scalar variant, giving type-safe extrema on domain-typed columns — comparison routes through the variant's `<` / `>` operator (ORE block term, no decryption). The aggregates are declared `PARALLEL = SAFE` with a combine function (the state function itself — min/max are associative), so PostgreSQL can use partial/parallel aggregation on large `GROUP BY` workloads. Why: the new domain types previously had no equivalent of the composite-type aggregates. The existing `eql_v2.min(eql_v2_encrypted)` / `eql_v2.max(eql_v2_encrypted)` aggregates are **retained** and continue to work on `eql_v2_encrypted` columns; the per-domain aggregates are additive and coexist with them. 
([#239](https://github.com/cipherstash/encrypt-query-language/pull/239)) +### Changed + +- **Scalar encrypted-domain types are now defined in a Rust catalog, not TOML manifests; the Python codegen toolchain is removed.** Adding a scalar encrypted-domain type (`int4`, `int8`, …) is now one row in `eql-scalars::CATALOG` (`crates/eql-scalars/src/lib.rs`) instead of authoring `tasks/codegen/types/.toml`. `mise run build` regenerates the same gitignored SQL surface and the same committed `tests/sqlx/src/fixtures/_values.rs` consts via `cargo run -p eql-codegen` (Rust, std-only) rather than the Python generator. The shipped SQL is unchanged — `release/*.sql` is byte-identical across the cutover — so there is no change for callers installing EQL; this only affects contributors who extend the scalar domain families. The `python` mise tool, the `pytest`-based `test:codegen` (now `cargo test -p eql-scalars -p eql-codegen`), the per-type `mise run codegen:domain` tasks, and the per-type `tests/sqlx/snapshots/_matrix_tests.txt` baselines (collapsed into one catalog-reconciled `tests/sqlx/snapshots/matrix_tests.txt`) are gone. Why: a single compiler-validated source of truth shared by the generator and the SQLx test harness, and one fewer toolchain in the build/test path — building and testing EQL no longer needs Python (Python remains only for the separate docs-markdown tooling). 
([#PR](https://github.com/cipherstash/encrypt-query-language/pull/PR)) + ## [2.3.1] — 2026-05-21 ### Fixed From 35bcd9f3c8a2f07174b84c06369f75a4c732ef60 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Tue, 2 Jun 2026 16:15:48 +1000 Subject: [PATCH 14/19] docs(CLAUDE): match actual generated-file markers (-- / // AUTOMATICALLY GENERATED FILE) --- CLAUDE.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CLAUDE.md b/CLAUDE.md index 65da42bd..ec0788ca 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -78,7 +78,7 @@ This is the **Encrypt Query Language (EQL)** - a PostgreSQL extension for search `src/encrypted_domain/` holds **encrypted-domain type families** — jsonb-backed PostgreSQL domains in the **`eql_v3` schema**, one domain per operator/index capability (`eql_v3.` storage-only, `eql_v3._eq`, `eql_v3._ord`). The schema qualifier replaces the old version-prefixed name, so the domains are `eql_v3.int4`, `eql_v3.int4_eq`, `eql_v3.int4_ord`, `eql_v3.int4_ord_ore` — created in `eql_v3`, not `public`. Their extractors/wrappers/aggregates (`eql_v3.eq_term`, `eql_v3.ord_term`, `eql_v3.eq`/`lt`/…, `eql_v3.min`/`max`) also live in `eql_v3`, but the index-term types they return and construct (`eql_v2.hmac_256`, `eql_v2.ore_block_u64_8_256`) stay in `eql_v2` and are referenced cross-schema. `eql_v3.int4` (PR #239, supersedes #225) is the reference scalar implementation; future scalar types such as `int8`, `bool`, `date`, `float`, `numeric`, `timestamp`, `text`, and `jsonb` follow this materializer pattern. `text`, `numeric`, and `jsonb` are planned but have no generated SQL surface yet — `jsonb` in particular needs a separate SQL design beyond the ordered-scalar materializer. The `eql-scalars` fixture catalog (`crates/eql-scalars`) already models their fixture values ahead of the SQL surface. -Adding a scalar encrypted-domain type is one row in the Rust catalog `eql-scalars::CATALOG` (`crates/eql-scalars/src/lib.rs`): a `ScalarSpec` giving the type `token` (e.g. 
`int8`), its `ScalarKind` (the `kind` field), the `DomainSpec`s mapping each generated domain suffix to its fixed index `Term`s (`_eq => [Hm]`, `_ord`/`_ord_ore => [Ore]`), and the `Fixture` value list. Term capabilities are fixed in the `Term` enum's `impl` methods (with unit tests): `Hm` provides equality, and `Ore` provides equality plus ordering. There is no TOML manifest and no Python — the catalog is the source of truth, validated by the compiler (an undefined term or unknown scalar is a compile error) plus catalog `#[test]`s. `mise run build` runs `cargo run -p eql-codegen`, which regenerates the scalar SQL surface into `src/encrypted_domain//` from `CATALOG` at the start of every build; that surface includes supported comparison wrappers plus blockers for native `jsonb` operators that would otherwise be reachable through domain fallback. `cargo run -p eql-codegen` regenerates every type at once (the same call `mise run build` uses; there is no per-type codegen task). The generated `*_types.sql` / `*_functions.sql` / `*_operators.sql` / `*_aggregates.sql` files are gitignored and never committed; the committed `tests/sqlx/src/fixtures/_values.rs` consts are also generated (CI diffs them). Generated SQL carries an `AUTOMATICALLY GENERATED FILE — DO NOT EDIT` header (the project-wide marker `docs:validate` greps on) and the committed `_values.rs` carries an `AUTO-GENERATED — DO NOT EDIT` header; change the catalog and rebuild, never hand-edit. Hand-written SQL beyond the fixed surface goes in `src/encrypted_domain//_extensions.sql` with no auto-generated header and explicit `-- REQUIRE:` edges — that file IS committed. `text` and `jsonb` are out of scope for this scalar materializer. +Adding a scalar encrypted-domain type is one row in the Rust catalog `eql-scalars::CATALOG` (`crates/eql-scalars/src/lib.rs`): a `ScalarSpec` giving the type `token` (e.g. 
`int8`), its `ScalarKind` (the `kind` field), the `DomainSpec`s mapping each generated domain suffix to its fixed index `Term`s (`_eq => [Hm]`, `_ord`/`_ord_ore => [Ore]`), and the `Fixture` value list. Term capabilities are fixed in the `Term` enum's `impl` methods (with unit tests): `Hm` provides equality, and `Ore` provides equality plus ordering. There is no TOML manifest and no Python — the catalog is the source of truth, validated by the compiler (an undefined term or unknown scalar is a compile error) plus catalog `#[test]`s. `mise run build` runs `cargo run -p eql-codegen`, which regenerates the scalar SQL surface into `src/encrypted_domain//` from `CATALOG` at the start of every build; that surface includes supported comparison wrappers plus blockers for native `jsonb` operators that would otherwise be reachable through domain fallback. `cargo run -p eql-codegen` regenerates every type at once (the same call `mise run build` uses; there is no per-type codegen task). The generated `*_types.sql` / `*_functions.sql` / `*_operators.sql` / `*_aggregates.sql` files are gitignored and never committed; the committed `tests/sqlx/src/fixtures/_values.rs` consts are also generated (CI diffs them). Generated SQL carries a `-- AUTOMATICALLY GENERATED FILE` header (the project-wide marker `docs:validate` greps on) and the committed `_values.rs` carries a `// AUTOMATICALLY GENERATED FILE` header; change the catalog and rebuild, never hand-edit. Hand-written SQL beyond the fixed surface goes in `src/encrypted_domain//_extensions.sql` with no auto-generated header and explicit `-- REQUIRE:` edges — that file IS committed. `text` and `jsonb` are out of scope for this scalar materializer. **Adding a new encrypted-domain type: follow `docs/reference/encrypted-domain-implementation-spec.md`.** The mechanics are fixed for ordered scalar domains; the catalog row only declares the token, kind, domain suffixes, and terms. 
New term behavior belongs in the `Term` enum's `impl` methods in `crates/eql-scalars/src` with tests, not in free-form catalog data. From 959eb85f0b999be9ffe8dd7d090dfe6cd463569f Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 3 Jun 2026 09:41:35 +1000 Subject: [PATCH 15/19] refactor(codegen): drop generated _values.rs; materialise fixtures from catalog The committed tests/sqlx/src/fixtures/{int4,int2}_values.rs were a Rust->Rust codegen round-trip: eql-codegen re-serialised each catalog row's Fixture list into a committed .rs that the test crate then imported -- even though that crate already depends on eql-scalars. Now the catalog row is the single definition for both the SQL surface and the test fixtures, as originally intended. eql-scalars materialises each row's Fixture list into a typed const at compile time via the new int_values! macro (numeric_value is now a const fn): pub const INT4_VALUES: &[i32] = ...; pub const INT2_VALUES: &[i16] = ...; ScalarType::FIXTURE_VALUES reads them directly. The trait contract is unchanged, so matrix.rs, the int4_expanded.rs snapshot, and mutations.rs are untouched. - delete int4_values.rs, int2_values.rs, the golden reference/int4/int4_values.rs, the templates.rs renderer, and the now-dead Rust-header writer machinery (write_generated_rs / is_generated_rs / AUTO_GENERATED_HEADER_RS) - eql-scalars: add int_values! + the two consts + values_tests pinning the lists - CI/tasks: drop the regenerate-and-diff fixture-const step and the values.rs git-clean check in codegen-parity.sh - docs: CLAUDE.md, CHANGELOG, implementation spec, reference README Shipped SQL is unchanged (release/*.sql byte-identical). Verified: cargo test -p eql-scalars -p eql-codegen, cargo check -p eql_tests --all-targets, mise run codegen:parity, mise run clean && mise run build. 
--- .github/workflows/test-eql.yml | 27 ++-- CHANGELOG.md | 2 +- CLAUDE.md | 2 +- crates/eql-codegen/src/consts.rs | 18 --- crates/eql-codegen/src/context.rs | 33 ----- crates/eql-codegen/src/generate.rs | 77 +++--------- crates/eql-codegen/src/lib.rs | 9 +- crates/eql-codegen/src/main.rs | 2 +- crates/eql-codegen/src/templates.rs | 78 ------------ crates/eql-codegen/src/writer.rs | 77 +----------- crates/eql-codegen/tests/parity.rs | 47 +++---- crates/eql-scalars/src/lib.rs | 116 +++++++++++++++++- .../encrypted-domain-implementation-spec.md | 63 +++++----- tasks/build.sh | 7 +- tasks/codegen-parity.sh | 44 ++++--- tests/codegen/reference/README.md | 16 ++- .../reference/int4/int4_eq_functions.sql | 2 +- .../reference/int4/int4_eq_operators.sql | 2 +- .../codegen/reference/int4/int4_functions.sql | 2 +- .../codegen/reference/int4/int4_operators.sql | 2 +- .../reference/int4/int4_ord_aggregates.sql | 2 +- .../reference/int4/int4_ord_functions.sql | 2 +- .../reference/int4/int4_ord_operators.sql | 2 +- .../int4/int4_ord_ore_aggregates.sql | 2 +- .../reference/int4/int4_ord_ore_functions.sql | 2 +- .../reference/int4/int4_ord_ore_operators.sql | 2 +- tests/codegen/reference/int4/int4_types.sql | 2 +- tests/codegen/reference/int4/int4_values.rs | 28 ----- tests/sqlx/src/fixtures/eql_v2_int2.rs | 2 +- tests/sqlx/src/fixtures/eql_v2_int4.rs | 2 +- tests/sqlx/src/fixtures/int2_values.rs | 30 ----- tests/sqlx/src/fixtures/int4_values.rs | 28 ----- tests/sqlx/src/fixtures/mod.rs | 11 +- tests/sqlx/src/fixtures/scalar_fixture.rs | 2 +- tests/sqlx/src/scalar_domains.rs | 20 +-- 35 files changed, 264 insertions(+), 499 deletions(-) delete mode 100644 crates/eql-codegen/src/templates.rs delete mode 100644 tests/codegen/reference/int4/int4_values.rs delete mode 100644 tests/sqlx/src/fixtures/int2_values.rs delete mode 100644 tests/sqlx/src/fixtures/int4_values.rs diff --git a/.github/workflows/test-eql.yml b/.github/workflows/test-eql.yml index feacd2c6..d74f2946 100644 --- 
a/.github/workflows/test-eql.yml +++ b/.github/workflows/test-eql.yml @@ -115,28 +115,15 @@ jobs: # Crate compile/lint/test (cargo test -p eql-scalars -p eql-codegen) runs # in the dedicated `test:crates` job; this job covers the codegen-specific - # gates only — fixture-value regeneration and golden/values parity. - - # Regenerate the committed Rust fixture-value consts for EVERY type from - # the catalog and fail if any differ from / are missing in the tree. - # eql-codegen renders all _values.rs deterministically (unlike the - # encrypted .sql fixtures, whose ciphertext is non-deterministic and - # gitignored), so a plain diff is the right guard — it catches a catalog - # edit that wasn't regenerated. `git add -N` registers any brand-new - # untracked const so a forgotten-to-commit file also trips the diff. No - # Postgres needed: the generator is std-only. - - name: Regenerate and verify fixture-value consts (all types) - run: | - cargo run -p eql-codegen - git add -N tests/sqlx/src/fixtures - git diff --exit-code -- tests/sqlx/src/fixtures \ - || { echo "Fixture value const(s) stale or uncommitted — run 'cargo run -p eql-codegen' and commit tests/sqlx/src/fixtures."; exit 1; } + # gate only — golden parity. The plaintext fixture lists are no longer a + # generated file: they live in the catalog (`eql_scalars::INT4_VALUES` / + # `INT2_VALUES`) and are pinned by `eql-scalars`'s own unit tests, so there + # is nothing to regenerate-and-diff here. # Parity gate: assert the Rust eql-codegen output is line-normalized-equal - # to the int4 golden reference and the committed _values.rs are byte- - # identical (git-clean) after regeneration. Python is no longer an oracle - # (retired in P2). No Postgres needed — the generator runs offline. - - name: Verify generator parity (golden + values) + # to the int4 golden reference. Python is no longer an oracle (retired in + # P2). No Postgres needed — the generator runs offline. 
+ - name: Verify generator parity (golden) run: | mise run codegen:parity diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a280a71..853e5e81 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,7 +28,7 @@ Each entry that ships in a published release links to the PR that introduced it. ### Changed -- **Scalar encrypted-domain types are now defined in a Rust catalog, not TOML manifests; the Python codegen toolchain is removed.** Adding a scalar encrypted-domain type (`int4`, `int8`, …) is now one row in `eql-scalars::CATALOG` (`crates/eql-scalars/src/lib.rs`) instead of authoring `tasks/codegen/types/.toml`. `mise run build` regenerates the same gitignored SQL surface and the same committed `tests/sqlx/src/fixtures/_values.rs` consts via `cargo run -p eql-codegen` (Rust, std-only) rather than the Python generator. The shipped SQL is unchanged — `release/*.sql` is byte-identical across the cutover — so there is no change for callers installing EQL; this only affects contributors who extend the scalar domain families. The `python` mise tool, the `pytest`-based `test:codegen` (now `cargo test -p eql-scalars -p eql-codegen`), the per-type `mise run codegen:domain` tasks, and the per-type `tests/sqlx/snapshots/_matrix_tests.txt` baselines (collapsed into one catalog-reconciled `tests/sqlx/snapshots/matrix_tests.txt`) are gone. Why: a single compiler-validated source of truth shared by the generator and the SQLx test harness, and one fewer toolchain in the build/test path — building and testing EQL no longer needs Python (Python remains only for the separate docs-markdown tooling). ([#PR](https://github.com/cipherstash/encrypt-query-language/pull/PR)) +- **Scalar encrypted-domain types are now defined in a Rust catalog, not TOML manifests; the Python codegen toolchain is removed.** Adding a scalar encrypted-domain type (`int4`, `int8`, …) is now one row in `eql-scalars::CATALOG` (`crates/eql-scalars/src/lib.rs`) instead of authoring `tasks/codegen/types/.toml`. 
`mise run build` regenerates the gitignored SQL surface via `cargo run -p eql-codegen` (Rust, std-only) rather than the Python generator. The catalog row's `Fixture` list is the single source of truth for that type's plaintext fixtures: the SQLx test matrix reads it directly as a compile-time-materialised const (`eql_scalars::INT4_VALUES` / `INT2_VALUES`, `ScalarType::FIXTURE_VALUES`), so there is no longer a generated, committed `tests/sqlx/src/fixtures/_values.rs` — a Rust source of truth no longer round-trips through generated Rust. The shipped SQL is unchanged — `release/*.sql` is byte-identical across the cutover — so there is no change for callers installing EQL; this only affects contributors who extend the scalar domain families. The `python` mise tool, the `pytest`-based `test:codegen` (now `cargo test -p eql-scalars -p eql-codegen`), the per-type `mise run codegen:domain` tasks, and the per-type `tests/sqlx/snapshots/_matrix_tests.txt` baselines (collapsed into one catalog-reconciled `tests/sqlx/snapshots/matrix_tests.txt`) are gone. Why: a single compiler-validated source of truth shared by the generator and the SQLx test harness, and one fewer toolchain in the build/test path — building and testing EQL no longer needs Python (Python remains only for the separate docs-markdown tooling). ([#PR](https://github.com/cipherstash/encrypt-query-language/pull/PR)) ## [2.3.1] — 2026-05-21 diff --git a/CLAUDE.md b/CLAUDE.md index ec0788ca..9cb5b80f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -78,7 +78,7 @@ This is the **Encrypt Query Language (EQL)** - a PostgreSQL extension for search `src/encrypted_domain/` holds **encrypted-domain type families** — jsonb-backed PostgreSQL domains in the **`eql_v3` schema**, one domain per operator/index capability (`eql_v3.` storage-only, `eql_v3._eq`, `eql_v3._ord`). 
The schema qualifier replaces the old version-prefixed name, so the domains are `eql_v3.int4`, `eql_v3.int4_eq`, `eql_v3.int4_ord`, `eql_v3.int4_ord_ore` — created in `eql_v3`, not `public`. Their extractors/wrappers/aggregates (`eql_v3.eq_term`, `eql_v3.ord_term`, `eql_v3.eq`/`lt`/…, `eql_v3.min`/`max`) also live in `eql_v3`, but the index-term types they return and construct (`eql_v2.hmac_256`, `eql_v2.ore_block_u64_8_256`) stay in `eql_v2` and are referenced cross-schema. `eql_v3.int4` (PR #239, supersedes #225) is the reference scalar implementation; future scalar types such as `int8`, `bool`, `date`, `float`, `numeric`, `timestamp`, `text`, and `jsonb` follow this materializer pattern. `text`, `numeric`, and `jsonb` are planned but have no generated SQL surface yet — `jsonb` in particular needs a separate SQL design beyond the ordered-scalar materializer. The `eql-scalars` fixture catalog (`crates/eql-scalars`) already models their fixture values ahead of the SQL surface. -Adding a scalar encrypted-domain type is one row in the Rust catalog `eql-scalars::CATALOG` (`crates/eql-scalars/src/lib.rs`): a `ScalarSpec` giving the type `token` (e.g. `int8`), its `ScalarKind` (the `kind` field), the `DomainSpec`s mapping each generated domain suffix to its fixed index `Term`s (`_eq => [Hm]`, `_ord`/`_ord_ore => [Ore]`), and the `Fixture` value list. Term capabilities are fixed in the `Term` enum's `impl` methods (with unit tests): `Hm` provides equality, and `Ore` provides equality plus ordering. There is no TOML manifest and no Python — the catalog is the source of truth, validated by the compiler (an undefined term or unknown scalar is a compile error) plus catalog `#[test]`s. 
`mise run build` runs `cargo run -p eql-codegen`, which regenerates the scalar SQL surface into `src/encrypted_domain//` from `CATALOG` at the start of every build; that surface includes supported comparison wrappers plus blockers for native `jsonb` operators that would otherwise be reachable through domain fallback. `cargo run -p eql-codegen` regenerates every type at once (the same call `mise run build` uses; there is no per-type codegen task). The generated `*_types.sql` / `*_functions.sql` / `*_operators.sql` / `*_aggregates.sql` files are gitignored and never committed; the committed `tests/sqlx/src/fixtures/_values.rs` consts are also generated (CI diffs them). Generated SQL carries a `-- AUTOMATICALLY GENERATED FILE` header (the project-wide marker `docs:validate` greps on) and the committed `_values.rs` carries a `// AUTOMATICALLY GENERATED FILE` header; change the catalog and rebuild, never hand-edit. Hand-written SQL beyond the fixed surface goes in `src/encrypted_domain//_extensions.sql` with no auto-generated header and explicit `-- REQUIRE:` edges — that file IS committed. `text` and `jsonb` are out of scope for this scalar materializer. +Adding a scalar encrypted-domain type is one row in the Rust catalog `eql-scalars::CATALOG` (`crates/eql-scalars/src/lib.rs`): a `ScalarSpec` giving the type `token` (e.g. `int8`), its `ScalarKind` (the `kind` field), the `DomainSpec`s mapping each generated domain suffix to its fixed index `Term`s (`_eq => [Hm]`, `_ord`/`_ord_ore => [Ore]`), and the `Fixture` value list. Term capabilities are fixed in the `Term` enum's `impl` methods (with unit tests): `Hm` provides equality, and `Ore` provides equality plus ordering. There is no TOML manifest and no Python — the catalog is the source of truth, validated by the compiler (an undefined term or unknown scalar is a compile error) plus catalog `#[test]`s. 
`mise run build` runs `cargo run -p eql-codegen`, which regenerates the scalar SQL surface into `src/encrypted_domain//` from `CATALOG` at the start of every build; that surface includes supported comparison wrappers plus blockers for native `jsonb` operators that would otherwise be reachable through domain fallback. `cargo run -p eql-codegen` regenerates every type at once (the same call `mise run build` uses; there is no per-type codegen task). The generated `*_types.sql` / `*_functions.sql` / `*_operators.sql` / `*_aggregates.sql` files are gitignored and never committed. The per-type plaintext fixture lists the SQLx matrix consumes are **not** a generated file — they are materialised from each `CATALOG` row at compile time as `eql_scalars::INT4_VALUES` / `INT2_VALUES` (the `int_values!` macro) and read directly by `ScalarType::FIXTURE_VALUES`; a Rust source of truth no longer round-trips through a committed generated `.rs`. Generated SQL carries a `-- AUTOMATICALLY GENERATED FILE` header (the project-wide marker `docs:validate` greps on); change the catalog and rebuild, never hand-edit. Hand-written SQL beyond the fixed surface goes in `src/encrypted_domain//_extensions.sql` with no auto-generated header and explicit `-- REQUIRE:` edges — that file IS committed. `text` and `jsonb` are out of scope for this scalar materializer. **Adding a new encrypted-domain type: follow `docs/reference/encrypted-domain-implementation-spec.md`.** The mechanics are fixed for ordered scalar domains; the catalog row only declares the token, kind, domain suffixes, and terms. New term behavior belongs in the `Term` enum's `impl` methods in `crates/eql-scalars/src` with tests, not in free-form catalog data. 
diff --git a/crates/eql-codegen/src/consts.rs b/crates/eql-codegen/src/consts.rs index 3f2fdbf0..64d2e438 100644 --- a/crates/eql-codegen/src/consts.rs +++ b/crates/eql-codegen/src/consts.rs @@ -4,10 +4,6 @@ /// the writer uses it only to recognise files it owns (overwrite/clean safety). pub(crate) const AUTO_GENERATED_HEADER: &str = "-- AUTOMATICALLY GENERATED FILE.\n"; -/// Rust generated-file marker, prepended to `_values.rs` (which has no -/// template). Rust comment syntax so the `.rs` file stays valid. -pub(crate) const AUTO_GENERATED_HEADER_RS: &str = "// AUTOMATICALLY GENERATED FILE.\n"; - /// Schema housing the encrypted-domain families. pub(crate) const DOMAIN_SCHEMA: &str = "eql_v3"; /// Schema owning the core index-term types/constructors. @@ -37,20 +33,6 @@ mod tests { assert!(AUTO_GENERATED_HEADER.contains("AUTOMATICALLY GENERATED FILE")); } - #[test] - fn rust_marker_is_a_rust_comment() { - assert_eq!( - AUTO_GENERATED_HEADER_RS, - "// AUTOMATICALLY GENERATED FILE.\n" - ); - for line in AUTO_GENERATED_HEADER_RS.lines() { - assert!( - !line.starts_with("--"), - "rust marker must not contain SQL comments" - ); - } - } - #[test] fn sql_str_doubles_single_quotes() { assert_eq!(sql_str("o'brien"), "o''brien"); diff --git a/crates/eql-codegen/src/context.rs b/crates/eql-codegen/src/context.rs index 7066377f..01e80c6c 100644 --- a/crates/eql-codegen/src/context.rs +++ b/crates/eql-codegen/src/context.rs @@ -4,17 +4,6 @@ use crate::consts::*; use crate::operator_surface::Operator; use eql_scalars::{DomainSpec, Term}; -/// Line-normalize SQL for best-effort byte-exact comparison: trim each line's -/// leading/trailing whitespace and drop blank lines; preserve intra-line -/// spacing. NOT used for `_values.rs` (which stays byte-exact). 
-pub fn normalize_sql(s: &str) -> String { - s.lines() - .map(|l| l.trim()) - .filter(|l| !l.is_empty()) - .collect::>() - .join("\n") -} - /// Build the minijinja environment with the embedded templates: one whole-file /// template per output file (`types`/`functions`/`operators`/`aggregates`) plus /// the per-kind function-body partials that `functions.sql` dynamically @@ -296,28 +285,6 @@ mod tests { assert!(!is_ord_capable(&[])); } - #[test] - fn normalize_trims_lines_and_drops_blanks() { - let input = " CREATE DOMAIN x\n\n CHECK (a) \n\n"; - assert_eq!(normalize_sql(input), "CREATE DOMAIN x\nCHECK (a)"); - } - - #[test] - fn normalize_preserves_intra_line_spacing() { - let input = "RAISE EXCEPTION 'operator % is not supported for %';"; - assert_eq!( - normalize_sql(input), - "RAISE EXCEPTION 'operator % is not supported for %';" - ); - } - - #[test] - fn normalize_equal_modulo_indentation_and_blank_lines() { - let a = "DO $$\nBEGIN\n IF NOT EXISTS (\n ) THEN\n END IF;\nEND\n$$;\n"; - let b = "DO $$\n\nBEGIN\n IF NOT EXISTS (\n ) THEN\nEND IF;\nEND\n$$;"; - assert_eq!(normalize_sql(a), normalize_sql(b)); - } - #[test] fn environment_has_whole_file_and_partial_templates() { let env = environment(); diff --git a/crates/eql-codegen/src/generate.rs b/crates/eql-codegen/src/generate.rs index c7625545..581f0244 100644 --- a/crates/eql-codegen/src/generate.rs +++ b/crates/eql-codegen/src/generate.rs @@ -28,16 +28,6 @@ fn types_path(token: &str) -> String { format!("src/encrypted_domain/{token}/{token}_types.sql") } -/// Committed Rust fixture-value const path. Port of `fixture_values_rs_path`. -pub fn fixture_values_rs_path(out_root: &Path, token: &str) -> PathBuf { - out_root - .join("tests") - .join("sqlx") - .join("src") - .join("fixtures") - .join(format!("{token}_values.rs")) -} - /// Body for _types.sql: every domain in one idempotent DO block. /// Port of `render_types_file`. 
pub fn render_types_file(spec: &ScalarSpec) -> String { @@ -219,10 +209,8 @@ pub fn render_aggregates_file(token: &str, domain: &DomainSpec) -> Option Result, Ok(written) } -/// Generate every catalog type's SQL + committed _values.rs under `out_root`. -/// The single entry point: replaces Python's per-type and --all forms. +/// Generate every catalog type's gitignored SQL surface under `out_root`. The +/// single entry point: replaces Python's per-type and --all forms. The +/// plaintext fixture lists are not generated — they live in the catalog +/// (`eql_scalars::INT4_VALUES` / `INT2_VALUES`), read directly by the SQLx tests. pub fn generate_all(out_root: &Path) -> Result { for spec in eql_scalars::CATALOG { let token = spec.token; let out_dir = out_root.join("src").join("encrypted_domain").join(token); - let mut written = generate_type(spec, &out_dir)?; - - let rs_path = fixture_values_rs_path(out_root, token); - write_generated_rs(&rs_path, &render_fixture_values_rs(spec))?; - written.push(rs_path); + let written = generate_type(spec, &out_dir)?; for p in &written { let rel = p.strip_prefix(out_root).unwrap_or(p); @@ -312,7 +298,6 @@ mod tests { .expect("domain suffix") } - use crate::templates::render_fixture_values_rs; use std::fs; fn repo_root() -> PathBuf { @@ -384,18 +369,16 @@ mod tests { } #[test] - fn types_file_normalized_matches_golden() { - use crate::context::normalize_sql; + fn types_file_matches_golden() { let root = repo_root(); let path = root.join("tests/codegen/reference/int4/int4_types.sql"); let expected = strip_reference_marker(&fs::read_to_string(&path).unwrap()); let actual = render_types_file(spec("int4")); - assert_eq!(normalize_sql(&actual), normalize_sql(&expected)); + assert_eq!(actual, expected); } #[test] - fn functions_files_normalized_match_golden() { - use crate::context::normalize_sql; + fn functions_files_match_golden() { let root = repo_root(); let s = spec("int4"); for d in s.domains { @@ -403,17 +386,12 @@ mod tests { let 
path = root.join(format!("tests/codegen/reference/int4/{full}_functions.sql")); let expected = strip_reference_marker(&fs::read_to_string(&path).unwrap()); let actual = render_functions_file("int4", d); - assert_eq!( - normalize_sql(&actual), - normalize_sql(&expected), - "{full}_functions.sql diverged" - ); + assert_eq!(actual, expected, "{full}_functions.sql diverged"); } } #[test] - fn operators_files_normalized_match_golden() { - use crate::context::normalize_sql; + fn operators_files_match_golden() { let root = repo_root(); let s = spec("int4"); for d in s.domains { @@ -421,17 +399,12 @@ mod tests { let path = root.join(format!("tests/codegen/reference/int4/{full}_operators.sql")); let expected = strip_reference_marker(&fs::read_to_string(&path).unwrap()); let actual = render_operators_file("int4", d); - assert_eq!( - normalize_sql(&actual), - normalize_sql(&expected), - "{full}_operators.sql" - ); + assert_eq!(actual, expected, "{full}_operators.sql"); } } #[test] - fn aggregates_files_normalized_match_golden() { - use crate::context::normalize_sql; + fn aggregates_files_match_golden() { let root = repo_root(); let s = spec("int4"); for d in s.domains { @@ -441,11 +414,7 @@ mod tests { "tests/codegen/reference/int4/{full}_aggregates.sql" )); let expected = strip_reference_marker(&fs::read_to_string(&path).unwrap()); - assert_eq!( - normalize_sql(&actual), - normalize_sql(&expected), - "{full}_aggregates.sql" - ); + assert_eq!(actual, expected, "{full}_aggregates.sql"); } } } @@ -462,13 +431,11 @@ mod tests { continue; } let name = path.file_name().unwrap().to_str().unwrap().to_string(); - use crate::context::normalize_sql; let expected = strip_reference_marker(&fs::read_to_string(&path).unwrap()); let actual = rendered_for("int4", &name, s); assert_eq!( - normalize_sql(&actual), - normalize_sql(&expected), - "{name}: generator diverged from golden reference (normalized)" + actual, expected, + "{name}: generator diverged from golden reference" ); checked += 1; 
} @@ -478,18 +445,6 @@ mod tests { ); } - #[test] - fn generator_matches_int4_values_rs_reference() { - let root = repo_root(); - let path = root.join("tests/codegen/reference/int4/int4_values.rs"); - let expected = strip_reference_marker(&fs::read_to_string(&path).unwrap()); - let actual = render_fixture_values_rs(spec("int4")); - assert_eq!( - actual, expected, - "int4_values.rs: generator diverged from golden reference" - ); - } - #[test] fn generate_type_writes_expected_files() { let d = crate::writer::test_support::tempdir(); diff --git a/crates/eql-codegen/src/lib.rs b/crates/eql-codegen/src/lib.rs index ebde05bf..aada6bb7 100644 --- a/crates/eql-codegen/src/lib.rs +++ b/crates/eql-codegen/src/lib.rs @@ -1,11 +1,12 @@ //! Scalar encrypted-domain SQL generator. Renders the `eql-scalars` catalog to -//! the gitignored SQL surface and the committed `_values.rs` consts. The SQL -//! surface is validated against the `tests/codegen/reference/int4` golden under -//! line-normalized comparison; `_values.rs` is validated byte-exact. +//! the gitignored SQL surface, validated byte-for-byte against the +//! `tests/codegen/reference/int4` golden (modulo the one `-- REFERENCE:` +//! provenance line each reference file carries). The plaintext fixture lists +//! the SQLx matrix consumes live in the catalog itself +//! (`eql_scalars::INT4_VALUES` / `INT2_VALUES`), not in a generated file. pub mod consts; pub mod context; pub mod generate; pub mod operator_surface; -pub mod templates; pub mod writer; diff --git a/crates/eql-codegen/src/main.rs b/crates/eql-codegen/src/main.rs index b1b96de8..c51bf4c2 100644 --- a/crates/eql-codegen/src/main.rs +++ b/crates/eql-codegen/src/main.rs @@ -27,7 +27,7 @@ fn main() -> ExitCode { } if args.len() == 1 { - // No args: generate every type's SQL + _values.rs. + // No args: generate every type's gitignored SQL surface. 
match generate_all(&repo_root()) { Ok(0) => return ExitCode::SUCCESS, Ok(_) => return ExitCode::FAILURE, // any non-zero codegen result is a failure diff --git a/crates/eql-codegen/src/templates.rs b/crates/eql-codegen/src/templates.rs deleted file mode 100644 index e0da5290..00000000 --- a/crates/eql-codegen/src/templates.rs +++ /dev/null @@ -1,78 +0,0 @@ -//! Rust fixture-const renderer. The SQL surface is rendered from minijinja -//! templates (see `context.rs`); this file emits only the committed -//! `_values.rs` consts, which stay byte-exact. - -use eql_scalars::ScalarSpec; - -/// Body for tests/sqlx/src/fixtures/_values.rs. The writer prepends the -/// AUTO-GENERATED Rust header, so the body carries none. -/// Port of templates.py `render_fixture_values_rs`. -pub fn render_fixture_values_rs(spec: &ScalarSpec) -> String { - let token = spec.token; - let rust_type = spec.kind.rust_type(); - let mut literals = String::new(); - for &f in spec.fixtures { - literals.push_str(&format!(" {},\n", f.render_literal(spec.kind))); - } - // Raw string keeps the emitted shape legible while staying byte-exact; - // lines are flush-left because raw-string whitespace is literal output. - format!( - r#"//! Fixture plaintext values for the {token} encrypted-domain family. -//! -//! Generated from the `{token}` row in `eql-scalars::CATALOG` (`fixtures`) — -//! the single source of truth shared by the fixture generator -//! (`fixtures::eql_v2_{token}`) and the matrix oracle -//! (`ScalarType::FIXTURE_VALUES`). - -/// Distinct plaintext values present in the `eql_v2_{token}` fixture. 
-pub const VALUES: &[{rust_type}] = &[ -{literals}]; -"# - ) -} - -#[cfg(test)] -mod tests { - use super::*; - use eql_scalars::CATALOG; - - fn spec(token: &str) -> &'static ScalarSpec { - CATALOG - .iter() - .find(|s| s.token == token) - .expect("catalog token") - } - - #[test] - fn fixture_values_rs_emits_typed_const_for_int4() { - let body = render_fixture_values_rs(spec("int4")); - assert!(body.contains("pub const VALUES: &[i32] = &[")); - assert!(body.contains("eql-scalars::CATALOG")); - assert!(body.contains(" i32::MIN,\n")); - assert!(body.contains(" i32::MAX,\n")); - assert!(body.contains(" -1,\n")); - assert!(body.contains(" 0,\n")); - assert!(body.contains(" 1,\n")); - assert!(!body.contains("AUTO-GENERATED")); - } - - #[test] - fn fixture_values_rs_preserves_catalog_order() { - let body = render_fixture_values_rs(spec("int4")); - let min = body.find("i32::MIN").unwrap(); - let zero = body.find(" 0,").unwrap(); - let max = body.find("i32::MAX").unwrap(); - assert!(min < zero && zero < max); - } - - // Adapted from the plan's `fixture_values_rs_int8_uses_i64`: the shipped - // CATALOG has no int8 (deliberately reserved for a later branch), so this - // exercises the second committed non-i32 type — int2 (i16). - #[test] - fn fixture_values_rs_int2_uses_i16() { - let body = render_fixture_values_rs(spec("int2")); - assert!(body.contains("pub const VALUES: &[i16] = &[")); - assert!(body.contains(" i16::MIN,\n")); - assert!(body.contains(" -30000,\n")); - } -} diff --git a/crates/eql-codegen/src/writer.rs b/crates/eql-codegen/src/writer.rs index 7675af9f..afbcdce0 100644 --- a/crates/eql-codegen/src/writer.rs +++ b/crates/eql-codegen/src/writer.rs @@ -4,18 +4,13 @@ use std::fs; use std::io; use std::path::{Path, PathBuf}; -use crate::consts::{AUTO_GENERATED_HEADER, AUTO_GENERATED_HEADER_RS}; +use crate::consts::AUTO_GENERATED_HEADER; /// First line of the SQL header — the ownership marker. 
fn sql_marker() -> &'static str { AUTO_GENERATED_HEADER.lines().next().unwrap() } -/// First line of the Rust header — the ownership marker. -fn rs_marker() -> &'static str { - AUTO_GENERATED_HEADER_RS.lines().next().unwrap() -} - /// Raised when the generator would clobber a hand-written file. #[derive(Debug)] pub enum WriteError { @@ -53,11 +48,6 @@ pub fn is_generated(path: &Path) -> bool { path.is_file() && first_line(path).map(|l| l == sql_marker()).unwrap_or(false) } -/// True if the file carries the Rust AUTO-GENERATED marker. Port of `is_generated_rs`. -pub fn is_generated_rs(path: &Path) -> bool { - path.is_file() && first_line(path).map(|l| l == rs_marker()).unwrap_or(false) -} - /// Delete every generated .sql file in `directory`, returning removed paths. /// Port of `clean_generated_files`. pub fn clean_generated_files(directory: &Path) -> io::Result> { @@ -107,21 +97,6 @@ pub fn write_generated_file(path: &Path, body: &str) -> Result<(), WriteError> { Ok(()) } -/// Write `body` to a Rust file prefixed with the Rust header. Port of `write_generated_rs`. 
-pub fn write_generated_rs(path: &Path, body: &str) -> Result<(), WriteError> { - if path.exists() && !is_generated_rs(path) { - return Err(WriteError::Ownership(format!( - "refusing to overwrite hand-written file: {} (no AUTO-GENERATED header).", - path.display() - ))); - } - if let Some(parent) = path.parent() { - fs::create_dir_all(parent)?; - } - fs::write(path, format!("{AUTO_GENERATED_HEADER_RS}{body}"))?; - Ok(()) -} - #[cfg(test)] pub(crate) mod test_support { use std::fs; @@ -255,54 +230,4 @@ mod tests { let d = tmp(); assert!(clean_generated_files(d.path()).unwrap().is_empty()); } - - #[test] - fn write_generated_rs_creates_with_rust_header() { - let d = tmp(); - let p = d.path().join("int4_values.rs"); - write_generated_rs(&p, "pub const VALUES: &[i32] = &[];\n").unwrap(); - let text = fs::read_to_string(&p).unwrap(); - assert!(text.starts_with(AUTO_GENERATED_HEADER_RS)); - assert!(text.contains("pub const VALUES")); - } - - #[test] - fn is_generated_rs_true_for_rust_header() { - let d = tmp(); - let p = d.path().join("int4_values.rs"); - fs::write( - &p, - format!("{AUTO_GENERATED_HEADER_RS}pub const VALUES: &[i32] = &[];\n"), - ) - .unwrap(); - assert!(is_generated_rs(&p)); - } - - #[test] - fn is_generated_rs_false_for_handwritten() { - let d = tmp(); - let p = d.path().join("int4_values.rs"); - fs::write(&p, "//! hand-written\npub const VALUES: &[i32] = &[];\n").unwrap(); - assert!(!is_generated_rs(&p)); - } - - #[test] - fn write_generated_rs_refuses_handwritten() { - let d = tmp(); - let p = d.path().join("int4_values.rs"); - fs::write(&p, "//! 
hand-written\n").unwrap(); - let err = write_generated_rs(&p, "pub const VALUES: &[i32] = &[];\n").unwrap_err(); - assert!(err.to_string().contains("hand-written")); - } - - #[test] - fn write_generated_rs_overwrites_existing_generated() { - let d = tmp(); - let p = d.path().join("int4_values.rs"); - fs::write(&p, format!("{AUTO_GENERATED_HEADER_RS}// old\n")).unwrap(); - write_generated_rs(&p, "// new\n").unwrap(); - let text = fs::read_to_string(&p).unwrap(); - assert!(text.contains("// new")); - assert!(!text.contains("// old")); - } } diff --git a/crates/eql-codegen/tests/parity.rs b/crates/eql-codegen/tests/parity.rs index 87c27526..a59ed8dc 100644 --- a/crates/eql-codegen/tests/parity.rs +++ b/crates/eql-codegen/tests/parity.rs @@ -1,8 +1,10 @@ //! THE PARITY GATE. Runs the Rust generator (into a temp dir) and asserts the -//! int4 SQL surface is line-normalized-equal to the `tests/codegen/reference/int4` -//! golden, and that committed `_values.rs` are byte-identical to the -//! generator output. The golden reference — not the retired Python generator — -//! is the sole oracle. +//! int4 SQL surface is byte-for-byte equal to the `tests/codegen/reference/int4` +//! golden (modulo the one leading `-- REFERENCE:` provenance line). The golden +//! reference — not the retired Python generator — is the sole oracle. The +//! plaintext fixture lists are not generated; they live in the catalog +//! (`eql_scalars::INT4_VALUES` / `INT2_VALUES`) and are pinned by +//! `eql-scalars`'s own `values_tests`. 
use std::fs; use std::path::PathBuf; @@ -27,25 +29,6 @@ fn tempdir(tag: &str) -> PathBuf { p } -#[test] -fn rust_generator_matches_committed_values_rs() { - let root = repo_root(); - let out = tempdir("rust-values"); - eql_codegen::generate::generate_all(&out).expect("rust generate_all"); - - for spec in eql_scalars::CATALOG { - let token = spec.token; - let generated = out.join(format!("tests/sqlx/src/fixtures/{token}_values.rs")); - let committed = root.join(format!("tests/sqlx/src/fixtures/{token}_values.rs")); - let g = fs::read(&generated).expect("generated values.rs"); - let c = fs::read(&committed).expect("committed values.rs"); - assert_eq!( - g, c, - "{token}_values.rs: Rust generator output differs from the committed file" - ); - } -} - #[test] fn rust_generator_matches_int4_golden_files() { let root = repo_root(); @@ -61,20 +44,20 @@ fn rust_generator_matches_int4_golden_files() { } let name = path.file_name().unwrap().to_str().unwrap(); let reference = fs::read_to_string(&path).unwrap(); - // Strip the leading `-- REFERENCE:` provenance line. What remains is the - // generated body, which already starts with the template-owned - // `-- AUTOMATICALLY GENERATED FILE.` marker — the same first line the - // materialised file carries, so no header is re-added here. + // Strip the leading `-- REFERENCE:` provenance line(s), preserving the + // remaining bytes verbatim (`split_inclusive` keeps the `\n` + // terminators). What remains is the generated body, which already starts + // with the template-owned `-- AUTOMATICALLY GENERATED FILE.` marker — the + // same first line the materialised file carries — so the comparison is + // byte-for-byte with no header re-added. 
let expected: String = reference - .lines() + .split_inclusive('\n') .skip_while(|l| l.starts_with("-- REFERENCE:") || l.starts_with("// REFERENCE:")) - .map(|l| format!("{l}\n")) .collect(); let actual = fs::read_to_string(gen_dir.join(name)).unwrap(); assert_eq!( - eql_codegen::context::normalize_sql(&actual), - eql_codegen::context::normalize_sql(&expected), - "{name}: materialised output differs from golden (normalized)" + actual, expected, + "{name}: materialised output differs from golden" ); } } diff --git a/crates/eql-scalars/src/lib.rs b/crates/eql-scalars/src/lib.rs index 91cfae3d..f606a07b 100644 --- a/crates/eql-scalars/src/lib.rs +++ b/crates/eql-scalars/src/lib.rs @@ -263,7 +263,10 @@ impl Fixture { /// The integer value for this fixture (`Min`/`Max` -> kind bounds, `Zero` -> /// 0, `Int(n)` -> n), or `None` for the string-backed kinds. Does not /// range-check; `every_fixture_value_is_within_kind_bounds` guards the bounds. - pub fn numeric_value(self, kind: ScalarKind) -> Option { + /// + /// `const fn` so the `int_values!` materialiser can resolve a whole fixture + /// list into a typed `&'static` array at compile time. + pub const fn numeric_value(self, kind: ScalarKind) -> Option { match self { Fixture::Min => Some(kind.min_value()), Fixture::Max => Some(kind.max_value()), @@ -386,6 +389,43 @@ const INT2: ScalarSpec = ScalarSpec { /// drives generation order). New types are appended as their SQL surface lands. pub const CATALOG: &[ScalarSpec] = &[INT4, INT2]; +/// Materialise an integer scalar's fixtures into a typed `&'static` slice at +/// compile time. This is the **single-sourced** plaintext list the SQLx test +/// matrix reads as `ScalarType::FIXTURE_VALUES` and the fixture generator +/// encrypts — derived from the same `CATALOG` row that drives SQL generation, +/// so the oracle cannot drift from the fixture. 
(It replaces the old generated, +/// committed `tests/sqlx/src/fixtures/_values.rs` — a Rust source of truth no +/// longer needs to round-trip through generated Rust.) +/// +/// Integer kinds only: a non-numeric fixture (`Text`/`Numeric`/`Jsonb`) is a +/// const-eval error, mirroring `numeric_value`'s `None`. +macro_rules! int_values { + ($name:ident, $ty:ty, $spec:expr) => { + #[doc = concat!("Distinct plaintext fixture values for `", stringify!($spec), "`, ")] + #[doc = "materialised from its `CATALOG` row (see `int_values!`)."] + pub const $name: &[$ty] = { + const SPEC: ScalarSpec = $spec; + const N: usize = SPEC.fixtures.len(); + const ARR: [$ty; N] = { + let mut out = [0 as $ty; N]; + let mut i = 0; + while i < N { + out[i] = match SPEC.fixtures[i].numeric_value(SPEC.kind) { + Some(v) => v as $ty, + None => panic!("integer scalar fixture must resolve to a number"), + }; + i += 1; + } + out + }; + &ARR + }; + }; +} + +int_values!(INT4_VALUES, i32, INT4); +int_values!(INT2_VALUES, i16, INT2); + #[cfg(test)] mod rust_tests { use super::*; @@ -804,6 +844,80 @@ mod catalog_tests { } } +#[cfg(test)] +mod values_tests { + use super::*; + + // The exact typed lists the SQLx matrix consumes. These pin the values the + // deleted golden `int4_values.rs` / committed `_values.rs` used to pin: + // a catalog edit that changes a fixture must update these assertions. + #[test] + fn int4_values_materialise_to_typed_array() { + assert_eq!( + INT4_VALUES, + &[ + i32::MIN, + -100, + -1, + 0, + 1, + 2, + 5, + 10, + 17, + 25, + 42, + 50, + 100, + 250, + 1000, + 9999, + i32::MAX + ] + ); + } + + #[test] + fn int2_values_materialise_to_typed_array() { + assert_eq!( + INT2_VALUES, + &[ + i16::MIN, + -30000, + -100, + -1, + 0, + 1, + 2, + 5, + 10, + 17, + 25, + 42, + 50, + 100, + 250, + 1000, + 9999, + 30000, + i16::MAX + ] + ); + } + + #[test] + fn materialised_values_track_their_fixture_lists() { + // One value per fixture, in catalog order; sentinels resolve to extremes. 
+ assert_eq!(INT4_VALUES.len(), INT4_FIXTURES.len()); + assert_eq!(INT2_VALUES.len(), INT2_FIXTURES.len()); + assert_eq!(INT4_VALUES.first(), Some(&i32::MIN)); + assert_eq!(INT4_VALUES.last(), Some(&i32::MAX)); + assert_eq!(INT2_VALUES.first(), Some(&i16::MIN)); + assert_eq!(INT2_VALUES.last(), Some(&i16::MAX)); + assert!(INT4_VALUES.contains(&0) && INT2_VALUES.contains(&0)); + } +} + #[cfg(test)] mod invariant_tests { use super::*; diff --git a/docs/reference/encrypted-domain-implementation-spec.md b/docs/reference/encrypted-domain-implementation-spec.md index 838f870c..cf1a4b19 100644 --- a/docs/reference/encrypted-domain-implementation-spec.md +++ b/docs/reference/encrypted-domain-implementation-spec.md @@ -69,6 +69,13 @@ future migration. and no Python: the catalog is the source of truth, validated by the compiler (an undefined `Term` or unknown `ScalarKind` is a compile error) plus catalog `#[test]`s over `CATALOG`. +- [ ] Materialise the type's plaintext fixture list as a typed const next to + `CATALOG`: add `int_values!(_VALUES, , );` (e.g. + `int_values!(INT8_VALUES, i64, INT8);`). The macro resolves the row's + `Fixture` list into a compile-time `&'static []` — the single source the + SQLx matrix reads as `FIXTURE_VALUES`. Pin the exact list with a + `values_tests` assertion. This replaces the old generated, committed + `_values.rs`. - [ ] **If `` needs a new scalar width**, add a `ScalarKind` enum variant in `crates/eql-scalars/src/lib.rs` with its rust-type name, `MIN`/`MAX`/zero symbols, and numeric bounds, and unit-test its `impl` methods. New term @@ -76,10 +83,11 @@ future migration. — not in free-form catalog data. - [ ] Run `cargo run -p eql-codegen` to materialise the generated SQL (`src/encrypted_domain//_{types,functions,operators,aggregates}.sql`, - gitignored) and the committed `tests/sqlx/src/fixtures/_values.rs` - const, or just `mise run build` — every build runs the generator first. - Commit the regenerated `_values.rs` (CI diffs it). 
There is no per-type - codegen task: one run generates every type from `CATALOG`. + gitignored), or just `mise run build` — every build runs the generator + first. There is no per-type codegen task: one run generates every type from + `CATALOG`. The plaintext fixture list is **not** generated — it is + materialised from the catalog row at compile time (see the `int_values!` step above), so + there is nothing to regenerate-and-commit on the test side. - [ ] Generated `*_types.sql` / `*_functions.sql` / `*_operators.sql` / `*_aggregates.sql` are gitignored and never committed. The catalog (`eql-scalars::CATALOG`) plus the `eql-codegen` renderers are the source @@ -89,12 +97,11 @@ future migration. `-- REQUIRE:` edges. This file IS committed. - [ ] Do **not** add a `tests/codegen/reference//` baseline. `int4` is the single golden master for the type-generic generator: the SQL templates are - pure token substitution and the only type-specific rendering is - `_values.rs`, so a per-type baseline can only fail when `int4`'s already - would. Drift protection for the new type comes from the `int4` reference, - the committed `_values.rs` const guarded by the CI staleness check - (`cargo run -p eql-codegen` + `git diff --exit-code`) and the catalog/ - generator `#[test]`s (`cargo test -p eql-scalars -p eql-codegen`), and the + pure token substitution, so a per-type baseline can only fail when `int4`'s + already would. Drift protection for the new type comes from the `int4` + reference, the catalog `values_tests` pinning the materialised + `eql_scalars::_VALUES` const, the catalog/generator `#[test]`s + (`cargo test -p eql-scalars -p eql-codegen`), and the `ordered_numeric_matrix!` SQLx suite (behaviour, not bytes). - [ ] Wire the SQLx matrix oracle. The generated SQL is enough to install the domains, but the `ordered_numeric_matrix!` suite only runs once the Rust @@ -105,10 +112,10 @@ future migration.
| File | Add | |------|-----| | `tests/sqlx/src/fixtures/eql_plaintext.rs` | A sealed `EqlPlaintext` impl for the scalar's Rust type: `impl Sealed for {}`, a `PlaintextSqlType` const for its base column type, `impl EqlPlaintext for ` (`CAST`, `PLAINTEXT_SQL_TYPE`, `to_plaintext` → the right `Plaintext` variant), plus the two `#[test]` casts. | - | `tests/sqlx/src/fixtures/eql_v2_.rs` | `crate::scalar_fixture!("eql_v2_", , VALUES);` (pulls `super::_values::VALUES`). | - | `tests/sqlx/src/fixtures/mod.rs` | `pub mod _values;` and `pub mod eql_v2_;`. | + | `tests/sqlx/src/fixtures/eql_v2_.rs` | `use eql_scalars::_VALUES as VALUES;` then `crate::scalar_fixture!("eql_v2_", , VALUES);`. | + | `tests/sqlx/src/fixtures/mod.rs` | `pub mod eql_v2_;`. | | `tests/sqlx/tests/generate_all_fixtures.rs` | An arm in `generate_for_token`: `"" => fixtures::eql_v2_::spec().run().await,`. The match is exhaustive over the catalog — a catalog token with no arm fails the generator loudly. | - | `tests/sqlx/src/scalar_domains.rs` | `impl ScalarType for ` — `PG_TYPE` (the base PG type, e.g. `"int8"`) and `FIXTURE_VALUES = crate::fixtures::_values::VALUES`. | + | `tests/sqlx/src/scalar_domains.rs` | `impl ScalarType for ` — `PG_TYPE` (the base PG type, e.g. `"int8"`) and `FIXTURE_VALUES = eql_scalars::_VALUES`. | | `tests/sqlx/tests/encrypted_domain/scalars/.rs` | `ordered_numeric_matrix! { suite = , scalar = , eql_type = "eql_v2_" }`. | | `tests/sqlx/tests/encrypted_domain/scalars/mod.rs` | `pub mod ;`. | @@ -347,7 +354,8 @@ absent. ### Single-sourcing the value list The plaintext value list is declared **once**, in the catalog row's `fixtures` -field, and generated into Rust — never hand-maintained in two places: +field, and materialised into a typed Rust const — never hand-maintained in two +places: ```rust fixtures: &[Fixture::Min, Fixture::N(-100), Fixture::N(-1), Fixture::Zero, @@ -365,24 +373,21 @@ explicit invariant `every_fixture_value_is_within_kind_bounds`). 
The same test enforces the matrix invariant: the set **must** include `Min`, `Max`, and zero, or the test fails (the compile-time analogue of the old `load_spec` validation). -`eql-codegen` emits `tests/sqlx/src/fixtures/_values.rs` exposing one -`pub const VALUES: &[]`. Both consumers reference that single -symbol — the fixture generator (`fixtures::eql_v2_::spec`) and the matrix -oracle (`impl ScalarType for { const FIXTURE_VALUES }`) — so the oracle -cannot drift from the values the generator encrypts. - -Unlike the gitignored `*_*.sql` surface and the gitignored encrypted -`tests/sqlx/fixtures/eql_v2_.sql` (whose ciphertext is non-deterministic -per-encrypt), `_values.rs` **is committed**: its rendering is deterministic, -so the CI `codegen` job regenerates it (`cargo run -p eql-codegen`) and runs -`git diff --exit-code` to catch a catalog edit that wasn't regenerated. -Regenerate with `cargo run -p eql-codegen` (or `mise run build`) and commit the -result; never hand-edit it. +The `int_values!` macro (in `crates/eql-scalars/src/lib.rs`) materialises that +`Fixture` list into a `pub const _VALUES: &[]` at compile +time, sitting next to `CATALOG`. Both consumers reference that single symbol — +the fixture generator (`fixtures::eql_v2_::spec`) and the matrix oracle +(`impl ScalarType for { const FIXTURE_VALUES = eql_scalars::_VALUES }`) +— so the oracle cannot drift from the values the generator encrypts. There is no +generated `_values.rs`: a Rust source of truth does not round-trip through +generated Rust. The exact list is pinned by a `values_tests` assertion, and the +`Fixture`-list invariants (`Min`/`Max`/zero present, in-bounds) by the catalog +`#[test]`s. ## 10. 
Build And Verification -- `cargo run -p eql-codegen` (optional; refreshes all generated SQL + - `_values.rs` from the catalog before a full build) +- `cargo run -p eql-codegen` (optional; refreshes all generated SQL from the + catalog before a full build) - `mise run test:codegen` (`cargo test -p eql-scalars -p eql-codegen`) - `mise run clean && mise run build` (regenerates every type's SQL from the catalog first, then builds the release artefacts) diff --git a/tasks/build.sh b/tasks/build.sh index 621b0b72..311dfbc7 100755 --- a/tasks/build.sh +++ b/tasks/build.sh @@ -26,9 +26,10 @@ find src/encrypted_domain -mindepth 2 -type f \ -delete 2>/dev/null || true # Regenerate every type — the catalog (eql-scalars::CATALOG) is the single -# source of truth for the enumeration; eql-codegen renders all SQL and all -# tests/sqlx/src/fixtures/_values.rs in one deterministic run. The orphan -# sweep above still handles the catalog-removed case the generator cannot. +# source of truth for the enumeration; eql-codegen renders all SQL in one +# deterministic run. The plaintext fixture lists are not generated — the SQLx +# tests read them straight from the catalog (eql_scalars::INT4_VALUES / …). The +# orphan sweep above still handles the catalog-removed case the generator cannot. cargo run -p eql-codegen # Fail loudly if any file referenced in a tsorted dep list doesn't exist. 
diff --git a/tasks/codegen-parity.sh b/tasks/codegen-parity.sh index 1f8d1e10..2de923be 100755 --- a/tasks/codegen-parity.sh +++ b/tasks/codegen-parity.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -#MISE description="Parity gate: Rust eql-codegen output matches the int4 golden (normalized) and committed values.rs" +#MISE description="Parity gate: Rust eql-codegen output matches the int4 golden (byte-for-byte)" set -euo pipefail @@ -9,25 +9,31 @@ cd "$REPO_ROOT" echo "==> Generating with the Rust generator (writes the real repo tree)" cargo run -q -p eql-codegen -- > /dev/null -echo "==> Diffing Rust int4 SQL vs golden reference (line-normalized)" -norm() { sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | grep -v '^$'; } +echo "==> Comparing int4 generated SQL file SET vs golden (catches extra/dropped files)" +# The content loop below is golden-driven: it verifies every golden file has a +# matching generated body, so a DROPPED file fails there. It cannot see an EXTRA +# generated file (a new template output, or the new half of a rename) — that name +# is never iterated. Assert the sets are equal first to close that blind spot. +# "Generated" excludes any committed, hand-written SQL (e.g. int4_extensions.sql), +# which lives in this dir but has no golden counterpart; git-tracked == hand-written. +golden_set=$(cd tests/codegen/reference/int4 && ls *.sql | LC_ALL=C sort) +gen_set=$(cd src/encrypted_domain/int4 \ + && comm -23 <(ls *.sql | LC_ALL=C sort) \ + <(git ls-files . | sed 's#.*/##' | LC_ALL=C sort)) +if [ "$golden_set" != "$gen_set" ]; then + echo "int4 generated SQL file set differs from golden (< golden, > generated):" >&2 + diff <(echo "$golden_set") <(echo "$gen_set") >&2 || true + exit 1 +fi + +echo "==> Diffing Rust int4 SQL vs golden reference (byte-for-byte)" for f in tests/codegen/reference/int4/*.sql; do name="$(basename "$f")" - # Reference: drop the 1-line `-- REFERENCE:` provenance line. 
What remains — - # and the whole generated file — both start with the template-owned - # `-- AUTOMATICALLY GENERATED FILE.` marker, so no header strip is needed. - diff <(tail -n +2 "$f" | norm) \ - <(norm < "src/encrypted_domain/int4/$name") + # Drop the 1-line `-- REFERENCE:` provenance line, then compare the remaining + # bytes EXACTLY. Both the reference body (from line 2) and the whole generated + # file start with the template-owned `-- AUTOMATICALLY GENERATED FILE.` marker, + # so no header strip is needed — any whitespace or blank-line drift fails here. + diff <(tail -n +2 "$f") "src/encrypted_domain/int4/$name" done -echo "==> Verifying committed _values.rs are byte-identical (git clean)" -# `git diff` only catches modifications to tracked files; a newly-generated but -# uncommitted _values.rs would slip through. `git status --porcelain` also -# reports untracked files, mirroring the CI codegen job. -if [ -n "$(git status --porcelain -- tests/sqlx/src/fixtures/)" ]; then - echo "values.rs stale or uncommitted after regeneration" >&2 - git status --porcelain -- tests/sqlx/src/fixtures/ >&2 - exit 1 -fi - -echo "PARITY OK: Rust generator matches the int4 golden (normalized) and committed values.rs." +echo "PARITY OK: Rust generator matches the int4 golden (byte-for-byte)." diff --git a/tests/codegen/reference/README.md b/tests/codegen/reference/README.md index 58f01cc1..8a91bf68 100644 --- a/tests/codegen/reference/README.md +++ b/tests/codegen/reference/README.md @@ -1,13 +1,21 @@ # Codegen reference -The SQL files under `int4/` are the original, hand-written reference implementation for the encrypted-domain scalar generator. `int4` is the **single golden master**: the generator in `tasks/codegen/` is type-generic — its SQL templates are pure token substitution, and the only type-specific rendering is the `_values.rs` const — so one anchored type detects all template/term drift for every current and future scalar. 
+The SQL files under `int4/` are the hand-written golden reference for the encrypted-domain scalar generator. `int4` is the **single golden master**: the generator in `crates/eql-codegen` is type-generic — its SQL templates are pure token substitution driven by the `eql-scalars::CATALOG` rows — so one anchored type detects all template/term drift for every current and future scalar. -`tasks/codegen/test_against_reference.py` renders the generator's output for `int4` and asserts it matches these files byte-for-byte. If the generator diverges, either it regressed (fix `tasks/codegen/`) or the reference is being updated deliberately (commit the new `int4` reference in the same PR). +Each reference file's first line is a `-- REFERENCE:` provenance marker; everything after it is the generated body verbatim, starting with the template-owned `-- AUTOMATICALLY GENERATED FILE.` header. + +The parity gate renders the generator's output for `int4` and asserts it matches these files **byte-for-byte** after dropping that single provenance line. It runs three ways, all on the same reference: + +- `crates/eql-codegen/tests/parity.rs` — runs `generate_all` into a temp dir and byte-compares the materialised `int4` SQL surface; +- the in-crate golden tests in `crates/eql-codegen/src/generate.rs` — byte-compare each `render_*_file` output against the corresponding reference; +- `mise run codegen:parity` (`tasks/codegen-parity.sh`) — the CI shell gate, a plain `diff` of `tail -n +2 ` against the regenerated tree. + +If the generator diverges, either it regressed (fix `crates/eql-codegen`) or the reference is being updated deliberately (commit the new `int4` reference in the same PR). Whitespace and blank-line drift now fail the gate — there is no normalization. ## New scalar types do not add a reference Adding a scalar type (`int2`, `int8`, …) does **not** add a `tests/codegen/reference//` directory. 
A per-type baseline would be redundant: the SQL is byte-identical to `int4` modulo the type token, so it can only fail when `int4`'s baseline already would. New types are guaranteed three other ways: -- the `int4` reference here anchors the shared generator (templates + `terms.py`); -- the committed `tests/sqlx/src/fixtures/_values.rs` const is pinned by the CI staleness guard (`git diff --exit-code` after `mise run codegen:domain `) and by the `` cases in `tasks/codegen/test_scalars.py` (the only type-specific rendering, `i16::MIN` vs `i32::MIN`); +- the `int4` reference here anchors the shared generator (templates + the `Term` enum's capability `impl`s in `crates/eql-scalars`); +- the per-type plaintext fixture list (`eql_scalars::INT4_VALUES` / `INT2_VALUES`, materialised from each `CATALOG` row) is pinned by `eql-scalars`'s own `values_tests` — there is no generated `_values.rs` to diff; - the SQLx `ordered_numeric_matrix!` suite exercises the generated SQL's *behaviour* against a real database — a far stronger guarantee than a byte comparison. diff --git a/tests/codegen/reference/int4/int4_eq_functions.sql b/tests/codegen/reference/int4/int4_eq_functions.sql index 21fddfd5..1b244577 100644 --- a/tests/codegen/reference/int4/int4_eq_functions.sql +++ b/tests/codegen/reference/int4/int4_eq_functions.sql @@ -1,4 +1,4 @@ --- REFERENCE: hand-written parity baseline for tasks/codegen/ — see ../README.md +-- REFERENCE: hand-written parity baseline for crates/eql-codegen — see ../README.md -- AUTOMATICALLY GENERATED FILE. 
-- REQUIRE: src/schema.sql -- REQUIRE: src/schema-v3.sql diff --git a/tests/codegen/reference/int4/int4_eq_operators.sql b/tests/codegen/reference/int4/int4_eq_operators.sql index fa0d44cd..a2190e16 100644 --- a/tests/codegen/reference/int4/int4_eq_operators.sql +++ b/tests/codegen/reference/int4/int4_eq_operators.sql @@ -1,4 +1,4 @@ --- REFERENCE: hand-written parity baseline for tasks/codegen/ — see ../README.md +-- REFERENCE: hand-written parity baseline for crates/eql-codegen — see ../README.md -- AUTOMATICALLY GENERATED FILE. -- REQUIRE: src/schema-v3.sql -- REQUIRE: src/encrypted_domain/int4/int4_types.sql diff --git a/tests/codegen/reference/int4/int4_functions.sql b/tests/codegen/reference/int4/int4_functions.sql index 36c9df70..6dae8388 100644 --- a/tests/codegen/reference/int4/int4_functions.sql +++ b/tests/codegen/reference/int4/int4_functions.sql @@ -1,4 +1,4 @@ --- REFERENCE: hand-written parity baseline for tasks/codegen/ — see ../README.md +-- REFERENCE: hand-written parity baseline for crates/eql-codegen — see ../README.md -- AUTOMATICALLY GENERATED FILE. -- REQUIRE: src/schema.sql -- REQUIRE: src/schema-v3.sql diff --git a/tests/codegen/reference/int4/int4_operators.sql b/tests/codegen/reference/int4/int4_operators.sql index def25237..e461c3b7 100644 --- a/tests/codegen/reference/int4/int4_operators.sql +++ b/tests/codegen/reference/int4/int4_operators.sql @@ -1,4 +1,4 @@ --- REFERENCE: hand-written parity baseline for tasks/codegen/ — see ../README.md +-- REFERENCE: hand-written parity baseline for crates/eql-codegen — see ../README.md -- AUTOMATICALLY GENERATED FILE. 
-- REQUIRE: src/schema-v3.sql -- REQUIRE: src/encrypted_domain/int4/int4_types.sql diff --git a/tests/codegen/reference/int4/int4_ord_aggregates.sql b/tests/codegen/reference/int4/int4_ord_aggregates.sql index 7efdf177..08cdc10d 100644 --- a/tests/codegen/reference/int4/int4_ord_aggregates.sql +++ b/tests/codegen/reference/int4/int4_ord_aggregates.sql @@ -1,4 +1,4 @@ --- REFERENCE: hand-written parity baseline for tasks/codegen/ — see ../README.md +-- REFERENCE: hand-written parity baseline for crates/eql-codegen — see ../README.md -- AUTOMATICALLY GENERATED FILE. -- REQUIRE: src/schema-v3.sql -- REQUIRE: src/encrypted_domain/int4/int4_types.sql diff --git a/tests/codegen/reference/int4/int4_ord_functions.sql b/tests/codegen/reference/int4/int4_ord_functions.sql index b4dda68d..2c0ee56b 100644 --- a/tests/codegen/reference/int4/int4_ord_functions.sql +++ b/tests/codegen/reference/int4/int4_ord_functions.sql @@ -1,4 +1,4 @@ --- REFERENCE: hand-written parity baseline for tasks/codegen/ — see ../README.md +-- REFERENCE: hand-written parity baseline for crates/eql-codegen — see ../README.md -- AUTOMATICALLY GENERATED FILE. -- REQUIRE: src/schema.sql -- REQUIRE: src/schema-v3.sql diff --git a/tests/codegen/reference/int4/int4_ord_operators.sql b/tests/codegen/reference/int4/int4_ord_operators.sql index 697f162e..a5321c62 100644 --- a/tests/codegen/reference/int4/int4_ord_operators.sql +++ b/tests/codegen/reference/int4/int4_ord_operators.sql @@ -1,4 +1,4 @@ --- REFERENCE: hand-written parity baseline for tasks/codegen/ — see ../README.md +-- REFERENCE: hand-written parity baseline for crates/eql-codegen — see ../README.md -- AUTOMATICALLY GENERATED FILE. 
-- REQUIRE: src/schema-v3.sql -- REQUIRE: src/encrypted_domain/int4/int4_types.sql diff --git a/tests/codegen/reference/int4/int4_ord_ore_aggregates.sql b/tests/codegen/reference/int4/int4_ord_ore_aggregates.sql index 5b160ed7..de5b0848 100644 --- a/tests/codegen/reference/int4/int4_ord_ore_aggregates.sql +++ b/tests/codegen/reference/int4/int4_ord_ore_aggregates.sql @@ -1,4 +1,4 @@ --- REFERENCE: hand-written parity baseline for tasks/codegen/ — see ../README.md +-- REFERENCE: hand-written parity baseline for crates/eql-codegen — see ../README.md -- AUTOMATICALLY GENERATED FILE. -- REQUIRE: src/schema-v3.sql -- REQUIRE: src/encrypted_domain/int4/int4_types.sql diff --git a/tests/codegen/reference/int4/int4_ord_ore_functions.sql b/tests/codegen/reference/int4/int4_ord_ore_functions.sql index 327bc18c..75f09fb9 100644 --- a/tests/codegen/reference/int4/int4_ord_ore_functions.sql +++ b/tests/codegen/reference/int4/int4_ord_ore_functions.sql @@ -1,4 +1,4 @@ --- REFERENCE: hand-written parity baseline for tasks/codegen/ — see ../README.md +-- REFERENCE: hand-written parity baseline for crates/eql-codegen — see ../README.md -- AUTOMATICALLY GENERATED FILE. -- REQUIRE: src/schema.sql -- REQUIRE: src/schema-v3.sql diff --git a/tests/codegen/reference/int4/int4_ord_ore_operators.sql b/tests/codegen/reference/int4/int4_ord_ore_operators.sql index 47549cdb..52f363cf 100644 --- a/tests/codegen/reference/int4/int4_ord_ore_operators.sql +++ b/tests/codegen/reference/int4/int4_ord_ore_operators.sql @@ -1,4 +1,4 @@ --- REFERENCE: hand-written parity baseline for tasks/codegen/ — see ../README.md +-- REFERENCE: hand-written parity baseline for crates/eql-codegen — see ../README.md -- AUTOMATICALLY GENERATED FILE. 
-- REQUIRE: src/schema-v3.sql -- REQUIRE: src/encrypted_domain/int4/int4_types.sql diff --git a/tests/codegen/reference/int4/int4_types.sql b/tests/codegen/reference/int4/int4_types.sql index bb708a61..ba4d9d89 100644 --- a/tests/codegen/reference/int4/int4_types.sql +++ b/tests/codegen/reference/int4/int4_types.sql @@ -1,4 +1,4 @@ --- REFERENCE: hand-written parity baseline for tasks/codegen/ — see ../README.md +-- REFERENCE: hand-written parity baseline for crates/eql-codegen — see ../README.md -- AUTOMATICALLY GENERATED FILE. -- REQUIRE: src/schema-v3.sql diff --git a/tests/codegen/reference/int4/int4_values.rs b/tests/codegen/reference/int4/int4_values.rs deleted file mode 100644 index 77a69ad0..00000000 --- a/tests/codegen/reference/int4/int4_values.rs +++ /dev/null @@ -1,28 +0,0 @@ -// REFERENCE: hand-reviewed parity baseline for tasks/codegen/ — see ../README.md -//! Fixture plaintext values for the int4 encrypted-domain family. -//! -//! Generated from the `int4` row in `eql-scalars::CATALOG` (`fixtures`) — -//! the single source of truth shared by the fixture generator -//! (`fixtures::eql_v2_int4`) and the matrix oracle -//! (`ScalarType::FIXTURE_VALUES`). - -/// Distinct plaintext values present in the `eql_v2_int4` fixture. -pub const VALUES: &[i32] = &[ - i32::MIN, - -100, - -1, - 0, - 1, - 2, - 5, - 10, - 17, - 25, - 42, - 50, - 100, - 250, - 1000, - 9999, - i32::MAX, -]; diff --git a/tests/sqlx/src/fixtures/eql_v2_int2.rs b/tests/sqlx/src/fixtures/eql_v2_int2.rs index 0848f85e..661e4475 100644 --- a/tests/sqlx/src/fixtures/eql_v2_int2.rs +++ b/tests/sqlx/src/fixtures/eql_v2_int2.rs @@ -7,6 +7,6 @@ //! no EQL dependency; the `eql_v3.int2` domain is layered on top by casting //! `payload` per query. 
-use super::int2_values::VALUES; +use eql_scalars::INT2_VALUES as VALUES; crate::scalar_fixture!("eql_v2_int2", i16, VALUES); diff --git a/tests/sqlx/src/fixtures/eql_v2_int4.rs b/tests/sqlx/src/fixtures/eql_v2_int4.rs index fd28b15b..facb69c4 100644 --- a/tests/sqlx/src/fixtures/eql_v2_int4.rs +++ b/tests/sqlx/src/fixtures/eql_v2_int4.rs @@ -6,6 +6,6 @@ //! no EQL dependency; #225 layers the `eql_v3.int4` domain on top by casting //! `payload` per query. -use super::int4_values::VALUES; +use eql_scalars::INT4_VALUES as VALUES; crate::scalar_fixture!("eql_v2_int4", i32, VALUES); diff --git a/tests/sqlx/src/fixtures/int2_values.rs b/tests/sqlx/src/fixtures/int2_values.rs deleted file mode 100644 index a1c0a847..00000000 --- a/tests/sqlx/src/fixtures/int2_values.rs +++ /dev/null @@ -1,30 +0,0 @@ -// AUTOMATICALLY GENERATED FILE. -//! Fixture plaintext values for the int2 encrypted-domain family. -//! -//! Generated from the `int2` row in `eql-scalars::CATALOG` (`fixtures`) — -//! the single source of truth shared by the fixture generator -//! (`fixtures::eql_v2_int2`) and the matrix oracle -//! (`ScalarType::FIXTURE_VALUES`). - -/// Distinct plaintext values present in the `eql_v2_int2` fixture. -pub const VALUES: &[i16] = &[ - i16::MIN, - -30000, - -100, - -1, - 0, - 1, - 2, - 5, - 10, - 17, - 25, - 42, - 50, - 100, - 250, - 1000, - 9999, - 30000, - i16::MAX, -]; diff --git a/tests/sqlx/src/fixtures/int4_values.rs b/tests/sqlx/src/fixtures/int4_values.rs deleted file mode 100644 index d0c31a63..00000000 --- a/tests/sqlx/src/fixtures/int4_values.rs +++ /dev/null @@ -1,28 +0,0 @@ -// AUTOMATICALLY GENERATED FILE. -//! Fixture plaintext values for the int4 encrypted-domain family. -//! -//! Generated from the `int4` row in `eql-scalars::CATALOG` (`fixtures`) — -//! the single source of truth shared by the fixture generator -//! (`fixtures::eql_v2_int4`) and the matrix oracle -//! (`ScalarType::FIXTURE_VALUES`). 
- -/// Distinct plaintext values present in the `eql_v2_int4` fixture. -pub const VALUES: &[i32] = &[ - i32::MIN, - -100, - -1, - 0, - 1, - 2, - 5, - 10, - 17, - 25, - 42, - 50, - 100, - 250, - 1000, - 9999, - i32::MAX, -]; diff --git a/tests/sqlx/src/fixtures/mod.rs b/tests/sqlx/src/fixtures/mod.rs index f616f556..9a78189e 100644 --- a/tests/sqlx/src/fixtures/mod.rs +++ b/tests/sqlx/src/fixtures/mod.rs @@ -26,14 +26,9 @@ pub mod cipherstash; pub mod driver; -/// Generated from the `int4` row in `eql-scalars::CATALOG` (`fixtures`). -/// Committed and verified by CI; never hand-edit (regenerated by `eql-codegen`). -pub mod int4_values; - +/// Scalar fixtures read their plaintext value lists directly from the catalog +/// (`eql_scalars::INT4_VALUES` / `INT2_VALUES`) — see `scalar_fixture!`. There +/// is no generated `_values.rs` module any more. pub mod eql_v2_int4; -/// Generated from the `int2` row in `eql-scalars::CATALOG` (`fixtures`). -/// Committed and verified by CI; never hand-edit (regenerated by `eql-codegen`). -pub mod int2_values; - pub mod eql_v2_int2; diff --git a/tests/sqlx/src/fixtures/scalar_fixture.rs b/tests/sqlx/src/fixtures/scalar_fixture.rs index 2394b049..1232b8b2 100644 --- a/tests/sqlx/src/fixtures/scalar_fixture.rs +++ b/tests/sqlx/src/fixtures/scalar_fixture.rs @@ -16,7 +16,7 @@ /// - `$name` — the fixture name (`"eql_v2_int2"`), drives every derived path. /// - `$ty` — the Rust plaintext type (`i16`); `<$ty>::MIN`/`MAX` supply the /// signed-extreme assertions. -/// - `$values` — the generated value const (`int2_values::VALUES`). +/// - `$values` — the catalog-materialised value const (`eql_scalars::INT2_VALUES`). 
/// /// Indexes are fixed to `Unique` (HMAC, drives `=` / `<>`) and `Ore` (ORE /// block terms, drives `<` `<=` `>` `>=`) with a committed `jsonb` payload — diff --git a/tests/sqlx/src/scalar_domains.rs b/tests/sqlx/src/scalar_domains.rs index c3acc428..39f5c079 100644 --- a/tests/sqlx/src/scalar_domains.rs +++ b/tests/sqlx/src/scalar_domains.rs @@ -74,20 +74,20 @@ pub trait ScalarType: impl ScalarType for i32 { const PG_TYPE: &'static str = "int4"; - /// Single-sourced from `tasks/codegen/types/int4.toml` `[fixture] values` - /// via the generated `fixtures::int4_values::VALUES` const — the same list - /// the fixture generator encrypts, so the oracle cannot drift from the - /// fixture. Spans the negative boundary, the i32 signed extremes, and zero. - const FIXTURE_VALUES: &'static [i32] = crate::fixtures::int4_values::VALUES; + /// Single-sourced from the `int4` row in `eql-scalars::CATALOG` + /// (`eql_scalars::INT4_VALUES`, materialised from its `Fixture` list) — the + /// same list the fixture generator encrypts, so the oracle cannot drift from + /// the fixture. Spans the negative boundary, the i32 signed extremes, and zero. + const FIXTURE_VALUES: &'static [i32] = eql_scalars::INT4_VALUES; } impl ScalarType for i16 { const PG_TYPE: &'static str = "int2"; - /// Single-sourced from `tasks/codegen/types/int2.toml` `[fixture] values` - /// via the generated `fixtures::int2_values::VALUES` const — the same list - /// the fixture generator encrypts, so the oracle cannot drift from the - /// fixture. Spans the negative boundary, the i16 signed extremes, and zero. - const FIXTURE_VALUES: &'static [i16] = crate::fixtures::int2_values::VALUES; + /// Single-sourced from the `int2` row in `eql-scalars::CATALOG` + /// (`eql_scalars::INT2_VALUES`, materialised from its `Fixture` list) — the + /// same list the fixture generator encrypts, so the oracle cannot drift from + /// the fixture. Spans the negative boundary, the i16 signed extremes, and zero. 
+ const FIXTURE_VALUES: &'static [i16] = eql_scalars::INT2_VALUES; } /// Per-domain capability + payload shape. Storage carries no terms, `Eq` From 5006d58d26902587b702b2cb731780babd6f8747 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 3 Jun 2026 10:06:50 +1000 Subject: [PATCH 16/19] docs(codegen): align generator docs with Rust eql-codegen cutover Rewrite encrypted-domain-generator.md and the int4 golden-reference README to describe the Rust eql-codegen toolchain, replacing stale references to the retired Python codegen (TOML manifests, terms.py/templates.py, the test_against_reference.py byte-oracle, the codegen:domain tasks, and committed _values.rs fixtures). Source of truth is now the eql-scalars CATALOG; SQL is rendered via minijinja templates; parity is the byte-for-byte int4 golden gate (file-set guard + diff) in codegen-parity.sh, with no values.rs check. Also fix the codegen:parity mise task description, which still said "(normalized) + committed values.rs". --- docs/reference/encrypted-domain-generator.md | 702 ++++++++++--------- mise.toml | 2 +- tests/codegen/reference/README.md | 22 +- 3 files changed, 403 insertions(+), 323 deletions(-) diff --git a/docs/reference/encrypted-domain-generator.md b/docs/reference/encrypted-domain-generator.md index 9b2d6a07..770cbeaf 100644 --- a/docs/reference/encrypted-domain-generator.md +++ b/docs/reference/encrypted-domain-generator.md @@ -1,14 +1,22 @@ # Encrypted-Domain Code Generator -How `tasks/codegen/` turns a TOML manifest into the SQL surface for a -scalar encrypted-domain type. This document describes the generator -itself — its inputs, stages, outputs, and the invariants it enforces. -The contract those outputs must satisfy is in +How the Rust `eql-codegen` crate turns the `eql-scalars` catalog into the +SQL surface for a scalar encrypted-domain type. This document describes +the generator itself — its inputs, stages, outputs, and the invariants it +enforces. 
The contract those outputs must satisfy is in [`encrypted-domain-implementation-spec.md`](./encrypted-domain-implementation-spec.md); this file describes the machine that produces them. -The reference type is `eql_v3.int4` (PR #239). `text` and `jsonb` are -outside scope. +The reference type is `eql_v3.int4`. `text` and `jsonb` are outside scope. + +The generator is **Rust, not Python**. There is no TOML manifest, no +`tasks/codegen/` package, no `terms.py`/`templates.py`/`spec.py`. The +source of truth is the `CATALOG` const in +[`crates/eql-scalars/src/lib.rs`](../../crates/eql-scalars/src/lib.rs); +the renderers live in [`crates/eql-codegen/`](../../crates/eql-codegen/). +Adding a scalar type is adding a `ScalarSpec` row to `CATALOG`, validated +by the compiler plus catalog `#[test]`s — never an edit to free-form +manifest data. ## 1. Why a generator @@ -16,198 +24,230 @@ A single scalar encrypted-domain type emits several hundred SQL declarations across eleven files: four domains, three extractors, dozens of comparison wrappers and blockers, 176 `CREATE OPERATOR` statements (44 per domain), and MIN/MAX aggregates for every ordered domain. The shape -is mechanical and -the invariants are unforgiving — a `STRICT` blocker silently bypasses -its exception, a pinned `search_path` disables inlining and reverts -queries to seq scans. The generator exists so each new scalar type adds -one TOML file rather than ninety hand-written declarations that must -agree with each other and with `pin_search_path.sql`, +is mechanical and the invariants are unforgiving — a `STRICT` blocker +silently bypasses its exception, a pinned `search_path` disables inlining +and reverts queries to seq scans. The generator exists so each new scalar +type adds one `CATALOG` row rather than ninety hand-written declarations +that must agree with each other and with `pin_search_path.sql`, `tasks/test/splinter.sh`, and `src/encrypted_domain/functions.sql`. ## 2. 
Pipeline -`tasks/codegen/` is a small Python package. Entry point: -`python -m tasks.codegen.generate `, wrapped by -`mise run codegen:domain ` (`tasks/codegen/domain.sh:10`). -`tasks/build.sh` invokes the same entry point for every manifest at -the start of every `mise run build`, so the generated SQL is never -checked in — the TOML manifest is the source of truth. - -Stages, in order: - -1. **Load manifest** — `spec.load_spec(toml_path)` reads - `tasks/codegen/types/.toml`, validates the `[domain]` table, - validates the token and every domain name as SQL identifiers - (`_SQL_IDENTIFIER`, `spec.py:12`), checks each domain name starts with the - filename token, resolves every listed term against `terms.TERM_CATALOG`, - and parses the optional `[fixture]` table (`_load_fixture_values`, - `spec.py:36`). Returns a `TypeSpec` (`tasks/codegen/spec.py:98`). -2. **Resolve terms** — for each `DomainSpec`, `terms.require_terms` - maps catalog names (`hm`, `ore`) to `Term` records carrying the - extractor name, return type, JSON envelope key, supported - operators, and the SQL `-- REQUIRE:` edges those terms imply - (`tasks/codegen/terms.py:57-88`). -3. **Render** — `generate.render_types_file`, - `generate.render_functions_file`, `generate.render_operators_file`, - and `generate.render_aggregates_file` (the last only for ordered - domains) build SQL strings via the per-construct functions in - `templates.py`; when the manifest declares a `[fixture]` table, - `templates.render_fixture_values_rs` also renders the committed Rust - value const. No template engine — plain f-strings, with the structural - shape of each declaration encoded in code (`tasks/codegen/generate.py`). -4. **Write** — `writer.write_generated_file` prefixes every SQL output with - the `AUTO-GENERATED — DO NOT EDIT` header (`templates.py:13-17`) and - refuses to overwrite any pre-existing file that lacks that marker - (`tasks/codegen/writer.py:67`). 
The committed Rust value const is written - by `writer.write_generated_rs` (`writer.py:78`) with its own Rust - `AUTO-GENERATED` header. `generate_type` cleans stale generated files in - the target directory before rewriting so an abandoned domain disappears on - the next regeneration (`generate.py:221`). - -There is no caching layer, no incremental mode, and no rewriting of -hand-written files. Each invocation regenerates every output for one -type from a single manifest. - -## 3. Manifest format - -```toml -[domain] -int4 = [] -int4_eq = ["hm"] -int4_ord_ore = ["ore"] -int4_ord = ["ore"] +`eql-codegen` is a small Rust crate with a binary entry point. The +generator runs as `cargo run -p eql-codegen` (no subcommand), which calls +`generate::generate_all` (`crates/eql-codegen/src/generate.rs`) over every +row of `eql_scalars::CATALOG`, writing each type's SQL into +`src/encrypted_domain//`. A second subcommand, +`cargo run -p eql-codegen -- list-types`, prints the catalog tokens one per +line (consumed by the fixture and matrix-inventory enumeration). The +binary's `main` (`crates/eql-codegen/src/main.rs`) recognises exactly these +two forms; any other argument is a usage error. + +`tasks/build.sh` runs `cargo run -p eql-codegen` at the start of every +`mise run build`, so the generated SQL is never checked in — the catalog +is the source of truth. (The build first sweeps every generated +`*_{types,functions,operators,aggregates}.sql` under `src/encrypted_domain` +so a type removed from `CATALOG` cannot leave orphans the `src/**/*.sql` +build glob would pick up; hand-written `*_extensions.sql` is preserved by +the name patterns.) + +Stages, in order (`generate_all` → `generate_type`): + +1. **Read the catalog.** `eql_scalars::CATALOG` is the in-binary source of + truth — a `&[ScalarSpec]`, each row a `token`, a `ScalarKind`, an + ordered `&[DomainSpec]`, and a `&[Fixture]` list + (`crates/eql-scalars/src/lib.rs`). 
There is no parse/validate stage at + generation time: the catalog is validated at compile time (an undefined + `Term` or unknown `ScalarKind` does not compile) and by the catalog + `#[test]`s, so by the time `generate_all` runs the data is already + well-formed. +2. **Resolve terms.** For each `DomainSpec`, the `Term` enum's `impl` + methods supply the extractor name, return type, JSON envelope key, + supported operators, and the SQL `-- REQUIRE:` edges those terms imply + (`Term::operators_for_terms`, `term_json_keys`, `term_requires`, + `extractor_for_operator`, `role_for_terms` — `crates/eql-scalars/src/lib.rs`). +3. **Render.** `render_types_file`, `render_functions_file`, + `render_operators_file`, and `render_aggregates_file` (the last only for + ordered domains) build the context structs in + `crates/eql-codegen/src/context.rs` and render them through embedded + **minijinja** templates (`crates/eql-codegen/templates/*.j2`, + compiled in via `include_str!` — no runtime file IO). The structural + shape of each declaration is split between the context builders (Rust) + and the templates (Jinja). +4. **Write.** `clean_generated_files` first deletes every generated `.sql` + in the target directory (recognised by the header marker) so an + abandoned domain disappears on the next regeneration; + `ensure_generated_paths_writable` then refuses to proceed if any target + path is a hand-written file lacking the marker; `write_generated_file` + writes each rendered body verbatim (`crates/eql-codegen/src/writer.rs`). + The template emits the `-- AUTOMATICALLY GENERATED FILE.` marker as its + own first line, so the writer does not prepend a header — it only uses + the marker to recognise files it owns. + +There is no caching layer and no incremental mode. Each `cargo run -p +eql-codegen` regenerates every output for every catalog type from scratch. +Regeneration is deterministic: identical catalog + renderers produce +byte-identical SQL. + +## 3. 
Catalog format + +A scalar type is one `ScalarSpec` row +(`crates/eql-scalars/src/lib.rs`): + +```rust +ScalarSpec { + token: "int4", + kind: ScalarKind::I32, + domains: &[ + DomainSpec { suffix: "", terms: &[] }, + DomainSpec { suffix: "_eq", terms: &[Term::Hm] }, + DomainSpec { suffix: "_ord_ore", terms: &[Term::Ore] }, + DomainSpec { suffix: "_ord", terms: &[Term::Ore] }, + ], + fixtures: INT4_FIXTURES, +} ``` -Rules enforced by `spec.load_spec`: - -- The filename stem is the **type token** (`int4` here). It must match - the CLI argument and prefix every domain name. -- The TOML must have a non-empty `[domain]` table at the top level. The - only other recognised top-level key is the optional `[fixture]` table - (see §3a). -- The filename token and every domain key must be valid lowercase SQL - identifiers (`^[a-z][a-z0-9_]*$`); anything else raises `SpecError`. -- Each domain key must equal the token or start with `_`. -- Each value must be a list of strings, and each string must be a key - in `terms.TERM_CATALOG`. Unknown terms raise `SpecError`. - -The `[domain]` table declares nothing else — no extractor names, no -operator lists, no REQUIRE edges. Every behavioural fact comes from the -term catalog. +Structural rules, enforced by the type system and the catalog `#[test]`s +rather than a runtime validator: + +- `token` supplies the **type token** (`int4` here). Each domain's full + name is `token` + `suffix`; `ScalarSpec::domain_name` makes the old + "domain name must start with the token" rule structural, and + `every_domain_name_starts_with_its_token` pins it. +- `kind` is a `ScalarKind` (`I16` / `I32` / `I64` / `Numeric` / `Text` / + `Jsonb`), which carries the Rust type name, the `MIN`/`MAX`/zero symbols, + and the numeric bounds. Only the integer kinds have an i128 range with + `Min`/`Max`/`Zero` sentinels; the bounded accessors `panic!` on the + others (a misuse guard, gated by `is_int()`). 
+- `domains` is a non-empty `&[DomainSpec]` (pinned by + `every_type_has_at_least_one_domain`). Each `DomainSpec` is a `suffix` + plus a `&[Term]`; the storage domain is `suffix: ""` with no terms. +- `fixtures` is a `&[Fixture]` (see §3a). + +The `DomainSpec` declares nothing else — no extractor names, no operator +lists, no REQUIRE edges. Every behavioural fact comes from the `Term` +enum. Domains may be **twinned** (`int4_ord` and `int4_ord_ore` both carry -`["ore"]`). The generator emits them as independent domains with -byte-identical SQL modulo type name. Twins exist so callers can choose -a name that documents intent ("ordered, regardless of mechanism" vs -"ordered via ORE block") without committing to one term family in a -future migration. - -Manifest order is significant. The generator iterates domains in their -declared TOML order (`generate.py:48`), and that order shows up in the -generated `_types.sql` `DO` block. - -### 3a. Optional `[fixture]` table - -```toml -[fixture] -values = ["MIN", "-1", "ZERO", "1", "MAX"] +`&[Term::Ore]`). The generator emits them as independent domains with +byte-identical SQL modulo type name (`ordered_files_byte_identical_modulo_typename`). +Twins exist so callers can choose a name that documents intent ("ordered, +regardless of mechanism" vs "ordered via ORE block") without committing to +one term family in a future migration. + +Catalog order is significant. The generator iterates `CATALOG` in order +(driving generation order), and iterates each spec's `domains` slice in +order — that order shows up in the generated `_types.sql` `DO` block. + +### 3a. The `fixtures` field + +The `fixtures` field is an ordered `&[Fixture]` — the single source of +truth for the type's plaintext fixture list, consumed by the SQLx fixture +generator and the matrix oracle. 
A `Fixture` is value-kind tagged: +`Min` / `Max` / `Zero` (the integer matrix pivots, resolved per-kind), +`Int(i128)` (an integer literal), and `Numeric`/`Text`/`Jsonb` string +variants. The `fixtures!` macro range-checks each `Int` literal against the +kind at compile time (`N(-40000)` for an `i16` kind does not compile): + +```rust +const INT4_FIXTURES: &[Fixture] = fixtures!(int i32; + Min, N(-100), N(-1), Zero, N(1), N(2), N(5), N(10), N(17), N(25), + N(42), N(50), N(100), N(250), N(1000), N(9999), Max); ``` -A type may declare an ordered `[fixture] values` list — the single source -of truth for the committed Rust const -`tests/sqlx/src/fixtures/_values.rs`, consumed by the SQLx fixture -generator and the matrix oracle. `_load_fixture_values` (`spec.py:36`) -requires a non-empty list of string tokens; each resolves through the -scalar-kind catalog (`scalars.py`) — the sentinels `MIN` / `MAX` / `ZERO` -plus any numeric literal in the type's representable range. Validation -enforces a **distinct-plaintext contract**: duplicates are rejected against -the *resolved numeric* value, so both copy-paste token dups (`"1", "1"`) and -sentinel/literal aliases (`"MIN"` alongside the same number) raise -`SpecError` — and the set **must include MIN, MAX, and zero** (the matrix -comparison pivots). Unlike the gitignored SQL surface, `_values.rs` -**is committed** (its rendering is deterministic), and CI regenerates it and -runs `git diff --exit-code` to catch an un-regenerated manifest edit. See -implementation spec §9 for the authoring guidance. +Catalog `#[test]`s enforce a **distinct-plaintext contract** plus the +matrix-pivot requirement: `fixture_values_are_distinct_by_resolved_number` +rejects duplicates against the resolved value (so both copy-paste dups and +sentinel/literal aliases fail), `fixtures_include_min_max_and_zero` requires +`Min`, `Max`, and zero for integer kinds, and +`every_fixture_value_is_within_kind_bounds` keeps every resolved value in +range. 
These are the compile/test-time analogue of the old `load_spec` +validation. + +The plaintext value list is **not** rendered to a generated file. The +`int_values!` macro (next to `CATALOG`) materialises a `Fixture` list into +a typed `pub const <T>_VALUES: &[<rust type>]` at compile time +(`INT4_VALUES`, `INT2_VALUES`). Both consumers reference that single symbol +— the fixture generator and the matrix oracle's `FIXTURE_VALUES` — so the +oracle cannot drift from the values the generator encrypts. There is no +committed `<T>_values.rs`: a Rust source of truth does not round-trip +through generated Rust. (The old generated, committed file is gone.) The +exact materialised list is pinned by the catalog's `values_tests`. ## 4. Term catalog -`tasks/codegen/terms.py:25-49` defines every term the materializer -recognises. A term is a frozen dataclass: - -```python -Term( - name="hm", # manifest key - json_key="hm", # envelope payload key - extractor="eq_term", # SQL extractor function name - returns="eql_v2.hmac_256", # extractor return type - ctor="hmac_256", # eql_v2 constructor in jsonb - role="eq", # file-header phrasing - operators=("=", "<>"), # operators this term enables - requires=("src/hmac_256/functions.sql",) # SQL REQUIRE edges -) -``` +The `Term` enum (`crates/eql-scalars/src/lib.rs`) defines every term the +materializer recognises. The `json_key`/`extractor`/`returns`/`ctor` +values are the cross-schema SQL contract — changing one is a generated-SQL +behaviour change, not a refactor.
-Current catalog: +| Term | JSON key | Extractor | Returns | Operators | +| ----- | -------- | ----------- | -------------------------------- | -------------------------- | +| `Hm` | `hm` | `eq_term` | `eql_v2.hmac_256` | `=` `<>` | +| `Ore` | `ob` | `ord_term` | `eql_v2.ore_block_u64_8_256` | `=` `<>` `<` `<=` `>` `>=` | -| Term | JSON key | Extractor | Returns | Operators | -| ----- | -------- | ----------- | -------------------------------- | ---------------------------------- | -| `hm` | `hm` | `eq_term` | `eql_v2.hmac_256` | `=` `<>` | -| `ore` | `ob` | `ord_term` | `eql_v2.ore_block_u64_8_256` | `=` `<>` `<` `<=` `>` `>=` | +The index-term return types (`eql_v2.hmac_256`, +`eql_v2.ore_block_u64_8_256`) live in `eql_v2` and are referenced +cross-schema; the domains, extractors, and wrappers live in `eql_v3`. -Adding a term is a code change to `terms.py` with matching tests in -`test_terms.py` — never a free-form manifest field. The catalog is the -only source of operator support, extractor identity, and REQUIRE edges; -the manifest is a thin selector over it. +Adding a term is a code change to the `Term` enum's `impl` methods +(`json_key`, `extractor`, `returns`, `ctor`, `role`, `operators`, +`requires`) with matching `#[test]`s (`term_tests` / `term_helper_tests`) +— never a free-form catalog field. The `Term` enum is the only source of +operator support, extractor identity, and REQUIRE edges; a `DomainSpec` is +a thin selector over it. ## 5. 
The operator surface -`tasks/codegen/operator_surface.py` enumerates the surface every generated -domain declares: - -- **Supported-capable comparisons**: `=` `<>` `<` `<=` `>` `>=` `@>` `<@` -- **Path blockers**: `->` `->>` -- **Native `jsonb` fallback blockers**: `?` `?|` `?&` `@?` `@@` `#>` `#>>` `-` `#-` `||` - -Comparison and path operators keep the historical three-argument shapes: - -- Symmetric: `(domain, domain)`, `(domain, jsonb)`, `(jsonb, domain)` -- Path: `(domain, text)`, `(domain, integer)`, `(jsonb, domain)` - -Native `jsonb` fallback blockers use only the shapes PostgreSQL exposes -for `jsonb` itself, for a total of **44 `CREATE OPERATOR` statements per -domain**. Supported operators are emitted with full planner metadata -(`COMMUTATOR`, `NEGATOR`, `RESTRICT`, `JOIN` selectivity estimators) and -back onto inlinable wrappers; unsupported operators carry minimal metadata -and back onto blockers. - -Path operators always back onto blockers — neither current term -enables them. The additional native `jsonb` operators are blocker-only. -Untyped string literals are a PostgreSQL resolver edge: `? 'c'` can still -select the built-in `jsonb` operator, while `? 'c'::text` and bound text -parameters select the generated blocker. - -The union of these three lists is `KNOWN_JSONB_OPERATORS`. A live-DB -structural guard +`crates/eql-codegen/src/operator_surface.rs` enumerates the 20-operator +surface every generated domain declares (`OPERATORS`): + +- **Comparison operators**: `=` `<>` `<` `<=` `>` `>=` `@>` `<@` +- **Path-selector operators**: `->` `->>` +- **Native `jsonb` operators**: `?` `?|` `?&` `@?` `@@` `#>` `#>>` `-` `#-` `||` + +Each operator carries its PostgreSQL-shaped signatures. The comparison +operators use the three symmetric shapes — `(domain, domain)`, +`(domain, jsonb)`, `(jsonb, domain)`; the path and native operators use +only the shapes PostgreSQL exposes for `jsonb` itself. 
Summed across all +20 operators, that is **44 `CREATE OPERATOR` statements per domain** +(`operators_file_has_forty_four`). + +Whether an operator routes to a wrapper or a blocker is a per-domain +decision driven by the domain's terms (`Term::operators_for_terms`), not a +property of the operator. Supported operators are emitted with full planner +metadata (`COMMUTATOR`, `NEGATOR`, `RESTRICT`, `JOIN` selectivity +estimators) and back onto inlinable wrappers; unsupported operators carry +minimal metadata and back onto blockers (`operator_entry` only renders +metadata when the operator is supported on that domain). + +Path operators always back onto blockers — neither current term enables +them. The native `jsonb` operators are blocker-only. Untyped string +literals are a PostgreSQL resolver edge: `? 'c'` can still select the +built-in `jsonb` operator, while `? 'c'::text` and bound text parameters +select the generated blocker. + +A live-DB structural guard (`tests/sqlx/tests/encrypted_domain/family/jsonb_operator_surface.rs`) -queries `pg_operator` for every operator with a `jsonb` argument and asserts -the set is a subset of this union, so a future PostgreSQL version that adds a -`jsonb` operator nobody enumerated here fails the test rather than silently -routing an encrypted column to native plaintext-`jsonb` semantics. -`test_operator_surface.py` pins the Python union; the Rust test mirrors it. +queries `pg_operator` for every operator with a `jsonb` argument and +asserts the set is a subset of the surface this module enumerates, so a +future PostgreSQL version that adds a `jsonb` operator nobody enumerated +here fails the test rather than silently routing an encrypted column to +native plaintext-`jsonb` semantics. The `operator_surface` unit tests pin +the Rust surface (20 operators, signatures, metadata); the live-DB test +mirrors it. ## 6. 
Generated outputs -For a manifest with `D` domains of which `A` are ordered (ord-capable), -the generator writes `1 + 2D + A` SQL files into -`src/encrypted_domain//`, plus — when the manifest carries a -`[fixture]` table — one committed Rust const at -`tests/sqlx/src/fixtures/_values.rs`. For `int4` (`D = 4`, `A = 2`): -eleven SQL files and one Rust file. The SQL outputs are gitignored — `tasks/build.sh` regenerates them at the -start of every build from each `tasks/codegen/types/.toml`, -`mise run codegen:domain ` refreshes a single type manually, and -`mise run codegen:domain:all` regenerates every type in one invocation (the -same `generate.py --all` enumeration the build uses). The manifest plus -`tasks/codegen/terms.py` are the source of truth. +For a type with `D` domains of which `A` are ordered (ord-capable), the +generator writes `1 + 2D + A` SQL files into +`src/encrypted_domain//`. For `int4` (`D = 4`, `A = 2`): eleven SQL +files. The SQL outputs are **gitignored** — +`.gitignore` excludes `src/encrypted_domain/*/*_{types,functions,operators,aggregates}.sql`, +and `tasks/build.sh` regenerates them at the start of every build. There is +**no per-type codegen task**: one `cargo run -p eql-codegen` regenerates +every catalog type in a single deterministic run. | File | Content | | --------------------------------- | ---------------------------------------------------------------------------------------- | @@ -218,29 +258,30 @@ same `generate.py --all` enumeration the build uses). The manifest plus Every file: -- Opens with the `AUTO-GENERATED — DO NOT EDIT` header - (`templates.py:13-17`). +- Opens with the `-- AUTOMATICALLY GENERATED FILE.` marker (the project-wide + marker `docs:validate` greps on to skip generated SQL — + `crates/eql-codegen/src/consts.rs`). 
- Declares its `-- REQUIRE:` edges in dependency order — types files - require `src/schema.sql`; function files require schema, types, and + require `src/schema-v3.sql`; function files require schema, types, and `src/encrypted_domain/functions.sql` plus each term's `requires` set; - operator files require schema, types, and their domain's function - file; aggregate files require schema, types, and their domain's - function and operator files. -- Carries Doxygen `--! @file` / `--! @brief` headers describing its - role. + operator files require `src/schema-v3.sql`, types, and their domain's + function file; aggregate files require `src/schema-v3.sql`, types, and + their domain's function and operator files. +- Carries Doxygen `--! @file` / `--! @brief` headers describing its role. ### Function-count totals per domain -| Domain terms | Extractors | Wrappers | Blockers | Functions | Operators | -| ------------ | ---------: | -------: | -------: | --------: | --------: | -| none | 0 | 0 | 44 | 44 | 44 | -| `["hm"]` | 1 | 6 | 38 | 45 | 44 | -| `["ore"]` | 1 | 18 | 26 | 45 | 44 | +| Domain terms | Extractors | Wrappers | Blockers | Functions | Operators | +| ---------------- | ---------: | -------: | -------: | --------: | --------: | +| none | 0 | 0 | 44 | 44 | 44 | +| `&[Term::Hm]` | 1 | 6 | 38 | 45 | 44 | +| `&[Term::Ore]` | 1 | 18 | 26 | 45 | 44 | -Six wrappers for `hm` = `=` and `<>` × three shapes. Eighteen for `ore` +Six wrappers for `Hm` = `=` and `<>` × three shapes. Eighteen for `Ore` = six operators × three shapes. The 44-operator total never moves; the wrapper/blocker split is what shifts, and native `jsonb` fallback -operators are always blockers. +operators are always blockers. (Pinned by `storage_functions_file_is_all_blockers`, +`eq_functions_file_counts`, `ore_functions_file_counts`.) The table above covers `_functions.sql` only. 
Ordered domains additionally emit `_aggregates.sql` — two state functions @@ -252,67 +293,74 @@ parallel aggregation on large `GROUP BY` ORE workloads with no decryption. ## 7. Invariants the generator enforces -The generator's job is partly to write SQL and partly to make -incorrect SQL unreachable. Invariants encoded in code: - -- **Blockers are never `STRICT`.** `render_blocker_bool`, - `render_blocker_path`, and `render_blocker_native` emit - `IMMUTABLE PARALLEL SAFE` without the - `STRICT` qualifier (`templates.py:263-345`), so a `NULL` - argument still reaches the `RAISE` and the unsupported-operator - exception fires. There is no code path that produces a strict - blocker. -- **Wrappers are inlinable SQL.** `render_wrapper` and - `render_extractor` emit `LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE` - with a single-statement `SELECT` and no `SET search_path` - (`templates.py:218-260`). `pin_search_path.sql:265-290` - catches them structurally and leaves them unpinned. -- **Aggregate state functions are the deliberate exception.** - `render_aggregate` emits `min_sfunc` / `max_sfunc` as - `LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE` *with* a pinned - `SET search_path` (`templates.py:379-452`). They are aggregate transition - functions, not index expressions, so pinning is correct; the generated - `min` / `max` aggregates are allowlisted by name in `splinter.sh`. The - aggregates are `parallel = safe` with the sfunc reused as `combinefunc`. +The generator's job is partly to write SQL and partly to make incorrect +SQL unreachable. Invariants encoded in the renderers / templates and +guarded by `#[test]`s in `crates/eql-codegen/src/generate.rs`: + +- **Blockers are never `STRICT` and always `plpgsql`.** The + unsupported-operator template emits each blocker as `IMMUTABLE PARALLEL + SAFE` / `LANGUAGE plpgsql` without `STRICT`, so a `NULL` argument still + reaches the `RAISE`. 
`blockers_are_never_strict_and_always_plpgsql` + asserts the storage domain (all blockers) contains no `STRICT` and as + many `LANGUAGE plpgsql` as `CREATE FUNCTION`. A `LANGUAGE sql` blocker + would be inlinable and could be elided when the result is provably + unused; `plpgsql` is opaque to the planner so the `RAISE` survives. +- **Wrappers and extractors are inlinable SQL.** They emit `LANGUAGE sql + IMMUTABLE STRICT PARALLEL SAFE` with a single-statement `SELECT` and **no + `SET search_path`** (`inlinable_functions_have_no_set_search_path`). A + pinned `search_path` disables inlining. `tasks/pin_search_path.sql` + recognises these functions structurally — by language (`sql`), volatility + (`IMMUTABLE`), and a jsonb-backed `DOMAIN` argument in the `eql_v3` + schema — and leaves them unpinned, with no per-type edit. +- **Aggregate state functions are the deliberate exception.** `min_sfunc` / + `max_sfunc` are `LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE` *with* + a pinned `SET search_path` (`aggregate_state_functions_are_plpgsql_not_inlinable`). + They are aggregate transition functions, not index expressions, so + pinning is correct; the generated `min` / `max` aggregates are + allowlisted by name in `splinter.sh`. - **SQL-literal injection is structurally prevented.** Every string interpolated into a single-quoted SQL literal — payload keys, operator - symbols, domain names in `RAISE` messages — passes through `_sql_str` - (`templates.py:46`), which doubles embedded single quotes. Today's catalog - strings are all quote-free so it is a no-op, but it guarantees a future - quote-bearing catalog string cannot break out of its literal. + symbols, domain names in `RAISE` messages — passes through `sql_str` + (`crates/eql-codegen/src/consts.rs`), which doubles embedded single + quotes. 
Today's catalog strings are all quote-free so it is a no-op, but + it guarantees a future quote-bearing string cannot break out of its + literal (`unsupported_entry_preserves_operator_literal_and_domain_lit_is_escaped`, + `domain_block_escapes_quote_bearing_name`). - **No domain-over-domain.** Every domain is `CREATE DOMAIN eql_v3. - AS jsonb`, never `AS ` (`templates.py:72`). PostgreSQL - resolves operators against the underlying base type; a derived domain - would silently bypass the fixed operator surface. -- **No operator class on a domain.** The generator emits operators, - not operator classes. Callers index through the extractor function - (e.g. `USING btree (eql_v3.ord_term(col))`), whose return type - already carries a default opclass. -- **Ownership boundary.** `writer.is_generated` recognises owned files - by their header line and refuses to overwrite anything else - (`writer.py:20-26`, `44-53`). A hand-written file at a generated - path is a hard error, not a silent clobber. Stale generated files - for removed domains are cleaned before the new files land - (`writer.py:29-41`). + AS jsonb`, never `AS ` (`types_file_has_all_four_domains`). + PostgreSQL resolves operators against the underlying base type; a derived + domain would silently bypass the fixed operator surface. +- **No operator class on a domain.** The generator emits operators, not + operator classes. Callers index through the extractor function (e.g. + `USING btree (eql_v3.ord_term(col))`), whose return type already carries + a default opclass. +- **Ownership boundary.** `is_generated` recognises owned files by their + header marker; `ensure_generated_paths_writable` refuses to overwrite + anything else, and `clean_generated_files` deletes only files carrying + the marker (`crates/eql-codegen/src/writer.rs`). A hand-written file at a + generated path is a hard error, not a silent clobber. Stale generated + files for removed domains are cleaned before the new files land. ## 8. 
Extension files -`_extensions.sql` is the hand-written sibling. The generator -never creates, lists, or cleans it; it has no auto-generated header -and must declare its own `-- REQUIRE:` edges. Use it for behaviour -that's specific to the type and not part of the fixed surface — e.g. -cross-domain casts, helper functions, type-specific constraints. +`_extensions.sql` is the hand-written sibling. The generator never +creates, lists, or cleans it; it has no auto-generated header and must +declare its own `-- REQUIRE:` edges. Use it for behaviour that's specific +to the type and not part of the fixed surface — e.g. cross-domain casts, +helper functions, type-specific constraints. Unlike the generated +siblings, `_extensions.sql` IS committed. (Neither `int4` nor `int2` +ships one today — there is no committed `*_extensions.sql` in the tree.) -`pin_search_path.sql:291-302` describes the fallback marker for -inline-critical extension functions that take no domain argument and -so escape the structural skip: +`tasks/pin_search_path.sql` describes the fallback marker for +inline-critical extension functions that take no domain argument and so +escape the structural skip: ```sql COMMENT ON FUNCTION eql_v2.my_helper(...) IS 'eql-inline-critical: ...'; ``` -The generator does **not** emit this marker; every function it -produces takes a domain argument and is covered by the structural skip +The generator does **not** emit this marker; every function it produces +takes a domain argument and is covered by the structural skip intrinsically. ## 9. Lint and test integration @@ -320,90 +368,116 @@ intrinsically. The generator depends on two pieces of build tooling recognising its output without per-type edits: -- **`tasks/pin_search_path.sql:265-290`** — structural skip identifies - encrypted-domain functions by language (`sql`), volatility - (`IMMUTABLE`), and the presence of at least one argument typed as a - jsonb-backed `DOMAIN` in the `eql_v3` schema. 
New scalar types - need no edit here. +- **`tasks/pin_search_path.sql`** — structural skip identifies + encrypted-domain functions by language (`sql`), volatility (`IMMUTABLE`), + and the presence of at least one argument typed as a jsonb-backed + `DOMAIN` in the `eql_v3` schema. New scalar types need no edit here. - **`tasks/test/splinter.sh`** — name-based allowlist. The converged - wrapper names (`eq`, `neq`, `lt`, `lte`, `gt`, `gte`, `eq_term`, - `ord_term`) are already covered by entries originally added for - `ste_vec_entry` and friends (`splinter.sh:87-104`). Splinter matches - by name only, so a new scalar type that uses the catalog extractors - inherits coverage. Adding a new term whose extractor has a new name - requires a splinter entry. + wrapper / extractor names (`eq`, `neq`, `lt`, `lte`, `gt`, `gte`, + `eq_term`, `ord_term`) plus the generated `min` / `max` aggregates are + covered by `eql_v3`-schema entries. Splinter matches by name only, so a + new scalar type that uses the catalog extractors inherits coverage. + Adding a new term whose extractor has a new name requires a splinter + entry. ## 10. Tests -`mise run test:codegen` runs the generator test suite — `pytest -tasks/codegen` — with no database required: - -- `test_spec.py`, `test_terms.py`, `test_scalars.py`, - `test_operator_surface.py`, `test_templates.py`, `test_writer.py` — unit - tests per module. -- `test_generate.py` — end-to-end rendering tests asserting file - counts and structural shape. -- `test_against_reference.py` — byte-for-byte match of in-memory - `render_*_file` output against a hand-reviewed (header-stripped) - reference under `tests/codegen/reference/int4/`. Runs anywhere - without depending on materialised `src/encrypted_domain//`. The - reference fixture is the human-readable contract that survives - generator refactors. 
- -The codegen suite is a prerequisite of the PostgreSQL test matrix -(`tasks/test.sh`), so generated-SQL drift fails CI before any database +The generator's tests are Rust, run by `mise run test:codegen` +(`cargo test -p eql-scalars -p eql-codegen`) — no database required. The +broader `mise run test:crates` adds `cargo clippy ... -D warnings`. + +- **`eql-scalars` unit tests** — `rust_tests`, `term_tests`, + `term_helper_tests`, `fixture_tests`, `catalog_tests`, `invariant_tests`, + `values_tests` over `CATALOG`, the `Term`/`ScalarKind`/`Fixture` impls, + and the materialised `_VALUES` consts + (`crates/eql-scalars/src/lib.rs`). +- **`eql-codegen` unit tests** — file counts, language/volatility + invariants, escaping guards, and twin byte-identity + (`crates/eql-codegen/src/generate.rs` `#[cfg(test)]` module). +- **The parity gate** — `mise run codegen:parity` + (`tasks/codegen-parity.sh`). It runs `cargo run -p eql-codegen` into the + real tree, then: + 1. compares the int4 generated SQL **file set** against the golden under + `tests/codegen/reference/int4/*.sql`, excluding committed hand-written + files (`comm -23` of `ls` against `git ls-files`), so an extra or + dropped generated file fails; and + 2. diffs each golden file **byte-for-byte** against its generated + counterpart, after dropping the golden's single leading + `-- REFERENCE:` provenance line (`tail -n +2`). Both bodies start with + the `-- AUTOMATICALLY GENERATED FILE.` marker, so no header strip is + needed. + The same byte-for-byte assertion runs in-crate as + `crates/eql-codegen/tests/parity.rs` (`rust_generator_matches_int4_golden_files`) + and in the `generate.rs` golden tests. The golden reference — not any + Python oracle — is the sole contract that survives generator refactors. 
+ +CI runs these in three jobs in `.github/workflows/test-eql.yml`: the +`test:crates` job (`Rust workspace crates`) compiles/lints/tests the +crates, the `codegen` job (`Encrypted-domain codegen`) runs `mise run +codegen:parity`, and the `matrix-coverage` job runs `mise run +test:matrix:inventory`. The codegen job is a prerequisite of the +PostgreSQL test matrix, so generated-SQL drift fails CI before any database test runs. ## 11. Adding a new scalar type -The end-to-end shape from a generator perspective: - -1. **Author** `tasks/codegen/types/.toml`. Domain names must - start with the token; term names must already exist in - `terms.TERM_CATALOG`. If `` is a new scalar kind, first register - a `ScalarKind` in `scalars.py` — `load_spec` resolves the scalar before - anything else, so an unregistered token raises - `ScalarError: unknown scalar token ''`. -2. **Regenerate**. Either run `mise run codegen:domain ` while - iterating, or just `mise run build` — the build regenerates every - manifest first. The generator cleans stale generated files, writes - new ones, and refuses any hand-written file at a generated path. - Generated `*_types.sql` / `*_functions.sql` / `*_operators.sql` are - gitignored and never committed. -3. **Hand-write** `_extensions.sql` if the type needs SQL - beyond the fixed surface. Add `eql-inline-critical` markers only on - inline-critical helpers that take no domain argument. This file IS +From a generator perspective: + +1. **Add a `ScalarSpec` row to `eql_scalars::CATALOG`** + (`crates/eql-scalars/src/lib.rs`) — `token`, `kind`, the `domains` + slice, and the `fixtures` list. Term names must be `Term` variants and + the kind must be a `ScalarKind` variant, or it does not compile. If the + type needs a new scalar width, add a `ScalarKind` variant (with its + rust-type name, `MIN`/`MAX`/zero symbols, and bounds) and unit-test its + `impl`. New term behaviour belongs in the `Term` enum's `impl`, not in + catalog data. +2. 
**Materialise the value list** with `int_values!(_VALUES, , + );` next to `CATALOG`, and pin it with a `values_tests` + assertion. This is the single source the SQLx matrix reads as + `FIXTURE_VALUES`. There is nothing to regenerate-and-commit on the test + side — it is a compile-time const, not a generated file. +3. **Regenerate.** `cargo run -p eql-codegen` (or just `mise run build` — + the build runs the generator first). One run regenerates every catalog + type; there is no per-type codegen task. The generated + `*_{types,functions,operators,aggregates}.sql` are gitignored and never committed. -4. **Build picks it up automatically** — `tasks/build.sh` regenerates - before computing the `tsort` graph, so the new files appear in the - dependency walk via the `-- REQUIRE:` edges the generator emits. -5. **Test.** Do **not** add a `tests/codegen/reference//` baseline. - `int4` is the sole golden master for the type-generic generator: the SQL - templates are pure token substitution and the only type-specific rendering - is `_values.rs`, so a per-type baseline can only fail where `int4`'s - already would. Drift protection for the new type comes from the `int4` - reference (shared templates + `terms.py`), the committed `_values.rs` - const guarded by the codegen staleness check, the `` cases in - `test_scalars.py`, and the `ordered_numeric_matrix!` SQLx suite (behaviour, - not bytes). Run `mise run test:codegen`, the relevant SQLx suites, and the - PostgreSQL matrix. -6. **Snapshot the matrix inventory.** Run `mise run test:matrix:inventory` - and commit the new `tests/sqlx/snapshots/_matrix_tests.txt` — the - sorted list of the type's `scalars::::*` test names. CI's - `matrix-coverage` job `git diff --exit-code`s it (like `_values.rs`) - to catch a silently dropped or renamed matrix test. The snapshot is a - committed test baseline, not gitignored generated SQL. See - `tests/sqlx/snapshots/README.md`. 
- -Adding a new **term** is a bigger move — edit `terms.py`, add tests, -audit `splinter.sh` for a name collision, and update the reference -fixture under `tests/codegen/reference/`. +4. **Hand-write** `<T>_extensions.sql` if the type needs SQL beyond the + fixed surface, with explicit `-- REQUIRE:` edges. This file IS committed. +5. **Do not add a `tests/codegen/reference/<T>/` baseline.** `int4` is + the sole golden master for the type-generic generator: the templates are + pure token substitution, so a per-type baseline can only fail where + `int4`'s already would. Drift protection for the new type comes from the + `int4` reference (shared templates + `Term` enum), the catalog + `values_tests` pinning the materialised `<T>_VALUES`, the + catalog/generator `#[test]`s, and the `ordered_numeric_matrix!` SQLx + suite (behaviour, not bytes). +6. **Wire the SQLx matrix oracle and snapshot the inventory.** The + implementation spec §2 lists the hand-maintained registration files. + Then run `mise run test:matrix:inventory`: it normalizes each present + type's `scalars::<T>::*` test-name set to `<T>`, asserts it equals + the single canonical `tests/sqlx/snapshots/matrix_tests.txt`, and + cross-checks the present type set against `cargo run -p eql-codegen -- + list-types`. There is **no per-type snapshot** — the per-type + `<T>_matrix_tests.txt` files were collapsed into one token-normalized + snapshot. You only regenerate `matrix_tests.txt` when the macro's + emitted name set itself changes. A catalog type added without its matrix + wiring fails the cross-check (catalog has the type, binary has no + `scalars::<T>::` tests). See `tests/sqlx/snapshots/README.md` and + the implementation spec §2 / §8. + +Adding a new **term** is a bigger move — edit the `Term` enum's `impl` +methods, add `#[test]`s, audit `splinter.sh` for a name collision if the +extractor name is new, and (because it changes the int4 surface) update the +golden reference under `tests/codegen/reference/int4/`. ## 12.
Out of scope -`text` and `jsonb` are not materialised through this generator. There -is no guard preventing a `text.toml` from being authored; the catalog -simply lacks the term shape those types would need. Text and JSONB -encrypted behaviour lives on the composite `eql_v2_encrypted` type and -its hand-written operator surface in `src/encrypted/` and -`src/operators/`, not the scalar materializer. +`text` and `jsonb` are not materialised through this generator. The +`ScalarKind` enum carries `Text`/`Numeric`/`Jsonb` variants and the +`Fixture` enum carries their string-backed shapes at the capability layer, +but `CATALOG` declares only the integer scalars today, so no `text`/`jsonb` +SQL surface is generated. Text and JSONB encrypted behaviour lives on the +composite `eql_v2_encrypted` type and its hand-written operator surface in +`src/encrypted/` and `src/operators/`, not the scalar materializer. +`jsonb` in particular needs a separate SQL design beyond this +ordered-scalar materializer. diff --git a/mise.toml b/mise.toml index d4fabf59..06c13bbc 100644 --- a/mise.toml +++ b/mise.toml @@ -94,7 +94,7 @@ cargo test --test payload_schema_tests """ [tasks."codegen:parity"] -description = "Parity gate: Rust eql-codegen output matches the int4 golden (normalized) + committed values.rs" +description = "Parity gate: Rust eql-codegen output matches the int4 golden (byte-for-byte)" dir = "{{config_root}}" run = "bash tasks/codegen-parity.sh" diff --git a/tests/codegen/reference/README.md b/tests/codegen/reference/README.md index 8a91bf68..ae7204ab 100644 --- a/tests/codegen/reference/README.md +++ b/tests/codegen/reference/README.md @@ -1,21 +1,27 @@ # Codegen reference -The SQL files under `int4/` are the hand-written golden reference for the encrypted-domain scalar generator. 
`int4` is the **single golden master**: the generator in `crates/eql-codegen` is type-generic — its SQL templates are pure token substitution driven by the `eql-scalars::CATALOG` rows — so one anchored type detects all template/term drift for every current and future scalar. +The SQL files under `int4/` are the hand-maintained golden reference for the encrypted-domain scalar generator, the Rust crate `crates/eql-codegen` (embedded minijinja templates in `crates/eql-codegen/templates/*.j2`). `int4` is the **single golden master**: the generator is type-generic — its templates are pure token substitution driven by the `eql_scalars::CATALOG` rows (`crates/eql-scalars/src/lib.rs`) — so one anchored type detects all template/term drift for every current and future scalar. Each reference file's first line is a `-- REFERENCE:` provenance marker; everything after it is the generated body verbatim, starting with the template-owned `-- AUTOMATICALLY GENERATED FILE.` header. -The parity gate renders the generator's output for `int4` and asserts it matches these files **byte-for-byte** after dropping that single provenance line. It runs three ways, all on the same reference: +The parity gate runs the generator (`cargo run -p eql-codegen`, which writes the real `src/encrypted_domain/int4/` tree) and asserts its output matches these files **byte-for-byte** after dropping that single provenance line. It runs three ways, all on the same reference: -- `crates/eql-codegen/tests/parity.rs` — runs `generate_all` into a temp dir and byte-compares the materialised `int4` SQL surface; -- the in-crate golden tests in `crates/eql-codegen/src/generate.rs` — byte-compare each `render_*_file` output against the corresponding reference; -- `mise run codegen:parity` (`tasks/codegen-parity.sh`) — the CI shell gate, a plain `diff` of `tail -n +2 ` against the regenerated tree. +- `mise run codegen:parity` (`tasks/codegen-parity.sh`) — the CI shell gate. 
It first compares the generated `int4` SQL *file set* against the golden `*.sql` set (`comm -23` against `git ls-files` excludes the committed, hand-written `int4_extensions.sql`, which has no golden counterpart) to catch extra/dropped files, then `diff`s each golden file against its generated counterpart after `tail -n +2` drops the provenance line. Any whitespace or blank-line drift fails — there is no normalization. +- `crates/eql-codegen/tests/parity.rs` (`rust_generator_matches_int4_golden_files`) — runs `generate_all` into a temp dir and byte-compares the materialised `int4` SQL surface against the same golden. +- the in-crate golden tests in `crates/eql-codegen/src/generate.rs` — byte-compare each `render_*_file` output against the corresponding reference. -If the generator diverges, either it regressed (fix `crates/eql-codegen`) or the reference is being updated deliberately (commit the new `int4` reference in the same PR). Whitespace and blank-line drift now fail the gate — there is no normalization. +The golden reference, not any retired generator, is the sole oracle. If the generator diverges, either it regressed (fix `crates/eql-codegen`) or the reference is being updated deliberately (commit the new `int4` reference in the same PR). + +See `docs/reference/encrypted-domain-generator.md` for the full generator story (manifest-free catalog, templates, term capabilities). + +## No committed fixture values + +Plaintext fixture lists are **not** generated and **not** committed as `_values.rs` files — there are none in the tree. They live in the catalog as `eql_scalars::INT4_VALUES` / `INT2_VALUES`, materialised at compile time by the `int_values!` macro in `crates/eql-scalars/src/lib.rs` from each `CATALOG` row, and pinned by `eql-scalars`'s own `values_tests`. The parity gate only globs `*.sql`; it does not check any `values.rs`. 
## New scalar types do not add a reference Adding a scalar type (`int2`, `int8`, …) does **not** add a `tests/codegen/reference//` directory. A per-type baseline would be redundant: the SQL is byte-identical to `int4` modulo the type token, so it can only fail when `int4`'s baseline already would. New types are guaranteed three other ways: - the `int4` reference here anchors the shared generator (templates + the `Term` enum's capability `impl`s in `crates/eql-scalars`); -- the per-type plaintext fixture list (`eql_scalars::INT4_VALUES` / `INT2_VALUES`, materialised from each `CATALOG` row) is pinned by `eql-scalars`'s own `values_tests` — there is no generated `_values.rs` to diff; -- the SQLx `ordered_numeric_matrix!` suite exercises the generated SQL's *behaviour* against a real database — a far stronger guarantee than a byte comparison. +- a catalog row plus the compiler and `eql-scalars`'s `#[test]`/`values_tests` over `CATALOG` validate the new type's spec and materialised value list; +- the SQLx `ordered_numeric_matrix!` suite exercises the generated SQL's *behaviour* against a real database — a far stronger guarantee than a byte comparison — and `mise run test:matrix:inventory` reconciles the matrix test-name set against the single canonical, token-normalized `tests/sqlx/snapshots/matrix_tests.txt` (cross-checked against `eql-codegen list-types`) with no database required. From 7ec279e46ac0670b00c708da0cb3cd80e19217aa Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 3 Jun 2026 10:55:59 +1000 Subject: [PATCH 17/19] fix(ci): pin bash via shebang for strict-mode mise tasks mise runs inline TOML tasks under `sh`, which is dash on the CI runners. `fixture:generate:all` used `set -euo pipefail` and failed there with 'Illegal option -o pipefail' (the task is new on this branch and had never run under CI's dash). `test:matrix:inventory` and `test:matrix:expand` had the same latent bug -- pipefail plus real pipes. 
Pin bash with a `#!/usr/bin/env bash` shebang as the first line of each strict-mode run block (mise honors it) and standardise on `set -euo pipefail`, so pipefail is portable regardless of the runner's /bin/sh. --- mise.toml | 10 +++++++--- tasks/fixtures.toml | 4 ++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/mise.toml b/mise.toml index 06c13bbc..7f3e4d9c 100644 --- a/mise.toml +++ b/mise.toml @@ -109,14 +109,16 @@ cargo test -p eql-scalars -p eql-codegen description = "Compile, lint and test the std-only Rust workspace crates (no database)" dir = "{{config_root}}" run = """ +#!/usr/bin/env bash # eql-scalars / eql-codegen are the lean workspace members. Scope explicitly to # them (NOT --workspace): a workspace-wide test would drag in tests/sqlx, whose # suite needs Postgres + CS_* secrets and is already covered by the `test` job. # clippy is likewise scoped — a workspace clippy recompiles the heavy # sqlx/tokio/cipherstash-client tree for no added coverage of these crates. -# `set -eu` only (no pipefail): mise runs tasks under `sh`, which is dash on the -# CI runners, and dash rejects `set -o pipefail`. There are no pipes here. -set -eu +# bash is pinned via the `#!/usr/bin/env bash` shebang above (mise honors a +# `#!` first line), so `set -o pipefail` is available regardless of the runner's +# /bin/sh (dash on the CI images). +set -euo pipefail cargo fmt --check cargo clippy -p eql-scalars -p eql-codegen --all-targets -- -D warnings cargo test -p eql-scalars -p eql-codegen @@ -126,6 +128,7 @@ cargo test -p eql-scalars -p eql-codegen description = "Verify the matrix test-name set against the single canonical snapshot, catalog-cross-checked (no database required)" dir = "{{config_root}}/tests/sqlx" run = """ +#!/usr/bin/env bash # ONE canonical, token-normalized snapshot (snapshots/matrix_tests.txt) pins the # set of macro-emitted matrix test names. 
The two per-type snapshots are gone: # they were byte-identical modulo the type token, so one canonical set plus a @@ -188,6 +191,7 @@ echo "Matrix inventory OK: ${checked} type(s) match the canonical snapshot; cata description = "Regenerate the int4 matrix cargo-expand snapshot (requires the pinned nightly + cargo-expand)" dir = "{{config_root}}/tests/sqlx" run = """ +#!/usr/bin/env bash # Body-level fidelity backstop for the macro: the expanded source of the int4 # matrix arms. The `cargo +nightly-...` invocation below is the SINGLE source of # the pinned nightly date — .github/workflows/macro-expand-eql.yml greps it from diff --git a/tasks/fixtures.toml b/tasks/fixtures.toml index f24bc684..04b7d5b5 100644 --- a/tasks/fixtures.toml +++ b/tasks/fixtures.toml @@ -19,6 +19,10 @@ description = "Regenerate every scalar SQLx fixture in one process, driven by eq # Must run inside the crate — a workspace member still builds from its own dir. dir = "{{config_root}}/tests/sqlx" run = """ +#!/usr/bin/env bash +# bash is pinned via the `#!/usr/bin/env bash` shebang above (mise honors a `#!` +# first line), so `set -o pipefail` is available regardless of the runner's +# /bin/sh (dash on the CI images). set -euo pipefail cargo test --features fixture-gen --test generate_all_fixtures \ generate_all -- --ignored --exact --nocapture From 5721f13839fe9851079a5384570c15fced76374e Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 3 Jun 2026 10:56:06 +1000 Subject: [PATCH 18/19] docs(reference): rename encrypted-domain spec to task-oriented guide Rename encrypted-domain-implementation-spec.md (and fold in the generator reference) to adding-a-scalar-encrypted-domain-type.md, update all cross-references (CLAUDE.md, eql-functions.md, sql-support.md, tests READMEs), and fill in the PR #252 link in the unreleased CHANGELOG entry. 
--- CHANGELOG.md | 2 +- CLAUDE.md | 2 +- .../adding-a-scalar-encrypted-domain-type.md | 619 ++++++++++++++++++ docs/reference/encrypted-domain-generator.md | 483 -------------- .../encrypted-domain-implementation-spec.md | 400 ----------- docs/reference/eql-functions.md | 4 +- docs/reference/sql-support.md | 2 +- tests/codegen/reference/README.md | 2 +- tests/sqlx/snapshots/README.md | 7 +- 9 files changed, 629 insertions(+), 892 deletions(-) create mode 100644 docs/reference/adding-a-scalar-encrypted-domain-type.md delete mode 100644 docs/reference/encrypted-domain-generator.md delete mode 100644 docs/reference/encrypted-domain-implementation-spec.md diff --git a/CHANGELOG.md b/CHANGELOG.md index 853e5e81..76700194 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,7 +28,7 @@ Each entry that ships in a published release links to the PR that introduced it. ### Changed -- **Scalar encrypted-domain types are now defined in a Rust catalog, not TOML manifests; the Python codegen toolchain is removed.** Adding a scalar encrypted-domain type (`int4`, `int8`, …) is now one row in `eql-scalars::CATALOG` (`crates/eql-scalars/src/lib.rs`) instead of authoring `tasks/codegen/types/.toml`. `mise run build` regenerates the gitignored SQL surface via `cargo run -p eql-codegen` (Rust, std-only) rather than the Python generator. The catalog row's `Fixture` list is the single source of truth for that type's plaintext fixtures: the SQLx test matrix reads it directly as a compile-time-materialised const (`eql_scalars::INT4_VALUES` / `INT2_VALUES`, `ScalarType::FIXTURE_VALUES`), so there is no longer a generated, committed `tests/sqlx/src/fixtures/_values.rs` — a Rust source of truth no longer round-trips through generated Rust. The shipped SQL is unchanged — `release/*.sql` is byte-identical across the cutover — so there is no change for callers installing EQL; this only affects contributors who extend the scalar domain families. 
The `python` mise tool, the `pytest`-based `test:codegen` (now `cargo test -p eql-scalars -p eql-codegen`), the per-type `mise run codegen:domain` tasks, and the per-type `tests/sqlx/snapshots/_matrix_tests.txt` baselines (collapsed into one catalog-reconciled `tests/sqlx/snapshots/matrix_tests.txt`) are gone. Why: a single compiler-validated source of truth shared by the generator and the SQLx test harness, and one fewer toolchain in the build/test path — building and testing EQL no longer needs Python (Python remains only for the separate docs-markdown tooling). ([#PR](https://github.com/cipherstash/encrypt-query-language/pull/PR)) +- **Scalar encrypted-domain types are now defined in a Rust catalog, not TOML manifests; the Python codegen toolchain is removed.** Adding a scalar encrypted-domain type (`int4`, `int8`, …) is now one row in `eql-scalars::CATALOG` (`crates/eql-scalars/src/lib.rs`) instead of authoring `tasks/codegen/types/.toml`. `mise run build` regenerates the gitignored SQL surface via `cargo run -p eql-codegen` (Rust, std-only) rather than the Python generator. The catalog row's `Fixture` list is the single source of truth for that type's plaintext fixtures: the SQLx test matrix reads it directly as a compile-time-materialised const (`eql_scalars::INT4_VALUES` / `INT2_VALUES`, `ScalarType::FIXTURE_VALUES`), so there is no longer a generated, committed `tests/sqlx/src/fixtures/_values.rs` — a Rust source of truth no longer round-trips through generated Rust. The shipped SQL is unchanged — `release/*.sql` is byte-identical across the cutover — so there is no change for callers installing EQL; this only affects contributors who extend the scalar domain families. 
The `python` mise tool, the `pytest`-based `test:codegen` (now `cargo test -p eql-scalars -p eql-codegen`), the per-type `mise run codegen:domain` tasks, and the per-type `tests/sqlx/snapshots/_matrix_tests.txt` baselines (collapsed into one catalog-reconciled `tests/sqlx/snapshots/matrix_tests.txt`) are gone. Why: a single compiler-validated source of truth shared by the generator and the SQLx test harness, and one fewer toolchain in the build/test path — building and testing EQL no longer needs Python (Python remains only for the separate docs-markdown tooling). ([#252](https://github.com/cipherstash/encrypt-query-language/pull/252)) ## [2.3.1] — 2026-05-21 diff --git a/CLAUDE.md b/CLAUDE.md index 9cb5b80f..50766e33 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -80,7 +80,7 @@ This is the **Encrypt Query Language (EQL)** - a PostgreSQL extension for search Adding a scalar encrypted-domain type is one row in the Rust catalog `eql-scalars::CATALOG` (`crates/eql-scalars/src/lib.rs`): a `ScalarSpec` giving the type `token` (e.g. `int8`), its `ScalarKind` (the `kind` field), the `DomainSpec`s mapping each generated domain suffix to its fixed index `Term`s (`_eq => [Hm]`, `_ord`/`_ord_ore => [Ore]`), and the `Fixture` value list. Term capabilities are fixed in the `Term` enum's `impl` methods (with unit tests): `Hm` provides equality, and `Ore` provides equality plus ordering. There is no TOML manifest and no Python — the catalog is the source of truth, validated by the compiler (an undefined term or unknown scalar is a compile error) plus catalog `#[test]`s. `mise run build` runs `cargo run -p eql-codegen`, which regenerates the scalar SQL surface into `src/encrypted_domain//` from `CATALOG` at the start of every build; that surface includes supported comparison wrappers plus blockers for native `jsonb` operators that would otherwise be reachable through domain fallback. 
`cargo run -p eql-codegen` regenerates every type at once (the same call `mise run build` uses; there is no per-type codegen task). The generated `*_types.sql` / `*_functions.sql` / `*_operators.sql` / `*_aggregates.sql` files are gitignored and never committed. The per-type plaintext fixture lists the SQLx matrix consumes are **not** a generated file — they are materialised from each `CATALOG` row at compile time as `eql_scalars::INT4_VALUES` / `INT2_VALUES` (the `int_values!` macro) and read directly by `ScalarType::FIXTURE_VALUES`; a Rust source of truth no longer round-trips through a committed generated `.rs`. Generated SQL carries a `-- AUTOMATICALLY GENERATED FILE` header (the project-wide marker `docs:validate` greps on); change the catalog and rebuild, never hand-edit. Hand-written SQL beyond the fixed surface goes in `src/encrypted_domain//_extensions.sql` with no auto-generated header and explicit `-- REQUIRE:` edges — that file IS committed. `text` and `jsonb` are out of scope for this scalar materializer. -**Adding a new encrypted-domain type: follow `docs/reference/encrypted-domain-implementation-spec.md`.** The mechanics are fixed for ordered scalar domains; the catalog row only declares the token, kind, domain suffixes, and terms. New term behavior belongs in the `Term` enum's `impl` methods in `crates/eql-scalars/src` with tests, not in free-form catalog data. +**Adding a new encrypted-domain type: follow `docs/reference/adding-a-scalar-encrypted-domain-type.md`.** The mechanics are fixed for ordered scalar domains; the catalog row only declares the token, kind, domain suffixes, and terms. New term behavior belongs in the `Term` enum's `impl` methods in `crates/eql-scalars/src` with tests, not in free-form catalog data. Regeneration is deterministic: an identical `CATALOG` produces byte-identical SQL. 
If `mise run build` produces unexpected output, the change is in `crates/eql-scalars/src` (the catalog/terms) or `crates/eql-codegen/src` (the renderers) — not in random run-to-run variation. diff --git a/docs/reference/adding-a-scalar-encrypted-domain-type.md b/docs/reference/adding-a-scalar-encrypted-domain-type.md new file mode 100644 index 00000000..ee304a3d --- /dev/null +++ b/docs/reference/adding-a-scalar-encrypted-domain-type.md @@ -0,0 +1,619 @@ +# Adding a Scalar Encrypted-Domain Type + +The one reference for adding a scalar encrypted-domain type (`int4`, `int2`, +and future ordered numeric scalars). The **top half** (§§1–4) is the path you +follow to add a type; the **reference half** (§§5–7) is the detail behind it — +the generated surface, its invariants, and how the generator itself works. +Read top-down to ship a type; drop into the reference half when something +breaks or you need the *why*. + +A scalar encrypted-domain type is a family of concrete `jsonb` domains in the +**`eql_v3`** schema (`eql_v3.`, `eql_v3._eq`, +`eql_v3._ord`, …), dropped by `DROP SCHEMA eql_v3 CASCADE` and surviving +an `eql_v2` uninstall. Their extractors, comparison wrappers, and MIN/MAX +aggregates also live in `eql_v3`; the index-term types they return +(`eql_v2.hmac_256`, `eql_v2.ore_block_u64_8_256`) stay in `eql_v2` and are +referenced cross-schema. + +The whole SQL surface is **generated** from a single Rust source of truth: the +`CATALOG` const in [`crates/eql-scalars/src/lib.rs`](../../crates/eql-scalars/src/lib.rs), +rendered by the [`eql-codegen`](../../crates/eql-codegen/) crate. There is no +TOML manifest and no Python — adding a type is adding one `ScalarSpec` row, +validated by the compiler plus catalog `#[test]`s. The reference type is +`eql_v3.int4`. **`text` and `jsonb` are out of scope** for this materializer +(see §7). + +--- + +## 1. TL;DR — the one path + +To add a scalar type `` (e.g. `int8`), with Rust type `` (e.g. `i64`): + +1. 
**Add a `ScalarSpec` row to `eql_scalars::CATALOG`** — `token`, `kind`, + `domains`, `fixtures` (§2). If the type needs a new scalar width, add a + `ScalarKind` variant first; if it needs new term behaviour, that goes in the + `Term` enum's `impl`, never in catalog data. +2. **Materialise the value list** — `int_values!(_VALUES, , );` + next to `CATALOG`, pinned by a `values_tests` assertion (§2). This is the + single source the SQLx matrix reads; there is no generated `_values.rs`. +3. **Wire the SQLx matrix oracle** — copy the seven small registrations from the + `int4` reference (§3). +4. **Regenerate** — `cargo run -p eql-codegen` (or just `mise run build`, which + runs the generator first). One run regenerates *every* catalog type; there is + no per-type codegen task. The generated `*_{types,functions,operators,aggregates}.sql` + are gitignored and never committed. +5. **Snapshot the matrix inventory** — `mise run test:matrix:inventory` (§3). +6. **Verify** — `mise run test:codegen`, the relevant SQLx suites, and the + PostgreSQL matrix (§4). + +Things you do **not** do: + +- **Don't commit generated SQL.** `*_types.sql` / `*_functions.sql` / + `*_operators.sql` / `*_aggregates.sql` are gitignored; the catalog plus the + renderers are the source of truth. Change the catalog and rebuild — never + hand-edit generated SQL. +- **Don't add a `tests/codegen/reference//` baseline.** `int4` is the sole + golden master (§4). +- **Don't edit `mise.toml`, the CI workflow, `pin_search_path.sql`, or + `splinter.sh`** for an ordinary type — they recognise the generated surface + intrinsically (§5, §6). The exception is a brand-new *term* whose extractor + has a new name (§5). + +Hand-written SQL beyond the fixed surface goes in +`src/encrypted_domain//_extensions.sql` with explicit `-- REQUIRE:` edges +— and **that file IS committed** (§5). + +--- + +## 2. 
The catalog row (`ScalarSpec`) + +A scalar type is one `ScalarSpec` row in +[`crates/eql-scalars/src/lib.rs`](../../crates/eql-scalars/src/lib.rs): + +```rust +ScalarSpec { + token: "int4", + kind: ScalarKind::I32, + domains: &[ + DomainSpec { suffix: "", terms: &[] }, + DomainSpec { suffix: "_eq", terms: &[Term::Hm] }, + DomainSpec { suffix: "_ord_ore", terms: &[Term::Ore] }, + DomainSpec { suffix: "_ord", terms: &[Term::Ore] }, + ], + fixtures: INT4_FIXTURES, +} +``` + +The fields, all enforced by the type system and the catalog `#[test]`s rather +than a runtime validator: + +- **`token`** — the type token (`int4`); supplies `<T>` everywhere. Each + domain's full name is `token` + `suffix` (`ScalarSpec::domain_name`), pinned by + `every_domain_name_starts_with_its_token`. +- **`kind`** — a `ScalarKind` (`I16` / `I32` / `I64` / `Numeric` / `Text` / + `Jsonb`), carrying the Rust type name, the `MIN`/`MAX`/zero symbols, and the + numeric bounds. Only the integer kinds have an i128 range with `Min`/`Max`/`Zero` + sentinels; the bounded accessors `panic!` on the others (a misuse guard gated + by `is_int()`). **If `<T>` needs a new scalar width, add a `ScalarKind` + variant** (rust-type name, `MIN`/`MAX`/zero symbols, bounds) with unit tests + over its `impl` methods. +- **`domains`** — a non-empty `&[DomainSpec]` (pinned by + `every_type_has_at_least_one_domain`), each a `suffix` + the fixed `&[Term]` it + carries. The storage domain is `suffix: ""` with no terms; `_eq => [Term::Hm]`; + `_ord` and `_ord_ore => [Term::Ore]`. A `DomainSpec` declares nothing else — no + extractor names, no operator lists, no REQUIRE edges. Every behavioural fact + comes from the `Term` enum. +- **`fixtures`** — the type's plaintext fixture list (see below). + +**Terms** are fixed by the `Term` enum (`crates/eql-scalars/src/lib.rs`).
The +`json_key` / `extractor` / `returns` / `ctor` values are the cross-schema SQL +contract — changing one is a generated-SQL behaviour change, not a refactor: + +| Term | JSON key | Extractor | Returns | Operators | +| ----- | -------- | ----------- | -------------------------------- | -------------------------- | +| `Hm` | `hm` | `eq_term` | `eql_v2.hmac_256` | `=` `<>` | +| `Ore` | `ob` | `ord_term` | `eql_v2.ore_block_u64_8_256` | `=` `<>` `<` `<=` `>` `>=` | + +A type that needs a non-ORE equality term on an ordered domain needs a **new +`Term`**, not a catalog flag. Adding a term is a code change to the `Term` +enum's `impl` methods (`json_key`, `extractor`, `returns`, `ctor`, `role`, +`operators`, `requires`) with matching `#[test]`s (`term_tests` / +`term_helper_tests`) — never a free-form catalog field. + +**Twins.** `int4_ord` and `int4_ord_ore` both carry `&[Term::Ore]`. The +generator emits them as independent domains with byte-identical SQL modulo type +name (`ordered_files_byte_identical_modulo_typename`). Twins let callers choose +a name that documents intent ("ordered, regardless of mechanism" vs "ordered via +ORE block") without committing to one term family in a future migration. + +**Order is significant.** The generator iterates `CATALOG` in order (driving +generation order), and iterates each spec's `domains` slice in order — that +order shows up in the generated `_types.sql` `DO` block. Order the slice +the way you want the output to read. + +### Fixtures — single-sourcing the value list + +The `fixtures` field is an ordered `&[Fixture]` — the single source of truth +for the type's plaintext list, consumed by both the SQLx fixture generator and +the matrix oracle. A `Fixture` is value-kind tagged: `Min` / `Max` / `Zero` (the +integer matrix pivots, resolved per-kind), `Int(i128)` (an integer literal), and +`Numeric` / `Text` / `Jsonb` string variants. 
The `fixtures!` macro +range-checks each `Int` literal against the kind at compile time (`N(-40000)` +for an `i16` kind does not compile): + +```rust +const INT4_FIXTURES: &[Fixture] = fixtures!(int i32; + Min, N(-100), N(-1), Zero, N(1), N(2), N(5), N(10), N(17), N(25), + N(42), N(50), N(100), N(250), N(1000), N(9999), Max); +``` + +Catalog `#[test]`s enforce a **distinct-plaintext contract** plus the +matrix-pivot requirement: + +- `fixture_values_are_distinct_by_resolved_number` rejects duplicates against + the *resolved* value, so both copy-paste dups and sentinel/literal aliases + (`Min` alongside the same number) fail; +- `fixtures_include_min_max_and_zero` requires `Min`, `Max`, and zero for + integer kinds — the matrix uses those three as comparison pivots and fetches + each one's ciphertext from the fixture via `fetch_fixture_payload`, which fails + loudly if the row is absent; +- `every_fixture_value_is_within_kind_bounds` keeps every resolved value in + range. + +These are the compile/test-time analogue of the old `load_spec` validation. +Beyond the pivots, choose values so range operators produce distinguishable +result counts, include useful boundaries, and cover omitted-term negative cases. + +The plaintext list is **not** rendered to a generated file. The `int_values!` +macro (next to `CATALOG`) materialises a `Fixture` list into a typed `pub const +_VALUES: &[]` at compile time (`INT4_VALUES`, `INT2_VALUES`): + +```rust +int_values!(INT4_VALUES, i32, INT4); +``` + +Both consumers reference that single symbol — the fixture generator +(`fixtures::eql_v2_::spec`) and the matrix oracle's `FIXTURE_VALUES` — so the +oracle cannot drift from the values the generator encrypts. There is no +committed `_values.rs`: a Rust source of truth does not round-trip through +generated Rust. Pin the exact materialised list with a `values_tests` assertion. + +--- + +## 3. 
Wire the SQLx matrix oracle + +The generated SQL is enough to *install* the domains, but the +`ordered_numeric_matrix!` suite only runs once the Rust harness knows about the +scalar. These are hand-maintained registration lists — copy each piece from the +`int4` reference. `` is the scalar's Rust type (`i32` for `int4`, `i16` for +`int2`): + +| File | Add | +|------|-----| +| `tests/sqlx/src/fixtures/eql_plaintext.rs` | A sealed `EqlPlaintext` impl for ``: `impl Sealed for {}`, a `PlaintextSqlType` const for its base column type, `impl EqlPlaintext for ` (`CAST`, `PLAINTEXT_SQL_TYPE`, `to_plaintext` → the right `Plaintext` variant), plus the two `#[test]` casts. | +| `tests/sqlx/src/fixtures/eql_v2_.rs` | `use eql_scalars::_VALUES as VALUES;` then `crate::scalar_fixture!("eql_v2_", , VALUES);`. | +| `tests/sqlx/src/fixtures/mod.rs` | `pub mod eql_v2_;`. | +| `tests/sqlx/tests/generate_all_fixtures.rs` | An arm in `generate_for_token`: `"" => fixtures::eql_v2_::spec().run().await,`. The match is exhaustive over the catalog — a catalog token with no arm fails the generator loudly. | +| `tests/sqlx/src/scalar_domains.rs` | `impl ScalarType for ` — `PG_TYPE` (the base PG type, e.g. `"int8"`) and `FIXTURE_VALUES = eql_scalars::_VALUES`. | +| `tests/sqlx/tests/encrypted_domain/scalars/.rs` | `ordered_numeric_matrix! { suite = , scalar = , eql_type = "eql_v2_" }`. | +| `tests/sqlx/tests/encrypted_domain/scalars/mod.rs` | `pub mod ;`. | + +Forget one and the matrix simply does not run for the type — the matrix +inventory cross-check (below) surfaces it, because the catalog has the type but +the binary has no `scalars::::` tests. (A future Phase-4 `scalar_types!` +registry, tracked separately, will collapse these into one declaration.) + +The coverage these registrations unlock comes from the `ordered_numeric_matrix!` +convention wrapper in `tests/sqlx/src/matrix.rs`: one `impl ScalarType` plus a +single invocation taking `suite`, `scalar`, and `eql_type`. 
The matrix derives +its comparison pivots — the scalar's `MIN`, `MAX`, and zero +(`Default::default()`) — from the type rather than a hand-written list, so the +invocation carries no pivot argument. Equality-only scalars use the sibling +`eq_only_scalar_matrix!`. The `matrix.rs` module header is the canonical, +current list of the categories the matrix emits (sanity, correctness, +cross-shape, supported-NULL, blocker raises, index engagement, ORDER BY, ORDER +BY USING) — read it rather than duplicating a count here. For ordered `int4`, +keep the assertion that distinct plaintext values produce distinct ORE blocks; +do not add assertions for term behaviour the catalog does not promise. + +### Matrix coverage inventory snapshot + +The *set of test names* the matrix emits is guarded by **one** committed, +token-normalized snapshot at `tests/sqlx/snapshots/matrix_tests.txt` — the +sorted inventory of every `scalars::<T>::*` test name with the type token +replaced by the literal `<T>`. (The per-type `<T>_matrix_tests.txt` files are +gone: they were byte-identical modulo the token, so one canonical set plus a +per-type normalize-and-compare carries the same signal at a fraction of the +committed surface.) This is the guard that catches a silently dropped, renamed, +or `#[cfg]`-gated matrix test — a behaviour the SQLx assertions cannot see (a +deleted test just stops running). The snapshot is a committed test baseline, +**not** gitignored generated SQL. + +`mise run test:matrix:inventory` discovers the present scalar types from the +`encrypted_domain` binary's `--list`, normalizes each type's token to `<T>`, +asserts every type's set equals the canonical snapshot, and cross-checks the +discovered type set against `cargo run -p eql-codegen -- list-types` (the +catalog is the single source). You do **not** edit a per-type snapshot or touch +`mise.toml` / the CI workflow — you only regenerate the one `matrix_tests.txt` +when the macro's emitted name set itself changes.
A catalog type missing its +matrix wiring fails the cross-check. The CI `matrix-coverage` job gates it. +**`tests/sqlx/snapshots/README.md` is the source of truth** for the mechanics +(pinned feature set, the catalog cross-check, the CI diff, and when to +regenerate). + +--- + +## 4. Regenerate, snapshot & verify + +Regeneration is deterministic: identical catalog + renderers produce +byte-identical SQL. If `mise run build` produces unexpected output, the change +is in `crates/eql-scalars/src` (catalog/terms) or `crates/eql-codegen/src` +(renderers) — not run-to-run variation. + +Run, in order: + +- `cargo run -p eql-codegen` (optional; refreshes all generated SQL from the + catalog before a full build) +- `mise run test:codegen` (`cargo test -p eql-scalars -p eql-codegen`) +- `mise run test:matrix:inventory` (matrix inventory + catalog cross-check; no + database) +- `mise run clean && mise run build` (regenerates every type's SQL from the + catalog first, then builds the release artefacts — a bare build can leave + stale `release/*.sql`) +- the relevant SQLx suites +- `mise run test` across supported PostgreSQL versions +- `mise run --output prefix test:splinter --postgres 17` after a PostgreSQL 17 + install has built EQL + +The CI codegen job is a prerequisite of the PostgreSQL test matrix, so +generated-SQL drift is caught before database tests run. + +**Why no per-type golden baseline.** Do **not** add a +`tests/codegen/reference//` baseline. `int4` is the sole golden master for +the type-generic generator: the templates are pure token substitution, so a +per-type baseline can only fail where `int4`'s already would. Drift protection +for a new type comes from the `int4` reference (shared templates + `Term` enum), +the catalog `values_tests` pinning the materialised `_VALUES`, the +catalog/generator `#[test]`s, and the `ordered_numeric_matrix!` SQLx suite +(behaviour, not bytes). + +--- + +## 5. 
The generated surface — what correct output looks like + +This is the contract the generated SQL satisfies. You normally never read it to +*add* a type — read it when a test fails or you're extending the surface. + +### Domains and CHECK constraints + +The generator emits `src/encrypted_domain//_types.sql` (gitignored; +materialised on every build) with one idempotent `DO $$ ... $$` block. Every +domain is a concrete domain over `jsonb` in the `eql_v3` schema — **never** +`CREATE DOMAIN a AS b` over another generated domain (PostgreSQL resolves +operators against the underlying base type, bypassing the fixed surface). Each +domain's `CHECK` requires: + +- fixed envelope keys `v` and `i`; +- ciphertext key `c`; +- catalog JSON keys for the listed terms; +- the envelope version value `VALUE->>'v' = '2'`, matching the repo-wide + `eql_v2._encrypted_check_v` rule (`src/encrypted/constraints.sql`). + +So a domain with `&[Term::Ore]` requires `v`, `i`, `c`, and `ob` present, with +`v` pinned to `2`. Beyond key presence and the version value, a malformed term +can still fail later inside its extractor. + +### Extractors, wrappers, and blockers + +Extractor names and return types come from the `Term` enum. Generated extractors +and supported comparison wrappers are inline-friendly SQL functions: + +```sql +LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE +AS $$ SELECT ... $$; +``` + +They must **not** carry a pinned `search_path` — a `SET` clause disables +inlining and reverts index-backed queries to seq scans. The build tooling +recognises these functions structurally, so the generator emits no +`eql-inline-critical` markers. (Aggregate state functions are the one deliberate +exception — see below.) 
+ +Unsupported operators route to **blockers**, which are `LANGUAGE plpgsql`, +`IMMUTABLE`, `PARALLEL SAFE`, and intentionally **not `STRICT`**: + +- **`plpgsql`, not `sql`.** A `LANGUAGE sql` body is inlinable, and the planner + could elide the call when the result is provably unused (dead `CASE` branch, + folded predicate), letting a blocked operator appear to succeed. `plpgsql` is + opaque to the planner, so the call — and its `RAISE` — always survives. +- **Not `STRICT`.** A `STRICT` blocker lets PostgreSQL skip the body and return + `NULL` on a `NULL` argument, silently bypassing the unsupported-operator + exception. + +### Operators + +Every generated domain declares supported scalar comparison operators plus +blockers for the native `jsonb` operator surface PostgreSQL could otherwise +reach through domain-to-base-type fallback. The surface is a fixed 20 operators +(`crates/eql-codegen/src/operator_surface.rs`, `OPERATORS`), each with its +PostgreSQL-shaped signatures, summing to **44 `CREATE OPERATOR` statements per +domain**: + +| Operators | Forms | +|---|---| +| `=` `<>` `<` `<=` `>` `>=` `@>` `<@` | `(domain, domain)` · `(domain, jsonb)` · `(jsonb, domain)` | +| `->` `->>` | `(domain, text)` · `(domain, integer)` · `(jsonb, domain)` | +| `?` | `(domain, text)` | +| `?\|` `?&` | `(domain, text[])` | +| `@?` `@@` | `(domain, jsonpath)` | +| `#>` `#>>` `#-` | `(domain, text[])` | +| `-` | `(domain, text)` · `(domain, integer)` · `(domain, text[])` | +| `\|\|` | `(domain, domain)` · `(domain, jsonb)` · `(jsonb, domain)` | + +Whether an operator routes to a wrapper or a blocker is a per-domain decision +driven by the domain's terms (`Term::operators_for_terms`), not a property of +the operator. Supported operators are emitted with full planner metadata +(`COMMUTATOR`, `NEGATOR`, `RESTRICT`, `JOIN` selectivity estimators) backing +onto inlinable wrappers; everything else carries minimal metadata backing onto +blockers. 
Path operators always back onto blockers — neither current term +enables them — and the native `jsonb` operators are blocker-only. + +The wrapper/blocker split per domain (the 44-operator total never moves): + +| Domain terms | Extractors | Wrappers | Blockers | Functions | Operators | +| ---------------- | ---------: | -------: | -------: | --------: | --------: | +| none | 0 | 0 | 44 | 44 | 44 | +| `&[Term::Hm]` | 1 (`eq_term`) | 6 | 38 | 45 | 44 | +| `&[Term::Ore]` | 1 (`ord_term`) | 18 | 26 | 45 | 44 | + +Six wrappers for `Hm` = `=` and `<>` × three shapes; eighteen for `Ore` = six +operators × three shapes. + +**Untyped-literal resolver edge.** PostgreSQL's operator resolver still prefers +the built-in `jsonb` operator for untyped string literals in forms such as +`payload::eql_v3.int4 ? 'c'`. Use typed parameters or explicit casts +(`? 'c'::text`, bound text parameters) to route those forms to the generated +blocker. A live-DB structural guard +(`tests/sqlx/tests/encrypted_domain/family/jsonb_operator_surface.rs`) queries +`pg_operator` for every operator with a `jsonb` argument and asserts the set is +a subset of the enumerated surface, so a future PostgreSQL version that adds a +`jsonb` operator nobody enumerated fails the test rather than silently routing an +encrypted column to native plaintext-`jsonb` semantics. + +### Aggregates + +Each ordered (ord-capable) domain additionally gets a generated +`_aggregates.sql`: two state functions (`eql_v3.min_sfunc`, +`eql_v3.max_sfunc`) and two aggregates (`eql_v3.min()`, +`eql_v3.max()`). Comparison routes through the domain's `<` / `>` +operator (the ORE block term — no decryption). The state functions are `LANGUAGE +plpgsql IMMUTABLE STRICT PARALLEL SAFE` **with** a pinned `SET search_path` — +the one place the "no pinned `search_path`" rule does not apply, because +aggregate transition functions are never index expressions. 
`STRICT` makes +PostgreSQL seed the running state with the first non-NULL value and skip NULLs, +so an all-NULL group returns NULL. Each `CREATE AGGREGATE` declares +`combinefunc = ` and `parallel = safe`: min/max are associative, so the +state function doubles as the combine function, enabling partial and parallel +aggregation on large `GROUP BY` ORE workloads with no decryption. Storage-only +and equality-only domains have no comparator and emit no aggregate file. + +### Indexing + +Do not create operator classes on generated domains. Index through the +extractor, whose return type already carries a default opclass: + +```sql +CREATE INDEX ... ON table_name USING btree (eql_v3.ord_term(col)); +CREATE INDEX ... ON table_name USING hash (eql_v3.eq_term(col)); +``` + +`ore` depends on `src/ore_block_u64_8_256/functions.sql` and +`src/ore_block_u64_8_256/operators.sql`; `hm` depends on +`src/hmac_256/functions.sql`. + +### Extension files + +Optional hand-written SQL beyond the fixed surface belongs in +`src/encrypted_domain//_extensions.sql`. The generator never creates, +lists, headers, or cleans it; it must declare its own `-- REQUIRE:` edges +(usually to `_types.sql` and whichever generated function or operator file it +extends). Use it for cross-domain casts, helper functions, or type-specific +constraints. Unlike the generated siblings, **`_extensions.sql` IS +committed.** (Neither `int4` nor `int2` ships one today.) + +`tasks/pin_search_path.sql` describes the fallback marker for inline-critical +extension functions that take no domain argument and so escape the structural +skip: + +```sql +COMMENT ON FUNCTION eql_v2.my_helper(...) IS 'eql-inline-critical: ...'; +``` + +The generator never emits this marker; every function it produces takes a domain +argument and is covered by the structural skip intrinsically. + +### Invariants the generator enforces + +The generator's job is partly to write SQL and partly to make incorrect SQL +unreachable. 
Invariants encoded in the renderers / templates and guarded by +`#[test]`s in `crates/eql-codegen/src/generate.rs`: + +- **Blockers are never `STRICT` and always `plpgsql`** — the + unsupported-operator template emits each blocker as `IMMUTABLE PARALLEL SAFE` / + `LANGUAGE plpgsql` without `STRICT` + (`blockers_are_never_strict_and_always_plpgsql`). +- **Wrappers and extractors are inlinable SQL** — `LANGUAGE sql IMMUTABLE STRICT + PARALLEL SAFE`, single-statement `SELECT`, no `SET search_path` + (`inlinable_functions_have_no_set_search_path`). +- **Aggregate state functions are the deliberate exception** — `plpgsql` *with* + a pinned `SET search_path` (`aggregate_state_functions_are_plpgsql_not_inlinable`). +- **SQL-literal injection is structurally prevented** — every interpolated + single-quoted literal passes through `sql_str` + (`crates/eql-codegen/src/consts.rs`), which doubles embedded single quotes. +- **No domain-over-domain** — every domain is `CREATE DOMAIN eql_v3. AS + jsonb` (`types_file_has_all_four_domains`). +- **No operator class on a domain** — the generator emits operators, not + operator classes. +- **Ownership boundary** — `is_generated` recognises owned files by their header + marker; `ensure_generated_paths_writable` refuses to overwrite anything else, + and `clean_generated_files` deletes only marked files + (`crates/eql-codegen/src/writer.rs`). A hand-written file at a generated path + is a hard error, not a silent clobber. + +### Lint and test integration + +Two pieces of build tooling recognise the generated output without per-type +edits: + +- **`tasks/pin_search_path.sql`** — structural skip identifies encrypted-domain + functions by language (`sql`), volatility (`IMMUTABLE`), and a jsonb-backed + `DOMAIN` argument in the `eql_v3` schema. New scalar types need no edit. +- **`tasks/test/splinter.sh`** — name-based allowlist. 
The converged wrapper / + extractor names (`eq`, `neq`, `lt`, `lte`, `gt`, `gte`, `eq_term`, `ord_term`) + plus the generated `min` / `max` aggregates are already covered by + `eql_v3`-schema entries. A new scalar type inherits coverage; **only a new + term whose extractor has a new name requires a splinter entry.** + +--- + +## 6. Generator internals — the machine + +You need this section only when **modifying the generator itself**, not when +adding a type. + +### Why a generator + +A single scalar type emits several hundred SQL declarations across eleven files: +four domains, three extractors, dozens of wrappers and blockers, 176 `CREATE +OPERATOR` statements (44 per domain), and MIN/MAX aggregates per ordered domain. +The shape is mechanical and the invariants are unforgiving — a `STRICT` blocker +silently bypasses its exception; a pinned `search_path` reverts queries to seq +scans. The generator exists so each new type adds one `CATALOG` row rather than +several hundred hand-written declarations that must agree with each other and with +`pin_search_path.sql`, `tasks/test/splinter.sh`, and +`src/encrypted_domain/functions.sql`. + +### Pipeline + +`eql-codegen` is a small Rust crate with a binary entry point. The generator +runs as `cargo run -p eql-codegen` (no subcommand), which calls +`generate::generate_all` (`crates/eql-codegen/src/generate.rs`) over every row of +`eql_scalars::CATALOG`, writing each type's SQL into +`src/encrypted_domain/<T>/`. A second form, `cargo run -p eql-codegen +-- list-types`, prints the catalog tokens one per line (consumed by the fixture +and matrix-inventory enumeration). `main` (`crates/eql-codegen/src/main.rs`) +recognises exactly these two forms; any other argument is a usage error. + +`tasks/build.sh` runs `cargo run -p eql-codegen` at the start of every `mise run +build`, so the generated SQL is never checked in.
(The build first sweeps every +generated `*_{types,functions,operators,aggregates}.sql` under +`src/encrypted_domain` so a type removed from `CATALOG` cannot leave orphans the +`src/**/*.sql` build glob would pick up; hand-written `*_extensions.sql` is +preserved by the name patterns.) + +Stages, in order (`generate_all` → `generate_type`): + +1. **Read the catalog.** `eql_scalars::CATALOG` is the in-binary source of truth + — a `&[ScalarSpec]`. There is no parse/validate stage at generation time: the + catalog is validated at compile time (an undefined `Term` or unknown + `ScalarKind` does not compile) and by the catalog `#[test]`s, so the data is + already well-formed by the time `generate_all` runs. +2. **Resolve terms.** For each `DomainSpec`, the `Term` enum's `impl` methods + supply the extractor name, return type, JSON envelope key, supported + operators, and the SQL `-- REQUIRE:` edges those terms imply + (`Term::operators_for_terms`, `term_json_keys`, `term_requires`, + `extractor_for_operator`, `role_for_terms`). +3. **Render.** `render_types_file`, `render_functions_file`, + `render_operators_file`, and `render_aggregates_file` (the last only for + ordered domains) build the context structs in + `crates/eql-codegen/src/context.rs` and render them through embedded + **minijinja** templates (`crates/eql-codegen/templates/*.j2`, compiled in via + `include_str!` — no runtime file IO). The structural shape of each declaration + is split between the context builders (Rust) and the templates (Jinja). +4. **Write.** `clean_generated_files` first deletes every generated `.sql` in the + target directory (recognised by the header marker) so an abandoned domain + disappears on the next regeneration; `ensure_generated_paths_writable` then + refuses to proceed if any target path is a hand-written file lacking the + marker; `write_generated_file` writes each rendered body verbatim + (`crates/eql-codegen/src/writer.rs`). 
The template emits the `-- AUTOMATICALLY + GENERATED FILE.` marker as its own first line, so the writer does not prepend + a header — it only uses the marker to recognise files it owns. + +There is no caching layer and no incremental mode. Each run regenerates every +output for every catalog type from scratch. + +### Generated outputs + +For a type with `D` domains of which `A` are ordered, the generator writes `1 + +2D + A` SQL files into `src/encrypted_domain//`. For `int4` (`D = 4`, `A = +2`): eleven SQL files. The outputs are gitignored +(`.gitignore` excludes `src/encrypted_domain/*/*_{types,functions,operators,aggregates}.sql`) +and regenerated at the start of every build. + +| File | Content | +| --------------------------------- | ---------------------------------------------------------------------------------------- | +| `_types.sql` | Single idempotent `DO` block creating every domain; each `CHECK` pins the payload version (`VALUE->>'v' = '2'`) and required envelope/ciphertext/term keys; one `--! 
@brief` per domain | +| `_functions.sql` | One extractor per unique term, then 44 wrappers-or-blockers covering the surface | +| `_operators.sql` | 44 `CREATE OPERATOR` statements with planner metadata on supported ops | +| `_aggregates.sql` | MIN/MAX state functions + `CREATE AGGREGATE`; emitted only for ordered domains | + +Every file opens with the `-- AUTOMATICALLY GENERATED FILE.` marker (the +project-wide marker `docs:validate` greps on to skip generated SQL — +`crates/eql-codegen/src/consts.rs`), declares its `-- REQUIRE:` edges in +dependency order (types files require `src/schema-v3.sql`; function files require +both `src/schema.sql` and `src/schema-v3.sql`, the types file, and +`src/encrypted_domain/functions.sql` plus each term's `requires` set; operator +files require `src/schema-v3.sql`, the types file, and their domain's function +file; aggregate files require `src/schema-v3.sql`, the types file, and their +domain's function and operator files), and carries Doxygen `--! @file` / +`--! @brief` headers. + +### Generator tests and the parity gate + +The generator's tests are Rust, run by `mise run test:codegen` (`cargo test -p +eql-scalars -p eql-codegen`) — no database. `mise run test:crates` adds `cargo +clippy ... -D warnings`. + +- **`eql-scalars` unit tests** — `rust_tests`, `term_tests`, + `term_helper_tests`, `fixture_tests`, `catalog_tests`, `invariant_tests`, + `values_tests` over `CATALOG`, the `Term` / `ScalarKind` / `Fixture` impls, and + the materialised `_VALUES` consts. +- **`eql-codegen` unit tests** — file counts, language/volatility invariants, + escaping guards, and twin byte-identity + (`crates/eql-codegen/src/generate.rs` `#[cfg(test)]`). +- **The parity gate** — `mise run codegen:parity` (`tasks/codegen-parity.sh`). 
+ It runs the generator into the real tree, then (1) compares the int4 generated + SQL **file set** against the golden under `tests/codegen/reference/int4/*.sql`, + excluding committed hand-written files (`comm -23` of `ls` against `git + ls-files`), so an extra or dropped generated file fails; and (2) diffs each + golden file **byte-for-byte** against its generated counterpart, after dropping + the golden's single leading `-- REFERENCE:` provenance line (`tail -n +2`). The + same byte-for-byte assertion runs in-crate as + `crates/eql-codegen/tests/parity.rs` + (`rust_generator_matches_int4_golden_files`). The golden reference — not any + Python oracle — is the sole contract that survives generator refactors. + +CI runs these in three jobs in `.github/workflows/test-eql.yml`: `rust-crates` +(`Rust workspace crates`, runs `mise run test:crates`), `codegen` +(`Encrypted-domain codegen`, runs `mise run codegen:parity`), and +`matrix-coverage` (`Matrix coverage inventory`, runs `mise run +test:matrix:inventory`). The codegen job is a prerequisite of the PostgreSQL +test matrix. + +Adding a new **term** is a bigger move than adding a type: edit the `Term` enum's +`impl` methods, add `#[test]`s, audit `splinter.sh` for a name collision if the +extractor name is new, and — because it changes the int4 surface — update the +golden reference under `tests/codegen/reference/int4/`. + +--- + +## 7. Out of scope — `text` and `jsonb` + +`text` and `jsonb` are **not** materialised through this generator. The +`ScalarKind` enum carries `Text` / `Numeric` / `Jsonb` variants and the +`Fixture` enum carries their string-backed shapes at the capability layer, but +`CATALOG` declares only the integer scalars today, so no `text` / `jsonb` SQL +surface is generated. Text and JSONB encrypted behaviour lives on the composite +`eql_v2_encrypted` type and its hand-written operator surface in `src/encrypted/` +and `src/operators/`, not the scalar materializer. 
`jsonb` in particular needs a +separate SQL design beyond this ordered-scalar materializer. diff --git a/docs/reference/encrypted-domain-generator.md b/docs/reference/encrypted-domain-generator.md deleted file mode 100644 index 770cbeaf..00000000 --- a/docs/reference/encrypted-domain-generator.md +++ /dev/null @@ -1,483 +0,0 @@ -# Encrypted-Domain Code Generator - -How the Rust `eql-codegen` crate turns the `eql-scalars` catalog into the -SQL surface for a scalar encrypted-domain type. This document describes -the generator itself — its inputs, stages, outputs, and the invariants it -enforces. The contract those outputs must satisfy is in -[`encrypted-domain-implementation-spec.md`](./encrypted-domain-implementation-spec.md); -this file describes the machine that produces them. - -The reference type is `eql_v3.int4`. `text` and `jsonb` are outside scope. - -The generator is **Rust, not Python**. There is no TOML manifest, no -`tasks/codegen/` package, no `terms.py`/`templates.py`/`spec.py`. The -source of truth is the `CATALOG` const in -[`crates/eql-scalars/src/lib.rs`](../../crates/eql-scalars/src/lib.rs); -the renderers live in [`crates/eql-codegen/`](../../crates/eql-codegen/). -Adding a scalar type is adding a `ScalarSpec` row to `CATALOG`, validated -by the compiler plus catalog `#[test]`s — never an edit to free-form -manifest data. - -## 1. Why a generator - -A single scalar encrypted-domain type emits several hundred SQL -declarations across eleven files: four domains, three extractors, dozens -of comparison wrappers and blockers, 176 `CREATE OPERATOR` statements (44 -per domain), and MIN/MAX aggregates for every ordered domain. The shape -is mechanical and the invariants are unforgiving — a `STRICT` blocker -silently bypasses its exception, a pinned `search_path` disables inlining -and reverts queries to seq scans. 
The generator exists so each new scalar -type adds one `CATALOG` row rather than ninety hand-written declarations -that must agree with each other and with `pin_search_path.sql`, -`tasks/test/splinter.sh`, and `src/encrypted_domain/functions.sql`. - -## 2. Pipeline - -`eql-codegen` is a small Rust crate with a binary entry point. The -generator runs as `cargo run -p eql-codegen` (no subcommand), which calls -`generate::generate_all` (`crates/eql-codegen/src/generate.rs`) over every -row of `eql_scalars::CATALOG`, writing each type's SQL into -`src/encrypted_domain//`. A second subcommand, -`cargo run -p eql-codegen -- list-types`, prints the catalog tokens one per -line (consumed by the fixture and matrix-inventory enumeration). The -binary's `main` (`crates/eql-codegen/src/main.rs`) recognises exactly these -two forms; any other argument is a usage error. - -`tasks/build.sh` runs `cargo run -p eql-codegen` at the start of every -`mise run build`, so the generated SQL is never checked in — the catalog -is the source of truth. (The build first sweeps every generated -`*_{types,functions,operators,aggregates}.sql` under `src/encrypted_domain` -so a type removed from `CATALOG` cannot leave orphans the `src/**/*.sql` -build glob would pick up; hand-written `*_extensions.sql` is preserved by -the name patterns.) - -Stages, in order (`generate_all` → `generate_type`): - -1. **Read the catalog.** `eql_scalars::CATALOG` is the in-binary source of - truth — a `&[ScalarSpec]`, each row a `token`, a `ScalarKind`, an - ordered `&[DomainSpec]`, and a `&[Fixture]` list - (`crates/eql-scalars/src/lib.rs`). There is no parse/validate stage at - generation time: the catalog is validated at compile time (an undefined - `Term` or unknown `ScalarKind` does not compile) and by the catalog - `#[test]`s, so by the time `generate_all` runs the data is already - well-formed. -2. 
**Resolve terms.** For each `DomainSpec`, the `Term` enum's `impl` - methods supply the extractor name, return type, JSON envelope key, - supported operators, and the SQL `-- REQUIRE:` edges those terms imply - (`Term::operators_for_terms`, `term_json_keys`, `term_requires`, - `extractor_for_operator`, `role_for_terms` — `crates/eql-scalars/src/lib.rs`). -3. **Render.** `render_types_file`, `render_functions_file`, - `render_operators_file`, and `render_aggregates_file` (the last only for - ordered domains) build the context structs in - `crates/eql-codegen/src/context.rs` and render them through embedded - **minijinja** templates (`crates/eql-codegen/templates/*.j2`, - compiled in via `include_str!` — no runtime file IO). The structural - shape of each declaration is split between the context builders (Rust) - and the templates (Jinja). -4. **Write.** `clean_generated_files` first deletes every generated `.sql` - in the target directory (recognised by the header marker) so an - abandoned domain disappears on the next regeneration; - `ensure_generated_paths_writable` then refuses to proceed if any target - path is a hand-written file lacking the marker; `write_generated_file` - writes each rendered body verbatim (`crates/eql-codegen/src/writer.rs`). - The template emits the `-- AUTOMATICALLY GENERATED FILE.` marker as its - own first line, so the writer does not prepend a header — it only uses - the marker to recognise files it owns. - -There is no caching layer and no incremental mode. Each `cargo run -p -eql-codegen` regenerates every output for every catalog type from scratch. -Regeneration is deterministic: identical catalog + renderers produce -byte-identical SQL. - -## 3. 
Catalog format - -A scalar type is one `ScalarSpec` row -(`crates/eql-scalars/src/lib.rs`): - -```rust -ScalarSpec { - token: "int4", - kind: ScalarKind::I32, - domains: &[ - DomainSpec { suffix: "", terms: &[] }, - DomainSpec { suffix: "_eq", terms: &[Term::Hm] }, - DomainSpec { suffix: "_ord_ore", terms: &[Term::Ore] }, - DomainSpec { suffix: "_ord", terms: &[Term::Ore] }, - ], - fixtures: INT4_FIXTURES, -} -``` - -Structural rules, enforced by the type system and the catalog `#[test]`s -rather than a runtime validator: - -- `token` supplies the **type token** (`int4` here). Each domain's full - name is `token` + `suffix`; `ScalarSpec::domain_name` makes the old - "domain name must start with the token" rule structural, and - `every_domain_name_starts_with_its_token` pins it. -- `kind` is a `ScalarKind` (`I16` / `I32` / `I64` / `Numeric` / `Text` / - `Jsonb`), which carries the Rust type name, the `MIN`/`MAX`/zero symbols, - and the numeric bounds. Only the integer kinds have an i128 range with - `Min`/`Max`/`Zero` sentinels; the bounded accessors `panic!` on the - others (a misuse guard, gated by `is_int()`). -- `domains` is a non-empty `&[DomainSpec]` (pinned by - `every_type_has_at_least_one_domain`). Each `DomainSpec` is a `suffix` - plus a `&[Term]`; the storage domain is `suffix: ""` with no terms. -- `fixtures` is a `&[Fixture]` (see §3a). - -The `DomainSpec` declares nothing else — no extractor names, no operator -lists, no REQUIRE edges. Every behavioural fact comes from the `Term` -enum. - -Domains may be **twinned** (`int4_ord` and `int4_ord_ore` both carry -`&[Term::Ore]`). The generator emits them as independent domains with -byte-identical SQL modulo type name (`ordered_files_byte_identical_modulo_typename`). -Twins exist so callers can choose a name that documents intent ("ordered, -regardless of mechanism" vs "ordered via ORE block") without committing to -one term family in a future migration. - -Catalog order is significant. 
The generator iterates `CATALOG` in order -(driving generation order), and iterates each spec's `domains` slice in -order — that order shows up in the generated `_types.sql` `DO` block. - -### 3a. The `fixtures` field - -The `fixtures` field is an ordered `&[Fixture]` — the single source of -truth for the type's plaintext fixture list, consumed by the SQLx fixture -generator and the matrix oracle. A `Fixture` is value-kind tagged: -`Min` / `Max` / `Zero` (the integer matrix pivots, resolved per-kind), -`Int(i128)` (an integer literal), and `Numeric`/`Text`/`Jsonb` string -variants. The `fixtures!` macro range-checks each `Int` literal against the -kind at compile time (`N(-40000)` for an `i16` kind does not compile): - -```rust -const INT4_FIXTURES: &[Fixture] = fixtures!(int i32; - Min, N(-100), N(-1), Zero, N(1), N(2), N(5), N(10), N(17), N(25), - N(42), N(50), N(100), N(250), N(1000), N(9999), Max); -``` - -Catalog `#[test]`s enforce a **distinct-plaintext contract** plus the -matrix-pivot requirement: `fixture_values_are_distinct_by_resolved_number` -rejects duplicates against the resolved value (so both copy-paste dups and -sentinel/literal aliases fail), `fixtures_include_min_max_and_zero` requires -`Min`, `Max`, and zero for integer kinds, and -`every_fixture_value_is_within_kind_bounds` keeps every resolved value in -range. These are the compile/test-time analogue of the old `load_spec` -validation. - -The plaintext value list is **not** rendered to a generated file. The -`int_values!` macro (next to `CATALOG`) materialises a `Fixture` list into -a typed `pub const _VALUES: &[]` at compile time -(`INT4_VALUES`, `INT2_VALUES`). Both consumers reference that single symbol -— the fixture generator and the matrix oracle's `FIXTURE_VALUES` — so the -oracle cannot drift from the values the generator encrypts. There is no -committed `_values.rs`: a Rust source of truth does not round-trip -through generated Rust. (The old generated, committed file is gone.) 
The -exact materialised list is pinned by the catalog's `values_tests`. - -## 4. Term catalog - -The `Term` enum (`crates/eql-scalars/src/lib.rs`) defines every term the -materializer recognises. The `json_key`/`extractor`/`returns`/`ctor` -values are the cross-schema SQL contract — changing one is a generated-SQL -behaviour change, not a refactor. - -| Term | JSON key | Extractor | Returns | Operators | -| ----- | -------- | ----------- | -------------------------------- | -------------------------- | -| `Hm` | `hm` | `eq_term` | `eql_v2.hmac_256` | `=` `<>` | -| `Ore` | `ob` | `ord_term` | `eql_v2.ore_block_u64_8_256` | `=` `<>` `<` `<=` `>` `>=` | - -The index-term return types (`eql_v2.hmac_256`, -`eql_v2.ore_block_u64_8_256`) live in `eql_v2` and are referenced -cross-schema; the domains, extractors, and wrappers live in `eql_v3`. - -Adding a term is a code change to the `Term` enum's `impl` methods -(`json_key`, `extractor`, `returns`, `ctor`, `role`, `operators`, -`requires`) with matching `#[test]`s (`term_tests` / `term_helper_tests`) -— never a free-form catalog field. The `Term` enum is the only source of -operator support, extractor identity, and REQUIRE edges; a `DomainSpec` is -a thin selector over it. - -## 5. The operator surface - -`crates/eql-codegen/src/operator_surface.rs` enumerates the 20-operator -surface every generated domain declares (`OPERATORS`): - -- **Comparison operators**: `=` `<>` `<` `<=` `>` `>=` `@>` `<@` -- **Path-selector operators**: `->` `->>` -- **Native `jsonb` operators**: `?` `?|` `?&` `@?` `@@` `#>` `#>>` `-` `#-` `||` - -Each operator carries its PostgreSQL-shaped signatures. The comparison -operators use the three symmetric shapes — `(domain, domain)`, -`(domain, jsonb)`, `(jsonb, domain)`; the path and native operators use -only the shapes PostgreSQL exposes for `jsonb` itself. Summed across all -20 operators, that is **44 `CREATE OPERATOR` statements per domain** -(`operators_file_has_forty_four`). 
- -Whether an operator routes to a wrapper or a blocker is a per-domain -decision driven by the domain's terms (`Term::operators_for_terms`), not a -property of the operator. Supported operators are emitted with full planner -metadata (`COMMUTATOR`, `NEGATOR`, `RESTRICT`, `JOIN` selectivity -estimators) and back onto inlinable wrappers; unsupported operators carry -minimal metadata and back onto blockers (`operator_entry` only renders -metadata when the operator is supported on that domain). - -Path operators always back onto blockers — neither current term enables -them. The native `jsonb` operators are blocker-only. Untyped string -literals are a PostgreSQL resolver edge: `? 'c'` can still select the -built-in `jsonb` operator, while `? 'c'::text` and bound text parameters -select the generated blocker. - -A live-DB structural guard -(`tests/sqlx/tests/encrypted_domain/family/jsonb_operator_surface.rs`) -queries `pg_operator` for every operator with a `jsonb` argument and -asserts the set is a subset of the surface this module enumerates, so a -future PostgreSQL version that adds a `jsonb` operator nobody enumerated -here fails the test rather than silently routing an encrypted column to -native plaintext-`jsonb` semantics. The `operator_surface` unit tests pin -the Rust surface (20 operators, signatures, metadata); the live-DB test -mirrors it. - -## 6. Generated outputs - -For a type with `D` domains of which `A` are ordered (ord-capable), the -generator writes `1 + 2D + A` SQL files into -`src/encrypted_domain//`. For `int4` (`D = 4`, `A = 2`): eleven SQL -files. The SQL outputs are **gitignored** — -`.gitignore` excludes `src/encrypted_domain/*/*_{types,functions,operators,aggregates}.sql`, -and `tasks/build.sh` regenerates them at the start of every build. There is -**no per-type codegen task**: one `cargo run -p eql-codegen` regenerates -every catalog type in a single deterministic run. 
- -| File | Content | -| --------------------------------- | ---------------------------------------------------------------------------------------- | -| `_types.sql` | Single idempotent `DO` block creating every domain; each domain `CHECK` pins the payload version (`VALUE->>'v' = '2'`) and required envelope/ciphertext/term keys; one `--! @brief` per domain | -| `_functions.sql` | One extractor per unique term, then 44 wrappers-or-blockers covering the surface | -| `_operators.sql` | 44 `CREATE OPERATOR` statements with planner metadata on supported ops | -| `_aggregates.sql` | MIN/MAX state functions + `CREATE AGGREGATE`; emitted only for ordered (ord-capable) domains | - -Every file: - -- Opens with the `-- AUTOMATICALLY GENERATED FILE.` marker (the project-wide - marker `docs:validate` greps on to skip generated SQL — - `crates/eql-codegen/src/consts.rs`). -- Declares its `-- REQUIRE:` edges in dependency order — types files - require `src/schema-v3.sql`; function files require schema, types, and - `src/encrypted_domain/functions.sql` plus each term's `requires` set; - operator files require `src/schema-v3.sql`, types, and their domain's - function file; aggregate files require `src/schema-v3.sql`, types, and - their domain's function and operator files. -- Carries Doxygen `--! @file` / `--! @brief` headers describing its role. - -### Function-count totals per domain - -| Domain terms | Extractors | Wrappers | Blockers | Functions | Operators | -| ---------------- | ---------: | -------: | -------: | --------: | --------: | -| none | 0 | 0 | 44 | 44 | 44 | -| `&[Term::Hm]` | 1 | 6 | 38 | 45 | 44 | -| `&[Term::Ore]` | 1 | 18 | 26 | 45 | 44 | - -Six wrappers for `Hm` = `=` and `<>` × three shapes. Eighteen for `Ore` -= six operators × three shapes. The 44-operator total never moves; the -wrapper/blocker split is what shifts, and native `jsonb` fallback -operators are always blockers. 
(Pinned by `storage_functions_file_is_all_blockers`, -`eq_functions_file_counts`, `ore_functions_file_counts`.) - -The table above covers `_functions.sql` only. Ordered domains -additionally emit `_aggregates.sql` — two state functions -(`min_sfunc`, `max_sfunc`) and two `CREATE AGGREGATE` declarations -(`eql_v3.min`, `eql_v3.max`). Each aggregate declares -`combinefunc = ` and `parallel = safe`: min/max are associative, so -the state function doubles as the combine function, enabling partial and -parallel aggregation on large `GROUP BY` ORE workloads with no decryption. - -## 7. Invariants the generator enforces - -The generator's job is partly to write SQL and partly to make incorrect -SQL unreachable. Invariants encoded in the renderers / templates and -guarded by `#[test]`s in `crates/eql-codegen/src/generate.rs`: - -- **Blockers are never `STRICT` and always `plpgsql`.** The - unsupported-operator template emits each blocker as `IMMUTABLE PARALLEL - SAFE` / `LANGUAGE plpgsql` without `STRICT`, so a `NULL` argument still - reaches the `RAISE`. `blockers_are_never_strict_and_always_plpgsql` - asserts the storage domain (all blockers) contains no `STRICT` and as - many `LANGUAGE plpgsql` as `CREATE FUNCTION`. A `LANGUAGE sql` blocker - would be inlinable and could be elided when the result is provably - unused; `plpgsql` is opaque to the planner so the `RAISE` survives. -- **Wrappers and extractors are inlinable SQL.** They emit `LANGUAGE sql - IMMUTABLE STRICT PARALLEL SAFE` with a single-statement `SELECT` and **no - `SET search_path`** (`inlinable_functions_have_no_set_search_path`). A - pinned `search_path` disables inlining. `tasks/pin_search_path.sql` - recognises these functions structurally — by language (`sql`), volatility - (`IMMUTABLE`), and a jsonb-backed `DOMAIN` argument in the `eql_v3` - schema — and leaves them unpinned, with no per-type edit. 
-- **Aggregate state functions are the deliberate exception.** `min_sfunc` / - `max_sfunc` are `LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE` *with* - a pinned `SET search_path` (`aggregate_state_functions_are_plpgsql_not_inlinable`). - They are aggregate transition functions, not index expressions, so - pinning is correct; the generated `min` / `max` aggregates are - allowlisted by name in `splinter.sh`. -- **SQL-literal injection is structurally prevented.** Every string - interpolated into a single-quoted SQL literal — payload keys, operator - symbols, domain names in `RAISE` messages — passes through `sql_str` - (`crates/eql-codegen/src/consts.rs`), which doubles embedded single - quotes. Today's catalog strings are all quote-free so it is a no-op, but - it guarantees a future quote-bearing string cannot break out of its - literal (`unsupported_entry_preserves_operator_literal_and_domain_lit_is_escaped`, - `domain_block_escapes_quote_bearing_name`). -- **No domain-over-domain.** Every domain is `CREATE DOMAIN eql_v3. - AS jsonb`, never `AS ` (`types_file_has_all_four_domains`). - PostgreSQL resolves operators against the underlying base type; a derived - domain would silently bypass the fixed operator surface. -- **No operator class on a domain.** The generator emits operators, not - operator classes. Callers index through the extractor function (e.g. - `USING btree (eql_v3.ord_term(col))`), whose return type already carries - a default opclass. -- **Ownership boundary.** `is_generated` recognises owned files by their - header marker; `ensure_generated_paths_writable` refuses to overwrite - anything else, and `clean_generated_files` deletes only files carrying - the marker (`crates/eql-codegen/src/writer.rs`). A hand-written file at a - generated path is a hard error, not a silent clobber. Stale generated - files for removed domains are cleaned before the new files land. - -## 8. Extension files - -`_extensions.sql` is the hand-written sibling. 
The generator never -creates, lists, or cleans it; it has no auto-generated header and must -declare its own `-- REQUIRE:` edges. Use it for behaviour that's specific -to the type and not part of the fixed surface — e.g. cross-domain casts, -helper functions, type-specific constraints. Unlike the generated -siblings, `_extensions.sql` IS committed. (Neither `int4` nor `int2` -ships one today — there is no committed `*_extensions.sql` in the tree.) - -`tasks/pin_search_path.sql` describes the fallback marker for -inline-critical extension functions that take no domain argument and so -escape the structural skip: - -```sql -COMMENT ON FUNCTION eql_v2.my_helper(...) IS 'eql-inline-critical: ...'; -``` - -The generator does **not** emit this marker; every function it produces -takes a domain argument and is covered by the structural skip -intrinsically. - -## 9. Lint and test integration - -The generator depends on two pieces of build tooling recognising its -output without per-type edits: - -- **`tasks/pin_search_path.sql`** — structural skip identifies - encrypted-domain functions by language (`sql`), volatility (`IMMUTABLE`), - and the presence of at least one argument typed as a jsonb-backed - `DOMAIN` in the `eql_v3` schema. New scalar types need no edit here. -- **`tasks/test/splinter.sh`** — name-based allowlist. The converged - wrapper / extractor names (`eq`, `neq`, `lt`, `lte`, `gt`, `gte`, - `eq_term`, `ord_term`) plus the generated `min` / `max` aggregates are - covered by `eql_v3`-schema entries. Splinter matches by name only, so a - new scalar type that uses the catalog extractors inherits coverage. - Adding a new term whose extractor has a new name requires a splinter - entry. - -## 10. Tests - -The generator's tests are Rust, run by `mise run test:codegen` -(`cargo test -p eql-scalars -p eql-codegen`) — no database required. The -broader `mise run test:crates` adds `cargo clippy ... -D warnings`. 
- -- **`eql-scalars` unit tests** — `rust_tests`, `term_tests`, - `term_helper_tests`, `fixture_tests`, `catalog_tests`, `invariant_tests`, - `values_tests` over `CATALOG`, the `Term`/`ScalarKind`/`Fixture` impls, - and the materialised `_VALUES` consts - (`crates/eql-scalars/src/lib.rs`). -- **`eql-codegen` unit tests** — file counts, language/volatility - invariants, escaping guards, and twin byte-identity - (`crates/eql-codegen/src/generate.rs` `#[cfg(test)]` module). -- **The parity gate** — `mise run codegen:parity` - (`tasks/codegen-parity.sh`). It runs `cargo run -p eql-codegen` into the - real tree, then: - 1. compares the int4 generated SQL **file set** against the golden under - `tests/codegen/reference/int4/*.sql`, excluding committed hand-written - files (`comm -23` of `ls` against `git ls-files`), so an extra or - dropped generated file fails; and - 2. diffs each golden file **byte-for-byte** against its generated - counterpart, after dropping the golden's single leading - `-- REFERENCE:` provenance line (`tail -n +2`). Both bodies start with - the `-- AUTOMATICALLY GENERATED FILE.` marker, so no header strip is - needed. - The same byte-for-byte assertion runs in-crate as - `crates/eql-codegen/tests/parity.rs` (`rust_generator_matches_int4_golden_files`) - and in the `generate.rs` golden tests. The golden reference — not any - Python oracle — is the sole contract that survives generator refactors. - -CI runs these in three jobs in `.github/workflows/test-eql.yml`: the -`test:crates` job (`Rust workspace crates`) compiles/lints/tests the -crates, the `codegen` job (`Encrypted-domain codegen`) runs `mise run -codegen:parity`, and the `matrix-coverage` job runs `mise run -test:matrix:inventory`. The codegen job is a prerequisite of the -PostgreSQL test matrix, so generated-SQL drift fails CI before any database -test runs. - -## 11. Adding a new scalar type - -From a generator perspective: - -1. 
**Add a `ScalarSpec` row to `eql_scalars::CATALOG`** - (`crates/eql-scalars/src/lib.rs`) — `token`, `kind`, the `domains` - slice, and the `fixtures` list. Term names must be `Term` variants and - the kind must be a `ScalarKind` variant, or it does not compile. If the - type needs a new scalar width, add a `ScalarKind` variant (with its - rust-type name, `MIN`/`MAX`/zero symbols, and bounds) and unit-test its - `impl`. New term behaviour belongs in the `Term` enum's `impl`, not in - catalog data. -2. **Materialise the value list** with `int_values!(_VALUES, , - );` next to `CATALOG`, and pin it with a `values_tests` - assertion. This is the single source the SQLx matrix reads as - `FIXTURE_VALUES`. There is nothing to regenerate-and-commit on the test - side — it is a compile-time const, not a generated file. -3. **Regenerate.** `cargo run -p eql-codegen` (or just `mise run build` — - the build runs the generator first). One run regenerates every catalog - type; there is no per-type codegen task. The generated - `*_{types,functions,operators,aggregates}.sql` are gitignored and never - committed. -4. **Hand-write** `_extensions.sql` if the type needs SQL beyond the - fixed surface, with explicit `-- REQUIRE:` edges. This file IS committed. -5. **Do not add a `tests/codegen/reference//` baseline.** `int4` is - the sole golden master for the type-generic generator: the templates are - pure token substitution, so a per-type baseline can only fail where - `int4`'s already would. Drift protection for the new type comes from the - `int4` reference (shared templates + `Term` enum), the catalog - `values_tests` pinning the materialised `_VALUES`, the - catalog/generator `#[test]`s, and the `ordered_numeric_matrix!` SQLx - suite (behaviour, not bytes). -6. **Wire the SQLx matrix oracle and snapshot the inventory.** The - implementation spec §2 lists the hand-maintained registration files. 
- Then run `mise run test:matrix:inventory`: it normalizes each present - type's `scalars::::*` test-name set to ``, asserts it equals - the single canonical `tests/sqlx/snapshots/matrix_tests.txt`, and - cross-checks the present type set against `cargo run -p eql-codegen -- - list-types`. There is **no per-type snapshot** — the per-type - `_matrix_tests.txt` files were collapsed into one token-normalized - snapshot. You only regenerate `matrix_tests.txt` when the macro's - emitted name set itself changes. A catalog type added without its matrix - wiring fails the cross-check (catalog has the type, binary has no - `scalars::::` tests). See `tests/sqlx/snapshots/README.md` and - the implementation spec §2 / §8. - -Adding a new **term** is a bigger move — edit the `Term` enum's `impl` -methods, add `#[test]`s, audit `splinter.sh` for a name collision if the -extractor name is new, and (because it changes the int4 surface) update the -golden reference under `tests/codegen/reference/int4/`. - -## 12. Out of scope - -`text` and `jsonb` are not materialised through this generator. The -`ScalarKind` enum carries `Text`/`Numeric`/`Jsonb` variants and the -`Fixture` enum carries their string-backed shapes at the capability layer, -but `CATALOG` declares only the integer scalars today, so no `text`/`jsonb` -SQL surface is generated. Text and JSONB encrypted behaviour lives on the -composite `eql_v2_encrypted` type and its hand-written operator surface in -`src/encrypted/` and `src/operators/`, not the scalar materializer. -`jsonb` in particular needs a separate SQL design beyond this -ordered-scalar materializer. 
diff --git a/docs/reference/encrypted-domain-implementation-spec.md b/docs/reference/encrypted-domain-implementation-spec.md deleted file mode 100644 index cf1a4b19..00000000 --- a/docs/reference/encrypted-domain-implementation-spec.md +++ /dev/null @@ -1,400 +0,0 @@ -# Encrypted Domain Type Implementation Spec - -This is the scalar encrypted-domain generator contract used by `int4`. -It applies to scalar domains whose searchable payloads are represented by -the fixed `Term` catalog in `crates/eql-scalars/src`. - -`text` and `jsonb` are outside this scalar materializer. - -## 1. Model - -Each generated domain is a concrete `jsonb` domain in the `eql_v3` -schema named `eql_v3.` (dropped by `DROP SCHEMA eql_v3 CASCADE`; -survives an `eql_v2` uninstall). A type's catalog row is intentionally -small — a `ScalarSpec` whose `domains` field lists each generated domain -as a `DomainSpec` (a `suffix` plus the fixed terms it carries): - -```rust -ScalarSpec { - token: "int4", - kind: ScalarKind::I32, - domains: &[ - DomainSpec { suffix: "", terms: &[] }, - DomainSpec { suffix: "_eq", terms: &[Term::Hm] }, - DomainSpec { suffix: "_ord_ore", terms: &[Term::Ore] }, - DomainSpec { suffix: "_ord", terms: &[Term::Ore] }, - ], - fixtures: &[/* see §9 */], -} -``` - -The `token` supplies the type token; each domain's full name is `token` -+ `suffix`. The generator emits domains in the order the `domains` slice -declares them, so order the slice the way you want the generated output to -read. Term capabilities are fixed by the `Term` enum -(`crates/eql-scalars/src`): - -| Term | JSON key | Extractor | Return type | Supported operators | -|---|---|---|---|---| -| `Hm` | `hm` | `eq_term` | `eql_v2.hmac_256` | `=` / `<>` | -| `Ore` | `ob` | `ord_term` | `eql_v2.ore_block_u64_8_256` | `=` / `<>` / `<` / `<=` / `>` / `>=` | - -For current `int4`, domains carrying `Ore` use JSON key `ob`, extractor -`ord_term`, and the ORE block supports equality plus ordering. 
A type -that needs a non-ORE equality term on an ordered domain needs a new -`Term` design, not a catalog flag. - -The row above declares two ordered domains, `int4_ord` and -`int4_ord_ore`, carrying the same term. They are intentional twins: the -generator emits byte-identical SQL (modulo type name) so callers can pick -a name that documents intent without committing to a term family in a -future migration. - -## 2. Checklist - -- [ ] Add a row to the Rust catalog `eql-scalars::CATALOG` - (`crates/eql-scalars/src/lib.rs`). A `ScalarSpec` declares: - - - `token` — the type token (e.g. `int8`); supplies `` everywhere. - - `kind` — the `ScalarKind` (`I16` / `I32` / `I64`), which carries the - Rust type name, the `MIN`/`MAX`/zero symbols, and the numeric bounds. - - `domains` — a `&[DomainSpec]`, each a `suffix` + the fixed `Term`s it - carries. The storage domain is suffix `""` with no terms; `_eq => [Hm]`; - `_ord` and `_ord_ore => [Ore]`. - - `fixtures` — the `Fixture` value list (see §9). It MUST include `Min`, - `Max`, and zero. - - Terms determine operator support: `Hm` provides `=` / `<>`; `Ore` - provides `=` / `<>` / `<` / `<=` / `>` / `>=`. There is no TOML manifest - and no Python: the catalog is the source of truth, validated by the - compiler (an undefined `Term` or unknown `ScalarKind` is a compile error) - plus catalog `#[test]`s over `CATALOG`. -- [ ] Materialise the type's plaintext fixture list as a typed const next to - `CATALOG`: add `int_values!(_VALUES, , );` (e.g. - `int_values!(INT8_VALUES, i64, INT8);`). The macro resolves the row's - `Fixture` list into a compile-time `&'static []` — the single source the - SQLx matrix reads as `FIXTURE_VALUES`. Pin the exact list with a - `values_tests` assertion. This replaces the old generated, committed - `_values.rs`. 
-- [ ] **If `` needs a new scalar width**, add a `ScalarKind` enum variant in - `crates/eql-scalars/src/lib.rs` with its rust-type name, `MIN`/`MAX`/zero - symbols, and numeric bounds, and unit-test its `impl` methods. New term - behaviour likewise belongs in the `Term` enum's `impl` methods with tests - — not in free-form catalog data. -- [ ] Run `cargo run -p eql-codegen` to materialise the generated SQL - (`src/encrypted_domain//_{types,functions,operators,aggregates}.sql`, - gitignored), or just `mise run build` — every build runs the generator - first. There is no per-type codegen task: one run generates every type from - `CATALOG`. The plaintext fixture list is **not** generated — it is - materialised from the catalog row at compile time (see the next step), so - there is nothing to regenerate-and-commit on the test side. -- [ ] Generated `*_types.sql` / `*_functions.sql` / `*_operators.sql` / - `*_aggregates.sql` are gitignored and never committed. The catalog - (`eql-scalars::CATALOG`) plus the `eql-codegen` renderers are the source - of truth. Change the catalog and rebuild; do not hand-edit generated SQL. -- [ ] Put optional hand-written SQL in - `src/encrypted_domain//_extensions.sql` with explicit - `-- REQUIRE:` edges. This file IS committed. -- [ ] Do **not** add a `tests/codegen/reference//` baseline. `int4` is the - single golden master for the type-generic generator: the SQL templates are - pure token substitution, so a per-type baseline can only fail when `int4`'s - already would. Drift protection for the new type comes from the `int4` - reference, the catalog `values_tests` pinning the materialised - `eql_scalars::_VALUES` const, the catalog/generator `#[test]`s - (`cargo test -p eql-scalars -p eql-codegen`), and the - `ordered_numeric_matrix!` SQLx suite (behaviour, not bytes). -- [ ] Wire the SQLx matrix oracle. 
The generated SQL is enough to install the - domains, but the `ordered_numeric_matrix!` suite only runs once the Rust - harness knows about the scalar. Copy each piece from the `int4` - reference — these are hand-maintained registration lists (the Phase-4 - `scalar_types!` registry, a separate plan, will collapse them): - - | File | Add | - |------|-----| - | `tests/sqlx/src/fixtures/eql_plaintext.rs` | A sealed `EqlPlaintext` impl for the scalar's Rust type: `impl Sealed for {}`, a `PlaintextSqlType` const for its base column type, `impl EqlPlaintext for ` (`CAST`, `PLAINTEXT_SQL_TYPE`, `to_plaintext` → the right `Plaintext` variant), plus the two `#[test]` casts. | - | `tests/sqlx/src/fixtures/eql_v2_.rs` | `use eql_scalars::_VALUES as VALUES;` then `crate::scalar_fixture!("eql_v2_", , VALUES);`. | - | `tests/sqlx/src/fixtures/mod.rs` | `pub mod eql_v2_;`. | - | `tests/sqlx/tests/generate_all_fixtures.rs` | An arm in `generate_for_token`: `"" => fixtures::eql_v2_::spec().run().await,`. The match is exhaustive over the catalog — a catalog token with no arm fails the generator loudly. | - | `tests/sqlx/src/scalar_domains.rs` | `impl ScalarType for ` — `PG_TYPE` (the base PG type, e.g. `"int8"`) and `FIXTURE_VALUES = eql_scalars::_VALUES`. | - | `tests/sqlx/tests/encrypted_domain/scalars/.rs` | `ordered_numeric_matrix! { suite = , scalar = , eql_type = "eql_v2_" }`. | - | `tests/sqlx/tests/encrypted_domain/scalars/mod.rs` | `pub mod ;`. | - - `` is the scalar's Rust type (`i32` for `int4`, `i16` for `int2`). - Forget one and the matrix simply does not run for the type — the matrix - inventory cross-check (next step) surfaces it, because the catalog has the - type but the binary has no `scalars::::` tests. -- [ ] Run `mise run test:matrix:inventory`. 
It verifies every present type's - token-normalized `scalars::::*` name set equals the single canonical - `tests/sqlx/snapshots/matrix_tests.txt`, and cross-checks the present type - set against `cargo run -p eql-codegen -- list-types`. You do **not** edit a - per-type snapshot — there is one canonical snapshot; you only regenerate it - when the macro's emitted name set itself changes. A catalog type missing - its matrix wiring fails the cross-check. See §8 and - `tests/sqlx/snapshots/README.md`. -- [ ] Run `mise run test:codegen` (`cargo test -p eql-scalars -p eql-codegen`), - the relevant SQLx suites, and the PostgreSQL matrix before merging. - -## 3. Domain Generation - -The generator emits `src/encrypted_domain//_types.sql` (gitignored; -materialised on every `mise run build` and every `cargo run -p eql-codegen`) -with one idempotent `DO $$ ... $$` block. Domain `CHECK` -constraints always require: - -- fixed envelope keys `v` and `i`; -- ciphertext key `c`; -- catalog JSON keys for the listed terms; -- the envelope version value: `VALUE->>'v' = '2'`, matching the repo-wide - `eql_v2._encrypted_check_v` rule (`src/encrypted/constraints.sql`). - -For example, a domain with `["ore"]` requires `v`, `i`, `c`, and `ob` present, -with `v` pinned to `2`. Beyond key presence and the version value, a malformed -term can still fail later inside its extractor unless a future catalog design -adds stronger validation. - -Every generated domain is a concrete domain over `jsonb` in the `eql_v3` -schema. Do not define one generated domain over another generated domain; -PostgreSQL resolves operators against the underlying base type in ways -that bypass the fixed operator surface. - -## 4. Extractors And Wrappers - -Extractor names and return types come from the `Term` enum -(`crates/eql-scalars/src`), not from catalog data. 
Generated extractors and -supported comparison wrappers are inline-friendly SQL functions: - -```sql -LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE -AS $$ SELECT ... $$; -``` - -Extractors and comparison wrappers must not carry a pinned `search_path` -— a `SET` clause disables inlining and reverts index-backed queries to -seq scans. The build tooling recognises these generated functions -structurally, so the generator does not emit `eql-inline-critical` -markers. Aggregate state functions are the one deliberate exception — see -§5 — because they are never index expressions. - -Unsupported operators route to blockers. Blockers are `plpgsql`, -`IMMUTABLE`, `PARALLEL SAFE`, and intentionally not `STRICT`. Both -choices are deliberate: - -- **`plpgsql`, not `sql`.** A `LANGUAGE sql` body would be inlinable, and - the planner could elide the call when the result is provably unused - (dead `CASE` branch, folded predicate), letting a blocked operator - appear to succeed. `plpgsql` is opaque to the planner, so the call — - and its `RAISE` — always survives. -- **Not `STRICT`.** A `STRICT` blocker lets PostgreSQL skip the body and - return `NULL` on a `NULL` argument, silently bypassing the - unsupported-operator exception. - -## 5. Operators - -Every generated domain declares supported scalar comparison operators plus -blockers for the native `jsonb` operator surface that PostgreSQL could -otherwise reach through domain-to-base-type fallback. Each domain emits -44 `CREATE OPERATOR` statements. Supported operators route to wrappers; -everything else routes to blockers. 
- -| Operators | Forms | -|---|---| -| `=` `<>` `<` `<=` `>` `>=` `@>` `<@` | `(domain, domain)` · `(domain, jsonb)` · `(jsonb, domain)` | -| `->` `->>` | `(domain, text)` · `(domain, integer)` · `(jsonb, domain)` | -| `?` | `(domain, text)` | -| `?\|` `?&` | `(domain, text[])` | -| `@?` `@@` | `(domain, jsonpath)` | -| `#>` `#>>` `#-` | `(domain, text[])` | -| `-` | `(domain, text)` · `(domain, integer)` · `(domain, text[])` | -| `\|\|` | `(domain, domain)` · `(domain, jsonb)` · `(jsonb, domain)` | - -Function counts: - -| Domain terms | Extractors | Wrappers | Blockers | Functions | Operators | -|---|---:|---:|---:|---:|---:| -| none | 0 | 0 | 44 | 44 | 44 | -| `hm` | 1 (`eq_term`) | 6 | 38 | 45 | 44 | -| `ore` | 1 (`ord_term`) | 18 | 26 | 45 | 44 | - -Supported comparison operators carry planner metadata such as -`COMMUTATOR`, `NEGATOR`, `RESTRICT`, and `JOIN`. Blocker operators keep -minimal metadata because they should never be planner-visible supported -paths. - -PostgreSQL's operator resolver still prefers the built-in `jsonb` operator -for untyped string literals in forms such as `payload::eql_v3.int4 ? 'c'`. -Use typed parameters or explicit casts (`'c'::text`) to route those forms -to the generated blocker. The generated surface blocks the typed native -operator shapes exposed by the catalog. - -### Aggregates - -Each ordered (ord-capable) domain additionally gets a generated -`_aggregates.sql` file declaring `MIN` / `MAX`: - -- two state functions, `eql_v3.min_sfunc` and `eql_v3.max_sfunc`, and -- two aggregates, `eql_v3.min()` and `eql_v3.max()`. - -Comparison routes through the domain's `<` / `>` operator (the ORE block -term — no decryption). The state functions are `LANGUAGE plpgsql -IMMUTABLE STRICT PARALLEL SAFE` **with** a pinned `SET search_path`. This is -the one place the "no pinned `search_path`" rule of §4 does not apply: -aggregate transition functions are never index expressions, so pinning is -correct. 
`STRICT` makes PostgreSQL seed the running state with the first -non-NULL value and skip NULLs, so an all-NULL group returns NULL. - -Each `CREATE AGGREGATE` declares `combinefunc = ` and -`parallel = safe`: min/max are associative, so the state function doubles as -the combine function, and with a `PARALLEL SAFE` sfunc/combinefunc -PostgreSQL can use partial and parallel aggregation on the large `GROUP BY` -ORE workloads these aggregates exist to serve — still with no decryption. -Storage-only and equality-only domains have no comparator and emit no -aggregate file. - -## 6. Extension Files - -Optional hand-written SQL beyond the fixed scalar surface belongs in: - -```text -src/encrypted_domain//_extensions.sql -``` - -The generator must not create this file, list it in the catalog, add an -auto-generated header, or clean it during regeneration. The file must -declare its own `-- REQUIRE:` edges, usually to `_types.sql` and -whichever generated function or operator file it extends. Unlike the -generated siblings, `_extensions.sql` IS committed. - -## 7. Indexing - -Do not create operator classes on generated domains. Index through -the extractor: - -```sql -CREATE INDEX ... ON table_name USING btree (eql_v3.ord_term(col)); -CREATE INDEX ... ON table_name USING hash (eql_v3.eq_term(col)); -``` - -The extractor return type must already have the needed PostgreSQL access -method support. `ore` depends on -`src/ore_block_u64_8_256/functions.sql` and -`src/ore_block_u64_8_256/operators.sql`; `hm` depends on -`src/hmac_256/functions.sql`. - -## 8. 
Tests - -Cover each generated domain with SQLx tests appropriate to its terms: - -- supported operators return correct rows for all argument forms; -- unsupported operators raise the expected error for all forms; -- blockers raise on `NULL` input; -- supported wrappers return `NULL` for `NULL` operands; -- functional indexes engage and return correct rows; -- constant-on-left comparisons engage the index where applicable; -- domain `CHECK` rejects non-object and under-populated payloads; -- real typed columns are tested, not only cast literals; -- generated ordered-domain twins remain byte-identical modulo type name - (the shared generator is anchored by the `int4` golden master in - `tests/codegen/reference/int4/` via the eql-codegen parity test; - new types add no baseline of their own — see §2). - -For ordered numeric scalars this coverage is generated by the -`ordered_numeric_matrix!` convention wrapper in `tests/sqlx/src/matrix.rs`: -one `impl ScalarType` (`tests/sqlx/src/scalar_domains.rs`) plus a single -invocation taking `suite`, `scalar`, and `eql_type`. The matrix derives -its comparison pivots — the scalar's `MIN`, `MAX`, and zero -(`Default::default()`) — from the type rather than a hand-written list, so -the invocation carries no pivot argument. Equality-only scalars use the -sibling `eq_only_scalar_matrix!`. The `matrix.rs` module header is the -canonical, current list of the test categories the matrix emits (sanity, -correctness, cross-shape, supported-NULL, blocker raises, index engagement, -ORDER BY, ORDER BY USING) — read it rather than maintaining a duplicate -count here. - -For ordered `int4`, keep the assertion that distinct plaintext values -produce distinct ORE blocks. Do not add assertions for term behavior that -the catalog does not promise. 
- -### Matrix coverage inventory snapshot - -The *set of test names* the matrix emits is guarded by ONE committed, -token-normalized snapshot at `tests/sqlx/snapshots/matrix_tests.txt` — the -sorted inventory of every `scalars::::*` test name with the type token -replaced by the literal ``. (The per-type `_matrix_tests.txt` files are -gone: they were byte-identical modulo the token, so one canonical set plus a -per-type normalize-and-compare carries the same signal at a fraction of the -committed surface.) This is the guard that catches a silently dropped, renamed, -or `#[cfg]`-gated matrix test, a behaviour the SQLx assertions above cannot see. -The snapshot is a committed test baseline, **not** gitignored generated SQL. - -`mise run test:matrix:inventory` discovers the present scalar types from the -`encrypted_domain` binary's `--list`, normalizes each type's token to ``, -asserts every type's set equals the canonical snapshot, and cross-checks the -discovered type set against `cargo run -p eql-codegen -- list-types` (the catalog -is the single source). The CI `matrix-coverage` job gates it. **`tests/sqlx/snapshots/README.md` -is the source of truth** for the mechanics (pinned feature set, the catalog -cross-check, the CI diff, and when to regenerate); see it rather than -duplicating the detail here. - -## 9. Fixtures - -Fixture generation should use real encrypted payloads produced through -CipherStash Proxy. A single payload table may carry every term needed by -the generated domains for that type. For `int4`, the payloads carry `c`, -`hm`, and `ob`; the equality domain reads `hm`, and ordered domains read -`ob`. - -Choose values so range operators produce distinguishable result counts, -include useful boundaries, and cover omitted-term negative cases. 
For a -scalar driven by `ordered_numeric_matrix!`, the fixture **must** include -the type's `MIN`, `MAX`, and zero (`Default::default()`): the matrix uses -those three as comparison pivots and fetches each one's ciphertext from the -fixture via `fetch_fixture_payload`, which fails loudly if the row is -absent. - -### Single-sourcing the value list - -The plaintext value list is declared **once**, in the catalog row's `fixtures` -field, and materialised into a typed Rust const — never hand-maintained in two -places: - -```rust -fixtures: &[Fixture::Min, Fixture::N(-100), Fixture::N(-1), Fixture::Zero, - Fixture::N(1), Fixture::N(2), Fixture::N(5), Fixture::N(10), - Fixture::N(17), Fixture::N(25), Fixture::N(42), Fixture::N(50), - Fixture::N(100), Fixture::N(250), Fixture::N(1000), - Fixture::N(9999), Fixture::Max], -``` - -`Fixture::Min` / `Fixture::Max` / `Fixture::Zero` resolve to the scalar's Rust -named consts (for `int4`: `i32::MIN`, `i32::MAX`, `0`); every `Fixture::N(_)` is -a numeric literal validated against the `ScalarKind`'s representable range by a -catalog `#[test]` (`numeric_value` is infallible, so the range check is the -explicit invariant `every_fixture_value_is_within_kind_bounds`). The same test -enforces the matrix invariant: the set **must** include `Min`, `Max`, and zero, -or the test fails (the compile-time analogue of the old `load_spec` validation). - -The `int_values!` macro (in `crates/eql-scalars/src/lib.rs`) materialises that -`Fixture` list into a `pub const _VALUES: &[]` at compile -time, sitting next to `CATALOG`. Both consumers reference that single symbol — -the fixture generator (`fixtures::eql_v2_::spec`) and the matrix oracle -(`impl ScalarType for { const FIXTURE_VALUES = eql_scalars::_VALUES }`) -— so the oracle cannot drift from the values the generator encrypts. There is no -generated `_values.rs`: a Rust source of truth does not round-trip through -generated Rust. 
The exact list is pinned by a `values_tests` assertion, and the -`Fixture`-list invariants (`Min`/`Max`/zero present, in-bounds) by the catalog -`#[test]`s. - -## 10. Build And Verification - -- `cargo run -p eql-codegen` (optional; refreshes all generated SQL from the - catalog before a full build) -- `mise run test:codegen` (`cargo test -p eql-scalars -p eql-codegen`) -- `mise run clean && mise run build` (regenerates every type's SQL from - the catalog first, then builds the release artefacts) -- relevant SQLx suites -- `mise run test` across supported PostgreSQL versions -- `mise run --output prefix test:splinter --postgres 17` after a - PostgreSQL 17 install has built EQL - -The CI codegen job should remain a prerequisite of the PostgreSQL test -matrix so generated SQL drift is caught before database tests run. diff --git a/docs/reference/eql-functions.md b/docs/reference/eql-functions.md index e517e63e..cfca800c 100644 --- a/docs/reference/eql-functions.md +++ b/docs/reference/eql-functions.md @@ -426,7 +426,7 @@ eql_v2.ste_vec(val jsonb) RETURNS eql_v2_encrypted[] Extract the equality (`hm`) or ordering (`ob`) index term from a scalar encrypted-domain value. Generated per eq/ord-capable variant of every -scalar type — see [Encrypted-Domain Code Generator](./encrypted-domain-generator.md). +scalar type — see [Adding a Scalar Encrypted-Domain Type](./adding-a-scalar-encrypted-domain-type.md). The argument type selects the overload, and both are inlinable so a functional index built on the extractor engages. 
The extractors live in the `eql_v3` schema; their return types remain the core `eql_v2` @@ -449,7 +449,7 @@ CREATE INDEX ON users USING btree (eql_v3.ord_term(salary_encrypted)); > The full per-domain operator/wrapper/blocker surface (and the > `eql_v3.<T>` / `<T>_eq` / `<T>_ord` / `<T>_ord_ore` domain types themselves) is > documented in [SQL support](./sql-support.md#encrypted-domain-scalar-types-eql_v3t) -> and the [generator reference](./encrypted-domain-generator.md). +> and the [scalar encrypted-domain type reference](./adding-a-scalar-encrypted-domain-type.md). --- diff --git a/docs/reference/sql-support.md b/docs/reference/sql-support.md index d15be2ee..c5204850 100644 --- a/docs/reference/sql-support.md +++ b/docs/reference/sql-support.md @@ -61,7 +61,7 @@ Use the equivalent [`jsonb_path_query`](#jsonb-functions-and-selectors-enabled-b ## Encrypted-domain scalar types (`eql_v3.<T>`) -Scalar encrypted-domain types (e.g. `eql_v3.int4`; see the [generator reference](./encrypted-domain-generator.md)) are a different access model from the matrix above. Instead of configuring a search index on an `eql_v2_encrypted` column, you type the column as a specific domain *variant* whose operator surface is fixed at generation time. The index terms travel in the payload; there is no `add_search_config` step. The domains and their operator surface live in the `eql_v3` schema (dropped by `DROP SCHEMA eql_v3 CASCADE`, and they survive an `eql_v2` uninstall); their extracted index-term types remain the core `eql_v2` types. +Scalar encrypted-domain types (e.g. `eql_v3.int4`; see [Adding a Scalar Encrypted-Domain Type](./adding-a-scalar-encrypted-domain-type.md)) are a different access model from the matrix above. Instead of configuring a search index on an `eql_v2_encrypted` column, you type the column as a specific domain *variant* whose operator surface is fixed at generation time. The index terms travel in the payload; there is no `add_search_config` step.
The domains and their operator surface live in the `eql_v3` schema (dropped by `DROP SCHEMA eql_v3 CASCADE`, and they survive an `eql_v2` uninstall); their extracted index-term types remain the core `eql_v2` types. Each scalar type `<T>` generates one storage-only variant plus eq/ord query variants: diff --git a/tests/codegen/reference/README.md b/tests/codegen/reference/README.md index ae7204ab..186d6d5c 100644 --- a/tests/codegen/reference/README.md +++ b/tests/codegen/reference/README.md @@ -12,7 +12,7 @@ The parity gate runs the generator (`cargo run -p eql-codegen`, which writes the The golden reference, not any retired generator, is the sole oracle. If the generator diverges, either it regressed (fix `crates/eql-codegen`) or the reference is being updated deliberately (commit the new `int4` reference in the same PR). -See `docs/reference/encrypted-domain-generator.md` for the full generator story (manifest-free catalog, templates, term capabilities). +See `docs/reference/adding-a-scalar-encrypted-domain-type.md` §6 for the full generator story (catalog source of truth, minijinja templates, term capabilities). ## No committed fixture values diff --git a/tests/sqlx/snapshots/README.md b/tests/sqlx/snapshots/README.md index 213b034f..ed363608 100644 --- a/tests/sqlx/snapshots/README.md +++ b/tests/sqlx/snapshots/README.md @@ -60,8 +60,9 @@ catalog cross-check) fails the job. ## When you must update this - **Adding a new scalar type** → add the catalog row in - `eql-scalars::CATALOG`, wire the SQLx matrix oracle (see the implementation - spec §2), then run `mise run test:matrix:inventory`. If the new type's + `eql-scalars::CATALOG`, wire the SQLx matrix oracle (see + `docs/reference/adding-a-scalar-encrypted-domain-type.md` §3), then run + `mise run test:matrix:inventory`.
If the new type's normalized name set matches the canonical snapshot (it will, for a standard `ordered_numeric_matrix!` type), no snapshot edit is needed — the cross-check just confirms the type is wired. @@ -76,4 +77,4 @@ catalog cross-check) fails the job. | sed -e 's/^scalars::int4::/scalars::::/' -e 's/_int4_/__/g' | LC_ALL=C sort > snapshots/matrix_tests.txt ``` -See `docs/reference/encrypted-domain-implementation-spec.md` §2 and §8. +See `docs/reference/adding-a-scalar-encrypted-domain-type.md` §3 (matrix oracle + inventory snapshot). From e8bbe81d2888d553d15ad147b2b0be1fb025ae14 Mon Sep 17 00:00:00 2001 From: Toby Hede Date: Wed, 3 Jun 2026 14:11:07 +1000 Subject: [PATCH 19/19] =?UTF-8?q?build:=20always=20run=20codegen=20?= =?UTF-8?q?=E2=80=94=20drop=20mise=20sources/outputs=20incremental=20skip?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `#MISE sources`/`outputs` declarations let mise skip the `build` task when it judged the inputs unchanged and the release artefacts already present. Because the encrypted-domain SQL is gitignored and regenerated by `cargo run -p eql-codegen`, a skipped build could ship stale `release/*.sql`. Drop them so the build always regenerates. --- tasks/build.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/tasks/build.sh b/tasks/build.sh index 311dfbc7..ead83184 100755 --- a/tasks/build.sh +++ b/tasks/build.sh @@ -1,8 +1,6 @@ #!/usr/bin/env bash #MISE description="Build SQL into single release file" #MISE alias="b" -#MISE sources=["src/**/*.sql", "tasks/pin_search_path.sql", "tasks/uninstall.sql", "tasks/uninstall-protect.sql", "crates/eql-scalars/src/**/*.rs", "crates/eql-codegen/src/**/*.rs"] -#MISE outputs=["release/cipherstash-encrypt.sql","release/cipherstash-encrypt-uninstall.sql","release/cipherstash-encrypt-protect.sql","release/cipherstash-encrypt-protect-uninstall.sql"] #USAGE flag "--version " help="Specify release version of EQL" default="DEV" #!/bin/bash