diff --git a/.github/workflows/test-eql.yml b/.github/workflows/test-eql.yml index d74f2946..776d701a 100644 --- a/.github/workflows/test-eql.yml +++ b/.github/workflows/test-eql.yml @@ -127,6 +127,34 @@ jobs: run: | mise run codegen:parity + self-contained-v3: + name: "eql_v3 self-containment" + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + with: + persist-credentials: false + + - uses: jdx/mise-action@1648a7812b9aeae629881980618f079932869151 # v4 + with: + version: 2026.4.0 + install: true + cache: true + + - uses: Swatinem/rust-cache@e18b497796c12c097a38f9edb9d0641fb99eee32 # v2 + with: + workspaces: . + shared-key: sqlx-tests + + # Build to materialise release/cipherstash-encrypt-v3.sql and + # src/deps-ordered-v3.txt, then assert no eql_v2 symbol/file leakage. + - name: Build EQL + run: mise run --force build + + - name: Assert eql_v3 is self-contained + run: mise run test:self_contained_v3 + matrix-coverage: name: "Matrix coverage inventory" runs-on: ubuntu-latest @@ -216,6 +244,11 @@ jobs: rustup component add --toolchain ${active_rust_toolchain} rustfmt clippy mise run --output prefix test --postgres ${POSTGRES_VERSION} + - name: Clean-DB v3 install smoke (Postgres ${{ matrix.postgres-version }}) + run: | + mise run build + mise run test:clean_install_v3 + splinter: name: "Supabase splinter" runs-on: ubuntu-latest-m diff --git a/.gitignore b/.gitignore index bdc8e444..83ee4c42 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,9 @@ deps-ordered.txt deps-supabase.txt deps-ordered-supabase.txt +src/deps-v3.txt +src/deps-ordered-v3.txt + # Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore src/version.sql @@ -223,13 +226,13 @@ tests/sqlx/migrations/001_install_eql.sql # never commit — stale fixtures hide bugs) tests/sqlx/fixtures/eql_v2* -# Generated encrypted-domain SQL — regenerated by `tasks/build.sh` from -# tasks/codegen/types/.toml on every build (or `mise run 
codegen:domain -# ` to refresh manually). Hand-written *_extensions.sql stays committed. -src/encrypted_domain/*/*_types.sql -src/encrypted_domain/*/*_functions.sql -src/encrypted_domain/*/*_operators.sql -src/encrypted_domain/*/*_aggregates.sql +# Generated encrypted-domain SQL — regenerated by `tasks/build.sh` from the +# eql-scalars::CATALOG via `cargo run -p eql-codegen` on every build. The +# hand-written src/v3/scalars/functions.sql (no type subdir) stays committed. +src/v3/scalars/*/*_types.sql +src/v3/scalars/*/*_functions.sql +src/v3/scalars/*/*_operators.sql +src/v3/scalars/*/*_aggregates.sql # Large generated test data files tests/ste_vec_vast.sql diff --git a/CHANGELOG.md b/CHANGELOG.md index 76700194..1404e61d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,9 +22,10 @@ Each entry that ships in a published release links to the PR that introduced it. ### Added -- **`eql_v3` encrypted-domain schema, with the `int4` family as its first member.** Encrypted-domain type families now live in a new, additional `eql_v3` schema (the existing `eql_v2` schema is unchanged — it keeps the core types/operators and stays the documented public API). Four jsonb-backed domains for encrypted `int4` columns: `eql_v3.int4` (storage-only), `eql_v3.int4_eq` (`=` / `<>` via HMAC), and `eql_v3.int4_ord` / `eql_v3.int4_ord_ore` (also `<` `<=` `>` `>=` via ORE block terms). Supported comparisons resolve to inlinable wrappers; the native `jsonb` operator surface reachable through domain fallback is blocked (raises rather than silently mis-resolving). Each domain's `CHECK` requires the EQL envelope (`v`, `i`), the ciphertext (`c`), and the variant's index term(s), and pins the payload version (`VALUE->>'v' = '2'`, matching `eql_v2._encrypted_check_v`) — so a missing key or wrong-version payload is rejected on insert or cast rather than surfacing later at query time. 
Index via a functional index on the `eql_v3.eq_term` / `eql_v3.ord_term` extractors, not an operator class on the domain. The extractors still return the core `eql_v2.hmac_256` / `eql_v2.ore_block_u64_8_256` index-term types, which remain in `eql_v2` and are referenced cross-schema. Why: a type-safe, per-capability encrypted integer column instead of the untyped `eql_v2_encrypted`, namespaced under its own schema. This is the reference scalar implementation for the generated domain family. ([#239](https://github.com/cipherstash/encrypt-query-language/pull/239), supersedes [#225](https://github.com/cipherstash/encrypt-query-language/pull/225)) +- **`eql_v3` encrypted-domain schema, with the `int4` family as its first member.** Encrypted-domain type families now live in a new, additional `eql_v3` schema (the existing `eql_v2` schema is unchanged — it keeps the core types/operators and stays the documented public API). Four jsonb-backed domains for encrypted `int4` columns: `eql_v3.int4` (storage-only), `eql_v3.int4_eq` (`=` / `<>` via HMAC), and `eql_v3.int4_ord` / `eql_v3.int4_ord_ore` (also `<` `<=` `>` `>=` via ORE block terms). Supported comparisons resolve to inlinable wrappers; the native `jsonb` operator surface reachable through domain fallback is blocked (raises rather than silently mis-resolving). Each domain's `CHECK` requires the EQL envelope (`v`, `i`), the ciphertext (`c`), and the variant's index term(s), and pins the payload version (`VALUE->>'v' = '2'`, matching `eql_v2._encrypted_check_v`) — so a missing key or wrong-version payload is rejected on insert or cast rather than surfacing later at query time. Index via a functional index on the `eql_v3.eq_term` / `eql_v3.ord_term` extractors, not an operator class on the domain. The extractors return the searchable-encrypted-metadata index-term types `eql_v3.hmac_256` / `eql_v3.ore_block_u64_8_256`, which `eql_v3` owns directly (see the self-contained `eql_v3` schema entry below). 
Why: a type-safe, per-capability encrypted integer column instead of the untyped `eql_v2_encrypted`, namespaced under its own schema. This is the reference scalar implementation for the generated domain family. ([#239](https://github.com/cipherstash/encrypt-query-language/pull/239), supersedes [#225](https://github.com/cipherstash/encrypt-query-language/pull/225)) - **`eql_v3.int2` encrypted-domain type family.** Four jsonb-backed domains for encrypted `int2` columns — `eql_v3.int2` (storage-only), `eql_v3.int2_eq` (`=` / `<>` via HMAC), and `eql_v3.int2_ord` / `eql_v3.int2_ord_ore` (also `<` `<=` `>` `>=` via ORE block terms, with `MIN` / `MAX` aggregates) — generated from the `int2` row in `eql-scalars::CATALOG` by the same materializer as the `eql_v3.int4` reference. Index via a functional index on the `eql_v3.eq_term` / `eql_v3.ord_term` extractors, not an operator class on the domain. Why: a type-safe, per-capability encrypted `smallint` column, proving the scalar generator generalizes beyond the `int4` reference. ([#243](https://github.com/cipherstash/encrypt-query-language/pull/243)) - **Per-domain `MIN` / `MAX` aggregates for the encrypted-domain family.** `eql_v3.min(eql_v3._ord)` / `eql_v3.max(eql_v3._ord)` (and the `_ord_ore` twin) are generated for every ord-capable scalar variant, giving type-safe extrema on domain-typed columns — comparison routes through the variant's `<` / `>` operator (ORE block term, no decryption). The aggregates are declared `PARALLEL = SAFE` with a combine function (the state function itself — min/max are associative), so PostgreSQL can use partial/parallel aggregation on large `GROUP BY` workloads. Why: the new domain types previously had no equivalent of the composite-type aggregates. The existing `eql_v2.min(eql_v2_encrypted)` / `eql_v2.max(eql_v2_encrypted)` aggregates are **retained** and continue to work on `eql_v2_encrypted` columns; the per-domain aggregates are additive and coexist with them. 
([#239](https://github.com/cipherstash/encrypt-query-language/pull/239)) +- **Self-contained `eql_v3` schema + standalone `release/cipherstash-encrypt-v3.sql` installer.** The `eql_v3` encrypted-domain surface no longer depends on `eql_v2` at runtime: it now owns its own copies of the searchable-encrypted-metadata (SEM) index-term types — `eql_v3.hmac_256` and `eql_v3.ore_block_u64_8_256` (with its btree operator class) — so the `eql_v3.eq_term` / `eql_v3.ord_term` extractors return `eql_v3` types and no `eql_v2.` appears anywhere in the v3 SQL. The whole v3 surface relocated under a single `src/v3/` tree (`src/v3/sem/` for the hand-written SEM types, `src/v3/scalars/` for the generated domain families). A new build variant ships the `eql_v3` schema on its own as `release/cipherstash-encrypt-v3.sql`, installable into a database with no `eql_v2` present; a CI gate greps that artifact and its dependency closure to keep it `eql_v2`-free. Why: a clean foundation for the per-scalar encrypted-domain model to stand alone, ahead of it replacing the `eql_v2_encrypted` composite column type. This is additive — a new schema and a new artifact — and leaves `eql_v2` byte-for-byte unchanged. ([#255](https://github.com/cipherstash/encrypt-query-language/pull/255)) ### Changed diff --git a/CLAUDE.md b/CLAUDE.md index 50766e33..9c9a08ca 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -37,6 +37,7 @@ This project uses `mise` for task management. Common commands: - `cipherstash-encrypt.sql` - Main installer - `cipherstash-encrypt-supabase.sql` - Supabase-compatible (excludes operator classes) - `cipherstash-encrypt-protect.sql` - ProtectJS variant (excludes config management) + - `cipherstash-encrypt-v3.sql` - Standalone, self-contained `eql_v3` surface only (globbed from `src/v3` alone; no `eql_v2`, installable into a DB with no `eql_v2` present) - Corresponding uninstallers for each variant #### Build Variants @@ -45,13 +46,14 @@ This project uses `mise` for task management. 
Common commands: | Main | Nothing | Full EQL with all features | | Supabase | Operator classes | Supabase compatibility | | Protect | `src/config/*`, `src/encryptindex/*` | ProtectJS (no database-side config) | +| v3-only | Everything outside `src/v3` (and `pin_search_path.sql`) | Self-contained `eql_v3` surface, `eql_v2`-free (gated by `mise run test:self_contained_v3`) | ## Project Architecture This is the **Encrypt Query Language (EQL)** - a PostgreSQL extension for searchable encryption. Key architectural components: ### Core Structure -- **Schema**: Core EQL functions/types are in the `eql_v2` PostgreSQL schema. The encrypted-domain type families (`int4` and future scalar domains) live in a separate `eql_v3` schema (see below); they reuse the core `eql_v2` index-term types cross-schema. `eql_v2` is unchanged and remains the documented public API. +- **Schema**: Core EQL functions/types are in the `eql_v2` PostgreSQL schema. The encrypted-domain type families (`int4` and future scalar domains) live in a separate `eql_v3` schema (see below). The `eql_v3` surface is **self-contained**: it owns its own copies of the searchable-encrypted-metadata (SEM) index-term types (`eql_v3.hmac_256`, `eql_v3.ore_block_u64_8_256`, hand-written under `src/v3/sem/`) and has no runtime dependency on `eql_v2`. `eql_v2` is unchanged and remains the documented public API. 
- **Main Type**: `eql_v2_encrypted` - composite type for encrypted columns (stored as JSONB) - **Configuration**: `eql_v2_configuration` table tracks encryption configs - **Index Types**: Various encrypted index types (blake3, hmac_256, bloom_filter, ore variants) @@ -62,7 +64,7 @@ This is the **Encrypt Query Language (EQL)** - a PostgreSQL extension for search - `src/operators/` - SQL operators for encrypted data comparisons - `src/config/` - Configuration management functions - `src/blake3/`, `src/hmac_256/`, `src/bloom_filter/`, `src/ore_*` - Index implementations -- `src/encrypted_domain/` - Encrypted-domain type families (jsonb-backed PostgreSQL domains, one per operator/index capability) +- `src/v3/` - Self-contained `eql_v3` surface: `src/v3/schema.sql`, forked `src/v3/crypto.sql` / `src/v3/common.sql`, hand-written SEM index-term types under `src/v3/sem/` (`hmac_256`, `ore_block_u64_8_256`), and the generated scalar encrypted-domain families under `src/v3/scalars//` (plus the shared blocker `src/v3/scalars/functions.sql`) - `tasks/` - mise task scripts - `tests/sqlx/` - Rust/SQLx test framework (PostgreSQL 14-17 support) - `release/` - Generated SQL installation files @@ -76,9 +78,9 @@ This is the **Encrypt Query Language (EQL)** - a PostgreSQL extension for search ### Encrypted-Domain Types -`src/encrypted_domain/` holds **encrypted-domain type families** — jsonb-backed PostgreSQL domains in the **`eql_v3` schema**, one domain per operator/index capability (`eql_v3.` storage-only, `eql_v3._eq`, `eql_v3._ord`). The schema qualifier replaces the old version-prefixed name, so the domains are `eql_v3.int4`, `eql_v3.int4_eq`, `eql_v3.int4_ord`, `eql_v3.int4_ord_ore` — created in `eql_v3`, not `public`. 
Their extractors/wrappers/aggregates (`eql_v3.eq_term`, `eql_v3.ord_term`, `eql_v3.eq`/`lt`/…, `eql_v3.min`/`max`) also live in `eql_v3`, but the index-term types they return and construct (`eql_v2.hmac_256`, `eql_v2.ore_block_u64_8_256`) stay in `eql_v2` and are referenced cross-schema. `eql_v3.int4` (PR #239, supersedes #225) is the reference scalar implementation; future scalar types such as `int8`, `bool`, `date`, `float`, `numeric`, `timestamp`, `text`, and `jsonb` follow this materializer pattern. `text`, `numeric`, and `jsonb` are planned but have no generated SQL surface yet — `jsonb` in particular needs a separate SQL design beyond the ordered-scalar materializer. The `eql-scalars` fixture catalog (`crates/eql-scalars`) already models their fixture values ahead of the SQL surface. +`src/v3/scalars/` holds the generated **encrypted-domain type families** — jsonb-backed PostgreSQL domains in the **`eql_v3` schema**, one domain per operator/index capability (`eql_v3.` storage-only, `eql_v3._eq`, `eql_v3._ord`). The schema qualifier replaces the old version-prefixed name, so the domains are `eql_v3.int4`, `eql_v3.int4_eq`, `eql_v3.int4_ord`, `eql_v3.int4_ord_ore` — created in `eql_v3`, not `public`. Their extractors/wrappers/aggregates (`eql_v3.eq_term`, `eql_v3.ord_term`, `eql_v3.eq`/`lt`/…, `eql_v3.min`/`max`) also live in `eql_v3`, and the SEM index-term types they return and construct (`eql_v3.hmac_256`, `eql_v3.ore_block_u64_8_256`) are **also `eql_v3`** — hand-written under `src/v3/sem/` so the whole v3 surface is self-contained (no `eql_v2.` appears anywhere in v3 SQL; CI gates this via `mise run test:self_contained_v3` and the standalone `release/cipherstash-encrypt-v3.sql` installer). `eql_v3.int4` (PR #239, supersedes #225) is the reference scalar implementation; future scalar types such as `int8`, `bool`, `date`, `float`, `numeric`, `timestamp`, `text`, and `jsonb` follow this materializer pattern. 
`text`, `numeric`, and `jsonb` are planned but have no generated SQL surface yet — `jsonb` in particular needs a separate SQL design beyond the ordered-scalar materializer. The `eql-scalars` fixture catalog (`crates/eql-scalars`) already models their fixture values ahead of the SQL surface. -Adding a scalar encrypted-domain type is one row in the Rust catalog `eql-scalars::CATALOG` (`crates/eql-scalars/src/lib.rs`): a `ScalarSpec` giving the type `token` (e.g. `int8`), its `ScalarKind` (the `kind` field), the `DomainSpec`s mapping each generated domain suffix to its fixed index `Term`s (`_eq => [Hm]`, `_ord`/`_ord_ore => [Ore]`), and the `Fixture` value list. Term capabilities are fixed in the `Term` enum's `impl` methods (with unit tests): `Hm` provides equality, and `Ore` provides equality plus ordering. There is no TOML manifest and no Python — the catalog is the source of truth, validated by the compiler (an undefined term or unknown scalar is a compile error) plus catalog `#[test]`s. `mise run build` runs `cargo run -p eql-codegen`, which regenerates the scalar SQL surface into `src/encrypted_domain//` from `CATALOG` at the start of every build; that surface includes supported comparison wrappers plus blockers for native `jsonb` operators that would otherwise be reachable through domain fallback. `cargo run -p eql-codegen` regenerates every type at once (the same call `mise run build` uses; there is no per-type codegen task). The generated `*_types.sql` / `*_functions.sql` / `*_operators.sql` / `*_aggregates.sql` files are gitignored and never committed. The per-type plaintext fixture lists the SQLx matrix consumes are **not** a generated file — they are materialised from each `CATALOG` row at compile time as `eql_scalars::INT4_VALUES` / `INT2_VALUES` (the `int_values!` macro) and read directly by `ScalarType::FIXTURE_VALUES`; a Rust source of truth no longer round-trips through a committed generated `.rs`. 
Generated SQL carries a `-- AUTOMATICALLY GENERATED FILE` header (the project-wide marker `docs:validate` greps on); change the catalog and rebuild, never hand-edit. Hand-written SQL beyond the fixed surface goes in `src/encrypted_domain//_extensions.sql` with no auto-generated header and explicit `-- REQUIRE:` edges — that file IS committed. `text` and `jsonb` are out of scope for this scalar materializer. +Adding a scalar encrypted-domain type is one row in the Rust catalog `eql-scalars::CATALOG` (`crates/eql-scalars/src/lib.rs`): a `ScalarSpec` giving the type `token` (e.g. `int8`), its `ScalarKind` (the `kind` field), the `DomainSpec`s mapping each generated domain suffix to its fixed index `Term`s (`_eq => [Hm]`, `_ord`/`_ord_ore => [Ore]`), and the `Fixture` value list. Term capabilities are fixed in the `Term` enum's `impl` methods (with unit tests): `Hm` provides equality, and `Ore` provides equality plus ordering. There is no TOML manifest and no Python — the catalog is the source of truth, validated by the compiler (an undefined term or unknown scalar is a compile error) plus catalog `#[test]`s. `mise run build` runs `cargo run -p eql-codegen`, which regenerates the scalar SQL surface into `src/v3/scalars//` from `CATALOG` at the start of every build; that surface includes supported comparison wrappers plus blockers for native `jsonb` operators that would otherwise be reachable through domain fallback. `cargo run -p eql-codegen` regenerates every type at once (the same call `mise run build` uses; there is no per-type codegen task). The generated `*_types.sql` / `*_functions.sql` / `*_operators.sql` / `*_aggregates.sql` files are gitignored and never committed. 
The per-type plaintext fixture lists the SQLx matrix consumes are **not** a generated file — they are materialised from each `CATALOG` row at compile time as `eql_scalars::INT4_VALUES` / `INT2_VALUES` (the `int_values!` macro) and read directly by `ScalarType::FIXTURE_VALUES`; a Rust source of truth no longer round-trips through a committed generated `.rs`. Generated SQL carries a `-- AUTOMATICALLY GENERATED FILE` header (the project-wide marker `docs:validate` greps on); change the catalog and rebuild, never hand-edit. Hand-written SQL beyond the fixed surface goes in `src/v3/scalars//_extensions.sql` with no auto-generated header and explicit `-- REQUIRE:` edges — that file IS committed. `text` and `jsonb` are out of scope for this scalar materializer. **Adding a new encrypted-domain type: follow `docs/reference/adding-a-scalar-encrypted-domain-type.md`.** The mechanics are fixed for ordered scalar domains; the catalog row only declares the token, kind, domain suffixes, and terms. New term behavior belongs in the `Term` enum's `impl` methods in `crates/eql-scalars/src` with tests, not in free-form catalog data. diff --git a/crates/eql-codegen/src/consts.rs b/crates/eql-codegen/src/consts.rs index 64d2e438..0f7a1ef7 100644 --- a/crates/eql-codegen/src/consts.rs +++ b/crates/eql-codegen/src/consts.rs @@ -4,10 +4,12 @@ /// the writer uses it only to recognise files it owns (overwrite/clean safety). pub(crate) const AUTO_GENERATED_HEADER: &str = "-- AUTOMATICALLY GENERATED FILE.\n"; -/// Schema housing the encrypted-domain families. -pub(crate) const DOMAIN_SCHEMA: &str = "eql_v3"; -/// Schema owning the core index-term types/constructors. -pub(crate) const CORE_SCHEMA: &str = "eql_v2"; +/// The single schema housing the self-contained `eql_v3` surface: the +/// encrypted-domain families AND the SEM index-term types/constructors they +/// call. 
v3 has zero dependency on `eql_v2`, so domains and core index-term +/// types share one schema by construction — there is no second schema to point +/// the core types at. +pub(crate) const SCHEMA: &str = "eql_v3"; /// Always-present payload keys checked for presence in every domain CHECK, in /// order: envelope version (`v`), ident (`i`), ciphertext (`c`). Term-specific diff --git a/crates/eql-codegen/src/context.rs b/crates/eql-codegen/src/context.rs index 01e80c6c..78b50883 100644 --- a/crates/eql-codegen/src/context.rs +++ b/crates/eql-codegen/src/context.rs @@ -49,8 +49,7 @@ pub fn environment() -> minijinja::Environment<'static> { include_str!("../templates/aggregates.sql.j2"), ) .expect("aggregates.sql template"); - env.add_global("domain_schema", DOMAIN_SCHEMA); - env.add_global("core_schema", CORE_SCHEMA); + env.add_global("schema", SCHEMA); env } @@ -104,7 +103,7 @@ pub enum FnEntry { Extractor { - ret: String, // e.g. eql_v2.hmac_256 (selection STAYS in Rust) + ret: String, // e.g. eql_v3.hmac_256 (selection STAYS in Rust) extractor: String, // e.g. eq_term - ctor: String, // e.g. hmac_256 (called as {{ core_schema }}.{{ ctor }}) + ctor: String, // e.g. hmac_256 (called as {{ schema }}.{{ ctor }}) }, Wrapper { op: String, // SQL operator used in the body, e.g. = @@ -132,9 +131,14 @@ pub struct FunctionsContext { } /// Build the inlinable index-extractor entry for a domain term. +/// +/// The `RETURNS` type name equals the constructor name (`hmac_256`, +/// `ore_block_u64_8_256`); qualify it with `SCHEMA` — the same schema as the +/// body's constructor call — so the declared return type and the call stay in +/// lockstep. `Term::returns()` is intentionally not used. pub fn extractor_entry(term: Term) -> FnEntry { FnEntry::Extractor { - ret: term.returns().to_string(), + ret: format!("{SCHEMA}.{}", term.ctor()), extractor: term.extractor().to_string(), ctor: term.ctor().to_string(), } @@ -224,7 +228,7 @@ pub struct AggregatesContext { /// The schema-qualified SQL domain type name, e.g. `eql_v3.int4_eq`. 
/// Port of `domain_name`. pub fn domain_name(name: &str) -> String { - format!("{DOMAIN_SCHEMA}.{name}") + format!("{SCHEMA}.{name}") } /// The full domain name from a token + suffix (suffix "" => bare token). @@ -236,9 +240,9 @@ pub fn full_domain_name(token: &str, suffix: &str) -> String { /// Port of `_extract_arg`. `dom` is the schema-qualified domain name. pub fn extract_arg(arg_type: &str, extractor: &str, dom: &str, arg: &str) -> String { if arg_type == "jsonb" { - format!("{DOMAIN_SCHEMA}.{extractor}({arg}::{dom})") + format!("{SCHEMA}.{extractor}({arg}::{dom})") } else { - format!("{DOMAIN_SCHEMA}.{extractor}({arg})") + format!("{SCHEMA}.{extractor}({arg})") } } diff --git a/crates/eql-codegen/src/generate.rs b/crates/eql-codegen/src/generate.rs index 581f0244..622ca634 100644 --- a/crates/eql-codegen/src/generate.rs +++ b/crates/eql-codegen/src/generate.rs @@ -7,6 +7,20 @@ use eql_scalars::{DomainSpec, ScalarSpec, Term}; use crate::context::{domain_name, is_ord_capable}; use crate::operator_surface::OPERATORS; +/// REQUIRE edge for the v3 schema file — pulled in by every generated file. +const V3_SCHEMA: &str = "src/v3/schema.sql"; +/// REQUIRE edge for the hand-written shared blocker helper. +const V3_SCALARS_BLOCKER: &str = "src/v3/scalars/functions.sql"; +/// Root of the generated per-token scalar surface. The single place the tree +/// layout is spelled out — keeps `types_path`/`scalar_path` and the REQUIRE +/// vecs from drifting if the surface ever relocates again. +const V3_SCALARS_DIR: &str = "src/v3/scalars"; + +/// REQUIRE path for a generated file `file` under a token's scalar dir. +fn scalar_path(token: &str, file: &str) -> String { + format!("{V3_SCALARS_DIR}/{token}/{file}") +} + /// The full domain name (token + suffix). suffix "" => bare token. fn full_name(token: &str, suffix: &str) -> String { format!("{token}{suffix}") @@ -25,7 +39,7 @@ fn arg_b_name(symbol: &str) -> &'static str { /// REQUIRE path for a type's _types.sql. 
Port of `_types_path`. fn types_path(token: &str) -> String { - format!("src/encrypted_domain/{token}/{token}_types.sql") + scalar_path(token, &format!("{token}_types.sql")) } /// Body for _types.sql: every domain in one idempotent DO block. @@ -50,10 +64,9 @@ pub fn render_types_file(spec: &ScalarSpec) -> String { /// REQUIRE edges for a domain's _functions.sql. Port of `_functions_requires`. fn functions_requires(token: &str, terms: &[Term]) -> Vec { let mut reqs = vec![ - "src/schema.sql".to_string(), - "src/schema-v3.sql".to_string(), + V3_SCHEMA.to_string(), types_path(token), - "src/encrypted_domain/functions.sql".to_string(), + V3_SCALARS_BLOCKER.to_string(), ]; for extra in Term::term_requires(terms) { if !reqs.iter().any(|r| r == extra) { @@ -163,9 +176,9 @@ pub fn render_operators_file(token: &str, domain: &DomainSpec) -> String { let ctx = OperatorsContext { requires: vec![ - "src/schema-v3.sql".to_string(), + V3_SCHEMA.to_string(), types_path(token), - format!("src/encrypted_domain/{token}/{name}_functions.sql"), + scalar_path(token, &format!("{name}_functions.sql")), ], token: token.to_string(), name, @@ -190,10 +203,10 @@ pub fn render_aggregates_file(token: &str, domain: &DomainSpec) -> Option Result, pub fn generate_all(out_root: &Path) -> Result { for spec in eql_scalars::CATALOG { let token = spec.token; - let out_dir = out_root.join("src").join("encrypted_domain").join(token); + let out_dir = out_root.join(V3_SCALARS_DIR).join(token); let written = generate_type(spec, &out_dir)?; for p in &written { @@ -475,7 +488,7 @@ mod tests { #[test] fn types_file_has_all_four_domains() { let sql = render_types_file(spec("int4")); - assert!(sql.contains("-- REQUIRE: src/schema-v3.sql")); + assert!(sql.contains("-- REQUIRE: src/v3/schema.sql")); for dom in ["int4", "int4_eq", "int4_ord_ore", "int4_ord"] { assert!( sql.contains(&format!("CREATE DOMAIN eql_v3.{dom} AS jsonb")), @@ -504,7 +517,7 @@ mod tests { let sql = render_functions_file(s.token, domain(s, 
"_eq")); assert_eq!(sql.matches("CREATE FUNCTION").count(), 45); assert!(sql.contains("CREATE FUNCTION eql_v3.eq_term(a eql_v3.int4_eq)")); - assert!(sql.contains("RETURNS eql_v2.hmac_256")); + assert!(sql.contains("RETURNS eql_v3.hmac_256")); assert_eq!( sql.matches("LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE") .count(), @@ -520,7 +533,7 @@ mod tests { let sql = render_functions_file(s.token, domain(s, "_ord")); assert_eq!(sql.matches("CREATE FUNCTION").count(), 45); assert!(sql.contains("CREATE FUNCTION eql_v3.ord_term(a eql_v3.int4_ord)")); - assert!(sql.contains("RETURNS eql_v2.ore_block_u64_8_256")); + assert!(sql.contains("RETURNS eql_v3.ore_block_u64_8_256")); assert_eq!( sql.matches("LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE") .count(), @@ -553,9 +566,9 @@ mod tests { assert_eq!(sql.matches("CREATE AGGREGATE").count(), 2); assert!(sql.contains("eql_v3.min_sfunc")); assert!(sql.contains("eql_v3.max_sfunc")); - assert!(sql.contains("-- REQUIRE: src/encrypted_domain/int4/int4_ord_operators.sql")); - assert!(sql.contains("-- REQUIRE: src/encrypted_domain/int4/int4_ord_functions.sql")); - assert!(sql.contains("-- REQUIRE: src/encrypted_domain/int4/int4_types.sql")); + assert!(sql.contains("-- REQUIRE: src/v3/scalars/int4/int4_ord_operators.sql")); + assert!(sql.contains("-- REQUIRE: src/v3/scalars/int4/int4_ord_functions.sql")); + assert!(sql.contains("-- REQUIRE: src/v3/scalars/int4/int4_types.sql")); } #[test] diff --git a/crates/eql-codegen/src/writer.rs b/crates/eql-codegen/src/writer.rs index afbcdce0..4d310fc8 100644 --- a/crates/eql-codegen/src/writer.rs +++ b/crates/eql-codegen/src/writer.rs @@ -90,6 +90,25 @@ pub fn ensure_generated_paths_writable(paths: &[PathBuf]) -> Result<(), WriteErr /// — it does not prepend a header. 
pub fn write_generated_file(path: &Path, body: &str) -> Result<(), WriteError> { ensure_generated_paths_writable(std::slice::from_ref(&path.to_path_buf()))?; + // The template is trusted to carry the ownership marker as its first line, + // but a renderer bug (or a hand-edited template) could drop it — which would + // then defeat `is_generated`/`clean_generated_files`, leaving an unowned file + // the next run refuses to overwrite. Validate the marker before writing. + let first = body + .lines() + .next() + .unwrap_or("") + .trim_end_matches(['\r', '\n']); + if first != sql_marker() { + return Err(WriteError::Ownership(format!( + "refusing to write generated file without the AUTO-GENERATED marker as its \ + first line: {} (expected first line {:?}, got {:?}). The SQL template must \ + emit the marker.", + path.display(), + sql_marker(), + first + ))); + } if let Some(parent) = path.parent() { fs::create_dir_all(parent)?; } @@ -171,6 +190,22 @@ mod tests { assert!(is_generated(&p)); } + #[test] + fn write_rejects_body_without_marker() { + let d = tmp(); + let p = d.path().join("int4_types.sql"); + // A body whose first line is NOT the AUTO-GENERATED marker must be + // rejected — the template is required to emit it. + let body = "-- REQUIRE: src/v3/schema.sql\nDO $$ BEGIN END $$;\n"; + let err = write_generated_file(&p, body).unwrap_err(); + assert!(matches!(err, WriteError::Ownership(_))); + assert!(err.to_string().contains("AUTO-GENERATED marker")); + assert!( + !p.exists(), + "no file should be written when the marker is missing" + ); + } + #[test] fn write_refuses_to_overwrite_handwritten() { let d = tmp(); diff --git a/crates/eql-codegen/templates/aggregates.sql.j2 b/crates/eql-codegen/templates/aggregates.sql.j2 index 9660917d..d85ab283 100644 --- a/crates/eql-codegen/templates/aggregates.sql.j2 +++ b/crates/eql-codegen/templates/aggregates.sql.j2 @@ -9,7 +9,7 @@ --! @param state {{ dom }} --! @param value {{ dom }} --! 
@return {{ dom }} -CREATE FUNCTION {{ domain_schema }}.{{ a.name }}_sfunc(state {{ dom }}, value {{ dom }}) +CREATE FUNCTION {{ schema }}.{{ a.name }}_sfunc(state {{ dom }}, value {{ dom }}) RETURNS {{ dom }} LANGUAGE plpgsql IMMUTABLE STRICT PARALLEL SAFE SET search_path = pg_catalog, extensions, public @@ -25,10 +25,10 @@ $$; --! @brief {{ a.name }} aggregate for {{ dom }}. --! @param input {{ dom }} --! @return {{ dom }} -CREATE AGGREGATE {{ domain_schema }}.{{ a.name }}({{ dom }}) ( - sfunc = {{ domain_schema }}.{{ a.name }}_sfunc, +CREATE AGGREGATE {{ schema }}.{{ a.name }}({{ dom }}) ( + sfunc = {{ schema }}.{{ a.name }}_sfunc, stype = {{ dom }}, - combinefunc = {{ domain_schema }}.{{ a.name }}_sfunc, + combinefunc = {{ schema }}.{{ a.name }}_sfunc, parallel = safe ); {% endfor -%} diff --git a/crates/eql-codegen/templates/functions/extractor.sql.j2 b/crates/eql-codegen/templates/functions/extractor.sql.j2 index da649b21..9044f890 100644 --- a/crates/eql-codegen/templates/functions/extractor.sql.j2 +++ b/crates/eql-codegen/templates/functions/extractor.sql.j2 @@ -1,7 +1,7 @@ --! @brief Index extractor for {{ dom }}. --! @param a {{ dom }} --! @return {{ e.ret }} -CREATE FUNCTION {{ domain_schema }}.{{ e.extractor }}(a {{ dom }}) +CREATE FUNCTION {{ schema }}.{{ e.extractor }}(a {{ dom }}) RETURNS {{ e.ret }} LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE -AS $$ SELECT {{ core_schema }}.{{ e.ctor }}(a::jsonb) $$; +AS $$ SELECT {{ schema }}.{{ e.ctor }}(a::jsonb) $$; diff --git a/crates/eql-codegen/templates/functions/unsupported.sql.j2 b/crates/eql-codegen/templates/functions/unsupported.sql.j2 index f33dab6c..5ec85aed 100644 --- a/crates/eql-codegen/templates/functions/unsupported.sql.j2 +++ b/crates/eql-codegen/templates/functions/unsupported.sql.j2 @@ -2,7 +2,7 @@ --! @param {{ e.args[0].name }} {{ e.args[0].ty }} --! @param {{ e.args[1].name }} {{ e.args[1].ty }} --! 
@return {{ e.returns }} -CREATE FUNCTION {{ domain_schema }}.{{ e.function_name }}({{ e.args[0].name }} {{ e.args[0].ty }}, {{ e.args[1].name }} {{ e.args[1].ty }}) +CREATE FUNCTION {{ schema }}.{{ e.function_name }}({{ e.args[0].name }} {{ e.args[0].ty }}, {{ e.args[1].name }} {{ e.args[1].ty }}) RETURNS {{ e.returns }} IMMUTABLE PARALLEL SAFE AS $$ BEGIN RAISE EXCEPTION 'operator % is not supported for %', '{{ e.operator_lit }}', '{{ domain_lit }}'; END; $$ LANGUAGE plpgsql; diff --git a/crates/eql-codegen/templates/functions/wrapper.sql.j2 b/crates/eql-codegen/templates/functions/wrapper.sql.j2 index 243f2465..1e0b43ac 100644 --- a/crates/eql-codegen/templates/functions/wrapper.sql.j2 +++ b/crates/eql-codegen/templates/functions/wrapper.sql.j2 @@ -2,6 +2,6 @@ --! @param {{ e.args[0].name }} {{ e.args[0].ty }} --! @param {{ e.args[1].name }} {{ e.args[1].ty }} --! @return boolean -CREATE FUNCTION {{ domain_schema }}.{{ e.function_name }}({{ e.args[0].name }} {{ e.args[0].ty }}, {{ e.args[1].name }} {{ e.args[1].ty }}) +CREATE FUNCTION {{ schema }}.{{ e.function_name }}({{ e.args[0].name }} {{ e.args[0].ty }}, {{ e.args[1].name }} {{ e.args[1].ty }}) RETURNS boolean LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE AS $$ SELECT {{ e.call_a }} {{ e.op }} {{ e.call_b }} $$; diff --git a/crates/eql-codegen/templates/operators.sql.j2 b/crates/eql-codegen/templates/operators.sql.j2 index 1dc1360c..bb33ff52 100644 --- a/crates/eql-codegen/templates/operators.sql.j2 +++ b/crates/eql-codegen/templates/operators.sql.j2 @@ -6,7 +6,7 @@ --! @brief Operators for {{ dom }}. 
{% for o in operators %} CREATE OPERATOR {{ o.symbol }} ( - FUNCTION = {{ domain_schema }}.{{ o.function_name }}, + FUNCTION = {{ schema }}.{{ o.function_name }}, LEFTARG = {{ o.leftarg }}, RIGHTARG = {{ o.rightarg }}{% if o.metadata %}, {{ o.metadata }}{% endif %} ); diff --git a/crates/eql-codegen/templates/types.sql.j2 b/crates/eql-codegen/templates/types.sql.j2 index 99636ac0..f46f6616 100644 --- a/crates/eql-codegen/templates/types.sql.j2 +++ b/crates/eql-codegen/templates/types.sql.j2 @@ -1,18 +1,18 @@ -- AUTOMATICALLY GENERATED FILE. --- REQUIRE: src/schema-v3.sql +-- REQUIRE: src/v3/schema.sql ---! @file encrypted_domain/{{ token }}/{{ token }}_types.sql +--! @file v3/scalars/{{ token }}/{{ token }}_types.sql --! @brief Encrypted-domain types for {{ token }}. DO $$ BEGIN {%- for d in domains %} - --! @brief Encrypted domain {{ domain_schema }}.{{ d.name }}. + --! @brief Encrypted domain {{ schema }}.{{ d.name }}. IF NOT EXISTS ( SELECT 1 FROM pg_type - WHERE typname = '{{ d.typname }}' AND typnamespace = '{{ domain_schema }}'::regnamespace + WHERE typname = '{{ d.typname }}' AND typnamespace = '{{ schema }}'::regnamespace ) THEN - CREATE DOMAIN {{ domain_schema }}.{{ d.name }} AS jsonb + CREATE DOMAIN {{ schema }}.{{ d.name }} AS jsonb CHECK ( jsonb_typeof(VALUE) = 'object' {%- for k in d.keys %} diff --git a/crates/eql-codegen/tests/parity.rs b/crates/eql-codegen/tests/parity.rs index a59ed8dc..a0f1e3b1 100644 --- a/crates/eql-codegen/tests/parity.rs +++ b/crates/eql-codegen/tests/parity.rs @@ -36,7 +36,30 @@ fn rust_generator_matches_int4_golden_files() { eql_codegen::generate::generate_all(&out).expect("rust generate_all"); let ref_dir = root.join("tests/codegen/reference/int4"); - let gen_dir = out.join("src/encrypted_domain/int4"); + let gen_dir = out.join("src/v3/scalars/int4"); + + // Assert the generated .sql file SET matches the reference set first — the + // per-file byte comparison below only iterates reference files, so a missing + // generated 
file (or an extra one the reference never pins) would otherwise + // pass silently. + let sql_names = |dir: &std::path::Path| -> Vec<String> { + let mut names: Vec<String> = fs::read_dir(dir) + .unwrap() + .filter_map(|e| e.ok().map(|e| e.path())) + .filter(|p| p.extension().and_then(|x| x.to_str()) == Some("sql")) + .map(|p| p.file_name().unwrap().to_str().unwrap().to_string()) + .collect(); + names.sort(); + names + }; + let ref_names = sql_names(&ref_dir); + let gen_names = sql_names(&gen_dir); + assert_eq!( + gen_names, ref_names, + "generated int4 .sql file set differs from golden reference set \ + (reference: {ref_names:?}, generated: {gen_names:?})" + ); + for entry in fs::read_dir(&ref_dir).unwrap() { let path = entry.unwrap().path(); if path.extension().and_then(|e| e.to_str()) != Some("sql") { diff --git a/crates/eql-scalars/src/lib.rs b/crates/eql-scalars/src/lib.rs index 07bb45e8..79017728 100644 --- a/crates/eql-scalars/src/lib.rs +++ b/crates/eql-scalars/src/lib.rs @@ -145,14 +145,6 @@ impl Term { } } - /// Cross-schema return type of the extractor (in `eql_v2`). - pub const fn returns(self) -> &'static str { - match self { - Term::Hm => "eql_v2.hmac_256", - Term::Ore => "eql_v2.ore_block_u64_8_256", - } - } - /// Constructor name for the index-term type (unqualified). pub const fn ctor(self) -> &'static str { match self { @@ -180,10 +172,10 @@ impl Term { /// SQL `-- REQUIRE:` edges this term pulls in, in catalog order. pub const fn requires(self) -> &'static [&'static str] { match self { - Term::Hm => &["src/hmac_256/functions.sql"], + Term::Hm => &["src/v3/sem/hmac_256/functions.sql"], Term::Ore => &[ - "src/ore_block_u64_8_256/functions.sql", - "src/ore_block_u64_8_256/operators.sql", + "src/v3/sem/ore_block_u64_8_256/functions.sql", + "src/v3/sem/ore_block_u64_8_256/operators.sql", ], } } @@ -411,7 +403,20 @@ macro_rules! 
int_values { let mut i = 0; while i < N { out[i] = match SPEC.fixtures[i].numeric_value(SPEC.kind) { - Some(v) => v as $ty, + Some(v) => { + // Const-eval bounds check: a fixture value that does + // not fit the narrowed target type would otherwise be + // silently truncated/wrapped by `as`. Make it a + // compile-time error instead. + if v < <$ty>::MIN as i128 || v > <$ty>::MAX as i128 { + panic!(concat!( + "integer scalar fixture value out of range for `", + stringify!($ty), + "`" + )); + } + v as $ty + } None => panic!("integer scalar fixture must resolve to a number"), }; i += 1; @@ -513,11 +518,10 @@ mod term_tests { let hm = Term::Hm; assert_eq!(hm.json_key(), "hm"); assert_eq!(hm.extractor(), "eq_term"); - assert_eq!(hm.returns(), "eql_v2.hmac_256"); assert_eq!(hm.ctor(), "hmac_256"); assert_eq!(hm.role(), "eq"); assert_eq!(hm.operators(), &["=", "<>"]); - assert_eq!(hm.requires(), &["src/hmac_256/functions.sql"]); + assert_eq!(hm.requires(), &["src/v3/sem/hmac_256/functions.sql"]); } #[test] @@ -525,15 +529,14 @@ mod term_tests { let ore = Term::Ore; assert_eq!(ore.json_key(), "ob"); assert_eq!(ore.extractor(), "ord_term"); - assert_eq!(ore.returns(), "eql_v2.ore_block_u64_8_256"); assert_eq!(ore.ctor(), "ore_block_u64_8_256"); assert_eq!(ore.role(), "ord"); assert_eq!(ore.operators(), &["=", "<>", "<", "<=", ">", ">="]); assert_eq!( ore.requires(), &[ - "src/ore_block_u64_8_256/functions.sql", - "src/ore_block_u64_8_256/operators.sql", + "src/v3/sem/ore_block_u64_8_256/functions.sql", + "src/v3/sem/ore_block_u64_8_256/operators.sql", ] ); } @@ -571,9 +574,9 @@ mod term_helper_tests { assert_eq!( Term::term_requires(&[Term::Ore, Term::Ore, Term::Hm]), vec![ - "src/ore_block_u64_8_256/functions.sql", - "src/ore_block_u64_8_256/operators.sql", - "src/hmac_256/functions.sql", + "src/v3/sem/ore_block_u64_8_256/functions.sql", + "src/v3/sem/ore_block_u64_8_256/operators.sql", + "src/v3/sem/hmac_256/functions.sql", ] ); 
assert!(Term::term_requires(&[]).is_empty()); diff --git a/docs/reference/adding-a-scalar-encrypted-domain-type.md b/docs/reference/adding-a-scalar-encrypted-domain-type.md index 68d913e8..e4ae242b 100644 --- a/docs/reference/adding-a-scalar-encrypted-domain-type.md +++ b/docs/reference/adding-a-scalar-encrypted-domain-type.md @@ -11,9 +11,11 @@ A scalar encrypted-domain type is a family of concrete `jsonb` domains in the **`eql_v3`** schema (`eql_v3.`, `eql_v3._eq`, `eql_v3._ord`, …), dropped by `DROP SCHEMA eql_v3 CASCADE` and surviving an `eql_v2` uninstall. Their extractors, comparison wrappers, and MIN/MAX -aggregates also live in `eql_v3`; the index-term types they return -(`eql_v2.hmac_256`, `eql_v2.ore_block_u64_8_256`) stay in `eql_v2` and are -referenced cross-schema. +aggregates also live in `eql_v3`; the searchable-encrypted-metadata (SEM) +index-term types they return (`eql_v3.hmac_256`, +`eql_v3.ore_block_u64_8_256`) are **also `eql_v3`** — hand-written under +`src/v3/sem/`. The whole v3 surface is self-contained: it owns every type it +needs and has no runtime dependency on `eql_v2` (CI gates this — see §6). The whole SQL surface is **generated** from a single Rust source of truth: the `CATALOG` const in [`crates/eql-scalars/src/lib.rs`](../../crates/eql-scalars/src/lib.rs), @@ -60,7 +62,7 @@ Things you do **not** do: has a new name (§5). Hand-written SQL beyond the fixed surface goes in -`src/encrypted_domain//_extensions.sql` with explicit `-- REQUIRE:` edges +`src/v3/scalars//_extensions.sql` with explicit `-- REQUIRE:` edges — and **that file IS committed** (§5). 
--- @@ -111,8 +113,8 @@ contract — changing one is a generated-SQL behaviour change, not a refactor: | Term | JSON key | Extractor | Returns | Operators | | ----- | -------- | ----------- | -------------------------------- | -------------------------- | -| `Hm` | `hm` | `eq_term` | `eql_v2.hmac_256` | `=` `<>` | -| `Ore` | `ob` | `ord_term` | `eql_v2.ore_block_u64_8_256` | `=` `<>` `<` `<=` `>` `>=` | +| `Hm` | `hm` | `eq_term` | `eql_v3.hmac_256` | `=` `<>` | +| `Ore` | `ob` | `ord_term` | `eql_v3.ore_block_u64_8_256` | `=` `<>` `<` `<=` `>` `>=` | A type that needs a non-ORE equality term on an ordered domain needs a **new `Term`**, not a catalog flag. Adding a term is a code change to the `Term` @@ -290,7 +292,7 @@ This is the contract the generated SQL satisfies. You normally never read it to ### Domains and CHECK constraints -The generator emits `src/encrypted_domain//_types.sql` (gitignored; +The generator emits `src/v3/scalars//_types.sql` (gitignored; materialised on every build) with one idempotent `DO $$ ... $$` block. Every domain is a concrete domain over `jsonb` in the `eql_v3` schema — **never** `CREATE DOMAIN a AS b` over another generated domain (PostgreSQL resolves @@ -411,14 +413,14 @@ CREATE INDEX ... ON table_name USING btree (eql_v3.ord_term(col)); CREATE INDEX ... ON table_name USING hash (eql_v3.eq_term(col)); ``` -`ore` depends on `src/ore_block_u64_8_256/functions.sql` and -`src/ore_block_u64_8_256/operators.sql`; `hm` depends on -`src/hmac_256/functions.sql`. +`ore` depends on `src/v3/sem/ore_block_u64_8_256/functions.sql` and +`src/v3/sem/ore_block_u64_8_256/operators.sql`; `hm` depends on +`src/v3/sem/hmac_256/functions.sql`. ### Extension files Optional hand-written SQL beyond the fixed surface belongs in -`src/encrypted_domain//_extensions.sql`. The generator never creates, +`src/v3/scalars//_extensions.sql`. 
The generator never creates, lists, headers, or cleans it; it must declare its own `-- REQUIRE:` edges (usually to `_types.sql` and whichever generated function or operator file it extends). Use it for cross-domain casts, helper functions, or type-specific @@ -495,7 +497,7 @@ silently bypasses its exception; a pinned `search_path` reverts queries to seq scans. The generator exists so each new type adds one `CATALOG` row rather than ninety hand-written declarations that must agree with each other and with `pin_search_path.sql`, `tasks/test/splinter.sh`, and -`src/encrypted_domain/functions.sql`. +`src/v3/scalars/functions.sql`. ### Pipeline @@ -503,15 +505,20 @@ ninety hand-written declarations that must agree with each other and with runs as `cargo run -p eql-codegen` (no subcommand), which calls `generate::generate_all` (`crates/eql-codegen/src/generate.rs`) over every row of `eql_scalars::CATALOG`, writing each type's SQL into -`src/encrypted_domain//`. A second subcommand, `cargo run -p eql-codegen +`src/v3/scalars//`. A second subcommand, `cargo run -p eql-codegen -- list-types`, prints the catalog tokens one per line (consumed by the fixture and matrix-inventory enumeration). `main` (`crates/eql-codegen/src/main.rs`) recognises exactly these two forms; any other argument is a usage error. +The generator targets the `eql_v3` schema throughout: `CORE_SCHEMA = "eql_v3"` +(`crates/eql-codegen/src/consts.rs`) qualifies both the domain families and the +SEM index-term types the extractors return (`eql_v3.hmac_256`, +`eql_v3.ore_block_u64_8_256`), so no generated SQL references `eql_v2`. + `tasks/build.sh` runs `cargo run -p eql-codegen` at the start of every `mise run build`, so the generated SQL is never checked in. 
(The build first sweeps every generated `*_{types,functions,operators,aggregates}.sql` under -`src/encrypted_domain` so a type removed from `CATALOG` cannot leave orphans the +`src/v3/scalars` so a type removed from `CATALOG` cannot leave orphans the `src/**/*.sql` build glob would pick up; hand-written `*_extensions.sql` is preserved by the name patterns.) @@ -549,9 +556,9 @@ output for every catalog type from scratch. ### Generated outputs For a type with `D` domains of which `A` are ordered, the generator writes `1 + -2D + A` SQL files into `src/encrypted_domain//`. For `int4` (`D = 4`, `A = +2D + A` SQL files into `src/v3/scalars//`. For `int4` (`D = 4`, `A = 2`): eleven SQL files. The outputs are gitignored -(`.gitignore` excludes `src/encrypted_domain/*/*_{types,functions,operators,aggregates}.sql`) +(`.gitignore` excludes `src/v3/scalars/*/*_{types,functions,operators,aggregates}.sql`) and regenerated at the start of every build. | File | Content | @@ -564,11 +571,11 @@ and regenerated at the start of every build. 
Every file opens with the `-- AUTOMATICALLY GENERATED FILE.` marker (the project-wide marker `docs:validate` greps on to skip generated SQL — `crates/eql-codegen/src/consts.rs`), declares its `-- REQUIRE:` edges in -dependency order (types files require `src/schema-v3.sql`; function files require -both `src/schema.sql` and `src/schema-v3.sql`, the types file, and -`src/encrypted_domain/functions.sql` plus each term's `requires` set; operator -files require `src/schema-v3.sql`, the types file, and their domain's function -file; aggregate files require `src/schema-v3.sql`, the types file, and their +dependency order (types files require `src/v3/schema.sql`; function files require +`src/v3/schema.sql`, the types file, and +`src/v3/scalars/functions.sql` plus each term's `requires` set; operator +files require `src/v3/schema.sql`, the types file, and their domain's function +file; aggregate files require `src/v3/schema.sql`, the types file, and their domain's function and operator files), and carries Doxygen `--! @file` / `--! @brief` headers. diff --git a/docs/reference/eql-functions.md b/docs/reference/eql-functions.md index cfca800c..b610349c 100644 --- a/docs/reference/eql-functions.md +++ b/docs/reference/eql-functions.md @@ -429,21 +429,23 @@ encrypted-domain value. Generated per eq/ord-capable variant of every scalar type — see [Adding a Scalar Encrypted-Domain Type](./adding-a-scalar-encrypted-domain-type.md). The argument type selects the overload, and both are inlinable so a functional index built on the extractor engages. The extractors live in -the `eql_v3` schema; their return types remain the core `eql_v2` -index-term types. +the `eql_v3` schema; their return types are the self-contained `eql_v3` +SEM index-term types. ```sql -- int4 — generated for every scalar type's eq / ord variants. 
-eql_v3.eq_term(a eql_v3.int4_eq) RETURNS eql_v2.hmac_256 -eql_v3.ord_term(a eql_v3.int4_ord) RETURNS eql_v2.ore_block_u64_8_256 -eql_v3.ord_term(a eql_v3.int4_ord_ore) RETURNS eql_v2.ore_block_u64_8_256 +eql_v3.eq_term(a eql_v3.int4_eq) RETURNS eql_v3.hmac_256 +eql_v3.ord_term(a eql_v3.int4_ord) RETURNS eql_v3.ore_block_u64_8_256 +eql_v3.ord_term(a eql_v3.int4_ord_ore) RETURNS eql_v3.ore_block_u64_8_256 ``` **Example:** ```sql --- Functional indexes on the extracted terms (see Database Indexes) -CREATE INDEX ON users USING hash (eql_v3.eq_term(salary_encrypted)); -CREATE INDEX ON users USING btree (eql_v3.ord_term(salary_encrypted)); +-- Functional indexes on the extracted terms (see Database Indexes). +-- A column carries a single domain type, so `eq_term` and `ord_term` +-- apply to different columns (an `_eq` column vs an `_ord`/`_ord_ore` one). +CREATE INDEX ON users USING hash (eql_v3.eq_term(salary_eq)); +CREATE INDEX ON users USING btree (eql_v3.ord_term(salary_ord)); ``` > The full per-domain operator/wrapper/blocker surface (and the diff --git a/docs/reference/sql-support.md b/docs/reference/sql-support.md index c5204850..82770ad9 100644 --- a/docs/reference/sql-support.md +++ b/docs/reference/sql-support.md @@ -61,7 +61,7 @@ Use the equivalent [`jsonb_path_query`](#jsonb-functions-and-selectors-enabled-b ## Encrypted-domain scalar types (`eql_v3.`) -Scalar encrypted-domain types (e.g. `eql_v3.int4`; see [Adding a Scalar Encrypted-Domain Type](./adding-a-scalar-encrypted-domain-type.md)) are a different access model from the matrix above. Instead of configuring a search index on an `eql_v2_encrypted` column, you type the column as a specific domain *variant* whose operator surface is fixed at generation time. The index terms travel in the payload; there is no `add_search_config` step. 
The domains and their operator surface live in the `eql_v3` schema (dropped by `DROP SCHEMA eql_v3 CASCADE`, and they survive an `eql_v2` uninstall); their extracted index-term types remain the core `eql_v2` types. +Scalar encrypted-domain types (e.g. `eql_v3.int4`; see [Adding a Scalar Encrypted-Domain Type](./adding-a-scalar-encrypted-domain-type.md)) are a different access model from the matrix above. Instead of configuring a search index on an `eql_v2_encrypted` column, you type the column as a specific domain *variant* whose operator surface is fixed at generation time. The index terms travel in the payload; there is no `add_search_config` step. The domains and their operator surface live in the `eql_v3` schema (dropped by `DROP SCHEMA eql_v3 CASCADE`, and they survive an `eql_v2` uninstall); their extracted index-term types are the self-contained `eql_v3` SEM types (`eql_v3.hmac_256`, `eql_v3.ore_block_u64_8_256`). Each scalar type `` generates one storage-only variant plus eq/ord query variants: diff --git a/mise.toml b/mise.toml index ccd458fc..8baafecb 100644 --- a/mise.toml +++ b/mise.toml @@ -42,8 +42,8 @@ run = """ rm -f release/cipherstash-encrypt.sql """ -[tasks."test:sqlx"] -description = "Run SQLx tests with hybrid migration approach" +[tasks."test:sqlx:prep"] +description = "Prepare the SQLx test DB: cp built EQL into migrations, migrate, regenerate fixtures" # `build` produces release/cipherstash-encrypt.sql, which is then cp'd into # tests/sqlx/migrations/001_install_eql.sql below. Without this dep, a stale # release artifact silently ships an old EQL extension into the test DB and @@ -73,14 +73,23 @@ sqlx migrate run echo "Regenerating SQLx fixtures..." cd "{{config_root}}" mise run fixture:generate:all +""" +[tasks."test:sqlx"] +description = "Run SQLx tests with hybrid migration approach" +# Prep (build + cp + migrate + fixtures) is shared with test:sqlx:watch. 
+depends = ["test:sqlx:prep"] +dir = "{{config_root}}/tests/sqlx" +run = """ echo "Running Rust tests..." -cd tests/sqlx cargo test """ [tasks."test:sqlx:watch"] description = "Run SQLx tests in watch mode (rebuild EQL on changes)" +# Same prep as test:sqlx so watch mode starts from a migrated DB + fresh +# fixtures, not a stale checkout. +depends = ["test:sqlx:prep"] dir = "{{config_root}}/tests/sqlx" run = """ cargo watch -x test diff --git a/src/lint/lints.sql b/src/lint/lints.sql index f7870abd..08c4a7de 100644 --- a/src/lint/lints.sql +++ b/src/lint/lints.sql @@ -1,5 +1,5 @@ -- REQUIRE: src/schema.sql --- REQUIRE: src/schema-v3.sql +-- REQUIRE: src/v3/schema.sql --! @brief EQL lint: detect non-inlinable operator implementation functions --! diff --git a/src/schema-v3.sql b/src/schema-v3.sql deleted file mode 100644 index 06df8d38..00000000 --- a/src/schema-v3.sql +++ /dev/null @@ -1,22 +0,0 @@ ---! @file schema-v3.sql ---! @brief EQL v3 schema creation ---! ---! Creates the eql_v3 schema, which houses the encrypted-domain type ---! families (eql_v3.int4 and future scalar domains): their domains, index-term ---! extractors, comparison wrappers, blockers, and aggregates. The core ---! index-term types these reuse (eql_v2.hmac_256, eql_v2.ore_block_u64_8_256) ---! remain in the eql_v2 schema and are referenced cross-schema. ---! ---! Drops existing schema if present to support clean reinstallation. ---! ---! @warning DROP SCHEMA CASCADE will remove all objects in the schema ---! @note eql_v3 is a new, additional schema for domain families; the eql_v2 ---! schema name is unchanged. - ---! @brief Drop existing EQL v3 schema ---! @warning CASCADE will drop all dependent objects -DROP SCHEMA IF EXISTS eql_v3 CASCADE; - ---! @brief Create EQL v3 schema ---! 
@note Houses the encrypted-domain type families -CREATE SCHEMA eql_v3; diff --git a/src/v3/common.sql b/src/v3/common.sql new file mode 100644 index 00000000..698989c7 --- /dev/null +++ b/src/v3/common.sql @@ -0,0 +1,49 @@ +-- REQUIRE: src/v3/schema.sql + +--! @file v3/common.sql +--! @brief Common utility functions for the self-contained eql_v3 surface. +--! +--! Forked from src/common.sql (design D7) so the eql_v3 ORE constructor owns the +--! one transitive helper it needs without reaching into another schema. The +--! eql_v2 original is unchanged. + +--! @brief Convert JSONB hex array to bytea array +--! @internal +--! +--! Converts a JSONB array of hex-encoded strings into a PostgreSQL bytea array. +--! Used for deserializing binary data (like ORE terms) from JSONB storage. +--! +--! @param val jsonb JSONB array of hex-encoded strings +--! @return bytea[] Array of decoded binary values +--! +--! @note Returns NULL if input is JSON null +--! @note Each array element is hex-decoded to bytea +--! @note Inlinable `LANGUAGE sql` IMMUTABLE form (no `SET search_path`) so the +--! planner can fold this per-encrypted-value helper into the calling query. +--! This deliberately diverges from the v2 plpgsql equivalent (intentionally +--! left unchanged): the `CASE WHEN jsonb_typeof(val) = 'array'` guard only +--! evaluates the set-returning `jsonb_array_elements_text` for an array, so a +--! non-array JSON scalar returns NULL here instead of raising "cannot extract +--! elements from a scalar". Both callers only ever pass an array or JSON null +--! (`val->'ob'`), so the divergence is unreachable in practice; JSON null and +--! empty array still return NULL exactly as before. +CREATE FUNCTION eql_v3.jsonb_array_to_bytea_array(val jsonb) +RETURNS bytea[] + IMMUTABLE +AS $$ + SELECT CASE WHEN jsonb_typeof(val) = 'array' + THEN ( + SELECT array_agg(decode(value::text, 'hex')::bytea) + FROM jsonb_array_elements_text(val) AS value + ) + ELSE NULL + END; +$$ LANGUAGE sql; + +--! 
@internal Mark this hand-written helper inline-critical so the post-install +--! pin_search_path pass leaves it unpinned (no `SET search_path`), preserving +--! SQL-function inlining. It takes a bare `jsonb` arg (not a jsonb-backed +--! encrypted DOMAIN), so the structural skip in tasks/pin_search_path.sql does +--! not recognise it; this marker is the documented manual opt-in. +COMMENT ON FUNCTION eql_v3.jsonb_array_to_bytea_array(jsonb) IS + 'eql-inline-critical: per-encrypted-value ORE helper; must stay inlinable (unpinned search_path)'; diff --git a/src/v3/crypto.sql b/src/v3/crypto.sql new file mode 100644 index 00000000..bd99b162 --- /dev/null +++ b/src/v3/crypto.sql @@ -0,0 +1,53 @@ +-- REQUIRE: src/v3/schema.sql + +--! @file v3/crypto.sql +--! @brief PostgreSQL pgcrypto extension enablement (eql_v3 fork) +--! +--! Forked from src/crypto.sql (design D8) so the entire eql_v3 dependency +--! closure lives under src/v3/. Enables the pgcrypto extension which provides +--! cryptographic functions used by the eql_v3 ORE comparison path. +--! +--! Installs pgcrypto into the `extensions` schema (Supabase convention) to +--! avoid the `extension_in_public` lint. Every EQL function that uses pgcrypto +--! has `pg_catalog, extensions, public` on its `search_path`, so a pre-existing +--! install in `public` keeps working — and a pre-existing install anywhere else +--! will be rejected at install time. The body is idempotent +--! (`CREATE SCHEMA IF NOT EXISTS`, `pg_extension` guard), so running it +--! alongside the eql_v2 copy in a combined install is safe. +--! +--! @note pgcrypto provides functions like digest(), hmac(), gen_random_bytes() + +--! @brief Create extensions schema (Supabase convention) +CREATE SCHEMA IF NOT EXISTS extensions; + +--! 
@brief Enable pgcrypto extension and validate its schema +DO $$ +DECLARE + pgcrypto_schema name; +BEGIN + IF NOT EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'pgcrypto') THEN + CREATE EXTENSION pgcrypto WITH SCHEMA extensions; + END IF; + + SELECT n.nspname INTO pgcrypto_schema + FROM pg_extension e + JOIN pg_namespace n ON n.oid = e.extnamespace + WHERE e.extname = 'pgcrypto'; + + IF pgcrypto_schema = 'extensions' THEN + -- expected location, nothing to say + NULL; + ELSIF pgcrypto_schema = 'public' THEN + RAISE NOTICE + 'pgcrypto is installed in the `public` schema. EQL works against this layout, ' + 'but Supabase splinter will flag it as `extension_in_public`. Move it with: ' + 'ALTER EXTENSION pgcrypto SET SCHEMA extensions'; + ELSE + RAISE EXCEPTION + 'pgcrypto is installed in schema `%`, which is not on the EQL function search_path ' + '(pg_catalog, extensions, public). EQL cryptographic operations would fail at ' + 'runtime. Relocate the extension before installing EQL: ' + 'ALTER EXTENSION pgcrypto SET SCHEMA extensions', + pgcrypto_schema; + END IF; +END $$; diff --git a/src/encrypted_domain/functions.sql b/src/v3/scalars/functions.sql similarity index 87% rename from src/encrypted_domain/functions.sql rename to src/v3/scalars/functions.sql index 71a070a1..53273023 100644 --- a/src/encrypted_domain/functions.sql +++ b/src/v3/scalars/functions.sql @@ -1,9 +1,9 @@ --- REQUIRE: src/schema-v3.sql +-- REQUIRE: src/v3/schema.sql ---! @file encrypted_domain/functions.sql +--! @file v3/scalars/functions.sql --! @brief Shared blocker helper for the eql_v3 encrypted-domain families. --! ---! Per-domain wrapper functions live in src/encrypted_domain//. +--! Per-domain wrapper functions live in src/v3/scalars//. --! Blockers in those files delegate to encrypted_domain_unsupported_bool --! so every domain raises a uniform domain-specific error rather than --! 
letting an unsupported operator fall through to native jsonb diff --git a/src/v3/schema.sql b/src/v3/schema.sql new file mode 100644 index 00000000..41be4d40 --- /dev/null +++ b/src/v3/schema.sql @@ -0,0 +1,23 @@ +--! @file v3/schema.sql +--! @brief EQL v3 schema creation +--! +--! Creates the eql_v3 schema, which houses the self-contained encrypted-domain +--! type families (eql_v3.int4, eql_v3.int8, and future scalar domains): their +--! jsonb-backed domains, the searchable-encrypted-metadata (SEM) index-term +--! types they use (eql_v3.hmac_256, eql_v3.ore_block_u64_8_256), the index-term +--! extractors, comparison wrappers, blockers, and aggregates. The v3 surface is +--! self-contained — it owns every type it needs and has no runtime dependency +--! on another EQL schema. +--! +--! Drops existing schema if present to support clean reinstallation. +--! +--! @warning DROP SCHEMA CASCADE will remove all objects in the schema +--! @note eql_v3 is a new, additional schema for the encrypted-domain families. + +--! @brief Drop existing EQL v3 schema +--! @warning CASCADE will drop all dependent objects +DROP SCHEMA IF EXISTS eql_v3 CASCADE; + +--! @brief Create EQL v3 schema +--! @note Houses the encrypted-domain type families +CREATE SCHEMA eql_v3; diff --git a/src/v3/sem/hmac_256/functions.sql b/src/v3/sem/hmac_256/functions.sql new file mode 100644 index 00000000..9b2592cc --- /dev/null +++ b/src/v3/sem/hmac_256/functions.sql @@ -0,0 +1,42 @@ +-- REQUIRE: src/v3/schema.sql +-- REQUIRE: src/v3/sem/hmac_256/types.sql + +--! @file v3/sem/hmac_256/functions.sql +--! @brief HMAC-SHA256 index-term extraction from a jsonb payload (eql_v3 SEM). +--! +--! jsonb-only subset of src/hmac_256/functions.sql. The encrypted-column and +--! ste_vec-entry overloads are intentionally omitted — the eql_v3 scalar +--! domains extract from the jsonb payload directly via a cast to the domain. +--! (Doc comments deliberately avoid naming eql_v2 symbols so the +--! 
self-containment grep stays clean.) + +--! @brief Extract HMAC-SHA256 index term from JSONB payload +--! +--! Inlinable single-statement SQL — the planner can fold this into the calling +--! query so functional hash/btree indexes built on `eql_v3.eq_term(col)` +--! (which calls this) engage structurally. +--! +--! @param val jsonb containing encrypted EQL payload +--! @return eql_v3.hmac_256 HMAC-SHA256 hash value, or NULL when `hm` is absent +CREATE FUNCTION eql_v3.hmac_256(val jsonb) + RETURNS eql_v3.hmac_256 + LANGUAGE sql + IMMUTABLE STRICT PARALLEL SAFE +AS $$ + SELECT (val ->> 'hm')::eql_v3.hmac_256 +$$; + + +--! @brief Check if JSONB payload contains HMAC-SHA256 index term +--! +--! @param val jsonb containing encrypted EQL payload +--! @return boolean True if 'hm' field is present and non-null +CREATE FUNCTION eql_v3.has_hmac_256(val jsonb) + RETURNS boolean + IMMUTABLE STRICT PARALLEL SAFE + SET search_path = pg_catalog, extensions, public +AS $$ + BEGIN + RETURN val ->> 'hm' IS NOT NULL; + END; +$$ LANGUAGE plpgsql; diff --git a/src/v3/sem/hmac_256/types.sql b/src/v3/sem/hmac_256/types.sql new file mode 100644 index 00000000..92987935 --- /dev/null +++ b/src/v3/sem/hmac_256/types.sql @@ -0,0 +1,12 @@ +-- REQUIRE: src/v3/schema.sql + +--! @file v3/sem/hmac_256/types.sql +--! @brief HMAC-SHA256 index term type (eql_v3 SEM) +--! +--! Domain type representing HMAC-SHA256 hash values. Used for exact-match +--! encrypted searches. The hash is stored in the 'hm' field of encrypted data +--! payloads. Self-contained eql_v3 copy (design D1/D3); the eql_v2 original is +--! unchanged. +--! +--! @note Transient type used only during query execution. 
+CREATE DOMAIN eql_v3.hmac_256 AS text; diff --git a/src/v3/sem/ore_block_u64_8_256/functions.sql b/src/v3/sem/ore_block_u64_8_256/functions.sql new file mode 100644 index 00000000..ccc6a817 --- /dev/null +++ b/src/v3/sem/ore_block_u64_8_256/functions.sql @@ -0,0 +1,216 @@ +-- REQUIRE: src/v3/schema.sql +-- REQUIRE: src/v3/crypto.sql +-- REQUIRE: src/v3/common.sql +-- REQUIRE: src/v3/sem/ore_block_u64_8_256/types.sql + +--! @file v3/sem/ore_block_u64_8_256/functions.sql +--! @brief ORE block construction, extraction, and comparison (eql_v3 SEM). +--! +--! jsonb-only subset of src/ore_block_u64_8_256/functions.sql. The +--! encrypted-column overloads are omitted; the helper jsonb_array_to_bytea_array +--! and pgcrypto encrypt() are reached via the forked src/v3/common.sql and +--! src/v3/crypto.sql so the whole closure stays under src/v3. (Doc comments +--! deliberately avoid naming eql_v2 symbols so the self-containment grep stays +--! clean.) + +--! @brief Convert JSONB array to ORE block composite type +--! @internal +--! @param val jsonb Array of hex-encoded ORE block terms +--! @return eql_v3.ore_block_u64_8_256 ORE block composite, or NULL if input is null +--! @note Inlinable `LANGUAGE sql` IMMUTABLE form (no `SET search_path`) so the +--! planner can fold this per-encrypted-value helper into the calling query. +--! This deliberately diverges from the v2 plpgsql equivalent (intentionally +--! left unchanged): the `CASE WHEN jsonb_typeof(val) = 'array'` guard only +--! evaluates the array path for an array, so a non-array JSON scalar returns +--! NULL here instead of raising. The sole caller passes `val->'ob'`, always an +--! array or JSON null, so the divergence is unreachable in practice; JSON null +--! and empty array still return NULL exactly as before. 
+CREATE FUNCTION eql_v3.jsonb_array_to_ore_block_u64_8_256(val jsonb) +RETURNS eql_v3.ore_block_u64_8_256 + IMMUTABLE +AS $$ + SELECT CASE WHEN jsonb_typeof(val) = 'array' + THEN ROW(( + SELECT array_agg(ROW(b)::eql_v3.ore_block_u64_8_256_term) + FROM unnest(eql_v3.jsonb_array_to_bytea_array(val)) AS b + ))::eql_v3.ore_block_u64_8_256 + ELSE NULL + END; +$$ LANGUAGE sql; + +--! @internal Mark this hand-written helper inline-critical so the post-install +--! pin_search_path pass leaves it unpinned (no `SET search_path`), preserving +--! SQL-function inlining. It takes a bare `jsonb` arg (not a jsonb-backed +--! encrypted DOMAIN), so the structural skip in tasks/pin_search_path.sql does +--! not recognise it; this marker is the documented manual opt-in. +COMMENT ON FUNCTION eql_v3.jsonb_array_to_ore_block_u64_8_256(jsonb) IS + 'eql-inline-critical: per-encrypted-value ORE helper; must stay inlinable (unpinned search_path)'; + + +--! @brief Extract ORE block index term from JSONB payload +--! @param val jsonb containing encrypted EQL payload +--! @return eql_v3.ore_block_u64_8_256 ORE block index term +--! @throws Exception if 'ob' field is missing +CREATE FUNCTION eql_v3.ore_block_u64_8_256(val jsonb) + RETURNS eql_v3.ore_block_u64_8_256 + IMMUTABLE STRICT PARALLEL SAFE + SET search_path = pg_catalog, extensions, public +AS $$ + BEGIN + IF val IS NULL THEN + RETURN NULL; + END IF; + + IF eql_v3.has_ore_block_u64_8_256(val) THEN + RETURN eql_v3.jsonb_array_to_ore_block_u64_8_256(val->'ob'); + END IF; + RAISE 'Expected an ore index (ob) value in json: %', val; + END; +$$ LANGUAGE plpgsql; + + +--! @brief Check if JSONB payload contains ORE block index term +--! @param val jsonb containing encrypted EQL payload +--! 
@return boolean True if 'ob' field is present and non-null +CREATE FUNCTION eql_v3.has_ore_block_u64_8_256(val jsonb) + RETURNS boolean + IMMUTABLE STRICT PARALLEL SAFE + SET search_path = pg_catalog, extensions, public +AS $$ + BEGIN + RETURN val ->> 'ob' IS NOT NULL; + END; +$$ LANGUAGE plpgsql; + + +--! @brief Compare two ORE block terms using cryptographic comparison +--! @internal +--! @param a eql_v3.ore_block_u64_8_256_term First ORE term +--! @param b eql_v3.ore_block_u64_8_256_term Second ORE term +--! @return integer -1 if a < b, 0 if a = b, 1 if a > b +--! @throws Exception if ciphertexts are different lengths +CREATE FUNCTION eql_v3.compare_ore_block_u64_8_256_term(a eql_v3.ore_block_u64_8_256_term, b eql_v3.ore_block_u64_8_256_term) + RETURNS integer + SET search_path = pg_catalog, extensions, public +AS $$ + DECLARE + eq boolean := true; + unequal_block smallint := 0; + hash_key bytea; + data_block bytea; + encrypt_block bytea; + target_block bytea; + + left_block_size CONSTANT smallint := 16; + right_block_size CONSTANT smallint := 32; + right_offset CONSTANT smallint := 136; -- 8 * 17 + + indicator smallint := 0; + BEGIN + IF a IS NULL AND b IS NULL THEN + RETURN 0; + END IF; + + IF a IS NULL THEN + RETURN -1; + END IF; + + IF b IS NULL THEN + RETURN 1; + END IF; + + IF bit_length(a.bytes) != bit_length(b.bytes) THEN + RAISE EXCEPTION 'Ciphertexts are different lengths'; + END IF; + + FOR block IN 0..7 LOOP + IF + substr(a.bytes, 1 + block, 1) != substr(b.bytes, 1 + block, 1) + OR substr(a.bytes, 9 + left_block_size * block, left_block_size) != substr(b.bytes, 9 + left_block_size * BLOCK, left_block_size) + THEN + IF eq THEN + unequal_block := block; + END IF; + eq = false; + END IF; + END LOOP; + + IF eq THEN + RETURN 0::integer; + END IF; + + hash_key := substr(b.bytes, right_offset + 1, 16); + + target_block := substr(b.bytes, right_offset + 17 + (unequal_block * right_block_size), right_block_size); + + data_block := substr(a.bytes, 9 + 
(left_block_size * unequal_block), left_block_size); + + encrypt_block := encrypt(data_block::bytea, hash_key::bytea, 'aes-ecb'); + + indicator := ( + get_bit( + encrypt_block, + 0 + ) + get_bit(target_block, get_byte(a.bytes, unequal_block))) % 2; + + IF indicator = 1 THEN + RETURN 1::integer; + ELSE + RETURN -1::integer; + END IF; + END; +$$ LANGUAGE plpgsql; + + +--! @brief Compare arrays of ORE block terms recursively +--! @internal +--! @param a eql_v3.ore_block_u64_8_256_term[] First array +--! @param b eql_v3.ore_block_u64_8_256_term[] Second array +--! @return integer -1/0/1, or NULL if either array is NULL +CREATE FUNCTION eql_v3.compare_ore_block_u64_8_256_terms(a eql_v3.ore_block_u64_8_256_term[], b eql_v3.ore_block_u64_8_256_term[]) +RETURNS integer + SET search_path = pg_catalog, extensions, public +AS $$ + DECLARE + cmp_result integer; + BEGIN + IF a IS NULL OR b IS NULL THEN + RETURN NULL; + END IF; + + IF cardinality(a) = 0 AND cardinality(b) = 0 THEN + RETURN 0; + END IF; + + IF (cardinality(a) = 0) AND cardinality(b) > 0 THEN + RETURN -1; + END IF; + + IF cardinality(a) > 0 AND (cardinality(b) = 0) THEN + RETURN 1; + END IF; + + cmp_result := eql_v3.compare_ore_block_u64_8_256_term(a[1], b[1]); + + IF cmp_result = 0 THEN + RETURN eql_v3.compare_ore_block_u64_8_256_terms(a[2:array_length(a,1)], b[2:array_length(b,1)]); + END IF; + + RETURN cmp_result; + END +$$ LANGUAGE plpgsql; + + +--! @brief Compare ORE block composite types +--! @internal +--! @param a eql_v3.ore_block_u64_8_256 First ORE block +--! @param b eql_v3.ore_block_u64_8_256 Second ORE block +--! 
@return integer -1/0/1 +CREATE FUNCTION eql_v3.compare_ore_block_u64_8_256_terms(a eql_v3.ore_block_u64_8_256, b eql_v3.ore_block_u64_8_256) +RETURNS integer + SET search_path = pg_catalog, extensions, public +AS $$ + BEGIN + RETURN eql_v3.compare_ore_block_u64_8_256_terms(a.terms, b.terms); + END +$$ LANGUAGE plpgsql; diff --git a/src/v3/sem/ore_block_u64_8_256/operator_class.sql b/src/v3/sem/ore_block_u64_8_256/operator_class.sql new file mode 100644 index 00000000..b367c8f6 --- /dev/null +++ b/src/v3/sem/ore_block_u64_8_256/operator_class.sql @@ -0,0 +1,26 @@ +-- REQUIRE: src/v3/schema.sql +-- REQUIRE: src/v3/sem/ore_block_u64_8_256/types.sql +-- REQUIRE: src/v3/sem/ore_block_u64_8_256/functions.sql + +--! @file v3/sem/ore_block_u64_8_256/operator_class.sql +--! @brief B-tree operator family + default class on eql_v3.ore_block_u64_8_256. +--! +--! Gives the composite type its DEFAULT btree opclass so the recommended +--! functional index `CREATE INDEX ON t (eql_v3.ord_term(col))` engages without +--! an explicit opclass annotation (design D4). Excluded from the Supabase build +--! variant by the `**/*operator_class.sql` glob. + +--! @brief B-tree operator family for ORE block types +CREATE OPERATOR FAMILY eql_v3.ore_block_u64_8_256_operator_family USING btree; + +--! @brief B-tree operator class for ORE block encrypted values +--! +--! Supports operators: <, <=, =, >=, >. Uses comparison function +--! compare_ore_block_u64_8_256_terms. 
+CREATE OPERATOR CLASS eql_v3.ore_block_u64_8_256_operator_class DEFAULT FOR TYPE eql_v3.ore_block_u64_8_256 USING btree FAMILY eql_v3.ore_block_u64_8_256_operator_family AS + OPERATOR 1 <, + OPERATOR 2 <=, + OPERATOR 3 =, + OPERATOR 4 >=, + OPERATOR 5 >, + FUNCTION 1 eql_v3.compare_ore_block_u64_8_256_terms(a eql_v3.ore_block_u64_8_256, b eql_v3.ore_block_u64_8_256); diff --git a/src/v3/sem/ore_block_u64_8_256/operators.sql b/src/v3/sem/ore_block_u64_8_256/operators.sql new file mode 100644 index 00000000..beca9da8 --- /dev/null +++ b/src/v3/sem/ore_block_u64_8_256/operators.sql @@ -0,0 +1,181 @@ +-- REQUIRE: src/v3/schema.sql +-- REQUIRE: src/v3/sem/ore_block_u64_8_256/types.sql +-- REQUIRE: src/v3/sem/ore_block_u64_8_256/functions.sql + +--! @file v3/sem/ore_block_u64_8_256/operators.sql +--! @brief Comparison operators on eql_v3.ore_block_u64_8_256. +--! +--! The six backing functions are inlinable single-statement SQL so the planner +--! can fold the eql_v3 comparison wrappers through to functional-index matching. + +--! @brief Equality backing function for ORE block types +--! @internal +--! +--! @param a eql_v3.ore_block_u64_8_256 Left operand +--! @param b eql_v3.ore_block_u64_8_256 Right operand +--! @return boolean True if the ORE blocks are equal +--! +--! @see eql_v3.compare_ore_block_u64_8_256_terms +CREATE FUNCTION eql_v3.ore_block_u64_8_256_eq(a eql_v3.ore_block_u64_8_256, b eql_v3.ore_block_u64_8_256) +RETURNS boolean + LANGUAGE sql + IMMUTABLE STRICT PARALLEL SAFE +AS $$ + SELECT eql_v3.compare_ore_block_u64_8_256_terms(a, b) = 0 +$$; + +--! @brief Not-equal backing function for ORE block types +--! @internal +--! +--! @param a eql_v3.ore_block_u64_8_256 Left operand +--! @param b eql_v3.ore_block_u64_8_256 Right operand +--! @return boolean True if the ORE blocks are not equal +--! +--! 
@see eql_v3.compare_ore_block_u64_8_256_terms +CREATE FUNCTION eql_v3.ore_block_u64_8_256_neq(a eql_v3.ore_block_u64_8_256, b eql_v3.ore_block_u64_8_256) +RETURNS boolean + LANGUAGE sql + IMMUTABLE STRICT PARALLEL SAFE +AS $$ + SELECT eql_v3.compare_ore_block_u64_8_256_terms(a, b) <> 0 +$$; + +--! @brief Less-than backing function for ORE block types +--! @internal +--! +--! @param a eql_v3.ore_block_u64_8_256 Left operand +--! @param b eql_v3.ore_block_u64_8_256 Right operand +--! @return boolean True if the left operand is less than the right operand +--! +--! @see eql_v3.compare_ore_block_u64_8_256_terms +CREATE FUNCTION eql_v3.ore_block_u64_8_256_lt(a eql_v3.ore_block_u64_8_256, b eql_v3.ore_block_u64_8_256) +RETURNS boolean + LANGUAGE sql + IMMUTABLE STRICT PARALLEL SAFE +AS $$ + SELECT eql_v3.compare_ore_block_u64_8_256_terms(a, b) = -1 +$$; + +--! @brief Less-than-or-equal backing function for ORE block types +--! @internal +--! +--! @param a eql_v3.ore_block_u64_8_256 Left operand +--! @param b eql_v3.ore_block_u64_8_256 Right operand +--! @return boolean True if the left operand is less than or equal to the right operand +--! +--! @see eql_v3.compare_ore_block_u64_8_256_terms +CREATE FUNCTION eql_v3.ore_block_u64_8_256_lte(a eql_v3.ore_block_u64_8_256, b eql_v3.ore_block_u64_8_256) +RETURNS boolean + LANGUAGE sql + IMMUTABLE STRICT PARALLEL SAFE +AS $$ + SELECT eql_v3.compare_ore_block_u64_8_256_terms(a, b) != 1 +$$; + +--! @brief Greater-than backing function for ORE block types +--! @internal +--! +--! @param a eql_v3.ore_block_u64_8_256 Left operand +--! @param b eql_v3.ore_block_u64_8_256 Right operand +--! @return boolean True if the left operand is greater than the right operand +--! +--! 
@see eql_v3.compare_ore_block_u64_8_256_terms
+CREATE FUNCTION eql_v3.ore_block_u64_8_256_gt(a eql_v3.ore_block_u64_8_256, b eql_v3.ore_block_u64_8_256)
+RETURNS boolean
+  LANGUAGE sql
+  IMMUTABLE STRICT PARALLEL SAFE
+AS $$
+  SELECT eql_v3.compare_ore_block_u64_8_256_terms(a, b) = 1
+$$;
+
+--! @brief Greater-than-or-equal backing function for ORE block types
+--! @internal
+--!
+--! @param a eql_v3.ore_block_u64_8_256 Left operand
+--! @param b eql_v3.ore_block_u64_8_256 Right operand
+--! @return boolean True if the left operand is greater than or equal to the right operand
+--!
+--! @see eql_v3.compare_ore_block_u64_8_256_terms
+CREATE FUNCTION eql_v3.ore_block_u64_8_256_gte(a eql_v3.ore_block_u64_8_256, b eql_v3.ore_block_u64_8_256)
+RETURNS boolean
+  LANGUAGE sql
+  IMMUTABLE STRICT PARALLEL SAFE
+AS $$
+  SELECT eql_v3.compare_ore_block_u64_8_256_terms(a, b) != -1
+$$;
+
+
+--! @brief = operator for ORE block types
+--!
+--! COMMUTATOR is the operator itself: equality is symmetric. Required for the
+--! MERGES flag — without it the planner raises "could not find commutator" the
+--! first time an ore_block equality is used as a join qual (e.g. via the inlined
+--! eql_v3._ord_ore equality wrappers).
+CREATE OPERATOR = (
+  FUNCTION=eql_v3.ore_block_u64_8_256_eq,
+  LEFTARG=eql_v3.ore_block_u64_8_256,
+  RIGHTARG=eql_v3.ore_block_u64_8_256,
+  COMMUTATOR = =,
+  NEGATOR = <>,
+  RESTRICT = eqsel,
+  JOIN = eqjoinsel,
+  HASHES, -- NOTE(review): hash joins fail at run time unless a hash operator family covers this type; only a btree class is visible here - confirm
+  MERGES
+);
+
+--! @brief <> operator for ORE block types
+--! @note Uses the inequality estimators (neqsel / neqjoinsel) and omits
+--! HASHES / MERGES, which PostgreSQL permits only on equality operators.
+CREATE OPERATOR <> (
+  FUNCTION=eql_v3.ore_block_u64_8_256_neq,
+  LEFTARG=eql_v3.ore_block_u64_8_256,
+  RIGHTARG=eql_v3.ore_block_u64_8_256,
+  COMMUTATOR = <>,
+  NEGATOR = =,
+  RESTRICT = neqsel,
+  JOIN = neqjoinsel
+);
+
+--! 
@brief > operator for ORE block types +CREATE OPERATOR > ( + FUNCTION=eql_v3.ore_block_u64_8_256_gt, + LEFTARG=eql_v3.ore_block_u64_8_256, + RIGHTARG=eql_v3.ore_block_u64_8_256, + COMMUTATOR = <, + NEGATOR = <=, + RESTRICT = scalargtsel, + JOIN = scalargtjoinsel +); + +--! @brief < operator for ORE block types +CREATE OPERATOR < ( + FUNCTION=eql_v3.ore_block_u64_8_256_lt, + LEFTARG=eql_v3.ore_block_u64_8_256, + RIGHTARG=eql_v3.ore_block_u64_8_256, + COMMUTATOR = >, + NEGATOR = >=, + RESTRICT = scalarltsel, + JOIN = scalarltjoinsel +); + +--! @brief <= operator for ORE block types +CREATE OPERATOR <= ( + FUNCTION=eql_v3.ore_block_u64_8_256_lte, + LEFTARG=eql_v3.ore_block_u64_8_256, + RIGHTARG=eql_v3.ore_block_u64_8_256, + COMMUTATOR = >=, + NEGATOR = >, + RESTRICT = scalarlesel, + JOIN = scalarlejoinsel +); + +--! @brief >= operator for ORE block types +CREATE OPERATOR >= ( + FUNCTION=eql_v3.ore_block_u64_8_256_gte, + LEFTARG=eql_v3.ore_block_u64_8_256, + RIGHTARG=eql_v3.ore_block_u64_8_256, + COMMUTATOR = <=, + NEGATOR = <, + RESTRICT = scalargesel, + JOIN = scalargejoinsel +); diff --git a/src/v3/sem/ore_block_u64_8_256/types.sql b/src/v3/sem/ore_block_u64_8_256/types.sql new file mode 100644 index 00000000..f7e44dd0 --- /dev/null +++ b/src/v3/sem/ore_block_u64_8_256/types.sql @@ -0,0 +1,26 @@ +-- REQUIRE: src/v3/schema.sql + +--! @file v3/sem/ore_block_u64_8_256/types.sql +--! @brief ORE block index-term types (eql_v3 SEM). +--! +--! Self-contained eql_v3 copies of the Order-Revealing Encryption block types +--! (design D1/D3). The eql_v2 originals are unchanged. + +--! @brief ORE block term type for Order-Revealing Encryption +--! +--! Composite type representing a single ORE block term. Stores encrypted data +--! as bytea that enables range comparisons without decryption. +CREATE TYPE eql_v3.ore_block_u64_8_256_term AS ( + bytes bytea +); + + +--! @brief ORE block index term type for range queries +--! +--! Composite type containing an array of ORE block terms. 
The array is stored +--! in the 'ob' field of encrypted data payloads. +--! +--! @note Transient type used only during query execution. +CREATE TYPE eql_v3.ore_block_u64_8_256 AS ( + terms eql_v3.ore_block_u64_8_256_term[] +); diff --git a/tasks/build.sh b/tasks/build.sh index ead83184..4d3d94e0 100755 --- a/tasks/build.sh +++ b/tasks/build.sh @@ -8,7 +8,7 @@ set -euo pipefail # Regenerate encrypted-domain SQL from the Rust catalog before building. -# Generated files (src/encrypted_domain//_*.sql) are gitignored; the +# Generated files (src/v3/scalars//_*.sql) are gitignored; the # catalog at crates/eql-scalars/src (eql-scalars::CATALOG) is the source of # truth, rendered by the eql-codegen binary. # @@ -17,8 +17,8 @@ set -euo pipefail # pick up. eql-codegen cleans within a directory it regenerates, but never # runs for a type no longer in the catalog. Hand-written *_extensions.sql is # preserved by the name patterns; -mindepth 2 keeps the type-agnostic -# src/encrypted_domain/functions.sql safe. -find src/encrypted_domain -mindepth 2 -type f \ +# src/v3/scalars/functions.sql safe. +find src/v3/scalars -mindepth 2 -type f \ \( -name '*_types.sql' -o -name '*_functions.sql' -o -name '*_operators.sql' \ -o -name '*_aggregates.sql' \) \ -delete 2>/dev/null || true @@ -48,6 +48,29 @@ verify_deps_exist() { fi } +# Fail loudly if any v3 REQUIRE edge points OUTSIDE src/v3. The v3-only build +# must be self-contained (no eql_v2 coupling); a stray `-- REQUIRE: src/...` +# edge to a non-v3 file would silently pull eql_v2 SQL into the v3 artefact (or +# tsort would drop it), breaking self-containment. Each line in deps-v3.txt is +# " "; self-edges (file == dep) are skipped, every other dep target +# must start with src/v3/. 
+verify_v3_self_contained() { + local dep_file=$1 + local offending=0 + while IFS=' ' read -r src dep; do + [[ -z "$dep" ]] && continue + [[ "$src" == "$dep" ]] && continue + if [[ "$dep" != src/v3/* ]]; then + echo "ERROR: v3 REQUIRE edge points outside src/v3: $src -- REQUIRE: $dep" >&2 + offending=1 + fi + done < "$dep_file" + if [[ $offending -ne 0 ]]; then + echo "ERROR: v3-only build is not self-contained — a -- REQUIRE: target lives outside src/v3 (see above)." >&2 + exit 1 + fi +} + mkdir -p release rm -f release/cipherstash-encrypt-uninstall.sql @@ -59,6 +82,9 @@ rm -f release/cipherstash-encrypt-supabase.sql rm -f release/cipherstash-encrypt-protect.sql rm -f release/cipherstash-encrypt-protect-uninstall.sql +rm -f release/cipherstash-encrypt-v3.sql +rm -f release/cipherstash-encrypt-v3-uninstall.sql + rm -f dbdev/eql--0.0.0.sql rm -f src/version.sql @@ -68,6 +94,8 @@ rm -f src/deps-supabase.txt rm -f src/deps-ordered-supabase.txt rm -f src/deps-protect.txt rm -f src/deps-ordered-protect.txt +rm -f src/deps-v3.txt +rm -f src/deps-ordered-v3.txt RELEASE_VERSION=${usage_version:-DEV} @@ -163,6 +191,39 @@ cat tasks/pin_search_path.sql >> release/cipherstash-encrypt-protect.sql cat tasks/uninstall-protect.sql >> release/cipherstash-encrypt-protect-uninstall.sql +# v3-only build (design D9): the self-contained eql_v3 surface — schema, SEM +# types, scalar domains — globbed from src/v3 ONLY. This is the unit the +# self-containment gate greps; it is the only artifact that can be "free of +# eql_v2", because the combined variants glob all of src/. It deliberately does +# NOT append tasks/pin_search_path.sql (D11): that script is eql_v2-coupled +# (raises if public.eql_v2_encrypted / eql_v2.ste_vec_entry are absent and only +# ever pins eql_v2 functions), so appending it would both fail a clean v3 +# install and break the self-containment grep. +find src/v3 -type f -path "*.sql" ! 
-path "*_test.sql" | while IFS= read -r sql_file; do + echo "$sql_file" + + echo "$sql_file $sql_file" >> src/deps-v3.txt + + while IFS= read -r line; do + if [[ "$line" == *"-- REQUIRE:"* ]]; then + deps=${line#*-- REQUIRE: } + for dep in $deps; do + echo "$sql_file $dep" >> src/deps-v3.txt + done + fi + done < "$sql_file" +done + +verify_v3_self_contained src/deps-v3.txt + +cat src/deps-v3.txt | tsort | tac > src/deps-ordered-v3.txt +verify_deps_exist src/deps-ordered-v3.txt + +cat src/deps-ordered-v3.txt | xargs cat | grep -v REQUIRE >> release/cipherstash-encrypt-v3.sql + +cat tasks/uninstall-v3.sql >> release/cipherstash-encrypt-v3-uninstall.sql + + echo echo '###############################################' echo "# ✅Build succeeded" @@ -172,8 +233,10 @@ echo 'Installer:' echo ' release/cipherstash-encrypt.sql' echo ' release/cipherstash-encrypt-supabase.sql' echo ' release/cipherstash-encrypt-protect.sql' +echo ' release/cipherstash-encrypt-v3.sql' echo echo 'Uninstaller:' echo ' release/cipherstash-encrypt-uninstall.sql' echo ' release/cipherstash-encrypt-uninstall-supabase.sql' echo ' release/cipherstash-encrypt-protect-uninstall.sql' +echo ' release/cipherstash-encrypt-v3-uninstall.sql' diff --git a/tasks/codegen-parity.sh b/tasks/codegen-parity.sh index 2de923be..0445802f 100755 --- a/tasks/codegen-parity.sh +++ b/tasks/codegen-parity.sh @@ -16,9 +16,12 @@ echo "==> Comparing int4 generated SQL file SET vs golden (catches extra/dropped # is never iterated. Assert the sets are equal first to close that blind spot. # "Generated" excludes any committed, hand-written SQL (e.g. int4_extensions.sql), # which lives in this dir but has no golden counterpart; git-tracked == hand-written. 
-golden_set=$(cd tests/codegen/reference/int4 && ls *.sql | LC_ALL=C sort) -gen_set=$(cd src/encrypted_domain/int4 \ - && comm -23 <(ls *.sql | LC_ALL=C sort) \ +# find (not `ls *.sql`) so an empty dir yields zero lines instead of aborting +# under `set -e`; `-maxdepth 1` + sed strips the leading `./` for bare names. +golden_set=$(cd tests/codegen/reference/int4 \ + && find . -maxdepth 1 -name '*.sql' | sed 's#.*/##' | LC_ALL=C sort) +gen_set=$(cd src/v3/scalars/int4 \ + && comm -23 <(find . -maxdepth 1 -name '*.sql' | sed 's#.*/##' | LC_ALL=C sort) \ <(git ls-files . | sed 's#.*/##' | LC_ALL=C sort)) if [ "$golden_set" != "$gen_set" ]; then echo "int4 generated SQL file set differs from golden (< golden, > generated):" >&2 @@ -33,7 +36,7 @@ for f in tests/codegen/reference/int4/*.sql; do # bytes EXACTLY. Both the reference body (from line 2) and the whole generated # file start with the template-owned `-- AUTOMATICALLY GENERATED FILE.` marker, # so no header strip is needed — any whitespace or blank-line drift fails here. - diff <(tail -n +2 "$f") "src/encrypted_domain/int4/$name" + diff <(tail -n +2 "$f") "src/v3/scalars/int4/$name" done echo "PARITY OK: Rust generator matches the int4 golden (byte-for-byte)." diff --git a/tasks/pin_search_path.sql b/tasks/pin_search_path.sql index 774be740..75eed567 100644 --- a/tasks/pin_search_path.sql +++ b/tasks/pin_search_path.sql @@ -1,5 +1,5 @@ --! @file pin_search_path.sql ---! @brief Post-install: pin search_path on every eql_v2.* function +--! @brief Post-install: pin search_path on every eql_v2.* and eql_v3.* function --! --! This file is appended verbatim by `tasks/build.sh` to the end of every --! 
release variant (main, supabase, protect/stack), AFTER all `src/**/*.sql` @@ -99,7 +99,8 @@ BEGIN SELECT pg_catalog.array_agg(p.oid) INTO inline_critical_oids FROM pg_catalog.pg_proc p JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace - WHERE n.nspname = 'eql_v2' + WHERE ( + n.nspname = 'eql_v2' AND ( -- Same-type (encrypted, encrypted) operators that must inline. -- `like`/`ilike` are the SQL helpers that `~~`/`~~*` delegate to; @@ -244,7 +245,28 @@ BEGIN OR p.proargtypes[0] = (SELECT t.oid FROM pg_catalog.pg_type t JOIN pg_catalog.pg_namespace n ON n.oid = t.typnamespace WHERE n.nspname = 'eql_v2' AND t.typname = 'stevec_query'))) - ); + ) + ) + OR ( + -- eql_v3 SEM index-term functions (self-contained fork). These mirror the + -- eql_v2 ore_block / hmac_256 inline-critical clauses above: the + -- comparison-wrapper inlining for the eql_v3 *_ord domains and eq_term only + -- reaches functional-index matching if these inner functions stay inlinable + -- (no SET, IMMUTABLE). The generated extractors/wrappers themselves are + -- spared by the jsonb-DOMAIN structural skip below; these SEM functions take + -- a composite (ore_block) or raw jsonb (hmac_256) arg, so they need an + -- explicit entry here. 
+ n.nspname = 'eql_v3' + AND ( + (p.pronargs = 2 + AND p.proname IN ('ore_block_u64_8_256_eq', 'ore_block_u64_8_256_neq', + 'ore_block_u64_8_256_lt', 'ore_block_u64_8_256_lte', + 'ore_block_u64_8_256_gt', 'ore_block_u64_8_256_gte')) + OR (p.pronargs = 1 + AND p.proname = 'hmac_256' + AND p.proargtypes[0] = jsonb_oid) + ) + ); FOR fn_oid IN SELECT p.oid diff --git a/tasks/test/clean_install_v3.sh b/tasks/test/clean_install_v3.sh new file mode 100755 index 00000000..7c771da8 --- /dev/null +++ b/tasks/test/clean_install_v3.sh @@ -0,0 +1,70 @@ +#!/usr/bin/env bash +#MISE description="Install release/cipherstash-encrypt-v3.sql into a scratch DB with NO eql_v2 and smoke-test it (D11, D4)" +#USAGE flag "--port " help="Postgres port" default="7432" +#USAGE flag "--user " help="Postgres user" default="cipherstash" + +set -euo pipefail + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +cd "$REPO_ROOT" + +PG_PORT="${usage_port:-7432}" +PG_USER="${usage_user:-cipherstash}" +export PGPASSWORD="${POSTGRES_PASSWORD:-password}" +SCRATCH_DB="cipherstash_v3_clean" + +ADMIN=(psql -U "$PG_USER" -h localhost -p "$PG_PORT" -d postgres -v ON_ERROR_STOP=1 -q) +RUN=(psql -U "$PG_USER" -h localhost -p "$PG_PORT" -d "$SCRATCH_DB" -v ON_ERROR_STOP=1 -q) + +test -f release/cipherstash-encrypt-v3.sql || { echo "Build first: release/cipherstash-encrypt-v3.sql missing" >&2; exit 2; } + +echo "==> (re)creating scratch database $SCRATCH_DB (no eql_v2 installed)" +"${ADMIN[@]}" -c "DROP DATABASE IF EXISTS ${SCRATCH_DB} WITH (FORCE);" +"${ADMIN[@]}" -c "CREATE DATABASE ${SCRATCH_DB};" + +cleanup() { "${ADMIN[@]}" -c "DROP DATABASE IF EXISTS ${SCRATCH_DB} WITH (FORCE);" >/dev/null 2>&1 || true; } +trap cleanup EXIT + +echo "==> installing the standalone eql_v3 surface" +"${RUN[@]}" -f release/cipherstash-encrypt-v3.sql + +echo "==> asserting NO eql_v2 schema exists (proves no v2 dependency)" +"${RUN[@]}" -c "DO \$\$ BEGIN IF EXISTS (SELECT 1 FROM pg_namespace WHERE nspname = 'eql_v2') 
THEN RAISE EXCEPTION 'eql_v2 schema unexpectedly present'; END IF; END \$\$;" + +echo "==> smoke: domains, SEM types, extractors, opclass functional index (D4)" +"${RUN[@]}" <<'SQL' +-- Domains and SEM types exist in eql_v3. +SELECT 'eql_v3.int4_ord'::regtype; +SELECT 'eql_v3.hmac_256'::regtype; +SELECT 'eql_v3.ore_block_u64_8_256'::regtype; + +-- A real ordered-domain column + the documented functional index. This is the +-- D4 proof: it fails outright if the ported operator_class is absent. +CREATE TABLE v3_smoke (c eql_v3.int4_ord); +CREATE INDEX v3_smoke_ord ON v3_smoke (eql_v3.ord_term(c)); +DROP TABLE v3_smoke; +SQL + +echo "==> smoke: the shared blocker is reachable and raises" +"${RUN[@]}" <<'SQL' +DO $$ +DECLARE + raised boolean := false; +BEGIN + -- The blocker always RAISEs; catch it and assert we got the expected message. + BEGIN + PERFORM eql_v3.encrypted_domain_unsupported_bool('eql_v3.int4', '<'); + EXCEPTION WHEN OTHERS THEN + raised := true; + IF SQLERRM <> 'operator < is not supported for eql_v3.int4' THEN + RAISE EXCEPTION 'blocker raised an unexpected message: %', SQLERRM; + END IF; + END; + + IF NOT raised THEN + RAISE EXCEPTION 'blocker eql_v3.encrypted_domain_unsupported_bool did not raise'; + END IF; +END $$; +SQL + +echo "clean v3 install OK (D11 + D4 proven)" diff --git a/tasks/test/self_contained_v3.sh b/tasks/test/self_contained_v3.sh new file mode 100755 index 00000000..72466624 --- /dev/null +++ b/tasks/test/self_contained_v3.sh @@ -0,0 +1,50 @@ +#!/usr/bin/env bash +#MISE description="Assert the eql_v3 surface is self-contained (no eql_v2 symbol/file leakage)" + +set -euo pipefail + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +cd "$REPO_ROOT" + +fail=0 + +# Symbol level (design goal 1): no eql_v2. anywhere under src/v3 — the +# hand-written SEM + foundation files plus the gitignored generated scalar +# surface (present because build runs codegen). Run `mise run build` first. 
+# Match both schema-qualified refs (`eql_v2.`) and bare v2 entity names +# (`eql_v2_encrypted`, `eql_v2_configuration`, …). Prose like "the eql_v2 +# original is unchanged" in doc comments is intentionally still allowed. +echo "==> Symbol gate: no 'eql_v2.' / 'eql_v2_' under src/v3" +if grep -rnE 'eql_v2[._]' src/v3; then + echo "ERROR: eql_v2 symbol/entity reference found in src/v3 (must be self-contained)" >&2 + fail=1 +fi + +# File level (design goal 2): the v3-only dependency closure pulls in no file +# outside src/v3/. tsort output is one path per line. +if [[ ! -f src/deps-ordered-v3.txt ]]; then + echo "ERROR: src/deps-ordered-v3.txt missing — run 'mise run build' first" >&2 + exit 2 +fi +echo "==> File gate: every path in src/deps-ordered-v3.txt is under src/v3/" +if grep -v '^src/v3/' src/deps-ordered-v3.txt; then + echo "ERROR: v3 dep closure pulls in a path outside src/v3/ (eql_v2 file leak)" >&2 + fail=1 +fi + +# Belt-and-braces: the assembled artifact carries no eql_v2 symbol. +echo "==> Artifact gate: release/cipherstash-encrypt-v3.sql has no 'eql_v2.' / 'eql_v2_'" +if [[ ! -f release/cipherstash-encrypt-v3.sql ]]; then + echo "ERROR: release/cipherstash-encrypt-v3.sql missing — run 'mise run build' first" >&2 + exit 2 +fi +if grep -nE 'eql_v2[._]' release/cipherstash-encrypt-v3.sql; then + echo "ERROR: assembled v3 artifact contains an eql_v2 symbol/entity reference" >&2 + fail=1 +fi + +if [[ $fail -ne 0 ]]; then + echo "self-containment gate FAILED" >&2 + exit 1 +fi +echo "self-containment gate OK" diff --git a/tasks/test/splinter.sh b/tasks/test/splinter.sh index a01de51a..282b485b 100755 --- a/tasks/test/splinter.sh +++ b/tasks/test/splinter.sh @@ -107,8 +107,8 @@ function_search_path_mutable eql_v2 grouped_value function Aggregate: same as mi # encrypted-type operators above; splinter matches by (schema, name, type), so # they need their own rows. The plpgsql blockers are pinned by # tasks/pin_search_path.sql and do not surface here. 
-function_search_path_mutable eql_v3 eq_term function HMAC equality term extractor for the eql_v3 *_eq domains: returns eql_v2.hmac_256. Must inline so `eql_v3.eq_term(col)` folds into the calling query and matches the functional hash/btree index built on the same expression. SET search_path would disable SQL function inlining (see PostgreSQL inline_function). -function_search_path_mutable eql_v3 ord_term function ORE-block order term extractor for the eql_v3 ordered domains: returns eql_v2.ore_block_u64_8_256 (carrying the main DEFAULT btree opclass). Used inside the inlinable comparison wrappers and as the functional-index expression USING btree (eql_v3.ord_term(col)); must inline. Covers both ord_term overloads (eql_v3.int4_ord, eql_v3.int4_ord_ore). +function_search_path_mutable eql_v3 eq_term function HMAC equality term extractor for the eql_v3 *_eq domains: returns eql_v3.hmac_256. Must inline so `eql_v3.eq_term(col)` folds into the calling query and matches the functional hash/btree index built on the same expression. SET search_path would disable SQL function inlining (see PostgreSQL inline_function). +function_search_path_mutable eql_v3 ord_term function ORE-block order term extractor for the eql_v3 ordered domains: returns eql_v3.ore_block_u64_8_256 (carrying the main DEFAULT btree opclass). Used inside the inlinable comparison wrappers and as the functional-index expression USING btree (eql_v3.ord_term(col)); must inline. Covers both ord_term overloads (eql_v3.int4_ord, eql_v3.int4_ord_ore). function_search_path_mutable eql_v3 eq function Equality comparison wrapper on the eql_v3 domains. Inlines to `eq_term(a) = eq_term(b)`; must reach the functional index on eql_v3.eq_term(col) for bare-form equality to engage Index Scan. Covers the converged eq wrappers on the eql_v3 int4 variants. function_search_path_mutable eql_v3 neq function Inequality comparison wrapper on the eql_v3 domains. Same rationale as eql_v3.eq. 
function_search_path_mutable eql_v3 lt function Less-than comparison wrapper on the eql_v3 ordered domains. Inlines to `ord_term(a) < ord_term(b)`; must reach the functional btree index on eql_v3.ord_term(col) for range queries to engage Index Scan. @@ -117,6 +117,15 @@ function_search_path_mutable eql_v3 gt function Greater-than comparison wrapper function_search_path_mutable eql_v3 gte function Greater-than-or-equal comparison wrapper on the eql_v3 ordered domains. Same rationale as eql_v3.lt. function_search_path_mutable eql_v3 min function Per-domain MIN aggregate on the eql_v3 ordered domains (splinter labels aggregates type=function): ALTER AGGREGATE has no SET configuration_parameter syntax, and ALTER ROUTINE/FUNCTION reject aggregates. The aggregate's SFUNC carries a pinned search_path. function_search_path_mutable eql_v3 max function Per-domain MAX aggregate on the eql_v3 ordered domains. Same as eql_v3.min. +function_search_path_mutable eql_v3 ore_block_u64_8_256_eq function Inner comparator for the eql_v3 ore_block_u64_8_256 type's `=` operator (self-contained SEM fork). The eql_v3 *_ord comparison wrappers inline to `ord_term(a) op ord_term(b)`; the planner only carries that through to the functional ORE index if this inner function is also inlinable (no SET, IMMUTABLE). Mirrors eql_v2.ore_block_u64_8_256_eq. +function_search_path_mutable eql_v3 ore_block_u64_8_256_neq function Inner comparator for the eql_v3 ore_block_u64_8_256 `<>` operator. Same rationale as eql_v3.ore_block_u64_8_256_eq. +function_search_path_mutable eql_v3 ore_block_u64_8_256_lt function Inner comparator for the eql_v3 ore_block_u64_8_256 `<` operator. Same rationale as eql_v3.ore_block_u64_8_256_eq. +function_search_path_mutable eql_v3 ore_block_u64_8_256_lte function Inner comparator for the eql_v3 ore_block_u64_8_256 `<=` operator. Same rationale as eql_v3.ore_block_u64_8_256_eq. 
+function_search_path_mutable eql_v3 ore_block_u64_8_256_gt function Inner comparator for the eql_v3 ore_block_u64_8_256 `>` operator. Same rationale as eql_v3.ore_block_u64_8_256_eq. +function_search_path_mutable eql_v3 ore_block_u64_8_256_gte function Inner comparator for the eql_v3 ore_block_u64_8_256 `>=` operator. Same rationale as eql_v3.ore_block_u64_8_256_eq. +function_search_path_mutable eql_v3 hmac_256 function HMAC equality extractor for the eql_v3 SEM fork: inlinable SQL (jsonb) constructor used inside eql_v3.eq_term. Must inline so the functional hash/btree index on eql_v3.eq_term(col) engages. Mirrors eql_v2.hmac_256. +function_search_path_mutable eql_v3 jsonb_array_to_bytea_array function Hand-written jsonb→bytea[] helper for the eql_v3 SEM fork: inlinable SQL (no SET, IMMUTABLE). Reached per-encrypted-value through eql_v3.ore_block_u64_8_256; must inline so the planner can fold it into the calling query. Pinned by neither the structural skip (it takes bare jsonb, not a jsonb-backed domain) nor an inline-critical OID clause — it carries the documented `eql-inline-critical` COMMENT marker that tasks/pin_search_path.sql honours. The eql_v2 copy stays plpgsql (pinned) by design. +function_search_path_mutable eql_v3 jsonb_array_to_ore_block_u64_8_256 function Hand-written jsonb→ore_block composite helper for the eql_v3 SEM fork: inlinable SQL (no SET, IMMUTABLE). Same rationale as eql_v3.jsonb_array_to_bytea_array — reached per-encrypted-value through eql_v3.ore_block_u64_8_256, carries the `eql-inline-critical` COMMENT marker. The eql_v2 copy stays plpgsql (pinned) by design. ALLOW # Wrap splinter (a single bare SELECT expression) into a subquery we can diff --git a/tasks/uninstall-v3.sql b/tasks/uninstall-v3.sql new file mode 100644 index 00000000..ce680dc9 --- /dev/null +++ b/tasks/uninstall-v3.sql @@ -0,0 +1,3 @@ +-- Uninstall the standalone eql_v3 surface. 
CASCADE removes the domains, SEM +-- types, operators, opclass, and any columns typed with the eql_v3 domains. +DROP SCHEMA IF EXISTS eql_v3 CASCADE; diff --git a/tests/codegen/reference/README.md b/tests/codegen/reference/README.md index 186d6d5c..fb2de720 100644 --- a/tests/codegen/reference/README.md +++ b/tests/codegen/reference/README.md @@ -4,7 +4,7 @@ The SQL files under `int4/` are the hand-maintained golden reference for the enc Each reference file's first line is a `-- REFERENCE:` provenance marker; everything after it is the generated body verbatim, starting with the template-owned `-- AUTOMATICALLY GENERATED FILE.` header. -The parity gate runs the generator (`cargo run -p eql-codegen`, which writes the real `src/encrypted_domain/int4/` tree) and asserts its output matches these files **byte-for-byte** after dropping that single provenance line. It runs three ways, all on the same reference: +The parity gate runs the generator (`cargo run -p eql-codegen`, which writes the real `src/v3/scalars/int4/` tree) and asserts its output matches these files **byte-for-byte** after dropping that single provenance line. It runs three ways, all on the same reference: - `mise run codegen:parity` (`tasks/codegen-parity.sh`) — the CI shell gate. It first compares the generated `int4` SQL *file set* against the golden `*.sql` set (`comm -23` against `git ls-files` excludes the committed, hand-written `int4_extensions.sql`, which has no golden counterpart) to catch extra/dropped files, then `diff`s each golden file against its generated counterpart after `tail -n +2` drops the provenance line. Any whitespace or blank-line drift fails — there is no normalization. - `crates/eql-codegen/tests/parity.rs` (`rust_generator_matches_int4_golden_files`) — runs `generate_all` into a temp dir and byte-compares the materialised `int4` SQL surface against the same golden. 
diff --git a/tests/codegen/reference/int4/int4_eq_functions.sql b/tests/codegen/reference/int4/int4_eq_functions.sql index 1b244577..54a7c048 100644 --- a/tests/codegen/reference/int4/int4_eq_functions.sql +++ b/tests/codegen/reference/int4/int4_eq_functions.sql @@ -1,21 +1,20 @@ --- REFERENCE: hand-written parity baseline for crates/eql-codegen — see ../README.md +-- REFERENCE: hand-maintained parity baseline for crates/eql-codegen - see ../README.md -- AUTOMATICALLY GENERATED FILE. --- REQUIRE: src/schema.sql --- REQUIRE: src/schema-v3.sql --- REQUIRE: src/encrypted_domain/int4/int4_types.sql --- REQUIRE: src/encrypted_domain/functions.sql --- REQUIRE: src/hmac_256/functions.sql +-- REQUIRE: src/v3/schema.sql +-- REQUIRE: src/v3/scalars/int4/int4_types.sql +-- REQUIRE: src/v3/scalars/functions.sql +-- REQUIRE: src/v3/sem/hmac_256/functions.sql --! @file encrypted_domain/int4/int4_eq_functions.sql --! @brief Functions for eql_v3.int4_eq. --! @brief Index extractor for eql_v3.int4_eq. --! @param a eql_v3.int4_eq ---! @return eql_v2.hmac_256 +--! @return eql_v3.hmac_256 CREATE FUNCTION eql_v3.eq_term(a eql_v3.int4_eq) -RETURNS eql_v2.hmac_256 +RETURNS eql_v3.hmac_256 LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE -AS $$ SELECT eql_v2.hmac_256(a::jsonb) $$; +AS $$ SELECT eql_v3.hmac_256(a::jsonb) $$; --! @brief Operator wrapper for eql_v3.int4_eq. --! @param a eql_v3.int4_eq diff --git a/tests/codegen/reference/int4/int4_eq_operators.sql b/tests/codegen/reference/int4/int4_eq_operators.sql index a2190e16..9da0ce32 100644 --- a/tests/codegen/reference/int4/int4_eq_operators.sql +++ b/tests/codegen/reference/int4/int4_eq_operators.sql @@ -1,8 +1,8 @@ --- REFERENCE: hand-written parity baseline for crates/eql-codegen — see ../README.md +-- REFERENCE: hand-maintained parity baseline for crates/eql-codegen - see ../README.md -- AUTOMATICALLY GENERATED FILE. 
--- REQUIRE: src/schema-v3.sql --- REQUIRE: src/encrypted_domain/int4/int4_types.sql --- REQUIRE: src/encrypted_domain/int4/int4_eq_functions.sql +-- REQUIRE: src/v3/schema.sql +-- REQUIRE: src/v3/scalars/int4/int4_types.sql +-- REQUIRE: src/v3/scalars/int4/int4_eq_functions.sql --! @file encrypted_domain/int4/int4_eq_operators.sql --! @brief Operators for eql_v3.int4_eq. diff --git a/tests/codegen/reference/int4/int4_functions.sql b/tests/codegen/reference/int4/int4_functions.sql index 6dae8388..dc142163 100644 --- a/tests/codegen/reference/int4/int4_functions.sql +++ b/tests/codegen/reference/int4/int4_functions.sql @@ -1,9 +1,8 @@ --- REFERENCE: hand-written parity baseline for crates/eql-codegen — see ../README.md +-- REFERENCE: hand-maintained parity baseline for crates/eql-codegen - see ../README.md -- AUTOMATICALLY GENERATED FILE. --- REQUIRE: src/schema.sql --- REQUIRE: src/schema-v3.sql --- REQUIRE: src/encrypted_domain/int4/int4_types.sql --- REQUIRE: src/encrypted_domain/functions.sql +-- REQUIRE: src/v3/schema.sql +-- REQUIRE: src/v3/scalars/int4/int4_types.sql +-- REQUIRE: src/v3/scalars/functions.sql --! @file encrypted_domain/int4/int4_functions.sql --! @brief Functions for eql_v3.int4. diff --git a/tests/codegen/reference/int4/int4_operators.sql b/tests/codegen/reference/int4/int4_operators.sql index e461c3b7..fb6c03f9 100644 --- a/tests/codegen/reference/int4/int4_operators.sql +++ b/tests/codegen/reference/int4/int4_operators.sql @@ -1,8 +1,8 @@ --- REFERENCE: hand-written parity baseline for crates/eql-codegen — see ../README.md +-- REFERENCE: hand-maintained parity baseline for crates/eql-codegen - see ../README.md -- AUTOMATICALLY GENERATED FILE. --- REQUIRE: src/schema-v3.sql --- REQUIRE: src/encrypted_domain/int4/int4_types.sql --- REQUIRE: src/encrypted_domain/int4/int4_functions.sql +-- REQUIRE: src/v3/schema.sql +-- REQUIRE: src/v3/scalars/int4/int4_types.sql +-- REQUIRE: src/v3/scalars/int4/int4_functions.sql --! 
@file encrypted_domain/int4/int4_operators.sql --! @brief Operators for eql_v3.int4. diff --git a/tests/codegen/reference/int4/int4_ord_aggregates.sql b/tests/codegen/reference/int4/int4_ord_aggregates.sql index 08cdc10d..95ce5d3b 100644 --- a/tests/codegen/reference/int4/int4_ord_aggregates.sql +++ b/tests/codegen/reference/int4/int4_ord_aggregates.sql @@ -1,9 +1,9 @@ --- REFERENCE: hand-written parity baseline for crates/eql-codegen — see ../README.md +-- REFERENCE: hand-maintained parity baseline for crates/eql-codegen - see ../README.md -- AUTOMATICALLY GENERATED FILE. --- REQUIRE: src/schema-v3.sql --- REQUIRE: src/encrypted_domain/int4/int4_types.sql --- REQUIRE: src/encrypted_domain/int4/int4_ord_functions.sql --- REQUIRE: src/encrypted_domain/int4/int4_ord_operators.sql +-- REQUIRE: src/v3/schema.sql +-- REQUIRE: src/v3/scalars/int4/int4_types.sql +-- REQUIRE: src/v3/scalars/int4/int4_ord_functions.sql +-- REQUIRE: src/v3/scalars/int4/int4_ord_operators.sql --! @file encrypted_domain/int4/int4_ord_aggregates.sql --! @brief Aggregates for eql_v3.int4_ord. diff --git a/tests/codegen/reference/int4/int4_ord_functions.sql b/tests/codegen/reference/int4/int4_ord_functions.sql index 2c0ee56b..4b170fcb 100644 --- a/tests/codegen/reference/int4/int4_ord_functions.sql +++ b/tests/codegen/reference/int4/int4_ord_functions.sql @@ -1,22 +1,21 @@ --- REFERENCE: hand-written parity baseline for crates/eql-codegen — see ../README.md +-- REFERENCE: hand-maintained parity baseline for crates/eql-codegen - see ../README.md -- AUTOMATICALLY GENERATED FILE. 
--- REQUIRE: src/schema.sql --- REQUIRE: src/schema-v3.sql --- REQUIRE: src/encrypted_domain/int4/int4_types.sql --- REQUIRE: src/encrypted_domain/functions.sql --- REQUIRE: src/ore_block_u64_8_256/functions.sql --- REQUIRE: src/ore_block_u64_8_256/operators.sql +-- REQUIRE: src/v3/schema.sql +-- REQUIRE: src/v3/scalars/int4/int4_types.sql +-- REQUIRE: src/v3/scalars/functions.sql +-- REQUIRE: src/v3/sem/ore_block_u64_8_256/functions.sql +-- REQUIRE: src/v3/sem/ore_block_u64_8_256/operators.sql --! @file encrypted_domain/int4/int4_ord_functions.sql --! @brief Functions for eql_v3.int4_ord. --! @brief Index extractor for eql_v3.int4_ord. --! @param a eql_v3.int4_ord ---! @return eql_v2.ore_block_u64_8_256 +--! @return eql_v3.ore_block_u64_8_256 CREATE FUNCTION eql_v3.ord_term(a eql_v3.int4_ord) -RETURNS eql_v2.ore_block_u64_8_256 +RETURNS eql_v3.ore_block_u64_8_256 LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE -AS $$ SELECT eql_v2.ore_block_u64_8_256(a::jsonb) $$; +AS $$ SELECT eql_v3.ore_block_u64_8_256(a::jsonb) $$; --! @brief Operator wrapper for eql_v3.int4_ord. --! @param a eql_v3.int4_ord diff --git a/tests/codegen/reference/int4/int4_ord_operators.sql b/tests/codegen/reference/int4/int4_ord_operators.sql index a5321c62..52a52a12 100644 --- a/tests/codegen/reference/int4/int4_ord_operators.sql +++ b/tests/codegen/reference/int4/int4_ord_operators.sql @@ -1,8 +1,8 @@ --- REFERENCE: hand-written parity baseline for crates/eql-codegen — see ../README.md +-- REFERENCE: hand-maintained parity baseline for crates/eql-codegen - see ../README.md -- AUTOMATICALLY GENERATED FILE. --- REQUIRE: src/schema-v3.sql --- REQUIRE: src/encrypted_domain/int4/int4_types.sql --- REQUIRE: src/encrypted_domain/int4/int4_ord_functions.sql +-- REQUIRE: src/v3/schema.sql +-- REQUIRE: src/v3/scalars/int4/int4_types.sql +-- REQUIRE: src/v3/scalars/int4/int4_ord_functions.sql --! @file encrypted_domain/int4/int4_ord_operators.sql --! @brief Operators for eql_v3.int4_ord. 
diff --git a/tests/codegen/reference/int4/int4_ord_ore_aggregates.sql b/tests/codegen/reference/int4/int4_ord_ore_aggregates.sql index de5b0848..369a1938 100644 --- a/tests/codegen/reference/int4/int4_ord_ore_aggregates.sql +++ b/tests/codegen/reference/int4/int4_ord_ore_aggregates.sql @@ -1,9 +1,9 @@ --- REFERENCE: hand-written parity baseline for crates/eql-codegen — see ../README.md +-- REFERENCE: hand-maintained parity baseline for crates/eql-codegen - see ../README.md -- AUTOMATICALLY GENERATED FILE. --- REQUIRE: src/schema-v3.sql --- REQUIRE: src/encrypted_domain/int4/int4_types.sql --- REQUIRE: src/encrypted_domain/int4/int4_ord_ore_functions.sql --- REQUIRE: src/encrypted_domain/int4/int4_ord_ore_operators.sql +-- REQUIRE: src/v3/schema.sql +-- REQUIRE: src/v3/scalars/int4/int4_types.sql +-- REQUIRE: src/v3/scalars/int4/int4_ord_ore_functions.sql +-- REQUIRE: src/v3/scalars/int4/int4_ord_ore_operators.sql --! @file encrypted_domain/int4/int4_ord_ore_aggregates.sql --! @brief Aggregates for eql_v3.int4_ord_ore. diff --git a/tests/codegen/reference/int4/int4_ord_ore_functions.sql b/tests/codegen/reference/int4/int4_ord_ore_functions.sql index 75f09fb9..e93c8491 100644 --- a/tests/codegen/reference/int4/int4_ord_ore_functions.sql +++ b/tests/codegen/reference/int4/int4_ord_ore_functions.sql @@ -1,22 +1,21 @@ --- REFERENCE: hand-written parity baseline for crates/eql-codegen — see ../README.md +-- REFERENCE: hand-maintained parity baseline for crates/eql-codegen - see ../README.md -- AUTOMATICALLY GENERATED FILE. 
--- REQUIRE: src/schema.sql --- REQUIRE: src/schema-v3.sql --- REQUIRE: src/encrypted_domain/int4/int4_types.sql --- REQUIRE: src/encrypted_domain/functions.sql --- REQUIRE: src/ore_block_u64_8_256/functions.sql --- REQUIRE: src/ore_block_u64_8_256/operators.sql +-- REQUIRE: src/v3/schema.sql +-- REQUIRE: src/v3/scalars/int4/int4_types.sql +-- REQUIRE: src/v3/scalars/functions.sql +-- REQUIRE: src/v3/sem/ore_block_u64_8_256/functions.sql +-- REQUIRE: src/v3/sem/ore_block_u64_8_256/operators.sql --! @file encrypted_domain/int4/int4_ord_ore_functions.sql --! @brief Functions for eql_v3.int4_ord_ore. --! @brief Index extractor for eql_v3.int4_ord_ore. --! @param a eql_v3.int4_ord_ore ---! @return eql_v2.ore_block_u64_8_256 +--! @return eql_v3.ore_block_u64_8_256 CREATE FUNCTION eql_v3.ord_term(a eql_v3.int4_ord_ore) -RETURNS eql_v2.ore_block_u64_8_256 +RETURNS eql_v3.ore_block_u64_8_256 LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE -AS $$ SELECT eql_v2.ore_block_u64_8_256(a::jsonb) $$; +AS $$ SELECT eql_v3.ore_block_u64_8_256(a::jsonb) $$; --! @brief Operator wrapper for eql_v3.int4_ord_ore. --! @param a eql_v3.int4_ord_ore diff --git a/tests/codegen/reference/int4/int4_ord_ore_operators.sql b/tests/codegen/reference/int4/int4_ord_ore_operators.sql index 52f363cf..73e57f63 100644 --- a/tests/codegen/reference/int4/int4_ord_ore_operators.sql +++ b/tests/codegen/reference/int4/int4_ord_ore_operators.sql @@ -1,8 +1,8 @@ --- REFERENCE: hand-written parity baseline for crates/eql-codegen — see ../README.md +-- REFERENCE: hand-maintained parity baseline for crates/eql-codegen - see ../README.md -- AUTOMATICALLY GENERATED FILE. --- REQUIRE: src/schema-v3.sql --- REQUIRE: src/encrypted_domain/int4/int4_types.sql --- REQUIRE: src/encrypted_domain/int4/int4_ord_ore_functions.sql +-- REQUIRE: src/v3/schema.sql +-- REQUIRE: src/v3/scalars/int4/int4_types.sql +-- REQUIRE: src/v3/scalars/int4/int4_ord_ore_functions.sql --! @file encrypted_domain/int4/int4_ord_ore_operators.sql --! 
@brief Operators for eql_v3.int4_ord_ore. diff --git a/tests/codegen/reference/int4/int4_types.sql b/tests/codegen/reference/int4/int4_types.sql index ba4d9d89..01082ea2 100644 --- a/tests/codegen/reference/int4/int4_types.sql +++ b/tests/codegen/reference/int4/int4_types.sql @@ -1,8 +1,8 @@ --- REFERENCE: hand-written parity baseline for crates/eql-codegen — see ../README.md +-- REFERENCE: hand-maintained parity baseline for crates/eql-codegen - see ../README.md -- AUTOMATICALLY GENERATED FILE. --- REQUIRE: src/schema-v3.sql +-- REQUIRE: src/v3/schema.sql ---! @file encrypted_domain/int4/int4_types.sql +--! @file v3/scalars/int4/int4_types.sql --! @brief Encrypted-domain types for int4. DO $$ diff --git a/tests/sqlx/Cargo.toml b/tests/sqlx/Cargo.toml index 11217726..208f6d86 100644 --- a/tests/sqlx/Cargo.toml +++ b/tests/sqlx/Cargo.toml @@ -39,6 +39,6 @@ scale = [] # in the process env, BOTH a ZeroKMS auth credential (`CS_CLIENT_ACCESS_KEY` # + `CS_WORKSPACE_CRN`, via AutoStrategy) AND a client key (`CS_CLIENT_ID` + # `CS_CLIENT_KEY`, via EnvKeyProvider) — the two pairs are not alternatives. -# Run one with: -# mise run fixture:generate +# Regenerate all fixtures with: +# mise run fixture:generate:all fixture-gen = [] diff --git a/tests/sqlx/fixtures/drop_operator_classes.sql b/tests/sqlx/fixtures/drop_operator_classes.sql index 2c5632bb..094073aa 100644 --- a/tests/sqlx/fixtures/drop_operator_classes.sql +++ b/tests/sqlx/fixtures/drop_operator_classes.sql @@ -16,6 +16,13 @@ DROP OPERATOR FAMILY IF EXISTS eql_v2.encrypted_hash_operator_family USING hash DROP OPERATOR CLASS IF EXISTS eql_v2.ore_block_u64_8_256_operator_class USING btree CASCADE; DROP OPERATOR FAMILY IF EXISTS eql_v2.ore_block_u64_8_256_operator_family USING btree CASCADE; +-- Drop the self-contained eql_v3 ORE btree operator class too — its file carries +-- the `*operator_class.sql` suffix, so the Supabase build's `**/*operator_class.sql` +-- glob excludes it as well. 
Without this the unqualified-name opclass check below +-- still finds the eql_v3 copy. +DROP OPERATOR CLASS IF EXISTS eql_v3.ore_block_u64_8_256_operator_class USING btree CASCADE; +DROP OPERATOR FAMILY IF EXISTS eql_v3.ore_block_u64_8_256_operator_family USING btree CASCADE; + -- Drop ore_block_u64_8_256 operators (also excluded from Supabase build) DROP OPERATOR IF EXISTS = (eql_v2.ore_block_u64_8_256, eql_v2.ore_block_u64_8_256) CASCADE; DROP OPERATOR IF EXISTS <> (eql_v2.ore_block_u64_8_256, eql_v2.ore_block_u64_8_256) CASCADE; diff --git a/tests/sqlx/src/fixtures/cipherstash.rs b/tests/sqlx/src/fixtures/cipherstash.rs index adb12173..22e552fa 100644 --- a/tests/sqlx/src/fixtures/cipherstash.rs +++ b/tests/sqlx/src/fixtures/cipherstash.rs @@ -59,9 +59,14 @@ async fn build_cipher() -> Result>> { /// fail on an unknown index name. Extending fixture coverage to a new /// index is one variant on `IndexKind` plus one arm here, both compile- /// time checked. +/// The single encrypted-payload column name. Single-sourced here so the +/// `ColumnConfig` built for encryption and the `INSERT` target column in the +/// driver cannot drift apart. 
+pub const PAYLOAD_COLUMN: &str = "payload"; + pub fn column_config_for(spec_indexes: &[IndexKind], cast: Cast) -> Result { let column_type = cast_to_column_type(cast)?; - let mut config = ColumnConfig::build("payload").casts_as(column_type); + let mut config = ColumnConfig::build(PAYLOAD_COLUMN).casts_as(column_type); for ix in spec_indexes { config = config.add_index(Index::new(index_type_for(*ix))); diff --git a/tests/sqlx/src/fixtures/driver.rs b/tests/sqlx/src/fixtures/driver.rs index 1d6c3f45..127e20a1 100644 --- a/tests/sqlx/src/fixtures/driver.rs +++ b/tests/sqlx/src/fixtures/driver.rs @@ -196,12 +196,19 @@ where .context("building ColumnConfig from FixtureSpec indexes")?; let working = self.working_table(); - let payloads = cipherstash::encrypt_store(&working, "payload", self.values(), &config) - .await - .context("encrypting fixture values")?; - - let insert = - format!("INSERT INTO public.{working} (id, plaintext, payload) VALUES ($1, $2, $3)"); + let payloads = cipherstash::encrypt_store( + &working, + cipherstash::PAYLOAD_COLUMN, + self.values(), + &config, + ) + .await + .context("encrypting fixture values")?; + + let insert = format!( + "INSERT INTO public.{working} (id, plaintext, {col}) VALUES ($1, $2, $3)", + col = cipherstash::PAYLOAD_COLUMN + ); for (i, (value, payload)) in self.values().iter().zip(payloads).enumerate() { let id = (i as i64) + 1; sqlx::query(&insert) diff --git a/tests/sqlx/src/fixtures/validation.rs b/tests/sqlx/src/fixtures/validation.rs index e641a726..61746e60 100644 --- a/tests/sqlx/src/fixtures/validation.rs +++ b/tests/sqlx/src/fixtures/validation.rs @@ -5,8 +5,17 @@ use std::fmt; -/// Lowercase snake-case identifier, must start with a letter: `^[a-z][a-z0-9_]*$`. +/// Maximum unquoted identifier length PostgreSQL preserves; longer identifiers +/// are silently truncated (`NAMEDATALEN - 1`). 
+const MAX_IDENTIFIER_LEN: usize = 63; + +/// Lowercase snake-case identifier, must start with a letter and be at most +/// 63 bytes (PostgreSQL truncates beyond that): `^[a-z][a-z0-9_]{0,62}$`. fn is_valid_identifier(s: &str) -> bool { + // All accepted chars are single-byte ASCII, so byte length == char count. + if s.len() > MAX_IDENTIFIER_LEN { + return false; + } let mut chars = s.chars(); match chars.next() { Some(c) if c.is_ascii_lowercase() => {} @@ -107,6 +116,22 @@ mod tests { assert!(FixtureIdentifier::try_from("a;DROP").is_err()); // injection attempt } + #[test] + fn accepts_63_char_identifier() { + // 63 bytes is the longest PostgreSQL preserves unquoted. + let id = format!("a{}", "b".repeat(62)); + assert_eq!(id.len(), 63); + assert!(FixtureIdentifier::try_from(id.as_str()).is_ok()); + } + + #[test] + fn rejects_64_char_identifier() { + // 64 bytes would be silently truncated by PostgreSQL. + let id = format!("a{}", "b".repeat(63)); + assert_eq!(id.len(), 64); + assert!(FixtureIdentifier::try_from(id.as_str()).is_err()); + } + #[test] fn identifier_renders_via_display() { let id = FixtureIdentifier::try_from("eql_v2_int4").unwrap(); diff --git a/tests/sqlx/src/scalar_types.rs b/tests/sqlx/src/scalar_types.rs index 0d6a70a8..676cc0b1 100644 --- a/tests/sqlx/src/scalar_types.rs +++ b/tests/sqlx/src/scalar_types.rs @@ -14,7 +14,7 @@ //! to forward it to the matching `eql_tests_macros` proc-macro: //! //! - `scalar_type_impls` — `scalar_domains.rs` (lib): the `impl ScalarType` block. -//! - `fixture_modules` — `fixtures/mod.rs` (lib): the `pub mod eql_v2_` modules. +//! - `fixture_modules` — `fixtures/mod.rs` (lib): the `pub mod eql_v3_` modules. //! - `matrix_suites` — `tests/encrypted_domain/scalars/mod.rs` (test binary): //! the `ordered_numeric_matrix!` suites. //! 
- `fixture_dispatch` — `tests/generate_all_fixtures.rs` (test binary): the diff --git a/tests/sqlx/tests/build_validation_tests.rs b/tests/sqlx/tests/build_validation_tests.rs index 264f770b..691a5d4a 100644 --- a/tests/sqlx/tests/build_validation_tests.rs +++ b/tests/sqlx/tests/build_validation_tests.rs @@ -138,3 +138,55 @@ fn protect_variant_is_smaller_than_full() { full.len() ); } + +// ============================================================================= +// v3-only Variant Tests (design D9/D11 — self-contained eql_v3 surface) +// ============================================================================= + +#[test] +fn v3_variant_file_exists() { + assert!( + Path::new("../../release/cipherstash-encrypt-v3.sql").exists(), + "v3-only variant installer should exist" + ); +} + +#[test] +fn v3_uninstaller_exists() { + assert!( + Path::new("../../release/cipherstash-encrypt-v3-uninstall.sql").exists(), + "v3-only variant uninstaller should exist" + ); +} + +#[test] +fn v3_variant_creates_eql_v3_schema() { + let sql = read_release_sql("cipherstash-encrypt-v3.sql"); + assert!( + sql.contains("CREATE SCHEMA eql_v3"), + "v3 variant must create the eql_v3 schema" + ); +} + +#[test] +fn v3_variant_has_no_eql_v2_symbol() { + let sql = read_release_sql("cipherstash-encrypt-v3.sql"); + // Reject both schema-qualified refs (`eql_v2.`) and bare v2 entity names + // (`eql_v2_encrypted`, `eql_v2_configuration`, …). Prose mentions like + // "the eql_v2 original is unchanged" in doc comments are still allowed. + assert!( + !sql.contains("eql_v2.") && !sql.contains("eql_v2_"), + "v3 variant must be self-contained (no eql_v2. or eql_v2_ reference)" + ); +} + +#[test] +fn v3_variant_omits_v2_coupled_pin_search_path() { + // D11: the v3 artifact must NOT append tasks/pin_search_path.sql, which is + // eql_v2-coupled (references eql_v2_encrypted / ste_vec_entry). 
+ let sql = read_release_sql("cipherstash-encrypt-v3.sql"); + assert!( + !sql.contains("ste_vec_entry") && !sql.contains("eql_v2_encrypted"), + "v3 variant must not carry the eql_v2-coupled pin_search_path script" + ); +} diff --git a/tests/sqlx/tests/encrypted_domain/family/inlinability.rs b/tests/sqlx/tests/encrypted_domain/family/inlinability.rs index 3cacb605..8caed2bf 100644 --- a/tests/sqlx/tests/encrypted_domain/family/inlinability.rs +++ b/tests/sqlx/tests/encrypted_domain/family/inlinability.rs @@ -87,6 +87,119 @@ async fn no_encrypted_domain_inline_critical_function_is_pinned(pool: PgPool) -> Ok(()) } +/// Direct guard for the self-contained eql_v3 SEM index-term functions. Unlike +/// the structural guard above (which covers jsonb-domain-arg functions), these +/// take a composite (ore_block_u64_8_256) or raw jsonb (hmac_256/the two +/// per-encrypted-value `jsonb_array_to_*` helpers) arg, so they are NOT caught +/// by the structural pin-skip and need explicit inline_critical allowlisting. +/// If pin_search_path.sql pins any of them, v3 functional-index inlining +/// silently regresses to Seq Scan — this test fails instead. +/// +/// `jsonb_array_to_bytea_array(jsonb)` and +/// `jsonb_array_to_ore_block_u64_8_256(jsonb)` are included here: both take a +/// bare `jsonb` arg (not a jsonb-backed encrypted DOMAIN), so the structural +/// skip in tasks/pin_search_path.sql does not recognise them — they are kept +/// unpinned by the `eql-inline-critical` COMMENT marker instead. This test +/// asserts the unpinned + inlinable-SQL state directly; the companion +/// `eql_v3_sem_inline_critical_helpers_carry_marker` test below asserts the +/// marker itself, so an edit that drops the marker (or a pin_search_path.sql +/// refactor that stops honouring it) fails CI even though both checks live in
+#[sqlx::test] +async fn eql_v3_sem_inline_critical_functions_are_unpinned(pool: PgPool) -> Result<()> { + let rows: Vec<(String,)> = sqlx::query_as( + r#" + SELECT p.proname || '(' || pg_catalog.pg_get_function_arguments(p.oid) || ')' + FROM pg_catalog.pg_proc p + JOIN pg_catalog.pg_namespace n ON n.oid = p.pronamespace + WHERE n.nspname = 'eql_v3' + AND ( + (p.pronargs = 2 AND p.proname IN ( + 'ore_block_u64_8_256_eq','ore_block_u64_8_256_neq', + 'ore_block_u64_8_256_lt','ore_block_u64_8_256_lte', + 'ore_block_u64_8_256_gt','ore_block_u64_8_256_gte')) + OR (p.pronargs = 1 AND p.proname IN ( + 'hmac_256', + 'jsonb_array_to_bytea_array', + 'jsonb_array_to_ore_block_u64_8_256') + AND p.proargtypes[0] = 'jsonb'::regtype) + ) + AND ( + -- offender: pinned search_path, or not inlinable SQL/IMMUTABLE + EXISTS (SELECT 1 FROM unnest(coalesce(p.proconfig,'{}'::text[])) c WHERE c LIKE 'search_path=%') + OR p.provolatile <> 'i' + OR p.prolang <> (SELECT l.oid FROM pg_catalog.pg_language l WHERE l.lanname = 'sql') + ) + ORDER BY 1 + "#, + ) + .fetch_all(&pool) + .await?; + + assert!( + rows.is_empty(), + "eql_v3 SEM inline-critical functions must stay unpinned + inlinable SQL; offenders: {:?}", + rows.iter().map(|r| &r.0).collect::>() + ); + Ok(()) +} + +/// Companion guard for the two bare-`jsonb` per-encrypted-value helpers +/// (`jsonb_array_to_bytea_array`, `jsonb_array_to_ore_block_u64_8_256`). The +/// unpinned state asserted above is only DURABLE because each helper carries an +/// `eql-inline-critical` COMMENT marker that `tasks/pin_search_path.sql` honours +/// (it skips pinning functions whose `pg_description` matches +/// `'eql-inline-critical%'`). Neither helper is caught by the structural +/// jsonb-domain skip, so the marker is the ONLY thing keeping them unpinned — +/// an edit that removes the marker, or a pin_search_path.sql refactor that drops +/// the marker handling, would silently re-pin them and break inlining. 
This test +/// asserts the marker is present (and the helpers are SQL/IMMUTABLE) so that +/// failure surfaces here. +#[sqlx::test] +async fn eql_v3_sem_inline_critical_helpers_carry_marker(pool: PgPool) -> Result<()> { + // Each expected helper must appear with a present inline-critical marker + // and be inlinable SQL/IMMUTABLE. Any helper that is missing, unmarked, or + // not inlinable SQL/IMMUTABLE is an offender. + let offenders: Vec<(String, Option, String, String)> = sqlx::query_as( + r#" + WITH expected(proname) AS ( + VALUES ('jsonb_array_to_bytea_array'), + ('jsonb_array_to_ore_block_u64_8_256') + ) + SELECT e.proname AS proname, + d.description AS marker, + l.lanname AS prolang, + p.provolatile::text AS provolatile + FROM expected e + LEFT JOIN pg_catalog.pg_proc p + ON p.proname = e.proname + AND p.pronamespace = 'eql_v3'::regnamespace + AND p.pronargs = 1 + AND p.proargtypes[0] = 'jsonb'::regtype + LEFT JOIN pg_catalog.pg_language l ON l.oid = p.prolang + LEFT JOIN pg_catalog.pg_description d + ON d.objoid = p.oid AND d.classoid = 'pg_proc'::regclass + WHERE p.oid IS NULL + OR d.description IS NULL + OR d.description NOT LIKE 'eql-inline-critical%' + OR l.lanname IS DISTINCT FROM 'sql' + OR p.provolatile IS DISTINCT FROM 'i' + ORDER BY e.proname + "#, + ) + .fetch_all(&pool) + .await?; + + assert!( + offenders.is_empty(), + "eql_v3 SEM bare-jsonb helpers must carry an `eql-inline-critical` COMMENT \ + marker and be inlinable SQL/IMMUTABLE — the marker is what keeps \ + pin_search_path.sql from pinning them. 
Offenders \ + (proname, marker, prolang, provolatile): {offenders:#?}" + ); + Ok(()) +} + #[sqlx::test] async fn every_inline_critical_eligible_domain_has_inline_critical_functions( pool: PgPool, diff --git a/tests/sqlx/tests/encrypted_domain/family/mod.rs b/tests/sqlx/tests/encrypted_domain/family/mod.rs index 6622e0f8..892842a2 100644 --- a/tests/sqlx/tests/encrypted_domain/family/mod.rs +++ b/tests/sqlx/tests/encrypted_domain/family/mod.rs @@ -4,4 +4,5 @@ pub mod inlinability; pub mod jsonb_operator_surface; pub mod mutations; +pub mod sem; pub mod support; diff --git a/tests/sqlx/tests/encrypted_domain/family/mutations.rs b/tests/sqlx/tests/encrypted_domain/family/mutations.rs index 7be20853..25389842 100644 --- a/tests/sqlx/tests/encrypted_domain/family/mutations.rs +++ b/tests/sqlx/tests/encrypted_domain/family/mutations.rs @@ -143,14 +143,14 @@ async fn rerouting_ord_eq_through_hm_flips_ord_routes_arm(pool: PgPool) -> Resul "baseline: `_ord` `=` must match exactly the pivot via ob with hm stripped (got {baseline})" ); - // Mutation: reroute `_ord` `=` through HMAC. `eql_v2.hmac_256(jsonb)` is + // Mutation: reroute `_ord` `=` through HMAC. `eql_v3.hmac_256(jsonb)` is // STRICT and the `hm` key is absent, so it yields NULL and `=` matches // nothing. mutate( &pool, "CREATE OR REPLACE FUNCTION eql_v3.eq(a eql_v3.int4_ord, b eql_v3.int4_ord) \ RETURNS boolean LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE \ - AS $$ SELECT eql_v2.hmac_256(a::jsonb) = eql_v2.hmac_256(b::jsonb) $$", + AS $$ SELECT eql_v3.hmac_256(a::jsonb) = eql_v3.hmac_256(b::jsonb) $$", ) .await?; @@ -198,7 +198,7 @@ async fn dropping_strict_on_eq_flips_supported_null_arm(pool: PgPool) -> Result< Ok(()) } -// 5. Ord `<` correctness routes through `eql_v2.lt`. Turning `lt` into a +// 5. Ord `<` correctness routes through `eql_v3.lt`. Turning `lt` into a // blocker makes `<` raise — proving the ord `<` correctness arm has teeth. 
// Crucially, ORDER BY routes through `ord_term`, NOT `<`, so it must stay // green here. This is the #5-vs-#7 split: #5 attacks `<`, #7 attacks the @@ -297,12 +297,12 @@ async fn rerouting_eq_eq_through_ob_flips_eq_arm(pool: PgPool) -> Result<()> { ); // Mutation: reroute `_eq` `=` through ORE. The `ob` key is absent, so - // `eql_v2.ore_block_u64_8_256(jsonb)` raises rather than matching. + // `eql_v3.ore_block_u64_8_256(jsonb)` raises rather than matching. mutate( &pool, "CREATE OR REPLACE FUNCTION eql_v3.eq(a eql_v3.int4_eq, b eql_v3.int4_eq) \ RETURNS boolean LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE \ - AS $$ SELECT eql_v2.ore_block_u64_8_256(a::jsonb) = eql_v2.ore_block_u64_8_256(b::jsonb) $$", + AS $$ SELECT eql_v3.ore_block_u64_8_256(a::jsonb) = eql_v3.ore_block_u64_8_256(b::jsonb) $$", ) .await?; @@ -354,8 +354,8 @@ async fn collapsing_ord_term_flips_order_by_arm(pool: PgPool) -> Result<()> { let const_payload = fetch_fixture_payload::(&pool, 0).await?; let ddl = format!( "CREATE OR REPLACE FUNCTION eql_v3.ord_term(a eql_v3.int4_ord) \ - RETURNS eql_v2.ore_block_u64_8_256 LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE \ - AS $mutbody$ SELECT eql_v2.ore_block_u64_8_256('{esc}'::jsonb) $mutbody$", + RETURNS eql_v3.ore_block_u64_8_256 LANGUAGE sql IMMUTABLE STRICT PARALLEL SAFE \ + AS $mutbody$ SELECT eql_v3.ore_block_u64_8_256('{esc}'::jsonb) $mutbody$", esc = const_payload.replace('\'', "''"), ); mutate(&pool, &ddl).await?; @@ -409,8 +409,8 @@ async fn making_ord_term_non_strict_flips_order_by_nulls_arm(pool: PgPool) -> Re let const_payload = fetch_fixture_payload::(&pool, 0).await?; let ddl = format!( "CREATE OR REPLACE FUNCTION eql_v3.ord_term(a eql_v3.int4_ord) \ - RETURNS eql_v2.ore_block_u64_8_256 LANGUAGE sql IMMUTABLE PARALLEL SAFE \ - AS $mutbody$ SELECT eql_v2.ore_block_u64_8_256(\ + RETURNS eql_v3.ore_block_u64_8_256 LANGUAGE sql IMMUTABLE PARALLEL SAFE \ + AS $mutbody$ SELECT eql_v3.ore_block_u64_8_256(\ coalesce(a, 
'{esc}'::jsonb::eql_v3.int4_ord)::jsonb) $mutbody$", esc = const_payload.replace('\'', "''"), ); diff --git a/tests/sqlx/tests/encrypted_domain/family/sem.rs b/tests/sqlx/tests/encrypted_domain/family/sem.rs new file mode 100644 index 00000000..b7d2543c --- /dev/null +++ b/tests/sqlx/tests/encrypted_domain/family/sem.rs @@ -0,0 +1,395 @@ +//! Direct behavioural tests for the self-contained `eql_v3` searchable- +//! encrypted-metadata (SEM) index-term functions (`eql_v3.hmac_256`, +//! `eql_v3.ore_block_u64_8_256` and their comparators). +//! +//! These functions are a HAND-PORT of the `eql_v2` originals (`src/v3/sem/`). +//! The scalar matrix already exercises the happy path of the *array* comparator +//! end-to-end against real ciphertext fixtures (ordering, equality, min/max, +//! injectivity, index engagement). This file covers the branches the matrix +//! structurally cannot reach, and which are otherwise tested only on the +//! `eql_v2` copies (in `tests/index_compare_tests.rs`): +//! +//! - T1: differential v2↔v3 parity on real `ob` fixtures (the strongest guard +//! against a faithful-port slip — see below). +//! - T2: the `'Ciphertexts are different lengths'` RAISE (all real fixtures are +//! equal length, so the matrix never hits it). +//! - T3: NULL-term ordering inside `compare_ore_block_u64_8_256_term` — the +//! `STRICT` comparison wrappers short-circuit before these branches run. +//! - T4: array-level NULL + empty/cardinality base cases of the recursion. +//! - T5: presence checks (`has_*`) and the missing-`ob` RAISE. +//! +//! All migrations (`001`–`007`) auto-apply to every `#[sqlx::test]` pool, so the +//! real `ore` table (ids 1–1000) and both schemas are available with no setup. + +use std::collections::HashSet; + +use anyhow::Result; +use eql_tests::assert_raises; +use sqlx::PgPool; + +/// A single term built directly from hex — no encryption needed for the +/// structural/edge-case tests. 
+fn term(hex: &str) -> String { + format!("ROW(decode('{hex}', 'hex'))::eql_v3.ore_block_u64_8_256_term") +} + +/// T1 — Differential parity: the same real `ob` payload must compare identically +/// through the `eql_v2` and `eql_v3` array comparators. `eql_v2` is the trusted +/// oracle; `eql_v3` is the byte-port. Both sides route through the SAME path +/// (jsonb extractor → composite → `compare_ore_block_u64_8_256_terms`) so the +/// schema prefix is the only variable — any divergence is a genuine port bug. +/// v3 has no encrypted-arg `compare` overload, hence the extractor routing. +#[sqlx::test] +async fn ore_v2_v3_comparator_parity_on_real_fixtures(pool: PgPool) -> Result<()> { + // Pairs spanning equal and unequal ids. Plaintext order of the fixtures is + // undocumented, so we assert v2≡v3 agreement (not a specific sign). + let pairs = [ + (1i64, 1i64), + (1, 2), + (2, 1), + (1, 500), + (500, 1), + (42, 42), + (10, 900), + (900, 10), + ]; + + let sql = r#" + WITH a AS (SELECT e::jsonb AS j FROM ore WHERE id = $1), + b AS (SELECT e::jsonb AS j FROM ore WHERE id = $2) + SELECT + eql_v2.compare_ore_block_u64_8_256_terms( + eql_v2.ore_block_u64_8_256(a.j), eql_v2.ore_block_u64_8_256(b.j)) AS v2, + eql_v3.compare_ore_block_u64_8_256_terms( + eql_v3.ore_block_u64_8_256(a.j), eql_v3.ore_block_u64_8_256(b.j)) AS v3 + FROM a, b + "#; + + let mut v3_signs: HashSet = HashSet::new(); + for (x, y) in pairs { + let (v2, v3): (i32, i32) = sqlx::query_as(sql).bind(x).bind(y).fetch_one(&pool).await?; + assert_eq!( + v2, v3, + "eql_v2 and eql_v3 ORE comparators disagree on ids ({x},{y}): v2={v2} v3={v3}" + ); + v3_signs.insert(v3); + } + + // Non-triviality: the sample must have actually exercised lt, eq, and gt — + // otherwise the parity check could pass on a degenerate all-equal path. 
+ assert!( + v3_signs.contains(&0), + "sample must include an equal pair (0)" + ); + assert!( + v3_signs.contains(&-1), + "sample must include a less-than pair (-1)" + ); + assert!( + v3_signs.contains(&1), + "sample must include a greater-than pair (1)" + ); + Ok(()) +} + +/// T2 — The term comparator must reject ciphertexts of different lengths. This +/// guard is unreachable via the matrix (every real fixture is equal length). +#[sqlx::test] +async fn ore_term_comparator_rejects_different_length_ciphertexts(pool: PgPool) -> Result<()> { + let sql = format!( + "SELECT eql_v3.compare_ore_block_u64_8_256_term({}, {})", + term("aabbccdd"), // 4 bytes + term("aabbccddee"), // 5 bytes + ); + assert_raises(&pool, &sql, &[], "Ciphertexts are different lengths").await?; + Ok(()) +} + +/// T3 — NULL-term ordering inside `compare_ore_block_u64_8_256_term`. The +/// function is intentionally NOT `STRICT`, so these defensive branches are +/// reachable by a direct call (the `STRICT` comparison wrappers never reach +/// them). Pins: `(NULL, t) = -1`, `(t, NULL) = 1`, `(NULL, NULL) = 0`. +#[sqlx::test] +async fn ore_term_comparator_null_ordering(pool: PgPool) -> Result<()> { + let t = term("aabb"); + let n = "NULL::eql_v3.ore_block_u64_8_256_term"; + + let cases = [ + ( + format!("SELECT eql_v3.compare_ore_block_u64_8_256_term({n}, {t})"), + -1, + ), + ( + format!("SELECT eql_v3.compare_ore_block_u64_8_256_term({t}, {n})"), + 1, + ), + ( + format!("SELECT eql_v3.compare_ore_block_u64_8_256_term({n}, {n})"), + 0, + ), + ]; + + for (sql, expected) in cases { + let got: i32 = sqlx::query_scalar(&sql).fetch_one(&pool).await?; + assert_eq!(got, expected, "null-term ordering: {sql}"); + } + Ok(()) +} + +/// T4 — Array-level NULL and empty/cardinality base cases of the recursive +/// `compare_ore_block_u64_8_256_terms(term[], term[])`. NULL array → NULL; +/// both empty → 0; empty vs non-empty → -1; non-empty vs empty → 1. 
+#[sqlx::test] +async fn ore_terms_array_null_and_empty_base_cases(pool: PgPool) -> Result<()> { + let t = format!("ARRAY[{}]", term("aabb")); + let empty = "ARRAY[]::eql_v3.ore_block_u64_8_256_term[]"; + let null_arr = "NULL::eql_v3.ore_block_u64_8_256_term[]"; + + // NULL array operand → NULL result (the array overload returns NULL; it is + // not STRICT). Typed as Option; the shared `assert_null` helper only + // types Option, so query directly here. + for sql in [ + format!("SELECT eql_v3.compare_ore_block_u64_8_256_terms({null_arr}, {t})"), + format!("SELECT eql_v3.compare_ore_block_u64_8_256_terms({t}, {null_arr})"), + ] { + let got: Option = sqlx::query_scalar(&sql).fetch_one(&pool).await?; + assert!(got.is_none(), "NULL array operand must yield NULL: {sql}"); + } + + let cases = [ + ( + format!("SELECT eql_v3.compare_ore_block_u64_8_256_terms({empty}, {empty})"), + 0, + ), + ( + format!("SELECT eql_v3.compare_ore_block_u64_8_256_terms({empty}, {t})"), + -1, + ), + ( + format!("SELECT eql_v3.compare_ore_block_u64_8_256_terms({t}, {empty})"), + 1, + ), + ]; + for (sql, expected) in cases { + let got: i32 = sqlx::query_scalar(&sql).fetch_one(&pool).await?; + assert_eq!(got, expected, "array base case: {sql}"); + } + Ok(()) +} + +/// T5 — SEM presence checks (`has_ore_block_u64_8_256`, `has_hmac_256`), the +/// extractor's missing-`ob` RAISE, and its NULL-jsonb short-circuit. +#[sqlx::test] +async fn sem_presence_checks_and_missing_ob_behaviour(pool: PgPool) -> Result<()> { + let bool_cases = [ + ( + r#"SELECT eql_v3.has_ore_block_u64_8_256('{"ob":["aa"]}'::jsonb)"#, + true, + ), + ( + r#"SELECT eql_v3.has_ore_block_u64_8_256('{}'::jsonb)"#, + false, + ), + // json-null `ob` → `->>` yields NULL → absent. 
+ ( + r#"SELECT eql_v3.has_ore_block_u64_8_256('{"ob":null}'::jsonb)"#, + false, + ), + (r#"SELECT eql_v3.has_hmac_256('{"hm":"abc"}'::jsonb)"#, true), + (r#"SELECT eql_v3.has_hmac_256('{}'::jsonb)"#, false), + ]; + for (sql, expected) in bool_cases { + let got: bool = sqlx::query_scalar(sql).fetch_one(&pool).await?; + assert_eq!(got, expected, "presence check: {sql}"); + } + + // Missing `ob` → RAISE. + assert_raises( + &pool, + r#"SELECT eql_v3.ore_block_u64_8_256('{"foo":1}'::jsonb)"#, + &[], + "Expected an ore index (ob) value", + ) + .await?; + + // NULL jsonb → NULL composite (STRICT short-circuit), NOT a raise. + let is_null: bool = + sqlx::query_scalar("SELECT eql_v3.ore_block_u64_8_256(NULL::jsonb) IS NULL") + .fetch_one(&pool) + .await?; + assert!( + is_null, + "NULL jsonb must extract to a NULL composite, not raise" + ); + Ok(()) +} + +/// T6 — Characterization of `eql_v3.jsonb_array_to_bytea_array(jsonb)` across its +/// three real-world input shapes. This is the safety net for the plpgsql→sql +/// inlining refactor (the function is reached per-encrypted-value, so it must be +/// inlinable). Behaviour pinned: +/// - JSON null (`'null'`) → NULL (the load-bearing null guard) +/// - empty array (`'[]'`) → NULL (array_agg over zero rows is NULL) +/// - populated array → decoded bytea[] +/// +/// Note the deliberate divergence the inlinable CASE form introduces vs. the +/// v2 plpgsql equivalent: a non-array JSON *scalar* (e.g. a number) returns NULL +/// rather than raising `cannot extract elements from a scalar`. Both callers only +/// ever pass an array or json-null (`val->'ob'`), so this is unreachable in +/// practice; we pin it here so the divergence is intentional and visible. +#[sqlx::test] +async fn jsonb_array_to_bytea_array_input_shapes(pool: PgPool) -> Result<()> { + // SQL NULL (distinct from JSON null `'null'`). 
The function is NOT STRICT, + // so the body runs: `jsonb_typeof(NULL)` is NULL → the CASE guard + // `WHEN jsonb_typeof(val) = 'array'` is not-true → ELSE NULL. + let is_null: bool = + sqlx::query_scalar("SELECT eql_v3.jsonb_array_to_bytea_array(NULL::jsonb) IS NULL") + .fetch_one(&pool) + .await?; + assert!( + is_null, + "SQL NULL must yield NULL bytea[] (function is not STRICT)" + ); + + // JSON null → NULL. + let is_null: bool = + sqlx::query_scalar("SELECT eql_v3.jsonb_array_to_bytea_array('null'::jsonb) IS NULL") + .fetch_one(&pool) + .await?; + assert!(is_null, "JSON null must yield NULL bytea[]"); + + // Empty array → NULL (array_agg over zero rows). + let is_null: bool = + sqlx::query_scalar("SELECT eql_v3.jsonb_array_to_bytea_array('[]'::jsonb) IS NULL") + .fetch_one(&pool) + .await?; + assert!(is_null, "empty JSON array must yield NULL bytea[]"); + + // Single-element array → one decoded bytea element. + let decoded: Vec> = + sqlx::query_scalar("SELECT eql_v3.jsonb_array_to_bytea_array('[\"aabb\"]'::jsonb)") + .fetch_one(&pool) + .await?; + assert_eq!( + decoded, + vec![vec![0xaau8, 0xbb]], + "single-element array must hex-decode to a 1-element bytea[]" + ); + + // Populated array → hex-decoded bytea[] round-trip. + let decoded: Vec> = sqlx::query_scalar( + "SELECT eql_v3.jsonb_array_to_bytea_array('[\"aabb\",\"ccdd\"]'::jsonb)", + ) + .fetch_one(&pool) + .await?; + assert_eq!( + decoded, + vec![vec![0xaau8, 0xbb], vec![0xccu8, 0xdd]], + "populated array must hex-decode to bytea[]" + ); + + // Deliberate delta: a non-array JSON scalar returns NULL (not a raise). + let is_null: bool = + sqlx::query_scalar("SELECT eql_v3.jsonb_array_to_bytea_array('5'::jsonb) IS NULL") + .fetch_one(&pool) + .await?; + assert!( + is_null, + "non-array JSON scalar must yield NULL (documented delta)" + ); + + // Same delta for a non-array JSON object — `jsonb_typeof` is 'object', so + // the CASE guard is not-true → ELSE NULL (not a raise). 
+ let is_null: bool = + sqlx::query_scalar("SELECT eql_v3.jsonb_array_to_bytea_array('{}'::jsonb) IS NULL") + .fetch_one(&pool) + .await?; + assert!( + is_null, + "non-array JSON object must yield NULL (documented delta)" + ); + + Ok(()) +} + +/// T7 — Characterization of `eql_v3.jsonb_array_to_ore_block_u64_8_256(jsonb)` +/// across the same three input shapes. Safety net for the same plpgsql→sql +/// inlining refactor. Behaviour pinned: +/// - JSON null (`'null'`) → NULL composite +/// - empty array (`'[]'`) → NULL composite (inner array_agg is NULL) +/// - populated array → non-NULL composite with one term per element +/// +/// Same documented delta as T6 for a non-array JSON scalar. +#[sqlx::test] +async fn jsonb_array_to_ore_block_input_shapes(pool: PgPool) -> Result<()> { + // SQL NULL (distinct from JSON null `'null'`). Not STRICT, so the body + // runs: `jsonb_typeof(NULL)` is NULL → CASE guard not-true → ELSE NULL. + let is_null: bool = + sqlx::query_scalar("SELECT eql_v3.jsonb_array_to_ore_block_u64_8_256(NULL::jsonb) IS NULL") + .fetch_one(&pool) + .await?; + assert!( + is_null, + "SQL NULL must yield NULL composite (function is not STRICT)" + ); + + // JSON null → NULL composite. + let is_null: bool = sqlx::query_scalar( + "SELECT eql_v3.jsonb_array_to_ore_block_u64_8_256('null'::jsonb) IS NULL", + ) + .fetch_one(&pool) + .await?; + assert!(is_null, "JSON null must yield NULL composite"); + + // Empty array → NULL composite. + let is_null: bool = + sqlx::query_scalar("SELECT eql_v3.jsonb_array_to_ore_block_u64_8_256('[]'::jsonb) IS NULL") + .fetch_one(&pool) + .await?; + assert!(is_null, "empty JSON array must yield NULL composite"); + + // Single-element array → non-NULL composite with exactly 1 term. 
+ let term_count: i32 = sqlx::query_scalar( + "SELECT cardinality((eql_v3.jsonb_array_to_ore_block_u64_8_256('[\"aabb\"]'::jsonb)).terms)", + ) + .fetch_one(&pool) + .await?; + assert_eq!( + term_count, 1, + "single-element array must yield exactly one term" + ); + + // Populated array → non-NULL composite with one term per element. + let term_count: i32 = sqlx::query_scalar( + "SELECT cardinality((eql_v3.jsonb_array_to_ore_block_u64_8_256('[\"aabb\",\"ccdd\",\"eeff\"]'::jsonb)).terms)", + ) + .fetch_one(&pool) + .await?; + assert_eq!( + term_count, 3, + "populated array must yield one term per element" + ); + + // Deliberate delta: a non-array JSON scalar returns NULL (not a raise). + let is_null: bool = + sqlx::query_scalar("SELECT eql_v3.jsonb_array_to_ore_block_u64_8_256('5'::jsonb) IS NULL") + .fetch_one(&pool) + .await?; + assert!( + is_null, + "non-array JSON scalar must yield NULL (documented delta)" + ); + + // Same delta for a non-array JSON object — `jsonb_typeof` is 'object', so + // the CASE guard is not-true → ELSE NULL (not a raise). + let is_null: bool = + sqlx::query_scalar("SELECT eql_v3.jsonb_array_to_ore_block_u64_8_256('{}'::jsonb) IS NULL") + .fetch_one(&pool) + .await?; + assert!( + is_null, + "non-array JSON object must yield NULL (documented delta)" + ); + + Ok(()) +} diff --git a/tests/sqlx/tests/encrypted_domain/scalars/mod.rs b/tests/sqlx/tests/encrypted_domain/scalars/mod.rs index f900af3e..72c64f87 100644 --- a/tests/sqlx/tests/encrypted_domain/scalars/mod.rs +++ b/tests/sqlx/tests/encrypted_domain/scalars/mod.rs @@ -1,5 +1,6 @@ -//! Per-scalar matrix suites. Each `pub mod ` targets one scalar type and -//! holds its `ordered_numeric_matrix!` invocation. +//! Per-scalar matrix suites, generated by the `scalar_types!(matrix_suites)` +//! invocation below — one module per scalar type, each holding its +//! `ordered_numeric_matrix!` suite. //! //! The modules are generated from the single harness list in //! 
`tests/sqlx/src/scalar_types.rs` — adding a type there adds its suite here