diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2dc256c..fc547f7 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,9 +3,9 @@ name: CI on: workflow_call: pull_request: - branches: [main] + branches: [main, feat/ir-first] push: - branches: [main] + branches: [main, feat/ir-first] workflow_dispatch: concurrency: @@ -126,6 +126,10 @@ jobs: run: | uv run maturin develop + - name: Run IR vector contract harness + run: | + uv run pytest -v tests/test_ir_vectors_contract.py + - name: Run pytest run: | uv run pytest -v --cov=src --cov-report=xml --cov-report=term @@ -169,6 +173,10 @@ jobs: run: | uv run maturin develop + - name: Run IR vector contract harness + run: | + uv run pytest -v tests/test_ir_vectors_contract.py + - name: Run pytest run: | uv run pytest -v --cov=src --cov-report=xml --cov-report=term diff --git a/docs/plans/2026-06-19-001-ir-first-roadmap.md b/docs/plans/2026-06-19-001-ir-first-roadmap.md index 8bc67d9..0e4c1c2 100644 --- a/docs/plans/2026-06-19-001-ir-first-roadmap.md +++ b/docs/plans/2026-06-19-001-ir-first-roadmap.md @@ -92,6 +92,21 @@ Definition of done addition: - Last updated: `2026-06-19` - Roadmap owner: `@syn54x` +## Branching and release policy + +IR program integration branch: + +- `feat/ir-first` is the staging branch for all roadmap work. +- Starting work on any phase issue requires creating a new branch from `feat/ir-first`. +- Phase PRs must target `feat/ir-first` (not `main`). +- Completed phase work merges back into `feat/ir-first`. + +Promotion to release: + +- `main` must not receive partial IR migration work. +- Merge `feat/ir-first` into `main` only when the IR program is complete and release-ready. +- Release notes and migration guide updates are required at promotion time. + ## Traceability rule (roadmap <-> GitHub issues) - Every roadmap deliverable and exit gate must reference one or more GitHub issues. 
@@ -135,7 +150,7 @@ Definition of synchronized: ### Phase 0 - Contract and RFC freeze -Status: `Not started` +Status: `In progress` Issue references: @@ -146,14 +161,26 @@ Issue references: - Define versioned IR contracts and non-negotiable invariants before implementation. **Deliverables** -- [ ] RFC: `SchemaIR`, `QueryIR`, `CodecIR` structure and versioning strategy. -- [ ] Invariant spec doc covering parity, hydration ABI, null/bind correctness. -- [ ] Golden test vector format for schema/query/codec conformance fixtures. +- [x] RFC: `SchemaIR`, `QueryIR`, `CodecIR` structure and versioning strategy. +- [x] Invariant spec doc covering parity, hydration ABI, null/bind correctness. +- [x] Golden test vector format for schema/query/codec conformance fixtures. **Exit gate** - [ ] RFC approved and merged. - [ ] Golden vectors committed and validated by CI harness skeleton. +**Evidence (branch `feat/ir-first`)** +- RFC draft: `docs/rfc/ir-contracts-v1.md` +- Invariant spec draft: `docs/solutions/patterns/ir-invariants.md` +- Golden vectors: `tests/fixtures/ir_vectors/README.md`, `tests/fixtures/ir_vectors/*.json` +- CI harness skeleton: `tests/test_ir_vectors_contract.py` +- CI wiring: `.github/workflows/ci.yml` (IR vector contract harness step in Python test jobs) +- Issue sync comments: + - [#71 comment](https://github.com/syn54x/ferro-orm/issues/71#issuecomment-4752226422) + - [#72 comment](https://github.com/syn54x/ferro-orm/issues/72#issuecomment-4752226080) + - [#73 comment](https://github.com/syn54x/ferro-orm/issues/73#issuecomment-4752226172) + - [#74 comment](https://github.com/syn54x/ferro-orm/issues/74#issuecomment-4752226261) + --- ### Phase 1 - Build IR core and compiler @@ -408,9 +435,10 @@ Use this roadmap as the source for issues and project fields. Append updates as concise entries. - `2026-06-19` - Roadmap initialized. 
+- `2026-06-19` - Branching policy set: phase work branches from `feat/ir-first` and merges back into `feat/ir-first` until final promotion to `main`. ## Immediate next actions -- [ ] Create GitHub issues for Phase 0 deliverables. -- [ ] Create `IR-P0` milestone and seed with Phase 0 issues. +- [x] Create GitHub issues for Phase 0 deliverables. +- [x] Create `IR-P0` milestone and seed with Phase 0 issues. - [ ] Assign DRO for Phase 0 RFC. diff --git a/docs/plans/ir-first-migration-guide.md b/docs/plans/ir-first-migration-guide.md index f7fc0bb..0c07e49 100644 --- a/docs/plans/ir-first-migration-guide.md +++ b/docs/plans/ir-first-migration-guide.md @@ -27,9 +27,9 @@ No user-facing runtime behavior changes expected. | Issue | Change | Impact | User action | Notes | | --- | --- | --- | --- | --- | -| [#72](https://github.com/syn54x/ferro-orm/issues/72) | IR contract RFC definition | none | none | design-only | -| [#73](https://github.com/syn54x/ferro-orm/issues/73) | Invariant specification | none | none | design-only | -| [#74](https://github.com/syn54x/ferro-orm/issues/74) | Golden vectors + CI harness skeleton | none | none | infra-only | +| [#72](https://github.com/syn54x/ferro-orm/issues/72) | IR contract RFC definition | none | none | design-only; artifact: `docs/rfc/ir-contracts-v1.md` | +| [#73](https://github.com/syn54x/ferro-orm/issues/73) | Invariant specification | none | none | design-only; artifact: `docs/solutions/patterns/ir-invariants.md` | +| [#74](https://github.com/syn54x/ferro-orm/issues/74) | Golden vectors + CI harness skeleton | none | none | infra-only; artifacts: `tests/fixtures/ir_vectors/`, `tests/test_ir_vectors_contract.py` | ### Phase 1 diff --git a/docs/rfc/ir-contracts-v1.md b/docs/rfc/ir-contracts-v1.md new file mode 100644 index 0000000..3b7816e --- /dev/null +++ b/docs/rfc/ir-contracts-v1.md @@ -0,0 +1,228 @@ +--- +title: "IR contracts v1 (SchemaIR, QueryIR, CodecIR)" +type: rfc +status: draft +date: 2026-06-19 +phase: IR-P0 
+roadmap: docs/plans/2026-06-19-001-ir-first-roadmap.md +--- + +# IR contracts v1 + +## Purpose + +Define the v1 canonical intermediate representation contracts for schema, query, and bind/fetch codec behavior before Phase 1 implementation. + +This RFC is normative for wire shape, versioning, and validation behavior. + +## Non-goals + +- Implementing new runtime/compiler behavior (Phase 1+). +- Removing compatibility layers in existing JSON pipelines (later phases). + +## Shared contract rules + +### Envelope + +All IR artifacts use a top-level envelope: + +```json +{ + "ir_kind": "schema|query|codec", + "ir_version": 1, + "payload": {} +} +``` + +Rules: + +- `ir_kind` and `ir_version` are required. +- Unknown `ir_kind` is a hard error. +- Unsupported `ir_version` is a hard error. +- `payload` must be an object; missing or non-object payload is a hard error. + +### Compatibility policy + +- Minor additive evolution inside `ir_version: 1` may add optional payload fields. +- Existing required fields in v1 cannot be removed or retyped. +- New required fields require a major version bump. +- Readers must fail loudly on malformed required fields. +- Writers must emit deterministic field ordering where serialization APIs allow it. + +## SchemaIR v1 + +`SchemaIR` represents canonical model schema semantics consumed by runtime DDL, migration planning, and Alembic adapters. 
+ +### SchemaIR payload shape + +```json +{ + "dialect_agnostic": true, + "models": [ + { + "model_name": "Invoice", + "table_name": "invoice", + "columns": [ + { + "name": "id", + "logical_type": "integer", + "db_type": "bigint", + "nullable": false, + "primary_key": true, + "autoincrement": true, + "unique": false, + "index": false, + "default": null, + "format": null + } + ], + "foreign_keys": [ + { + "column": "customer_id", + "to_table": "customer", + "to_column": "id", + "on_delete": "CASCADE", + "name": "fk_invoice_customer_id_customer" + } + ], + "indexes": [ + { + "name": "idx_invoice_created_at", + "columns": ["created_at"], + "unique": false + } + ], + "uniques": [ + { + "name": "uq_invoice_number", + "columns": ["number"] + } + ], + "checks": [ + { + "name": "ck_invoice_total", + "expression": "total >= 0" + } + ] + } + ] +} +``` + +### SchemaIR requirements + +- `table_name` must be the canonical lower-case table name of the model. +- `db_type` must use canonical Ferro tokens (`text`, `varchar(N)`, `smallint`, `int`, `bigint`, `uuid`, `timestamp`, `timestamptz`, `date`, `time`). +- `nullable` is explicit and never inferred by the reader. +- Constraint/index names are required and must follow cross-emitter parity naming rules. +- Foreign key shadow columns (for `ForeignKey`) are represented as normal columns plus FK metadata. +- Any schema artifact emitted by one emitter must be representable without lossy translation. + +## QueryIR v1 + +`QueryIR` represents typed query intent currently serialized through ad-hoc query JSON. 
+ +### QueryIR payload shape + +```json +{ + "model_name": "User", + "where": [ + { + "node_kind": "compound", + "operator": "AND", + "left": { + "node_kind": "leaf", + "column": "active", + "operator": "==", + "value": {"kind": "bool", "value": true} + }, + "right": { + "node_kind": "leaf", + "column": "email", + "operator": "LIKE", + "value": {"kind": "string", "value": "%@example.com"} + } + } + ], + "order_by": [ + {"column": "id", "direction": "asc"} + ], + "limit": 100, + "offset": 0, + "m2m": null +} +``` + +### QueryIR requirements + +- Node variants are explicit (`leaf` vs `compound`), never inferred from nullable fields. +- Operator domain is restricted to: `==`, `!=`, `<`, `<=`, `>`, `>=`, `IN`, `LIKE`, `AND`, `OR`. +- `where` is a list of root predicate trees combined by implicit AND semantics unless nested compound nodes define otherwise. +- `order_by.direction` is normalized to `asc|desc`. +- Value literals are typed nodes (`kind`, `value`) so codec selection does not depend on lossy JSON inference. +- Null comparisons for equality/inequality map to `IS NULL` / `IS NOT NULL` semantics in execution lowering. + +## CodecIR v1 + +`CodecIR` centralizes type semantics for both bind and fetch paths. + +### CodecIR payload shape + +```json +{ + "bind_rules": [ + { + "logical_type": "uuid", + "db_type": "uuid", + "non_null_wire_kind": "uuid", + "null_wire_kind": "uuid_null" + }, + { + "logical_type": "integer", + "db_type": "bigint", + "non_null_wire_kind": "i64", + "null_wire_kind": "i64_null" + } + ], + "fetch_rules": [ + { + "db_type": "uuid", + "wire_kind": "uuid", + "python_kind": "uuid.UUID" + } + ], + "hydration_abi": { + "constructor_mode": "direct_dict", + "required_slots": [ + "__pydantic_fields_set__", + "__pydantic_extra__", + "__pydantic_private__" + ] + } +} +``` + +### CodecIR requirements + +- Typed null kinds are first-class and must not degrade to untyped text null in schema-driven paths. 
+- Bind and fetch semantics are defined per logical/db type pair. +- Hydration ABI explicitly requires slot initialization for observational equivalence with Pydantic initialization semantics. +- Runtime lowering must fail loudly when a required codec rule is absent. + +## Validation behavior + +For all IR kinds in v1: + +- Parse/shape validation errors are fatal and actionable. +- Unknown required enum values are fatal. +- Unknown optional fields may be ignored by readers in the same major version. +- CI conformance vectors are the executable source of truth for schema/query/codec payload acceptance. + +## Phase 1 handoff + +Phase 1 implementation must: + +1. Introduce strongly typed Rust/Python representations matching this RFC. +2. Add compile/serialize tests that round-trip all golden vectors without shape drift. +3. Keep existing behavior unchanged unless explicitly called out by roadmap phase gates. diff --git a/docs/solutions/patterns/ir-invariants.md b/docs/solutions/patterns/ir-invariants.md new file mode 100644 index 0000000..3b60064 --- /dev/null +++ b/docs/solutions/patterns/ir-invariants.md @@ -0,0 +1,123 @@ +--- +title: IR-first invariants (parity, hydration ABI, null/bind correctness) +type: pattern +tags: [convention, invariant, ir, schema, query, codec, bridge, rust, python] +related_files: + - AGENTS.md + - docs/rfc/ir-contracts-v1.md + - docs/solutions/patterns/cross-emitter-ddl-parity.md + - docs/solutions/patterns/typed-null-binds.md + - src/ferro/migrations/alembic.py + - src/schema.rs + - src/query.rs + - src/operations.rs + - src/backend.rs + - tests/test_cross_emitter_parity.py + - tests/test_db_type_cross_emitter_parity.py + - tests/test_typed_null_binds.py + - tests/test_hydration.py +related_issues: [71, 72, 73, 74] +related_prs: [] +captured: 2026-06-19 +--- + +## Problem + +Ferro currently crosses Python and Rust boundaries using multiple JSON-shaped contracts and independently enforced conventions. 
Without one explicit invariant spec, drift can appear as phantom DDL diffs, typed-null regressions, or hydration attribute errors that only show up at runtime. + +## Takeaway + +Treat these as non-negotiable IR invariants: + +1. **Cross-emitter parity**: every schema artifact name/type/default/nullability must match across emitters. +2. **Hydration ABI**: zero-copy hydration must initialize required Pydantic slots exactly. +3. **Typed null/bind correctness**: schema-driven paths must emit type-correct binds, including typed NULLs. + +Every IR contract change must preserve or explicitly version these invariants. + +## Invariant I: Cross-emitter parity + +### Contract + +Given one model definition, all DDL emission paths must produce equivalent schema artifacts: + +- Table names. +- Column names (including FK shadow `*_id` columns). +- Column type mapping/tokenization. +- Constraint/index/check naming. +- Nullability/default semantics. + +### Why it exists + +If emitters disagree, users get phantom autogenerate diffs and noisy migration history. + +### Enforcement anchors + +- `src/ferro/migrations/alembic.py` naming convention. +- `src/schema.rs` naming/type emission helpers. +- `tests/test_cross_emitter_parity.py` +- `tests/test_db_type_cross_emitter_parity.py` +- `tests/test_schema_constraints.py` +- `tests/test_alembic_autogenerate.py` + +## Invariant II: Hydration ABI + +### Contract + +Rust hydration must keep direct-to-dict construction while being observationally equivalent to Pydantic initialization for required slots. + +Minimum required initialized slots: + +- `__pydantic_fields_set__` +- `__pydantic_extra__` +- `__pydantic_private__` + +### Why it exists + +Skipping slot initialization can pass basic reads but later fails with runtime `AttributeError` when user code or Pydantic internals touch missing attributes. 
+ +### Enforcement anchors + +- `src/operations.rs` (`set_pydantic_hydration_slots`) +- `src/backend.rs` row materialization flow +- `tests/test_hydration.py` +- `docs/solutions/issues/pydantic-slots-missing-after-ferro-hydration.md` + +## Invariant III: Typed null/bind correctness + +### Contract + +Schema-driven bind paths must preserve type identity for non-null values and NULL values. + +- UUID values must remain UUID-typed on strict backends. +- Typed NULL must be selected from schema/type context where available. +- Raw SQL path may use untyped fallback only where schema context is unavailable. + +### Why it exists + +Untyped binds (especially NULL/UUID) can cause backend-specific mismatches or hidden coercion bugs. + +### Enforcement anchors + +- `src/query.rs` (`value_rhs_simple_expr_for_backend`, typed-null selection) +- `src/operations.rs` (`engine_bind_values_from_sea`) +- `src/backend.rs` (`EngineBindValue`, `NullKind`) +- `tests/test_typed_null_binds.py` + +## Applying invariants to IR contracts + +When updating `SchemaIR`, `QueryIR`, or `CodecIR`: + +1. State which invariant(s) the change touches. +2. Add or update golden vectors that encode the invariant boundary. +3. Add a regression test that fails before the change and passes after. +4. Reject designs that only mitigate symptoms; fill the primitive gap. + +## How to recognize a violation + +- Alembic autogenerate proposes drop/recreate with no real model change. +- Query/filter updates start failing only on one backend for NULL/UUID values. +- Hydrated instances error on `__pydantic_*` slot access. +- New IR field appears in one emitter path but not another. + +If any appears, treat it as a correctness bug, not a warning-level mismatch. 
diff --git a/tests/fixtures/ir_vectors/README.md b/tests/fixtures/ir_vectors/README.md new file mode 100644 index 0000000..27eabb9 --- /dev/null +++ b/tests/fixtures/ir_vectors/README.md @@ -0,0 +1,45 @@ +# IR golden vectors + +Phase 0 conformance vectors for IR contracts. + +## Purpose + +- Pin the canonical wire shape for `SchemaIR`, `QueryIR`, and `CodecIR`. +- Provide deterministic fixtures that CI can validate before Phase 1 runtime cutover work. + +## File format + +Each vector is one JSON file with this envelope: + +```json +{ + "vector_name": "schema_invoice_baseline_v1", + "domain": "schema|query|codec", + "expect_valid": true, + "ir": { + "ir_kind": "schema|query|codec", + "ir_version": 1, + "payload": {} + } +} +``` + +Rules: + +- `domain` and `ir.ir_kind` must match. +- `ir.ir_version` must equal `1` for Phase 0 vectors. +- `expect_valid` currently supports only `true` fixtures (negative vectors can be added later). +- Fixture file names use `{domain}_{name}_v1.json` (for example `schema_invoice_baseline_v1.json`). + +## Coverage requirements (Phase 0 minimum) + +- `schema`: one vector with parity-sensitive artifact names (`idx_*`, `uq_*`, `ck_*`, FK metadata). +- `query`: one vector with compound predicates and typed value nodes. +- `codec`: one vector with typed null and hydration ABI slot requirements. + +## How to extend + +1. Add a new JSON fixture in this directory. +2. Keep `vector_name` unique. +3. Update `tests/test_ir_vectors_contract.py` if new required fields are introduced. +4. Ensure CI remains deterministic (no generated timestamps/random IDs in fixtures). 
diff --git a/tests/fixtures/ir_vectors/codec_registry_core_v1.json b/tests/fixtures/ir_vectors/codec_registry_core_v1.json new file mode 100644 index 0000000..fba47fe --- /dev/null +++ b/tests/fixtures/ir_vectors/codec_registry_core_v1.json @@ -0,0 +1,56 @@ +{ + "vector_name": "codec_registry_core_v1", + "domain": "codec", + "expect_valid": true, + "ir": { + "ir_kind": "codec", + "ir_version": 1, + "payload": { + "bind_rules": [ + { + "logical_type": "uuid", + "db_type": "uuid", + "non_null_wire_kind": "uuid", + "null_wire_kind": "uuid_null" + }, + { + "logical_type": "integer", + "db_type": "bigint", + "non_null_wire_kind": "i64", + "null_wire_kind": "i64_null" + }, + { + "logical_type": "string", + "db_type": "text", + "non_null_wire_kind": "string", + "null_wire_kind": "string_null" + } + ], + "fetch_rules": [ + { + "db_type": "uuid", + "wire_kind": "uuid", + "python_kind": "uuid.UUID" + }, + { + "db_type": "bigint", + "wire_kind": "i64", + "python_kind": "int" + }, + { + "db_type": "text", + "wire_kind": "string", + "python_kind": "str" + } + ], + "hydration_abi": { + "constructor_mode": "direct_dict", + "required_slots": [ + "__pydantic_fields_set__", + "__pydantic_extra__", + "__pydantic_private__" + ] + } + } + } +} diff --git a/tests/fixtures/ir_vectors/query_user_compound_v1.json b/tests/fixtures/ir_vectors/query_user_compound_v1.json new file mode 100644 index 0000000..1e14d90 --- /dev/null +++ b/tests/fixtures/ir_vectors/query_user_compound_v1.json @@ -0,0 +1,58 @@ +{ + "vector_name": "query_user_compound_v1", + "domain": "query", + "expect_valid": true, + "ir": { + "ir_kind": "query", + "ir_version": 1, + "payload": { + "model_name": "User", + "where": [ + { + "node_kind": "compound", + "operator": "AND", + "left": { + "node_kind": "leaf", + "column": "active", + "operator": "==", + "value": { + "kind": "bool", + "value": true + } + }, + "right": { + "node_kind": "compound", + "operator": "OR", + "left": { + "node_kind": "leaf", + "column": "email", + 
"operator": "LIKE", + "value": { + "kind": "string", + "value": "%@ferro.dev" + } + }, + "right": { + "node_kind": "leaf", + "column": "role", + "operator": "IN", + "value": { + "kind": "list", + "value": ["admin", "owner"] + } + } + } + } + ], + "order_by": [ + { + "column": "id", + "direction": "asc" + } + ], + "limit": 100, + "offset": 0, + "m2m": null + } + } +} diff --git a/tests/fixtures/ir_vectors/schema_invoice_baseline_v1.json b/tests/fixtures/ir_vectors/schema_invoice_baseline_v1.json new file mode 100644 index 0000000..82394c6 --- /dev/null +++ b/tests/fixtures/ir_vectors/schema_invoice_baseline_v1.json @@ -0,0 +1,101 @@ +{ + "vector_name": "schema_invoice_baseline_v1", + "domain": "schema", + "expect_valid": true, + "ir": { + "ir_kind": "schema", + "ir_version": 1, + "payload": { + "dialect_agnostic": true, + "models": [ + { + "model_name": "Invoice", + "table_name": "invoice", + "columns": [ + { + "name": "id", + "logical_type": "integer", + "db_type": "bigint", + "nullable": false, + "primary_key": true, + "autoincrement": true, + "unique": false, + "index": false, + "default": null, + "format": null + }, + { + "name": "number", + "logical_type": "string", + "db_type": "varchar(64)", + "nullable": false, + "primary_key": false, + "autoincrement": false, + "unique": true, + "index": false, + "default": null, + "format": null + }, + { + "name": "customer_id", + "logical_type": "integer", + "db_type": "bigint", + "nullable": false, + "primary_key": false, + "autoincrement": false, + "unique": false, + "index": true, + "default": null, + "format": null + }, + { + "name": "created_at", + "logical_type": "datetime", + "db_type": "timestamptz", + "nullable": false, + "primary_key": false, + "autoincrement": false, + "unique": false, + "index": true, + "default": "now()", + "format": "date-time" + } + ], + "foreign_keys": [ + { + "column": "customer_id", + "to_table": "customer", + "to_column": "id", + "on_delete": "CASCADE", + "name": 
"fk_invoice_customer_id_customer" + } + ], + "indexes": [ + { + "name": "idx_invoice_created_at", + "columns": ["created_at"], + "unique": false + }, + { + "name": "idx_invoice_customer_id", + "columns": ["customer_id"], + "unique": false + } + ], + "uniques": [ + { + "name": "uq_invoice_number", + "columns": ["number"] + } + ], + "checks": [ + { + "name": "ck_invoice_number", + "expression": "length(number) > 0" + } + ] + } + ] + } + } +} diff --git a/tests/test_ir_vectors_contract.py b/tests/test_ir_vectors_contract.py new file mode 100644 index 0000000..f406495 --- /dev/null +++ b/tests/test_ir_vectors_contract.py @@ -0,0 +1,160 @@ +from __future__ import annotations + +import json +from pathlib import Path +from typing import Any + + +VECTORS_DIR = Path(__file__).parent / "fixtures" / "ir_vectors" +SUPPORTED_DOMAINS = {"schema", "query", "codec"} +SUPPORTED_IR_VERSION = 1 +QUERY_OPERATORS = {"==", "!=", "<", "<=", ">", ">=", "IN", "LIKE", "AND", "OR"} + + +def _load_vectors() -> list[tuple[Path, dict[str, Any]]]: + loaded: list[tuple[Path, dict[str, Any]]] = [] + for path in sorted(VECTORS_DIR.glob("*.json")): + loaded.append((path, json.loads(path.read_text(encoding="utf-8")))) + return loaded + + +def _require_keys(obj: dict[str, Any], required: set[str], label: str) -> None: + missing = required - set(obj.keys()) + assert not missing, f"{label} missing keys: {sorted(missing)}" + + +def _validate_query_node(node: dict[str, Any], label: str) -> None: + _require_keys(node, {"node_kind", "operator"}, label) + node_kind = node["node_kind"] + operator = node["operator"] + assert node_kind in {"leaf", "compound"}, f"{label}.node_kind invalid: {node_kind!r}" + assert operator in QUERY_OPERATORS, f"{label}.operator invalid: {operator!r}" + + if node_kind == "leaf": + _require_keys(node, {"column", "value"}, label) + assert isinstance(node["column"], str) and node["column"], ( + f"{label}.column must be non-empty string" + ) + value = node["value"] + assert 
isinstance(value, dict), f"{label}.value must be object" + _require_keys(value, {"kind", "value"}, f"{label}.value") + return + + _require_keys(node, {"left", "right"}, label) + assert isinstance(node["left"], dict), f"{label}.left must be object" + assert isinstance(node["right"], dict), f"{label}.right must be object" + _validate_query_node(node["left"], f"{label}.left") + _validate_query_node(node["right"], f"{label}.right") + + +def _validate_schema_payload(payload: dict[str, Any], label: str) -> None: + _require_keys(payload, {"dialect_agnostic", "models"}, label) + assert isinstance(payload["dialect_agnostic"], bool), ( + f"{label}.dialect_agnostic must be bool" + ) + models = payload["models"] + assert isinstance(models, list) and models, f"{label}.models must be non-empty list" + for i, model in enumerate(models): + model_label = f"{label}.models[{i}]" + assert isinstance(model, dict), f"{model_label} must be object" + _require_keys( + model, + {"model_name", "table_name", "columns", "foreign_keys", "indexes", "uniques", "checks"}, + model_label, + ) + assert isinstance(model["model_name"], str) and model["model_name"], ( + f"{model_label}.model_name must be non-empty string" + ) + assert isinstance(model["table_name"], str) and model["table_name"], ( + f"{model_label}.table_name must be non-empty string" + ) + assert isinstance(model["columns"], list) and model["columns"], ( + f"{model_label}.columns must be non-empty list" + ) + + +def _validate_query_payload(payload: dict[str, Any], label: str) -> None: + _require_keys(payload, {"model_name", "where", "order_by", "limit", "offset", "m2m"}, label) + assert isinstance(payload["model_name"], str) and payload["model_name"], ( + f"{label}.model_name must be non-empty string" + ) + where_nodes = payload["where"] + assert isinstance(where_nodes, list) and where_nodes, f"{label}.where must be non-empty list" + for i, node in enumerate(where_nodes): + node_label = f"{label}.where[{i}]" + assert isinstance(node, 
dict), f"{node_label} must be object" + _validate_query_node(node, node_label) + assert isinstance(payload["order_by"], list), f"{label}.order_by must be list" + if payload["limit"] is not None: + assert isinstance(payload["limit"], int) and payload["limit"] >= 0, ( + f"{label}.limit must be null or non-negative int" + ) + if payload["offset"] is not None: + assert isinstance(payload["offset"], int) and payload["offset"] >= 0, ( + f"{label}.offset must be null or non-negative int" + ) + if payload["m2m"] is not None: + assert isinstance(payload["m2m"], dict), f"{label}.m2m must be null or object" + + +def _validate_codec_payload(payload: dict[str, Any], label: str) -> None: + _require_keys(payload, {"bind_rules", "fetch_rules", "hydration_abi"}, label) + assert isinstance(payload["bind_rules"], list) and payload["bind_rules"], ( + f"{label}.bind_rules must be non-empty list" + ) + assert isinstance(payload["fetch_rules"], list) and payload["fetch_rules"], ( + f"{label}.fetch_rules must be non-empty list" + ) + hydration_abi = payload["hydration_abi"] + assert isinstance(hydration_abi, dict), f"{label}.hydration_abi must be object" + _require_keys(hydration_abi, {"constructor_mode", "required_slots"}, f"{label}.hydration_abi") + assert hydration_abi["constructor_mode"] == "direct_dict", ( + f"{label}.hydration_abi.constructor_mode must be direct_dict" + ) + required_slots = hydration_abi["required_slots"] + assert isinstance(required_slots, list) and required_slots, ( + f"{label}.hydration_abi.required_slots must be non-empty list" + ) + + +def _validate_domain_payload(domain: str, payload: dict[str, Any], label: str) -> None: + if domain == "schema": + _validate_schema_payload(payload, label) + elif domain == "query": + _validate_query_payload(payload, label) + elif domain == "codec": + _validate_codec_payload(payload, label) + else: + raise AssertionError(f"{label}.domain unsupported: {domain}") + + +def test_ir_vectors_directory_has_seed_vectors() -> None: + 
vectors = _load_vectors() + assert vectors, "Expected at least one IR vector fixture in tests/fixtures/ir_vectors" + found_domains = {payload["domain"] for _, payload in vectors if "domain" in payload} + assert SUPPORTED_DOMAINS.issubset(found_domains), ( + f"Expected at least one vector for each domain: {sorted(SUPPORTED_DOMAINS)}" + ) + + +def test_ir_vectors_match_phase0_contract_envelope() -> None: + for path, vector in _load_vectors(): + label = path.name + _require_keys(vector, {"vector_name", "domain", "expect_valid", "ir"}, label) + assert isinstance(vector["vector_name"], str) and vector["vector_name"], ( + f"{label}.vector_name must be non-empty string" + ) + assert vector["domain"] in SUPPORTED_DOMAINS, f"{label}.domain unsupported: {vector['domain']!r}" + assert vector["expect_valid"] is True, f"{label}.expect_valid must be true for Phase 0" + + ir = vector["ir"] + assert isinstance(ir, dict), f"{label}.ir must be object" + _require_keys(ir, {"ir_kind", "ir_version", "payload"}, f"{label}.ir") + assert ir["ir_kind"] == vector["domain"], ( + f"{label}.ir.ir_kind ({ir['ir_kind']!r}) must match domain ({vector['domain']!r})" + ) + assert ir["ir_version"] == SUPPORTED_IR_VERSION, ( + f"{label}.ir.ir_version must equal {SUPPORTED_IR_VERSION}" + ) + assert isinstance(ir["payload"], dict), f"{label}.ir.payload must be object" + _validate_domain_payload(vector["domain"], ir["payload"], f"{label}.ir.payload")