diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fc547f7..f4cb9c5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -16,6 +16,33 @@ permissions: contents: read jobs: + changed-shadow-paths: + name: Detect Shadow-Report Paths + runs-on: ubuntu-latest + if: github.event_name == 'pull_request' + outputs: + requires_shadow_reports: ${{ steps.filter.outputs.shadow }} + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Detect planner/IR path changes + id: filter + uses: dorny/paths-filter@v3 + with: + filters: | + shadow: + - 'src/operations.rs' + - 'src/query.rs' + - 'src/schema.rs' + - 'src/migrate.rs' + - 'src/backend.rs' + - 'src/connection.rs' + - 'src/ferro/ir/**' + - 'crates/ferro-schema-ir/**' + - 'tests/test_shadow_reports.py' + - 'tests/fixtures/shadow_reports/**' + lint-and-format: name: Lint & Format (Pre-commit / Prek) runs-on: ubuntu-latest @@ -244,6 +271,64 @@ jobs: run: | uv run pytest -v -m "backend_matrix or postgres_only" --db-backends=sqlite,postgres + test-shadow-reports-pr: + name: Shadow reports (touched paths) + runs-on: ubuntu-latest + needs: [changed-shadow-paths] + if: github.event_name == 'pull_request' && needs.changed-shadow-paths.outputs.requires_shadow_reports == 'true' + services: + postgres: + image: postgres:17 + env: + POSTGRES_USER: ferro + POSTGRES_PASSWORD: ferro + POSTGRES_DB: ferro + ports: + - 5432:5432 + options: >- + --health-cmd "pg_isready -U ferro -d ferro" + --health-interval 10s + --health-timeout 5s + --health-retries 5 + env: + FERRO_SUPABASE_URL: postgresql://ferro:ferro@127.0.0.1:5432/ferro?sslmode=disable + FERRO_SHADOW_RUNTIME: "1" + FERRO_SHADOW_RUNTIME_STRICT: "1" + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.13' + + - name: Install UV + uses: astral-sh/setup-uv@v5 + with: + enable-cache: true + + - name: Set up Rust + uses: dtolnay/rust-toolchain@stable + + - name: Cache Rust build + uses: Swatinem/rust-cache@v2 + with: + prefix-key: v1 + cache-on-failure: true + + - name: Install dependencies + run: | + uv sync --only-group ci-test --no-install-project --python 3.13 + + - name: Build Rust extension + run: | + uv run maturin develop + + - name: Verify stable shadow reports + run: | + uv run pytest -v tests/test_shadow_reports.py::test_shadow_report_fixture_stable --db-backends=sqlite,postgres + check-conventional-commits: name: Check Conventional Commits runs-on: ubuntu-latest @@ -293,7 +378,7 @@ jobs: all-checks: name: All Checks Passed - needs: [lint-and-format, test-python-pr, test-python-main, test-python-backend-matrix, test-rust] + needs: [changed-shadow-paths, lint-and-format, test-python-pr, test-python-main, test-python-backend-matrix, test-shadow-reports-pr, test-rust] runs-on: ubuntu-latest if: always() steps: @@ -306,6 +391,7 @@ jobs: if ! ok "${{ needs.test-python-pr.result }}"; then exit 1; fi if ! ok "${{ needs.test-python-main.result }}"; then exit 1; fi if ! ok "${{ needs.test-python-backend-matrix.result }}"; then exit 1; fi + if ! ok "${{ needs.test-shadow-reports-pr.result }}"; then exit 1; fi if ! ok "${{ needs.test-rust.result }}"; then exit 1; fi echo "All checks passed!" diff --git a/docs/plans/2026-06-19-001-ir-first-roadmap.md b/docs/plans/2026-06-19-001-ir-first-roadmap.md index 3b31496..c0165db 100644 --- a/docs/plans/2026-06-19-001-ir-first-roadmap.md +++ b/docs/plans/2026-06-19-001-ir-first-roadmap.md @@ -484,6 +484,7 @@ Append updates as concise entries. - `2026-06-19` - Branching policy set: phase work branches from `feat/ir-first` and merges back into `feat/ir-first` until final promotion to `main`. - `2026-06-19` - Phase 0 completed and merged via [#75](https://github.com/syn54x/ferro-orm/pull/75). - `2026-06-19` - Phase 1 implementation landed on working branch: added `ferro-schema-ir`, Python->SchemaIR compiler, model-set fingerprinting, and stable representative snapshot checks. +- `2026-06-19` - Phase 2 scaffolding landed on working branch: internal shadow runtime flag/hook wiring, semantic comparison harness, stable SQLite/Postgres shadow report fixtures, and touched-path CI gate for shadow reports. ## Immediate next actions diff --git a/docs/plans/ir-first-migration-guide.md b/docs/plans/ir-first-migration-guide.md index 17da8c3..76ed0a5 100644 --- a/docs/plans/ir-first-migration-guide.md +++ b/docs/plans/ir-first-migration-guide.md @@ -43,7 +43,13 @@ No user-facing runtime behavior changes expected. ### Phase 2 -_TBD_ +No user-facing runtime behavior changes expected. Shadow planning is internal-only and defaults off. + +| Issue | Change | Impact | User action | Notes | +| --- | --- | --- | --- | --- | +| [#81](https://github.com/syn54x/ferro-orm/issues/81) | Internal shadow planner flag and runtime dual-run compare hooks for query/DDL planning | none | none | Internal env-controlled verification path (`FERRO_SHADOW_RUNTIME` / `FERRO_SHADOW_RUNTIME_STRICT`) for CI and maintainers; no public API behavior cutover | +| [#82](https://github.com/syn54x/ferro-orm/issues/82) | Semantic diff harness for query planning semantics and bind semantics | none | none | Test-only helper `_shadow_compare_query_plan_for_test` + backend-matrix strict checks | +| [#83](https://github.com/syn54x/ferro-orm/issues/83) | Stable SQLite/Postgres shadow reports + touched-path CI enforcement | none | none | Golden shadow reports in `tests/fixtures/shadow_reports/` and path-gated CI workflow job | ### Phase 3 diff --git a/src/backend.rs b/src/backend.rs index d718db7..3413c58 100644 --- a/src/backend.rs +++ b/src/backend.rs @@ -102,6 +102,8 @@ pub struct EngineHandle { spec: Option, /// When false, Ferro skips the identity map for this connection (no lookup/register on load). identity_map_enabled: bool, + /// Enables internal IR shadow-planner comparisons at runtime. + shadow_runtime_enabled: bool, } #[derive(Clone, Debug)] @@ -204,6 +206,7 @@ impl EngineHandle { pool: Arc::new(RwLock::new(pool)), spec: Some(spec), identity_map_enabled: true, + shadow_runtime_enabled: false, }) } @@ -217,6 +220,7 @@ impl EngineHandle { pool: Arc::new(RwLock::new(BackendPool::Sqlite(Arc::new(pool)))), spec: None, identity_map_enabled: true, + shadow_runtime_enabled: false, } } @@ -230,6 +234,7 @@ impl EngineHandle { pool: Arc::new(RwLock::new(BackendPool::Postgres(Arc::new(pool)))), spec: None, identity_map_enabled: true, + shadow_runtime_enabled: false, } } @@ -321,6 +326,17 @@ impl EngineHandle { self } + #[must_use] + pub fn is_shadow_runtime_enabled(&self) -> bool { + self.shadow_runtime_enabled + } + + #[must_use] + pub fn with_shadow_runtime_enabled(mut self, enabled: bool) -> Self { + self.shadow_runtime_enabled = enabled; + self + } + pub fn backend(&self) -> BackendKind { self.backend } diff --git a/src/connection.rs b/src/connection.rs index d70d8e1..49ffbb4 100644 --- a/src/connection.rs +++ b/src/connection.rs @@ -134,6 +134,15 @@ fn normalized_connection_name(name: Option) -> PyResult<(String, bool)> } } +fn shadow_runtime_enabled_from_env() -> bool { + std::env::var("FERRO_SHADOW_RUNTIME") + .map(|value| { + let value = value.trim().to_ascii_lowercase(); + value == "1" || value == "true" || value == "yes" || value == "on" + }) + .unwrap_or(false) +} + async fn connect_engine_handle( connection_url: &str, backend: BackendKind, @@ -233,7 +242,8 @@ pub fn connect( redacted_url, e )) })? - .with_identity_map_enabled(identity_map); + .with_identity_map_enabled(identity_map) + .with_shadow_runtime_enabled(shadow_runtime_enabled_from_env()); let engine_handle = Arc::new(engine_handle); diff --git a/src/ferro/_core.pyi b/src/ferro/_core.pyi index 94e477d..45ce6b0 100644 --- a/src/ferro/_core.pyi +++ b/src/ferro/_core.pyi @@ -55,6 +55,12 @@ def _render_migration_sql_for_test( """ ... +def _shadow_compare_query_plan_for_test( + query_json: str, dialect: str, operation: str = "select" +) -> str: + """Test-only: compare legacy vs QueryIR-roundtrip query planning semantics.""" + ... + async def fetch_all( cls: object, tx_id: Optional[str] = None, using: Optional[str] = None ) -> list[Any]: ... diff --git a/src/lib.rs b/src/lib.rs index c5bfc29..e97c4d2 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -112,6 +112,10 @@ fn _core(m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction!(operations::raw_execute, m)?)?; m.add_function(wrap_pyfunction!(operations::raw_fetch_all, m)?)?; m.add_function(wrap_pyfunction!(operations::raw_fetch_one, m)?)?; + m.add_function(wrap_pyfunction!( + operations::_shadow_compare_query_plan_for_test, + m + )?)?; m.add_function(wrap_pyfunction!(connection::reset_engine, m)?)?; m.add_function(wrap_pyfunction!(connection::set_default_connection, m)?)?; m.add_function(wrap_pyfunction!(clear_registry, m)?)?; diff --git a/src/migrate.rs b/src/migrate.rs index 4adbbbb..dc5c80b 100644 --- a/src/migrate.rs +++ b/src/migrate.rs @@ -305,6 +305,35 @@ pub fn plan_table_migration( Ok(plan) } +fn shadow_compare_migration_plan( + table_lower: &str, + schema: &serde_json::Value, + live: &[LiveColumn], + backend: SqlDialect, + opts: MigrateOptions, +) -> Result<(), String> { + let legacy = + plan_table_migration(table_lower, schema, live, backend, opts).map_err(|e| e.to_string())?; + let schema_roundtrip: serde_json::Value = + serde_json::from_str(&serde_json::to_string(schema).map_err(|e| e.to_string())?) + .map_err(|e| e.to_string())?; + let live_roundtrip = live.to_vec(); + let shadow = plan_table_migration(table_lower, &schema_roundtrip, &live_roundtrip, backend, opts) + .map_err(|e| e.to_string())?; + if legacy.statements == shadow.statements + && legacy.drop_columns == shadow.drop_columns + && legacy.warnings == shadow.warnings + { + return Ok(()); + } + Err(format!( + "shadow migration-plan mismatch for '{}': legacy={} shadow={}", + table_lower, + serde_json::to_string(&legacy.statements).unwrap_or_else(|_| "".to_string()), + serde_json::to_string(&shadow.statements).unwrap_or_else(|_| "".to_string()) + )) +} + /// Plan the `ADD COLUMN` (and any follow-up DDL) for a model column missing /// from the live table. fn plan_missing_column( @@ -592,6 +621,17 @@ pub async fn internal_migrate(engine: Arc, opts: MigrateOptions) - }; let mut plan = plan_table_migration(&table_lower, &schema, &live, backend, opts)?; + if engine.is_shadow_runtime_enabled() + && let Err(diff) = shadow_compare_migration_plan(&table_lower, &schema, &live, backend, opts) + { + crate::log_debug(format!("⚠️ Ferro shadow runtime mismatch: {diff}")); + if std::env::var("FERRO_SHADOW_RUNTIME_STRICT") + .map(|value| value == "1" || value.eq_ignore_ascii_case("true")) + .unwrap_or(false) + { + return Err(pyo3::exceptions::PyRuntimeError::new_err(diff)); + } + } if plan.is_empty() { warnings.append(&mut plan.warnings); continue; diff --git a/src/operations.rs b/src/operations.rs index d88c295..70a3c9e 100644 --- a/src/operations.rs +++ b/src/operations.rs @@ -6,7 +6,7 @@ use crate::backend::{ BackendKind, EngineBindValue, EngineHandle, EngineRow, EngineValue, NullKind, }; -use crate::query::QueryDef; +use crate::query::{QueryDef, query_def_from_ir_payload}; use crate::state::{ IDENTITY_MAP, MODEL_REGISTRY, RustValue, SqlDialect, TRANSACTION_REGISTRY, TransactionConnection, TransactionHandle, connection_for_route, engine_for_connection, @@ -16,6 +16,7 @@ use sea_query::{ Alias, Expr, Iden, InsertStatement, OnConflict, Order, PostgresQueryBuilder, Query, SimpleExpr, SqliteQueryBuilder, UpdateStatement, Value as SeaValue, }; +use serde::Serialize; use std::collections::{HashMap, HashSet}; use std::sync::Arc; @@ -353,6 +354,87 @@ macro_rules! sea_query_to_string_for_backend { }}; } +#[derive(Debug, Clone, Serialize, PartialEq, Eq)] +struct QueryPlanArtifact { + operation: String, + semantic_signature: Vec, + bind_semantics: Vec, +} + +fn bind_semantics(bind_values: &[SeaValue]) -> Vec { + engine_bind_values_from_sea(bind_values) + .into_iter() + .map(|value| format!("{value:?}")) + .collect() +} + +fn query_plan_artifact( + operation: &str, + query_def: &QueryDef, + bind_values: &[SeaValue], +) -> QueryPlanArtifact { + let mut semantic_signature = query_def + .semantic_signature() + .where_semantics + .into_iter() + .collect::>(); + semantic_signature.sort(); + + QueryPlanArtifact { + operation: operation.to_string(), + semantic_signature, + bind_semantics: bind_semantics(bind_values), + } +} + +fn shadow_artifact_from_ir_roundtrip( + operation: &str, + query_def: &QueryDef, + bind_values: &[SeaValue], +) -> Result { + let ir_payload = query_def.to_ir_payload(); + let ir_roundtrip = query_def_from_ir_payload(ir_payload)?; + Ok(query_plan_artifact(operation, &ir_roundtrip, bind_values)) +} + +fn compare_shadow_query_artifacts( + operation: &str, + query_def: &QueryDef, + bind_values: &[SeaValue], +) -> Result<(), String> { + let legacy = query_plan_artifact(operation, query_def, bind_values); + let shadow = shadow_artifact_from_ir_roundtrip(operation, query_def, bind_values)?; + if legacy == shadow { + return Ok(()); + } + let legacy_json = serde_json::to_string(&legacy).unwrap_or_else(|_| "".to_string()); + let shadow_json = serde_json::to_string(&shadow).unwrap_or_else(|_| "".to_string()); + Err(format!( + "shadow planner mismatch for '{operation}': legacy={legacy_json} shadow={shadow_json}" + )) +} + +fn maybe_compare_shadow_query_artifacts( + engine: &EngineHandle, + operation: &str, + query_def: &QueryDef, + bind_values: &[SeaValue], +) -> PyResult<()> { + if !engine.is_shadow_runtime_enabled() { + return Ok(()); + } + if let Err(diff) = compare_shadow_query_artifacts(operation, query_def, bind_values) { + crate::log_debug(format!("⚠️ Ferro shadow runtime mismatch: {diff}")); + if std::env::var("FERRO_SHADOW_RUNTIME_STRICT") + .map(|value| value == "1" || value.eq_ignore_ascii_case("true")) + .unwrap_or(false) + { + return Err(pyo3::exceptions::PyRuntimeError::new_err(diff)); + } + } + Ok(()) +} + /// On Postgres, cast text-like special columns in SELECT output so Python hydration /// sees the same string representation as SQLite. fn property_json_type(col_info: &serde_json::Value) -> Option<&str> { @@ -1635,6 +1717,7 @@ pub fn fetch_filtered<'py>( let (s, values) = sea_query_build_for_backend!(select, backend); (s, values, pk, schema.clone()) }; + maybe_compare_shadow_query_artifacts(&engine, "fetch_filtered", &query_def, &bind_values.0)?; let parsed_data = match tx_conn { Some(conn_arc) => { @@ -1793,6 +1876,7 @@ pub fn count_filtered( select.cond_where(query_def.to_condition_for_backend(backend)); sea_query_build_for_backend!(select, backend) }; + maybe_compare_shadow_query_artifacts(&engine, "count_filtered", &query_def, &bind_values.0)?; let engine_bind_values = engine_bind_values_from_sea(&bind_values.0); let count = match tx_conn { @@ -1951,6 +2035,7 @@ pub fn delete_filtered( .cond_where(query_def.to_condition_for_backend(backend)); sea_query_build_for_backend!(delete, backend) }; + maybe_compare_shadow_query_artifacts(&engine, "delete_filtered", &query_def, &bind_values.0)?; let rows_affected = execute_statement_with_optional_tx(&engine, tx_conn, &sql, &bind_values.0) @@ -2030,6 +2115,7 @@ pub fn update_filtered( } sea_query_build_for_backend!(update, backend) }; + maybe_compare_shadow_query_artifacts(&engine, "update_filtered", &query_def, &bind_values.0)?; let rows_affected = execute_statement_with_optional_tx(&engine, tx_conn, &sql, &bind_values.0) @@ -2434,6 +2520,66 @@ pub fn raw_fetch_one<'py>( }) } +#[pyfunction] +#[pyo3(name = "_shadow_compare_query_plan_for_test")] +#[pyo3(signature = (query_json, dialect, operation="select".to_string()))] +pub fn _shadow_compare_query_plan_for_test( + query_json: String, + dialect: String, + operation: String, +) -> PyResult { + let backend = match dialect.as_str() { + "postgres" => SqlDialect::Postgres, + "sqlite" => SqlDialect::Sqlite, + other => { + return Err(pyo3::exceptions::PyValueError::new_err(format!( + "Unknown dialect {:?}; expected 'postgres' or 'sqlite'", + other + ))); + } + }; + let query_def: QueryDef = serde_json::from_str(&query_json).map_err(|e| { + pyo3::exceptions::PyValueError::new_err(format!("Invalid query JSON: {}", e)) + })?; + let mut select_legacy = Query::select(); + select_legacy.from(Alias::new(query_def.model_name.to_lowercase())); + select_legacy.column((Alias::new(query_def.model_name.to_lowercase()), sea_query::Asterisk)); + select_legacy.cond_where(query_def.to_condition_for_backend(backend)); + if let Some(ref orders) = query_def.order_by { + for order in orders { + let dir = if order.direction.to_lowercase() == "desc" { + Order::Desc + } else { + Order::Asc + }; + select_legacy.order_by(Alias::new(&order.column), dir); + } + } + if let Some(limit) = query_def.limit { + select_legacy.limit(limit); + } + if let Some(offset) = query_def.offset { + select_legacy.offset(offset); + } + let (legacy_sql, legacy_values) = sea_query_build_for_backend!(select_legacy, backend); + let legacy = query_plan_artifact(&operation, &query_def, &legacy_values.0); + + let shadow = shadow_artifact_from_ir_roundtrip(&operation, &query_def, &legacy_values.0) + .map_err(pyo3::exceptions::PyRuntimeError::new_err)?; + let payload = serde_json::json!({ + "matches": legacy == shadow, + "legacy": { + "sql": legacy_sql, + "artifact": legacy, + }, + "shadow": { + "artifact": shadow, + }, + }); + serde_json::to_string(&payload) + .map_err(|e| pyo3::exceptions::PyRuntimeError::new_err(format!("Failed to encode JSON: {e}"))) +} + #[cfg(test)] mod m2m_value_tests { use super::{backend_column_value_expr, python_to_sea_value}; diff --git a/src/query.rs b/src/query.rs index 206c3ad..8c6b12d 100644 --- a/src/query.rs +++ b/src/query.rs @@ -1,10 +1,13 @@ use crate::state::{MODEL_REGISTRY, SqlDialect}; +use ferro_schema_ir::{ + QueryIrPayload, QueryNode as QueryIrNode, QueryOrderBy as QueryIrOrderBy, QueryValue, +}; use sea_query::{Alias, Condition, Expr, SimpleExpr}; -use serde::Deserialize; +use serde::{Deserialize, Serialize}; use serde_json::Value; use std::collections::HashMap; -#[derive(Debug, Deserialize)] +#[derive(Debug, Clone, Deserialize, Serialize)] pub struct QueryNode { pub is_compound: bool, pub operator: String, @@ -16,13 +19,13 @@ pub struct QueryNode { pub right: Option>, } -#[derive(Debug, Deserialize)] +#[derive(Debug, Clone, Deserialize, Serialize)] pub struct OrderBy { pub column: String, pub direction: String, } -#[derive(Debug, Deserialize)] +#[derive(Debug, Clone, Deserialize, Serialize)] pub struct M2mContext { pub join_table: String, pub source_col: String, @@ -30,7 +33,7 @@ pub struct M2mContext { pub source_id: Value, } -#[derive(Debug, Deserialize)] +#[derive(Debug, Clone, Deserialize, Serialize)] pub struct QueryDef { #[allow(dead_code)] pub model_name: String, @@ -45,6 +48,16 @@ pub struct QueryDef { pub postgres_enum_udt: HashMap, } +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct QuerySemanticSignature { + pub model_name: String, + pub where_semantics: Vec, + pub order_by: Vec<(String, String)>, + pub limit: Option, + pub offset: Option, + pub m2m: Option<(String, String, String, String)>, +} + impl QueryDef { pub fn to_condition_for_backend(&self, backend: SqlDialect) -> Condition { let mut condition = Condition::all(); @@ -259,6 +272,226 @@ impl QueryDef { } Expr::value(json_to_sea_value(val)) } + + pub fn to_ir_payload(&self) -> QueryIrPayload { + QueryIrPayload { + model_name: self.model_name.clone(), + where_clause: self.where_clause.iter().map(query_node_to_ir).collect(), + order_by: self + .order_by + .as_ref() + .map(|items| { + items + .iter() + .map(|item| QueryIrOrderBy { + column: item.column.clone(), + direction: item.direction.clone(), + }) + .collect() + }) + .unwrap_or_default(), + limit: self.limit, + offset: self.offset, + m2m: self + .m2m + .as_ref() + .and_then(|m2m| serde_json::to_value(m2m).ok()), + } + } + + pub fn semantic_signature(&self) -> QuerySemanticSignature { + QuerySemanticSignature { + model_name: self.model_name.clone(), + where_semantics: self + .where_clause + .iter() + .map(query_node_semantic_string) + .collect(), + order_by: self + .order_by + .as_ref() + .map(|items| { + items + .iter() + .map(|item| (item.column.clone(), item.direction.to_ascii_lowercase())) + .collect() + }) + .unwrap_or_default(), + limit: self.limit, + offset: self.offset, + m2m: self.m2m.as_ref().map(|m2m| { + ( + m2m.join_table.clone(), + m2m.source_col.clone(), + m2m.target_col.clone(), + query_value_semantic_string(&m2m.source_id), + ) + }), + } + } +} + +pub fn query_def_from_ir_payload(payload: QueryIrPayload) -> Result { + let m2m: Option = match payload.m2m { + Some(value) => serde_json::from_value(value) + .map(Some) + .map_err(|e| format!("invalid QueryIR m2m payload: {e}"))?, + None => None, + }; + Ok(QueryDef { + model_name: payload.model_name, + where_clause: payload.where_clause.iter().map(query_node_from_ir).collect(), + order_by: if payload.order_by.is_empty() { + None + } else { + Some( + payload + .order_by + .iter() + .map(|item| OrderBy { + column: item.column.clone(), + direction: item.direction.clone(), + }) + .collect(), + ) + }, + limit: payload.limit, + offset: payload.offset, + m2m, + postgres_enum_udt: HashMap::new(), + }) +} + +fn query_node_to_ir(node: &QueryNode) -> QueryIrNode { + if node.is_compound { + let left = node + .left + .as_ref() + .map(|inner| Box::new(query_node_to_ir(inner))) + .unwrap_or_else(|| { + Box::new(QueryIrNode::Leaf { + operator: "==".to_string(), + column: "__invalid__".to_string(), + value: QueryValue { + kind: "null".to_string(), + value: Value::Null, + }, + }) + }); + let right = node + .right + .as_ref() + .map(|inner| Box::new(query_node_to_ir(inner))) + .unwrap_or_else(|| { + Box::new(QueryIrNode::Leaf { + operator: "==".to_string(), + column: "__invalid__".to_string(), + value: QueryValue { + kind: "null".to_string(), + value: Value::Null, + }, + }) + }); + return QueryIrNode::Compound { + operator: node.operator.clone(), + left, + right, + }; + } + + let value = node.value.clone().unwrap_or(Value::Null); + QueryIrNode::Leaf { + operator: node.operator.clone(), + column: node.column.clone().unwrap_or_default(), + value: QueryValue { + kind: query_value_kind(&value).to_string(), + value, + }, + } +} + +fn query_node_from_ir(node: &QueryIrNode) -> QueryNode { + match node { + QueryIrNode::Leaf { + operator, + column, + value, + } => QueryNode { + is_compound: false, + operator: operator.clone(), + column: Some(column.clone()), + value: if value.value.is_null() { + None + } else { + Some(value.value.clone()) + }, + left: None, + right: None, + }, + QueryIrNode::Compound { + operator, + left, + right, + } => QueryNode { + is_compound: true, + operator: operator.clone(), + column: None, + value: None, + left: Some(Box::new(query_node_from_ir(left))), + right: Some(Box::new(query_node_from_ir(right))), + }, + } +} + +fn query_value_kind(value: &Value) -> &'static str { + match value { + Value::Null => "null", + Value::Bool(_) => "bool", + Value::Number(n) => { + if n.is_i64() || n.is_u64() { + "int" + } else { + "float" + } + } + Value::String(_) => "string", + Value::Array(_) => "array", + Value::Object(_) => "object", + } +} + +fn query_node_semantic_string(node: &QueryNode) -> String { + if node.is_compound { + let left = node + .left + .as_ref() + .map(|inner| query_node_semantic_string(inner)) + .unwrap_or_else(|| "".to_string()); + let right = node + .right + .as_ref() + .map(|inner| query_node_semantic_string(inner)) + .unwrap_or_else(|| "".to_string()); + return format!("({left} {} {right})", node.operator.to_ascii_uppercase()); + } + + let column = node + .column + .as_ref() + .map_or_else(|| "".to_string(), Clone::clone); + let value = node + .value + .as_ref() + .map_or_else(|| "null".to_string(), query_value_semantic_string); + format!("{} {} {}", column, node.operator, value) +} + +fn query_value_semantic_string(value: &Value) -> String { + match value { + Value::String(s) => format!("\"{s}\""), + Value::Null => "null".to_string(), + _ => value.to_string(), + } } /// Pick a typed SeaQuery `None` variant for a `NULL` value in @@ -765,4 +998,40 @@ mod tests { "decimal cast preserved until follow-up: {sql}" ); } + + #[test] + fn query_ir_roundtrip_preserves_semantics_signature() { + let query_def: QueryDef = serde_json::from_value(json!({ + "model_name": "Widget", + "where_clause": [ + { + "is_compound": true, + "operator": "OR", + "left": { + "is_compound": false, + "column": "age", + "operator": ">=", + "value": 18 + }, + "right": { + "is_compound": false, + "column": "name", + "operator": "LIKE", + "value": "a%" + } + } + ], + "order_by": [{"column": "age", "direction": "DESC"}], + "limit": 10, + "offset": 5, + "m2m": null + })) + .expect("query json must deserialize"); + let before = query_def.semantic_signature(); + let ir = query_def.to_ir_payload(); + let roundtrip = super::query_def_from_ir_payload(ir).expect("QueryIR roundtrip"); + let after = roundtrip.semantic_signature(); + + assert_eq!(before, after); + } } diff --git a/src/schema.rs b/src/schema.rs index 783d91b..5d5614a 100644 --- a/src/schema.rs +++ b/src/schema.rs @@ -631,6 +631,27 @@ fn build_create_table_sqls( (table_sql, index_sqls) } +fn shadow_compare_create_table_sqls( + name: &str, + schema: &serde_json::Value, + backend: SqlDialect, +) -> Result<(), String> { + let legacy = build_create_table_sqls(name, schema, backend); + let schema_roundtrip: serde_json::Value = + serde_json::from_str(&serde_json::to_string(schema).map_err(|e| e.to_string())?) + .map_err(|e| e.to_string())?; + let shadow = build_create_table_sqls(name, &schema_roundtrip, backend); + if legacy == shadow { + return Ok(()); + } + Err(format!( + "shadow create-table mismatch for '{}': legacy={} shadow={}", + name, + serde_json::to_string(&legacy).unwrap_or_else(|_| "".to_string()), + serde_json::to_string(&shadow).unwrap_or_else(|_| "".to_string()), + )) +} + /// Internal utility to create all registered tables in the database. /// /// This is used by both the `connect(auto_migrate=True)` flow and the @@ -650,6 +671,17 @@ pub async fn internal_create_tables(engine: Arc) -> PyResult<()> { for (name, schema) in order_schemas_for_creation(schemas) { let (sql, index_sqls) = build_create_table_sqls(&name, &schema, backend); + if engine.is_shadow_runtime_enabled() + && let Err(diff) = shadow_compare_create_table_sqls(&name, &schema, backend) + { + crate::log_debug(format!("⚠️ Ferro shadow runtime mismatch: {diff}")); + if std::env::var("FERRO_SHADOW_RUNTIME_STRICT") + .map(|value| value == "1" || value.eq_ignore_ascii_case("true")) + .unwrap_or(false) + { + return Err(pyo3::exceptions::PyRuntimeError::new_err(diff)); + } + } engine.execute_sql(&sql).await.map_err(|e| { pyo3::exceptions::PyRuntimeError::new_err(format!( diff --git a/tests/fixtures/shadow_reports/postgres.json b/tests/fixtures/shadow_reports/postgres.json new file mode 100644 index 0000000..da0adb6 --- /dev/null +++ b/tests/fixtures/shadow_reports/postgres.json @@ -0,0 +1,47 @@ +{ + "create_table": [ + "CREATE TABLE IF NOT EXISTS \"shadowuser\" ( \"age\" integer, \"id\" serial PRIMARY KEY, \"name\" varchar )", + [] + ], + "migration": [ + [ + "ALTER TABLE \"shadowuser\" ADD COLUMN \"age\" integer", + "ALTER TABLE \"shadowuser\" ADD COLUMN \"name\" varchar" + ], + [] + ], + "query_compare": { + "legacy": { + "artifact": { + "bind_semantics": [ + "I64(18)", + "String(\"a%\")", + "I64(5)", + "I64(1)" + ], + "operation": "select", + "semantic_signature": [ + "age >= 18", + "name LIKE \"a%\"" + ] + }, + "sql": "SELECT \"shadowuser\".* FROM \"shadowuser\" WHERE \"age\" >= $1 AND \"name\" LIKE $2 ORDER BY \"age\" DESC LIMIT $3 OFFSET $4" + }, + "matches": true, + "shadow": { + "artifact": { + "bind_semantics": [ + "I64(18)", + "String(\"a%\")", + "I64(5)", + "I64(1)" + ], + "operation": "select", + "semantic_signature": [ + "age >= 18", + "name LIKE \"a%\"" + ] + } + } + } +} diff --git a/tests/fixtures/shadow_reports/sqlite.json b/tests/fixtures/shadow_reports/sqlite.json new file mode 100644 index 0000000..ea3f0b3 --- /dev/null +++ b/tests/fixtures/shadow_reports/sqlite.json @@ -0,0 +1,47 @@ +{ + "create_table": [ + "CREATE TABLE IF NOT EXISTS \"shadowuser\" ( \"age\" integer, \"id\" integer PRIMARY KEY AUTOINCREMENT, \"name\" varchar )", + [] + ], + "migration": [ + [ + "ALTER TABLE \"shadowuser\" ADD COLUMN \"age\" integer", + "ALTER TABLE \"shadowuser\" ADD COLUMN \"name\" varchar" + ], + [] + ], + "query_compare": { + "legacy": { + "artifact": { + "bind_semantics": [ + "I64(18)", + "String(\"a%\")", + "I64(5)", + "I64(1)" + ], + "operation": "select", + "semantic_signature": [ + "age >= 18", + "name LIKE \"a%\"" + ] + }, + "sql": "SELECT \"shadowuser\".* FROM \"shadowuser\" WHERE \"age\" >= ? AND \"name\" LIKE ? ORDER BY \"age\" DESC LIMIT ? OFFSET ?" + }, + "matches": true, + "shadow": { + "artifact": { + "bind_semantics": [ + "I64(18)", + "String(\"a%\")", + "I64(5)", + "I64(1)" + ], + "operation": "select", + "semantic_signature": [ + "age >= 18", + "name LIKE \"a%\"" + ] + } + } + } +} diff --git a/tests/test_shadow_reports.py b/tests/test_shadow_reports.py new file mode 100644 index 0000000..18cdc80 --- /dev/null +++ b/tests/test_shadow_reports.py @@ -0,0 +1,102 @@ +import json +from pathlib import Path + +import pytest +from pydantic import Field + +from ferro import Model, connect +from ferro._core import ( + _render_create_table_sql_for_test, + _render_migration_sql_for_test, + _shadow_compare_query_plan_for_test, +) +from ferro.query.builder import _query_def_to_json + +pytestmark = pytest.mark.backend_matrix + +SHADOW_FIXTURES = Path(__file__).parent / "fixtures" / "shadow_reports" + + +def _report_for_backend(dialect: str) -> dict: + schema = { + "properties": { + "id": {"type": "integer", "primary_key": True, "autoincrement": True}, + "name": {"type": "string", "ferro_nullable": True}, + "age": {"type": "integer"}, + } + } + query_json = _query_def_to_json( + { + "model_name": "ShadowUser", + "where_clause": [ + {"is_compound": False, "column": "age", "operator": ">=", "value": 18}, + {"is_compound": False, "column": "name", "operator": "LIKE", "value": "a%"}, + ], + "order_by": [{"column": "age", "direction": "desc"}], + "limit": 5, + "offset": 1, + "m2m": None, + } + ) + query_compare = json.loads( + _shadow_compare_query_plan_for_test(query_json, dialect, "select") + ) + create_table_sql, create_table_extras = _render_create_table_sql_for_test( + "ShadowUser", json.dumps(schema), dialect + ) + migration_stmts, migration_warns = _render_migration_sql_for_test( + "ShadowUser", + json.dumps(schema), + json.dumps( + [ + { + "name": "id", + "declared_type": "integer", + "is_primary_key": True, + "is_nullable": False, + } + ] + ), + dialect, + True, + False, + ) + return { + "query_compare": query_compare, + "create_table": [create_table_sql, list(create_table_extras)], + "migration": [list(migration_stmts), list(migration_warns)], + } + + +def test_shadow_report_fixture_stable(db_backend: str) -> None: + report = _report_for_backend(db_backend) + fixture_path = SHADOW_FIXTURES / f"{db_backend}.json" + expected = json.loads(fixture_path.read_text(encoding="utf-8")) + assert report == expected + + +@pytest.mark.asyncio +async def test_shadow_runtime_strict_has_no_mismatch(monkeypatch: pytest.MonkeyPatch, db_url: str): + monkeypatch.setenv("FERRO_SHADOW_RUNTIME", "1") + monkeypatch.setenv("FERRO_SHADOW_RUNTIME_STRICT", "1") + + class ShadowRuntimeUser(Model): + id: int = Field(json_schema_extra={"primary_key": True}) + name: str + age: int + + await connect(db_url, auto_migrate=True) + await ShadowRuntimeUser(id=1, name="alice", age=22).save() + await ShadowRuntimeUser(id=2, name="bob", age=17).save() + + rows = await ShadowRuntimeUser.where(lambda t: t.age >= 18).all() + assert [row.name for row in rows] == ["alice"] + + count = await ShadowRuntimeUser.where(lambda t: t.age >= 18).count() + assert count == 1 + + updated = await ShadowRuntimeUser.where(lambda t: t.name == "alice").update(age=23) + assert updated == 1 + + deleted = await ShadowRuntimeUser.where(lambda t: t.name == "bob").delete() + assert deleted == 1