From 2e4d0fca356beeb139946506b196bfdd92a20729 Mon Sep 17 00:00:00 2001
From: JuliaEdom <uedomskikh@gmail.com>
Date: Mon, 29 Jun 2026 20:14:11 +0300
Subject: [PATCH] test(mutation): mutate sql_guard live, not just declared

The mutation gate (scripts/mutation_report.py) ran only the duckdb-free
sdk/agentflow/retry.py; every serving module was declared-only, documented as
blocked by "mutmut x duckdb". The real blocker was narrower: mutmut's trampoline
asserts a module name does not start with "src.", and the serving unit tests
also drag the duckdb-backed query engine into mutmut's mutants/ workspace.

sql_guard imports only sqlglot, so it can be mutated as a top-level `serving`
package (like retry.py is mutated as agentflow.retry, not src.*) against a narrow
duckdb-free test. Add that path:

- mutation_report.py: serving-target workspace handling (copy src/serving ->
  serving, also_copy = serving/config/scripts) + sql_guard in MODULE_TARGETS
  (threshold 0.90).
- tests/unit/test_sql_guard_mutation.py: narrow duckdb-free test, dual-context
  import (serving under mutmut, src.serving under pytest). Every pytest.raises
  pins the message so error-text mutants die; the Anonymous-vs-typed forbidden-
  function cases both exercise the .lower() casing (a surviving mutant there is a
  denylist bypass -- mutation testing surfaced that gap).
- Correct the now-stale "serving cannot be mutated / duckdb limitation" notes in
  pyproject [tool.mutmut] and test_mutmut_policy.py.

Verified in a Linux venv (mutmut 3.6): sql_guard scores 44/44 mutants killed
(100%). The remaining serving modules stay declared-only until each gets its own
duckdb-free unit test.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 pyproject.toml                        | 17 ++---
 scripts/mutation_report.py            | 32 +++++++---
 tests/unit/test_mutmut_policy.py      |  9 +--
 tests/unit/test_sql_guard_mutation.py | 90 +++++++++++++++++++++++++++
 4 files changed, 128 insertions(+), 20 deletions(-)
 create mode 100644 tests/unit/test_sql_guard_mutation.py

diff --git a/pyproject.toml b/pyproject.toml
index 17d48d2..c3ccbb2 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -203,15 +203,16 @@ asyncio_mode = "auto"
 [tool.mutmut]
 # This list is the *declared* mutation-target policy, enforced by
 # tests/unit/test_mutmut_policy.py (paths exist, define real logic, cover the
-# security-critical surfaces). It is NOT what the CI mutation gate actually
+# security-critical surfaces). It is a superset of what the CI gate actually
 # mutates: scripts/mutation_report.py drives the gate from its own
-# MODULE_TARGETS and currently mutates only the duckdb-free
-# sdk/agentflow/retry.py. The serving modules below cannot be mutation-tested
-# with the current harness -- they transitively import duckdb, whose compiled
-# subpackage (_duckdb._sqltypes) fails to import inside mutmut's mutants/
-# workspace and crashes the run (reproduced on Linux, so it is a mutmut x
-# duckdb limitation, not platform-specific). Extending the gate needs isolated
-# mutant execution (subprocess/spawn) or a different mutation tool. See
+# MODULE_TARGETS, which now mutates sdk/agentflow/retry.py AND
+# src/serving/semantic_layer/sql_guard.py live. sql_guard is mutated as a
+# top-level `serving` package against a duckdb-free narrow test: mutmut's
+# trampoline rejects a module name starting with `src.`, which (not duckdb) was
+# the real blocker. The remaining serving modules below stay declared-only for
+# now -- their unit tests pull the duckdb-backed query engine, so mutating them
+# in isolation needs a duckdb-free test per module (the pattern sql_guard now
+# uses); the blocker is the test import chain, not the module. See
 # scripts/mutation_report.py.
 paths_to_mutate = [
     "src/serving/api/auth/manager.py",
diff --git a/scripts/mutation_report.py b/scripts/mutation_report.py
index c36528b..3e03363 100644
--- a/scripts/mutation_report.py
+++ b/scripts/mutation_report.py
@@ -23,19 +23,26 @@ class ModuleTarget:
     tests: tuple[str, ...]
 
 
-# The CI mutation gate runs exactly these module/test pairs. It is deliberately
-# limited to the duckdb-free SDK surface: every serving module in the
-# [tool.mutmut] policy (pyproject.toml) transitively imports duckdb, and
-# duckdb's compiled subpackage (_duckdb._sqltypes) fails to import inside
-# mutmut's mutants/ workspace ("'_duckdb' is not a package"), crashing the run
-# with every mutant left "not checked". This is a mutmut x duckdb harness
-# limitation (reproduced on Linux), not a missing test. Adding serving targets
-# needs isolated mutant execution (subprocess/spawn) or a different tool.
+# The CI mutation gate runs exactly these module/test pairs. Each module is
+# mutated under a TOP-LEVEL package name (no "src." prefix): mutmut's trampoline
+# asserts the module name does not start with "src." and crashes otherwise --
+# that, not duckdb, was the real blocker for the serving modules. The fix is to
+# (a) copy the module so it imports as a top-level package and (b) pair it with a
+# NARROW test that does not pull the duckdb-backed engine import chain. So
+# retry.py mutates as agentflow.retry (from sdk/agentflow), and sql_guard mutates
+# as serving.semantic_layer.sql_guard (from src/serving) against a duckdb-free
+# test. Serving modules whose tests still need the duckdb engine (the
+# query/masking/auth surfaces) remain harder to isolate and stay declared-only in
+# the [tool.mutmut] policy until they get duckdb-free unit tests of their own.
 MODULE_TARGETS = {
     Path("agentflow/retry.py"): ModuleTarget(
         threshold=0.75,
         tests=("tests/sdk/test_retry.py",),
     ),
+    Path("serving/semantic_layer/sql_guard.py"): ModuleTarget(
+        threshold=0.90,
+        tests=("tests/unit/test_sql_guard_mutation.py",),
+    ),
 }
 
 STATUS_BY_EXIT_CODE = {
@@ -124,6 +131,8 @@ def render_mutmut_section(module_path: Path, tests: tuple[str, ...]) -> str:
     tests_block = "\n".join(f'    "{test_path}",' for test_path in tests)
     if module_path.parts and module_path.parts[0] == "agentflow":
         also_copy_block = '    "agentflow",\n    "config",\n    "scripts",'
+    elif module_path.parts and module_path.parts[0] == "serving":
+        also_copy_block = '    "serving",\n    "config",\n    "scripts",'
     else:
         also_copy_block = '    "src",\n    "config",\n    "sdk",\n    "scripts",'
     return (
@@ -161,12 +170,19 @@ def copy_or_link(source: Path, destination: Path) -> None:
 
 def prepare_workspace(workspace: Path, module_path: Path, target: ModuleTarget) -> None:
     is_agentflow_target = bool(module_path.parts) and module_path.parts[0] == "agentflow"
+    is_serving_target = bool(module_path.parts) and module_path.parts[0] == "serving"
     for name in WORKSPACE_LINKS:
         if is_agentflow_target and name == "sdk":
             continue
+        # The serving target imports as a top-level `serving` package copied from
+        # src/serving below; copying `src` too would shadow it with src.serving.
+        if is_serving_target and name == "src":
+            continue
         copy_or_link(ROOT / name, workspace / name)
     if is_agentflow_target:
         copy_or_link(ROOT / "sdk" / "agentflow", workspace / "agentflow")
+    if is_serving_target:
+        copy_or_link(ROOT / "src" / "serving", workspace / "serving")
     (workspace / "pyproject.toml").write_text(
         build_workspace_pyproject(module_path, target),
         encoding="utf-8",
diff --git a/tests/unit/test_mutmut_policy.py b/tests/unit/test_mutmut_policy.py
index 5edd6c3..11217ef 100644
--- a/tests/unit/test_mutmut_policy.py
+++ b/tests/unit/test_mutmut_policy.py
@@ -17,10 +17,11 @@
 #   - sql_builder: every entity/metric SQL string the engine executes is
 #     assembled here.
 # NOTE: these are the *declared* targets (intent). Actual mutation execution is
-# gated by scripts/mutation_report.py (MODULE_TARGETS), which currently runs only
-# the duckdb-free retry.py -- the serving modules below break mutmut's mutants/
-# workspace via duckdb (see that script's note). These assertions guard the
-# declared policy, not live mutation coverage.
+# gated by scripts/mutation_report.py (MODULE_TARGETS), which now runs retry.py
+# AND sql_guard.py live (sql_guard via a duckdb-free narrow test, mutated as a
+# top-level `serving` package so mutmut's trampoline accepts it). The other
+# serving modules below stay declared-only until they get duckdb-free unit tests
+# of their own. These assertions guard the declared policy, not live coverage.
 REQUIRED_MUTATION_TARGETS = {
     "src/serving/semantic_layer/sql_guard.py",
     "src/serving/api/auth/manager.py",
diff --git a/tests/unit/test_sql_guard_mutation.py b/tests/unit/test_sql_guard_mutation.py
new file mode 100644
index 0000000..2a40032
--- /dev/null
+++ b/tests/unit/test_sql_guard_mutation.py
@@ -0,0 +1,90 @@
+"""Narrow, duckdb-free mutation test for the sql_guard NL->SQL denylist.
+
+This is the test the mutation gate runs against
+src/serving/semantic_layer/sql_guard.py (see scripts/mutation_report.py
+MODULE_TARGETS). sql_guard imports only sqlglot, so keeping this test free of the
+QueryEngine/duckdb import chain lets mutmut mutate the module without dragging
+duckdb's compiled subpackage into its mutants/ workspace.
+
+Dual-context import: under the mutation harness the module is copied to the
+workspace as a top-level ``serving`` package (no ``src.`` prefix, which mutmut's
+trampoline rejects -- the same reason retry.py is mutated as ``agentflow.retry``
+and not ``src.…``); under ordinary pytest it lives under the ``src`` package.
+
+Every ``pytest.raises`` pins the message so the error-text mutants die too, and
+the Anonymous-vs-typed function cases both exercise the ``.lower()`` casing in
+the forbidden-function check -- a surviving mutant there is a denylist bypass.
+"""
+
+try:  # mutation-harness workspace exposes it as a top-level package
+    from serving.semantic_layer.sql_guard import UnsafeSQLError, validate_nl_sql
+except ImportError:  # ordinary pytest sees it under the src package
+    from src.serving.semantic_layer.sql_guard import UnsafeSQLError, validate_nl_sql
+
+import pytest
+
+ALLOWED = {"orders", "customers"}
+
+
+def test_plain_select_ok():
+    validate_nl_sql("SELECT id FROM orders", ALLOWED)
+
+
+def test_join_and_cte_ok():
+    validate_nl_sql(
+        "WITH c AS (SELECT id FROM customers) SELECT o.id FROM orders o JOIN c ON o.id = c.id",
+        ALLOWED,
+    )
+
+
+def test_known_table_case_insensitive_ok():
+    validate_nl_sql("SELECT id FROM ORDERS", ALLOWED)
+
+
+def test_unparseable_raises():
+    with pytest.raises(UnsafeSQLError, match="Unparseable SQL"):
+        validate_nl_sql("SELECT FROM", ALLOWED)
+
+
+def test_multiple_statements_raise():
+    with pytest.raises(UnsafeSQLError, match="Expected single statement, got 2"):
+        validate_nl_sql("SELECT 1; SELECT 2", ALLOWED)
+
+
+def test_non_select_drop_raises():
+    with pytest.raises(UnsafeSQLError, match="Statement type Drop not allowed"):
+        validate_nl_sql("DROP TABLE orders", ALLOWED)
+
+
+def test_insert_rejected():
+    with pytest.raises(UnsafeSQLError, match="Statement type Insert not allowed"):
+        validate_nl_sql("INSERT INTO orders VALUES (1)", ALLOWED)
+
+
+def test_typed_scan_function_in_subquery_raises():
+    # read_csv is modelled by sqlglot as a typed Func (exp.ReadCSV).
+    with pytest.raises(UnsafeSQLError, match="Forbidden function: read_csv"):
+        validate_nl_sql("SELECT id FROM orders WHERE id IN (SELECT read_csv('x.csv'))", ALLOWED)
+
+
+def test_anonymous_scan_function_in_projection_raises():
+    # postgres_scan is modelled as exp.Anonymous, exercising the name.lower()
+    # branch; pins the casing so a denylisted call cannot slip through under a
+    # different case (the .lower()->.upper() mutant must die here).
+    with pytest.raises(UnsafeSQLError, match="Forbidden function: postgres_scan"):
+        validate_nl_sql("SELECT postgres_scan('dsn', 'tbl') FROM orders", ALLOWED)
+
+
+def test_table_valued_function_raises():
+    with pytest.raises(UnsafeSQLError, match=r"^Table-valued functions not allowed$"):
+        validate_nl_sql("SELECT * FROM glob('*')", ALLOWED)
+
+
+def test_schema_qualified_table_raises():
+    with pytest.raises(UnsafeSQLError, match="Schema-qualified table names are not allowed"):
+        validate_nl_sql("SELECT id FROM victim.orders", ALLOWED)
+
+
+def test_unknown_table_raises():
+    with pytest.raises(UnsafeSQLError, match=r"Unknown tables: \['secrets'\]"):
+        validate_nl_sql("SELECT id FROM secrets", ALLOWED)