From 2e4d0fca356beeb139946506b196bfdd92a20729 Mon Sep 17 00:00:00 2001 From: JuliaEdom Date: Mon, 29 Jun 2026 20:14:11 +0300 Subject: [PATCH] test(mutation): mutate sql_guard live, not just declared The mutation gate (scripts/mutation_report.py) ran only the duckdb-free sdk/agentflow/retry.py; every serving module was declared-only, documented as blocked by "mutmut x duckdb". The real blocker was narrower: mutmut's trampoline asserts a module name does not start with "src.", and the serving unit tests also drag the duckdb-backed query engine into mutmut's mutants/ workspace. sql_guard imports only sqlglot, so it can be mutated as a top-level `serving` package (like retry.py is mutated as agentflow.retry, not src.*) against a narrow duckdb-free test. Add that path: - mutation_report.py: serving-target workspace handling (copy src/serving -> serving, also_copy = serving/config/scripts) + sql_guard in MODULE_TARGETS (threshold 0.90). - tests/unit/test_sql_guard_mutation.py: narrow duckdb-free test, dual-context import (serving under mutmut, src.serving under pytest). Every pytest.raises pins the message so error-text mutants die; the Anonymous-vs-typed forbidden- function cases both exercise the .lower() casing (a surviving mutant there is a denylist bypass -- mutation testing surfaced that gap). - Correct the now-stale "serving cannot be mutated / duckdb limitation" notes in pyproject [tool.mutmut] and test_mutmut_policy.py. Verified in a Linux venv (mutmut 3.6): sql_guard scores 44/44 mutants killed (100%). The remaining serving modules stay declared-only until each gets its own duckdb-free unit test. Co-Authored-By: Claude Opus 4.8 (1M context) --- pyproject.toml | 17 ++--- scripts/mutation_report.py | 32 +++++++--- tests/unit/test_mutmut_policy.py | 9 +-- tests/unit/test_sql_guard_mutation.py | 90 +++++++++++++++++++++++++++ 4 files changed, 128 insertions(+), 20 deletions(-) create mode 100644 tests/unit/test_sql_guard_mutation.py diff --git a/pyproject.toml b/pyproject.toml index 17d48d2..c3ccbb2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -203,15 +203,16 @@ asyncio_mode = "auto" [tool.mutmut] # This list is the *declared* mutation-target policy, enforced by # tests/unit/test_mutmut_policy.py (paths exist, define real logic, cover the -# security-critical surfaces). It is NOT what the CI mutation gate actually +# security-critical surfaces). It is a superset of what the CI gate actually # mutates: scripts/mutation_report.py drives the gate from its own -# MODULE_TARGETS and currently mutates only the duckdb-free -# sdk/agentflow/retry.py. The serving modules below cannot be mutation-tested -# with the current harness -- they transitively import duckdb, whose compiled -# subpackage (_duckdb._sqltypes) fails to import inside mutmut's mutants/ -# workspace and crashes the run (reproduced on Linux, so it is a mutmut x -# duckdb limitation, not platform-specific). Extending the gate needs isolated -# mutant execution (subprocess/spawn) or a different mutation tool. See +# MODULE_TARGETS, which now mutates sdk/agentflow/retry.py AND +# src/serving/semantic_layer/sql_guard.py live. sql_guard is mutated as a +# top-level `serving` package against a duckdb-free narrow test: mutmut's +# trampoline rejects a module name starting with `src.`, which (not duckdb) was +# the real blocker. The remaining serving modules below stay declared-only for +# now -- their unit tests pull the duckdb-backed query engine, so mutating them +# in isolation needs a duckdb-free test per module (the pattern sql_guard now +# uses); the blocker is the test import chain, not the module. See # scripts/mutation_report.py. paths_to_mutate = [ "src/serving/api/auth/manager.py", diff --git a/scripts/mutation_report.py b/scripts/mutation_report.py index c36528b..3e03363 100644 --- a/scripts/mutation_report.py +++ b/scripts/mutation_report.py @@ -23,19 +23,26 @@ class ModuleTarget: tests: tuple[str, ...] -# The CI mutation gate runs exactly these module/test pairs. It is deliberately -# limited to the duckdb-free SDK surface: every serving module in the -# [tool.mutmut] policy (pyproject.toml) transitively imports duckdb, and -# duckdb's compiled subpackage (_duckdb._sqltypes) fails to import inside -# mutmut's mutants/ workspace ("'_duckdb' is not a package"), crashing the run -# with every mutant left "not checked". This is a mutmut x duckdb harness -# limitation (reproduced on Linux), not a missing test. Adding serving targets -# needs isolated mutant execution (subprocess/spawn) or a different tool. +# The CI mutation gate runs exactly these module/test pairs. Each module is +# mutated under a TOP-LEVEL package name (no "src." prefix): mutmut's trampoline +# asserts the module name does not start with "src." and crashes otherwise -- +# that, not duckdb, was the real blocker for the serving modules. The fix is to +# (a) copy the module so it imports as a top-level package and (b) pair it with a +# NARROW test that does not pull the duckdb-backed engine import chain. So +# retry.py mutates as agentflow.retry (from sdk/agentflow), and sql_guard mutates +# as serving.semantic_layer.sql_guard (from src/serving) against a duckdb-free +# test. Serving modules whose tests still need the duckdb engine (the +# query/masking/auth surfaces) remain harder to isolate and stay declared-only in +# the [tool.mutmut] policy until they get duckdb-free unit tests of their own. MODULE_TARGETS = { Path("agentflow/retry.py"): ModuleTarget( threshold=0.75, tests=("tests/sdk/test_retry.py",), ), + Path("serving/semantic_layer/sql_guard.py"): ModuleTarget( + threshold=0.90, + tests=("tests/unit/test_sql_guard_mutation.py",), + ), } STATUS_BY_EXIT_CODE = { @@ -124,6 +131,8 @@ def render_mutmut_section(module_path: Path, tests: tuple[str, ...]) -> str: tests_block = "\n".join(f' "{test_path}",' for test_path in tests) if module_path.parts and module_path.parts[0] == "agentflow": also_copy_block = ' "agentflow",\n "config",\n "scripts",' + elif module_path.parts and module_path.parts[0] == "serving": + also_copy_block = ' "serving",\n "config",\n "scripts",' else: also_copy_block = ' "src",\n "config",\n "sdk",\n "scripts",' return ( @@ -161,12 +170,19 @@ def copy_or_link(source: Path, destination: Path) -> None: def prepare_workspace(workspace: Path, module_path: Path, target: ModuleTarget) -> None: is_agentflow_target = bool(module_path.parts) and module_path.parts[0] == "agentflow" + is_serving_target = bool(module_path.parts) and module_path.parts[0] == "serving" for name in WORKSPACE_LINKS: if is_agentflow_target and name == "sdk": continue + # The serving target imports as a top-level `serving` package copied from + # src/serving below; copying `src` too would shadow it with src.serving. + if is_serving_target and name == "src": + continue copy_or_link(ROOT / name, workspace / name) if is_agentflow_target: copy_or_link(ROOT / "sdk" / "agentflow", workspace / "agentflow") + if is_serving_target: + copy_or_link(ROOT / "src" / "serving", workspace / "serving") (workspace / "pyproject.toml").write_text( build_workspace_pyproject(module_path, target), encoding="utf-8", diff --git a/tests/unit/test_mutmut_policy.py b/tests/unit/test_mutmut_policy.py index 5edd6c3..11217ef 100644 --- a/tests/unit/test_mutmut_policy.py +++ b/tests/unit/test_mutmut_policy.py @@ -17,10 +17,11 @@ # - sql_builder: every entity/metric SQL string the engine executes is # assembled here. # NOTE: these are the *declared* targets (intent). Actual mutation execution is -# gated by scripts/mutation_report.py (MODULE_TARGETS), which currently runs only -# the duckdb-free retry.py -- the serving modules below break mutmut's mutants/ -# workspace via duckdb (see that script's note). These assertions guard the -# declared policy, not live mutation coverage. +# gated by scripts/mutation_report.py (MODULE_TARGETS), which now runs retry.py +# AND sql_guard.py live (sql_guard via a duckdb-free narrow test, mutated as a +# top-level `serving` package so mutmut's trampoline accepts it). The other +# serving modules below stay declared-only until they get duckdb-free unit tests +# of their own. These assertions guard the declared policy, not live coverage. REQUIRED_MUTATION_TARGETS = { "src/serving/semantic_layer/sql_guard.py", "src/serving/api/auth/manager.py", diff --git a/tests/unit/test_sql_guard_mutation.py b/tests/unit/test_sql_guard_mutation.py new file mode 100644 index 0000000..2a40032 --- /dev/null +++ b/tests/unit/test_sql_guard_mutation.py @@ -0,0 +1,90 @@ +"""Narrow, duckdb-free mutation test for the sql_guard NL->SQL denylist. + +This is the test the mutation gate runs against +src/serving/semantic_layer/sql_guard.py (see scripts/mutation_report.py +MODULE_TARGETS). sql_guard imports only sqlglot, so keeping this test free of the +QueryEngine/duckdb import chain lets mutmut mutate the module without dragging +duckdb's compiled subpackage into its mutants/ workspace. + +Dual-context import: under the mutation harness the module is copied to the +workspace as a top-level ``serving`` package (no ``src.`` prefix, which mutmut's +trampoline rejects -- the same reason retry.py is mutated as ``agentflow.retry`` +and not ``src.…``); under ordinary pytest it lives under the ``src`` package. + +Every ``pytest.raises`` pins the message so the error-text mutants die too, and +the Anonymous-vs-typed function cases both exercise the ``.lower()`` casing in +the forbidden-function check -- a surviving mutant there is a denylist bypass. +""" + +try: # mutation-harness workspace exposes it as a top-level package + from serving.semantic_layer.sql_guard import UnsafeSQLError, validate_nl_sql +except ImportError: # ordinary pytest sees it under the src package + from src.serving.semantic_layer.sql_guard import UnsafeSQLError, validate_nl_sql + +import pytest + +ALLOWED = {"orders", "customers"} + + +def test_plain_select_ok(): + validate_nl_sql("SELECT id FROM orders", ALLOWED) + + +def test_join_and_cte_ok(): + validate_nl_sql( + "WITH c AS (SELECT id FROM customers) SELECT o.id FROM orders o JOIN c ON o.id = c.id", + ALLOWED, + ) + + +def test_known_table_case_insensitive_ok(): + validate_nl_sql("SELECT id FROM ORDERS", ALLOWED) + + +def test_unparseable_raises(): + with pytest.raises(UnsafeSQLError, match="Unparseable SQL"): + validate_nl_sql("SELECT FROM", ALLOWED) + + +def test_multiple_statements_raise(): + with pytest.raises(UnsafeSQLError, match="Expected single statement, got 2"): + validate_nl_sql("SELECT 1; SELECT 2", ALLOWED) + + +def test_non_select_drop_raises(): + with pytest.raises(UnsafeSQLError, match="Statement type Drop not allowed"): + validate_nl_sql("DROP TABLE orders", ALLOWED) + + +def test_insert_rejected(): + with pytest.raises(UnsafeSQLError, match="Statement type Insert not allowed"): + validate_nl_sql("INSERT INTO orders VALUES (1)", ALLOWED) + + +def test_typed_scan_function_in_subquery_raises(): + # read_csv is modelled by sqlglot as a typed Func (exp.ReadCSV). + with pytest.raises(UnsafeSQLError, match="Forbidden function: read_csv"): + validate_nl_sql("SELECT id FROM orders WHERE id IN (SELECT read_csv('x.csv'))", ALLOWED) + + +def test_anonymous_scan_function_in_projection_raises(): + # postgres_scan is modelled as exp.Anonymous, exercising the name.lower() + # branch; pins the casing so a denylisted call cannot slip through under a + # different case (the .lower()->.upper() mutant must die here). + with pytest.raises(UnsafeSQLError, match="Forbidden function: postgres_scan"): + validate_nl_sql("SELECT postgres_scan('dsn', 'tbl') FROM orders", ALLOWED) + + +def test_table_valued_function_raises(): + with pytest.raises(UnsafeSQLError, match=r"^Table-valued functions not allowed$"): + validate_nl_sql("SELECT * FROM glob('*')", ALLOWED) + + +def test_schema_qualified_table_raises(): + with pytest.raises(UnsafeSQLError, match="Schema-qualified table names are not allowed"): + validate_nl_sql("SELECT id FROM victim.orders", ALLOWED) + + +def test_unknown_table_raises(): + with pytest.raises(UnsafeSQLError, match=r"Unknown tables: \['secrets'\]"): + validate_nl_sql("SELECT id FROM secrets", ALLOWED)