Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 9 additions & 8 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -203,15 +203,16 @@ asyncio_mode = "auto"
[tool.mutmut]
# This list is the *declared* mutation-target policy, enforced by
# tests/unit/test_mutmut_policy.py (paths exist, define real logic, cover the
# security-critical surfaces). It is NOT what the CI mutation gate actually
# security-critical surfaces). It is a superset of what the CI gate actually
# mutates: scripts/mutation_report.py drives the gate from its own
# MODULE_TARGETS and currently mutates only the duckdb-free
# sdk/agentflow/retry.py. The serving modules below cannot be mutation-tested
# with the current harness -- they transitively import duckdb, whose compiled
# subpackage (_duckdb._sqltypes) fails to import inside mutmut's mutants/
# workspace and crashes the run (reproduced on Linux, so it is a mutmut x
# duckdb limitation, not platform-specific). Extending the gate needs isolated
# mutant execution (subprocess/spawn) or a different mutation tool. See
# MODULE_TARGETS, which now mutates sdk/agentflow/retry.py AND
# src/serving/semantic_layer/sql_guard.py live. sql_guard is mutated as a
# top-level `serving` package against a duckdb-free narrow test: mutmut's
# trampoline rejects a module name starting with `src.`, which (not duckdb) was
# the real blocker. The remaining serving modules below stay declared-only for
# now -- their unit tests pull the duckdb-backed query engine, so mutating them
# in isolation needs a duckdb-free test per module (the pattern sql_guard now
# uses); the blocker is the test import chain, not the module. See
# scripts/mutation_report.py.
paths_to_mutate = [
"src/serving/api/auth/manager.py",
Expand Down
32 changes: 24 additions & 8 deletions scripts/mutation_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,26 @@ class ModuleTarget:
tests: tuple[str, ...]


# The CI mutation gate runs exactly these module/test pairs. It is deliberately
# limited to the duckdb-free SDK surface: every serving module in the
# [tool.mutmut] policy (pyproject.toml) transitively imports duckdb, and
# duckdb's compiled subpackage (_duckdb._sqltypes) fails to import inside
# mutmut's mutants/ workspace ("'_duckdb' is not a package"), crashing the run
# with every mutant left "not checked". This is a mutmut x duckdb harness
# limitation (reproduced on Linux), not a missing test. Adding serving targets
# needs isolated mutant execution (subprocess/spawn) or a different tool.
# The CI mutation gate runs exactly these module/test pairs. Each module is
# mutated under a TOP-LEVEL package name (no "src." prefix): mutmut's trampoline
# asserts the module name does not start with "src." and crashes otherwise --
# that, not duckdb, was the real blocker for the serving modules. The fix is to
# (a) copy the module so it imports as a top-level package and (b) pair it with a
# NARROW test that does not pull the duckdb-backed engine import chain. So
# retry.py mutates as agentflow.retry (from sdk/agentflow), and sql_guard mutates
# as serving.semantic_layer.sql_guard (from src/serving) against a duckdb-free
# test. Serving modules whose tests still need the duckdb engine (the
# query/masking/auth surfaces) remain harder to isolate and stay declared-only in
# the [tool.mutmut] policy until they get duckdb-free unit tests of their own.
MODULE_TARGETS = {
Path("agentflow/retry.py"): ModuleTarget(
threshold=0.75,
tests=("tests/sdk/test_retry.py",),
),
Path("serving/semantic_layer/sql_guard.py"): ModuleTarget(
threshold=0.90,
tests=("tests/unit/test_sql_guard_mutation.py",),
),
}

STATUS_BY_EXIT_CODE = {
Expand Down Expand Up @@ -124,6 +131,8 @@ def render_mutmut_section(module_path: Path, tests: tuple[str, ...]) -> str:
tests_block = "\n".join(f' "{test_path}",' for test_path in tests)
if module_path.parts and module_path.parts[0] == "agentflow":
also_copy_block = ' "agentflow",\n "config",\n "scripts",'
elif module_path.parts and module_path.parts[0] == "serving":
also_copy_block = ' "serving",\n "config",\n "scripts",'
else:
also_copy_block = ' "src",\n "config",\n "sdk",\n "scripts",'
return (
Expand Down Expand Up @@ -161,12 +170,19 @@ def copy_or_link(source: Path, destination: Path) -> None:

def prepare_workspace(workspace: Path, module_path: Path, target: ModuleTarget) -> None:
is_agentflow_target = bool(module_path.parts) and module_path.parts[0] == "agentflow"
is_serving_target = bool(module_path.parts) and module_path.parts[0] == "serving"
for name in WORKSPACE_LINKS:
if is_agentflow_target and name == "sdk":
continue
# The serving target imports as a top-level `serving` package copied from
# src/serving below; copying `src` too would shadow it with src.serving.
if is_serving_target and name == "src":
continue
copy_or_link(ROOT / name, workspace / name)
if is_agentflow_target:
copy_or_link(ROOT / "sdk" / "agentflow", workspace / "agentflow")
if is_serving_target:
copy_or_link(ROOT / "src" / "serving", workspace / "serving")
(workspace / "pyproject.toml").write_text(
build_workspace_pyproject(module_path, target),
encoding="utf-8",
Expand Down
9 changes: 5 additions & 4 deletions tests/unit/test_mutmut_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,11 @@
# - sql_builder: every entity/metric SQL string the engine executes is
# assembled here.
# NOTE: these are the *declared* targets (intent). Actual mutation execution is
# gated by scripts/mutation_report.py (MODULE_TARGETS), which currently runs only
# the duckdb-free retry.py -- the serving modules below break mutmut's mutants/
# workspace via duckdb (see that script's note). These assertions guard the
# declared policy, not live mutation coverage.
# gated by scripts/mutation_report.py (MODULE_TARGETS), which now runs retry.py
# AND sql_guard.py live (sql_guard via a duckdb-free narrow test, mutated as a
# top-level `serving` package so mutmut's trampoline accepts it). The other
# serving modules below stay declared-only until they get duckdb-free unit tests
# of their own. These assertions guard the declared policy, not live coverage.
REQUIRED_MUTATION_TARGETS = {
"src/serving/semantic_layer/sql_guard.py",
"src/serving/api/auth/manager.py",
Expand Down
90 changes: 90 additions & 0 deletions tests/unit/test_sql_guard_mutation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
"""Narrow, duckdb-free mutation test for the sql_guard NL->SQL denylist.

This is the test the mutation gate runs against
src/serving/semantic_layer/sql_guard.py (see scripts/mutation_report.py
MODULE_TARGETS). sql_guard imports only sqlglot, so keeping this test free of the
QueryEngine/duckdb import chain lets mutmut mutate the module without dragging
duckdb's compiled subpackage into its mutants/ workspace.

Dual-context import: under the mutation harness the module is copied to the
workspace as a top-level ``serving`` package (no ``src.`` prefix, which mutmut's
trampoline rejects -- the same reason retry.py is mutated as ``agentflow.retry``
and not ``src.…``); under ordinary pytest it lives under the ``src`` package.

Every ``pytest.raises`` pins the message so the error-text mutants die too, and
the Anonymous-vs-typed function cases both exercise the ``.lower()`` casing in
the forbidden-function check -- a surviving mutant there is a denylist bypass.
"""

try: # mutation-harness workspace exposes it as a top-level package
from serving.semantic_layer.sql_guard import UnsafeSQLError, validate_nl_sql
except ImportError: # ordinary pytest sees it under the src package
from src.serving.semantic_layer.sql_guard import UnsafeSQLError, validate_nl_sql

import pytest

ALLOWED = {"orders", "customers"}


def test_plain_select_ok():
validate_nl_sql("SELECT id FROM orders", ALLOWED)


def test_join_and_cte_ok():
validate_nl_sql(
"WITH c AS (SELECT id FROM customers) SELECT o.id FROM orders o JOIN c ON o.id = c.id",
ALLOWED,
)


def test_known_table_case_insensitive_ok():
validate_nl_sql("SELECT id FROM ORDERS", ALLOWED)


def test_unparseable_raises():
with pytest.raises(UnsafeSQLError, match="Unparseable SQL"):
validate_nl_sql("SELECT FROM", ALLOWED)


def test_multiple_statements_raise():
with pytest.raises(UnsafeSQLError, match="Expected single statement, got 2"):
validate_nl_sql("SELECT 1; SELECT 2", ALLOWED)


def test_non_select_drop_raises():
with pytest.raises(UnsafeSQLError, match="Statement type Drop not allowed"):
validate_nl_sql("DROP TABLE orders", ALLOWED)


def test_insert_rejected():
with pytest.raises(UnsafeSQLError, match="Statement type Insert not allowed"):
validate_nl_sql("INSERT INTO orders VALUES (1)", ALLOWED)


def test_typed_scan_function_in_subquery_raises():
# read_csv is modelled by sqlglot as a typed Func (exp.ReadCSV).
with pytest.raises(UnsafeSQLError, match="Forbidden function: read_csv"):
validate_nl_sql("SELECT id FROM orders WHERE id IN (SELECT read_csv('x.csv'))", ALLOWED)


def test_anonymous_scan_function_in_projection_raises():
# postgres_scan is modelled as exp.Anonymous, exercising the name.lower()
# branch; pins the casing so a denylisted call cannot slip through under a
# different case (the .lower()->.upper() mutant must die here).
with pytest.raises(UnsafeSQLError, match="Forbidden function: postgres_scan"):
validate_nl_sql("SELECT postgres_scan('dsn', 'tbl') FROM orders", ALLOWED)


def test_table_valued_function_raises():
with pytest.raises(UnsafeSQLError, match=r"^Table-valued functions not allowed$"):
validate_nl_sql("SELECT * FROM glob('*')", ALLOWED)


def test_schema_qualified_table_raises():
with pytest.raises(UnsafeSQLError, match="Schema-qualified table names are not allowed"):
validate_nl_sql("SELECT id FROM victim.orders", ALLOWED)


def test_unknown_table_raises():
with pytest.raises(UnsafeSQLError, match=r"Unknown tables: \['secrets'\]"):
validate_nl_sql("SELECT id FROM secrets", ALLOWED)