Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 14 additions & 12 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -208,18 +208,20 @@ asyncio_mode = "auto"
# MODULE_TARGETS, which now mutates sdk/agentflow/retry.py,
# src/serving/semantic_layer/sql_guard.py, src/serving/masking.py,
# src/serving/api/rate_limiter.py, src/serving/semantic_layer/query/sql_builder.py,
# src/serving/semantic_layer/query/nl_queries.py AND src/serving/api/auth/manager.py
# live. The serving modules are mutated as a top-level `serving` package against
# duckdb-free narrow tests: mutmut's trampoline rejects a module name starting with
# `src.`, which (not duckdb) was the real blocker. manager.py runs at an honest
# 0.80 threshold (not 0.90): it is a large stateful auth class whose residual
# survivors are equivalent mutants (structured-logging args, model_copy updates
# equal to their defaults, redis-url strings masked by the `_redis = None`
# override, the env-only-dead write path) -- every behaviour-reachable mutant,
# including every auth-bypass and throttle off-by-one, is killed. The remaining
# declared serving surface -- auth/key_rotation -- stays declared-only until it
# gets a duckdb-free test of its own (the pattern the live modules use); the
# blocker is the test import chain, not the module. See scripts/mutation_report.py.
# src/serving/semantic_layer/query/nl_queries.py, src/serving/api/auth/manager.py
# AND src/serving/api/auth/key_rotation.py live -- every declared serving surface
# is now mutated. The serving modules are mutated as a top-level `serving` package
# against duckdb-free narrow tests: mutmut's trampoline rejects a module name
# starting with `src.`, which (not duckdb) was the real blocker. (The gate runner
# also strips the repo's relative pytest --basetemp inside the mutmut workspace --
# under py3.11 it breaks coverage->mutant mapping for file-I/O targets like
# key_rotation; see scripts/mutation_report.py.) manager.py runs at an honest 0.80
# threshold (not 0.90): it is a
# large stateful auth class whose residual survivors are equivalent mutants
# (structured-logging args, model_copy updates equal to their defaults, redis-url
# strings masked by the `_redis = None` override, the env-only-dead write path) --
# every behaviour-reachable mutant, including every auth-bypass and throttle
# off-by-one, is killed. See scripts/mutation_report.py.
paths_to_mutate = [
"src/serving/api/auth/manager.py",
"src/serving/api/auth/key_rotation.py",
Expand Down
36 changes: 30 additions & 6 deletions scripts/mutation_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ class ModuleTarget:
# (a) copy the module so it imports as a top-level package and (b) pair it with a
# NARROW test that does not pull the duckdb-backed engine import chain. So
# retry.py mutates as agentflow.retry (from sdk/agentflow), and sql_guard,
# masking, rate_limiter, sql_builder, nl_queries and auth/manager mutate as
# serving.* (from src/serving) against duckdb-free tests. Each duckdb-free test
# masking, rate_limiter, sql_builder, nl_queries, auth/manager and
# auth/key_rotation mutate as serving.* (from src/serving) against duckdb-free
# tests. Each duckdb-free test
# also avoids fixtures and calls the module's methods directly: under
# mutate_only_covered_lines a fixture-built object left every method line
# uncovered, so only __init__ got mutated. rate_limiter additionally imports
Expand All @@ -43,10 +44,13 @@ class ModuleTarget:
# helpers) before import. auth/manager imports as the auth package whose __init__
# imports duckdb plus the key_rotation/usage_table chain, but manager.py itself
# never calls duckdb (all usage-table I/O lives in usage_table.py), so its test
# swaps in a fake top-level `duckdb` module and mutates manager duckdb-free. The
# only remaining declared-but-not-live serving surface is auth/key_rotation,
# which uses the duckdb connection directly; it stays declared-only in the
# [tool.mutmut] policy until it gets a duckdb-free unit test of its own.
# swaps in a fake top-level `duckdb` module and mutates manager duckdb-free.
# auth/key_rotation does the same (fake `duckdb`, stub the connect path) and pins
# the create/rotate/revoke/grace lifecycle; it is the last serving surface to go
# live. Its mutants are only mappable because build_workspace_pyproject drops the
# repo's relative pytest --basetemp from the workspace config -- under py3.11 that
# relative tmp path breaks coverage.py's line->mutant attribution for key_rotation
# (which writes its key store under tmp_path) and the module scores zero.
MODULE_TARGETS = {
Path("agentflow/retry.py"): ModuleTarget(
threshold=0.75,
Expand Down Expand Up @@ -91,6 +95,18 @@ class ModuleTarget:
threshold=0.80,
tests=("tests/unit/test_auth_manager_mutation.py",),
),
# key_rotation runs at 0.90. Its residual survivors (local mutmut: 21 of 365)
# are documented equivalents: wall-clock boundary flips on datetime.now(UTC)
# comparisons, the revoke-prune / timer-cancel masked by load()'s blanket
# cancel+reprune, model_copy "key" popped from the storage payload, and
# write_text encoding/newline platform-equivalents. Every behaviour-reachable
# create/rotate/revoke/grace mutant is killed. The three duckdb-querying usage
# methods are an observability surface (not an auth boundary) and are stubbed
# out of the duckdb-free harness, pinned instead by tests/unit/test_key_rotation.py.
Path("serving/api/auth/key_rotation.py"): ModuleTarget(
threshold=0.90,
tests=("tests/unit/test_key_rotation_mutation.py",),
),
}

STATUS_BY_EXIT_CODE = {
Expand Down Expand Up @@ -200,6 +216,14 @@ def render_mutmut_section(module_path: Path, tests: tuple[str, ...]) -> str:

def build_workspace_pyproject(module_path: Path, target: ModuleTarget) -> str:
original = (ROOT / "pyproject.toml").read_text(encoding="utf-8")
# Drop the repo's RELATIVE pytest --basetemp (.tmp/pytest-basetemp) from the
# workspace config. Inside the mutmut workspace under py3.11 that relative tmp
# path defeats coverage.py's line->file attribution for file-I/O-heavy targets
# (auth/key_rotation writes its rotated key store under tmp_path), so mutmut
# maps zero tests to the generated mutants and reports "could not find any test
# case for any mutant" -> the module scores zero. Default (absolute) basetemp
# attributes coverage correctly. (TOML tolerates the trailing comma left behind.)
original = re.sub(r'\s*,?\s*"--basetemp=[^"]*"', "", original)
rendered = render_mutmut_section(module_path, target.tests)
if not MUTMUT_SECTION_RE.search(original):
raise RuntimeError("Could not find [tool.mutmut] section in pyproject.toml")
Expand Down
9 changes: 4 additions & 5 deletions tests/unit/test_mutmut_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,10 @@
# assembled here.
# NOTE: these are the *declared* targets (intent). Actual mutation execution is
# gated by scripts/mutation_report.py (MODULE_TARGETS), which now runs retry.py,
# sql_guard.py, masking.py, rate_limiter.py, sql_builder.py, nl_queries.py AND
# auth/manager.py live (the serving modules via duckdb-free narrow tests, mutated
# as a top-level `serving` package so mutmut's trampoline accepts them). The only
# remaining declared serving surface -- auth/key_rotation -- stays declared-only
# until it gets a duckdb-free unit test of its own. These assertions guard the
# sql_guard.py, masking.py, rate_limiter.py, sql_builder.py, nl_queries.py,
# auth/manager.py AND auth/key_rotation.py live -- every declared serving surface
# is now mutated (via duckdb-free narrow tests, mutated as a top-level `serving`
# package so mutmut's trampoline accepts them). These assertions guard the
# declared policy, not live coverage.
REQUIRED_MUTATION_TARGETS = {
"src/serving/semantic_layer/sql_guard.py",
Expand Down