Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 13 additions & 9 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -207,15 +207,19 @@ asyncio_mode = "auto"
# mutates: scripts/mutation_report.py drives the gate from its own
# MODULE_TARGETS, which now mutates sdk/agentflow/retry.py,
# src/serving/semantic_layer/sql_guard.py, src/serving/masking.py,
# src/serving/api/rate_limiter.py, src/serving/semantic_layer/query/sql_builder.py
# AND src/serving/semantic_layer/query/nl_queries.py live. The serving modules are
# mutated as a top-level `serving` package against duckdb-free narrow tests:
# mutmut's trampoline rejects a module name starting with `src.`, which (not
# duckdb) was the real blocker. The remaining declared serving surface -- the auth
# manager / key_rotation -- stays declared-only for now: its unit tests pull the
# duckdb-backed query engine, so mutating it in isolation needs a duckdb-free test
# (the pattern the six live modules now use); the blocker is the test import
# chain, not the module. See scripts/mutation_report.py.
# src/serving/api/rate_limiter.py, src/serving/semantic_layer/query/sql_builder.py,
# src/serving/semantic_layer/query/nl_queries.py AND src/serving/api/auth/manager.py
# live. The serving modules are mutated as a top-level `serving` package against
# duckdb-free narrow tests: mutmut's trampoline rejects a module name starting with
# `src.`, which (not duckdb) was the real blocker. manager.py runs at an honest
# 0.80 threshold (not 0.90): it is a large stateful auth class whose residual
# survivors are equivalent mutants (structured-logging args, model_copy updates
# equal to their defaults, redis-url strings masked by the `_redis = None`
# override, the env-only-dead write path) -- every behaviour-reachable mutant,
# including every auth-bypass and throttle off-by-one, is killed. The remaining
# declared serving surface -- auth/key_rotation -- stays declared-only until it
# gets a duckdb-free test of its own (the pattern the live modules use); the
# blocker is the test import chain, not the module. See scripts/mutation_report.py.
paths_to_mutate = [
"src/serving/api/auth/manager.py",
"src/serving/api/auth/key_rotation.py",
Expand Down
50 changes: 36 additions & 14 deletions scripts/mutation_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,23 @@ class ModuleTarget:
# (a) copy the module so it imports as a top-level package and (b) pair it with a
# NARROW test that does not pull the duckdb-backed engine import chain. So
# retry.py mutates as agentflow.retry (from sdk/agentflow), and sql_guard,
# masking, rate_limiter, sql_builder and nl_queries mutate as serving.* (from
# src/serving) against duckdb-free tests. Each duckdb-free test also avoids
# fixtures and calls the module's methods directly: under mutate_only_covered_lines
# a fixture-built object left every method line uncovered, so only __init__ got
# mutated. rate_limiter additionally imports `from src.constants import ...`; its
# test registers a tiny src.constants stub before importing the module, because
# the serving workspace copies src/serving -> top-level `serving` without `src`.
# sql_builder and nl_queries live under the query package whose __init__ imports
# the duckdb-backed QueryEngine, so their tests also stub
# serving.semantic_layer.query.{engine,contracts} (and the src.* helpers) before
# import. The only remaining declared-but-not-live serving surface is auth
# (manager / key_rotation), whose tests still need the duckdb engine; it stays
# declared-only in the [tool.mutmut] policy until it gets duckdb-free unit tests
# of its own.
# masking, rate_limiter, sql_builder, nl_queries and auth/manager mutate as
# serving.* (from src/serving) against duckdb-free tests. Each duckdb-free test
# also avoids fixtures and calls the module's methods directly: under
# mutate_only_covered_lines a fixture-built object left every method line
# uncovered, so only __init__ got mutated. rate_limiter additionally imports
# `from src.constants import ...`; its test registers a tiny src.constants stub
# before importing the module, because the serving workspace copies src/serving
# -> top-level `serving` without `src`. sql_builder and nl_queries live under the
# query package whose __init__ imports the duckdb-backed QueryEngine, so their
# tests also stub serving.semantic_layer.query.{engine,contracts} (and the src.*
# helpers) before import. auth/manager imports as the auth package whose __init__
# imports duckdb plus the key_rotation/usage_table chain, but manager.py itself
# never calls duckdb (all usage-table I/O lives in usage_table.py), so its test
# swaps in a fake top-level `duckdb` module and mutates manager duckdb-free. The
# only remaining declared-but-not-live serving surface is auth/key_rotation,
# which uses the duckdb connection directly; it stays declared-only in the
# [tool.mutmut] policy until it gets a duckdb-free unit test of its own.
MODULE_TARGETS = {
Path("agentflow/retry.py"): ModuleTarget(
threshold=0.75,
Expand All @@ -69,6 +72,25 @@ class ModuleTarget:
threshold=0.90,
tests=("tests/unit/test_nl_queries_mutation.py",),
),
# manager.py runs at 0.80, not the 0.90 the pure-function guards (sql_guard,
# masking, sql_builder, ...) hold. It is a ~400-line stateful auth class whose
# surviving mutants are dominated by EQUIVALENTS that no behaviour-level test
# can kill: structured-logging arguments (the auth logger event names / kwargs),
# `model_copy(update=...)` dicts whose mutated field equals its default
# ("matched_slot" already defaults to "current"; "key"==api_key on a plaintext
# match), the redis-url strings masked by the `_redis = None` override under the
# duckdb-free harness, and the config-file write path that is dead under the
# env-only test. Every BEHAVIOUR-reachable mutant is killed -- crucially every
# auth bypass (the verify_api_key argument-swap mutants on the indexed / legacy
# / previous-key paths and in _matches_key_material) and every rate-limit /
# failed-auth throttle off-by-one. Local mutmut (py3.10) scores 405/483 = 83.9%;
# 0.80 leaves headroom for equivalent-mutant noise while still enforcing a real
# floor (the do-nothing baseline was 76.5%). key_rotation is the next target and
# stays declared-only until it gets its own duckdb-free test.
Path("serving/api/auth/manager.py"): ModuleTarget(
threshold=0.80,
tests=("tests/unit/test_auth_manager_mutation.py",),
),
}

STATUS_BY_EXIT_CODE = {
Expand Down
Loading