From 96f4ca128f1cfa371bb9aceff39e246e67a604b8 Mon Sep 17 00:00:00 2001
From: JuliaEdom <uedomskikh@gmail.com>
Date: Tue, 30 Jun 2026 01:12:12 +0300
Subject: [PATCH 1/2] test(mutation): mutate sql_builder + nl_queries live (B2)

Close the declared-vs-live gap for the query surface: sql_builder.py and
nl_queries.py were declared mutation targets but the gate only ran retry,
sql_guard, masking and rate_limiter. Add duckdb-free fixtureless mutation
tests for both and wire them into scripts/mutation_report.py MODULE_TARGETS
@ 0.90, so 6 of 8 security modules are now mutated live (auth manager /
key_rotation remain declared-only pending their own duckdb-free tests).

Both modules live under the query package whose __init__ imports the
duckdb-backed QueryEngine, so each test stubs
serving.semantic_layer.query.{engine,contracts} (plus the src.* helpers)
before importing the module under a top-level `serving` name. nl_queries'
.sql_guard shim is repointed at the real top-level sql_guard so the
validate_nl_sql boundary runs against the genuine guard.

Reproduced on the WSL/mutmut harness (py3.10): sql_builder 96.0%
(killed 167/174), nl_queries 94.4% (killed 323/342). Remaining survivors
are documented equivalents (typing.cast type-string, dialect=duckdb->None
renders, sqlglot/regex table-extraction convergence, telemetry no-ops,
*1001 elapsed). The CI gate (mutation.yml, py3.11) is the source of truth.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 pyproject.toml                          |  19 +-
 scripts/mutation_report.py              |  32 +-
 tests/unit/test_mutmut_policy.py        |  11 +-
 tests/unit/test_nl_queries_mutation.py  | 855 ++++++++++++++++++++++++
 tests/unit/test_sql_builder_mutation.py | 583 ++++++++++++++++
 5 files changed, 1476 insertions(+), 24 deletions(-)
 create mode 100644 tests/unit/test_nl_queries_mutation.py
 create mode 100644 tests/unit/test_sql_builder_mutation.py

diff --git a/pyproject.toml b/pyproject.toml
index 24fa5aa..bf3ae6e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -206,15 +206,16 @@ asyncio_mode = "auto"
 # security-critical surfaces). It is a superset of what the CI gate actually
 # mutates: scripts/mutation_report.py drives the gate from its own
 # MODULE_TARGETS, which now mutates sdk/agentflow/retry.py,
-# src/serving/semantic_layer/sql_guard.py, src/serving/masking.py AND
-# src/serving/api/rate_limiter.py live. The serving modules are mutated as a
-# top-level `serving` package against duckdb-free narrow tests: mutmut's
-# trampoline rejects a module name starting with `src.`, which (not duckdb) was
-# the real blocker. The remaining serving modules below stay declared-only for
-# now -- their unit tests pull the duckdb-backed query engine, so mutating them
-# in isolation needs a duckdb-free test per module (the pattern sql_guard,
-# masking and rate_limiter now use); the blocker is the test import chain, not
-# the module. See scripts/mutation_report.py.
+# src/serving/semantic_layer/sql_guard.py, src/serving/masking.py,
+# src/serving/api/rate_limiter.py, src/serving/semantic_layer/query/sql_builder.py
+# AND src/serving/semantic_layer/query/nl_queries.py live. The serving modules are
+# mutated as a top-level `serving` package against duckdb-free narrow tests:
+# mutmut's trampoline rejects a module name starting with `src.`, which (not
+# duckdb) was the real blocker. The remaining declared serving surface -- the auth
+# manager / key_rotation -- stays declared-only for now: its unit tests pull the
+# duckdb-backed query engine, so mutating it in isolation needs a duckdb-free test
+# (the pattern the six live modules now use); the blocker is the test import
+# chain, not the module. See scripts/mutation_report.py.
 paths_to_mutate = [
     "src/serving/api/auth/manager.py",
     "src/serving/api/auth/key_rotation.py",
diff --git a/scripts/mutation_report.py b/scripts/mutation_report.py
index 970c7aa..a74c378 100644
--- a/scripts/mutation_report.py
+++ b/scripts/mutation_report.py
@@ -30,16 +30,20 @@ class ModuleTarget:
 # (a) copy the module so it imports as a top-level package and (b) pair it with a
 # NARROW test that does not pull the duckdb-backed engine import chain. So
 # retry.py mutates as agentflow.retry (from sdk/agentflow), and sql_guard,
-# masking and rate_limiter mutate as serving.* (from src/serving) against
-# duckdb-free tests. Each duckdb-free test also avoids fixtures and calls the
-# module's methods directly: under mutate_only_covered_lines a fixture-built
-# object left every method line uncovered, so only __init__ got mutated.
-# rate_limiter additionally imports `from src.constants import ...`; its test
-# registers a tiny src.constants stub before importing the module, because the
-# serving workspace copies src/serving -> top-level `serving` without `src`. The
-# remaining serving modules whose tests still need the duckdb engine (the
-# query/auth surfaces) stay declared-only in the [tool.mutmut] policy until they
-# get duckdb-free unit tests of their own.
+# masking, rate_limiter, sql_builder and nl_queries mutate as serving.* (from
+# src/serving) against duckdb-free tests. Each duckdb-free test also avoids
+# fixtures and calls the module's methods directly: under mutate_only_covered_lines
+# a fixture-built object left every method line uncovered, so only __init__ got
+# mutated. rate_limiter additionally imports `from src.constants import ...`; its
+# test registers a tiny src.constants stub before importing the module, because
+# the serving workspace copies src/serving -> top-level `serving` without `src`.
+# sql_builder and nl_queries live under the query package whose __init__ imports
+# the duckdb-backed QueryEngine, so their tests also stub
+# serving.semantic_layer.query.{engine,contracts} (and the src.* helpers) before
+# import. The only remaining declared-but-not-live serving surface is auth
+# (manager / key_rotation), whose tests still need the duckdb engine; it stays
+# declared-only in the [tool.mutmut] policy until it gets duckdb-free unit tests
+# of its own.
 MODULE_TARGETS = {
     Path("agentflow/retry.py"): ModuleTarget(
         threshold=0.75,
@@ -57,6 +61,14 @@ class ModuleTarget:
         threshold=0.90,
         tests=("tests/unit/test_rate_limiter_mutation.py",),
     ),
+    Path("serving/semantic_layer/query/sql_builder.py"): ModuleTarget(
+        threshold=0.90,
+        tests=("tests/unit/test_sql_builder_mutation.py",),
+    ),
+    Path("serving/semantic_layer/query/nl_queries.py"): ModuleTarget(
+        threshold=0.90,
+        tests=("tests/unit/test_nl_queries_mutation.py",),
+    ),
 }
 
 STATUS_BY_EXIT_CODE = {
diff --git a/tests/unit/test_mutmut_policy.py b/tests/unit/test_mutmut_policy.py
index 7b5fdf9..4d26f56 100644
--- a/tests/unit/test_mutmut_policy.py
+++ b/tests/unit/test_mutmut_policy.py
@@ -18,11 +18,12 @@
 #     assembled here.
 # NOTE: these are the *declared* targets (intent). Actual mutation execution is
 # gated by scripts/mutation_report.py (MODULE_TARGETS), which now runs retry.py,
-# sql_guard.py, masking.py AND rate_limiter.py live (the serving modules via
-# duckdb-free narrow tests, mutated as a top-level `serving` package so mutmut's
-# trampoline accepts them). The other serving modules below stay declared-only
-# until they get duckdb-free unit tests of their own. These assertions guard the
-# declared policy, not live coverage.
+# sql_guard.py, masking.py, rate_limiter.py, sql_builder.py AND nl_queries.py live
+# (the serving modules via duckdb-free narrow tests, mutated as a top-level
+# `serving` package so mutmut's trampoline accepts them). The remaining declared
+# serving surface -- auth manager / key_rotation -- stays declared-only until it
+# gets duckdb-free unit tests of its own. These assertions guard the declared
+# policy, not live coverage.
 REQUIRED_MUTATION_TARGETS = {
     "src/serving/semantic_layer/sql_guard.py",
     "src/serving/api/auth/manager.py",
diff --git a/tests/unit/test_nl_queries_mutation.py b/tests/unit/test_nl_queries_mutation.py
new file mode 100644
index 0000000..f05e4e1
--- /dev/null
+++ b/tests/unit/test_nl_queries_mutation.py
@@ -0,0 +1,855 @@
+"""Narrow, duckdb-free mutation test for the NL query execution path
+(src/serving/semantic_layer/query/nl_queries.py).
+
+This is the test the mutation gate runs against
+``serving/semantic_layer/query/nl_queries.py`` (see scripts/mutation_report.py
+MODULE_TARGETS). nl_queries is the only ``validate_nl_sql()`` enforcement
+boundary (``_prepare_nl_sql``) plus the pagination / row-cap SQL wrappers built
+around the prevalidated NL SQL -- a surviving mutant in the row cap, the
+validate wrap or the cursor checks is an un-paginated full-table read or a
+denylist bypass (audit_28_06_26.md #8), exactly what a mutation gate should pin.
+
+Design rules, shared with test_rate_limiter_mutation.py /
+test_sql_builder_mutation.py (see fable_handoff.md cont.16-19):
+
+1. **duckdb-free.** The ordinary query-engine tests build a QueryEngine, which
+   imports duckdb and crashes mutmut's ``mutants/`` workspace. This file drives
+   the NLQueryMixin methods through a hand-built host with a fake backend, so
+   duckdb is never imported.
+
+2. **No fixtures for the subject -- inline construction + direct method calls.**
+   With ``mutate_only_covered_lines = true`` a fixture-built host left every
+   method line uncovered (only ``__init__`` mutated, score 0%). The host is built
+   inline and the methods are called directly. (The one pytest fixture below only
+   toggles the telemetry flag; it constructs nothing under test.)
+
+3. **Import shims.** The mutation harness copies ``src/serving`` to a top-level
+   ``serving`` package *without* ``src``. Several things on nl_queries' import
+   path would otherwise fail or drag duckdb in: ``from src.processing.tracing
+   import telemetry_disabled`` and ``from src.serving.backends import
+   BackendExecutionError`` (no ``src``), the ``serving.semantic_layer.query``
+   package ``__init__`` (``from .engine import QueryEngine`` -> duckdb) and
+   ``.contracts`` (imports the duckdb backend for type hints). The ``.sql_guard``
+   import is a shim that does ``from src.serving.semantic_layer.sql_guard import
+   ...``; we point that src-name at the REAL top-level sql_guard (sqlglot-only,
+   duckdb-free) so the validate boundary runs against the genuine guard. Under
+   ordinary pytest the real ``src`` package is importable, so no shim is installed
+   and the real modules load.
+
+Reproduced at 94.4% (killed 323, survived 19) via the WSL/mutmut harness (py3.10);
+the CI gate (mutation.yml on py3.11) is the source of truth. The surviving mutants
+are genuine equivalents, documented at the bottom of this file.
+"""
+
+from __future__ import annotations
+
+import base64
+import hashlib
+import sys
+import types
+
+
+def _src_package_available() -> bool:
+    try:
+        import src  # noqa: F401
+
+        return True
+    except ModuleNotFoundError:
+        return False
+
+
+def _ensure_module(name: str) -> types.ModuleType:
+    module = sys.modules.get(name)
+    if module is None:
+        module = types.ModuleType(name)
+        sys.modules[name] = module
+    return module
+
+
+def _install_harness_stubs() -> None:
+    _ensure_module("src")
+    processing_pkg = _ensure_module("src.processing")
+    tracing_mod = _ensure_module("src.processing.tracing")
+
+    def telemetry_disabled() -> bool:
+        return True
+
+    tracing_mod.telemetry_disabled = telemetry_disabled
+    processing_pkg.tracing = tracing_mod
+
+    serving_pkg = _ensure_module("src.serving")
+    backends_mod = _ensure_module("src.serving.backends")
+
+    class BackendExecutionError(RuntimeError):
+        """Mirror of src.serving.backends.BackendExecutionError."""
+
+    backends_mod.BackendExecutionError = BackendExecutionError
+    serving_pkg.backends = backends_mod
+
+    # nl_engine is imported lazily inside explain(); supply a rule-based default.
+    semantic_pkg = _ensure_module("src.serving.semantic_layer")
+    nl_engine_mod = _ensure_module("src.serving.semantic_layer.nl_engine")
+    nl_engine_mod._ANTHROPIC_KEY = ""
+    semantic_pkg.nl_engine = nl_engine_mod
+
+    # nl_queries imports `.sql_guard`, a shim that does
+    # `from src.serving.semantic_layer.sql_guard import ...`. Point that src-name
+    # at the REAL top-level sql_guard (sqlglot-only, duckdb-free, importable in
+    # the harness) so the validate_nl_sql boundary is exercised against the
+    # genuine guard rather than a stub.
+    import serving.semantic_layer.sql_guard as _real_sql_guard
+
+    sys.modules["src.serving.semantic_layer.sql_guard"] = _real_sql_guard
+    semantic_pkg.sql_guard = _real_sql_guard
+
+    # Neuter the query package __init__ (`from .engine import QueryEngine`) and
+    # the contracts module; both pull duckdb via the QueryEngine import chain and
+    # neither contributes runtime behaviour to nl_queries.
+    engine_stub = _ensure_module("serving.semantic_layer.query.engine")
+    engine_stub.QueryEngine = object
+    contracts_stub = _ensure_module("serving.semantic_layer.query.contracts")
+    contracts_stub.SQLBuilderHost = object
+    contracts_stub.QueryExecutionHost = object
+    contracts_stub.NLQueryHost = object
+
+
+if not _src_package_available():
+    _install_harness_stubs()
+
+try:  # mutation-harness workspace exposes it as a top-level package
+    from serving.semantic_layer.query import nl_queries as nlq_module
+except ImportError:  # ordinary pytest sees it under the src package
+    from src.serving.semantic_layer.query import nl_queries as nlq_module
+
+import pytest
+
+NLQueryMixin = nlq_module.NLQueryMixin
+UnsafeNLQueryError = nlq_module.UnsafeNLQueryError
+
+
+# --------------------------------------------------------------------------- #
+# In-process host + fake backend + recording span (no duckdb, no real tracer).
+# --------------------------------------------------------------------------- #
+
+
+class _Entity:
+    def __init__(self, table: str) -> None:
+        self.table = table
+
+
+class _Catalog:
+    def __init__(
+        self, tables: tuple[str, ...] = ("orders",), metrics: tuple[str, ...] = ()
+    ) -> None:
+        self.entities = {table: _Entity(table) for table in tables}
+        self.metrics = dict.fromkeys(metrics)
+
+
+class _FakeBackend:
+    """Records every SQL string handed to execute/scalar/explain so the
+    pagination wrappers and the row-cap can be pinned exactly."""
+
+    def __init__(self, rows=None, scalar_value=0, explain_rows=None) -> None:
+        self._rows = rows if rows is not None else []
+        self._scalar_value = scalar_value
+        self._explain_rows = explain_rows if explain_rows is not None else []
+        self.executed: list[str] = []
+        self.scalared: list[str] = []
+        self.explained: list[str] = []
+
+    def execute(self, sql: str):
+        self.executed.append(sql)
+        return self._rows
+
+    def scalar(self, sql: str):
+        self.scalared.append(sql)
+        return self._scalar_value
+
+    def explain(self, sql: str):
+        self.explained.append(sql)
+        return self._explain_rows
+
+
+class _RecordingSpan:
+    def __init__(self) -> None:
+        self.attributes: dict = {}
+
+    def is_recording(self) -> bool:
+        return True
+
+    def set_attribute(self, key, value) -> None:
+        self.attributes[key] = value
+
+
+class _SpanCtx:
+    def __init__(self, span: _RecordingSpan) -> None:
+        self._span = span
+
+    def __enter__(self) -> _RecordingSpan:
+        return self._span
+
+    def __exit__(self, *exc) -> bool:
+        return False
+
+
+class _RecordingTracer:
+    def __init__(self) -> None:
+        self.started: list = []
+        self.spans: list[_RecordingSpan] = []
+
+    def start_as_current_span(self, name):
+        self.started.append(name)
+        span = _RecordingSpan()
+        self.spans.append(span)
+        return _SpanCtx(span)
+
+
+class _Host(NLQueryMixin):
+    def __init__(
+        self,
+        *,
+        catalog: _Catalog | None = None,
+        backend: _FakeBackend | None = None,
+        backend_name: str = "duckdb",
+        translated: str = "SELECT id FROM orders",
+    ) -> None:
+        self.catalog = catalog if catalog is not None else _Catalog()
+        self._backend = backend if backend is not None else _FakeBackend()
+        self._backend_name = backend_name
+        self._translated = translated
+        self.translate_calls: list[tuple[str, str | None]] = []
+        self.scope_calls: list[tuple[str, str | None]] = []
+
+    # Overridden so the test never pulls nl_engine / the tenant router; these
+    # live in other modules and are mutation-tested separately. Calls are
+    # recorded so the call sites' argument forwarding can be pinned.
+    def _translate_question_to_sql(self, question: str, tenant_id: str | None = None) -> str:
+        self.translate_calls.append((question, tenant_id))
+        return self._translated
+
+    def _scope_sql(self, sql: str, tenant_id: str | None) -> str:
+        self.scope_calls.append((sql, tenant_id))
+        return sql
+
+    def _resolve_tenant_id(self, tenant_id: str | None) -> str | None:
+        return tenant_id
+
+
+@pytest.fixture(autouse=True)
+def _disable_telemetry(monkeypatch):
+    # Force the no-span path deterministically in both contexts (the harness stub
+    # already returns True; under ordinary pytest the real one may not). Telemetry
+    # tests below re-enable it explicitly.
+    monkeypatch.setattr(nlq_module, "telemetry_disabled", lambda: True)
+
+
+def _fixed_clock(monkeypatch, start_t: float, end_t: float) -> None:
+    # Pin time.monotonic so elapsed_ms is deterministic: first call returns the
+    # method's start stamp, every later call returns the end stamp. This makes the
+    # elapsed arithmetic killable without depending on the absolute wall clock (a
+    # `- start`->`+ start` flip would otherwise pass or fail by machine uptime).
+    state = {"calls": 0}
+
+    def _monotonic() -> float:
+        state["calls"] += 1
+        return start_t if state["calls"] == 1 else end_t
+
+    monkeypatch.setattr(nlq_module, "time", types.SimpleNamespace(monotonic=_monotonic))
+
+
+# --------------------------------------------------------------------------- #
+# _default_allowed_tables: catalog tables + pipeline_events.
+# --------------------------------------------------------------------------- #
+
+
+def test_default_allowed_tables_includes_catalog_and_pipeline_events():
+    host = _Host(catalog=_Catalog(tables=("orders", "customers")))
+    allowed = nlq_module._default_allowed_tables(host)
+    assert allowed == {"orders", "customers", "pipeline_events"}
+
+
+# --------------------------------------------------------------------------- #
+# _prepare_nl_sql: the validate_nl_sql enforcement boundary.
+# --------------------------------------------------------------------------- #
+
+
+def test_prepare_nl_sql_returns_validated_sql_unchanged():
+    sql = "SELECT id FROM orders"
+    assert nlq_module._prepare_nl_sql(sql, {"orders"}) == sql
+
+
+def test_prepare_nl_sql_wraps_unsafe_sql_as_403():
+    with pytest.raises(UnsafeNLQueryError) as exc_info:
+        nlq_module._prepare_nl_sql("DROP TABLE orders", {"orders"})
+    assert exc_info.value.status_code == 403
+    assert exc_info.value.detail.startswith("NL-to-SQL produced unsafe query:")
+
+
+def test_prepare_nl_sql_rejects_unknown_table():
+    with pytest.raises(UnsafeNLQueryError):
+        nlq_module._prepare_nl_sql("SELECT id FROM secret_table", {"orders"})
+
+
+def test_unsafe_nl_query_error_is_403():
+    err = UnsafeNLQueryError("boom")
+    assert err.status_code == 403
+    assert err.detail == "boom"
+
+
+# --------------------------------------------------------------------------- #
+# _build_query_hash: tenant-scoped sha256.
+# --------------------------------------------------------------------------- #
+
+
+def test_build_query_hash_matches_sha256_of_tenant_and_sql():
+    host = _Host()
+    expected = hashlib.sha256(b"acme:SELECT 1").hexdigest()
+    assert host._build_query_hash("SELECT 1", "acme") == expected
+
+
+def test_build_query_hash_uses_default_when_tenant_none():
+    host = _Host()
+    expected = hashlib.sha256(b"default:SELECT 1").hexdigest()
+    assert host._build_query_hash("SELECT 1", None) == expected
+
+
+def test_build_query_hash_is_sql_sensitive():
+    host = _Host()
+    assert host._build_query_hash("SELECT 1", "acme") != host._build_query_hash("SELECT 2", "acme")
+
+
+# --------------------------------------------------------------------------- #
+# _encode_cursor / _decode_cursor: base64 round-trip + validation.
+# --------------------------------------------------------------------------- #
+
+
+def test_cursor_round_trip():
+    host = _Host()
+    cursor = host._encode_cursor(42, "abc123")
+    assert host._decode_cursor(cursor) == (42, "abc123")
+
+
+def test_encode_cursor_is_base64_of_offset_and_hash():
+    host = _Host()
+    cursor = host._encode_cursor(7, "deadbeef")
+    assert base64.urlsafe_b64decode(cursor.encode()).decode() == "7:deadbeef"
+
+
+def test_decode_cursor_accepts_zero_offset():
+    # offset 0 is valid (`offset < 0` rejects, not `<= 0` / `< 1`).
+    host = _Host()
+    cursor = base64.urlsafe_b64encode(b"0:abc").decode()
+    assert host._decode_cursor(cursor) == (0, "abc")
+
+
+def test_decode_cursor_keeps_colon_in_hash():
+    # The hash is split off with maxsplit=1, so a hash containing ':' survives.
+    # Kills split(":") (no maxsplit), rsplit, and maxsplit=2.
+    host = _Host()
+    cursor = base64.urlsafe_b64encode(b"5:a:b:c").decode()
+    assert host._decode_cursor(cursor) == (5, "a:b:c")
+
+
+def test_decode_cursor_rejects_negative_offset():
+    host = _Host()
+    bad = base64.urlsafe_b64encode(b"-1:abc").decode()
+    with pytest.raises(ValueError) as exc_info:
+        host._decode_cursor(bad)
+    assert str(exc_info.value) == "Invalid cursor value."
+
+
+def test_decode_cursor_rejects_empty_hash():
+    host = _Host()
+    bad = base64.urlsafe_b64encode(b"5:").decode()
+    with pytest.raises(ValueError) as exc_info:
+        host._decode_cursor(bad)
+    assert str(exc_info.value) == "Invalid cursor value."
+
+
+def test_decode_cursor_rejects_non_numeric_offset():
+    host = _Host()
+    bad = base64.urlsafe_b64encode(b"x:abc").decode()
+    with pytest.raises(ValueError) as exc_info:
+        host._decode_cursor(bad)
+    assert str(exc_info.value) == "Invalid cursor value."
+
+
+def test_decode_cursor_rejects_garbage():
+    host = _Host()
+    with pytest.raises(ValueError) as exc_info:
+        host._decode_cursor("!!!not-base64!!!")
+    assert str(exc_info.value) == "Invalid cursor value."
+
+
+# --------------------------------------------------------------------------- #
+# execute_nl_query: the un-paginated path bounds rows with a hard LIMIT.
+# --------------------------------------------------------------------------- #
+
+
+def test_execute_nl_query_bounds_rows_with_hard_limit():
+    # The bounded wrapper caps the row count at _MAX_NL_QUERY_ROWS (1000) so an
+    # un-paginated NL item cannot stream a whole table (audit_28 #8). Pin the
+    # exact SQL string AND every key of the result dict.
+    backend = _FakeBackend(rows=[("r1",), ("r2",)])
+    host = _Host(backend=backend, translated="SELECT id FROM orders")
+    result = host.execute_nl_query("how many orders", allowed_tables={"orders"})
+    assert backend.executed == [
+        "SELECT * FROM (SELECT id FROM orders) AS bounded_nl_query LIMIT 1000"
+    ]
+    assert result["data"] == [("r1",), ("r2",)]
+    assert result["sql"] == "SELECT id FROM orders"
+    assert result["row_count"] == 2
+    assert result["freshness_seconds"] is None
+    assert isinstance(result["execution_time_ms"], int)
+    assert set(result) == {"data", "sql", "row_count", "execution_time_ms", "freshness_seconds"}
+
+
+def test_execute_nl_query_max_rows_constant_is_1000():
+    assert nlq_module._MAX_NL_QUERY_ROWS == 1000
+
+
+def test_execute_nl_query_forwards_question_tenant_and_scope():
+    backend = _FakeBackend(rows=[("a",)])
+    host = _Host(backend=backend)
+    host.execute_nl_query("the question", tenant_id="acme", allowed_tables={"orders"})
+    assert host.translate_calls == [("the question", "acme")]
+    assert host.scope_calls == [("SELECT id FROM orders", "acme")]
+
+
+def test_execute_nl_query_uses_passed_allowed_tables_not_default():
+    # allowed_tables is honoured even when it differs from the catalog-derived
+    # default: an `is not None`->`is None` flip would validate against the
+    # (here unrelated) default and reject. Empty catalog -> default lacks 'orders'.
+    backend = _FakeBackend(rows=[("a",)])
+    host = _Host(catalog=_Catalog(tables=()), backend=backend, translated="SELECT id FROM orders")
+    result = host.execute_nl_query("q", allowed_tables={"orders"})
+    assert result["row_count"] == 1
+
+
+def test_execute_nl_query_uses_default_allowed_tables_when_none():
+    # allowed_tables=None -> _default_allowed_tables(self) governs validation; a
+    # `(self)`->`(None)` mutant raises AttributeError on None.catalog.
+    backend = _FakeBackend(rows=[("a",)])
+    host = _Host(catalog=_Catalog(tables=("orders",)), backend=backend)
+    result = host.execute_nl_query("q", allowed_tables=None)
+    assert result["row_count"] == 1
+
+
+def test_execute_nl_query_elapsed_is_monotonic_delta_in_ms(monkeypatch):
+    # elapsed_ms = int((end - start) * 1000) on a pinned clock. Kills `=None`,
+    # `/1000` and `- start`->`+ start` (the *1001 variant rounds to the same 50).
+    _fixed_clock(monkeypatch, 1.0, 1.5)
+    host = _Host(backend=_FakeBackend(rows=[("a",)]))
+    result = host.execute_nl_query("q", allowed_tables={"orders"})
+    assert result["execution_time_ms"] == 500  # int((1.5 - 1.0) * 1000)
+
+
+def test_execute_nl_query_records_span_attributes(monkeypatch):
+    monkeypatch.setattr(nlq_module, "telemetry_disabled", lambda: False)
+    tracer = _RecordingTracer()
+    monkeypatch.setattr(nlq_module, "tracer", tracer)
+    backend = _FakeBackend(rows=[("a",), ("b",)])
+    host = _Host(backend=backend, backend_name="duckdb")
+    host.execute_nl_query("q", tenant_id="acme", allowed_tables={"orders"})
+    assert tracer.started == ["duckdb.query"]
+    span = tracer.spans[0]
+    assert span.attributes["sql"] == "SELECT id FROM orders"
+    assert span.attributes["tenant_id"] == "acme"
+    assert span.attributes["row_count"] == 2
+
+
+def test_execute_nl_query_wraps_backend_error_as_value_error():
+    class _BoomBackend(_FakeBackend):
+        def execute(self, sql):
+            raise nlq_module.BackendExecutionError("boom")
+
+    host = _Host(backend=_BoomBackend())
+    with pytest.raises(ValueError, match="Query execution failed: boom"):
+        host.execute_nl_query("q", allowed_tables={"orders"})
+
+
+# --------------------------------------------------------------------------- #
+# paginated_query: the page/count SQL wrappers + the pagination arithmetic.
+# --------------------------------------------------------------------------- #
+
+
+def test_paginated_query_builds_page_and_count_sql():
+    backend = _FakeBackend(rows=[("a",), ("b",), ("c",)], scalar_value=3)
+    host = _Host(backend=backend, translated="SELECT id FROM orders")
+    host.paginated_query("q", limit=2, allowed_tables={"orders"})
+    assert backend.executed == [
+        "SELECT * FROM (SELECT id FROM orders) AS paginated_query LIMIT 3 OFFSET 0"
+    ]
+    assert backend.scalared == [
+        "SELECT COUNT(*) FROM (SELECT 1 FROM (SELECT id FROM orders) AS count_query "
+        "LIMIT 10001) AS bounded_count"
+    ]
+
+
+def test_paginated_query_default_limit_is_100():
+    # No limit -> default 100 -> page SQL asks for limit+1 = 101.
+    backend = _FakeBackend(rows=[("a",)], scalar_value=1)
+    host = _Host(backend=backend)
+    host.paginated_query("q", allowed_tables={"orders"})
+    assert backend.executed == [
+        "SELECT * FROM (SELECT id FROM orders) AS paginated_query LIMIT 101 OFFSET 0"
+    ]
+
+
+def test_paginated_query_result_dict_keys_and_values():
+    backend = _FakeBackend(rows=[("a",), ("b",), ("c",)], scalar_value=3)
+    host = _Host(backend=backend)
+    result = host.paginated_query("q", limit=2, allowed_tables={"orders"})
+    assert result["sql"] == "SELECT id FROM orders"
+    assert result["freshness_seconds"] is None
+    assert isinstance(result["execution_time_ms"], int)
+    assert set(result) == {
+        "data",
+        "sql",
+        "row_count",
+        "total_count",
+        "next_cursor",
+        "has_more",
+        "page_size",
+        "execution_time_ms",
+        "freshness_seconds",
+    }
+
+
+def test_paginated_query_has_more_and_data_slice():
+    backend = _FakeBackend(rows=[("a",), ("b",), ("c",)], scalar_value=3)
+    host = _Host(backend=backend)
+    result = host.paginated_query("q", limit=2, allowed_tables={"orders"})
+    assert result["has_more"] is True
+    assert result["data"] == [("a",), ("b",)]
+    assert result["row_count"] == 2
+    assert result["page_size"] == 2
+
+
+def test_paginated_query_no_more_when_rows_within_limit():
+    backend = _FakeBackend(rows=[("a",), ("b",)], scalar_value=2)
+    host = _Host(backend=backend)
+    result = host.paginated_query("q", limit=2, allowed_tables={"orders"})
+    assert result["has_more"] is False
+    assert result["next_cursor"] is None
+    assert result["data"] == [("a",), ("b",)]
+
+
+def test_paginated_query_next_cursor_advances_offset():
+    backend = _FakeBackend(rows=[("a",), ("b",), ("c",)], scalar_value=3)
+    host = _Host(backend=backend)
+    result = host.paginated_query("q", limit=2, allowed_tables={"orders"})
+    assert result["next_cursor"] is not None
+    offset, _hash = host._decode_cursor(result["next_cursor"])
+    assert offset == 2  # offset(0) + limit(2)
+
+
+def test_paginated_query_cursor_hash_is_tenant_and_sql_bound():
+    # The cursor hash must derive from the scoped SQL and the tenant id; a mutant
+    # that hashes None for either yields a hash that doesn't match the recomputed
+    # value, which would also break the same-query cursor check.
+    backend = _FakeBackend(rows=[("a",), ("b",), ("c",)], scalar_value=3)
+    host = _Host(backend=backend)
+    result = host.paginated_query("q", limit=2, tenant_id="acme", allowed_tables={"orders"})
+    _offset, cursor_hash = host._decode_cursor(result["next_cursor"])
+    assert cursor_hash == host._build_query_hash("SELECT id FROM orders", "acme")
+
+
+def test_paginated_query_total_count_below_threshold_is_exact():
+    backend = _FakeBackend(rows=[("a",)], scalar_value=5)
+    host = _Host(backend=backend)
+    result = host.paginated_query("q", limit=10, allowed_tables={"orders"})
+    assert result["total_count"] == 5
+
+
+def test_paginated_query_total_count_is_none_above_threshold():
+    backend = _FakeBackend(rows=[("a",)], scalar_value=10_001)
+    host = _Host(backend=backend)
+    result = host.paginated_query("q", limit=10, allowed_tables={"orders"})
+    assert result["total_count"] is None
+
+
+def test_paginated_query_total_count_exactly_threshold_is_exact():
+    backend = _FakeBackend(rows=[("a",)], scalar_value=10_000)
+    host = _Host(backend=backend)
+    result = host.paginated_query("q", limit=10, allowed_tables={"orders"})
+    assert result["total_count"] == 10_000
+
+
+def test_paginated_query_total_count_zero_when_scalar_none():
+    # A None scalar coalesces to 0 (kills the `else 0`->`else 1` mutant).
+    backend = _FakeBackend(rows=[("a",)], scalar_value=None)
+    host = _Host(backend=backend)
+    result = host.paginated_query("q", limit=10, allowed_tables={"orders"})
+    assert result["total_count"] == 0
+
+
+def test_paginated_query_second_page_uses_cursor_offset():
+    backend = _FakeBackend(rows=[("a",), ("b",), ("c",)], scalar_value=3)
+    host = _Host(backend=backend)
+    first = host.paginated_query("q", limit=2, allowed_tables={"orders"})
+    backend.executed.clear()
+    host.paginated_query("q", limit=2, cursor=first["next_cursor"], allowed_tables={"orders"})
+    assert backend.executed == [
+        "SELECT * FROM (SELECT id FROM orders) AS paginated_query LIMIT 3 OFFSET 2"
+    ]
+
+
+def test_paginated_query_rejects_cursor_for_different_query():
+    host = _Host(backend=_FakeBackend(rows=[("a",)], scalar_value=1))
+    foreign_cursor = host._encode_cursor(2, "not-the-right-hash")
+    with pytest.raises(ValueError) as exc_info:
+        host.paginated_query("q", limit=2, cursor=foreign_cursor, allowed_tables={"orders"})
+    assert str(exc_info.value) == "Cursor does not match the requested query."
+
+
+def test_paginated_query_rejects_limit_below_one():
+    host = _Host()
+    with pytest.raises(ValueError) as exc_info:
+        host.paginated_query("q", limit=0, allowed_tables={"orders"})
+    assert str(exc_info.value) == "limit must be between 1 and 1000"
+
+
+def test_paginated_query_rejects_limit_above_1000():
+    host = _Host()
+    with pytest.raises(ValueError) as exc_info:
+        host.paginated_query("q", limit=1001, allowed_tables={"orders"})
+    assert str(exc_info.value) == "limit must be between 1 and 1000"
+
+
+def test_paginated_query_allows_boundary_limits():
+    backend = _FakeBackend(rows=[("a",)], scalar_value=1)
+    host = _Host(backend=backend)
+    host.paginated_query("q", limit=1, allowed_tables={"orders"})
+    host.paginated_query("q", limit=1000, allowed_tables={"orders"})  # no raise
+
+
+def test_paginated_query_forwards_question_tenant_and_scope():
+    backend = _FakeBackend(rows=[("a",)], scalar_value=1)
+    host = _Host(backend=backend)
+    host.paginated_query("the question", limit=2, tenant_id="acme", allowed_tables={"orders"})
+    assert host.translate_calls == [("the question", "acme")]
+    assert host.scope_calls == [("SELECT id FROM orders", "acme")]
+
+
+def test_paginated_query_uses_passed_allowed_tables_not_default():
+    backend = _FakeBackend(rows=[("a",)], scalar_value=1)
+    host = _Host(catalog=_Catalog(tables=()), backend=backend, translated="SELECT id FROM orders")
+    result = host.paginated_query("q", limit=5, allowed_tables={"orders"})
+    assert result["row_count"] == 1
+
+
+def test_paginated_query_uses_default_allowed_tables_when_none():
+    backend = _FakeBackend(rows=[("a",)], scalar_value=1)
+    host = _Host(catalog=_Catalog(tables=("orders",)), backend=backend)
+    result = host.paginated_query("q", limit=5, allowed_tables=None)
+    assert result["row_count"] == 1
+
+
+def test_paginated_query_elapsed_is_monotonic_delta_in_ms(monkeypatch):
+    _fixed_clock(monkeypatch, 1.0, 1.5)
+    host = _Host(backend=_FakeBackend(rows=[("a",)], scalar_value=1))
+    result = host.paginated_query("q", limit=5, allowed_tables={"orders"})
+    assert result["execution_time_ms"] == 500  # int((1.5 - 1.0) * 1000)
+
+
+def test_paginated_query_records_span_attributes(monkeypatch):
+    monkeypatch.setattr(nlq_module, "telemetry_disabled", lambda: False)
+    tracer = _RecordingTracer()
+    monkeypatch.setattr(nlq_module, "tracer", tracer)
+    backend = _FakeBackend(rows=[("a",), ("b",), ("c",)], scalar_value=3)
+    host = _Host(backend=backend, backend_name="duckdb")
+    host.paginated_query("q", limit=2, tenant_id="acme", allowed_tables={"orders"})
+    assert tracer.started == ["duckdb.query"]
+    span = tracer.spans[0]
+    assert span.attributes["sql"] == "SELECT id FROM orders"
+    assert span.attributes["tenant_id"] == "acme"
+    assert span.attributes["row_count"] == 2  # len(page_rows[:limit])
+
+
+# The span SQL attribute is bounded: <=200 chars verbatim, else sql[:197]+"...".
+# Two boundary lengths pin the <=200 comparison and the 197 truncation offset.
+_SQL_200 = "SELECT id FROM orders".ljust(200)
+_SQL_201 = "SELECT id FROM orders".ljust(201)
+
+
+def _recording_tracer(monkeypatch):
+    monkeypatch.setattr(nlq_module, "telemetry_disabled", lambda: False)
+    tracer = _RecordingTracer()
+    monkeypatch.setattr(nlq_module, "tracer", tracer)
+    return tracer
+
+
+def test_paginated_query_span_keeps_sql_at_200_boundary(monkeypatch):
+    tracer = _recording_tracer(monkeypatch)
+    host = _Host(backend=_FakeBackend(rows=[("a",)], scalar_value=1), translated=_SQL_200)
+    host.paginated_query("q", limit=2, allowed_tables={"orders"})
+    assert tracer.spans[0].attributes["sql"] == _SQL_200  # len == 200 kept whole
+
+
+def test_paginated_query_span_truncates_sql_past_200(monkeypatch):
+    tracer = _recording_tracer(monkeypatch)
+    host = _Host(backend=_FakeBackend(rows=[("a",)], scalar_value=1), translated=_SQL_201)
+    host.paginated_query("q", limit=2, allowed_tables={"orders"})
+    assert tracer.spans[0].attributes["sql"] == _SQL_201[:197] + "..."
+
+
+def test_execute_span_keeps_sql_at_200_boundary(monkeypatch):
+    tracer = _recording_tracer(monkeypatch)
+    host = _Host(backend=_FakeBackend(rows=[("a",)]), translated=_SQL_200)
+    host.execute_nl_query("q", allowed_tables={"orders"})
+    assert tracer.spans[0].attributes["sql"] == _SQL_200
+
+
+def test_execute_span_truncates_sql_past_200(monkeypatch):
+    tracer = _recording_tracer(monkeypatch)
+    host = _Host(backend=_FakeBackend(rows=[("a",)]), translated=_SQL_201)
+    host.execute_nl_query("q", allowed_tables={"orders"})
+    assert tracer.spans[0].attributes["sql"] == _SQL_201[:197] + "..."
+
+
+def test_paginated_query_wraps_backend_error_as_value_error():
+    class _BoomBackend(_FakeBackend):
+        def execute(self, sql):
+            raise nlq_module.BackendExecutionError("kaboom")
+
+    host = _Host(backend=_BoomBackend())
+    with pytest.raises(ValueError, match="Query execution failed: kaboom"):
+        host.paginated_query("q", limit=2, allowed_tables={"orders"})
+
+
+# --------------------------------------------------------------------------- #
+# explain: translate without executing; table extraction + scan warning.
+# --------------------------------------------------------------------------- #
+
+
+def test_explain_reports_tables_estimate_and_scan_warning():
+    backend = _FakeBackend(explain_rows=[(0, "SEQ_SCAN orders"), (1, "estimated ~ 1,234 rows")])
+    host = _Host(backend=backend, translated="SELECT id FROM orders")
+    result = host.explain("show orders", allowed_tables={"orders"})
+    assert result["question"] == "show orders"
+    assert result["sql"] == "SELECT id FROM orders"
+    assert result["tables_accessed"] == ["orders"]
+    assert result["estimated_rows"] == 1234
+    assert result["engine"] == "rule_based"
+    assert result["warning"] == "Full table scan on orders (no index)"
+    assert set(result) == {
+        "question",
+        "sql",
+        "tables_accessed",
+        "estimated_rows",
+        "engine",
+        "warning",
+    }
+
+
+def test_explain_warns_on_sequential_scan_text_only():
+    # The warning fires on "Sequential Scan" too, not just "SEQ_SCAN". A plan with
+    # the spaced form (and no SEQ_SCAN) must still warn -- pins the exact
+    # "Sequential Scan" literal and its casing.
+    backend = _FakeBackend(explain_rows=[(0, "Sequential Scan on orders")])
+    host = _Host(backend=backend, translated="SELECT id FROM orders")
+    result = host.explain("q", allowed_tables={"orders"})
+    assert result["warning"] == "Full table scan on orders (no index)"
+
+
+def test_explain_no_warning_without_sequential_scan():
+    backend = _FakeBackend(explain_rows=[(0, "HASH_JOIN"), (1, "estimated ~ 10 rows")])
+    host = _Host(backend=backend, translated="SELECT id FROM orders")
+    result = host.explain("q", allowed_tables={"orders"})
+    assert result["warning"] is None
+    assert result["estimated_rows"] == 10
+
+
+def test_explain_single_column_row_uses_first_element():
+    # A one-element explain row has no row[1]; the plan text must come from
+    # str(row[0]). Kills `len(row) > 1`->`>= 1` (IndexError) and `str(row[0])`->
+    # `str(None)`/`str(row[1])` (which would drop the SEQ_SCAN text).
+    backend = _FakeBackend(explain_rows=[("SEQ_SCAN orders ~ 42 rows",)])
+    host = _Host(backend=backend, translated="SELECT id FROM orders")
+    result = host.explain("q", allowed_tables={"orders"})
+    assert result["estimated_rows"] == 42
+    assert result["warning"] == "Full table scan on orders (no index)"
+
+
+def test_explain_normalizes_box_drawing_around_estimate():
+    # EXPLAIN output draws box characters; they are normalized to spaces so the
+    # row-estimate regex still matches. A box char glued to the estimate must be
+    # blanked. Kills the box-drawing re.sub mutants.
+    backend = _FakeBackend(explain_rows=[(0, "estimated ~ 1234│rows")])
+    host = _Host(backend=backend, translated="SELECT id FROM orders")
+    result = host.explain("q", allowed_tables={"orders"})
+    assert result["estimated_rows"] == 1234
+
+
+def test_explain_passes_scoped_sql_to_backend_explain():
+    backend = _FakeBackend(explain_rows=[(0, "x")])
+    host = _Host(backend=backend, translated="SELECT id FROM orders")
+    host.explain("q", allowed_tables={"orders"})
+    assert backend.explained == ["SELECT id FROM orders"]
+
+
+def test_explain_forwards_question_tenant_and_scope():
+    backend = _FakeBackend(explain_rows=[(0, "x")])
+    host = _Host(backend=backend)
+    host.explain("the question", tenant_id="acme", allowed_tables={"orders"})
+    assert host.translate_calls == [("the question", "acme")]
+    assert host.scope_calls == [("SELECT id FROM orders", "acme")]
+
+
+def test_explain_uses_passed_allowed_tables_not_default():
+    backend = _FakeBackend(explain_rows=[(0, "x")])
+    host = _Host(catalog=_Catalog(tables=()), backend=backend, translated="SELECT id FROM orders")
+    result = host.explain("q", allowed_tables={"orders"})
+    assert result["tables_accessed"] == ["orders"]
+
+
+def test_explain_uses_default_allowed_tables_when_none():
+    backend = _FakeBackend(explain_rows=[(0, "x")])
+    host = _Host(catalog=_Catalog(tables=("orders",)), backend=backend)
+    result = host.explain("q", allowed_tables=None)
+    assert result["tables_accessed"] == ["orders"]
+
+
+def test_explain_reports_llm_engine_when_key_and_anthropic_present(monkeypatch):
+    # When a key is configured and `import anthropic` succeeds, the engine label
+    # is "llm". Pins the getattr(nl_engine, "_ANTHROPIC_KEY", "") read: a mutant
+    # that reads the wrong attribute/object collapses to "rule_based".
+    nl_engine_mod = sys.modules["src.serving.semantic_layer.nl_engine"]
+    monkeypatch.setattr(nl_engine_mod, "_ANTHROPIC_KEY", "sk-test", raising=False)
+    monkeypatch.setitem(sys.modules, "anthropic", types.ModuleType("anthropic"))
+    backend = _FakeBackend(explain_rows=[(0, "x")])
+    host = _Host(backend=backend, translated="SELECT id FROM orders")
+    result = host.explain("q", allowed_tables={"orders"})
+    assert result["engine"] == "llm"
+
+
+def test_explain_wraps_backend_error_as_value_error():
+    class _BoomBackend(_FakeBackend):
+        def explain(self, sql):
+            raise nlq_module.BackendExecutionError("nope")
+
+    host = _Host(backend=_BoomBackend())
+    with pytest.raises(ValueError, match="Query explanation failed: nope"):
+        host.explain("q", allowed_tables={"orders"})
+
+
+# --------------------------------------------------------------------------- #
+# Known equivalent survivors (documented, not gaps). 94.4% reproduced on the
+# WSL/mutmut harness; the survivors below cannot change observable behaviour:
+#   * tables-extraction in explain() parses with sqlglot and falls back to a
+#     regex on any exception. For the bare-table SQL that validate_nl_sql permits
+#     (schema/catalog qualifiers are rejected upstream) both paths return the same
+#     list, so the parse-dialect / parse-arg mutants and the fromkeys/find_all
+#     mutants in that block are equivalent.
+#   * the explain() box-drawing re.sub case mutant (╿<->╿ is the same
+#     code point range) and the plan-join separator mutant (the joined plan text
+#     is not returned, only scanned for the same substrings either way).
+#   * the explain() getattr default-value mutants: nl_engine always defines
+#     _ANTHROPIC_KEY, so the third (default) argument is unreachable.
+#   * elapsed_ms `* 1000`->`* 1001`: indistinguishable at unit-test timescales.
+#   * the span SQL slice `[:200]`->`[:201]`: in the `len(sql) <= 200` branch the
+#     slice never reaches 200 chars, so the extra index is a no-op.
+# The telemetry span guards and the <=200 / [:197] truncation boundary ARE pinned
+# by the records_span / span-boundary tests above (telemetry re-enabled).
+# --------------------------------------------------------------------------- #
diff --git a/tests/unit/test_sql_builder_mutation.py b/tests/unit/test_sql_builder_mutation.py
new file mode 100644
index 0000000..499f98d
--- /dev/null
+++ b/tests/unit/test_sql_builder_mutation.py
@@ -0,0 +1,583 @@
+"""Narrow, duckdb-free mutation test for the tenant SQL builder
+(src/serving/semantic_layer/query/sql_builder.py).
+
+This is the test the mutation gate runs against
+``serving/semantic_layer/query/sql_builder.py`` (see scripts/mutation_report.py
+MODULE_TARGETS). Every entity/metric SQL string the engine executes flows through
+``_scope_sql`` / ``_qualify_table`` here, so a surviving mutant in the
+tenant-schema qualification is a cross-tenant read (audit_28_06_26.md #5), exactly
+the kind of code a mutation gate should pin.
+
+Three design rules, shared with test_rate_limiter_mutation.py /
+test_masking_mutation.py / test_sql_guard_mutation.py (see fable_handoff.md
+cont.16-19):
+
+1. **duckdb-free.** The ordinary query-engine tests build a QueryEngine, which
+   imports duckdb's compiled subpackage and crashes mutmut's ``mutants/``
+   workspace. sql_builder itself imports only sqlglot + a tenant-id helper, so
+   this file touches the mixin methods through a hand-built host and never drags
+   duckdb in.
+
+2. **No fixtures -- inline construction + direct method calls.** With
+   ``mutate_only_covered_lines = true`` the gate collects coverage first; a
+   fixture-built host left every method line uncovered, so only ``__init__`` got
+   mutated (score 0%). Building the host inline and calling ``_scope_sql`` /
+   ``_qualify_table`` / ``_quote_literal`` directly attributes every method line.
+
+3. **Import shims.** The mutation harness copies ``src/serving`` to a top-level
+   ``serving`` package *without* ``src`` (copying ``src`` would shadow it). Three
+   things on sql_builder's import path would otherwise fail or drag duckdb in:
+   ``from src.serving.api.auth import get_current_tenant_id`` (no ``src``), the
+   ``serving.semantic_layer.query`` package ``__init__`` (``from .engine import
+   QueryEngine`` -> duckdb) and ``.contracts`` (imports the duckdb backend for
+   type hints). We register stand-ins for all three before importing the module.
+   They are only used as a default-arg helper and as runtime-unused type hints
+   (``from __future__ import annotations`` keeps the annotations as strings), so
+   the stand-ins change no executed logic. Under ordinary pytest the real ``src``
+   package is importable, so no shim is installed and the real modules load.
+
+Reproduced at 96.0% (killed 167, survived 7) via the WSL/mutmut harness (py3.10);
+the CI gate (mutation.yml on py3.11) is the source of truth. The 7 survivors are
+genuine equivalent mutants, not gaps: four mutate the *string* inside
+``cast("dict[...]", value)`` -- the runtime ``typing.cast`` ignores its first
+argument, so any text change there is a no-op -- and three flip
+``parse_one(..., dialect="duckdb")`` / ``parsed.sql(dialect=...)`` to
+``dialect=None``, which renders the plain SELECTs this builder handles identically.
+"""
+
+from __future__ import annotations
+
+import sys
+import types
+from datetime import datetime
+
+
+def _src_package_available() -> bool:
+    # In the mutation harness the workspace has no top-level `src` package
+    # (only `serving`); under ordinary pytest `src` is the real package.
+    try:
+        import src  # noqa: F401
+
+        return True
+    except ModuleNotFoundError:
+        return False
+
+
+def _ensure_module(name: str) -> types.ModuleType:
+    module = sys.modules.get(name)
+    if module is None:
+        module = types.ModuleType(name)
+        sys.modules[name] = module
+    return module
+
+
+def _install_harness_stubs() -> None:
+    # src.serving.api.auth.get_current_tenant_id: a contextvar reader in
+    # production; here a default-arg passthrough (the host controls the tenant).
+    _ensure_module("src")
+    serving_pkg = _ensure_module("src.serving")
+    api_pkg = _ensure_module("src.serving.api")
+    auth_pkg = _ensure_module("src.serving.api.auth")
+
+    def get_current_tenant_id(default: str | None = None) -> str | None:
+        return default
+
+    auth_pkg.get_current_tenant_id = get_current_tenant_id
+    api_pkg.auth = auth_pkg
+    serving_pkg.api = api_pkg
+
+    # Neuter the query package __init__ (`from .engine import QueryEngine`) and
+    # the contracts module; both pull duckdb via the QueryEngine import chain and
+    # neither contributes runtime behaviour to sql_builder.
+    engine_stub = _ensure_module("serving.semantic_layer.query.engine")
+    engine_stub.QueryEngine = object
+    contracts_stub = _ensure_module("serving.semantic_layer.query.contracts")
+    contracts_stub.SQLBuilderHost = object
+    contracts_stub.QueryExecutionHost = object
+    contracts_stub.NLQueryHost = object
+
+
+if not _src_package_available():
+    _install_harness_stubs()
+
+try:  # mutation-harness workspace exposes it as a top-level package
+    from serving.semantic_layer.query import sql_builder as sql_builder_module
+except ImportError:  # ordinary pytest sees it under the src package
+    from src.serving.semantic_layer.query import sql_builder as sql_builder_module
+
+import pytest
+
+SQLBuilderMixin = sql_builder_module.SQLBuilderMixin
+
+
+# --------------------------------------------------------------------------- #
+# In-process host doubles (no duckdb, no real tenant router).
+# --------------------------------------------------------------------------- #
+
+
+class _Entity:
+    def __init__(self, table: str) -> None:
+        self.table = table
+
+
+class _Catalog:
+    def __init__(self, *tables: str) -> None:
+        self.entities = {table: _Entity(table) for table in tables}
+
+
+class _Tenant:
+    def __init__(self, duckdb_schema: str) -> None:
+        self.duckdb_schema = duckdb_schema
+
+
+class _TenantsConfig:
+    def __init__(self, tenants: tuple[_Tenant, ...]) -> None:
+        self.tenants = tenants
+
+
+class _TenantRouter:
+    def __init__(
+        self,
+        *,
+        has_config: bool = False,
+        schema_by_tenant: dict[str | None, str] | None = None,
+        tenants: tuple[_Tenant, ...] = (),
+    ) -> None:
+        self._has_config = has_config
+        self._schema_by_tenant = dict(schema_by_tenant or {})
+        self._tenants = tenants
+
+    def has_config(self) -> bool:
+        return self._has_config
+
+    def get_duckdb_schema(self, tenant_id: str | None) -> str | None:
+        return self._schema_by_tenant.get(tenant_id)
+
+    def load(self) -> _TenantsConfig:
+        return _TenantsConfig(self._tenants)
+
+
+class _Host(SQLBuilderMixin):
+    def __init__(
+        self,
+        *,
+        catalog: _Catalog,
+        tenant_router: _TenantRouter,
+        table_columns: dict[str, set[str]] | None = None,
+        cache: dict | None = None,
+    ) -> None:
+        self.catalog = catalog
+        self._tenant_router = tenant_router
+        self._table_columns_map = dict(table_columns or {})
+        if cache is not None:
+            self._qualified_table_cache = cache
+
+    def _table_columns(self, table_name: str) -> set[str]:
+        return self._table_columns_map.get(table_name, set())
+
+
+def _host(**kwargs) -> _Host:
+    catalog = kwargs.pop("catalog", _Catalog("orders", "customers"))
+    tenant_router = kwargs.pop("tenant_router", _TenantRouter())
+    return _Host(catalog=catalog, tenant_router=tenant_router, **kwargs)
+
+
+# --------------------------------------------------------------------------- #
+# _resolve_tenant_id: explicit id wins; else the context reader with a
+# config-dependent default.
+# --------------------------------------------------------------------------- #
+
+
+def test_resolve_tenant_id_returns_explicit_id():
+    # tenant_id is not None -> returned verbatim, the context reader is not
+    # consulted. Kills `is not None` -> `is None`.
+    host = _host()
+    assert host._resolve_tenant_id("acme") == "acme"
+
+
+def test_resolve_tenant_id_defaults_to_demo_without_config():
+    # No tenant config -> default_tenant is "demo", handed to the context reader.
+    host = _host(tenant_router=_TenantRouter(has_config=False))
+    assert host._resolve_tenant_id(None) == "demo"
+
+
+def test_resolve_tenant_id_no_default_when_config_present():
+    # With a tenant config the default is None (a multi-tenant deployment must
+    # not silently fall back to "demo"). Kills `not self._tenant_router...` flip
+    # and the "demo" literal leaking into the configured path.
+    host = _host(tenant_router=_TenantRouter(has_config=True))
+    assert host._resolve_tenant_id(None) is None
+
+
+def test_resolve_tenant_id_uses_context_value(monkeypatch):
+    # When the context reader returns a tenant, _resolve_tenant_id forwards it.
+    monkeypatch.setattr(
+        sql_builder_module, "get_current_tenant_id", lambda default=None: "ctx-tenant"
+    )
+    host = _host(tenant_router=_TenantRouter(has_config=True))
+    assert host._resolve_tenant_id(None) == "ctx-tenant"
+
+
+def test_resolve_tenant_id_passes_default_through_to_reader(monkeypatch):
+    # The default arg must reach the reader (a `default=...`->`default=None`
+    # mutant would drop "demo"). Echo the default back to prove it was passed.
+    monkeypatch.setattr(sql_builder_module, "get_current_tenant_id", lambda default=None: default)
+    host = _host(tenant_router=_TenantRouter(has_config=False))
+    assert host._resolve_tenant_id(None) == "demo"
+
+
+# --------------------------------------------------------------------------- #
+# _get_tenant_schema: schema lookup + identifier validation.
+# --------------------------------------------------------------------------- #
+
+
+def test_get_tenant_schema_returns_valid_schema():
+    host = _host(
+        tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "acme_schema"})
+    )
+    assert host._get_tenant_schema("acme") == "acme_schema"
+
+
+def test_get_tenant_schema_returns_none_when_unmapped():
+    # Unknown tenant -> get_duckdb_schema returns None -> early None. Kills the
+    # `if schema is None` flip (which would fall through to the regex on None).
+    host = _host(tenant_router=_TenantRouter(has_config=True, schema_by_tenant={}))
+    assert host._get_tenant_schema("acme") is None
+
+
+def test_get_tenant_schema_rejects_invalid_identifier():
+    host = _host(
+        tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "bad-schema!"})
+    )
+    with pytest.raises(ValueError, match="Invalid DuckDB schema 'bad-schema!' for tenant 'acme'"):
+        host._get_tenant_schema("acme")
+
+
+def test_get_tenant_schema_accepts_leading_underscore():
+    # The regex allows a leading underscore; pin it so `[A-Za-z_]`->`[A-Za-z]`
+    # (which would reject `_staging`) dies.
+    host = _host(
+        tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "_staging"})
+    )
+    assert host._get_tenant_schema("acme") == "_staging"
+
+
+# --------------------------------------------------------------------------- #
+# _quote_identifier: double-quote wrapping with embedded-quote doubling.
+# --------------------------------------------------------------------------- #
+
+
+def test_quote_identifier_wraps_in_double_quotes():
+    assert _host()._quote_identifier("orders") == '"orders"'
+
+
+def test_quote_identifier_doubles_embedded_quotes():
+    # An embedded `"` must be doubled so the identifier can't be broken out of.
+    assert _host()._quote_identifier('a"b') == '"a""b"'
+
+
+# --------------------------------------------------------------------------- #
+# _quote_literal: per-type rendering (the order matters: bool before int).
+# --------------------------------------------------------------------------- #
+
+
+def test_quote_literal_none_is_sql_null():
+    assert _host()._quote_literal(None) == "NULL"
+
+
+def test_quote_literal_true_is_sql_true_not_one():
+    # bool is checked before int (bool is an int subclass); a dropped bool branch
+    # would render True as "1". Pin both bool values.
+    assert _host()._quote_literal(True) == "TRUE"
+
+
+def test_quote_literal_false_is_sql_false():
+    assert _host()._quote_literal(False) == "FALSE"
+
+
+def test_quote_literal_int_is_bare():
+    assert _host()._quote_literal(42) == "42"
+
+
+def test_quote_literal_float_is_bare():
+    assert _host()._quote_literal(3.5) == "3.5"
+
+
+def test_quote_literal_datetime_uses_iso_seconds():
+    assert _host()._quote_literal(datetime(2026, 6, 30, 14, 5, 9)) == "'2026-06-30 14:05:09'"
+
+
+def test_quote_literal_string_is_quoted_and_escaped():
+    # A single quote in a string literal must be doubled (anti-injection).
+    assert _host()._quote_literal("O'Brien") == "'O''Brien'"
+
+
+# --------------------------------------------------------------------------- #
+# _qualify_table: cache, cross-tenant guard, schema qualification.
+# --------------------------------------------------------------------------- #
+
+
+def test_qualify_table_qualifies_with_tenant_schema():
+    host = _host(
+        tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "acme_schema"}),
+    )
+    assert host._qualify_table("orders", "acme") == '"acme_schema"."orders"'
+
+
+def test_qualify_table_returns_bare_name_when_no_schema():
+    # resolved tenant maps to no schema -> the table is returned unqualified.
+    host = _host(tenant_router=_TenantRouter(has_config=False, schema_by_tenant={}))
+    assert host._qualify_table("orders", "acme") == "orders"
+
+
+def test_qualify_table_uses_cache_when_present():
+    # A pre-seeded cache entry is returned without recomputation. Kills the
+    # `cache is not None and cache_key in cache` guard flips.
+    cache = {("orders", "acme"): "CACHED"}
+    host = _host(
+        tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "acme_schema"}),
+        cache=cache,
+    )
+    assert host._qualify_table("orders", "acme") == "CACHED"
+
+
+def test_qualify_table_writes_qualified_result_to_cache():
+    cache: dict = {}
+    host = _host(
+        tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "acme_schema"}),
+        cache=cache,
+    )
+    host._qualify_table("orders", "acme")
+    assert cache[("orders", "acme")] == '"acme_schema"."orders"'
+
+
+def test_qualify_table_writes_bare_name_to_cache():
+    cache: dict = {}
+    host = _host(tenant_router=_TenantRouter(has_config=False, schema_by_tenant={}), cache=cache)
+    host._qualify_table("orders", "acme")
+    assert cache[("orders", "acme")] == "orders"
+
+
+def test_qualify_table_rejects_invalid_schema():
+    host = _host(
+        tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "bad schema"}),
+    )
+    with pytest.raises(ValueError, match="Invalid DuckDB schema 'bad schema' for tenant 'acme'"):
+        host._qualify_table("orders", "acme")
+
+
+def test_qualify_table_requires_tenant_context_for_scoped_table(monkeypatch):
+    # resolved tenant is None but a configured tenant owns columns for the table
+    # -> ambiguous, so reading it without a tenant must raise (no silent
+    # cross-tenant read). Pins the message and the table name.
+    monkeypatch.setattr(sql_builder_module, "get_current_tenant_id", lambda default=None: None)
+    router = _TenantRouter(
+        has_config=True,
+        schema_by_tenant={None: None},
+        tenants=(_Tenant("acme_schema"),),
+    )
+    host = _host(tenant_router=router, table_columns={'"acme_schema"."orders"': {"id"}})
+    with pytest.raises(
+        ValueError, match="Tenant context is required for tenant-scoped table 'orders'"
+    ):
+        host._qualify_table("orders", None)
+
+
+def test_qualify_table_no_context_passes_when_no_tenant_owns_table(monkeypatch):
+    # Same None-context path, but no configured tenant has columns for the table
+    # -> no raise, falls through to the (here unmapped) schema -> bare name. Kills
+    # a mutant that always raises in the loop.
+    monkeypatch.setattr(sql_builder_module, "get_current_tenant_id", lambda default=None: None)
+    router = _TenantRouter(
+        has_config=True,
+        schema_by_tenant={None: None},
+        tenants=(_Tenant("acme_schema"),),
+    )
+    host = _host(tenant_router=router, table_columns={})
+    assert host._qualify_table("orders", None) == "orders"
+
+
+# --------------------------------------------------------------------------- #
+# _scope_sql: the core. Two paths -- no tenant schema (validate-only) and
+# tenant schema (rewrite every known table into the schema).
+# --------------------------------------------------------------------------- #
+
+
+def test_scope_sql_rewrites_known_table_into_tenant_schema():
+    host = _host(
+        tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "acme_schema"}),
+    )
+    scoped = host._scope_sql("SELECT * FROM orders", "acme")
+    assert scoped == 'SELECT * FROM "acme_schema"."orders"'
+
+
+def test_scope_sql_rewrites_pipeline_events_table():
+    # pipeline_events is added to known_tables outside the catalog; pin that the
+    # `.add("pipeline_events")` line is real.
+    host = _host(
+        catalog=_Catalog("orders"),
+        tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "acme_schema"}),
+    )
+    scoped = host._scope_sql("SELECT * FROM pipeline_events", "acme")
+    assert scoped == 'SELECT * FROM "acme_schema"."pipeline_events"'
+
+
+def test_scope_sql_leaves_unknown_table_untouched():
+    # A table not in the catalog is not rewritten even under a tenant schema.
+    host = _host(
+        catalog=_Catalog("orders"),
+        tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "acme_schema"}),
+    )
+    scoped = host._scope_sql("SELECT * FROM widgets", "acme")
+    assert "acme_schema" not in scoped
+    assert "widgets" in scoped
+
+
+def test_scope_sql_does_not_rewrite_cte_name():
+    # A CTE named like a catalog table must not be schema-qualified (it's a local
+    # alias, not the physical table). Kills dropping the `in cte_names` skip.
+    host = _host(
+        catalog=_Catalog("orders"),
+        tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "acme_schema"}),
+    )
+    scoped = host._scope_sql("WITH orders AS (SELECT 1 AS id) SELECT id FROM orders", "acme")
+    assert "acme_schema" not in scoped
+
+
+def test_scope_sql_no_schema_returns_sql_unchanged():
+    # No tenant schema -> the SQL text is returned verbatim (validation-only path).
+    host = _host(tenant_router=_TenantRouter(has_config=False, schema_by_tenant={}))
+    sql = "SELECT * FROM orders"
+    assert host._scope_sql(sql, "acme") == sql
+
+
+def test_scope_sql_no_schema_enforces_tenant_context(monkeypatch):
+    # In the no-schema path each known unqualified table is still routed through
+    # _qualify_table, so an ambiguous tenant-scoped table raises rather than
+    # leaking. Pins that the no-schema branch actually calls _qualify_table.
+    monkeypatch.setattr(sql_builder_module, "get_current_tenant_id", lambda default=None: None)
+    router = _TenantRouter(
+        has_config=True,
+        schema_by_tenant={None: None},
+        tenants=(_Tenant("acme_schema"),),
+    )
+    host = _host(
+        catalog=_Catalog("orders"),
+        tenant_router=router,
+        table_columns={'"acme_schema"."orders"': {"id"}},
+    )
+    with pytest.raises(
+        ValueError, match="Tenant context is required for tenant-scoped table 'orders'"
+    ):
+        host._scope_sql("SELECT * FROM orders", None)
+
+
+def test_scope_sql_no_schema_skips_already_qualified_table(monkeypatch):
+    # An already schema-qualified table in the no-schema path is skipped (db set),
+    # so it does not trigger the tenant-context guard. Kills dropping the
+    # `table.db` part of the skip condition.
+    calls: list[tuple[str, str | None]] = []
+    monkeypatch.setattr(sql_builder_module, "get_current_tenant_id", lambda default=None: None)
+    router = _TenantRouter(has_config=True, schema_by_tenant={None: None})
+    host = _host(catalog=_Catalog("orders"), tenant_router=router)
+    original = host._qualify_table
+
+    def _spy(table_name: str, tenant_id: str | None) -> str:
+        calls.append((table_name, tenant_id))
+        return original(table_name, tenant_id)
+
+    host._qualify_table = _spy  # type: ignore[method-assign]
+    host._scope_sql('SELECT * FROM other_schema."orders"', None)
+    assert calls == []  # qualified table was skipped, _qualify_table not called
+
+
+# --------------------------------------------------------------------------- #
+# Targeted mutant-killers: identifier-regex casing, the explicit-tenant
+# short-circuit, the skip-condition boolean structure, continue-vs-break, and
+# the catalog-clearing in the re-scope branch.
+# --------------------------------------------------------------------------- #
+
+
+def test_get_tenant_schema_accepts_uppercase_identifier():
+    # The identifier regex must accept uppercase letters: a `[A-Za-z_]`->`[a-za-z_]`
+    # mutant would reject a valid mixed-case schema and raise. Pin acceptance.
+    host = _host(tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "Acme_DW"}))
+    assert host._get_tenant_schema("acme") == "Acme_DW"
+
+
+def test_qualify_table_accepts_uppercase_schema():
+    # Same regex-casing pin on the _qualify_table copy of the check.
+    host = _host(tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "Acme_DW"}))
+    assert host._qualify_table("orders", "acme") == '"Acme_DW"."orders"'
+
+
+def test_qualify_table_with_explicit_tenant_skips_ambiguity_scan():
+    # resolved tenant is not None -> the `resolved is None AND has_config` guard
+    # is False, so the cross-tenant ambiguity scan is skipped. An `and`->`or`
+    # mutant would enter the scan and wrongly raise on a foreign-tenant column
+    # match. Pin that an explicit tenant qualifies without raising.
+    router = _TenantRouter(
+        has_config=True,
+        schema_by_tenant={"acme": "acme_schema"},
+        tenants=(_Tenant("other_schema"),),
+    )
+    host = _host(tenant_router=router, table_columns={'"other_schema"."orders"': {"id"}})
+    assert host._qualify_table("orders", "acme") == '"acme_schema"."orders"'
+
+
+def test_scope_sql_no_schema_qualifies_only_known_non_cte_tables():
+    # In the no-schema path _qualify_table is called for each known, unqualified,
+    # non-CTE table with the forwarded tenant id -- and NOT for unknown tables.
+    # Pins: the `not-in-known OR in-cte` boolean structure (an AND-flip would
+    # qualify unknown tables), continue-not-break (a break would stop before the
+    # known table), and the forwarded tenant id (a `->None` would drop it).
+    calls: list[tuple[str, str | None]] = []
+    router = _TenantRouter(has_config=False, schema_by_tenant={"acme": None})
+    host = _host(catalog=_Catalog("orders"), tenant_router=router)
+    original = host._qualify_table
+    host._qualify_table = (  # type: ignore[method-assign]
+        lambda name, tid: calls.append((name, tid)) or original(name, tid)
+    )
+    host._scope_sql("SELECT * FROM widgets JOIN orders ON widgets.id = orders.id", "acme")
+    assert calls == [("orders", "acme")]
+
+
+def test_scope_sql_no_schema_skips_cte_named_like_table():
+    # A CTE named like a catalog table must not be qualified. The cte-name check
+    # lowercases the name; an `in cte_names`->`upper() in cte_names` mutant would
+    # miss the lowercase CTE and qualify it. Pin that no qualify call happens.
+    calls: list[tuple[str, str | None]] = []
+    router = _TenantRouter(has_config=False, schema_by_tenant={"acme": None})
+    host = _host(catalog=_Catalog("orders"), tenant_router=router)
+    original = host._qualify_table
+    host._qualify_table = (  # type: ignore[method-assign]
+        lambda name, tid: calls.append((name, tid)) or original(name, tid)
+    )
+    host._scope_sql("WITH orders AS (SELECT 1 AS id) SELECT id FROM orders", "acme")
+    assert calls == []
+
+
+def test_scope_sql_schema_skips_unknown_then_qualifies_known():
+    # schema branch: an unknown table is skipped with continue (not break) so a
+    # later known table is still rewritten. A continue->break mutant stops early
+    # and leaves 'orders' unqualified.
+    host = _host(
+        catalog=_Catalog("orders"),
+        tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "acme_schema"}),
+    )
+    out = host._scope_sql("SELECT * FROM widgets JOIN orders ON widgets.id = orders.id", "acme")
+    assert '"acme_schema"."orders"' in out
+    assert '"acme_schema"."widgets"' not in out  # unknown table stays bare
+
+
+def test_scope_sql_schema_clears_existing_qualification():
+    # A table arriving already catalog/schema-qualified is fully re-scoped into
+    # the caller's tenant schema -- the foreign catalog is cleared
+    # (audit_28_06_26.md #5). Pins table.set("catalog", None): a mutant that
+    # mis-keys or skips the catalog clear leaves the foreign catalog in the SQL.
+    host = _host(
+        catalog=_Catalog("orders"),
+        tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "acme_schema"}),
+    )
+    out = host._scope_sql("SELECT * FROM oldcat.oldschema.orders", "acme")
+    assert out == 'SELECT * FROM "acme_schema"."orders"'

From 2aebace031dabeb7e632c379fc4e8e8c534fe670 Mon Sep 17 00:00:00 2001
From: JuliaEdom <uedomskikh@gmail.com>
Date: Tue, 30 Jun 2026 01:41:46 +0300
Subject: [PATCH 2/2] fix(mutation): make sql_builder/nl_queries tests
 duckdb-free in the harness (B2)

The B2 duckdb-free mutation tests gated stub installation on `import src`
succeeding ("workspace has no top-level src"). That assumption is false: the
editable-installed repo keeps the real `src` on sys.path even inside mutmut's
mutants/ workspace, so the stubs were skipped there and the import fell through
to the real `src.serving.semantic_layer.query` package -> `from .engine import
QueryEngine` -> `import duckdb`. Under mutmut's coverage-instrumented stats pass
on py3.11 that trips duckdb's lazy `_duckdb._sqltypes` import (the same break
ci.yml already works around with `coverage run`), so `mutmut run` exited -11 with
no scored mutants and the gate failed.

Discriminate on the real signal instead: the workspace copies src/serving to a
TOP-LEVEL `serving` package, which ordinary pytest never has. Gate stub install
on `importlib.util.find_spec("serving")`. The stub path (engine/contracts/auth
neutered to no-ops) was effectively never exercised before -- WSL py3.10 also had
`src` importable, so its 96.0%/94.4% came from the real modules.

Verified locally in a simulated workspace (top-level `serving` + editable `src`,
as in CI): both modules import with duckdb NOT in sys.modules, subject under test
is `serving.semantic_layer.query.*` (the mutation target), 101 tests pass via the
stub path; the ordinary-pytest path still passes 101.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 tests/unit/test_nl_queries_mutation.py  | 20 ++++++++++++++------
 tests/unit/test_sql_builder_mutation.py | 22 ++++++++++++++--------
 2 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/tests/unit/test_nl_queries_mutation.py b/tests/unit/test_nl_queries_mutation.py
index f05e4e1..7273868 100644
--- a/tests/unit/test_nl_queries_mutation.py
+++ b/tests/unit/test_nl_queries_mutation.py
@@ -49,12 +49,20 @@
 import types
 
 
-def _src_package_available() -> bool:
-    try:
-        import src  # noqa: F401
+def _in_mutation_workspace() -> bool:
+    # mutmut's mutants/ workspace copies src/serving to a TOP-LEVEL `serving`
+    # package (scripts/mutation_report.py prepare_workspace); ordinary pytest has
+    # no top-level `serving` (only src.serving), so its presence cleanly marks the
+    # harness. The old `import src` probe did not: the editable-installed repo keeps
+    # the real `src` importable even inside the workspace, so the stubs were skipped
+    # there and the real duckdb-backed engine import crashed mutmut's
+    # coverage-instrumented stats pass on py3.11 (duckdb's lazy `_duckdb._sqltypes`
+    # import breaks under coverage tracing -- see .github/workflows/ci.yml).
+    import importlib.util
 
-        return True
-    except ModuleNotFoundError:
+    try:
+        return importlib.util.find_spec("serving") is not None
+    except (ImportError, ValueError):
         return False
 
 
@@ -113,7 +121,7 @@ class BackendExecutionError(RuntimeError):
     contracts_stub.NLQueryHost = object
 
 
-if not _src_package_available():
+if _in_mutation_workspace():
     _install_harness_stubs()
 
 try:  # mutation-harness workspace exposes it as a top-level package
diff --git a/tests/unit/test_sql_builder_mutation.py b/tests/unit/test_sql_builder_mutation.py
index 499f98d..9a8ab83 100644
--- a/tests/unit/test_sql_builder_mutation.py
+++ b/tests/unit/test_sql_builder_mutation.py
@@ -52,14 +52,20 @@
 from datetime import datetime
 
 
-def _src_package_available() -> bool:
-    # In the mutation harness the workspace has no top-level `src` package
-    # (only `serving`); under ordinary pytest `src` is the real package.
-    try:
-        import src  # noqa: F401
+def _in_mutation_workspace() -> bool:
+    # mutmut's mutants/ workspace copies src/serving to a TOP-LEVEL `serving`
+    # package (scripts/mutation_report.py prepare_workspace); ordinary pytest has
+    # no top-level `serving` (only src.serving), so its presence cleanly marks the
+    # harness. The old `import src` probe did not: the editable-installed repo keeps
+    # the real `src` importable even inside the workspace, so the stubs were skipped
+    # there and the real duckdb-backed engine import crashed mutmut's
+    # coverage-instrumented stats pass on py3.11 (duckdb's lazy `_duckdb._sqltypes`
+    # import breaks under coverage tracing -- see .github/workflows/ci.yml).
+    import importlib.util
 
-        return True
-    except ModuleNotFoundError:
+    try:
+        return importlib.util.find_spec("serving") is not None
+    except (ImportError, ValueError):
         return False
 
 
@@ -97,7 +103,7 @@ def get_current_tenant_id(default: str | None = None) -> str | None:
     contracts_stub.NLQueryHost = object
 
 
-if not _src_package_available():
+if _in_mutation_workspace():
     _install_harness_stubs()
 
 try:  # mutation-harness workspace exposes it as a top-level package