From 96f4ca128f1cfa371bb9aceff39e246e67a604b8 Mon Sep 17 00:00:00 2001 From: JuliaEdom Date: Tue, 30 Jun 2026 01:12:12 +0300 Subject: [PATCH 1/2] test(mutation): mutate sql_builder + nl_queries live (B2) Close the declared-vs-live gap for the query surface: sql_builder.py and nl_queries.py were declared mutation targets but the gate only ran retry, sql_guard, masking and rate_limiter. Add duckdb-free fixtureless mutation tests for both and wire them into scripts/mutation_report.py MODULE_TARGETS @ 0.90, so 6 of 8 security modules are now mutated live (auth manager / key_rotation remain declared-only pending their own duckdb-free tests). Both modules live under the query package whose __init__ imports the duckdb-backed QueryEngine, so each test stubs serving.semantic_layer.query.{engine,contracts} (plus the src.* helpers) before importing the module under a top-level `serving` name. nl_queries' .sql_guard shim is repointed at the real top-level sql_guard so the validate_nl_sql boundary runs against the genuine guard. Reproduced on the WSL/mutmut harness (py3.10): sql_builder 96.0% (killed 167/174), nl_queries 94.4% (killed 323/342). Remaining survivors are documented equivalents (typing.cast type-string, dialect=duckdb->None renders, sqlglot/regex table-extraction convergence, telemetry no-ops, *1001 elapsed). The CI gate (mutation.yml, py3.11) is the source of truth. Co-Authored-By: Claude Opus 4.8 (1M context) --- pyproject.toml | 19 +- scripts/mutation_report.py | 32 +- tests/unit/test_mutmut_policy.py | 11 +- tests/unit/test_nl_queries_mutation.py | 855 ++++++++++++++++++++++++ tests/unit/test_sql_builder_mutation.py | 583 ++++++++++++++++ 5 files changed, 1476 insertions(+), 24 deletions(-) create mode 100644 tests/unit/test_nl_queries_mutation.py create mode 100644 tests/unit/test_sql_builder_mutation.py diff --git a/pyproject.toml b/pyproject.toml index 24fa5aa..bf3ae6e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -206,15 +206,16 @@ asyncio_mode = "auto" # security-critical surfaces). It is a superset of what the CI gate actually # mutates: scripts/mutation_report.py drives the gate from its own # MODULE_TARGETS, which now mutates sdk/agentflow/retry.py, -# src/serving/semantic_layer/sql_guard.py, src/serving/masking.py AND -# src/serving/api/rate_limiter.py live. The serving modules are mutated as a -# top-level `serving` package against duckdb-free narrow tests: mutmut's -# trampoline rejects a module name starting with `src.`, which (not duckdb) was -# the real blocker. The remaining serving modules below stay declared-only for -# now -- their unit tests pull the duckdb-backed query engine, so mutating them -# in isolation needs a duckdb-free test per module (the pattern sql_guard, -# masking and rate_limiter now use); the blocker is the test import chain, not -# the module. See scripts/mutation_report.py. +# src/serving/semantic_layer/sql_guard.py, src/serving/masking.py, +# src/serving/api/rate_limiter.py, src/serving/semantic_layer/query/sql_builder.py +# AND src/serving/semantic_layer/query/nl_queries.py live. The serving modules are +# mutated as a top-level `serving` package against duckdb-free narrow tests: +# mutmut's trampoline rejects a module name starting with `src.`, which (not +# duckdb) was the real blocker. The remaining declared serving surface -- the auth +# manager / key_rotation -- stays declared-only for now: its unit tests pull the +# duckdb-backed query engine, so mutating it in isolation needs a duckdb-free test +# (the pattern the six live modules now use); the blocker is the test import +# chain, not the module. See scripts/mutation_report.py. paths_to_mutate = [ "src/serving/api/auth/manager.py", "src/serving/api/auth/key_rotation.py", diff --git a/scripts/mutation_report.py b/scripts/mutation_report.py index 970c7aa..a74c378 100644 --- a/scripts/mutation_report.py +++ b/scripts/mutation_report.py @@ -30,16 +30,20 @@ class ModuleTarget: # (a) copy the module so it imports as a top-level package and (b) pair it with a # NARROW test that does not pull the duckdb-backed engine import chain. So # retry.py mutates as agentflow.retry (from sdk/agentflow), and sql_guard, -# masking and rate_limiter mutate as serving.* (from src/serving) against -# duckdb-free tests. Each duckdb-free test also avoids fixtures and calls the -# module's methods directly: under mutate_only_covered_lines a fixture-built -# object left every method line uncovered, so only __init__ got mutated. -# rate_limiter additionally imports `from src.constants import ...`; its test -# registers a tiny src.constants stub before importing the module, because the -# serving workspace copies src/serving -> top-level `serving` without `src`. The -# remaining serving modules whose tests still need the duckdb engine (the -# query/auth surfaces) stay declared-only in the [tool.mutmut] policy until they -# get duckdb-free unit tests of their own. +# masking, rate_limiter, sql_builder and nl_queries mutate as serving.* (from +# src/serving) against duckdb-free tests. Each duckdb-free test also avoids +# fixtures and calls the module's methods directly: under mutate_only_covered_lines +# a fixture-built object left every method line uncovered, so only __init__ got +# mutated. rate_limiter additionally imports `from src.constants import ...`; its +# test registers a tiny src.constants stub before importing the module, because +# the serving workspace copies src/serving -> top-level `serving` without `src`. +# sql_builder and nl_queries live under the query package whose __init__ imports +# the duckdb-backed QueryEngine, so their tests also stub +# serving.semantic_layer.query.{engine,contracts} (and the src.* helpers) before +# import. The only remaining declared-but-not-live serving surface is auth +# (manager / key_rotation), whose tests still need the duckdb engine; it stays +# declared-only in the [tool.mutmut] policy until it gets duckdb-free unit tests +# of its own. MODULE_TARGETS = { Path("agentflow/retry.py"): ModuleTarget( threshold=0.75, @@ -57,6 +61,14 @@ class ModuleTarget: threshold=0.90, tests=("tests/unit/test_rate_limiter_mutation.py",), ), + Path("serving/semantic_layer/query/sql_builder.py"): ModuleTarget( + threshold=0.90, + tests=("tests/unit/test_sql_builder_mutation.py",), + ), + Path("serving/semantic_layer/query/nl_queries.py"): ModuleTarget( + threshold=0.90, + tests=("tests/unit/test_nl_queries_mutation.py",), + ), } STATUS_BY_EXIT_CODE = { diff --git a/tests/unit/test_mutmut_policy.py b/tests/unit/test_mutmut_policy.py index 7b5fdf9..4d26f56 100644 --- a/tests/unit/test_mutmut_policy.py +++ b/tests/unit/test_mutmut_policy.py @@ -18,11 +18,12 @@ # assembled here. # NOTE: these are the *declared* targets (intent). Actual mutation execution is # gated by scripts/mutation_report.py (MODULE_TARGETS), which now runs retry.py, -# sql_guard.py, masking.py AND rate_limiter.py live (the serving modules via -# duckdb-free narrow tests, mutated as a top-level `serving` package so mutmut's -# trampoline accepts them). The other serving modules below stay declared-only -# until they get duckdb-free unit tests of their own. These assertions guard the -# declared policy, not live coverage. +# sql_guard.py, masking.py, rate_limiter.py, sql_builder.py AND nl_queries.py live +# (the serving modules via duckdb-free narrow tests, mutated as a top-level +# `serving` package so mutmut's trampoline accepts them). The remaining declared +# serving surface -- auth manager / key_rotation -- stays declared-only until it +# gets duckdb-free unit tests of its own. These assertions guard the declared +# policy, not live coverage. REQUIRED_MUTATION_TARGETS = { "src/serving/semantic_layer/sql_guard.py", "src/serving/api/auth/manager.py", diff --git a/tests/unit/test_nl_queries_mutation.py b/tests/unit/test_nl_queries_mutation.py new file mode 100644 index 0000000..f05e4e1 --- /dev/null +++ b/tests/unit/test_nl_queries_mutation.py @@ -0,0 +1,855 @@ +"""Narrow, duckdb-free mutation test for the NL query execution path +(src/serving/semantic_layer/query/nl_queries.py). + +This is the test the mutation gate runs against +``serving/semantic_layer/query/nl_queries.py`` (see scripts/mutation_report.py +MODULE_TARGETS). nl_queries is the only ``validate_nl_sql()`` enforcement +boundary (``_prepare_nl_sql``) plus the pagination / row-cap SQL wrappers built +around the prevalidated NL SQL -- a surviving mutant in the row cap, the +validate wrap or the cursor checks is an un-paginated full-table read or a +denylist bypass (audit_28_06_26.md #8), exactly what a mutation gate should pin. + +Design rules, shared with test_rate_limiter_mutation.py / +test_sql_builder_mutation.py (see fable_handoff.md cont.16-19): + +1. **duckdb-free.** The ordinary query-engine tests build a QueryEngine, which + imports duckdb and crashes mutmut's ``mutants/`` workspace. This file drives + the NLQueryMixin methods through a hand-built host with a fake backend, so + duckdb is never imported. + +2. **No fixtures for the subject -- inline construction + direct method calls.** + With ``mutate_only_covered_lines = true`` a fixture-built host left every + method line uncovered (only ``__init__`` mutated, score 0%). The host is built + inline and the methods are called directly. (The one pytest fixture below only + toggles the telemetry flag; it constructs nothing under test.) + +3. **Import shims.** The mutation harness copies ``src/serving`` to a top-level + ``serving`` package *without* ``src``. Several things on nl_queries' import + path would otherwise fail or drag duckdb in: ``from src.processing.tracing + import telemetry_disabled`` and ``from src.serving.backends import + BackendExecutionError`` (no ``src``), the ``serving.semantic_layer.query`` + package ``__init__`` (``from .engine import QueryEngine`` -> duckdb) and + ``.contracts`` (imports the duckdb backend for type hints). The ``.sql_guard`` + import is a shim that does ``from src.serving.semantic_layer.sql_guard import + ...``; we point that src-name at the REAL top-level sql_guard (sqlglot-only, + duckdb-free) so the validate boundary runs against the genuine guard. Under + ordinary pytest the real ``src`` package is importable, so no shim is installed + and the real modules load. + +Reproduced at 94.4% (killed 323, survived 19) via the WSL/mutmut harness (py3.10); +the CI gate (mutation.yml on py3.11) is the source of truth. The surviving mutants +are genuine equivalents, documented at the bottom of this file. +""" + +from __future__ import annotations + +import base64 +import hashlib +import sys +import types + + +def _src_package_available() -> bool: + try: + import src # noqa: F401 + + return True + except ModuleNotFoundError: + return False + + +def _ensure_module(name: str) -> types.ModuleType: + module = sys.modules.get(name) + if module is None: + module = types.ModuleType(name) + sys.modules[name] = module + return module + + +def _install_harness_stubs() -> None: + _ensure_module("src") + processing_pkg = _ensure_module("src.processing") + tracing_mod = _ensure_module("src.processing.tracing") + + def telemetry_disabled() -> bool: + return True + + tracing_mod.telemetry_disabled = telemetry_disabled + processing_pkg.tracing = tracing_mod + + serving_pkg = _ensure_module("src.serving") + backends_mod = _ensure_module("src.serving.backends") + + class BackendExecutionError(RuntimeError): + """Mirror of src.serving.backends.BackendExecutionError.""" + + backends_mod.BackendExecutionError = BackendExecutionError + serving_pkg.backends = backends_mod + + # nl_engine is imported lazily inside explain(); supply a rule-based default. + semantic_pkg = _ensure_module("src.serving.semantic_layer") + nl_engine_mod = _ensure_module("src.serving.semantic_layer.nl_engine") + nl_engine_mod._ANTHROPIC_KEY = "" + semantic_pkg.nl_engine = nl_engine_mod + + # nl_queries imports `.sql_guard`, a shim that does + # `from src.serving.semantic_layer.sql_guard import ...`. Point that src-name + # at the REAL top-level sql_guard (sqlglot-only, duckdb-free, importable in + # the harness) so the validate_nl_sql boundary is exercised against the + # genuine guard rather than a stub. + import serving.semantic_layer.sql_guard as _real_sql_guard + + sys.modules["src.serving.semantic_layer.sql_guard"] = _real_sql_guard + semantic_pkg.sql_guard = _real_sql_guard + + # Neuter the query package __init__ (`from .engine import QueryEngine`) and + # the contracts module; both pull duckdb via the QueryEngine import chain and + # neither contributes runtime behaviour to nl_queries. + engine_stub = _ensure_module("serving.semantic_layer.query.engine") + engine_stub.QueryEngine = object + contracts_stub = _ensure_module("serving.semantic_layer.query.contracts") + contracts_stub.SQLBuilderHost = object + contracts_stub.QueryExecutionHost = object + contracts_stub.NLQueryHost = object + + +if not _src_package_available(): + _install_harness_stubs() + +try: # mutation-harness workspace exposes it as a top-level package + from serving.semantic_layer.query import nl_queries as nlq_module +except ImportError: # ordinary pytest sees it under the src package + from src.serving.semantic_layer.query import nl_queries as nlq_module + +import pytest + +NLQueryMixin = nlq_module.NLQueryMixin +UnsafeNLQueryError = nlq_module.UnsafeNLQueryError + + +# --------------------------------------------------------------------------- # +# In-process host + fake backend + recording span (no duckdb, no real tracer). +# --------------------------------------------------------------------------- # + + +class _Entity: + def __init__(self, table: str) -> None: + self.table = table + + +class _Catalog: + def __init__( + self, tables: tuple[str, ...] = ("orders",), metrics: tuple[str, ...] = () + ) -> None: + self.entities = {table: _Entity(table) for table in tables} + self.metrics = dict.fromkeys(metrics) + + +class _FakeBackend: + """Records every SQL string handed to execute/scalar/explain so the + pagination wrappers and the row-cap can be pinned exactly.""" + + def __init__(self, rows=None, scalar_value=0, explain_rows=None) -> None: + self._rows = rows if rows is not None else [] + self._scalar_value = scalar_value + self._explain_rows = explain_rows if explain_rows is not None else [] + self.executed: list[str] = [] + self.scalared: list[str] = [] + self.explained: list[str] = [] + + def execute(self, sql: str): + self.executed.append(sql) + return self._rows + + def scalar(self, sql: str): + self.scalared.append(sql) + return self._scalar_value + + def explain(self, sql: str): + self.explained.append(sql) + return self._explain_rows + + +class _RecordingSpan: + def __init__(self) -> None: + self.attributes: dict = {} + + def is_recording(self) -> bool: + return True + + def set_attribute(self, key, value) -> None: + self.attributes[key] = value + + +class _SpanCtx: + def __init__(self, span: _RecordingSpan) -> None: + self._span = span + + def __enter__(self) -> _RecordingSpan: + return self._span + + def __exit__(self, *exc) -> bool: + return False + + +class _RecordingTracer: + def __init__(self) -> None: + self.started: list = [] + self.spans: list[_RecordingSpan] = [] + + def start_as_current_span(self, name): + self.started.append(name) + span = _RecordingSpan() + self.spans.append(span) + return _SpanCtx(span) + + +class _Host(NLQueryMixin): + def __init__( + self, + *, + catalog: _Catalog | None = None, + backend: _FakeBackend | None = None, + backend_name: str = "duckdb", + translated: str = "SELECT id FROM orders", + ) -> None: + self.catalog = catalog if catalog is not None else _Catalog() + self._backend = backend if backend is not None else _FakeBackend() + self._backend_name = backend_name + self._translated = translated + self.translate_calls: list[tuple[str, str | None]] = [] + self.scope_calls: list[tuple[str, str | None]] = [] + + # Overridden so the test never pulls nl_engine / the tenant router; these + # live in other modules and are mutation-tested separately. Calls are + # recorded so the call sites' argument forwarding can be pinned. + def _translate_question_to_sql(self, question: str, tenant_id: str | None = None) -> str: + self.translate_calls.append((question, tenant_id)) + return self._translated + + def _scope_sql(self, sql: str, tenant_id: str | None) -> str: + self.scope_calls.append((sql, tenant_id)) + return sql + + def _resolve_tenant_id(self, tenant_id: str | None) -> str | None: + return tenant_id + + +@pytest.fixture(autouse=True) +def _disable_telemetry(monkeypatch): + # Force the no-span path deterministically in both contexts (the harness stub + # already returns True; under ordinary pytest the real one may not). Telemetry + # tests below re-enable it explicitly. + monkeypatch.setattr(nlq_module, "telemetry_disabled", lambda: True) + + +def _fixed_clock(monkeypatch, start_t: float, end_t: float) -> None: + # Pin time.monotonic so elapsed_ms is deterministic: first call returns the + # method's start stamp, every later call returns the end stamp. This makes the + # elapsed arithmetic killable without depending on the absolute wall clock (a + # `- start`->`+ start` flip would otherwise pass or fail by machine uptime). + state = {"calls": 0} + + def _monotonic() -> float: + state["calls"] += 1 + return start_t if state["calls"] == 1 else end_t + + monkeypatch.setattr(nlq_module, "time", types.SimpleNamespace(monotonic=_monotonic)) + + +# --------------------------------------------------------------------------- # +# _default_allowed_tables: catalog tables + pipeline_events. +# --------------------------------------------------------------------------- # + + +def test_default_allowed_tables_includes_catalog_and_pipeline_events(): + host = _Host(catalog=_Catalog(tables=("orders", "customers"))) + allowed = nlq_module._default_allowed_tables(host) + assert allowed == {"orders", "customers", "pipeline_events"} + + +# --------------------------------------------------------------------------- # +# _prepare_nl_sql: the validate_nl_sql enforcement boundary. +# --------------------------------------------------------------------------- # + + +def test_prepare_nl_sql_returns_validated_sql_unchanged(): + sql = "SELECT id FROM orders" + assert nlq_module._prepare_nl_sql(sql, {"orders"}) == sql + + +def test_prepare_nl_sql_wraps_unsafe_sql_as_403(): + with pytest.raises(UnsafeNLQueryError) as exc_info: + nlq_module._prepare_nl_sql("DROP TABLE orders", {"orders"}) + assert exc_info.value.status_code == 403 + assert exc_info.value.detail.startswith("NL-to-SQL produced unsafe query:") + + +def test_prepare_nl_sql_rejects_unknown_table(): + with pytest.raises(UnsafeNLQueryError): + nlq_module._prepare_nl_sql("SELECT id FROM secret_table", {"orders"}) + + +def test_unsafe_nl_query_error_is_403(): + err = UnsafeNLQueryError("boom") + assert err.status_code == 403 + assert err.detail == "boom" + + +# --------------------------------------------------------------------------- # +# _build_query_hash: tenant-scoped sha256. +# --------------------------------------------------------------------------- # + + +def test_build_query_hash_matches_sha256_of_tenant_and_sql(): + host = _Host() + expected = hashlib.sha256(b"acme:SELECT 1").hexdigest() + assert host._build_query_hash("SELECT 1", "acme") == expected + + +def test_build_query_hash_uses_default_when_tenant_none(): + host = _Host() + expected = hashlib.sha256(b"default:SELECT 1").hexdigest() + assert host._build_query_hash("SELECT 1", None) == expected + + +def test_build_query_hash_is_sql_sensitive(): + host = _Host() + assert host._build_query_hash("SELECT 1", "acme") != host._build_query_hash("SELECT 2", "acme") + + +# --------------------------------------------------------------------------- # +# _encode_cursor / _decode_cursor: base64 round-trip + validation. +# --------------------------------------------------------------------------- # + + +def test_cursor_round_trip(): + host = _Host() + cursor = host._encode_cursor(42, "abc123") + assert host._decode_cursor(cursor) == (42, "abc123") + + +def test_encode_cursor_is_base64_of_offset_and_hash(): + host = _Host() + cursor = host._encode_cursor(7, "deadbeef") + assert base64.urlsafe_b64decode(cursor.encode()).decode() == "7:deadbeef" + + +def test_decode_cursor_accepts_zero_offset(): + # offset 0 is valid (`offset < 0` rejects, not `<= 0` / `< 1`). + host = _Host() + cursor = base64.urlsafe_b64encode(b"0:abc").decode() + assert host._decode_cursor(cursor) == (0, "abc") + + +def test_decode_cursor_keeps_colon_in_hash(): + # The hash is split off with maxsplit=1, so a hash containing ':' survives. + # Kills split(":") (no maxsplit), rsplit, and maxsplit=2. + host = _Host() + cursor = base64.urlsafe_b64encode(b"5:a:b:c").decode() + assert host._decode_cursor(cursor) == (5, "a:b:c") + + +def test_decode_cursor_rejects_negative_offset(): + host = _Host() + bad = base64.urlsafe_b64encode(b"-1:abc").decode() + with pytest.raises(ValueError) as exc_info: + host._decode_cursor(bad) + assert str(exc_info.value) == "Invalid cursor value." + + +def test_decode_cursor_rejects_empty_hash(): + host = _Host() + bad = base64.urlsafe_b64encode(b"5:").decode() + with pytest.raises(ValueError) as exc_info: + host._decode_cursor(bad) + assert str(exc_info.value) == "Invalid cursor value." + + +def test_decode_cursor_rejects_non_numeric_offset(): + host = _Host() + bad = base64.urlsafe_b64encode(b"x:abc").decode() + with pytest.raises(ValueError) as exc_info: + host._decode_cursor(bad) + assert str(exc_info.value) == "Invalid cursor value." + + +def test_decode_cursor_rejects_garbage(): + host = _Host() + with pytest.raises(ValueError) as exc_info: + host._decode_cursor("!!!not-base64!!!") + assert str(exc_info.value) == "Invalid cursor value." + + +# --------------------------------------------------------------------------- # +# execute_nl_query: the un-paginated path bounds rows with a hard LIMIT. +# --------------------------------------------------------------------------- # + + +def test_execute_nl_query_bounds_rows_with_hard_limit(): + # The bounded wrapper caps the row count at _MAX_NL_QUERY_ROWS (1000) so an + # un-paginated NL item cannot stream a whole table (audit_28 #8). Pin the + # exact SQL string AND every key of the result dict. + backend = _FakeBackend(rows=[("r1",), ("r2",)]) + host = _Host(backend=backend, translated="SELECT id FROM orders") + result = host.execute_nl_query("how many orders", allowed_tables={"orders"}) + assert backend.executed == [ + "SELECT * FROM (SELECT id FROM orders) AS bounded_nl_query LIMIT 1000" + ] + assert result["data"] == [("r1",), ("r2",)] + assert result["sql"] == "SELECT id FROM orders" + assert result["row_count"] == 2 + assert result["freshness_seconds"] is None + assert isinstance(result["execution_time_ms"], int) + assert set(result) == {"data", "sql", "row_count", "execution_time_ms", "freshness_seconds"} + + +def test_execute_nl_query_max_rows_constant_is_1000(): + assert nlq_module._MAX_NL_QUERY_ROWS == 1000 + + +def test_execute_nl_query_forwards_question_tenant_and_scope(): + backend = _FakeBackend(rows=[("a",)]) + host = _Host(backend=backend) + host.execute_nl_query("the question", tenant_id="acme", allowed_tables={"orders"}) + assert host.translate_calls == [("the question", "acme")] + assert host.scope_calls == [("SELECT id FROM orders", "acme")] + + +def test_execute_nl_query_uses_passed_allowed_tables_not_default(): + # allowed_tables is honoured even when it differs from the catalog-derived + # default: an `is not None`->`is None` flip would validate against the + # (here unrelated) default and reject. Empty catalog -> default lacks 'orders'. + backend = _FakeBackend(rows=[("a",)]) + host = _Host(catalog=_Catalog(tables=()), backend=backend, translated="SELECT id FROM orders") + result = host.execute_nl_query("q", allowed_tables={"orders"}) + assert result["row_count"] == 1 + + +def test_execute_nl_query_uses_default_allowed_tables_when_none(): + # allowed_tables=None -> _default_allowed_tables(self) governs validation; a + # `(self)`->`(None)` mutant raises AttributeError on None.catalog. + backend = _FakeBackend(rows=[("a",)]) + host = _Host(catalog=_Catalog(tables=("orders",)), backend=backend) + result = host.execute_nl_query("q", allowed_tables=None) + assert result["row_count"] == 1 + + +def test_execute_nl_query_elapsed_is_monotonic_delta_in_ms(monkeypatch): + # elapsed_ms = int((end - start) * 1000) on a pinned clock. Kills `=None`, + # `/1000` and `- start`->`+ start` (the *1001 variant rounds to the same 50). + _fixed_clock(monkeypatch, 1.0, 1.5) + host = _Host(backend=_FakeBackend(rows=[("a",)])) + result = host.execute_nl_query("q", allowed_tables={"orders"}) + assert result["execution_time_ms"] == 500 # int((1.5 - 1.0) * 1000) + + +def test_execute_nl_query_records_span_attributes(monkeypatch): + monkeypatch.setattr(nlq_module, "telemetry_disabled", lambda: False) + tracer = _RecordingTracer() + monkeypatch.setattr(nlq_module, "tracer", tracer) + backend = _FakeBackend(rows=[("a",), ("b",)]) + host = _Host(backend=backend, backend_name="duckdb") + host.execute_nl_query("q", tenant_id="acme", allowed_tables={"orders"}) + assert tracer.started == ["duckdb.query"] + span = tracer.spans[0] + assert span.attributes["sql"] == "SELECT id FROM orders" + assert span.attributes["tenant_id"] == "acme" + assert span.attributes["row_count"] == 2 + + +def test_execute_nl_query_wraps_backend_error_as_value_error(): + class _BoomBackend(_FakeBackend): + def execute(self, sql): + raise nlq_module.BackendExecutionError("boom") + + host = _Host(backend=_BoomBackend()) + with pytest.raises(ValueError, match="Query execution failed: boom"): + host.execute_nl_query("q", allowed_tables={"orders"}) + + +# --------------------------------------------------------------------------- # +# paginated_query: the page/count SQL wrappers + the pagination arithmetic. +# --------------------------------------------------------------------------- # + + +def test_paginated_query_builds_page_and_count_sql(): + backend = _FakeBackend(rows=[("a",), ("b",), ("c",)], scalar_value=3) + host = _Host(backend=backend, translated="SELECT id FROM orders") + host.paginated_query("q", limit=2, allowed_tables={"orders"}) + assert backend.executed == [ + "SELECT * FROM (SELECT id FROM orders) AS paginated_query LIMIT 3 OFFSET 0" + ] + assert backend.scalared == [ + "SELECT COUNT(*) FROM (SELECT 1 FROM (SELECT id FROM orders) AS count_query " + "LIMIT 10001) AS bounded_count" + ] + + +def test_paginated_query_default_limit_is_100(): + # No limit -> default 100 -> page SQL asks for limit+1 = 101. + backend = _FakeBackend(rows=[("a",)], scalar_value=1) + host = _Host(backend=backend) + host.paginated_query("q", allowed_tables={"orders"}) + assert backend.executed == [ + "SELECT * FROM (SELECT id FROM orders) AS paginated_query LIMIT 101 OFFSET 0" + ] + + +def test_paginated_query_result_dict_keys_and_values(): + backend = _FakeBackend(rows=[("a",), ("b",), ("c",)], scalar_value=3) + host = _Host(backend=backend) + result = host.paginated_query("q", limit=2, allowed_tables={"orders"}) + assert result["sql"] == "SELECT id FROM orders" + assert result["freshness_seconds"] is None + assert isinstance(result["execution_time_ms"], int) + assert set(result) == { + "data", + "sql", + "row_count", + "total_count", + "next_cursor", + "has_more", + "page_size", + "execution_time_ms", + "freshness_seconds", + } + + +def test_paginated_query_has_more_and_data_slice(): + backend = _FakeBackend(rows=[("a",), ("b",), ("c",)], scalar_value=3) + host = _Host(backend=backend) + result = host.paginated_query("q", limit=2, allowed_tables={"orders"}) + assert result["has_more"] is True + assert result["data"] == [("a",), ("b",)] + assert result["row_count"] == 2 + assert result["page_size"] == 2 + + +def test_paginated_query_no_more_when_rows_within_limit(): + backend = _FakeBackend(rows=[("a",), ("b",)], scalar_value=2) + host = _Host(backend=backend) + result = host.paginated_query("q", limit=2, allowed_tables={"orders"}) + assert result["has_more"] is False + assert result["next_cursor"] is None + assert result["data"] == [("a",), ("b",)] + + +def test_paginated_query_next_cursor_advances_offset(): + backend = _FakeBackend(rows=[("a",), ("b",), ("c",)], scalar_value=3) + host = _Host(backend=backend) + result = host.paginated_query("q", limit=2, allowed_tables={"orders"}) + assert result["next_cursor"] is not None + offset, _hash = host._decode_cursor(result["next_cursor"]) + assert offset == 2 # offset(0) + limit(2) + + +def test_paginated_query_cursor_hash_is_tenant_and_sql_bound(): + # The cursor hash must derive from the scoped SQL and the tenant id; a mutant + # that hashes None for either yields a hash that doesn't match the recomputed + # value, which would also break the same-query cursor check. + backend = _FakeBackend(rows=[("a",), ("b",), ("c",)], scalar_value=3) + host = _Host(backend=backend) + result = host.paginated_query("q", limit=2, tenant_id="acme", allowed_tables={"orders"}) + _offset, cursor_hash = host._decode_cursor(result["next_cursor"]) + assert cursor_hash == host._build_query_hash("SELECT id FROM orders", "acme") + + +def test_paginated_query_total_count_below_threshold_is_exact(): + backend = _FakeBackend(rows=[("a",)], scalar_value=5) + host = _Host(backend=backend) + result = host.paginated_query("q", limit=10, allowed_tables={"orders"}) + assert result["total_count"] == 5 + + +def test_paginated_query_total_count_is_none_above_threshold(): + backend = _FakeBackend(rows=[("a",)], scalar_value=10_001) + host = _Host(backend=backend) + result = host.paginated_query("q", limit=10, allowed_tables={"orders"}) + assert result["total_count"] is None + + +def test_paginated_query_total_count_exactly_threshold_is_exact(): + backend = _FakeBackend(rows=[("a",)], scalar_value=10_000) + host = _Host(backend=backend) + result = host.paginated_query("q", limit=10, allowed_tables={"orders"}) + assert result["total_count"] == 10_000 + + +def test_paginated_query_total_count_zero_when_scalar_none(): + # A None scalar coalesces to 0 (kills the `else 0`->`else 1` mutant). + backend = _FakeBackend(rows=[("a",)], scalar_value=None) + host = _Host(backend=backend) + result = host.paginated_query("q", limit=10, allowed_tables={"orders"}) + assert result["total_count"] == 0 + + +def test_paginated_query_second_page_uses_cursor_offset(): + backend = _FakeBackend(rows=[("a",), ("b",), ("c",)], scalar_value=3) + host = _Host(backend=backend) + first = host.paginated_query("q", limit=2, allowed_tables={"orders"}) + backend.executed.clear() + host.paginated_query("q", limit=2, cursor=first["next_cursor"], allowed_tables={"orders"}) + assert backend.executed == [ + "SELECT * FROM (SELECT id FROM orders) AS paginated_query LIMIT 3 OFFSET 2" + ] + + +def test_paginated_query_rejects_cursor_for_different_query(): + host = _Host(backend=_FakeBackend(rows=[("a",)], scalar_value=1)) + foreign_cursor = host._encode_cursor(2, "not-the-right-hash") + with pytest.raises(ValueError) as exc_info: + host.paginated_query("q", limit=2, cursor=foreign_cursor, allowed_tables={"orders"}) + assert str(exc_info.value) == "Cursor does not match the requested query." + + +def test_paginated_query_rejects_limit_below_one(): + host = _Host() + with pytest.raises(ValueError) as exc_info: + host.paginated_query("q", limit=0, allowed_tables={"orders"}) + assert str(exc_info.value) == "limit must be between 1 and 1000" + + +def test_paginated_query_rejects_limit_above_1000(): + host = _Host() + with pytest.raises(ValueError) as exc_info: + host.paginated_query("q", limit=1001, allowed_tables={"orders"}) + assert str(exc_info.value) == "limit must be between 1 and 1000" + + +def test_paginated_query_allows_boundary_limits(): + backend = _FakeBackend(rows=[("a",)], scalar_value=1) + host = _Host(backend=backend) + host.paginated_query("q", limit=1, allowed_tables={"orders"}) + host.paginated_query("q", limit=1000, allowed_tables={"orders"}) # no raise + + +def test_paginated_query_forwards_question_tenant_and_scope(): + backend = _FakeBackend(rows=[("a",)], scalar_value=1) + host = _Host(backend=backend) + host.paginated_query("the question", limit=2, tenant_id="acme", allowed_tables={"orders"}) + assert host.translate_calls == [("the question", "acme")] + assert host.scope_calls == [("SELECT id FROM orders", "acme")] + + +def test_paginated_query_uses_passed_allowed_tables_not_default(): + backend = _FakeBackend(rows=[("a",)], scalar_value=1) + host = _Host(catalog=_Catalog(tables=()), backend=backend, translated="SELECT id FROM orders") + result = host.paginated_query("q", limit=5, allowed_tables={"orders"}) + assert result["row_count"] == 1 + + +def test_paginated_query_uses_default_allowed_tables_when_none(): + backend = _FakeBackend(rows=[("a",)], scalar_value=1) + host = _Host(catalog=_Catalog(tables=("orders",)), backend=backend) + result = host.paginated_query("q", limit=5, allowed_tables=None) + assert result["row_count"] == 1 + + +def test_paginated_query_elapsed_is_monotonic_delta_in_ms(monkeypatch): + _fixed_clock(monkeypatch, 1.0, 1.5) + host = _Host(backend=_FakeBackend(rows=[("a",)], scalar_value=1)) + result = host.paginated_query("q", limit=5, allowed_tables={"orders"}) + assert result["execution_time_ms"] == 500 # int((1.5 - 1.0) * 1000) + + +def test_paginated_query_records_span_attributes(monkeypatch): + monkeypatch.setattr(nlq_module, "telemetry_disabled", lambda: False) + tracer = _RecordingTracer() + monkeypatch.setattr(nlq_module, "tracer", tracer) + backend = _FakeBackend(rows=[("a",), ("b",), ("c",)], scalar_value=3) + host = _Host(backend=backend, backend_name="duckdb") + host.paginated_query("q", limit=2, tenant_id="acme", allowed_tables={"orders"}) + assert tracer.started == ["duckdb.query"] + span = tracer.spans[0] + assert span.attributes["sql"] == "SELECT id FROM orders" + assert span.attributes["tenant_id"] == "acme" + assert span.attributes["row_count"] == 2 # len(page_rows[:limit]) + + +# The span SQL attribute is bounded: <=200 chars verbatim, else sql[:197]+"...". +# Two boundary lengths pin the <=200 comparison and the 197 truncation offset. +_SQL_200 = "SELECT id FROM orders".ljust(200) +_SQL_201 = "SELECT id FROM orders".ljust(201) + + +def _recording_tracer(monkeypatch): + monkeypatch.setattr(nlq_module, "telemetry_disabled", lambda: False) + tracer = _RecordingTracer() + monkeypatch.setattr(nlq_module, "tracer", tracer) + return tracer + + +def test_paginated_query_span_keeps_sql_at_200_boundary(monkeypatch): + tracer = _recording_tracer(monkeypatch) + host = _Host(backend=_FakeBackend(rows=[("a",)], scalar_value=1), translated=_SQL_200) + host.paginated_query("q", limit=2, allowed_tables={"orders"}) + assert tracer.spans[0].attributes["sql"] == _SQL_200 # len == 200 kept whole + + +def test_paginated_query_span_truncates_sql_past_200(monkeypatch): + tracer = _recording_tracer(monkeypatch) + host = _Host(backend=_FakeBackend(rows=[("a",)], scalar_value=1), translated=_SQL_201) + host.paginated_query("q", limit=2, allowed_tables={"orders"}) + assert tracer.spans[0].attributes["sql"] == _SQL_201[:197] + "..." + + +def test_execute_span_keeps_sql_at_200_boundary(monkeypatch): + tracer = _recording_tracer(monkeypatch) + host = _Host(backend=_FakeBackend(rows=[("a",)]), translated=_SQL_200) + host.execute_nl_query("q", allowed_tables={"orders"}) + assert tracer.spans[0].attributes["sql"] == _SQL_200 + + +def test_execute_span_truncates_sql_past_200(monkeypatch): + tracer = _recording_tracer(monkeypatch) + host = _Host(backend=_FakeBackend(rows=[("a",)]), translated=_SQL_201) + host.execute_nl_query("q", allowed_tables={"orders"}) + assert tracer.spans[0].attributes["sql"] == _SQL_201[:197] + "..." + + +def test_paginated_query_wraps_backend_error_as_value_error(): + class _BoomBackend(_FakeBackend): + def execute(self, sql): + raise nlq_module.BackendExecutionError("kaboom") + + host = _Host(backend=_BoomBackend()) + with pytest.raises(ValueError, match="Query execution failed: kaboom"): + host.paginated_query("q", limit=2, allowed_tables={"orders"}) + + +# --------------------------------------------------------------------------- # +# explain: translate without executing; table extraction + scan warning. +# --------------------------------------------------------------------------- # + + +def test_explain_reports_tables_estimate_and_scan_warning(): + backend = _FakeBackend(explain_rows=[(0, "SEQ_SCAN orders"), (1, "estimated ~ 1,234 rows")]) + host = _Host(backend=backend, translated="SELECT id FROM orders") + result = host.explain("show orders", allowed_tables={"orders"}) + assert result["question"] == "show orders" + assert result["sql"] == "SELECT id FROM orders" + assert result["tables_accessed"] == ["orders"] + assert result["estimated_rows"] == 1234 + assert result["engine"] == "rule_based" + assert result["warning"] == "Full table scan on orders (no index)" + assert set(result) == { + "question", + "sql", + "tables_accessed", + "estimated_rows", + "engine", + "warning", + } + + +def test_explain_warns_on_sequential_scan_text_only(): + # The warning fires on "Sequential Scan" too, not just "SEQ_SCAN". A plan with + # the spaced form (and no SEQ_SCAN) must still warn -- pins the exact + # "Sequential Scan" literal and its casing. + backend = _FakeBackend(explain_rows=[(0, "Sequential Scan on orders")]) + host = _Host(backend=backend, translated="SELECT id FROM orders") + result = host.explain("q", allowed_tables={"orders"}) + assert result["warning"] == "Full table scan on orders (no index)" + + +def test_explain_no_warning_without_sequential_scan(): + backend = _FakeBackend(explain_rows=[(0, "HASH_JOIN"), (1, "estimated ~ 10 rows")]) + host = _Host(backend=backend, translated="SELECT id FROM orders") + result = host.explain("q", allowed_tables={"orders"}) + assert result["warning"] is None + assert result["estimated_rows"] == 10 + + +def test_explain_single_column_row_uses_first_element(): + # A one-element explain row has no row[1]; the plan text must come from + # str(row[0]). Kills `len(row) > 1`->`>= 1` (IndexError) and `str(row[0])`-> + # `str(None)`/`str(row[1])` (which would drop the SEQ_SCAN text). + backend = _FakeBackend(explain_rows=[("SEQ_SCAN orders ~ 42 rows",)]) + host = _Host(backend=backend, translated="SELECT id FROM orders") + result = host.explain("q", allowed_tables={"orders"}) + assert result["estimated_rows"] == 42 + assert result["warning"] == "Full table scan on orders (no index)" + + +def test_explain_normalizes_box_drawing_around_estimate(): + # EXPLAIN output draws box characters; they are normalized to spaces so the + # row-estimate regex still matches. A box char glued to the estimate must be + # blanked. Kills the box-drawing re.sub mutants. + backend = _FakeBackend(explain_rows=[(0, "estimated ~ 1234│rows")]) + host = _Host(backend=backend, translated="SELECT id FROM orders") + result = host.explain("q", allowed_tables={"orders"}) + assert result["estimated_rows"] == 1234 + + +def test_explain_passes_scoped_sql_to_backend_explain(): + backend = _FakeBackend(explain_rows=[(0, "x")]) + host = _Host(backend=backend, translated="SELECT id FROM orders") + host.explain("q", allowed_tables={"orders"}) + assert backend.explained == ["SELECT id FROM orders"] + + +def test_explain_forwards_question_tenant_and_scope(): + backend = _FakeBackend(explain_rows=[(0, "x")]) + host = _Host(backend=backend) + host.explain("the question", tenant_id="acme", allowed_tables={"orders"}) + assert host.translate_calls == [("the question", "acme")] + assert host.scope_calls == [("SELECT id FROM orders", "acme")] + + +def test_explain_uses_passed_allowed_tables_not_default(): + backend = _FakeBackend(explain_rows=[(0, "x")]) + host = _Host(catalog=_Catalog(tables=()), backend=backend, translated="SELECT id FROM orders") + result = host.explain("q", allowed_tables={"orders"}) + assert result["tables_accessed"] == ["orders"] + + +def test_explain_uses_default_allowed_tables_when_none(): + backend = _FakeBackend(explain_rows=[(0, "x")]) + host = _Host(catalog=_Catalog(tables=("orders",)), backend=backend) + result = host.explain("q", allowed_tables=None) + assert result["tables_accessed"] == ["orders"] + + +def test_explain_reports_llm_engine_when_key_and_anthropic_present(monkeypatch): + # When a key is configured and `import anthropic` succeeds, the engine label + # is "llm". Pins the getattr(nl_engine, "_ANTHROPIC_KEY", "") read: a mutant + # that reads the wrong attribute/object collapses to "rule_based". + nl_engine_mod = sys.modules["src.serving.semantic_layer.nl_engine"] + monkeypatch.setattr(nl_engine_mod, "_ANTHROPIC_KEY", "sk-test", raising=False) + monkeypatch.setitem(sys.modules, "anthropic", types.ModuleType("anthropic")) + backend = _FakeBackend(explain_rows=[(0, "x")]) + host = _Host(backend=backend, translated="SELECT id FROM orders") + result = host.explain("q", allowed_tables={"orders"}) + assert result["engine"] == "llm" + + +def test_explain_wraps_backend_error_as_value_error(): + class _BoomBackend(_FakeBackend): + def explain(self, sql): + raise nlq_module.BackendExecutionError("nope") + + host = _Host(backend=_BoomBackend()) + with pytest.raises(ValueError, match="Query explanation failed: nope"): + host.explain("q", allowed_tables={"orders"}) + + +# --------------------------------------------------------------------------- # +# Known equivalent survivors (documented, not gaps). 94.4% reproduced on the +# WSL/mutmut harness; the survivors below cannot change observable behaviour: +# * tables-extraction in explain() parses with sqlglot and falls back to a +# regex on any exception. For the bare-table SQL that validate_nl_sql permits +# (schema/catalog qualifiers are rejected upstream) both paths return the same +# list, so the parse-dialect / parse-arg mutants and the fromkeys/find_all +# mutants in that block are equivalent. +# * the explain() box-drawing re.sub case mutant (╿<->╿ is the same +# code point range) and the plan-join separator mutant (the joined plan text +# is not returned, only scanned for the same substrings either way). +# * the explain() getattr default-value mutants: nl_engine always defines +# _ANTHROPIC_KEY, so the third (default) argument is unreachable. +# * elapsed_ms `* 1000`->`* 1001`: indistinguishable at unit-test timescales. +# * the span SQL slice `[:200]`->`[:201]`: in the `len(sql) <= 200` branch the +# slice never reaches 200 chars, so the extra index is a no-op. +# The telemetry span guards and the <=200 / [:197] truncation boundary ARE pinned +# by the records_span / span-boundary tests above (telemetry re-enabled). +# --------------------------------------------------------------------------- # diff --git a/tests/unit/test_sql_builder_mutation.py b/tests/unit/test_sql_builder_mutation.py new file mode 100644 index 0000000..499f98d --- /dev/null +++ b/tests/unit/test_sql_builder_mutation.py @@ -0,0 +1,583 @@ +"""Narrow, duckdb-free mutation test for the tenant SQL builder +(src/serving/semantic_layer/query/sql_builder.py). + +This is the test the mutation gate runs against +``serving/semantic_layer/query/sql_builder.py`` (see scripts/mutation_report.py +MODULE_TARGETS). Every entity/metric SQL string the engine executes flows through +``_scope_sql`` / ``_qualify_table`` here, so a surviving mutant in the +tenant-schema qualification is a cross-tenant read (audit_28_06_26.md #5), exactly +the kind of code a mutation gate should pin. + +Three design rules, shared with test_rate_limiter_mutation.py / +test_masking_mutation.py / test_sql_guard_mutation.py (see fable_handoff.md +cont.16-19): + +1. **duckdb-free.** The ordinary query-engine tests build a QueryEngine, which + imports duckdb's compiled subpackage and crashes mutmut's ``mutants/`` + workspace. sql_builder itself imports only sqlglot + a tenant-id helper, so + this file touches the mixin methods through a hand-built host and never drags + duckdb in. + +2. **No fixtures -- inline construction + direct method calls.** With + ``mutate_only_covered_lines = true`` the gate collects coverage first; a + fixture-built host left every method line uncovered, so only ``__init__`` got + mutated (score 0%). Building the host inline and calling ``_scope_sql`` / + ``_qualify_table`` / ``_quote_literal`` directly attributes every method line. + +3. **Import shims.** The mutation harness copies ``src/serving`` to a top-level + ``serving`` package *without* ``src`` (copying ``src`` would shadow it). Three + things on sql_builder's import path would otherwise fail or drag duckdb in: + ``from src.serving.api.auth import get_current_tenant_id`` (no ``src``), the + ``serving.semantic_layer.query`` package ``__init__`` (``from .engine import + QueryEngine`` -> duckdb) and ``.contracts`` (imports the duckdb backend for + type hints). We register stand-ins for all three before importing the module. + They are only used as a default-arg helper and as runtime-unused type hints + (``from __future__ import annotations`` keeps the annotations as strings), so + the stand-ins change no executed logic. Under ordinary pytest the real ``src`` + package is importable, so no shim is installed and the real modules load. + +Reproduced at 96.0% (killed 167, survived 7) via the WSL/mutmut harness (py3.10); +the CI gate (mutation.yml on py3.11) is the source of truth. The 7 survivors are +genuine equivalent mutants, not gaps: four mutate the *string* inside +``cast("dict[...]", value)`` -- the runtime ``typing.cast`` ignores its first +argument, so any text change there is a no-op -- and three flip +``parse_one(..., dialect="duckdb")`` / ``parsed.sql(dialect=...)`` to +``dialect=None``, which renders the plain SELECTs this builder handles identically. +""" + +from __future__ import annotations + +import sys +import types +from datetime import datetime + + +def _src_package_available() -> bool: + # In the mutation harness the workspace has no top-level `src` package + # (only `serving`); under ordinary pytest `src` is the real package. + try: + import src # noqa: F401 + + return True + except ModuleNotFoundError: + return False + + +def _ensure_module(name: str) -> types.ModuleType: + module = sys.modules.get(name) + if module is None: + module = types.ModuleType(name) + sys.modules[name] = module + return module + + +def _install_harness_stubs() -> None: + # src.serving.api.auth.get_current_tenant_id: a contextvar reader in + # production; here a default-arg passthrough (the host controls the tenant). + _ensure_module("src") + serving_pkg = _ensure_module("src.serving") + api_pkg = _ensure_module("src.serving.api") + auth_pkg = _ensure_module("src.serving.api.auth") + + def get_current_tenant_id(default: str | None = None) -> str | None: + return default + + auth_pkg.get_current_tenant_id = get_current_tenant_id + api_pkg.auth = auth_pkg + serving_pkg.api = api_pkg + + # Neuter the query package __init__ (`from .engine import QueryEngine`) and + # the contracts module; both pull duckdb via the QueryEngine import chain and + # neither contributes runtime behaviour to sql_builder. + engine_stub = _ensure_module("serving.semantic_layer.query.engine") + engine_stub.QueryEngine = object + contracts_stub = _ensure_module("serving.semantic_layer.query.contracts") + contracts_stub.SQLBuilderHost = object + contracts_stub.QueryExecutionHost = object + contracts_stub.NLQueryHost = object + + +if not _src_package_available(): + _install_harness_stubs() + +try: # mutation-harness workspace exposes it as a top-level package + from serving.semantic_layer.query import sql_builder as sql_builder_module +except ImportError: # ordinary pytest sees it under the src package + from src.serving.semantic_layer.query import sql_builder as sql_builder_module + +import pytest + +SQLBuilderMixin = sql_builder_module.SQLBuilderMixin + + +# --------------------------------------------------------------------------- # +# In-process host doubles (no duckdb, no real tenant router). +# --------------------------------------------------------------------------- # + + +class _Entity: + def __init__(self, table: str) -> None: + self.table = table + + +class _Catalog: + def __init__(self, *tables: str) -> None: + self.entities = {table: _Entity(table) for table in tables} + + +class _Tenant: + def __init__(self, duckdb_schema: str) -> None: + self.duckdb_schema = duckdb_schema + + +class _TenantsConfig: + def __init__(self, tenants: tuple[_Tenant, ...]) -> None: + self.tenants = tenants + + +class _TenantRouter: + def __init__( + self, + *, + has_config: bool = False, + schema_by_tenant: dict[str | None, str] | None = None, + tenants: tuple[_Tenant, ...] = (), + ) -> None: + self._has_config = has_config + self._schema_by_tenant = dict(schema_by_tenant or {}) + self._tenants = tenants + + def has_config(self) -> bool: + return self._has_config + + def get_duckdb_schema(self, tenant_id: str | None) -> str | None: + return self._schema_by_tenant.get(tenant_id) + + def load(self) -> _TenantsConfig: + return _TenantsConfig(self._tenants) + + +class _Host(SQLBuilderMixin): + def __init__( + self, + *, + catalog: _Catalog, + tenant_router: _TenantRouter, + table_columns: dict[str, set[str]] | None = None, + cache: dict | None = None, + ) -> None: + self.catalog = catalog + self._tenant_router = tenant_router + self._table_columns_map = dict(table_columns or {}) + if cache is not None: + self._qualified_table_cache = cache + + def _table_columns(self, table_name: str) -> set[str]: + return self._table_columns_map.get(table_name, set()) + + +def _host(**kwargs) -> _Host: + catalog = kwargs.pop("catalog", _Catalog("orders", "customers")) + tenant_router = kwargs.pop("tenant_router", _TenantRouter()) + return _Host(catalog=catalog, tenant_router=tenant_router, **kwargs) + + +# --------------------------------------------------------------------------- # +# _resolve_tenant_id: explicit id wins; else the context reader with a +# config-dependent default. +# --------------------------------------------------------------------------- # + + +def test_resolve_tenant_id_returns_explicit_id(): + # tenant_id is not None -> returned verbatim, the context reader is not + # consulted. Kills `is not None` -> `is None`. + host = _host() + assert host._resolve_tenant_id("acme") == "acme" + + +def test_resolve_tenant_id_defaults_to_demo_without_config(): + # No tenant config -> default_tenant is "demo", handed to the context reader. + host = _host(tenant_router=_TenantRouter(has_config=False)) + assert host._resolve_tenant_id(None) == "demo" + + +def test_resolve_tenant_id_no_default_when_config_present(): + # With a tenant config the default is None (a multi-tenant deployment must + # not silently fall back to "demo"). Kills `not self._tenant_router...` flip + # and the "demo" literal leaking into the configured path. + host = _host(tenant_router=_TenantRouter(has_config=True)) + assert host._resolve_tenant_id(None) is None + + +def test_resolve_tenant_id_uses_context_value(monkeypatch): + # When the context reader returns a tenant, _resolve_tenant_id forwards it. + monkeypatch.setattr( + sql_builder_module, "get_current_tenant_id", lambda default=None: "ctx-tenant" + ) + host = _host(tenant_router=_TenantRouter(has_config=True)) + assert host._resolve_tenant_id(None) == "ctx-tenant" + + +def test_resolve_tenant_id_passes_default_through_to_reader(monkeypatch): + # The default arg must reach the reader (a `default=...`->`default=None` + # mutant would drop "demo"). Echo the default back to prove it was passed. + monkeypatch.setattr(sql_builder_module, "get_current_tenant_id", lambda default=None: default) + host = _host(tenant_router=_TenantRouter(has_config=False)) + assert host._resolve_tenant_id(None) == "demo" + + +# --------------------------------------------------------------------------- # +# _get_tenant_schema: schema lookup + identifier validation. +# --------------------------------------------------------------------------- # + + +def test_get_tenant_schema_returns_valid_schema(): + host = _host( + tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "acme_schema"}) + ) + assert host._get_tenant_schema("acme") == "acme_schema" + + +def test_get_tenant_schema_returns_none_when_unmapped(): + # Unknown tenant -> get_duckdb_schema returns None -> early None. Kills the + # `if schema is None` flip (which would fall through to the regex on None). + host = _host(tenant_router=_TenantRouter(has_config=True, schema_by_tenant={})) + assert host._get_tenant_schema("acme") is None + + +def test_get_tenant_schema_rejects_invalid_identifier(): + host = _host( + tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "bad-schema!"}) + ) + with pytest.raises(ValueError, match="Invalid DuckDB schema 'bad-schema!' for tenant 'acme'"): + host._get_tenant_schema("acme") + + +def test_get_tenant_schema_accepts_leading_underscore(): + # The regex allows a leading underscore; pin it so `[A-Za-z_]`->`[A-Za-z]` + # (which would reject `_staging`) dies. + host = _host( + tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "_staging"}) + ) + assert host._get_tenant_schema("acme") == "_staging" + + +# --------------------------------------------------------------------------- # +# _quote_identifier: double-quote wrapping with embedded-quote doubling. +# --------------------------------------------------------------------------- # + + +def test_quote_identifier_wraps_in_double_quotes(): + assert _host()._quote_identifier("orders") == '"orders"' + + +def test_quote_identifier_doubles_embedded_quotes(): + # An embedded `"` must be doubled so the identifier can't be broken out of. + assert _host()._quote_identifier('a"b') == '"a""b"' + + +# --------------------------------------------------------------------------- # +# _quote_literal: per-type rendering (the order matters: bool before int). +# --------------------------------------------------------------------------- # + + +def test_quote_literal_none_is_sql_null(): + assert _host()._quote_literal(None) == "NULL" + + +def test_quote_literal_true_is_sql_true_not_one(): + # bool is checked before int (bool is an int subclass); a dropped bool branch + # would render True as "1". Pin both bool values. + assert _host()._quote_literal(True) == "TRUE" + + +def test_quote_literal_false_is_sql_false(): + assert _host()._quote_literal(False) == "FALSE" + + +def test_quote_literal_int_is_bare(): + assert _host()._quote_literal(42) == "42" + + +def test_quote_literal_float_is_bare(): + assert _host()._quote_literal(3.5) == "3.5" + + +def test_quote_literal_datetime_uses_iso_seconds(): + assert _host()._quote_literal(datetime(2026, 6, 30, 14, 5, 9)) == "'2026-06-30 14:05:09'" + + +def test_quote_literal_string_is_quoted_and_escaped(): + # A single quote in a string literal must be doubled (anti-injection). + assert _host()._quote_literal("O'Brien") == "'O''Brien'" + + +# --------------------------------------------------------------------------- # +# _qualify_table: cache, cross-tenant guard, schema qualification. +# --------------------------------------------------------------------------- # + + +def test_qualify_table_qualifies_with_tenant_schema(): + host = _host( + tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "acme_schema"}), + ) + assert host._qualify_table("orders", "acme") == '"acme_schema"."orders"' + + +def test_qualify_table_returns_bare_name_when_no_schema(): + # resolved tenant maps to no schema -> the table is returned unqualified. + host = _host(tenant_router=_TenantRouter(has_config=False, schema_by_tenant={})) + assert host._qualify_table("orders", "acme") == "orders" + + +def test_qualify_table_uses_cache_when_present(): + # A pre-seeded cache entry is returned without recomputation. Kills the + # `cache is not None and cache_key in cache` guard flips. + cache = {("orders", "acme"): "CACHED"} + host = _host( + tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "acme_schema"}), + cache=cache, + ) + assert host._qualify_table("orders", "acme") == "CACHED" + + +def test_qualify_table_writes_qualified_result_to_cache(): + cache: dict = {} + host = _host( + tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "acme_schema"}), + cache=cache, + ) + host._qualify_table("orders", "acme") + assert cache[("orders", "acme")] == '"acme_schema"."orders"' + + +def test_qualify_table_writes_bare_name_to_cache(): + cache: dict = {} + host = _host(tenant_router=_TenantRouter(has_config=False, schema_by_tenant={}), cache=cache) + host._qualify_table("orders", "acme") + assert cache[("orders", "acme")] == "orders" + + +def test_qualify_table_rejects_invalid_schema(): + host = _host( + tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "bad schema"}), + ) + with pytest.raises(ValueError, match="Invalid DuckDB schema 'bad schema' for tenant 'acme'"): + host._qualify_table("orders", "acme") + + +def test_qualify_table_requires_tenant_context_for_scoped_table(monkeypatch): + # resolved tenant is None but a configured tenant owns columns for the table + # -> ambiguous, so reading it without a tenant must raise (no silent + # cross-tenant read). Pins the message and the table name. + monkeypatch.setattr(sql_builder_module, "get_current_tenant_id", lambda default=None: None) + router = _TenantRouter( + has_config=True, + schema_by_tenant={None: None}, + tenants=(_Tenant("acme_schema"),), + ) + host = _host(tenant_router=router, table_columns={'"acme_schema"."orders"': {"id"}}) + with pytest.raises( + ValueError, match="Tenant context is required for tenant-scoped table 'orders'" + ): + host._qualify_table("orders", None) + + +def test_qualify_table_no_context_passes_when_no_tenant_owns_table(monkeypatch): + # Same None-context path, but no configured tenant has columns for the table + # -> no raise, falls through to the (here unmapped) schema -> bare name. Kills + # a mutant that always raises in the loop. + monkeypatch.setattr(sql_builder_module, "get_current_tenant_id", lambda default=None: None) + router = _TenantRouter( + has_config=True, + schema_by_tenant={None: None}, + tenants=(_Tenant("acme_schema"),), + ) + host = _host(tenant_router=router, table_columns={}) + assert host._qualify_table("orders", None) == "orders" + + +# --------------------------------------------------------------------------- # +# _scope_sql: the core. Two paths -- no tenant schema (validate-only) and +# tenant schema (rewrite every known table into the schema). +# --------------------------------------------------------------------------- # + + +def test_scope_sql_rewrites_known_table_into_tenant_schema(): + host = _host( + tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "acme_schema"}), + ) + scoped = host._scope_sql("SELECT * FROM orders", "acme") + assert scoped == 'SELECT * FROM "acme_schema"."orders"' + + +def test_scope_sql_rewrites_pipeline_events_table(): + # pipeline_events is added to known_tables outside the catalog; pin that the + # `.add("pipeline_events")` line is real. + host = _host( + catalog=_Catalog("orders"), + tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "acme_schema"}), + ) + scoped = host._scope_sql("SELECT * FROM pipeline_events", "acme") + assert scoped == 'SELECT * FROM "acme_schema"."pipeline_events"' + + +def test_scope_sql_leaves_unknown_table_untouched(): + # A table not in the catalog is not rewritten even under a tenant schema. + host = _host( + catalog=_Catalog("orders"), + tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "acme_schema"}), + ) + scoped = host._scope_sql("SELECT * FROM widgets", "acme") + assert "acme_schema" not in scoped + assert "widgets" in scoped + + +def test_scope_sql_does_not_rewrite_cte_name(): + # A CTE named like a catalog table must not be schema-qualified (it's a local + # alias, not the physical table). Kills dropping the `in cte_names` skip. + host = _host( + catalog=_Catalog("orders"), + tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "acme_schema"}), + ) + scoped = host._scope_sql("WITH orders AS (SELECT 1 AS id) SELECT id FROM orders", "acme") + assert "acme_schema" not in scoped + + +def test_scope_sql_no_schema_returns_sql_unchanged(): + # No tenant schema -> the SQL text is returned verbatim (validation-only path). + host = _host(tenant_router=_TenantRouter(has_config=False, schema_by_tenant={})) + sql = "SELECT * FROM orders" + assert host._scope_sql(sql, "acme") == sql + + +def test_scope_sql_no_schema_enforces_tenant_context(monkeypatch): + # In the no-schema path each known unqualified table is still routed through + # _qualify_table, so an ambiguous tenant-scoped table raises rather than + # leaking. Pins that the no-schema branch actually calls _qualify_table. + monkeypatch.setattr(sql_builder_module, "get_current_tenant_id", lambda default=None: None) + router = _TenantRouter( + has_config=True, + schema_by_tenant={None: None}, + tenants=(_Tenant("acme_schema"),), + ) + host = _host( + catalog=_Catalog("orders"), + tenant_router=router, + table_columns={'"acme_schema"."orders"': {"id"}}, + ) + with pytest.raises( + ValueError, match="Tenant context is required for tenant-scoped table 'orders'" + ): + host._scope_sql("SELECT * FROM orders", None) + + +def test_scope_sql_no_schema_skips_already_qualified_table(monkeypatch): + # An already schema-qualified table in the no-schema path is skipped (db set), + # so it does not trigger the tenant-context guard. Kills dropping the + # `table.db` part of the skip condition. + calls: list[tuple[str, str | None]] = [] + monkeypatch.setattr(sql_builder_module, "get_current_tenant_id", lambda default=None: None) + router = _TenantRouter(has_config=True, schema_by_tenant={None: None}) + host = _host(catalog=_Catalog("orders"), tenant_router=router) + original = host._qualify_table + + def _spy(table_name: str, tenant_id: str | None) -> str: + calls.append((table_name, tenant_id)) + return original(table_name, tenant_id) + + host._qualify_table = _spy # type: ignore[method-assign] + host._scope_sql('SELECT * FROM other_schema."orders"', None) + assert calls == [] # qualified table was skipped, _qualify_table not called + + +# --------------------------------------------------------------------------- # +# Targeted mutant-killers: identifier-regex casing, the explicit-tenant +# short-circuit, the skip-condition boolean structure, continue-vs-break, and +# the catalog-clearing in the re-scope branch. +# --------------------------------------------------------------------------- # + + +def test_get_tenant_schema_accepts_uppercase_identifier(): + # The identifier regex must accept uppercase letters: a `[A-Za-z_]`->`[a-za-z_]` + # mutant would reject a valid mixed-case schema and raise. Pin acceptance. + host = _host(tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "Acme_DW"})) + assert host._get_tenant_schema("acme") == "Acme_DW" + + +def test_qualify_table_accepts_uppercase_schema(): + # Same regex-casing pin on the _qualify_table copy of the check. + host = _host(tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "Acme_DW"})) + assert host._qualify_table("orders", "acme") == '"Acme_DW"."orders"' + + +def test_qualify_table_with_explicit_tenant_skips_ambiguity_scan(): + # resolved tenant is not None -> the `resolved is None AND has_config` guard + # is False, so the cross-tenant ambiguity scan is skipped. An `and`->`or` + # mutant would enter the scan and wrongly raise on a foreign-tenant column + # match. Pin that an explicit tenant qualifies without raising. + router = _TenantRouter( + has_config=True, + schema_by_tenant={"acme": "acme_schema"}, + tenants=(_Tenant("other_schema"),), + ) + host = _host(tenant_router=router, table_columns={'"other_schema"."orders"': {"id"}}) + assert host._qualify_table("orders", "acme") == '"acme_schema"."orders"' + + +def test_scope_sql_no_schema_qualifies_only_known_non_cte_tables(): + # In the no-schema path _qualify_table is called for each known, unqualified, + # non-CTE table with the forwarded tenant id -- and NOT for unknown tables. + # Pins: the `not-in-known OR in-cte` boolean structure (an AND-flip would + # qualify unknown tables), continue-not-break (a break would stop before the + # known table), and the forwarded tenant id (a `->None` would drop it). + calls: list[tuple[str, str | None]] = [] + router = _TenantRouter(has_config=False, schema_by_tenant={"acme": None}) + host = _host(catalog=_Catalog("orders"), tenant_router=router) + original = host._qualify_table + host._qualify_table = ( # type: ignore[method-assign] + lambda name, tid: calls.append((name, tid)) or original(name, tid) + ) + host._scope_sql("SELECT * FROM widgets JOIN orders ON widgets.id = orders.id", "acme") + assert calls == [("orders", "acme")] + + +def test_scope_sql_no_schema_skips_cte_named_like_table(): + # A CTE named like a catalog table must not be qualified. The cte-name check + # lowercases the name; an `in cte_names`->`upper() in cte_names` mutant would + # miss the lowercase CTE and qualify it. Pin that no qualify call happens. + calls: list[tuple[str, str | None]] = [] + router = _TenantRouter(has_config=False, schema_by_tenant={"acme": None}) + host = _host(catalog=_Catalog("orders"), tenant_router=router) + original = host._qualify_table + host._qualify_table = ( # type: ignore[method-assign] + lambda name, tid: calls.append((name, tid)) or original(name, tid) + ) + host._scope_sql("WITH orders AS (SELECT 1 AS id) SELECT id FROM orders", "acme") + assert calls == [] + + +def test_scope_sql_schema_skips_unknown_then_qualifies_known(): + # schema branch: an unknown table is skipped with continue (not break) so a + # later known table is still rewritten. A continue->break mutant stops early + # and leaves 'orders' unqualified. + host = _host( + catalog=_Catalog("orders"), + tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "acme_schema"}), + ) + out = host._scope_sql("SELECT * FROM widgets JOIN orders ON widgets.id = orders.id", "acme") + assert '"acme_schema"."orders"' in out + assert '"acme_schema"."widgets"' not in out # unknown table stays bare + + +def test_scope_sql_schema_clears_existing_qualification(): + # A table arriving already catalog/schema-qualified is fully re-scoped into + # the caller's tenant schema -- the foreign catalog is cleared + # (audit_28_06_26.md #5). Pins table.set("catalog", None): a mutant that + # mis-keys or skips the catalog clear leaves the foreign catalog in the SQL. + host = _host( + catalog=_Catalog("orders"), + tenant_router=_TenantRouter(has_config=True, schema_by_tenant={"acme": "acme_schema"}), + ) + out = host._scope_sql("SELECT * FROM oldcat.oldschema.orders", "acme") + assert out == 'SELECT * FROM "acme_schema"."orders"' From 2aebace031dabeb7e632c379fc4e8e8c534fe670 Mon Sep 17 00:00:00 2001 From: JuliaEdom Date: Tue, 30 Jun 2026 01:41:46 +0300 Subject: [PATCH 2/2] fix(mutation): make sql_builder/nl_queries tests duckdb-free in the harness (B2) The B2 duckdb-free mutation tests gated stub installation on `import src` succeeding ("workspace has no top-level src"). That assumption is false: the editable-installed repo keeps the real `src` on sys.path even inside mutmut's mutants/ workspace, so the stubs were skipped there and the import fell through to the real `src.serving.semantic_layer.query` package -> `from .engine import QueryEngine` -> `import duckdb`. Under mutmut's coverage-instrumented stats pass on py3.11 that trips duckdb's lazy `_duckdb._sqltypes` import (the same break ci.yml already works around with `coverage run`), so `mutmut run` exited -11 with no scored mutants and the gate failed. Discriminate on the real signal instead: the workspace copies src/serving to a TOP-LEVEL `serving` package, which ordinary pytest never has. Gate stub install on `importlib.util.find_spec("serving")`. The stub path (engine/contracts/auth neutered to no-ops) was effectively never exercised before -- WSL py3.10 also had `src` importable, so its 96.0%/94.4% came from the real modules. Verified locally in a simulated workspace (top-level `serving` + editable `src`, as in CI): both modules import with duckdb NOT in sys.modules, subject under test is `serving.semantic_layer.query.*` (the mutation target), 101 tests pass via the stub path; the ordinary-pytest path still passes 101. Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/unit/test_nl_queries_mutation.py | 20 ++++++++++++++------ tests/unit/test_sql_builder_mutation.py | 22 ++++++++++++++-------- 2 files changed, 28 insertions(+), 14 deletions(-) diff --git a/tests/unit/test_nl_queries_mutation.py b/tests/unit/test_nl_queries_mutation.py index f05e4e1..7273868 100644 --- a/tests/unit/test_nl_queries_mutation.py +++ b/tests/unit/test_nl_queries_mutation.py @@ -49,12 +49,20 @@ import types -def _src_package_available() -> bool: - try: - import src # noqa: F401 +def _in_mutation_workspace() -> bool: + # mutmut's mutants/ workspace copies src/serving to a TOP-LEVEL `serving` + # package (scripts/mutation_report.py prepare_workspace); ordinary pytest has + # no top-level `serving` (only src.serving), so its presence cleanly marks the + # harness. The old `import src` probe did not: the editable-installed repo keeps + # the real `src` importable even inside the workspace, so the stubs were skipped + # there and the real duckdb-backed engine import crashed mutmut's + # coverage-instrumented stats pass on py3.11 (duckdb's lazy `_duckdb._sqltypes` + # import breaks under coverage tracing -- see .github/workflows/ci.yml). + import importlib.util - return True - except ModuleNotFoundError: + try: + return importlib.util.find_spec("serving") is not None + except (ImportError, ValueError): return False @@ -113,7 +121,7 @@ class BackendExecutionError(RuntimeError): contracts_stub.NLQueryHost = object -if not _src_package_available(): +if _in_mutation_workspace(): _install_harness_stubs() try: # mutation-harness workspace exposes it as a top-level package diff --git a/tests/unit/test_sql_builder_mutation.py b/tests/unit/test_sql_builder_mutation.py index 499f98d..9a8ab83 100644 --- a/tests/unit/test_sql_builder_mutation.py +++ b/tests/unit/test_sql_builder_mutation.py @@ -52,14 +52,20 @@ from datetime import datetime -def _src_package_available() -> bool: - # In the mutation harness the workspace has no top-level `src` package - # (only `serving`); under ordinary pytest `src` is the real package. - try: - import src # noqa: F401 +def _in_mutation_workspace() -> bool: + # mutmut's mutants/ workspace copies src/serving to a TOP-LEVEL `serving` + # package (scripts/mutation_report.py prepare_workspace); ordinary pytest has + # no top-level `serving` (only src.serving), so its presence cleanly marks the + # harness. The old `import src` probe did not: the editable-installed repo keeps + # the real `src` importable even inside the workspace, so the stubs were skipped + # there and the real duckdb-backed engine import crashed mutmut's + # coverage-instrumented stats pass on py3.11 (duckdb's lazy `_duckdb._sqltypes` + # import breaks under coverage tracing -- see .github/workflows/ci.yml). + import importlib.util - return True - except ModuleNotFoundError: + try: + return importlib.util.find_spec("serving") is not None + except (ImportError, ValueError): return False @@ -97,7 +103,7 @@ def get_current_tenant_id(default: str | None = None) -> str | None: contracts_stub.NLQueryHost = object -if not _src_package_available(): +if _in_mutation_workspace(): _install_harness_stubs() try: # mutation-harness workspace exposes it as a top-level package