From 98670f9ead578d0a1b83d17fdeae7c3876051c1c Mon Sep 17 00:00:00 2001
From: Kent <iamcxa@gmail.com>
Date: Wed, 3 Jun 2026 17:50:28 +0800
Subject: [PATCH 1/4] feat(mcp): surface run_id in run-backed tool results for
 citation (DRC-3532)

The cloud-backend MCP tools (row_count_diff, profile_diff, value_diff, query,
query_diff, top_k_diff, histogram_diff) routed through _tool_run_backed returned
only run["result"], dropping run_id. The recce-cloud summary agent therefore
never saw the run_id and could not emit deterministic {{run:<run_id>}} citation
markers, forcing fragile server-side fuzzy prose matching.

Merge run_id into the result dict (additive; existing result fields preserved).
Only added when the response carries a run_id and the result is a dict, so
run-less or non-dict responses are untouched (never synthesize a run_id).

Cross-repo (DRC-3532): the recce-cloud summary agent prompt is updated to emit
the markers; server-side marker replacement already exists in recce_task_func.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Kent <iamcxa@gmail.com>
---
 recce/mcp_server.py             | 12 ++++++++-
 tests/test_mcp_cloud_backend.py | 44 ++++++++++++++++++++++++++++++++-
 2 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/recce/mcp_server.py b/recce/mcp_server.py
index 6533379db..fefe7f2b4 100644
--- a/recce/mcp_server.py
+++ b/recce/mcp_server.py
@@ -178,7 +178,17 @@ async def _tool_run_backed(self, run_type: str, params: Dict[str, Any]) -> Dict[
             "runs",
             json={"type": run_type, "params": params, "nowait": False},
         )
-        return run.get("result", run)
+        result = run.get("result", run)
+        # DRC-3532: surface run_id alongside the result so the summary agent can
+        # cite the exact run inline via {{run:<run_id>}} markers. Additive
+        # (option i): existing result fields are preserved. Only added when the
+        # response actually carries a run_id and the result is a dict, so
+        # run-less responses and non-dict results are left untouched (the agent
+        # must never synthesize a run_id it was not given).
+        run_id = run.get("run_id")
+        if run_id is not None and isinstance(result, dict):
+            result = {**result, "run_id": str(run_id)}
+        return result
 
     async def _tool_list_checks(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
         checks = await self._request("GET", "checks")
diff --git a/tests/test_mcp_cloud_backend.py b/tests/test_mcp_cloud_backend.py
index 174d9391b..acad5c4fc 100644
--- a/tests/test_mcp_cloud_backend.py
+++ b/tests/test_mcp_cloud_backend.py
@@ -125,6 +125,47 @@ async def test_run_check_auto_approve_failure_does_not_mask_run_result(cloud_req
     assert result["result"] == {"ok": True}
 
 
+@pytest.mark.asyncio
+async def test_run_backed_tool_surfaces_run_id_in_result(cloud_requests):
+    # DRC-3532: run-backed analysis tools (row_count_diff, profile_diff,
+    # value_diff, query, ...) must surface the run_id so the summary agent can
+    # cite the exact run via {{run:<run_id>}} markers. Option (i): run_id is
+    # merged into the result dict (additive — existing result fields preserved).
+    cloud_requests.side_effect = [
+        MockResponse(204),
+        MockResponse(
+            200,
+            {
+                "run_id": "run-xyz",
+                "result": {"results": [{"node_id": "model.pkg.orders", "base": 100, "current": 105}]},
+            },
+        ),
+    ]
+    backend = await CloudBackend.create(session_id="sess-123", api_token="token-abc")
+
+    result = await backend.call_tool("row_count_diff", {"node_names": ["orders"]})
+
+    assert result["run_id"] == "run-xyz"
+    # Existing result fields are preserved (additive, not a wrapper).
+    assert result["results"][0]["base"] == 100
+
+
+@pytest.mark.asyncio
+async def test_run_backed_tool_without_run_id_is_unchanged(cloud_requests):
+    # A run-backed response missing run_id leaves the result untouched (no new
+    # key invented) — anti-fabrication: never synthesize a run_id.
+    cloud_requests.side_effect = [
+        MockResponse(204),
+        MockResponse(200, {"result": {"results": [{"node_id": "model.pkg.orders"}]}}),
+    ]
+    backend = await CloudBackend.create(session_id="sess-123", api_token="token-abc")
+
+    result = await backend.call_tool("row_count_diff", {"node_names": ["orders"]})
+
+    assert "run_id" not in result
+    assert result["results"][0]["node_id"] == "model.pkg.orders"
+
+
 @pytest.mark.asyncio
 async def test_create_check_runs_lineage_diff_via_checks_run_endpoint(cloud_requests):
     cloud_requests.side_effect = [
@@ -747,7 +788,8 @@ async def test_cloud_backend_routes_run_tool_types_through_run_backed(cloud_requ
 
     result = await backend.call_tool("row_count_diff", {"node_names": ["customers"]})
 
-    assert result == {"customers": {"base": 1, "curr": 2}}
+    # run_id is now surfaced alongside the result (DRC-3532, additive).
+    assert result == {"customers": {"base": 1, "curr": 2}, "run_id": "run-1"}
     runs_call = cloud_requests.call_args_list[1]
     assert runs_call.args[1].endswith("/runs")
     assert runs_call.kwargs["json"]["type"] == "row_count_diff"

From 6cf33ade209466ab7270ddd51afd908f6ae47bfd Mon Sep 17 00:00:00 2001
From: Kent <iamcxa@gmail.com>
Date: Sat, 13 Jun 2026 00:05:19 +0800
Subject: [PATCH 2/4] =?UTF-8?q?feat(mcp):=20DRC-3634=20local-mode=20run-ba?=
 =?UTF-8?q?cked=20=E2=80=94=20persist=20Runs=20to=20in-process=20state?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Local-mode ad-hoc diff tools (row_count_diff, profile_diff, query,
query_diff, value_diff) now route through _tool_run_backed_local, which
uses the same submit_run machinery as the recce server's run_router to
persist Runs to default_context().runs.  The tool result carries run_id
alongside the diff output, matching the CloudBackend._tool_run_backed
shape (commit 98670f9e).

When default_context() is None (cloud SSE path already handled by
CloudBackend), the method falls back to direct task execution without
run_id — identical to pre-DRC-3634 behaviour, no double execution.

The MCP server remains the sole state exporter; runs created mid-session
via _tool_run_backed_local are included in the exported payload because
they land in the same in-process RecceContext.runs list that
export_state() serialises.

Tests: 12 new tests in TestLocalModeRunBacked covering run_id presence,
run persistence in context.runs, run type mapping (query/query_base), and
the isolation boundary (value_diff_detail stays outside run-backed scope).
175 MCP tests green (135 server + 40 cloud backend); ruff clean.

Signed-off-by: Kent <iamcxa@gmail.com>
---
 recce/mcp_server.py      | 119 +++++++++++++++++---------
 tests/test_mcp_server.py | 177 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 255 insertions(+), 41 deletions(-)

diff --git a/recce/mcp_server.py b/recce/mcp_server.py
index fefe7f2b4..66d19df58 100644
--- a/recce/mcp_server.py
+++ b/recce/mcp_server.py
@@ -21,13 +21,16 @@
 from mcp.server.stdio import stdio_server
 from mcp.types import TextContent, Tool
 
+# DRC-3634: submit_run is the run-persistence entry point shared with the
+# recce server's run_router.  Local-mode ad-hoc diff tools import it to
+# persist Runs to the in-process RecceContext so the state exported to S3
+# at MCP-server shutdown carries the analysis runs.
+from recce.apis.run_func import submit_run as _submit_run_fn  # noqa: E402
 from recce.core import RecceContext, load_context
 from recce.exceptions import RecceException
 from recce.server import RecceServerMode
 from recce.tasks.dataframe import DataFrame
 from recce.tasks.histogram import HistogramDiffTask
-from recce.tasks.profile import ProfileDiffTask
-from recce.tasks.query import QueryDiffTask, QueryTask
 from recce.tasks.rowcount import (
     PERMISSION_DENIED_INDICATORS,
     SYNTAX_ERROR_INDICATORS,
@@ -35,7 +38,7 @@
     RowCountDiffTask,
 )
 from recce.tasks.top_k import TopKDiffTask
-from recce.tasks.valuediff import ValueDiffDetailTask, ValueDiffTask
+from recce.tasks.valuediff import ValueDiffDetailTask
 from recce.util.recce_cloud import RECCE_CLOUD_API_HOST, RecceCloudException
 
 logger = logging.getLogger(__name__)
@@ -568,6 +571,58 @@ def _maybe_add_single_env_warning(self, result: Dict[str, Any]) -> Dict[str, Any
             return {**result, "_warning": SINGLE_ENV_WARNING}
         return result
 
+    async def _tool_run_backed_local(self, run_type: str, params: Dict[str, Any]) -> Dict[str, Any]:
+        """Execute a run-backed diff tool in local mode (DRC-3634).
+
+        When an in-process ``RecceContext`` is available as ``default_context()``,
+        this method mirrors CloudBackend._tool_run_backed (commit 98670f9e): it
+        uses the same ``submit_run`` machinery as the recce server's run_router
+        so that the Run is persisted to ``default_context().runs`` and therefore
+        included in the S3-exported state the infra comment-builder reads.  The
+        returned dict carries ``run_id`` alongside the diff result.
+
+        When ``default_context()`` is None (e.g. the cloud-backend SSE path that
+        routes through the ``CloudBackend`` class instead), this method falls back
+        to a plain in-executor task execution and returns the result without a
+        ``run_id`` — identical to the pre-DRC-3634 behaviour.
+
+        The Run executes **once** via ``submit_run``'s thread-pool executor —
+        there is no double execution.
+        """
+        from recce.core import default_context
+
+        if default_context() is None:
+            # Fallback path: no in-process context, execute task directly.
+            from recce.apis.run_func import create_task
+            from recce.models.types import RunType
+
+            try:
+                run_type_enum = RunType(run_type)
+                task = create_task(run_type_enum, params)
+            except (ValueError, NotImplementedError):
+                raise ValueError(f"Run type '{run_type}' not supported in local mode")
+            raw_result = await asyncio.get_event_loop().run_in_executor(None, task.execute)
+            if hasattr(raw_result, "model_dump"):
+                return raw_result.model_dump(mode="json")
+            return raw_result if isinstance(raw_result, dict) else {}
+
+        run, future = _submit_run_fn(run_type, params, triggered_by="recce_ai")
+        # Await the executor future so run.result is populated before we return.
+        await asyncio.wrap_future(future)
+        # Normalise run.result to a plain dict (tasks may return Pydantic models).
+        raw_result = run.result
+        if raw_result is None:
+            result: Dict[str, Any] = {}
+        elif isinstance(raw_result, dict):
+            result = raw_result
+        elif hasattr(raw_result, "model_dump"):
+            result = raw_result.model_dump(mode="json")
+        else:
+            result = {}
+        # Surface run_id alongside the result — matching CloudBackend shape:
+        # ``result = {**result, "run_id": str(run_id)}`` (commit 98670f9e).
+        return {**result, "run_id": str(run.run_id)}
+
     def _setup_handlers(self):
         """Register all tool handlers"""
 
@@ -1606,60 +1661,42 @@ async def _tool_schema_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
 
     async def _tool_row_count_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
         """Execute row count diff task"""
-        task = RowCountDiffTask(params=arguments)
-
-        # Offload sync task to thread pool to avoid blocking the event loop
-        result = await asyncio.get_event_loop().run_in_executor(None, task.execute)
-
+        # DRC-3634: route through _tool_run_backed_local so the Run is persisted
+        # to the in-process context and the result carries run_id.
+        result = await self._tool_run_backed_local("row_count_diff", arguments)
         return self._maybe_add_single_env_warning(result)
 
     async def _tool_query(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
         """Execute a query"""
-        sql_template = arguments.get("sql_template")
+        # DRC-3634: route through _tool_run_backed_local so the Run is persisted
+        # to the in-process context and the result carries run_id.
+        # Map the `base` flag to the run_type used by the server run_router:
+        # QueryTask (is_base=False) → "query"; QueryBaseTask (is_base=True) → "query_base".
         is_base = arguments.get("base", False)
-
-        params = {"sql_template": sql_template}
-        task = QueryTask(params=params)
-        task.is_base = is_base
-
-        # Execute task
-        result = await asyncio.get_event_loop().run_in_executor(None, task.execute)
-
-        # Convert to dict if it's a model
-        if hasattr(result, "model_dump"):
-            return result.model_dump(mode="json")
-        return result
+        run_type = "query_base" if is_base else "query"
+        # params for QueryTask / QueryBaseTask accept {"sql_template": ...} only.
+        params = {"sql_template": arguments.get("sql_template")}
+        return await self._tool_run_backed_local(run_type, params)
 
     async def _tool_query_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
         """Execute query diff task"""
-        task = QueryDiffTask(params=arguments)
-
-        # Execute task
-        result = await asyncio.get_event_loop().run_in_executor(None, task.execute)
-
-        # Convert to dict if it's a model
-        if hasattr(result, "model_dump"):
-            result = result.model_dump(mode="json")
+        # DRC-3634: route through _tool_run_backed_local so the Run is persisted
+        # to the in-process context and the result carries run_id.
+        result = await self._tool_run_backed_local("query_diff", arguments)
         return self._maybe_add_single_env_warning(result)
 
     async def _tool_profile_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
         """Execute profile diff task"""
-        task = ProfileDiffTask(params=arguments)
-
-        # Execute task
-        result = await asyncio.get_event_loop().run_in_executor(None, task.execute)
-
-        # Convert to dict if it's a model
-        if hasattr(result, "model_dump"):
-            result = result.model_dump(mode="json")
+        # DRC-3634: route through _tool_run_backed_local so the Run is persisted
+        # to the in-process context and the result carries run_id.
+        result = await self._tool_run_backed_local("profile_diff", arguments)
         return self._maybe_add_single_env_warning(result)
 
     async def _tool_value_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
         """Execute value diff task"""
-        task = ValueDiffTask(params=arguments)
-        result = await asyncio.get_event_loop().run_in_executor(None, task.execute)
-        if hasattr(result, "model_dump"):
-            result = result.model_dump(mode="json")
+        # DRC-3634: route through _tool_run_backed_local so the Run is persisted
+        # to the in-process context and the result carries run_id.
+        result = await self._tool_run_backed_local("value_diff", arguments)
         return self._maybe_add_single_env_warning(result)
 
     async def _tool_value_diff_detail(self, arguments: Dict[str, Any]) -> Dict[str, Any]:
diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py
index 35439c816..77f484f01 100644
--- a/tests/test_mcp_server.py
+++ b/tests/test_mcp_server.py
@@ -2812,3 +2812,180 @@ async def test_response_uses_new_field_names(self, setup_impact_mocks):
         assert "max_affected_row_count" in result
         assert "total_affected_row_count" not in result
         assert "suggested_deep_dives" not in result
+
+
+class TestLocalModeRunBacked:
+    """DRC-3634: local-mode ad-hoc diff tools must persist Runs to the in-process
+    RecceContext and surface run_id in the tool result — matching the shape of
+    CloudBackend._tool_run_backed (commit 98670f9e)."""
+
+    @pytest.fixture(autouse=True)
+    def _setup_default_context(self, request):
+        """Set a minimal RecceContext as the default_context for RunDAO access,
+        then tear it down after each test to avoid cross-test pollution."""
+        from recce.core import set_default_context
+
+        mock_adapter = MagicMock()
+        mock_adapter.adapter_type = "dbt"
+        context = RecceContext(adapter_type="dbt", adapter=mock_adapter, runs=[])
+        set_default_context(context)
+        # Expose context on the test instance so individual tests can inspect runs.
+        request.instance._test_context = context
+        yield
+        set_default_context(None)
+
+    @property
+    def _context(self):
+        return self._test_context
+
+    @pytest.fixture
+    def server(self):
+        """RecceMCPServer backed by the context set up in _setup_default_context."""
+        # _setup_default_context runs before server fixture because autouse=True.
+        context = self._test_context
+        return RecceMCPServer(context)
+
+    # ------------------------------------------------------------------
+    # row_count_diff
+    # ------------------------------------------------------------------
+
+    @pytest.mark.asyncio
+    async def test_row_count_diff_returns_run_id(self, server):
+        """row_count_diff in local mode surfaces run_id in the result (DRC-3634)."""
+        mock_raw = {"results": [{"node_id": "model.p.orders", "base": 100, "current": 105, "diff": 5}]}
+        with patch.object(RowCountDiffTask, "execute", return_value=mock_raw):
+            result = await server._tool_row_count_diff({"node_names": ["orders"]})
+        assert "run_id" in result, "run_id must be in result for DRC-3634 run-backed local mode"
+
+    @pytest.mark.asyncio
+    async def test_row_count_diff_persists_run_in_context(self, server):
+        """row_count_diff must create a Run in context.runs (DRC-3634)."""
+        mock_raw = {"results": []}
+        with patch.object(RowCountDiffTask, "execute", return_value=mock_raw):
+            result = await server._tool_row_count_diff({"node_names": ["orders"]})
+        assert len(self._context.runs) == 1, "exactly one Run must be persisted"
+        assert str(self._context.runs[0].run_id) == result["run_id"]
+
+    # ------------------------------------------------------------------
+    # query (current env — run_type "query")
+    # ------------------------------------------------------------------
+
+    @pytest.mark.asyncio
+    async def test_query_returns_run_id(self, server):
+        """query in local mode surfaces run_id in the result (DRC-3634)."""
+        mock_model = MagicMock()
+        mock_model.model_dump.return_value = {"columns": ["id"], "data": [[1]]}
+        with patch("recce.tasks.query.QueryTask.execute", return_value=mock_model):
+            result = await server._tool_query({"sql_template": "SELECT 1", "base": False})
+        assert "run_id" in result
+
+    @pytest.mark.asyncio
+    async def test_query_persists_run_type_query(self, server):
+        """query (base=False) must persist a run with type 'query' (DRC-3634)."""
+        from recce.models.types import RunType
+
+        mock_model = MagicMock()
+        mock_model.model_dump.return_value = {"columns": ["id"], "data": [[1]]}
+        with patch("recce.tasks.query.QueryTask.execute", return_value=mock_model):
+            await server._tool_query({"sql_template": "SELECT 1", "base": False})
+        assert len(self._context.runs) == 1
+        assert self._context.runs[0].type == RunType.QUERY
+
+    @pytest.mark.asyncio
+    async def test_query_base_persists_run_type_query_base(self, server):
+        """query (base=True) must persist a run with type 'query_base' (DRC-3634)."""
+        from recce.models.types import RunType
+
+        mock_model = MagicMock()
+        mock_model.model_dump.return_value = {"columns": ["id"], "data": [[1]]}
+        with patch("recce.tasks.query.QueryBaseTask.execute", return_value=mock_model):
+            result = await server._tool_query({"sql_template": "SELECT 1", "base": True})
+        assert "run_id" in result
+        assert len(self._context.runs) == 1
+        assert self._context.runs[0].type == RunType.QUERY_BASE
+
+    # ------------------------------------------------------------------
+    # query_diff
+    # ------------------------------------------------------------------
+
+    @pytest.mark.asyncio
+    async def test_query_diff_returns_run_id(self, server):
+        """query_diff in local mode surfaces run_id in the result (DRC-3634)."""
+        mock_model = MagicMock()
+        mock_model.model_dump.return_value = {"diff": {"added": [], "removed": [], "modified": []}}
+        with patch.object(QueryDiffTask, "execute", return_value=mock_model):
+            result = await server._tool_query_diff(
+                {"sql_template": "SELECT * FROM {{ ref('m') }}", "primary_keys": ["id"]}
+            )
+        assert "run_id" in result
+
+    @pytest.mark.asyncio
+    async def test_query_diff_persists_run_in_context(self, server):
+        """query_diff must create a Run in context.runs (DRC-3634)."""
+        mock_model = MagicMock()
+        mock_model.model_dump.return_value = {"diff": {}}
+        with patch.object(QueryDiffTask, "execute", return_value=mock_model):
+            result = await server._tool_query_diff(
+                {"sql_template": "SELECT id FROM {{ ref('m') }}", "primary_keys": ["id"]}
+            )
+        assert len(self._context.runs) == 1
+        assert str(self._context.runs[0].run_id) == result["run_id"]
+
+    # ------------------------------------------------------------------
+    # profile_diff
+    # ------------------------------------------------------------------
+
+    @pytest.mark.asyncio
+    async def test_profile_diff_returns_run_id(self, server):
+        """profile_diff in local mode surfaces run_id in the result (DRC-3634)."""
+        mock_model = MagicMock()
+        mock_model.model_dump.return_value = {"columns": {"id": {"base": {}, "current": {}}}}
+        with patch.object(ProfileDiffTask, "execute", return_value=mock_model):
+            result = await server._tool_profile_diff({"model": "orders"})
+        assert "run_id" in result
+
+    @pytest.mark.asyncio
+    async def test_profile_diff_persists_run_in_context(self, server):
+        """profile_diff must create a Run in context.runs (DRC-3634)."""
+        mock_model = MagicMock()
+        mock_model.model_dump.return_value = {"columns": {}}
+        with patch.object(ProfileDiffTask, "execute", return_value=mock_model):
+            result = await server._tool_profile_diff({"model": "orders"})
+        assert len(self._context.runs) == 1
+        assert str(self._context.runs[0].run_id) == result["run_id"]
+
+    # ------------------------------------------------------------------
+    # value_diff
+    # ------------------------------------------------------------------
+
+    @pytest.mark.asyncio
+    async def test_value_diff_returns_run_id(self, server):
+        """value_diff in local mode surfaces run_id in the result (DRC-3634)."""
+        mock_model = MagicMock()
+        mock_model.model_dump.return_value = {"summary": {"total": 10}, "data": {}}
+        with patch.object(ValueDiffTask, "execute", return_value=mock_model):
+            result = await server._tool_value_diff({"model": "orders", "primary_key": "id"})
+        assert "run_id" in result
+
+    @pytest.mark.asyncio
+    async def test_value_diff_persists_run_in_context(self, server):
+        """value_diff must create a Run in context.runs (DRC-3634)."""
+        mock_model = MagicMock()
+        mock_model.model_dump.return_value = {"summary": {}, "data": {}}
+        with patch.object(ValueDiffTask, "execute", return_value=mock_model):
+            result = await server._tool_value_diff({"model": "orders", "primary_key": "id"})
+        assert len(self._context.runs) == 1
+        assert str(self._context.runs[0].run_id) == result["run_id"]
+
+    # ------------------------------------------------------------------
+    # Isolation: existing tools without run-backed behavior unchanged
+    # ------------------------------------------------------------------
+
+    @pytest.mark.asyncio
+    async def test_value_diff_detail_no_run_id(self, server):
+        """value_diff_detail is NOT run-backed — result must NOT carry run_id (DRC-3634 scope boundary)."""
+        mock_model = MagicMock()
+        mock_model.model_dump.return_value = {"columns": ["id"], "data": [[1]]}
+        with patch.object(ValueDiffDetailTask, "execute", return_value=mock_model):
+            result = await server._tool_value_diff_detail({"model": "orders", "primary_key": "id"})
+        assert "run_id" not in result, "value_diff_detail is outside run-backed scope"

From 889a01b7a0d3389f2e4ea793f342b9a8afe21aad Mon Sep 17 00:00:00 2001
From: Kent <iamcxa@gmail.com>
Date: Sun, 14 Jun 2026 02:39:27 +0800
Subject: [PATCH 3/4] fix(profile): resolve lowercase columns to physical names
 on case-folding warehouses
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

profile_diff/profile filtered the requested `columns` with a case-sensitive
membership test (`column.name in selected_columns`). get_columns() returns
physical catalog names, which case-folding warehouses (e.g. Snowflake) store
UPPERCASE, while the Recce Cloud summary agent supplies lowercase
manifest-convention names. The exact-case filter dropped every column, so the
per-column profiling loop never ran and the tool returned a completely empty
result ({"base":{"columns":[],"data":[]},"current":{"columns":[],"data":[]}}).
This made the cloud summary non-deterministic: lowercase column names yielded an
empty profile that the downstream trust gate suppressed.

Fix: lowercase both sides of the membership test before filtering, mirroring the
case-insensitive normalisation in valuediff's _build_column_case_lookup. The SQL
path is unchanged — it still profiles via the physical column.name — so lowercase
input now resolves to the same physical (uppercase) columns and returns the same
non-empty profile as uppercase input. No-op on already-lowercase duckdb models and
on quoted/case-sensitive identifiers; applied to both ProfileDiffTask and
ProfileTask.

Adds regression tests using an UPPERCASE duckdb CSV header (duckdb preserves
header casing, reproducing Snowflake physical-name casing end-to-end with no
mocking): asserts lowercase and uppercase column requests return identical
non-empty profiles, plus an adapter-safety no-op test on lowercase physical names.

DRC-3674

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
Signed-off-by: Kent <iamcxa@gmail.com>
---
 recce/tasks/profile.py      | 18 +++++++--
 tests/tasks/test_profile.py | 78 +++++++++++++++++++++++++++++++++++++
 2 files changed, 92 insertions(+), 4 deletions(-)

diff --git a/recce/tasks/profile.py b/recce/tasks/profile.py
index c6f94f387..2c2334693 100644
--- a/recce/tasks/profile.py
+++ b/recce/tasks/profile.py
@@ -183,9 +183,15 @@ def execute(self):
             curr_columns = [column for column in dbt_adapter.get_columns(model, base=False)]
 
             if selected_columns:
-                # Only profile the columns in the filter_columns
-                base_columns = [column for column in base_columns if column.name in selected_columns]
-                curr_columns = [column for column in curr_columns if column.name in selected_columns]
+                # Only profile the columns in the filter_columns.
+                # Match case-insensitively: get_columns() returns physical catalog names,
+                # which case-folding warehouses (e.g. Snowflake) store UPPERCASE while
+                # callers supply lowercase manifest-convention names. A case-sensitive
+                # `column.name in selected_columns` test would drop every column and yield
+                # an empty profile. (DRC-3674)
+                requested = {name.lower() for name in selected_columns}
+                base_columns = [column for column in base_columns if column.name.lower() in requested]
+                curr_columns = [column for column in curr_columns if column.name.lower() in requested]
 
             total = len(base_columns) + len(curr_columns)
             completed = 0
@@ -288,7 +294,11 @@ def execute(self):
             curr_columns = [column for column in dbt_adapter.get_columns(model, base=False)]
 
             if selected_columns:
-                curr_columns = [column for column in curr_columns if column.name in selected_columns]
+                # Case-insensitive match — see ProfileDiffTask.execute (DRC-3674): physical
+                # catalog names are UPPERCASE on case-folding warehouses while callers supply
+                # lowercase names, so an exact-case filter would drop every column.
+                requested = {name.lower() for name in selected_columns}
+                curr_columns = [column for column in curr_columns if column.name.lower() in requested]
 
             total = len(curr_columns)
             completed = 0
diff --git a/tests/tasks/test_profile.py b/tests/tasks/test_profile.py
index 3df1a32c7..26e26c90a 100644
--- a/tests/tasks/test_profile.py
+++ b/tests/tasks/test_profile.py
@@ -122,6 +122,84 @@ def validate(params: dict = {}, view_options: dict = {}):
         validate({})
 
 
+# =============================================================================
+# Snowflake column-case regression tests (DRC-3674)
+#
+# Root cause: profile_diff / profile filtered the requested columns with a
+# case-sensitive membership test (``column.name in selected_columns``).
+# get_columns() returns physical catalog names, which case-folding warehouses
+# (e.g. Snowflake) store UPPERCASE, while the cloud summary agent supplies
+# lowercase manifest-convention names. The exact-case filter dropped EVERY
+# column, so the per-column profiling loop never ran and the result came back
+# completely empty:
+#   {"base":{"columns":[],"data":[]},"current":{"columns":[],"data":[]}}
+#
+# DuckDB preserves the CSV-header casing, so an UPPERCASE header reproduces the
+# Snowflake physical-name casing end-to-end (no mocking needed).
+#
+# Fix: lowercase both sides of the membership test before filtering, so
+# lowercase input resolves to the physical (uppercase) columns and yields the
+# same non-empty profile as uppercase input. No-op on already-lowercase
+# duckdb models and on quoted/case-sensitive identifiers (the SQL still profiles
+# via the physical column.name).
+# =============================================================================
+
+csv_data_uppercase = """
+        CUSTOMER_ID,CUSTOMER_LIFETIME_VALUE,NET_CUSTOMER_LIFETIME_VALUE
+        1,100,90
+        2,200,180
+        3,300,270
+        """
+
+
+def test_profile_diff_lowercase_columns_on_uppercase_physical(dbt_test_helper):
+    """DRC-3674: lowercase requested columns must resolve against UPPERCASE physical names.
+
+    Pre-fix: lowercase input yields 0 columns / 0 rows on both sides (empty profile).
+    Post-fix: lowercase input returns the SAME non-empty profile as uppercase input.
+    """
+    dbt_test_helper.create_model("snowflake_customers", csv_data_uppercase, csv_data_uppercase)
+
+    lower = ProfileDiffTask(
+        dict(model="snowflake_customers", columns=["customer_lifetime_value", "net_customer_lifetime_value"])
+    ).execute()
+    upper = ProfileDiffTask(
+        dict(model="snowflake_customers", columns=["CUSTOMER_LIFETIME_VALUE", "NET_CUSTOMER_LIFETIME_VALUE"])
+    ).execute()
+
+    # Lowercase must not be empty (the production bug).
+    assert len(lower.base.data) == 2
+    assert len(lower.current.data) == 2
+
+    # Lowercase and uppercase requests must be identical.
+    assert len(lower.base.data) == len(upper.base.data)
+    assert len(lower.current.data) == len(upper.current.data)
+
+    # Profiled the physical (uppercase) columns, not the lowercase request strings.
+    profiled = {row[0] for row in lower.current.data}
+    assert profiled == {"CUSTOMER_LIFETIME_VALUE", "NET_CUSTOMER_LIFETIME_VALUE"}
+
+
+def test_profile_lowercase_columns_on_uppercase_physical(dbt_test_helper):
+    """DRC-3674: single-sided ProfileTask must also resolve lowercase → physical names."""
+    dbt_test_helper.create_model("snowflake_customers", None, csv_data_uppercase)
+
+    lower = ProfileTask(dict(model="snowflake_customers", columns=["customer_lifetime_value"])).execute()
+    upper = ProfileTask(dict(model="snowflake_customers", columns=["CUSTOMER_LIFETIME_VALUE"])).execute()
+
+    assert len(lower.current.data) == 1
+    assert len(lower.current.data) == len(upper.current.data)
+    assert lower.current.data[0][0] == "CUSTOMER_LIFETIME_VALUE"
+
+
+def test_profile_diff_lowercase_is_noop_on_lowercase_physical(dbt_test_helper):
+    """Adapter-safety: case-insensitive matching is a no-op when physical names are already lowercase (duckdb)."""
+    dbt_test_helper.create_model("customers", csv_data_base, csv_data_curr)
+    run_result = ProfileDiffTask(dict(model="customers", columns=["name", "age"])).execute()
+    assert len(run_result.current.data) == 2
+    assert len(run_result.base.data) == 2
+
+
 def test_profile_column_jinja_template():
 
     class DummyAdapter:

From fc6f3e111de47a0c010bfffeb32f6c103b6a53b1 Mon Sep 17 00:00:00 2001
From: Kent <iamcxa@gmail.com>
Date: Mon, 15 Jun 2026 12:08:32 +0800
Subject: [PATCH 4/4] test(mcp): tolerate DRC-3532 run_id in
 row_count_diff/query result assertions

test_tool_row_count_diff and test_tool_query_with_base_flag asserted exact
equality with the mock result, but run-backed local mode (DRC-3532, _tool_run_backed_local)
now merges a run_id key into the tool result. Compare on the result fields
ignoring run_id; the dedicated TestLocalModeRunBacked tests cover run_id surfacing.
Fixes the Test DBT Versions CI failure on PR #1418. 135 test_mcp_server tests green.

Signed-off-by: Kent <iamcxa@gmail.com>
---
 tests/test_mcp_server.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py
index 77f484f01..a5cca14de 100644
--- a/tests/test_mcp_server.py
+++ b/tests/test_mcp_server.py
@@ -183,8 +183,10 @@ async def test_tool_row_count_diff(self, mcp_server):
         with patch.object(RowCountDiffTask, "execute", return_value=mock_result):
             result = await server._tool_row_count_diff({"node_names": ["my_model"]})
 
-        # Verify the result
-        assert result == mock_result
+        # Verify the result. DRC-3532 run-backed local mode surfaces a run_id key
+        # alongside the diff result; compare on the result fields, ignoring run_id
+        # (the dedicated TestLocalModeRunBacked tests cover run_id surfacing).
+        assert {k: v for k, v in result.items() if k != "run_id"} == mock_result
         assert "results" in result
 
     @pytest.mark.asyncio
@@ -220,8 +222,9 @@ async def test_tool_query_with_base_flag(self, mcp_server):
 
                 result = await server._tool_query({"sql_template": "SELECT 1", "base": True})
 
-                # Verify base flag was set (would need to inspect task creation)
-                assert result == mock_result
+                # Verify base flag was set (would need to inspect task creation).
+                # DRC-3532 run-backed local mode adds a run_id key; ignore it here.
+                assert {k: v for k, v in result.items() if k != "run_id"} == mock_result
 
     @pytest.mark.asyncio
     async def test_tool_query_diff(self, mcp_server):