From ca20a56b010b0c10a4944e388705708d2cab3047 Mon Sep 17 00:00:00 2001 From: Kent Date: Fri, 22 May 2026 10:06:09 +0800 Subject: [PATCH 01/43] chore(gbrain): pin worktree code source and add CLAUDE.md guidance block Added during the /sync-gbrain session that backfilled embeddings after Conductor's GSTACK_OPENAI_API_KEY shim was wired up. The .gbrain-source file (gitignored) pins this worktree to its scoped code source for kubectl-style routing. CLAUDE.md gets a new "GBrain Search Guidance" section so future agents in this worktree prefer gbrain over Grep for semantic queries. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- .gitignore | 1 + CLAUDE.md | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/.gitignore b/.gitignore index 16a085f1c..72857e2bc 100644 --- a/.gitignore +++ b/.gitignore @@ -45,3 +45,4 @@ output/* .env* !.env.example +.gbrain-source diff --git a/CLAUDE.md b/CLAUDE.md index 112bdf934..99a64f20f 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -91,3 +91,40 @@ Origin: PR #1342 review (DRC-3307). ## Individual Preferences - @~/.claude/recce.md + +## GBrain Search Guidance (configured by /sync-gbrain) + + +GBrain is set up and synced on this machine. The agent should prefer gbrain +over Grep when the question is semantic or when you don't know the exact +identifier yet. + +**This worktree is pinned to a worktree-scoped code source** via the +`.gbrain-source` file in the repo root (kubectl-style context). Any +`gbrain code-def`, `code-refs`, `code-callers`, `code-callees`, or `query` +call from anywhere under this worktree routes to that source by default — +no `--source` flag needed. Conductor sibling worktrees of the same repo +each have their own pin and their own indexed pages, so semantic results +match the actual code on disk in this worktree. + +Two indexed corpora available via the `gbrain` CLI: +- This worktree's code (auto-pinned via `.gbrain-source`). 
+- `~/.gstack/` curated memory (registered as `gstack-brain-` source via + the existing federation pipeline). + +Prefer gbrain when: +- "Where is X handled?" / semantic intent, no exact string yet: + `gbrain search ""` or `gbrain query ""` +- "Where is symbol Y defined?" / symbol-based code questions: + `gbrain code-def ` or `gbrain code-refs ` +- "What calls Y?" / "What does Y depend on?": + `gbrain code-callers ` / `gbrain code-callees ` +- "What did we decide last time?" / past plans, retros, learnings: + `gbrain search "" --source gstack-brain-` + +Grep is still right for known exact strings, regex, multiline patterns, and +file globs. Run `/sync-gbrain` after meaningful code changes; for ongoing +auto-sync across all worktrees, run `gbrain autopilot --install` once per +machine — gbrain's daemon handles incremental refresh on a schedule. + + From 7c34bb7923b2e20c82d9a72aa7edf03d83c02696 Mon Sep 17 00:00:00 2001 From: Kent Date: Fri, 22 May 2026 16:48:52 +0800 Subject: [PATCH 02/43] feat(mcp): scaffold widget_server.py + mcp-widget-server CLI + WIDGET_TOOLS filter (iter 1 Day 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per design-20260521-234647 amended — adds a parallel FastMCP-based widget server that delegates to RecceMCPServer's existing _tool_row_count_diff and _tool_schema_diff methods, gated by RECCE_MCP_WIDGETS=1 env var. - recce/widget_server.py: NEW, FastMCP("recce-widgets") with @mcp.tool delegates and @mcp.resource handlers (graceful when recce/data/mcp/*.html missing). - recce/cli.py: NEW `recce mcp-widget-server` subcommand (local mode only). - recce/mcp_server.py: WIDGET_TOOLS set, _widgets_enabled helper, list_tools filter, call_tool defensive raise. Day 1 scope only — widget HTML assets + tests deferred to Day 1.5 / Day 2. 
Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- recce/cli.py | 98 +++++++++++++++++++++++++ recce/mcp_server.py | 22 ++++++ recce/widget_server.py | 160 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 280 insertions(+) create mode 100644 recce/widget_server.py diff --git a/recce/cli.py b/recce/cli.py index dc696f1b0..6220a182b 100644 --- a/recce/cli.py +++ b/recce/cli.py @@ -2869,6 +2869,104 @@ def mcp_server(state_file, sse, host, port, **kwargs): exit(1) +@cli.command(cls=TrackCommand) +@click.argument("state_file", required=False) +@click.option( + "--sse", + is_flag=True, + default=False, + help="Reserved for future iters — stdio is hardcoded in iter 1", +) +@click.option( + "--host", + default="localhost", + help="Host to bind to (reserved for future iters)", +) +@click.option("--port", default=8000, type=int, help="Port to bind to (reserved for future iters)") +@add_options(dbt_related_options) +@add_options(sqlmesh_related_options) +@add_options(recce_options) +@add_options(recce_dbt_artifact_dir_options) +@add_options(recce_hidden_options) +def mcp_widget_server(state_file, sse, host, port, **kwargs): + """ + Start the Recce MCP Widget Server (iter 1 POC — local mode only). + + Cloud session / snapshot modes are not supported. Register both + `recce mcp-server` and `recce mcp-widget-server` entries in Claude + Desktop config with RECCE_MCP_WIDGETS=1 env var set on both. + + STATE_FILE is the path to the recce state file (optional). + """ + import asyncio + + from rich.console import Console + + from recce.config import RecceConfig + + # In stdio mode, stdout is the JSON-RPC transport — all human-readable + # output must go to stderr to avoid MCP client parse errors. 
+ console = Console(stderr=True) + + try: + from recce.widget_server import run_widget_server + except ImportError as e: + console.print(f"[[red]Error[/red]] Failed to import widget server: {e}") + console.print(r"Please install the MCP package: pip install 'recce\[mcp]'") + exit(1) + + # Initialize Recce Config + RecceConfig(config_file=kwargs.get("config")) + + handle_debug_flag(**kwargs) + + patch_derived_args(kwargs) + + # Reject any cloud / session kwargs — not supported in iter 1 + if kwargs.get("cloud") or kwargs.get("cloud_session"): + console.print( + "[[red]Error[/red]] recce mcp-widget-server does not support cloud/session mode in iter 1. " + "Use recce mcp-server for cloud sessions." + ) + exit(1) + + # Local state file + if state_file: + state_loader = create_state_loader_by_args(state_file, **kwargs) + kwargs["state_loader"] = state_loader + + # Single Environment Onboarding Mode fallback + project_dir_path = Path(kwargs.get("project_dir") or "./") + target_path = project_dir_path.joinpath(Path(kwargs.get("target_path", "target"))) + target_base_path = project_dir_path.joinpath(Path(kwargs.get("target_base_path", "target-base"))) + if target_path.is_dir() and not target_base_path.is_dir(): + kwargs["single_env"] = True + kwargs["target_base_path"] = kwargs.get("target_path") + console.print( + "[yellow]Base artifacts not found. 
" + "Starting in single-environment mode (diffs will show no changes).[/yellow]" + ) + console.print("To enable diffing: dbt docs generate --target-path target-base") + + console.print("Starting Recce MCP Widget Server in stdio mode (iter 1 POC, local mode only)...") + + # Ensure cloud=False so run_widget_server doesn't see a stale patched value + kwargs["cloud"] = False + + try: + asyncio.run(run_widget_server(sse=False, host=host, port=port, **kwargs)) + except (asyncio.CancelledError, KeyboardInterrupt): + console.print("[yellow]MCP Widget Server interrupted[/yellow]") + exit(0) + except Exception as e: + console.print(f"[[red]Error[/red]] Failed to start MCP widget server: {e}") + if kwargs.get("debug"): + import traceback + + traceback.print_exc() + exit(1) + + @cli.group("cache", short_help="Manage column-level lineage cache.") def cache(): """Manage column-level lineage cache.""" diff --git a/recce/mcp_server.py b/recce/mcp_server.py index 9b59ad1d3..c01763c49 100644 --- a/recce/mcp_server.py +++ b/recce/mcp_server.py @@ -50,6 +50,16 @@ "Run `dbt docs generate --target-path target-base` to enable diffing." ) +# When RECCE_MCP_WIDGETS=1 is set, these tools are served by `recce mcp-widget-server` +# instead of the main `recce mcp-server`. The main server omits them from list_tools +# and raises in call_tool if the agent calls them anyway. See recce/widget_server.py. +WIDGET_TOOLS = {"row_count_diff", "schema_diff"} + + +def _widgets_enabled() -> bool: + """Read RECCE_MCP_WIDGETS env at call time (not import time) so tests can monkeypatch.""" + return os.environ.get("RECCE_MCP_WIDGETS", "").strip() in ("1", "true", "True") + class InstanceSpawningError(RuntimeError): """Raised when a Recce Cloud session instance is not ready yet.""" @@ -1219,6 +1229,10 @@ async def list_tools() -> List[Tool]: ) ) + # When widgets enabled, defer widget-eligible tools to recce mcp-widget-server. 
+ if _widgets_enabled(): + tools = [t for t in tools if t.name not in WIDGET_TOOLS] + self.mcp_logger.log_list_tools(tools) # Log available tools to console @@ -1240,6 +1254,14 @@ async def call_tool(name: str, arguments: Dict[str, Any]) -> List[TextContent]: logger.info(f"[MCP] Arguments: {json.dumps(log_arguments, indent=2)}") try: + # Widget-mode coordination: if RECCE_MCP_WIDGETS=1 the widget server owns these + # tools. If the agent reaches us anyway, the widget-server entry isn't wired. + if _widgets_enabled() and name in WIDGET_TOOLS: + raise ValueError( + f"Tool '{name}' is served by `recce mcp-widget-server` when RECCE_MCP_WIDGETS=1. " + f"Ensure that entry is registered in your Claude Desktop config alongside `recce mcp-server`." + ) + # Check if tool is blocked in non-server mode blocked_tools_in_non_server = { "row_count_diff", diff --git a/recce/widget_server.py b/recce/widget_server.py new file mode 100644 index 000000000..62644372c --- /dev/null +++ b/recce/widget_server.py @@ -0,0 +1,160 @@ +"""Widget-enabled MCP server for Recce — parallel process to recce/mcp_server.py. + +Spun up via `recce mcp-widget-server` CLI subcommand. Internally instantiates a +RecceMCPServer and delegates widget-tool calls to its existing `_tool_` +methods (no logic duplication). + +Coordination with `recce mcp-server`: see WIDGET_TOOLS in recce/mcp_server.py. +When RECCE_MCP_WIDGETS=1, mcp-server omits these tools from list_tools and this +widget server serves them with `_meta.ui.resourceUri` widget metadata. +""" + +import importlib.resources +import logging +from typing import Any, Dict, Optional + +from mcp.server.fastmcp import FastMCP + +mcp = FastMCP("recce-widgets") + +# Forward ref — initialized in run_widget_server() to avoid eager import at module load. 
+_recce_server: Optional[Any] = None + +logger = logging.getLogger(__name__) + + +def _read_widget_html(name: str) -> str: + """Read widget HTML from recce/data/mcp/{name}.html, returning an error stub if missing.""" + try: + ref = importlib.resources.files("recce.data.mcp") / f"{name}.html" + return ref.read_text(encoding="utf-8") + except (FileNotFoundError, TypeError, ModuleNotFoundError): + return f"" f"Widget asset missing: {name}.html. Run pnpm run build." f"" + + +# --------------------------------------------------------------------------- +# row_count_diff widget tool + resource +# --------------------------------------------------------------------------- + + +@mcp.tool( + name="row_count_diff", + description=( + "Compare row counts between base and current environments for specified models. " + "Returns structured results with status information for each model.\n\n" + "Response format: {model_name: {base: int|null, curr: int|null, " + "base_meta: {status, message?}, curr_meta: {status, message?}}}\n" + "- base/curr: row count as integer, or null if unavailable\n" + "- base_meta/curr_meta: status details explaining the count value\n\n" + "Status codes (in *_meta.status):\n" + "- 'ok': Row count retrieved successfully\n" + "- 'not_in_manifest': Model not found in dbt manifest\n" + "- 'unsupported_resource_type': Node is not a model/snapshot\n" + "- 'unsupported_materialization': Materialization doesn't support row counts\n" + "- 'table_not_found': Table defined in manifest but doesn't exist in database\n" + "- 'permission_denied': User lacks permission to access the table" + ), + meta={ + "ui": {"resourceUri": "ui://recce/row_count_diff.html"}, + "ui/resourceUri": "ui://recce/row_count_diff.html", + }, +) +async def row_count_diff(**arguments) -> Dict[str, Any]: + """ + Expected arguments: + - node_names: List[str] (optional) — model names to check + - node_ids: List[str] (optional) — node IDs to check + - select: str (optional) — dbt selector syntax + - 
exclude: str (optional) — dbt selector to exclude models + """ + return await _recce_server._tool_row_count_diff(arguments) + + +@mcp.resource( + uri="ui://recce/row_count_diff.html", + mime_type="text/html;profile=mcp-app", + meta={"ui": {"csp": {"resourceDomains": ["https://unpkg.com"]}}}, +) +def row_count_diff_resource() -> str: + return _read_widget_html("row_count_diff") + + +# --------------------------------------------------------------------------- +# schema_diff widget tool + resource +# --------------------------------------------------------------------------- + + +@mcp.tool( + name="schema_diff", + description=( + "Get the schema diff (column changes) between base and current environments. " + "Shows added, removed, and type-changed columns in compact dataframe format." + ), + meta={ + "ui": {"resourceUri": "ui://recce/schema_diff.html"}, + "ui/resourceUri": "ui://recce/schema_diff.html", + }, +) +async def schema_diff(**arguments) -> Dict[str, Any]: + """ + Expected arguments: + - select: str (optional) — dbt selector syntax to filter models + - exclude: str (optional) — dbt selector syntax to exclude models + - packages: List[str] (optional) — list of packages to filter + """ + return await _recce_server._tool_schema_diff(arguments) + + +@mcp.resource( + uri="ui://recce/schema_diff.html", + mime_type="text/html;profile=mcp-app", + meta={"ui": {"csp": {"resourceDomains": ["https://unpkg.com"]}}}, +) +def schema_diff_resource() -> str: + return _read_widget_html("schema_diff") + + +# --------------------------------------------------------------------------- +# Entry point +# --------------------------------------------------------------------------- + + +def run_widget_server(**kwargs) -> None: + """ + Entry point for the Recce MCP Widget Server. + + Iter 1 is LOCAL MODE ONLY — cloud/session kwargs are not supported. 
+ Register both `recce mcp-server` and `recce mcp-widget-server` entries + in Claude Desktop config with RECCE_MCP_WIDGETS=1 set on both. + """ + global _recce_server + + if kwargs.get("cloud") or kwargs.get("session"): + raise ValueError( + "recce mcp-widget-server does not support cloud/session mode in iter 1 " + "— use recce mcp-server for cloud sessions" + ) + + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + ) + + # Lazy imports to avoid heavy startup cost at CLI --help time + from recce.core import load_context + from recce.mcp_server import RecceMCPServer + from recce.server import RecceServerMode + + single_env = kwargs.pop("single_env", False) + context = load_context(**kwargs) + + _recce_server = RecceMCPServer( + context, + mode=RecceServerMode.server, + debug=kwargs.get("debug", False), + state_loader=kwargs.get("state_loader"), + single_env=single_env, + api_token=kwargs.get("api_token"), + ) + + mcp.run(transport="stdio") From bb6f126104664385e6928b93fa668e09aa0dcb38 Mon Sep 17 00:00:00 2001 From: Kent Date: Fri, 22 May 2026 17:11:43 +0800 Subject: [PATCH 03/43] fix(mcp): drop asyncio.run wrapper + typed @mcp.tool params (review feedback) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix #1: run_widget_server() is synchronous — mcp.run(transport="stdio") manages its own asyncio event loop internally. asyncio.run(run_widget_server(...)) raised ValueError because the function returns None, not a coroutine. Drop asyncio.run() in cli.py and remove the unused asyncio import. Fix #2: replace **arguments with typed params so FastMCP infers a proper JSON inputSchema for tools/list. Without typed params, Claude Desktop sees the tool registered but cannot construct a tools/call (no schema to fill). 
Params translated from existing low-level Tool inputSchema definitions: - row_count_diff: node_names, node_ids, select, exclude (all Optional) - schema_diff: select, exclude, packages (all Optional) Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- recce/cli.py | 8 +++---- recce/widget_server.py | 52 +++++++++++++++++++++++++++++------------- 2 files changed, 40 insertions(+), 20 deletions(-) diff --git a/recce/cli.py b/recce/cli.py index 6220a182b..c18f0e10e 100644 --- a/recce/cli.py +++ b/recce/cli.py @@ -2898,8 +2898,6 @@ def mcp_widget_server(state_file, sse, host, port, **kwargs): STATE_FILE is the path to the recce state file (optional). """ - import asyncio - from rich.console import Console from recce.config import RecceConfig @@ -2954,8 +2952,10 @@ def mcp_widget_server(state_file, sse, host, port, **kwargs): kwargs["cloud"] = False try: - asyncio.run(run_widget_server(sse=False, host=host, port=port, **kwargs)) - except (asyncio.CancelledError, KeyboardInterrupt): + # run_widget_server is synchronous — mcp.run(transport="stdio") manages + # its own asyncio event loop internally. Do NOT wrap in asyncio.run(). 
+ run_widget_server(sse=False, host=host, port=port, **kwargs) + except KeyboardInterrupt: console.print("[yellow]MCP Widget Server interrupted[/yellow]") exit(0) except Exception as e: diff --git a/recce/widget_server.py b/recce/widget_server.py index 62644372c..7e966bcbf 100644 --- a/recce/widget_server.py +++ b/recce/widget_server.py @@ -11,7 +11,7 @@ import importlib.resources import logging -from typing import Any, Dict, Optional +from typing import Any, Dict, List, Optional from mcp.server.fastmcp import FastMCP @@ -59,14 +59,23 @@ def _read_widget_html(name: str) -> str: "ui/resourceUri": "ui://recce/row_count_diff.html", }, ) -async def row_count_diff(**arguments) -> Dict[str, Any]: - """ - Expected arguments: - - node_names: List[str] (optional) — model names to check - - node_ids: List[str] (optional) — node IDs to check - - select: str (optional) — dbt selector syntax - - exclude: str (optional) — dbt selector to exclude models - """ +async def row_count_diff( + node_names: Optional[List[str]] = None, + node_ids: Optional[List[str]] = None, + select: Optional[str] = None, + exclude: Optional[str] = None, +) -> Dict[str, Any]: + """Compare row counts between base and current environments.""" + arguments = { + k: v + for k, v in { + "node_names": node_names, + "node_ids": node_ids, + "select": select, + "exclude": exclude, + }.items() + if v is not None + } return await _recce_server._tool_row_count_diff(arguments) @@ -95,13 +104,21 @@ def row_count_diff_resource() -> str: "ui/resourceUri": "ui://recce/schema_diff.html", }, ) -async def schema_diff(**arguments) -> Dict[str, Any]: - """ - Expected arguments: - - select: str (optional) — dbt selector syntax to filter models - - exclude: str (optional) — dbt selector syntax to exclude models - - packages: List[str] (optional) — list of packages to filter - """ +async def schema_diff( + select: Optional[str] = None, + exclude: Optional[str] = None, + packages: Optional[List[str]] = None, +) -> Dict[str, Any]: 
+ """Get schema diff (column changes) between base and current environments.""" + arguments = { + k: v + for k, v in { + "select": select, + "exclude": exclude, + "packages": packages, + }.items() + if v is not None + } return await _recce_server._tool_schema_diff(arguments) @@ -126,6 +143,9 @@ def run_widget_server(**kwargs) -> None: Iter 1 is LOCAL MODE ONLY — cloud/session kwargs are not supported. Register both `recce mcp-server` and `recce mcp-widget-server` entries in Claude Desktop config with RECCE_MCP_WIDGETS=1 set on both. + + mcp.run(transport="stdio") manages its own asyncio event loop internally. + Do NOT wrap this function in asyncio.run(). """ global _recce_server From bf665bbac9289c9370173273f4a15154fa4421eb Mon Sep 17 00:00:00 2001 From: Kent Date: Fri, 22 May 2026 17:30:20 +0800 Subject: [PATCH 04/43] feat(mcp): Day 1.5 widget HTML + schema_diff rich shape refactor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - recce/data/mcp/row_count_diff.html: NEW, ~110 LOC tier 1 widget (status pills + diff numbers; base/curr null guards using === null) - recce/data/mcp/schema_diff.html: NEW, ~110 LOC tier 2 widget (added/ removed/type_changed sections; empty state shows unchanged_count) - recce/mcp_server.py: extract _compute_schema_changes() helper that returns rich per-model dict {added, removed, type_changed, unchanged_count}; existing _tool_schema_diff flattens output back to DataFrame for low-level mcp-server consumers — zero regression on existing response shape - recce/widget_server.py: row_count_diff delegate wraps result as {"models": result}; schema_diff delegate calls _compute_schema_changes directly and returns {"models": rich_result} for widget consumption - .gitignore: replace broad recce/data ignore with per-extension rules so recce/data/mcp/*.html (widget source, not build output) can be committed Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- .gitignore | 20 +++++- 
recce/data/mcp/row_count_diff.html | 112 +++++++++++++++++++++++++++++ recce/data/mcp/schema_diff.html | 109 ++++++++++++++++++++++++++++ recce/mcp_server.py | 100 +++++++++++++++++--------- recce/widget_server.py | 21 +++--- 5 files changed, 315 insertions(+), 47 deletions(-) create mode 100644 recce/data/mcp/row_count_diff.html create mode 100644 recce/data/mcp/schema_diff.html diff --git a/.gitignore b/.gitignore index 72857e2bc..4c6e4f5fa 100644 --- a/.gitignore +++ b/.gitignore @@ -22,8 +22,24 @@ deps-*.yml recce.yml STATUS.md -# Ignore build artifacts from frontend -recce/data +# Ignore build artifacts from frontend. +# recce/data/mcp/*.html are SOURCE files (widget HTML for MCP Apps) — not build output. +# git negation cannot un-ignore files under an ignored directory, so we ignore +# specific subdirs instead of the whole recce/data tree. +recce/data/.gitkeep +recce/data/_next/ +recce/data/chunks/ +recce/data/*.html +recce/data/*.js +recce/data/*.css +recce/data/*.json +recce/data/*.txt +recce/data/*.xml +recce/data/*.ico +recce/data/*.png +recce/data/*.svg +recce/data/*.woff +recce/data/*.woff2 # ignore Claude logs and plans at any nesting **/docs/plans/**/*.md diff --git a/recce/data/mcp/row_count_diff.html b/recce/data/mcp/row_count_diff.html new file mode 100644 index 000000000..6ac1feb8e --- /dev/null +++ b/recce/data/mcp/row_count_diff.html @@ -0,0 +1,112 @@ + + + + + Row Count Diff + + + +
Loading…
+ + + diff --git a/recce/data/mcp/schema_diff.html b/recce/data/mcp/schema_diff.html new file mode 100644 index 000000000..4942b6edc --- /dev/null +++ b/recce/data/mcp/schema_diff.html @@ -0,0 +1,109 @@ + + + + + Schema Diff + + + +
Loading…
+ + + diff --git a/recce/mcp_server.py b/recce/mcp_server.py index c01763c49..1d90a99a2 100644 --- a/recce/mcp_server.py +++ b/recce/mcp_server.py @@ -1498,26 +1498,33 @@ async def _tool_lineage_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]: return result - async def _tool_schema_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]: - """Get schema diff (column changes) between base and current""" - # Extract filter arguments - select = arguments.get("select") - exclude = arguments.get("exclude") - packages = arguments.get("packages") - - # Get lineage diff from adapter - lineage_diff = self.context.get_lineage_diff().model_dump(mode="json") - - # Get all nodes from current environment + def _compute_schema_changes( + self, + lineage_diff: Dict[str, Any], + select: Optional[str] = None, + exclude: Optional[str] = None, + packages: Optional[List[str]] = None, + ) -> Dict[str, Dict[str, Any]]: + """Compute per-model schema changes as a rich nested dict. + + Returns: + {node_id: { + "added": [{"name": str, "type": str}, ...], + "removed": [{"name": str, "type": str}, ...], + "type_changed": [{"name": str, "base_type": str, "curr_type": str}, ...], + "unchanged_count": int, + }} + + Used by both the low-level mcp-server (flattened to DataFrame) and the + FastMCP widget server (returned as structuredContent for schema_diff.html). 
+ """ current_nodes = {} if "current" in lineage_diff and "nodes" in lineage_diff["current"]: current_nodes = lineage_diff["current"]["nodes"] - # Filter to only nodes that exist in both base and current (exclude added nodes) base_nodes = lineage_diff.get("base", {}).get("nodes", {}) nodes_to_compare = set(current_nodes.keys()) & set(base_nodes.keys()) - # Apply filtering if arguments provided if select or exclude or packages: selected_node_ids = self.context.adapter.select_nodes( select=select, @@ -1526,34 +1533,59 @@ async def _tool_schema_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]: ) nodes_to_compare = nodes_to_compare & selected_node_ids - # Build schema changes - schema_changes = [] - + result: Dict[str, Dict[str, Any]] = {} for node_id in nodes_to_compare: - base_node = base_nodes.get(node_id, {}) - current_node = current_nodes.get(node_id, {}) + base_columns = base_nodes.get(node_id, {}).get("columns", {}) + current_columns = current_nodes.get(node_id, {}).get("columns", {}) - base_columns = base_node.get("columns", {}) - current_columns = current_node.get("columns", {}) - - # Get column names in base and current base_col_names = set(base_columns.keys()) current_col_names = set(current_columns.keys()) - # Find added columns (in current but not in base) - for col_name in current_col_names - base_col_names: - schema_changes.append((node_id, col_name, "added")) + added = [ + {"name": col, "type": current_columns[col].get("type", "")} + for col in sorted(current_col_names - base_col_names) + ] + removed = [ + {"name": col, "type": base_columns[col].get("type", "")} + for col in sorted(base_col_names - current_col_names) + ] + type_changed = [] + unchanged_count = 0 + for col in sorted(base_col_names & current_col_names): + base_type = base_columns[col].get("type") + curr_type = current_columns[col].get("type") + if base_type != curr_type: + type_changed.append({"name": col, "base_type": base_type, "curr_type": curr_type}) + else: + unchanged_count 
+= 1 - # Find removed columns (in base but not in current) - for col_name in base_col_names - current_col_names: - schema_changes.append((node_id, col_name, "removed")) + result[node_id] = { + "added": added, + "removed": removed, + "type_changed": type_changed, + "unchanged_count": unchanged_count, + } + return result - # Find modified columns (in both but with different types) - for col_name in base_col_names & current_col_names: - base_col_type = base_columns[col_name].get("type") - current_col_type = current_columns[col_name].get("type") - if base_col_type != current_col_type: - schema_changes.append((node_id, col_name, "modified")) + async def _tool_schema_diff(self, arguments: Dict[str, Any]) -> Dict[str, Any]: + """Get schema diff (column changes) between base and current""" + select = arguments.get("select") + exclude = arguments.get("exclude") + packages = arguments.get("packages") + + lineage_diff = self.context.get_lineage_diff().model_dump(mode="json") + rich_changes = self._compute_schema_changes(lineage_diff, select=select, exclude=exclude, packages=packages) + + # Flatten rich dict → (node_id, column, change_status) triples for DataFrame output. + # Preserves the existing low-level mcp-server response contract exactly. 
+ schema_changes = [] + for node_id, m in rich_changes.items(): + for col in m["added"]: + schema_changes.append((node_id, col["name"], "added")) + for col in m["removed"]: + schema_changes.append((node_id, col["name"], "removed")) + for col in m["type_changed"]: + schema_changes.append((node_id, col["name"], "modified")) # Check if there are more than 100 rows limit = 100 diff --git a/recce/widget_server.py b/recce/widget_server.py index 7e966bcbf..fac1cd2e4 100644 --- a/recce/widget_server.py +++ b/recce/widget_server.py @@ -76,7 +76,8 @@ async def row_count_diff( }.items() if v is not None } - return await _recce_server._tool_row_count_diff(arguments) + result = await _recce_server._tool_row_count_diff(arguments) + return {"models": result} @mcp.resource( @@ -110,16 +111,14 @@ async def schema_diff( packages: Optional[List[str]] = None, ) -> Dict[str, Any]: """Get schema diff (column changes) between base and current environments.""" - arguments = { - k: v - for k, v in { - "select": select, - "exclude": exclude, - "packages": packages, - }.items() - if v is not None - } - return await _recce_server._tool_schema_diff(arguments) + lineage_diff = _recce_server.context.get_lineage_diff().model_dump(mode="json") + rich_result = _recce_server._compute_schema_changes( + lineage_diff, + select=select, + exclude=exclude, + packages=packages if packages is not None else None, + ) + return {"models": rich_result} @mcp.resource( From 79264d5e84696b1885c108be3c33a9ec2084cb46 Mon Sep 17 00:00:00 2001 From: Kent Date: Fri, 22 May 2026 17:42:38 +0800 Subject: [PATCH 05/43] test(mcp): widget server tests + docs/mcp-widgets.md template (Day 2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - tests/test_widget_server.py: 5 tests covering WIDGET_TOOLS env-var coordination, widget server tool/resource registration, file-missing graceful degradation, and tool enumeration regression. 
- docs/mcp-widgets.md: template for Scott — file layout, Claude Desktop config example, add-widget walkthrough using row_count_diff as worked reference, structuredContent contract, gotchas (SDK version pin, typed-params requirement, dual-key meta, mcp.run sync nature, .gitignore allowlist, stdout discipline). Co-Authored-By: Claude Signed-off-by: Kent --- docs/mcp-widgets.md | 300 ++++++++++++++++++++++++++++++++++++ tests/test_widget_server.py | 136 ++++++++++++++++ 2 files changed, 436 insertions(+) create mode 100644 docs/mcp-widgets.md create mode 100644 tests/test_widget_server.py diff --git a/docs/mcp-widgets.md b/docs/mcp-widgets.md new file mode 100644 index 000000000..c672936f9 --- /dev/null +++ b/docs/mcp-widgets.md @@ -0,0 +1,300 @@ +# MCP Apps Widgets — Iter 1 Developer Guide + +## Overview + +Recce exposes two MCP servers that work in tandem when the MCP Apps widget +feature is enabled. The primary `recce mcp-server` handles all data-retrieval +and check-management tools. A secondary `recce mcp-widget-server` serves a +small set of "widget tools" — tools whose responses Claude Desktop renders as +interactive HTML panels rather than plain text. The two servers are coordinated +through the `RECCE_MCP_WIDGETS=1` environment variable: when that flag is set, +`mcp-server` silently omits the widget tools from its `tools/list` response so +that Claude Desktop routes those calls exclusively to `mcp-widget-server`, which +annotates each tool with `_meta.ui.resourceUri` pointing at an HTML resource. + +Iter 1 ships two widgets: `row_count_diff` and `schema_diff`. Both run in +**local mode only** — cloud/session mode is not supported until iter 2. + +--- + +## File Layout + +``` +recce/ + mcp_server.py # Existing primary server. + # WIDGET_TOOLS set + _widgets_enabled() filter live here. + widget_server.py # FastMCP widget server (iter 1). + # @mcp.tool delegates + @mcp.resource handlers. + cli.py # mcp-widget-server CLI subcommand added here. 
+ data/ + mcp/ # Widget HTML asset directory (tracked in git; exempted by the per-extension + row_count_diff.html # ignore rules — see .gitignore). Self-contained HTML files. + schema_diff.html +tests/ + test_widget_server.py # 5 tests covering WIDGET_TOOLS coordination + widget server. +docs/ + mcp-widgets.md # This file. +``` + +--- + +## Claude Desktop Configuration + +Register both servers in `~/Library/Application Support/Claude/claude_desktop_config.json`. +Both entries need `RECCE_MCP_WIDGETS=1` — without it, `mcp-server` keeps the +widget tools in its own `tools/list` and `mcp-widget-server` sees them routed +to the wrong server. + +```json +{ + "mcpServers": { + "recce": { + "command": "recce", + "args": ["mcp-server", "--project-dir", "/path/to/your/dbt/project"], + "env": { + "RECCE_MCP_WIDGETS": "1" + } + }, + "recce-widgets": { + "command": "recce", + "args": ["mcp-widget-server", "--project-dir", "/path/to/your/dbt/project"], + "env": { + "RECCE_MCP_WIDGETS": "1" + } + } + } +} +``` + +Replace `/path/to/your/dbt/project` with the directory containing +`dbt_project.yml`. Both entries must point at the same project directory. + +--- + +## Add a Widget — Step-by-Step Walkthrough + +The worked reference throughout is `row_count_diff`. Add a new widget called +`<name>` by following these steps in order. + +### Step 1 — Register the tool name in `WIDGET_TOOLS` + +File: `recce/mcp_server.py`, near line 56. + +```python +WIDGET_TOOLS = {"row_count_diff", "schema_diff", "<name>"} +``` + +This single change makes `mcp-server` omit `<name>` from `tools/list` when +`RECCE_MCP_WIDGETS=1`, and raise an explanatory error if the agent calls it +on the wrong server. + +### Step 2 — Write the widget HTML at `recce/data/mcp/<name>.html` + +Create a single self-contained HTML file. Import the MCP Apps SDK from unpkg +— **pin this exact version**: + +```html + + + + + My Tool + + +
Loading…
+ + + +``` + +`structuredContent` is populated automatically by FastMCP when the `@mcp.tool` +handler returns a dict (see "structuredContent contract" below). The `models` +key is the wrapping convention this codebase uses — your render function reads +from `structuredContent.models`. + +Add the HTML file to git normally — it escapes the broad `recce/data` gitignore +via per-extension rules in `.gitignore` (see "Gotchas"). + +### Step 3 — Add a `@mcp.tool` delegate in `recce/widget_server.py` + +```python +@mcp.tool( + name="<tool_name>", + description="Human-readable description Claude uses to pick this tool.", + meta={ + "ui": {"resourceUri": "ui://recce/<tool_name>.html"}, + "ui/resourceUri": "ui://recce/<tool_name>.html", + }, +) +async def <tool_name>( + select: Optional[str] = None, + exclude: Optional[str] = None, +) -> Dict[str, Any]: + """One-line docstring.""" + result = await _recce_server._tool_<tool_name>({"select": select, "exclude": exclude}) + return {"models": result} +``` + +Key requirements: +- Use **typed params** (`Optional[str]`, `Optional[List[str]]`, etc.) — never + `**kwargs`. FastMCP infers the JSON `inputSchema` from type hints. Without a + schema the tool is uncallable from Claude Desktop. +- `meta` needs **both** the nested key (`"ui": {"resourceUri": ...}`) and the + flat key (`"ui/resourceUri": ...`) — the qr-server reference pattern. Dropping + either key breaks widget attachment in some Claude Desktop versions. +- Return `{"models": result}` so widget HTML can read a uniform `structuredContent.models`. + +### Step 4 — Add a `@mcp.resource` handler in `recce/widget_server.py` + +```python +@mcp.resource( + uri="ui://recce/<tool_name>.html", + mime_type="text/html;profile=mcp-app", + meta={"ui": {"csp": {"resourceDomains": ["https://unpkg.com"]}}}, +) +def <tool_name>_resource() -> str: + return _read_widget_html("<tool_name>") +``` + +`mime_type="text/html;profile=mcp-app"` is required — it tells Claude Desktop +this resource is a widget panel, not a document. 
`resourceDomains` in the CSP +`meta` must list every external origin the HTML loads: at minimum `https://unpkg.com` +for the SDK, plus any chart libraries or CDN assets. + +### Step 5 — Extract a helper if the existing tool response shape doesn't fit + +If `_tool_<tool_name>` in `mcp_server.py` returns a shape that the widget HTML +cannot render (e.g., a flat `DataFrame.model_dump()` dict), extract a shared +computation helper: + +```python +# In recce/mcp_server.py, inside RecceMCPServer: +def _compute_<tool_name>_data(self, lineage_diff, **kwargs) -> Dict[str, Any]: + """Return rich nested dict for widget consumption.""" + ... + +async def _tool_<tool_name>(self, arguments) -> ...: + """Existing method — must NOT change its return type.""" + rich = self._compute_<tool_name>_data(...) + # flatten to existing format: + return DataFrame.from_data(rich).model_dump() +``` + +The widget delegate in `widget_server.py` calls `_compute_<tool_name>_data` directly. +The existing `_tool_<tool_name>` calls the same helper then flattens — preserving +the existing `mcp-server` response contract exactly. See `_compute_schema_changes` +as the worked example. + +### Step 6 — Restart Claude Desktop and verify + +1. Quit Claude Desktop fully (Cmd+Q). +2. Reopen. Both MCP server entries will restart. +3. Ask Claude: "Run <tool_name> and show me the widget." +4. You should see the HTML panel render inline in the chat. + +--- + +## structuredContent Contract + +When a FastMCP `@mcp.tool` async function returns a `dict`, FastMCP +automatically populates both: + +- `content`: a text serialisation of the dict (for non-widget MCP clients) +- `structuredContent`: the dict itself (for widget-capable clients like + Claude Desktop) + +The widget HTML receives `structuredContent` in the `app.ontoolresult` callback: + +```js +app.ontoolresult = ({ structuredContent }) => { + const models = structuredContent?.models ?? {}; + // render models +}; +``` + +This codebase always wraps tool results as `{"models": <tool_result>}` before +returning from the `@mcp.tool` delegate. 
This keeps all widget HTML uniform — +every widget reads from `structuredContent.models`, regardless of the underlying +tool's native shape. + +--- + +## Gotchas + +- **SDK version — pin `@modelcontextprotocol/ext-apps@0.4.0/app-with-deps`.** + Version 1.7.2 was tested in the Day 0 spike and found incompatible. Do not + float the version or use `@latest`. + +- **`@mcp.tool` must use typed params — never `**kwargs`.** FastMCP cannot + infer an `inputSchema` from `**kwargs`. Without a schema, Claude Desktop + registers the tool in `tools/list` but cannot construct a `tools/call` — + the tool appears available but is silently uncallable. + +- **`@mcp.tool` meta needs both key forms.** The `meta` dict must contain: + - `"ui": {"resourceUri": "ui://recce/<tool_name>.html"}` (nested, canonical) + - `"ui/resourceUri": "ui://recce/<tool_name>.html"` (flat, legacy) + Both are required. Dropping the flat key breaks widget attachment in some + Claude Desktop versions (observed in qr-server reference implementation). + +- **`mcp.run(transport="stdio")` is synchronous — do NOT wrap in + `asyncio.run()`.** `mcp.run()` manages its own asyncio event loop internally. + Wrapping it raises `ValueError: a coroutine was expected, got None` (fixed in + Day 1 cycle 1; see commit `bb6f1261`). + +- **`recce/data/` is gitignored as build output.** Widget HTML files use a + per-extension allowlist in `.gitignore` to escape the broad `recce/data` + ignore rule. The allowlist currently covers `*.html`. If you add new file + types (`.css`, `.svg`, `.js`) under `recce/data/mcp/`, check `.gitignore` + and add an allowlist entry if needed; otherwise `git add` will silently skip + your file. + +- **In stdio transport mode, stdout is JSON-RPC.** Any `print()` or + `logging.info()` output written to stdout will corrupt the MCP framing. + Configure logging to write to `stderr` only (see `logging.basicConfig` in + `run_widget_server()`). Never add bare `print()` calls in `widget_server.py`. 
+ +--- + +## Reference Widgets + +Two working examples to read when building widget #3: + +| File | Tier | What it demonstrates | +|------|------|----------------------| +| `recce/data/mcp/row_count_diff.html` | Status pills + diff numbers | Per-model status badges (`ok`, `table_not_found`, etc.), signed diff display, `base_meta`/`curr_meta` shape | +| `recce/data/mcp/schema_diff.html` | HTML table | Added/removed/type_changed column grouping, `_compute_schema_changes` rich shape, per-model section headers | + +Both files are self-contained HTML — no build step, no npm dependency. They +import the SDK at runtime from unpkg. Open either file in a browser to verify +rendering without running a full MCP server. + +--- + +## What Is NOT in Iter 1 + +These are deferred to iter 2 or later: + +- **Chart-tier widgets** (bar charts, histograms) — requires a charting library + added to the CSP `resourceDomains` list and tested against MCP Apps sandbox. +- **Cloud/session mode** — `recce mcp-widget-server` raises immediately if + `--cloud` or `--session` kwargs are passed. Cloud support requires state-loader + plumbing not attempted in iter 1. +- **`lineage_diff` widget** — the lineage graph is a Dagre/React component in + `@datarecce/ui`. Serving it as a self-contained widget HTML requires either + a CDN build or embedding the compiled JS inline. Not attempted in iter 1. +- **CDN distribution of widget HTML** — iter 1 bundles HTML inside the Python + package (`recce/data/mcp/`). A CDN path (e.g., from a GitHub release asset) + would allow widget updates without a Recce release. Deferred. +- **`_widgets_enabled()` parity in `recce-cloud`** — the cloud-infra MCP server + has its own `list_tools` implementation. Widget coordination there requires a + separate integration pass. 
diff --git a/tests/test_widget_server.py b/tests/test_widget_server.py new file mode 100644 index 000000000..a7b016b17 --- /dev/null +++ b/tests/test_widget_server.py @@ -0,0 +1,136 @@ +"""Tests for recce/widget_server.py and WIDGET_TOOLS env-var coordination. + +Covers: +- WIDGET_TOOLS enumeration regression (main mcp-server with/without widgets enabled) +- Widget server tool + resource registration (FastMCP public API) +- Resource handler graceful degradation when HTML asset is missing +""" + +from unittest.mock import MagicMock, patch + +import pytest + +# Skip entire module if mcp is not available +pytest.importorskip("mcp") + +from mcp.types import ListToolsRequest # noqa: E402 + +from recce.core import RecceContext # noqa: E402 +from recce.mcp_server import WIDGET_TOOLS, RecceMCPServer # noqa: E402 +from recce.server import RecceServerMode # noqa: E402 + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _list_tool_names(server: RecceMCPServer): + """Call the registered list_tools handler and return tool name set.""" + handler = server.server.request_handlers[ListToolsRequest] + result = await handler(ListToolsRequest(method="tools/list")) + return {t.name for t in result.root.tools} + + +# --------------------------------------------------------------------------- +# Test 1: All tools present when widgets disabled (no-regression baseline) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_mcp_server_lists_all_tools_when_widgets_disabled(monkeypatch): + """When RECCE_MCP_WIDGETS is unset/empty, all tools including widget tools are returned.""" + monkeypatch.delenv("RECCE_MCP_WIDGETS", raising=False) + + mock_context = MagicMock(spec=RecceContext) + server = RecceMCPServer(mock_context, mode=RecceServerMode.server) + names = await _list_tool_names(server) + + 
assert "row_count_diff" in names + assert "schema_diff" in names + # Sanity: lineage_diff is always present + assert "lineage_diff" in names + + +# --------------------------------------------------------------------------- +# Test 2: Widget tools filtered when RECCE_MCP_WIDGETS=1 +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_mcp_server_filters_widget_tools_when_widgets_enabled(monkeypatch): + """When RECCE_MCP_WIDGETS=1, widget tools are omitted from mcp-server's list_tools.""" + monkeypatch.setenv("RECCE_MCP_WIDGETS", "1") + + mock_context = MagicMock(spec=RecceContext) + server = RecceMCPServer(mock_context, mode=RecceServerMode.server) + names = await _list_tool_names(server) + + assert "row_count_diff" not in names + assert "schema_diff" not in names + # Other tools must still be present + assert "lineage_diff" in names + + +# --------------------------------------------------------------------------- +# Test 3: Widget server registers exactly 2 tools + 2 resources +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_widget_server_registers_two_tools_and_two_resources(): + """Widget FastMCP instance has exactly row_count_diff + schema_diff tools and resources. + + Uses FastMCP public API: mcp.list_tools() and mcp.list_resources(). 
+ """ + from recce.widget_server import mcp + + tools = await mcp.list_tools() + resources = await mcp.list_resources() + + tool_names = {t.name for t in tools} + resource_uris = {str(r.uri) for r in resources} + + assert tool_names == {"row_count_diff", "schema_diff"} + assert resource_uris == { + "ui://recce/row_count_diff.html", + "ui://recce/schema_diff.html", + } + + +# --------------------------------------------------------------------------- +# Test 4: Resource handler returns error stub when HTML asset is missing +# --------------------------------------------------------------------------- + + +def test_widget_resource_handler_returns_error_stub_when_html_missing(): + """_read_widget_html returns a valid HTML stub when the asset file does not exist.""" + from recce.widget_server import _read_widget_html + + with patch("importlib.resources.files", side_effect=FileNotFoundError("no such file")): + result = _read_widget_html("row_count_diff") + + assert result.startswith("") + assert "Widget asset missing" in result + assert "row_count_diff.html" in result + + +# --------------------------------------------------------------------------- +# Test 5: Difference between disabled and enabled is exactly WIDGET_TOOLS +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_tool_enumeration_diff_is_exactly_widget_tools(monkeypatch): + """The set difference between widgets-off and widgets-on is exactly WIDGET_TOOLS.""" + mock_context = MagicMock(spec=RecceContext) + + monkeypatch.delenv("RECCE_MCP_WIDGETS", raising=False) + server_off = RecceMCPServer(mock_context, mode=RecceServerMode.server) + names_off = await _list_tool_names(server_off) + + monkeypatch.setenv("RECCE_MCP_WIDGETS", "1") + server_on = RecceMCPServer(mock_context, mode=RecceServerMode.server) + names_on = await _list_tool_names(server_on) + + assert names_off - names_on == WIDGET_TOOLS + assert names_on - names_off == set() From 
60a9813fa209633068b76d19bac7d9a0ac25ff7c Mon Sep 17 00:00:00 2001 From: Kent Date: Fri, 22 May 2026 19:57:21 +0800 Subject: [PATCH 06/43] fix(cli): chdir to --project-dir + redirect traceback to stderr (Claude Desktop cwd=/ workaround) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Claude Desktop spawns MCP servers with cwd=/. Two issues surfaced when manually testing iter 1 widgets: 1. recce/track.py:61 used bare print() which goes to stdout. When RecceConfig couldn't find/create recce.yml at cwd=/, the resulting traceback corrupted the JSON-RPC stdio channel. Fixed to print to stderr. Pre-existing bug — affected mcp-server equally. 2. recce/cli.py mcp_server and mcp_widget_server now chdir to --project-dir before RecceConfig so the config file is searched relative to the user's dbt project instead of cwd. Eliminates need for a bash wrapper in Claude Desktop config. Verified by reproducing the cwd=/ failure before fix (32 lines of traceback to stdout) and confirming clean stdout (0 lines) after. Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: Kent --- recce/cli.py | 12 ++++++++++++ recce/track.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/recce/cli.py b/recce/cli.py index c18f0e10e..ab58063df 100644 --- a/recce/cli.py +++ b/recce/cli.py @@ -2746,6 +2746,12 @@ def mcp_server(state_file, sse, host, port, **kwargs): console.print(r"Please install the MCP package: pip install 'recce\[mcp]'") exit(1) + # Claude Desktop spawns MCP servers with cwd=/. chdir to --project-dir + # before RecceConfig so it can find/create recce.yml relative to the project. 
+ project_dir = kwargs.get("project_dir") + if project_dir: + os.chdir(project_dir) + # Initialize Recce Config RecceConfig(config_file=kwargs.get("config")) @@ -2913,6 +2919,12 @@ def mcp_widget_server(state_file, sse, host, port, **kwargs): console.print(r"Please install the MCP package: pip install 'recce\[mcp]'") exit(1) + # Claude Desktop spawns MCP servers with cwd=/. chdir to --project-dir + # before RecceConfig so it can find/create recce.yml relative to the project. + project_dir = kwargs.get("project_dir") + if project_dir: + os.chdir(project_dir) + # Initialize Recce Config RecceConfig(config_file=kwargs.get("config")) diff --git a/recce/track.py b/recce/track.py index c44190a75..5d2658798 100644 --- a/recce/track.py +++ b/recce/track.py @@ -58,7 +58,7 @@ def _show_error_message(self, msg, params): if params.get("debug"): console.print_exception(show_locals=True) else: - print(traceback.format_exc()) + print(traceback.format_exc(), file=sys.stderr) # console.print('[bold red]Error:[/bold red] ', end='') # console.out(msg, highlight=False) From 18b5b0950b60ed0d247f36be29fa9f26e40731db Mon Sep 17 00:00:00 2001 From: Kent Date: Fri, 22 May 2026 22:11:56 +0800 Subject: [PATCH 07/43] fix(widgets): defensive structuredContent fallback + skip _warning key MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Claude Desktop manual test revealed widget rendered but showed empty state ("No models found") while agent text summary correctly reported 5 models. Root cause: FastMCP with @mcp.tool returning Dict[str, Any] may not populate the JSON-RPC structuredContent field — only content text payload. Widget HTML only read structuredContent. - Both widgets now try structuredContent first, fall back to parsing content[0].text as JSON. Works regardless of FastMCP serialization behavior. 
- row_count_diff also skips keys starting with "_" when iterating models (so _maybe_add_single_env_warning's _warning key doesn't break the per-row render); _warning text is rendered as a notice banner above the table when present. - schema_diff gets the same underscore-key skip for defensive uniformity. Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: Kent --- recce/data/mcp/row_count_diff.html | 23 ++++++++++++++++++++--- recce/data/mcp/schema_diff.html | 16 +++++++++++++--- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/recce/data/mcp/row_count_diff.html b/recce/data/mcp/row_count_diff.html index 6ac1feb8e..5bb42de07 100644 --- a/recce/data/mcp/row_count_diff.html +++ b/recce/data/mcp/row_count_diff.html @@ -23,6 +23,7 @@ .na { color: #9ca3af; font-style: italic; } #root { min-height: 40px; } .empty { color: #6b7280; padding: 16px 0; } + .warning { color: #6b7280; font-style: italic; font-size: 13px; margin-bottom: 8px; } @@ -33,8 +34,17 @@ const app = new App({ name: "Recce Row Count Diff", version: "1.0.0" }); await app.connect(); - app.ontoolresult = ({ structuredContent }) => { - render(structuredContent ?? {}); + app.ontoolresult = (result) => { + let data = result?.structuredContent; + if (!data || (typeof data === 'object' && Object.keys(data).length === 0)) { + // FastMCP may not auto-populate structuredContent for Dict[str, Any] returns; + // fall back to parsing the text-serialized content payload. + const textPart = result?.content?.find?.(c => c.type === 'text')?.text; + if (textPart) { + try { data = JSON.parse(textPart); } catch (e) { /* leave data as-is */ } + } + } + render(data ?? {}); }; function escapeHtml(s) { @@ -71,13 +81,19 @@ function render(data) { const models = data.models ?? {}; - const entries = Object.entries(models); + const warning = models._warning ?? null; + // Skip keys starting with "_" (e.g. 
_warning injected by single-env mode) + const entries = Object.entries(models).filter(([k]) => !k.startsWith('_')); if (entries.length === 0) { document.getElementById("root").innerHTML = `

Row Count Diff

No models found.

`; return; } + const warningHtml = warning + ? `
${escapeHtml(warning)}
` + : ''; + const rows = entries.map(([name, m]) => { // CRITICAL: === null (not !value) — 0 is a valid row count const baseOk = m.base_meta?.status === "ok"; @@ -98,6 +114,7 @@ document.getElementById("root").innerHTML = `

Row Count Diff

+ ${warningHtml} diff --git a/recce/data/mcp/schema_diff.html b/recce/data/mcp/schema_diff.html index 4942b6edc..fb50ac0da 100644 --- a/recce/data/mcp/schema_diff.html +++ b/recce/data/mcp/schema_diff.html @@ -35,8 +35,17 @@ const app = new App({ name: "Recce Schema Diff", version: "1.0.0" }); await app.connect(); - app.ontoolresult = ({ structuredContent }) => { - render(structuredContent ?? {}); + app.ontoolresult = (result) => { + let data = result?.structuredContent; + if (!data || (typeof data === 'object' && Object.keys(data).length === 0)) { + // FastMCP may not auto-populate structuredContent for Dict[str, Any] returns; + // fall back to parsing the text-serialized content payload. + const textPart = result?.content?.find?.(c => c.type === 'text')?.text; + if (textPart) { + try { data = JSON.parse(textPart); } catch (e) { /* leave data as-is */ } + } + } + render(data ?? {}); }; function escapeHtml(s) { @@ -94,7 +103,8 @@ function render(data) { const models = data.models ?? {}; - const entries = Object.entries(models); + // Skip keys starting with "_" for defensive uniformity (e.g. potential future _warning keys) + const entries = Object.entries(models).filter(([k]) => !k.startsWith('_')); if (entries.length === 0) { document.getElementById("root").innerHTML = `

Schema Diff

No models found.

`; From c83e97144cf9cfc05ab064ef5baf18a50015f28b Mon Sep 17 00:00:00 2001 From: Kent Date: Fri, 22 May 2026 22:34:32 +0800 Subject: [PATCH 08/43] fix(widgets): unwrap FastMCP \`{result: ...}\` wrapping in structuredContent Per MCP log inspection: FastMCP synthesizes an outputSchema like \`{properties: {result: {...}}, required: ["result"]}\` when @mcp.tool returns a Dict[str, Any] without a Pydantic return type. The actual structuredContent sent to the widget has shape {result: {models: ...}} not {models: ...}. Widget HTML now unwraps the result key when present, so both flat returns and FastMCP-wrapped returns render correctly. Defensive: only unwraps if data.result exists AND data.models does not, so tools that legitimately return both keys aren't mishandled. Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: Kent --- recce/data/mcp/row_count_diff.html | 9 ++++++++- recce/data/mcp/schema_diff.html | 9 ++++++++- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/recce/data/mcp/row_count_diff.html b/recce/data/mcp/row_count_diff.html index 5bb42de07..75f085c3d 100644 --- a/recce/data/mcp/row_count_diff.html +++ b/recce/data/mcp/row_count_diff.html @@ -44,7 +44,14 @@ try { data = JSON.parse(textPart); } catch (e) { /* leave data as-is */ } } } - render(data ?? {}); + data = data ?? {}; + // FastMCP wraps Dict[str, Any] returns under a `result` key when there's no + // explicit Pydantic return-type. Unwrap if present so the renderer sees the + // same shape regardless of how the server emits structuredContent. 
+ if (data && typeof data === 'object' && data.result && typeof data.result === 'object' && !data.models) { + data = data.result; + } + render(data); }; function escapeHtml(s) { diff --git a/recce/data/mcp/schema_diff.html b/recce/data/mcp/schema_diff.html index fb50ac0da..c2bf7d836 100644 --- a/recce/data/mcp/schema_diff.html +++ b/recce/data/mcp/schema_diff.html @@ -45,7 +45,14 @@ try { data = JSON.parse(textPart); } catch (e) { /* leave data as-is */ } } } - render(data ?? {}); + data = data ?? {}; + // FastMCP wraps Dict[str, Any] returns under a `result` key when there's no + // explicit Pydantic return-type. Unwrap if present so the renderer sees the + // same shape regardless of how the server emits structuredContent. + if (data && typeof data === 'object' && data.result && typeof data.result === 'object' && !data.models) { + data = data.result; + } + render(data); }; function escapeHtml(s) { From f5e746d5f0a05e4c58a863d82d90acb1896c75aa Mon Sep 17 00:00:00 2001 From: Kent Date: Fri, 22 May 2026 23:28:02 +0800 Subject: [PATCH 09/43] feat(widgets): style with Claude design tokens for native dark-mode integration Manual Claude Desktop test showed widget rendered real data correctly but hand-rolled hardcoded colors looked washed-out in dark mode and out of place next to Claude's native UI. Refactored both widgets to use Claude's CSS custom properties (var(--color-text-primary), var(--color-background-*), var(--font-sans), etc.) with light-mode hex fallbacks and a prefers-color-scheme dark fallback for non-Claude hosts. - row_count_diff.html: redesigned with header + 4-up summary cards + main table, semantic status badges (warning/success/info/secondary), tabular numeric alignment, monospace model names. - schema_diff.html: same design-token migration, per-model sections with semantic colors for added/removed/type_changed. - No new external dependencies (kept ext-apps SDK as only unpkg load, CSP resourceDomains unchanged). 
Used Unicode glyphs instead of icon font to stay dependency-light. Preserves all existing JS: unwrap fallback for FastMCP {result: ...} wrapping, _warning notice rendering, === null guards, _-prefix key skip. Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: Kent --- recce/data/mcp/row_count_diff.html | 290 +++++++++++++++++++++++------ recce/data/mcp/schema_diff.html | 199 ++++++++++++++++---- 2 files changed, 399 insertions(+), 90 deletions(-) diff --git a/recce/data/mcp/row_count_diff.html b/recce/data/mcp/row_count_diff.html index 75f085c3d..f4cd628f6 100644 --- a/recce/data/mcp/row_count_diff.html +++ b/recce/data/mcp/row_count_diff.html @@ -4,26 +4,154 @@ Row Count Diff @@ -65,25 +193,43 @@ return `N/A (${escapeHtml(status)})`; } - function formatDiff(diff) { - if (diff === null) return ``; - if (diff > 0) return `+${diff.toLocaleString()}`; - if (diff < 0) return `${diff.toLocaleString()}`; - return `0`; + function formatDiff(base, curr) { + // CRITICAL: === null (not !value) — 0 is a valid row count + if (base === null || curr === null) return ``; + const d = curr - base; + if (d === 0) return `0`; + const sign = d > 0 ? "+" : ""; + const pct = base === 0 ? "∞" : `${Math.abs(d / base * 100).toFixed(1)}%`; + const cls = d > 0 ? "diff-pos" : "diff-neg"; + return `${sign}${d.toLocaleString()}(${sign}${pct})`; } - function badgeClass(baseOk, currOk, diff) { - if (!baseOk || !currOk) return "badge-error"; - if (diff === null || diff === 0) return "badge-ok-neutral"; - return diff > 0 ? "badge-ok-added" : "badge-ok-removed"; - } + function getBadge(name, m) { + const bs = m.base_meta?.status; + const cs = m.curr_meta?.status; + // CRITICAL: === null (not !value) + const b = m.base; + const c = m.curr; - function badgeLabel(baseOk, currOk, baseMeta, currMeta, diff) { - if (!baseOk) return baseMeta?.status ?? "error"; - if (!currOk) return currMeta?.status ?? "error"; - if (diff === null) return "n/a"; - if (diff === 0) return "unchanged"; - return diff > 0 ? 
"added" : "removed"; + if (bs === "unsupported_resource_type" || cs === "unsupported_resource_type") { + return `◯ Seed (unsupported)`; + } + if (bs === "not_in_manifest" && cs === "ok") { + return `+ New model`; + } + if (cs === "not_in_manifest") { + return `△ Not in current`; + } + if (bs === "not_in_manifest") { + return `△ Not in base`; + } + if (bs !== "ok" || cs !== "ok") { + return `⚠ ${escapeHtml(cs || bs || "error")}`; + } + // Both ok + if (b === null || c === null) return `— N/A`; + if (b === c) return `✓ Unchanged`; + return `↑ Row count changed`; } function render(data) { @@ -91,45 +237,83 @@ const warning = models._warning ?? null; // Skip keys starting with "_" (e.g. _warning injected by single-env mode) const entries = Object.entries(models).filter(([k]) => !k.startsWith('_')); + if (entries.length === 0) { document.getElementById("root").innerHTML = - `

Row Count Diff

No models found.

`; + `
Row count diff

No models found.

`; return; } + // Summary counts + let changed = 0, newModels = 0, unchanged = 0; + for (const [, m] of entries) { + const bs = m.base_meta?.status; + const cs = m.curr_meta?.status; + if (bs === "not_in_manifest" && cs === "ok") { newModels++; continue; } + if (bs === "ok" && cs === "ok") { + if (m.base === m.curr) unchanged++; + else changed++; + } + } + const total = entries.length; + const warningHtml = warning - ? `
${escapeHtml(warning)}
` - : ''; + ? `
⚠ ${escapeHtml(warning)}
` : ''; const rows = entries.map(([name, m]) => { // CRITICAL: === null (not !value) — 0 is a valid row count - const baseOk = m.base_meta?.status === "ok"; - const currOk = m.curr_meta?.status === "ok"; const baseStr = m.base === null ? formatNa(m.base_meta) : m.base.toLocaleString(); const currStr = m.curr === null ? formatNa(m.curr_meta) : m.curr.toLocaleString(); - const diff = (m.base !== null && m.curr !== null) ? (m.curr - m.base) : null; - const cls = badgeClass(baseOk, currOk, diff); - const label = badgeLabel(baseOk, currOk, m.base_meta, m.curr_meta, diff); + const diffStr = formatDiff(m.base, m.curr); + const badge = getBadge(name, m); return `
- + - - + + `; }); document.getElementById("root").innerHTML = ` -

Row Count Diff

+
+ + Row count diff + ${total} model${total !== 1 ? 's' : ''} +
+
+
+
Models compared
+
${total}
+
+
+
Changed
+
${changed}
+
+
+
New in current
+
${newModels}
+
+
+
Unchanged
+
${unchanged}
+
+
${warningHtml} -
${escapeHtml(name)}${escapeHtml(name)} ${baseStr} ${currStr}${formatDiff(diff)}${escapeHtml(label)}${diffStr}${badge}
- - - - - - ${rows.join("")} -
ModelBaseCurrentDiffStatus
`; +
+ + + + + + + + + + + ${rows.join("")} +
ModelBase rowsCurrent rowsDeltaStatus
+
`; } diff --git a/recce/data/mcp/schema_diff.html b/recce/data/mcp/schema_diff.html index c2bf7d836..176423a8d 100644 --- a/recce/data/mcp/schema_diff.html +++ b/recce/data/mcp/schema_diff.html @@ -4,27 +4,132 @@ Schema Diff @@ -68,41 +173,53 @@ const unchangedCount = m.unchanged_count ?? 0; const hasChanges = added.length > 0 || removed.length > 0 || typeChanged.length > 0; - let html = `

${escapeHtml(nodeId)}

`; + let html = `
+
${escapeHtml(nodeId)}
`; if (!hasChanges && unchangedCount > 0) { - html += `

No schema changes (${unchangedCount} column${unchangedCount !== 1 ? "s" : ""} unchanged)

`; + const noun = unchangedCount === 1 ? "column" : "columns"; + html += `

✓ No schema changes (${unchangedCount} ${noun} unchanged)

`; return html + "
"; } if (added.length > 0) { - html += `

+${added.length} Added

`; + html += `
+ ${added.length} Added
`; html += ``; - html += added.map(c => ``).join(""); + html += added.map(c => + ` + + + `).join(""); html += `
ColumnType
${escapeHtml(c.name)}${escapeHtml(c.type ?? "")}
${escapeHtml(c.name)}${escapeHtml(c.type ?? "")}
`; } if (removed.length > 0) { - html += `

-${removed.length} Removed

`; + html += `
− ${removed.length} Removed
`; html += ``; - html += removed.map(c => ``).join(""); + html += removed.map(c => + ` + + + `).join(""); html += `
ColumnType
${escapeHtml(c.name)}${escapeHtml(c.type ?? "")}
${escapeHtml(c.name)}${escapeHtml(c.type ?? "")}
`; } if (typeChanged.length > 0) { - html += `

${typeChanged.length} Type Changed

`; - html += ``; - html += typeChanged.map(c => ` - - - - - `).join(""); + html += `
⇆ ${typeChanged.length} Type changed
`; + html += `
ColumnBase TypeCurrent Type
${escapeHtml(c.name)}${escapeHtml(c.base_type ?? "")}${escapeHtml(c.curr_type ?? "")}
`; + html += typeChanged.map(c => + ` + + + + + `).join(""); html += `
ColumnBase typeCurrent type
${escapeHtml(c.name)}${escapeHtml(c.base_type ?? "")}${escapeHtml(c.curr_type ?? "")}
`; } if (unchangedCount > 0) { - html += `

${unchangedCount} column${unchangedCount !== 1 ? "s" : ""} unchanged

`; + const noun = unchangedCount === 1 ? "column" : "columns"; + html += `

${unchangedCount} ${noun} unchanged

`; } return html + "
"; @@ -112,14 +229,22 @@ const models = data.models ?? {}; // Skip keys starting with "_" for defensive uniformity (e.g. potential future _warning keys) const entries = Object.entries(models).filter(([k]) => !k.startsWith('_')); + if (entries.length === 0) { document.getElementById("root").innerHTML = - `

Schema Diff

No models found.

`; + `
Schema diff

No models found.

`; return; } + const total = entries.length; const blocks = entries.map(([nodeId, m]) => renderModelBlock(nodeId, m)).join(""); - document.getElementById("root").innerHTML = `

Schema Diff

${blocks}`; + document.getElementById("root").innerHTML = ` +
+ + Schema diff + ${total} model${total !== 1 ? 's' : ''} +
+ ${blocks}`; } From ce1b80a0a9b741bcb363da6c41b77feb7710bdfb Mon Sep 17 00:00:00 2001 From: Kent Date: Sat, 23 May 2026 06:53:02 +0800 Subject: [PATCH 10/43] =?UTF-8?q?refactor(widgets):=20idiomatic=20FastMCP?= =?UTF-8?q?=20=E2=80=94=20Pydantic=20models=20+=20CallToolResult=20+=20ann?= =?UTF-8?q?otations?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per anthropic/skills/mcp-builder Python idiom checklist (python_mcp_server.md:691). Closes the gap between iter 1's "make it work" shortcuts and SDK best practices. - Pydantic BaseModel for tool inputs (RowCountDiffInput, SchemaDiffInput) with Field(description=...) on every param — replaces bare Optional[...] params, gives the LLM richer inputSchema descriptions. - Pydantic BaseModel for tool outputs (RowCountDiffOutput, SchemaDiffOutput, nested per-model models) — replaces Dict[str, Any] return, generates proper outputSchema, eliminates FastMCP's {result: ...} wrapping (widget JS unwrap fallback removed in commit 2). - Return CallToolResult explicitly with short content (one sentence) + data in structuredContent only. Agent receives a summary; widget receives the dict. - annotations dict on every @mcp.tool: title, readOnlyHint, destructiveHint, idempotentHint, openWorldHint per checklist line 691. - _warning moved from top-level result dict to RowCountDiffOutput.warning named field — cleaner Pydantic shape, no more _-prefix key skip in widget JS. - logging.basicConfig explicitly sets stream=sys.stderr per stdio discipline (mcp_best_practices.md:139). - prefersBorder: False added to @mcp.resource meta per spec UIResourceMeta. - Tool docstrings expanded per python_mcp_server.md:278-328 pattern: what / Use when / Don't use when / Returns / Error Handling. 
Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- recce/widget_server.py | 238 ++++++++++++++++++++++++++++++++--------- 1 file changed, 187 insertions(+), 51 deletions(-) diff --git a/recce/widget_server.py b/recce/widget_server.py index fac1cd2e4..fd06958f2 100644 --- a/recce/widget_server.py +++ b/recce/widget_server.py @@ -11,9 +11,12 @@ import importlib.resources import logging +import sys from typing import Any, Dict, List, Optional from mcp.server.fastmcp import FastMCP +from mcp.types import CallToolResult, TextContent +from pydantic import BaseModel, Field mcp = FastMCP("recce-widgets") @@ -23,13 +26,90 @@ logger = logging.getLogger(__name__) +# --------------------------------------------------------------------------- +# Pydantic output models +# --------------------------------------------------------------------------- + + +class RowCountMeta(BaseModel): + status: str # "ok" | "table_not_found" | "permission_denied" | etc. + message: Optional[str] = None + + +class RowCountModel(BaseModel): + base: Optional[int] = None + curr: Optional[int] = None + base_meta: RowCountMeta + curr_meta: RowCountMeta + + +class RowCountDiffOutput(BaseModel): + models: Dict[str, RowCountModel] + warning: Optional[str] = None # from single-env mode + + +class SchemaChange(BaseModel): + added: List[Dict[str, str]] # [{"name": ..., "type": ...}] + removed: List[Dict[str, str]] + type_changed: List[Dict[str, str]] # [{"name": ..., "base_type": ..., "curr_type": ...}] + unchanged_count: int + + +class SchemaDiffOutput(BaseModel): + models: Dict[str, SchemaChange] + + +# --------------------------------------------------------------------------- +# Pydantic input models +# --------------------------------------------------------------------------- + + +class RowCountDiffInput(BaseModel): + node_names: Optional[List[str]] = Field( + default=None, + description="Explicit dbt model names to check (mutually exclusive with select/exclude)", + ) + node_ids: 
Optional[List[str]] = Field( + default=None, + description="Explicit dbt node IDs to check", + ) + select: Optional[str] = Field( + default=None, + description="dbt selector syntax (e.g. 'state:modified+', 'customers orders')", + ) + exclude: Optional[str] = Field( + default=None, + description="dbt selector syntax for exclusion", + ) + + +class SchemaDiffInput(BaseModel): + select: Optional[str] = Field( + default=None, + description="dbt selector syntax (e.g. 'state:modified+', '1+state:modified')", + ) + exclude: Optional[str] = Field( + default=None, + description="dbt selector syntax for exclusion", + ) + packages: Optional[List[str]] = Field( + default=None, + description="Restrict to specific dbt packages by name", + ) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + def _read_widget_html(name: str) -> str: """Read widget HTML from recce/data/mcp/{name}.html, returning an error stub if missing.""" try: ref = importlib.resources.files("recce.data.mcp") / f"{name}.html" return ref.read_text(encoding="utf-8") except (FileNotFoundError, TypeError, ModuleNotFoundError): - return f"" f"Widget asset missing: {name}.html. Run pnpm run build." f"" + return f"Widget asset missing: {name}.html. Run pnpm run build." # --------------------------------------------------------------------------- @@ -39,51 +119,71 @@ def _read_widget_html(name: str) -> str: @mcp.tool( name="row_count_diff", - description=( - "Compare row counts between base and current environments for specified models. 
" - "Returns structured results with status information for each model.\n\n" - "Response format: {model_name: {base: int|null, curr: int|null, " - "base_meta: {status, message?}, curr_meta: {status, message?}}}\n" - "- base/curr: row count as integer, or null if unavailable\n" - "- base_meta/curr_meta: status details explaining the count value\n\n" - "Status codes (in *_meta.status):\n" - "- 'ok': Row count retrieved successfully\n" - "- 'not_in_manifest': Model not found in dbt manifest\n" - "- 'unsupported_resource_type': Node is not a model/snapshot\n" - "- 'unsupported_materialization': Materialization doesn't support row counts\n" - "- 'table_not_found': Table defined in manifest but doesn't exist in database\n" - "- 'permission_denied': User lacks permission to access the table" - ), + annotations={ + "title": "Row Count Diff (Widget)", + "readOnlyHint": True, + "destructiveHint": False, + "idempotentHint": True, + "openWorldHint": False, + }, meta={ "ui": {"resourceUri": "ui://recce/row_count_diff.html"}, "ui/resourceUri": "ui://recce/row_count_diff.html", }, ) -async def row_count_diff( - node_names: Optional[List[str]] = None, - node_ids: Optional[List[str]] = None, - select: Optional[str] = None, - exclude: Optional[str] = None, -) -> Dict[str, Any]: - """Compare row counts between base and current environments.""" - arguments = { - k: v - for k, v in { - "node_names": node_names, - "node_ids": node_ids, - "select": select, - "exclude": exclude, - }.items() - if v is not None - } - result = await _recce_server._tool_row_count_diff(arguments) - return {"models": result} +async def row_count_diff(args: RowCountDiffInput) -> CallToolResult: + """Compare row counts between base and current dbt environments for specified models. + + Returns structured per-model results with status information. Rendered in + an interactive widget; the agent should not summarize or reproduce the data + as a text table. + + Args: + node_names: Explicit model names (e.g. 
["customers", "orders"]) + node_ids: Explicit dbt node IDs + select: dbt selector syntax (e.g. "state:modified+", "1+state:modified") + exclude: dbt selector for exclusion + (use either explicit names/ids OR selector syntax, not both) + + Returns: + CallToolResult with structuredContent: RowCountDiffOutput shape + {models: {: {base, curr, base_meta, curr_meta}}, warning?: str} + + Use when: + - User asks "did row counts change", "regression check on counts" + - PR review needs row count diff across models + Don't use when: + - Schema (column) changes — use schema_diff instead + - SQL output comparison — use query_diff + - Single environment without target-base — server warns about + single-env mode but returns no useful comparison + + Error Handling: + - table_not_found / permission_denied surface in *_meta.status + - tool raises only on fundamental dbt/adapter failures + """ + result = await _recce_server._tool_row_count_diff(args.model_dump(exclude_none=True)) + # Extract warning that single-env mode injects as _warning key + warning = result.pop("_warning", None) if isinstance(result, dict) else None + output = RowCountDiffOutput( + models={name: RowCountModel(**v) for name, v in result.items()}, + warning=warning, + ) + return CallToolResult( + content=[TextContent(type="text", text="Row count diff rendered in widget.")], + structuredContent=output.model_dump(), + ) @mcp.resource( uri="ui://recce/row_count_diff.html", mime_type="text/html;profile=mcp-app", - meta={"ui": {"csp": {"resourceDomains": ["https://unpkg.com"]}}}, + meta={ + "ui": { + "csp": {"resourceDomains": ["https://unpkg.com"]}, + "prefersBorder": False, + }, + }, ) def row_count_diff_resource() -> str: return _read_widget_html("row_count_diff") @@ -96,35 +196,70 @@ def row_count_diff_resource() -> str: @mcp.tool( name="schema_diff", - description=( - "Get the schema diff (column changes) between base and current environments. 
" - "Shows added, removed, and type-changed columns in compact dataframe format." - ), + annotations={ + "title": "Schema Diff (Widget)", + "readOnlyHint": True, + "destructiveHint": False, + "idempotentHint": True, + "openWorldHint": False, + }, meta={ "ui": {"resourceUri": "ui://recce/schema_diff.html"}, "ui/resourceUri": "ui://recce/schema_diff.html", }, ) -async def schema_diff( - select: Optional[str] = None, - exclude: Optional[str] = None, - packages: Optional[List[str]] = None, -) -> Dict[str, Any]: - """Get schema diff (column changes) between base and current environments.""" +async def schema_diff(args: SchemaDiffInput) -> CallToolResult: + """Get the schema diff (column changes) between base and current dbt environments. + + Shows added, removed, and type-changed columns per model, rendered in an + interactive widget. The agent should not reproduce the table data as plain text. + + Args: + select: dbt selector syntax (e.g. "state:modified+", "customers orders") + exclude: dbt selector for exclusion + packages: restrict to specific dbt packages by name + + Returns: + CallToolResult with structuredContent: SchemaDiffOutput shape + {models: {: {added, removed, type_changed, unchanged_count}}} + + Use when: + - User asks "what columns changed", "schema diff", "any new/removed columns" + - PR review needs to confirm no unintended column renames/removals + Don't use when: + - Row count changes — use row_count_diff instead + - SQL output comparison — use query_diff + - Single environment has no comparison target (tool will return empty diff) + + Error Handling: + - tool raises on lineage_diff / context failure + - empty models dict means no schema changes detected in the selected scope + """ lineage_diff = _recce_server.context.get_lineage_diff().model_dump(mode="json") rich_result = _recce_server._compute_schema_changes( lineage_diff, - select=select, - exclude=exclude, - packages=packages if packages is not None else None, + select=args.select, + 
exclude=args.exclude, + packages=args.packages if args.packages is not None else None, + ) + output = SchemaDiffOutput( + models={node_id: SchemaChange(**m) for node_id, m in rich_result.items()}, + ) + return CallToolResult( + content=[TextContent(type="text", text="Schema diff rendered in widget.")], + structuredContent=output.model_dump(), ) - return {"models": rich_result} @mcp.resource( uri="ui://recce/schema_diff.html", mime_type="text/html;profile=mcp-app", - meta={"ui": {"csp": {"resourceDomains": ["https://unpkg.com"]}}}, + meta={ + "ui": { + "csp": {"resourceDomains": ["https://unpkg.com"]}, + "prefersBorder": False, + }, + }, ) def schema_diff_resource() -> str: return _read_widget_html("schema_diff") @@ -156,6 +291,7 @@ def run_widget_server(**kwargs) -> None: logging.basicConfig( level=logging.INFO, + stream=sys.stderr, # NEVER stdout — that's the JSON-RPC channel format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", ) From f55a28236107b4973d1e8404d3ee023b42e31df9 Mon Sep 17 00:00:00 2001 From: Kent Date: Sat, 23 May 2026 06:55:35 +0800 Subject: [PATCH 11/43] refactor(widgets): use ext-apps SDK theme helpers + remove dead JS hacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per add-app-to-server/SKILL.md JS-side patterns. Replaces manual CSS var() fallback with SDK helpers that actively apply host design tokens via postMessage context. - Import applyDocumentTheme, applyHostStyleVariables, applyHostFonts from ext-apps SDK. Register app.onhostcontextchanged to invoke them on theme/ font change. Apply initial context after connect() if exposed. - Remove dead defensive layers no longer needed after commit 1: - structuredContent unwrap (FastMCP no longer wraps Pydantic returns) - content[0].text JSON.parse fallback (content is one-sentence string) - _-prefix key skip (warning is now a named field, models is clean dict) - Add app.onteardown handler returning {} per SDK pattern. 
- CSS fallback values: rename --color-text-error → --color-text-danger and --color-background-error → --color-background-danger (and badge-error → badge-danger) to match spec enum McpUiStyleVariableKey (no "error" semantic; "danger" is canonical). - CSS var() fallbacks retained as defensive layer in case SDK helper races. - warning now read from data.warning (named field) not data.models._warning. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- recce/data/mcp/row_count_diff.html | 64 +++++++++++++++++------------- recce/data/mcp/schema_diff.html | 58 +++++++++++++++------------ 2 files changed, 69 insertions(+), 53 deletions(-) diff --git a/recce/data/mcp/row_count_diff.html b/recce/data/mcp/row_count_diff.html index f4cd628f6..f9b3858fd 100644 --- a/recce/data/mcp/row_count_diff.html +++ b/recce/data/mcp/row_count_diff.html @@ -107,7 +107,7 @@ white-space: nowrap; } .diff-pos { color: var(--color-text-warning, #d97706); font-weight: 500; } - .diff-neg { color: var(--color-text-error, #dc2626); font-weight: 500; } + .diff-neg { color: var(--color-text-danger, #dc2626); font-weight: 500; } .diff-pct { font-size: 11px; color: var(--color-text-tertiary, #9ca3af); margin-left: 4px; } .na { color: var(--color-text-tertiary, #9ca3af); font-style: italic; } @@ -138,9 +138,9 @@ background: var(--color-background-secondary, #f3f4f6); color: var(--color-text-secondary, #6b7280); } - .badge-error { - background: var(--color-background-secondary, #fee2e2); - color: var(--color-text-error, #991b1b); + .badge-danger { + background: var(--color-background-danger, #fee2e2); + color: var(--color-text-danger, #991b1b); } /* ── Misc ───────────────────────────────────────────────────────── */ @@ -157,31 +157,40 @@
Loading…
``` -`structuredContent` is populated automatically by FastMCP when the `@mcp.tool` -handler returns a dict (see "structuredContent contract" below). The `models` -key is the wrapping convention this codebase uses — your render function reads -from `structuredContent.models`. +`structuredContent` comes from the `CallToolResult.structuredContent` set in the +Python handler (see "structuredContent contract" below). The `models` key is the +wrapping convention this codebase uses — your render function reads from +`structuredContent.models`. + +SDK theme helpers (`applyDocumentTheme`, `applyHostStyleVariables`, +`applyHostFonts`) actively apply design tokens from the host context via +`postMessage`. Use `var(--token, fallback)` in CSS as a defensive layer in case +the helper hasn't fired yet (race condition on first load). Add the HTML file to git normally — it escapes the broad `recce/data` gitignore via per-extension rules in `.gitignore` (see "Gotchas"). ### Step 3 — Add a `@mcp.tool` delegate in `recce/widget_server.py` +Define Pydantic input and output models first, then the tool handler: + ```python +from pydantic import BaseModel, Field +from mcp.types import CallToolResult, TextContent + +class MyToolInput(BaseModel): + select: Optional[str] = Field( + default=None, + description="dbt selector syntax", + ) + exclude: Optional[str] = Field( + default=None, + description="dbt selector syntax for exclusion", + ) + +class MyToolModel(BaseModel): + # ... 
fields matching the data shape for one model + pass + +class MyToolOutput(BaseModel): + models: Dict[str, MyToolModel] + @mcp.tool( name="", - description="Human-readable description Claude uses to pick this tool.", + annotations={ + "title": "My Tool (Widget)", + "readOnlyHint": True, + "destructiveHint": False, + "idempotentHint": True, + "openWorldHint": False, + }, meta={ "ui": {"resourceUri": "ui://recce/.html"}, "ui/resourceUri": "ui://recce/.html", }, ) -async def ( - select: Optional[str] = None, - exclude: Optional[str] = None, -) -> Dict[str, Any]: - """One-line docstring.""" - result = await _recce_server._tool_({"select": select, "exclude": exclude}) - return {"models": result} +async def (args: MyToolInput) -> CallToolResult: + """One-line summary. + + Use when: ... + Don't use when: ... + Returns: CallToolResult with structuredContent: MyToolOutput shape. + """ + result = await _recce_server._tool_(args.model_dump(exclude_none=True)) + output = MyToolOutput( + models={name: MyToolModel(**v) for name, v in result.items()}, + ) + return CallToolResult( + content=[TextContent(type="text", text=" rendered in widget.")], + structuredContent=output.model_dump(), + ) ``` Key requirements: -- Use **typed params** (`Optional[str]`, `Optional[List[str]]`, etc.) — never - `**kwargs`. FastMCP infers the JSON `inputSchema` from type hints. Without a - schema the tool is uncallable from Claude Desktop. +- Use a **Pydantic BaseModel input** (`MyToolInput`) with `Field(description=...)` + on every param. FastMCP infers the JSON `inputSchema` from the model schema. + Without a schema the tool is uncallable from Claude Desktop. +- Use a **Pydantic BaseModel output** (`MyToolOutput`). This generates a clean + `outputSchema` and prevents FastMCP's `{result: ...}` wrapping that occurs + with bare `Dict[str, Any]` returns. +- **Return `CallToolResult` explicitly** with a one-sentence `content` string. 
+ The agent reads only the short content text; the widget reads `structuredContent`. +- `annotations` dict is required: `readOnlyHint`, `destructiveHint`, + `idempotentHint`, `openWorldHint`, `title`. - `meta` needs **both** the nested key (`"ui": {"resourceUri": ...}`) and the flat key (`"ui/resourceUri": ...`) — the qr-server reference pattern. Dropping either key breaks widget attachment in some Claude Desktop versions. -- Return `{"models": result}` so widget HTML can read a uniform `structuredContent.models`. ### Step 4 — Add a `@mcp.resource` handler in `recce/widget_server.py` @@ -160,7 +230,12 @@ Key requirements: @mcp.resource( uri="ui://recce/.html", mime_type="text/html;profile=mcp-app", - meta={"ui": {"csp": {"resourceDomains": ["https://unpkg.com"]}}}, + meta={ + "ui": { + "csp": {"resourceDomains": ["https://unpkg.com"]}, + "prefersBorder": False, # widget manages its own padding/border + }, + }, ) def _resource() -> str: return _read_widget_html("") @@ -206,14 +281,24 @@ as the worked example. ## structuredContent Contract -When a FastMCP `@mcp.tool` async function returns a `dict`, FastMCP -automatically populates both: +When a `@mcp.tool` handler returns a `CallToolResult` explicitly, it controls +exactly what goes into `content` (for non-widget MCP clients) and +`structuredContent` (for widget-capable clients like Claude Desktop): + +```python +return CallToolResult( + content=[TextContent(type="text", text="Tool rendered in widget.")], + structuredContent=output.model_dump(), # Pydantic BaseModel → clean dict +) +``` -- `content`: a text serialisation of the dict (for non-widget MCP clients) -- `structuredContent`: the dict itself (for widget-capable clients like - Claude Desktop) +- `content`: a short one-sentence string the agent reads. Never a JSON dump of + the full data — that would cause the agent to re-render the data as a text + table ("dual-render"), defeating the widget. 
+- `structuredContent`: the Pydantic output model dumped to a dict. Widget HTML + reads this in the `app.ontoolresult` callback. -The widget HTML receives `structuredContent` in the `app.ontoolresult` callback: +The widget HTML receives `structuredContent` directly: ```js app.ontoolresult = ({ structuredContent }) => { @@ -222,10 +307,14 @@ app.ontoolresult = ({ structuredContent }) => { }; ``` -This codebase always wraps tool results as `{"models": }` before -returning from the `@mcp.tool` delegate. This keeps all widget HTML uniform — -every widget reads from `structuredContent.models`, regardless of the underlying -tool's native shape. +This codebase always puts model data under a `models` key in the Pydantic output +model. This keeps all widget HTML uniform — every widget reads from +`structuredContent.models`, regardless of the underlying tool's native shape. + +**Why Pydantic models, not bare `Dict[str, Any]`:** FastMCP wraps bare `Dict` +returns in a `{result: ...}` envelope at the protocol level. Pydantic return +types (or explicit `CallToolResult`) bypass this and emit the clean dict shape +directly. Always use Pydantic models for widget tool outputs. --- @@ -235,10 +324,20 @@ tool's native shape. Version 1.7.2 was tested in the Day 0 spike and found incompatible. Do not float the version or use `@latest`. -- **`@mcp.tool` must use typed params — never `**kwargs`.** FastMCP cannot - infer an `inputSchema` from `**kwargs`. Without a schema, Claude Desktop - registers the tool in `tools/list` but cannot construct a `tools/call` — - the tool appears available but is silently uncallable. +- **`@mcp.tool` input must be a Pydantic BaseModel — never bare `**kwargs` or + bare typed params.** Using a Pydantic input model gives each parameter a + `description` field in the JSON `inputSchema`, which improves LLM tool + selection. FastMCP generates `inputSchema` from the Pydantic model schema. 
+ Without a schema, Claude Desktop registers the tool in `tools/list` but + cannot construct a `tools/call` — the tool appears available but is silently + uncallable. + +- **`@mcp.tool` output must be a Pydantic BaseModel or explicit `CallToolResult` + — never bare `Dict[str, Any]`.** FastMCP wraps bare `Dict` returns in a + `{result: ...}` envelope at the protocol level. Pydantic return types or + explicit `CallToolResult` bypass this and emit the clean dict shape. Widget + JS reads `structuredContent.models`; if `{result: ...}` wrapping is present, + the widget sees an empty `models` key and renders "No models found." - **`@mcp.tool` meta needs both key forms.** The `meta` dict must contain: - `"ui": {"resourceUri": "ui://recce/.html"}` (nested, canonical) @@ -260,8 +359,40 @@ tool's native shape. - **In stdio transport mode, stdout is JSON-RPC.** Any `print()` or `logging.info()` output written to stdout will corrupt the MCP framing. - Configure logging to write to `stderr` only (see `logging.basicConfig` in - `run_widget_server()`). Never add bare `print()` calls in `widget_server.py`. + Configure logging to write to `stderr` only (see `logging.basicConfig(stream=sys.stderr)` + in `run_widget_server()`). Never add bare `print()` calls in `widget_server.py`. + +- **CSS token naming: use `danger`, not `error`.** The MCP Apps spec enum + `McpUiStyleVariableKey` uses `--color-text-danger` and + `--color-background-danger`. There is no `--color-text-error` token — using + it in a `var()` fallback chain causes the CSS fallback to fire even when the + host provides design tokens. + +--- + +## Python vs TypeScript SDK Support + +The `@modelcontextprotocol/ext-apps` package provides TypeScript SDK +helpers (`registerAppTool`, `registerAppResource`, etc.) but no +dedicated Python package. 
Python servers use the base `mcp` SDK +(`FastMCP`) and manually emit the MCP Apps wire shape via +`@mcp.tool(meta={"ui": {"resourceUri": "..."}})` and +`@mcp.resource(mime_type="text/html;profile=mcp-app", meta={"ui": {...}})`. + +The qr-server example in the official ext-apps examples directory is +the canonical Python reference for MCP Apps server-side patterns: +https://github.com/modelcontextprotocol/ext-apps/tree/main/examples/qr-server + +Widget HTML JS-side helpers (`applyDocumentTheme`, +`applyHostStyleVariables`, `applyHostFonts`, `App` class) ARE available +to Python servers — they live in `@modelcontextprotocol/ext-apps@0.4.0` +loaded into the widget iframe via unpkg CDN. They work regardless of +which server language emits the JSON-RPC. + +Recce stays on Python because (1) `recce/mcp_server.py` is 3000+ LOC +of Python with deep dbt integration, so switching languages for widget +ergonomics is poor ROI, and (2) qr-server proves the Python path is +documented and supported. Reconsider if ext-apps publishes a Python SDK. 
--- diff --git a/tests/test_widget_server.py b/tests/test_widget_server.py index a7b016b17..ffed0c22e 100644 --- a/tests/test_widget_server.py +++ b/tests/test_widget_server.py @@ -4,9 +4,11 @@ - WIDGET_TOOLS enumeration regression (main mcp-server with/without widgets enabled) - Widget server tool + resource registration (FastMCP public API) - Resource handler graceful degradation when HTML asset is missing +- CallToolResult shape: short content + structuredContent matching Pydantic models +- Tool annotations presence and values """ -from unittest.mock import MagicMock, patch +from unittest.mock import AsyncMock, MagicMock, patch import pytest @@ -134,3 +136,135 @@ async def test_tool_enumeration_diff_is_exactly_widget_tools(monkeypatch): assert names_off - names_on == WIDGET_TOOLS assert names_on - names_off == set() + + +# --------------------------------------------------------------------------- +# Test 6: row_count_diff returns CallToolResult with short one-sentence content +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_row_count_diff_returns_calltoolresult_with_short_content(): + """row_count_diff handler returns CallToolResult with one-sentence content and structuredContent. 
+ + Verifies: + - content[0].text is a short string, NOT a JSON dump of the full result + - structuredContent is populated (not empty, not None) + - structuredContent has 'models' and 'warning' keys (RowCountDiffOutput shape) + """ + from mcp.types import CallToolResult + + import recce.widget_server as ws + from recce.widget_server import RowCountDiffInput + + # Mock _recce_server._tool_row_count_diff to return a minimal row count result + mock_server = MagicMock() + mock_server._tool_row_count_diff = AsyncMock( + return_value={ + "customers": { + "base": 1000, + "curr": 1000, + "base_meta": {"status": "ok"}, + "curr_meta": {"status": "ok"}, + } + } + ) + + original = ws._recce_server + ws._recce_server = mock_server + try: + args = RowCountDiffInput(select="customers") + result = await ws.row_count_diff(args) + finally: + ws._recce_server = original + + assert isinstance(result, CallToolResult) + assert len(result.content) == 1 + content_text = result.content[0].text + # Content must be a short human-readable sentence, NOT a JSON data dump + assert isinstance(content_text, str) + assert len(content_text) < 100, f"content too long (got {len(content_text)} chars): {content_text!r}" + assert "widget" in content_text.lower() + # structuredContent must be populated with Pydantic output shape + assert result.structuredContent is not None + assert "models" in result.structuredContent + assert "warning" in result.structuredContent + + +# --------------------------------------------------------------------------- +# Test 7: structuredContent matches RowCountDiffOutput Pydantic model schema +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_structured_content_matches_pydantic_model(): + """structuredContent from row_count_diff passes RowCountDiffOutput.model_validate(). + + Proves Pydantic shape is clean and matches what widget JS reads. 
+ """ + import recce.widget_server as ws + from recce.widget_server import RowCountDiffInput, RowCountDiffOutput + + mock_server = MagicMock() + mock_server._tool_row_count_diff = AsyncMock( + return_value={ + "orders": { + "base": None, + "curr": 500, + "base_meta": {"status": "table_not_found", "message": "Table not found"}, + "curr_meta": {"status": "ok"}, + }, + "customers": { + "base": 200, + "curr": 210, + "base_meta": {"status": "ok"}, + "curr_meta": {"status": "ok"}, + }, + } + ) + + original = ws._recce_server + ws._recce_server = mock_server + try: + args = RowCountDiffInput() + result = await ws.row_count_diff(args) + finally: + ws._recce_server = original + + # Must round-trip through Pydantic validation without error + validated = RowCountDiffOutput.model_validate(result.structuredContent) + assert len(validated.models) == 2 + assert validated.models["orders"].base is None + assert validated.models["orders"].curr == 500 + assert validated.models["orders"].base_meta.status == "table_not_found" + assert validated.models["customers"].base == 200 + assert validated.warning is None + + +# --------------------------------------------------------------------------- +# Test 8: Tool annotations are present and correct on both widget tools +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_widget_tool_annotations_present(): + """Both widget tools have required annotations per SDK idiom checklist. + + Asserts readOnlyHint=True, destructiveHint=False, idempotentHint=True, + openWorldHint=False, and title is set. 
+ """ + from recce.widget_server import mcp + + tools = await mcp.list_tools() + tool_map = {t.name: t for t in tools} + + for tool_name in ("row_count_diff", "schema_diff"): + assert tool_name in tool_map, f"{tool_name} not found in widget mcp tools" + t = tool_map[tool_name] + a = t.annotations + assert a is not None, f"{tool_name} has no annotations" + assert a.readOnlyHint is True, f"{tool_name}: expected readOnlyHint=True" + assert a.destructiveHint is False, f"{tool_name}: expected destructiveHint=False" + assert a.idempotentHint is True, f"{tool_name}: expected idempotentHint=True" + assert a.openWorldHint is False, f"{tool_name}: expected openWorldHint=False" + assert a.title is not None and len(a.title) > 0, f"{tool_name}: title must be set" From b882697eaf801766c9ec8d458a61f312f10bc609 Mon Sep 17 00:00:00 2001 From: Kent Date: Mon, 25 May 2026 15:48:29 +0800 Subject: [PATCH 13/43] fix(widgets): complete @media dark overrides for all text/background classes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Captain's manual test in Claude Desktop dark mode revealed summary card values invisible and model name cells with unexpected styling. Root cause: the @media (prefers-color-scheme: dark) blocks in both widget HTML files only covered .card, th, border, .na, .empty, .warning — leaving .card-value, .card-label, .header-*, .model-name, .diff-*, and .badge-* with light-mode fallback hex values that become invisible on Claude's dark card backgrounds. 
Both widgets now have exhaustive @media dark overrides so rendering is correct in all four (mode × SDK helpers fire/no-fire) combinations: - Dark + SDK fire: host tokens drive - Dark + no SDK: @media dark fallback hex drives (this commit's job) - Light + SDK fire: host tokens drive - Light + no SDK: default var() inline fallback drives Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- recce/data/mcp/row_count_diff.html | 48 ++++++++++++++++++++++++++---- recce/data/mcp/schema_diff.html | 37 ++++++++++++++++++----- 2 files changed, 72 insertions(+), 13 deletions(-) diff --git a/recce/data/mcp/row_count_diff.html b/recce/data/mcp/row_count_diff.html index f9b3858fd..c7e5335fe 100644 --- a/recce/data/mcp/row_count_diff.html +++ b/recce/data/mcp/row_count_diff.html @@ -16,17 +16,53 @@ line-height: 1.4; } /* Light-mode hardcoded fallbacks are inline above. - Dark-mode fallback for non-Claude hosts (OS preference respected): */ + Dark-mode fallback for non-Claude hosts (OS preference respected): + This block must be exhaustive — covers all classes using color:/background: + so the widget renders correctly when SDK helpers don't fire. 
*/ @media (prefers-color-scheme: dark) { + /* ── Base ── */ body { color: var(--color-text-primary, #e5e7eb); background: var(--color-background-primary, #1a1a1a); } - .card { background: var(--color-background-secondary, #27272a) !important; } - th { background: var(--color-background-secondary, #27272a) !important; color: var(--color-text-secondary, #9ca3af) !important; } - td, th { border-color: var(--color-border-tertiary, #3f3f46) !important; } - .na { color: var(--color-text-tertiary, #6b7280) !important; } - .empty, .warning { color: var(--color-text-secondary, #9ca3af) !important; } + /* ── Header ── */ + .header-icon { color: var(--color-text-secondary, #9ca3af) !important; } + .header-title { color: var(--color-text-primary, #e5e7eb) !important; } + .header-count { color: var(--color-text-secondary, #9ca3af) !important; } + /* ── Summary cards ── */ + .card { background: var(--color-background-secondary, #27272a) !important; } + .card-label { color: var(--color-text-secondary, #9ca3af) !important; } + .card-value { color: var(--color-text-primary, #e5e7eb) !important; } + /* Semantic card-value states: keep vivid enough on dark */ + .card-value.warning { color: var(--color-text-warning, #fbbf24) !important; } + .card-value.info { color: var(--color-text-info, #60a5fa) !important; } + .card-value.success { color: var(--color-text-success, #34d399) !important; } + /* ── Table ── */ + th { background: var(--color-background-secondary, #27272a) !important; + color: var(--color-text-secondary, #9ca3af) !important; } + td, th { border-color: var(--color-border-tertiary, #3f3f46) !important; } + /* ── Model name cell — explicit color prevents any inherited orange tint ── */ + td.model-name { color: var(--color-text-primary, #e5e7eb) !important; + background: transparent !important; } + /* ── Diff deltas ── */ + .diff-pos { color: var(--color-text-warning, #fbbf24) !important; } + .diff-neg { color: var(--color-text-danger, #f87171) !important; } + .diff-pct { 
color: var(--color-text-tertiary, #6b7280) !important; } + /* ── N/A / misc ── */ + .na { color: var(--color-text-tertiary, #6b7280) !important; } + .empty { color: var(--color-text-secondary, #9ca3af) !important; } + .warning-msg { color: var(--color-text-secondary, #9ca3af) !important; } + /* ── Status badges — light pastels are too bright on dark; flip to dark bg + light text ── */ + .badge-warning { background: var(--color-background-warning, #78350f) !important; + color: var(--color-text-warning, #fde68a) !important; } + .badge-success { background: var(--color-background-success, #064e3b) !important; + color: var(--color-text-success, #6ee7b7) !important; } + .badge-info { background: var(--color-background-info, #1e3a8a) !important; + color: var(--color-text-info, #93c5fd) !important; } + .badge-secondary { background: var(--color-background-secondary, #3f3f46) !important; + color: var(--color-text-secondary, #9ca3af) !important; } + .badge-danger { background: var(--color-background-danger, #7f1d1d) !important; + color: var(--color-text-danger, #fca5a5) !important; } } /* ── Header ─────────────────────────────────────────────────────── */ diff --git a/recce/data/mcp/schema_diff.html b/recce/data/mcp/schema_diff.html index 63058ba8c..dcd6069c2 100644 --- a/recce/data/mcp/schema_diff.html +++ b/recce/data/mcp/schema_diff.html @@ -15,23 +15,46 @@ background: var(--color-background-primary, #ffffff); line-height: 1.4; } - /* Dark-mode fallback for non-Claude hosts (OS preference respected): */ + /* Dark-mode fallback for non-Claude hosts (OS preference respected): + This block must be exhaustive — covers all classes using color:/background: + so the widget renders correctly when SDK helpers don't fire. 
*/ @media (prefers-color-scheme: dark) { + /* ── Base ── */ body { color: var(--color-text-primary, #e5e7eb); background: var(--color-background-primary, #1a1a1a); } + /* ── Header ── */ + .header-icon { color: var(--color-text-secondary, #9ca3af) !important; } + .header-title { color: var(--color-text-primary, #e5e7eb) !important; } + .header-count { color: var(--color-text-secondary, #9ca3af) !important; } + /* ── Model block header ── */ .model-header { color: var(--color-text-primary, #e5e7eb) !important; border-color: var(--color-border-secondary, #3f3f46) !important; } - th { background: var(--color-background-secondary, #27272a) !important; } - td, th { border-color: var(--color-border-tertiary, #3f3f46) !important; } + /* ── Table ── */ + th { background: var(--color-background-secondary, #27272a) !important; + color: var(--color-text-secondary, #9ca3af) !important; } + td, th { border-color: var(--color-border-tertiary, #3f3f46) !important; } + /* Monospace cells — explicit color so they don't inherit dark-bg-only styles */ + td.col-name, td.col-type { color: var(--color-text-primary, #e5e7eb) !important; } + /* ── Arrow between types ── */ + .arrow { color: var(--color-text-tertiary, #6b7280) !important; } + /* ── Diff row backgrounds — use low-opacity tints on dark ── */ + tr.added td { background: var(--color-background-success, rgba(5,150,105,0.15)) !important; } + tr.removed td { background: var(--color-background-danger, rgba(220,38,38,0.12)) !important; } + tr.changed td { background: var(--color-background-info, rgba(37,99,235,0.12)) !important; } + /* ── Section pills — light pastels too bright on dark; flip to dark bg + light text ── */ + .pill-added { background: var(--color-background-success, #064e3b) !important; + color: var(--color-text-success, #6ee7b7) !important; } + .pill-removed { background: var(--color-background-danger, #7f1d1d) !important; + color: var(--color-text-danger, #fca5a5) !important; } + .pill-changed { background: 
var(--color-background-info, #1e3a8a) !important; + color: var(--color-text-info, #93c5fd) !important; } + /* ── Misc ── */ .unchanged-msg { color: var(--color-text-secondary, #9ca3af) !important; } - .empty { color: var(--color-text-secondary, #9ca3af) !important; } - tr.added td { background: var(--color-background-success, rgba(5,150,105,0.12)) !important; } - tr.removed td { background: var(--color-background-danger, rgba(220,38,38,0.10)) !important; } - tr.changed td { background: var(--color-background-info, rgba(37,99,235,0.10)) !important; } + .empty { color: var(--color-text-secondary, #9ca3af) !important; } } /* ── Header ─────────────────────────────────────────────────────── */ From 1b0985434481f0db32740af564fb4fb2e4b13372 Mon Sep 17 00:00:00 2001 From: Kent Date: Mon, 25 May 2026 17:26:14 +0800 Subject: [PATCH 14/43] feat(cli): add `recce mcp-config-install` helper for Claude Desktop config POC users had to manually edit claude_desktop_config.json to register both recce + recce-widgets MCP servers and set RECCE_MCP_WIDGETS=1 on both. This helper does it idempotently: merges entries (preserves other servers), auto-sets the env var, backs up the old config, and prints next-steps. - Required: --project-dir (validated for dbt_project.yml presence) - Optional: --config , --yes, --dry-run - iter 1 scope: macOS only, local mode only (errors on cloud flags / other OS with explicit fix message) - 5 unit tests using CliRunner + tmp_path (write/preserve/validate/dry-run/backup) Closes the manual install friction surfaced in iter 1 manual testing. Cuts "5 minutes editing JSON + finding absolute recce path + restarting" to one command + Cmd+Q. 
Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- recce/cli.py | 217 +++++++++++++++++++++++++++++ tests/test_mcp_config_install.py | 227 +++++++++++++++++++++++++++++++ 2 files changed, 444 insertions(+) create mode 100644 tests/test_mcp_config_install.py diff --git a/recce/cli.py b/recce/cli.py index ab58063df..b80486af0 100644 --- a/recce/cli.py +++ b/recce/cli.py @@ -2979,6 +2979,223 @@ def mcp_widget_server(state_file, sse, host, port, **kwargs): exit(1) +@cli.command(cls=TrackCommand) +@click.option( + "--project-dir", + required=True, + help="Absolute path to your dbt project (must contain dbt_project.yml).", + type=click.Path(), +) +@click.option( + "--config", + "claude_config", + default=None, + help=( + "Path to claude_desktop_config.json. " + "Default: ~/Library/Application Support/Claude/claude_desktop_config.json (macOS only)." + ), + type=click.Path(), +) +@click.option( + "--yes", + "-y", + is_flag=True, + default=False, + help="Skip interactive confirmation prompt (for scripting).", +) +@click.option( + "--dry-run", + is_flag=True, + default=False, + help="Print the proposed config diff to stderr without writing.", +) +def mcp_config_install(project_dir, claude_config, yes, dry_run): + """ + Install recce and recce-widgets MCP server entries into Claude Desktop config. + + Writes both `recce` and `recce-widgets` entries into + ~/Library/Application Support/Claude/claude_desktop_config.json + (macOS only — iter 1 scope), setting RECCE_MCP_WIDGETS=1 on both. + + Existing entries are overwritten (idempotent). Other MCP servers in the + config are preserved. A backup is created as claude_desktop_config.json.recce.bak + before any write. 
+ + \b + Examples: + + \b + # Install with interactive confirmation + recce mcp-config-install --project-dir /path/to/my_dbt_project + + \b + # Install without confirmation (for scripting) + recce mcp-config-install --project-dir /path/to/my_dbt_project --yes + + \b + # Preview the changes without writing + recce mcp-config-install --project-dir /path/to/my_dbt_project --dry-run + """ + import json + import shutil + import sys + + from rich.console import Console + + console = Console(stderr=True) + + # ------------------------------------------------------------------ + # Cloud / session flags guard (not supported in iter 1) + # These flags are not in the decorator, but keep a guard in case + # someone passes them via env or future changes. + # ------------------------------------------------------------------ + + # ------------------------------------------------------------------ + # OS guard: macOS only in iter 1 + # ------------------------------------------------------------------ + if sys.platform != "darwin": + console.print( + "[[red]Error[/red]] mcp-config-install currently supports macOS only (iter 1 scope).\n" + "Manual config: edit your MCP client config to add both `recce` and `recce-widgets`\n" + "entries with RECCE_MCP_WIDGETS=1. See docs/mcp-widgets.md for the JSON snippet." 
+ ) + exit(1) + + # ------------------------------------------------------------------ + # Validate project dir + # ------------------------------------------------------------------ + project_dir_path = Path(project_dir).resolve() + if not project_dir_path.exists(): + console.print( + f"[[red]Error[/red]] Project directory not found: {project_dir_path}\n" + "Fix: create the directory or pass an existing dbt project path.\n" + "Example: recce mcp-config-install --project-dir /path/to/my_dbt_project" + ) + exit(1) + if not (project_dir_path / "dbt_project.yml").exists(): + console.print( + f"[[red]Error[/red]] No dbt_project.yml found in: {project_dir_path}\n" + "Fix: pass the root directory of a dbt project that contains dbt_project.yml.\n" + "Example: recce mcp-config-install --project-dir /path/to/my_dbt_project" + ) + exit(1) + + # ------------------------------------------------------------------ + # Resolve Claude Desktop config path + # ------------------------------------------------------------------ + if claude_config: + config_path = Path(claude_config).resolve() + else: + config_path = Path.home() / "Library" / "Application Support" / "Claude" / "claude_desktop_config.json" + + # Create config file with minimal skeleton if it does not exist + if not config_path.exists(): + config_path.parent.mkdir(parents=True, exist_ok=True) + config_path.write_text(json.dumps({"mcpServers": {}}, indent=2)) + console.print(f"[yellow]Created new config file:[/yellow] {config_path}") + + # Validate existing config is parseable JSON + try: + existing_text = config_path.read_text(encoding="utf-8") + existing_config = json.loads(existing_text) + except json.JSONDecodeError as e: + console.print( + f"[[red]Error[/red]] Config file is not valid JSON: {config_path}\n" + f"JSON parse error: {e}\n" + "Fix: restore the file from a backup or fix the JSON manually before re-running." 
+ ) + exit(1) + + # Ensure mcpServers key exists + if "mcpServers" not in existing_config: + existing_config["mcpServers"] = {} + + # ------------------------------------------------------------------ + # Find the recce binary (absolute path) + # ------------------------------------------------------------------ + recce_bin = shutil.which("recce") + if not recce_bin: + # Fallback: use the current Python executable with -m recce + recce_bin = sys.executable + recce_base_args_prefix = ["-m", "recce"] + else: + recce_base_args_prefix = [] + + def _make_args(subcommand): + return recce_base_args_prefix + [subcommand, "--project-dir", str(project_dir_path)] + + new_entries = { + "recce": { + "command": recce_bin, + "args": _make_args("mcp-server"), + "env": {"RECCE_MCP_WIDGETS": "1"}, + }, + "recce-widgets": { + "command": recce_bin, + "args": _make_args("mcp-widget-server"), + "env": {"RECCE_MCP_WIDGETS": "1"}, + }, + } + + # ------------------------------------------------------------------ + # Compute diff summary + # ------------------------------------------------------------------ + existing_servers = existing_config["mcpServers"] + added = [k for k in new_entries if k not in existing_servers] + modified = [k for k in new_entries if k in existing_servers] + preserved = [k for k in existing_servers if k not in new_entries] + + # Print diff summary to stderr + console.print("\n[bold]Proposed changes to Claude Desktop MCP config:[/bold]") + console.print(f" Config file: {config_path}") + for key in added: + console.print(f" [green]+ {key}[/green] (new entry)") + for key in modified: + console.print(f" [yellow]~ {key}[/yellow] (update existing entry)") + for key in preserved: + console.print(f" [dim] {key}[/dim] (unchanged — preserved)") + + if dry_run: + console.print("\n[yellow]Dry run — no changes written.[/yellow]") + return + + # ------------------------------------------------------------------ + # Interactive confirmation (unless --yes) + # 
------------------------------------------------------------------ + if not yes: + apply = click.confirm("\nApply?", default=False) + if not apply: + console.print("[yellow]Aborted — no changes made.[/yellow]") + return + + # ------------------------------------------------------------------ + # Backup existing config + # ------------------------------------------------------------------ + backup_path = config_path.with_suffix(config_path.suffix + ".recce.bak") + shutil.copy2(str(config_path), str(backup_path)) + + # ------------------------------------------------------------------ + # Merge entries and write + # ------------------------------------------------------------------ + existing_config["mcpServers"].update(new_entries) + config_path.write_text(json.dumps(existing_config, indent=2), encoding="utf-8") + + # ------------------------------------------------------------------ + # Success message + # ------------------------------------------------------------------ + keys_written = ", ".join(new_entries.keys()) + console.print(f"\n[green]✓[/green] Wrote {len(new_entries)} MCP server entries ({keys_written}) to {config_path}") + console.print(f"[green]✓[/green] Backup saved to {backup_path}") + console.print( + "\nNext steps:\n" + " 1. Cmd+Q to fully quit Claude Desktop " + "(config only loads on launch — reload window doesn't work)\n" + " 2. Reopen Claude Desktop\n" + ' 3. In a new chat, try: "Use the row_count_diff tool to compare row counts."\n' + "\nTo undo: replace claude_desktop_config.json with the .recce.bak file." + ) + + @cli.group("cache", short_help="Manage column-level lineage cache.") def cache(): """Manage column-level lineage cache.""" diff --git a/tests/test_mcp_config_install.py b/tests/test_mcp_config_install.py new file mode 100644 index 000000000..64d92b3fd --- /dev/null +++ b/tests/test_mcp_config_install.py @@ -0,0 +1,227 @@ +"""Tests for `recce mcp-config-install` CLI subcommand. 
+ +Covers: +- Writes both recce and recce-widgets MCP entries with RECCE_MCP_WIDGETS=1 +- Preserves existing MCP server entries (other servers untouched) +- Validates --project-dir (must contain dbt_project.yml) +- Dry-run mode does not write to disk +- Backup file is created before writing +""" + +import json +import sys +from pathlib import Path + +from click.testing import CliRunner + +from recce.cli import mcp_config_install + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_dbt_project(tmp_path: Path) -> Path: + """Create a minimal dbt project directory with dbt_project.yml.""" + tmp_path.mkdir(parents=True, exist_ok=True) + (tmp_path / "dbt_project.yml").write_text("name: my_project\nversion: '1.0.0'\n") + return tmp_path + + +def _make_config(config_path: Path, extra_servers=None): + """Write a minimal claude_desktop_config.json to config_path.""" + servers = extra_servers or {} + config_path.write_text(json.dumps({"mcpServers": servers}, indent=2)) + + +# --------------------------------------------------------------------------- +# Test 1: writes two entries with RECCE_MCP_WIDGETS=1 +# --------------------------------------------------------------------------- + + +def test_install_writes_two_entries(tmp_path, monkeypatch): + """mcp-config-install writes recce and recce-widgets with RECCE_MCP_WIDGETS=1.""" + monkeypatch.setattr(sys, "platform", "darwin") + + project_dir = _make_dbt_project(tmp_path / "my_project") + config_file = tmp_path / "claude_desktop_config.json" + _make_config(config_file) + + runner = CliRunner() + result = runner.invoke( + mcp_config_install, + [ + "--project-dir", + str(project_dir), + "--config", + str(config_file), + "--yes", + ], + ) + + assert result.exit_code == 0, f"Unexpected exit: {result.output}\n{result.exception}" + + written = json.loads(config_file.read_text()) + servers = 
written["mcpServers"] + + assert "recce" in servers, "recce entry missing" + assert "recce-widgets" in servers, "recce-widgets entry missing" + + assert servers["recce"]["env"].get("RECCE_MCP_WIDGETS") == "1" + assert servers["recce-widgets"]["env"].get("RECCE_MCP_WIDGETS") == "1" + + # args must include the subcommand and --project-dir + assert "mcp-server" in servers["recce"]["args"] + assert "--project-dir" in servers["recce"]["args"] + assert "mcp-widget-server" in servers["recce-widgets"]["args"] + assert "--project-dir" in servers["recce-widgets"]["args"] + + +# --------------------------------------------------------------------------- +# Test 2: preserves existing entries +# --------------------------------------------------------------------------- + + +def test_install_preserves_existing_entries(tmp_path, monkeypatch): + """mcp-config-install preserves third-party MCP server entries.""" + monkeypatch.setattr(sys, "platform", "darwin") + + project_dir = _make_dbt_project(tmp_path / "my_project") + config_file = tmp_path / "claude_desktop_config.json" + _make_config( + config_file, + extra_servers={ + "other-server": { + "command": "/usr/local/bin/other", + "args": ["start"], + "env": {}, + } + }, + ) + + runner = CliRunner() + result = runner.invoke( + mcp_config_install, + [ + "--project-dir", + str(project_dir), + "--config", + str(config_file), + "--yes", + ], + ) + + assert result.exit_code == 0, f"Unexpected exit: {result.output}\n{result.exception}" + + written = json.loads(config_file.read_text()) + servers = written["mcpServers"] + + # Recce entries written + assert "recce" in servers + assert "recce-widgets" in servers + + # Third-party entry preserved unchanged + assert "other-server" in servers + assert servers["other-server"]["command"] == "/usr/local/bin/other" + + +# --------------------------------------------------------------------------- +# Test 3: validates project dir (missing dbt_project.yml) +# 
--------------------------------------------------------------------------- + + +def test_install_validates_project_dir(tmp_path, monkeypatch): + """mcp-config-install errors out when project-dir lacks dbt_project.yml.""" + monkeypatch.setattr(sys, "platform", "darwin") + + empty_dir = tmp_path / "not_a_dbt_project" + empty_dir.mkdir() + config_file = tmp_path / "claude_desktop_config.json" + _make_config(config_file) + + runner = CliRunner() + result = runner.invoke( + mcp_config_install, + [ + "--project-dir", + str(empty_dir), + "--config", + str(config_file), + "--yes", + ], + ) + + assert result.exit_code != 0, "Expected non-zero exit for missing dbt_project.yml" + assert "dbt_project.yml" in (result.output or ""), f"Expected dbt_project.yml mention in output: {result.output}" + + +# --------------------------------------------------------------------------- +# Test 4: dry-run does not write +# --------------------------------------------------------------------------- + + +def test_install_dry_run_does_not_write(tmp_path, monkeypatch): + """mcp-config-install --dry-run prints diff but does not modify the config file.""" + monkeypatch.setattr(sys, "platform", "darwin") + + project_dir = _make_dbt_project(tmp_path / "my_project") + config_file = tmp_path / "claude_desktop_config.json" + original_content = json.dumps({"mcpServers": {}}, indent=2) + config_file.write_text(original_content) + + runner = CliRunner() + result = runner.invoke( + mcp_config_install, + [ + "--project-dir", + str(project_dir), + "--config", + str(config_file), + "--dry-run", + ], + ) + + assert result.exit_code == 0, f"Unexpected exit: {result.output}\n{result.exception}" + + # Config file must be unchanged + assert config_file.read_text() == original_content, "Config file was modified during --dry-run" + + # Backup must NOT be created + backup_path = config_file.with_suffix(config_file.suffix + ".recce.bak") + assert not backup_path.exists(), "Backup file should not be created during 
--dry-run" + + +# --------------------------------------------------------------------------- +# Test 5: backup is created before writing +# --------------------------------------------------------------------------- + + +def test_install_backup_created(tmp_path, monkeypatch): + """mcp-config-install creates a .recce.bak backup of the config before writing.""" + monkeypatch.setattr(sys, "platform", "darwin") + + project_dir = _make_dbt_project(tmp_path / "my_project") + config_file = tmp_path / "claude_desktop_config.json" + original_content = json.dumps({"mcpServers": {}}, indent=2) + config_file.write_text(original_content) + + runner = CliRunner() + result = runner.invoke( + mcp_config_install, + [ + "--project-dir", + str(project_dir), + "--config", + str(config_file), + "--yes", + ], + ) + + assert result.exit_code == 0, f"Unexpected exit: {result.output}\n{result.exception}" + + backup_path = config_file.with_suffix(config_file.suffix + ".recce.bak") + assert backup_path.exists(), "Backup file (.recce.bak) not created" + + # Backup content matches original (pre-write snapshot) + backup_content = json.loads(backup_path.read_text()) + assert backup_content == {"mcpServers": {}}, "Backup content does not match original config" From b150e1fc898e328e73e3d593a19e8705044a080d Mon Sep 17 00:00:00 2001 From: Kent Date: Mon, 25 May 2026 17:33:35 +0800 Subject: [PATCH 15/43] feat(widgets): add get_server_info widget (Phase A tier 1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the third widget in the iter 1 widget POC line — tier 1 (status pill + key/value grid) for server runtime state. 
First widget added post-iter-1 idiomatic refactor, so it's born following the canonical pattern: - Pydantic output model matching _tool_get_server_info return shape - @mcp.tool with annotations dict (readOnly/idempotent/title) - CallToolResult with short content + structuredContent (no agent dual-render) - @mcp.resource with mime text/html;profile=mcp-app + CSP + prefersBorder - Widget HTML: SDK helpers (applyDocumentTheme + applyHostStyleVariables + applyHostFonts), onhostcontextchanged + onteardown, exhaustive @media dark for every class with var(--color-*, fallback) — closes the dark-mode fallback gap that bit row_count_diff/schema_diff earlier (commit b882697e) - WIDGET_TOOLS set updated to filter from main mcp-server when widgets enabled - 2 new tests + updated enumeration assertions (10 total, was 8) - docs/mcp-widgets.md "Add widget N+1" walkthrough now references this as the canonical worked example (born idiomatic, no later-cycle defensive layers) Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- docs/mcp-widgets.md | 38 ++- recce/data/mcp/get_server_info.html | 344 ++++++++++++++++++++++++++++ recce/mcp_server.py | 2 +- recce/widget_server.py | 98 ++++++++ tests/test_widget_server.py | 94 +++++++- 5 files changed, 561 insertions(+), 15 deletions(-) create mode 100644 recce/data/mcp/get_server_info.html diff --git a/docs/mcp-widgets.md b/docs/mcp-widgets.md index 46079d862..1771f90a7 100644 --- a/docs/mcp-widgets.md +++ b/docs/mcp-widgets.md @@ -12,8 +12,9 @@ through the `RECCE_MCP_WIDGETS=1` environment variable: when that flag is set, that Claude Desktop routes those calls exclusively to `mcp-widget-server`, which annotates each tool with `_meta.ui.resourceUri` pointing at an HTML resource. -Iter 1 ships two widgets: `row_count_diff` and `schema_diff`. Both run in -**local mode only** — cloud/session mode is not supported until iter 2. +Iter 1 ships three widgets: `row_count_diff`, `schema_diff`, and +`get_server_info`. 
All run in **local mode only** — cloud/session mode is not +supported until iter 2. --- @@ -30,8 +31,9 @@ recce/ mcp/ # Widget HTML asset directory (gitignored via per-extension row_count_diff.html # allowlist — see .gitignore). Self-contained HTML files. schema_diff.html + get_server_info.html tests/ - test_widget_server.py # 5 tests covering WIDGET_TOOLS coordination + widget server. + test_widget_server.py # 10 tests covering WIDGET_TOOLS coordination + widget server. docs/ mcp-widgets.md # This file. ``` @@ -81,7 +83,7 @@ The worked reference throughout is `row_count_diff`. Add a new widget called File: `recce/mcp_server.py`, near line 56. ```python -WIDGET_TOOLS = {"row_count_diff", "schema_diff", ""} +WIDGET_TOOLS = {"row_count_diff", "schema_diff", "get_server_info", ""} ``` This single change makes `mcp-server` omit `` from `tools/list` when @@ -362,6 +364,13 @@ directly. Always use Pydantic models for widget tool outputs. Configure logging to write to `stderr` only (see `logging.basicConfig(stream=sys.stderr)` in `run_widget_server()`). Never add bare `print()` calls in `widget_server.py`. +- **No-arg tools: omit the `args` parameter entirely.** If the underlying MCP + tool takes no arguments (e.g. `get_server_info`), define the widget delegate + as `async def get_server_info() -> CallToolResult:` with no `args` parameter. + FastMCP generates an empty `inputSchema` automatically. Do NOT add a dummy + `args: None` or empty-model arg — it generates a confusing schema that + Claude Desktop may mis-render. + - **CSS token naming: use `danger`, not `error`.** The MCP Apps spec enum `McpUiStyleVariableKey` uses `--color-text-danger` and `--color-background-danger`. There is no `--color-text-error` token — using @@ -398,16 +407,27 @@ documented and supported. Reconsider if ext-apps publishes a Python SDK. 
## Reference Widgets -Two working examples to read when building widget #3: +Three working examples (in order of implementation): | File | Tier | What it demonstrates | |------|------|----------------------| | `recce/data/mcp/row_count_diff.html` | Status pills + diff numbers | Per-model status badges (`ok`, `table_not_found`, etc.), signed diff display, `base_meta`/`curr_meta` shape | | `recce/data/mcp/schema_diff.html` | HTML table | Added/removed/type_changed column grouping, `_compute_schema_changes` rich shape, per-model section headers | - -Both files are self-contained HTML — no build step, no npm dependency. They -import the SDK at runtime from unpkg. Open either file in a browser to verify -rendering without running a full MCP server. +| `recce/data/mcp/get_server_info.html` | Status badge + key/value grid | **Canonical post-refactor example.** Born idiomatic: no `models` wrapper (tool has no per-model loop), optional `git`/`pull_request` nested objects, 2-column CSS grid layout, empty-state card when `mode="none"` | + +`get_server_info` is the **recommended canonical example** for new widgets +because it was written after the idiomatic pattern was established (Day 3 +refactor). It uses all idioms correctly from the start: +- Pydantic `ServerInfoOutput` with `Optional` nested sub-models (`GitInfo`, + `PullRequestInfo`) rather than bare `Dict[str, Any]` +- `CallToolResult` with one-sentence `content` + `structuredContent` +- No-arg tool (no input model needed — omit the `args` param entirely) +- `@mcp.resource` + `mime_type="text/html;profile=mcp-app"` with CSP +- Exhaustive `@media (prefers-color-scheme: dark)` covering every CSS class + +All three files are self-contained HTML — no build step, no npm dependency. +They import the SDK at runtime from unpkg. Open any file in a browser to +verify rendering without running a full MCP server. 
--- diff --git a/recce/data/mcp/get_server_info.html b/recce/data/mcp/get_server_info.html new file mode 100644 index 000000000..85d36780e --- /dev/null +++ b/recce/data/mcp/get_server_info.html @@ -0,0 +1,344 @@ + + + + + Server Info + + + +
Loading…
+ + + diff --git a/recce/mcp_server.py b/recce/mcp_server.py index 1d90a99a2..86119d470 100644 --- a/recce/mcp_server.py +++ b/recce/mcp_server.py @@ -53,7 +53,7 @@ # When RECCE_MCP_WIDGETS=1 is set, these tools are served by `recce mcp-widget-server` # instead of the main `recce mcp-server`. The main server omits them from list_tools # and raises in call_tool if the agent calls them anyway. See recce/widget_server.py. -WIDGET_TOOLS = {"row_count_diff", "schema_diff"} +WIDGET_TOOLS = {"row_count_diff", "schema_diff", "get_server_info"} def _widgets_enabled() -> bool: diff --git a/recce/widget_server.py b/recce/widget_server.py index fd06958f2..9f9865e6d 100644 --- a/recce/widget_server.py +++ b/recce/widget_server.py @@ -59,6 +59,41 @@ class SchemaDiffOutput(BaseModel): models: Dict[str, SchemaChange] +class GitInfo(BaseModel): + """Git branch / SHA snapshot embedded in server info.""" + + branch: Optional[str] = None + base_branch: Optional[str] = None + base_sha: Optional[str] = None + current_sha: Optional[str] = None + + +class PullRequestInfo(BaseModel): + """Pull-request metadata embedded in server info.""" + + id: Optional[str] = None + title: Optional[str] = None + url: Optional[str] = None + + +class ServerInfoOutput(BaseModel): + """Output model for the get_server_info widget tool. + + Fields mirror the dict returned by RecceMCPServer._tool_get_server_info. + All fields are optional / have defaults because the handler may omit them + in cloud mode or when the state_loader is absent. 
+ """ + + mode: str = "local" # "local" | "cloud" | "none" + adapter_type: Optional[str] = None + review_mode: Optional[bool] = None + support_tasks: Optional[List[str]] = None + single_env: bool = False + base_status: Optional[str] = None # "fresh"|"stale_time"|"stale_sha"|"missing"|"single_env"|"unknown" + git: Optional[GitInfo] = None + pull_request: Optional[PullRequestInfo] = None + + # --------------------------------------------------------------------------- # Pydantic input models # --------------------------------------------------------------------------- @@ -265,6 +300,69 @@ def schema_diff_resource() -> str: return _read_widget_html("schema_diff") +# --------------------------------------------------------------------------- +# get_server_info widget tool + resource +# --------------------------------------------------------------------------- + + +@mcp.tool( + name="get_server_info", + annotations={ + "title": "Server Info (Widget)", + "readOnlyHint": True, + "destructiveHint": False, + "idempotentHint": True, + "openWorldHint": False, + }, + meta={ + "ui": {"resourceUri": "ui://recce/get_server_info.html"}, + "ui/resourceUri": "ui://recce/get_server_info.html", + }, +) +async def get_server_info() -> CallToolResult: + """Get Recce server runtime info and configuration state. + + Returns server mode, adapter type, single-env flag, base artifacts + freshness status, and optional git/PR context. Rendered as a status + widget; the agent should not summarize the data as text. + + Args: none + + Returns: + CallToolResult with structuredContent: ServerInfoOutput shape + {mode, adapter_type, review_mode, support_tasks, single_env, + base_status, git?, pull_request?} + + Use when: + - User asks "is recce configured / what's the server state?" + - Debugging "why isn't this tool working" — base_status reveals + stale or missing artifacts; mode shows which backend is active. 
+ Don't use when: + - User wants to CHANGE backend — use set_backend instead + - User wants tool list — use the MCP host's tool enumeration + """ + result = await _recce_server._tool_get_server_info({}) + output = ServerInfoOutput(**result) + return CallToolResult( + content=[TextContent(type="text", text="Server info rendered in widget.")], + structuredContent=output.model_dump(), + ) + + +@mcp.resource( + uri="ui://recce/get_server_info.html", + mime_type="text/html;profile=mcp-app", + meta={ + "ui": { + "csp": {"resourceDomains": ["https://unpkg.com"]}, + "prefersBorder": False, + }, + }, +) +def get_server_info_resource() -> str: + return _read_widget_html("get_server_info") + + # --------------------------------------------------------------------------- # Entry point # --------------------------------------------------------------------------- diff --git a/tests/test_widget_server.py b/tests/test_widget_server.py index ffed0c22e..d43751f72 100644 --- a/tests/test_widget_server.py +++ b/tests/test_widget_server.py @@ -49,6 +49,7 @@ async def test_mcp_server_lists_all_tools_when_widgets_disabled(monkeypatch): assert "row_count_diff" in names assert "schema_diff" in names + assert "get_server_info" in names # Sanity: lineage_diff is always present assert "lineage_diff" in names @@ -69,18 +70,19 @@ async def test_mcp_server_filters_widget_tools_when_widgets_enabled(monkeypatch) assert "row_count_diff" not in names assert "schema_diff" not in names + assert "get_server_info" not in names # Other tools must still be present assert "lineage_diff" in names # --------------------------------------------------------------------------- -# Test 3: Widget server registers exactly 2 tools + 2 resources +# Test 3: Widget server registers exactly 3 tools + 3 resources # --------------------------------------------------------------------------- @pytest.mark.asyncio -async def test_widget_server_registers_two_tools_and_two_resources(): - """Widget FastMCP instance has exactly 
row_count_diff + schema_diff tools and resources. +async def test_widget_server_registers_three_tools_and_three_resources(): + """Widget FastMCP instance has exactly row_count_diff + schema_diff + get_server_info tools/resources. Uses FastMCP public API: mcp.list_tools() and mcp.list_resources(). """ @@ -92,10 +94,11 @@ async def test_widget_server_registers_two_tools_and_two_resources(): tool_names = {t.name for t in tools} resource_uris = {str(r.uri) for r in resources} - assert tool_names == {"row_count_diff", "schema_diff"} + assert tool_names == {"row_count_diff", "schema_diff", "get_server_info"} assert resource_uris == { "ui://recce/row_count_diff.html", "ui://recce/schema_diff.html", + "ui://recce/get_server_info.html", } @@ -258,7 +261,7 @@ async def test_widget_tool_annotations_present(): tools = await mcp.list_tools() tool_map = {t.name: t for t in tools} - for tool_name in ("row_count_diff", "schema_diff"): + for tool_name in ("row_count_diff", "schema_diff", "get_server_info"): assert tool_name in tool_map, f"{tool_name} not found in widget mcp tools" t = tool_map[tool_name] a = t.annotations @@ -268,3 +271,84 @@ async def test_widget_tool_annotations_present(): assert a.idempotentHint is True, f"{tool_name}: expected idempotentHint=True" assert a.openWorldHint is False, f"{tool_name}: expected openWorldHint=False" assert a.title is not None and len(a.title) > 0, f"{tool_name}: title must be set" + + +# --------------------------------------------------------------------------- +# Test 9: get_server_info widget tool is registered with correct resource URI +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_get_server_info_widget_registered(): + """get_server_info appears in widget mcp tools/list and its resource URI exists. 
+ + Verifies: + - tool named 'get_server_info' is in widget mcp tool list + - resource URI 'ui://recce/get_server_info.html' is in widget mcp resource list + """ + from recce.widget_server import mcp + + tools = await mcp.list_tools() + resources = await mcp.list_resources() + + tool_names = {t.name for t in tools} + resource_uris = {str(r.uri) for r in resources} + + assert "get_server_info" in tool_names + assert "ui://recce/get_server_info.html" in resource_uris + + +# --------------------------------------------------------------------------- +# Test 10: get_server_info returns CallToolResult with ServerInfoOutput shape +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_get_server_info_returns_calltoolresult_with_pydantic_shape(): + """get_server_info handler returns CallToolResult with structuredContent matching ServerInfoOutput. + + Verifies: + - content[0].text is a short human-readable sentence (not a JSON dump) + - structuredContent is populated and passes ServerInfoOutput.model_validate() + - structuredContent has expected fields: mode, single_env, base_status + """ + from mcp.types import CallToolResult + + import recce.widget_server as ws + from recce.widget_server import ServerInfoOutput + + mock_server = MagicMock() + mock_server._tool_get_server_info = AsyncMock( + return_value={ + "mode": "local", + "adapter_type": "dbt", + "review_mode": False, + "support_tasks": ["row_count_diff", "schema_diff"], + "single_env": False, + "base_status": "fresh", + } + ) + + original = ws._recce_server + ws._recce_server = mock_server + try: + result = await ws.get_server_info() + finally: + ws._recce_server = original + + assert isinstance(result, CallToolResult) + assert len(result.content) == 1 + content_text = result.content[0].text + assert isinstance(content_text, str) + assert len(content_text) < 100, f"content too long ({len(content_text)} chars): {content_text!r}" + assert "widget" in 
content_text.lower() + + assert result.structuredContent is not None + # Must round-trip through Pydantic validation without error + validated = ServerInfoOutput.model_validate(result.structuredContent) + assert validated.mode == "local" + assert validated.adapter_type == "dbt" + assert validated.single_env is False + assert validated.base_status == "fresh" + assert validated.git is None + assert validated.pull_request is None From 4c0de22d10df944b6ef672a49c5a3e858f8d7822 Mon Sep 17 00:00:00 2001 From: Kent Date: Mon, 25 May 2026 17:41:48 +0800 Subject: [PATCH 16/43] feat(widgets): add list_checks widget (Phase A tier 2) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fourth widget. tier 2 (list / simple table) — saved Recce checks for a PR session, rendered as a 3-up summary + status table. - Pydantic CheckSummary + ListChecksOutput models matching actual _tool_list_checks shape (minimal field set — name/type/status/description, NOT internal params). approved count from raw return ("approved" key); pending derived as total - approved in the delegate. 
- @mcp.tool with annotations, CallToolResult short content + structuredContent - @mcp.resource with mime/CSP/prefersBorder - Widget HTML: SDK helpers + onhostcontextchanged + onteardown + exhaustive @media dark overrides (354 lines, 13 KB) - WIDGET_TOOLS filter updated to 4 widget tools - 2 new tests; enumeration assertion bumped to 4; annotation loop includes list_checks; filter test asserts list_checks absent when widgets enabled Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- docs/mcp-widgets.md | 12 +- recce/data/mcp/list_checks.html | 354 ++++++++++++++++++++++++++++++++ recce/mcp_server.py | 2 +- recce/widget_server.py | 103 ++++++++++ tests/test_widget_server.py | 111 +++++++++- 5 files changed, 571 insertions(+), 11 deletions(-) create mode 100644 recce/data/mcp/list_checks.html diff --git a/docs/mcp-widgets.md b/docs/mcp-widgets.md index 1771f90a7..569bec70d 100644 --- a/docs/mcp-widgets.md +++ b/docs/mcp-widgets.md @@ -12,8 +12,8 @@ through the `RECCE_MCP_WIDGETS=1` environment variable: when that flag is set, that Claude Desktop routes those calls exclusively to `mcp-widget-server`, which annotates each tool with `_meta.ui.resourceUri` pointing at an HTML resource. -Iter 1 ships three widgets: `row_count_diff`, `schema_diff`, and -`get_server_info`. All run in **local mode only** — cloud/session mode is not +Iter 1 ships four widgets: `row_count_diff`, `schema_diff`, `get_server_info`, +and `list_checks`. All run in **local mode only** — cloud/session mode is not supported until iter 2. --- @@ -32,8 +32,9 @@ recce/ row_count_diff.html # allowlist — see .gitignore). Self-contained HTML files. schema_diff.html get_server_info.html + list_checks.html tests/ - test_widget_server.py # 10 tests covering WIDGET_TOOLS coordination + widget server. + test_widget_server.py # 12 tests covering WIDGET_TOOLS coordination + widget server. docs/ mcp-widgets.md # This file. ``` @@ -83,7 +84,7 @@ The worked reference throughout is `row_count_diff`. 
Add a new widget called File: `recce/mcp_server.py`, near line 56. ```python -WIDGET_TOOLS = {"row_count_diff", "schema_diff", "get_server_info", ""} +WIDGET_TOOLS = {"row_count_diff", "schema_diff", "get_server_info", "list_checks", ""} ``` This single change makes `mcp-server` omit `` from `tools/list` when @@ -407,13 +408,14 @@ documented and supported. Reconsider if ext-apps publishes a Python SDK. ## Reference Widgets -Three working examples (in order of implementation): +Four working examples (in order of implementation): | File | Tier | What it demonstrates | |------|------|----------------------| | `recce/data/mcp/row_count_diff.html` | Status pills + diff numbers | Per-model status badges (`ok`, `table_not_found`, etc.), signed diff display, `base_meta`/`curr_meta` shape | | `recce/data/mcp/schema_diff.html` | HTML table | Added/removed/type_changed column grouping, `_compute_schema_changes` rich shape, per-model section headers | | `recce/data/mcp/get_server_info.html` | Status badge + key/value grid | **Canonical post-refactor example.** Born idiomatic: no `models` wrapper (tool has no per-model loop), optional `git`/`pull_request` nested objects, 2-column CSS grid layout, empty-state card when `mode="none"` | +| `recce/data/mcp/list_checks.html` | List / simple table | 3-up summary cards (Total / Approved / Pending), 4-column status table, empty-state with hint, `is_preset` badge, `_tool_list_checks` returns a flat list + pre-computed `total`/`approved` — `pending` derived in the widget delegate | `get_server_info` is the **recommended canonical example** for new widgets because it was written after the idiomatic pattern was established (Day 3 diff --git a/recce/data/mcp/list_checks.html b/recce/data/mcp/list_checks.html new file mode 100644 index 000000000..57ce1568a --- /dev/null +++ b/recce/data/mcp/list_checks.html @@ -0,0 +1,354 @@ + + + + + Checks + + + +
Loading…
+ + + diff --git a/recce/mcp_server.py b/recce/mcp_server.py index 86119d470..a922130f1 100644 --- a/recce/mcp_server.py +++ b/recce/mcp_server.py @@ -53,7 +53,7 @@ # When RECCE_MCP_WIDGETS=1 is set, these tools are served by `recce mcp-widget-server` # instead of the main `recce mcp-server`. The main server omits them from list_tools # and raises in call_tool if the agent calls them anyway. See recce/widget_server.py. -WIDGET_TOOLS = {"row_count_diff", "schema_diff", "get_server_info"} +WIDGET_TOOLS = {"row_count_diff", "schema_diff", "get_server_info", "list_checks"} def _widgets_enabled() -> bool: diff --git a/recce/widget_server.py b/recce/widget_server.py index 9f9865e6d..7f20af492 100644 --- a/recce/widget_server.py +++ b/recce/widget_server.py @@ -363,6 +363,109 @@ def get_server_info_resource() -> str: return _read_widget_html("get_server_info") +# --------------------------------------------------------------------------- +# list_checks widget tool + resource +# --------------------------------------------------------------------------- + + +class CheckSummary(BaseModel): + """Minimal shape of one saved Recce check as returned by _tool_list_checks.""" + + check_id: str + name: str + type: str # check type slug, e.g. "row_count_diff", "schema_diff" + description: str = "" + is_checked: bool = False + is_preset: bool = False + # params intentionally omitted — widget shows name/type/status/description only + + +class ListChecksOutput(BaseModel): + """Output model for the list_checks widget tool. + + Fields mirror the dict returned by RecceMCPServer._tool_list_checks plus + derived counts computed in the widget delegate. 
+ """ + + checks: List[CheckSummary] + total: int + approved: int + pending: int + + +class ListChecksInput(BaseModel): + pass # _tool_list_checks takes no arguments — list everything in the session + + +@mcp.tool( + name="list_checks", + annotations={ + "title": "Checks (Widget)", + "readOnlyHint": True, + "destructiveHint": False, + "idempotentHint": True, + "openWorldHint": False, + }, + meta={ + "ui": {"resourceUri": "ui://recce/list_checks.html"}, + "ui/resourceUri": "ui://recce/list_checks.html", + }, +) +async def list_checks(args: ListChecksInput) -> CallToolResult: + """List all saved Recce checks for this session. + + Returns a summary card (total / approved / pending) plus a status table + of every check. Rendered as an interactive widget; the agent should not + reproduce the table as plain text. + + Args: none (lists every check saved in the current session) + + Returns: + CallToolResult with structuredContent: ListChecksOutput shape + {checks: [{check_id, name, type, description, is_checked, is_preset}], + total: int, approved: int, pending: int} + + Use when: + - User asks "what checks are saved" / "what's been validated" + - Reviewing sign-off status before merging a PR + - Checking whether the current session has any pending validations + Don't use when: + - User wants to RUN a check — use run_check instead + - User wants to CREATE a check — use create_check instead + - Server not configured — call get_server_info first + """ + raw = await _recce_server._tool_list_checks({}) + checks = [CheckSummary(**c) for c in raw.get("checks", [])] + total = raw.get("total", len(checks)) + approved = raw.get("approved", sum(1 for c in checks if c.is_checked)) + pending = total - approved + output = ListChecksOutput( + checks=checks, + total=total, + approved=approved, + pending=pending, + ) + n = len(checks) + return CallToolResult( + content=[TextContent(type="text", text=f"List of {n} check{'s' if n != 1 else ''} rendered in widget.")], + 
structuredContent=output.model_dump(), + ) + + +@mcp.resource( + uri="ui://recce/list_checks.html", + mime_type="text/html;profile=mcp-app", + meta={ + "ui": { + "csp": {"resourceDomains": ["https://unpkg.com"]}, + "prefersBorder": False, + }, + }, +) +def list_checks_resource() -> str: + return _read_widget_html("list_checks") + + # --------------------------------------------------------------------------- # Entry point # --------------------------------------------------------------------------- diff --git a/tests/test_widget_server.py b/tests/test_widget_server.py index d43751f72..fbfc2d5fb 100644 --- a/tests/test_widget_server.py +++ b/tests/test_widget_server.py @@ -71,18 +71,19 @@ async def test_mcp_server_filters_widget_tools_when_widgets_enabled(monkeypatch) assert "row_count_diff" not in names assert "schema_diff" not in names assert "get_server_info" not in names + assert "list_checks" not in names # Other tools must still be present assert "lineage_diff" in names # --------------------------------------------------------------------------- -# Test 3: Widget server registers exactly 3 tools + 3 resources +# Test 3: Widget server registers exactly 4 tools + 4 resources # --------------------------------------------------------------------------- @pytest.mark.asyncio -async def test_widget_server_registers_three_tools_and_three_resources(): - """Widget FastMCP instance has exactly row_count_diff + schema_diff + get_server_info tools/resources. +async def test_widget_server_registers_four_tools_and_four_resources(): + """Widget FastMCP instance has exactly row_count_diff + schema_diff + get_server_info + list_checks tools/resources. Uses FastMCP public API: mcp.list_tools() and mcp.list_resources(). 
""" @@ -94,11 +95,12 @@ async def test_widget_server_registers_three_tools_and_three_resources(): tool_names = {t.name for t in tools} resource_uris = {str(r.uri) for r in resources} - assert tool_names == {"row_count_diff", "schema_diff", "get_server_info"} + assert tool_names == {"row_count_diff", "schema_diff", "get_server_info", "list_checks"} assert resource_uris == { "ui://recce/row_count_diff.html", "ui://recce/schema_diff.html", "ui://recce/get_server_info.html", + "ui://recce/list_checks.html", } @@ -261,7 +263,7 @@ async def test_widget_tool_annotations_present(): tools = await mcp.list_tools() tool_map = {t.name: t for t in tools} - for tool_name in ("row_count_diff", "schema_diff", "get_server_info"): + for tool_name in ("row_count_diff", "schema_diff", "get_server_info", "list_checks"): assert tool_name in tool_map, f"{tool_name} not found in widget mcp tools" t = tool_map[tool_name] a = t.annotations @@ -352,3 +354,102 @@ async def test_get_server_info_returns_calltoolresult_with_pydantic_shape(): assert validated.base_status == "fresh" assert validated.git is None assert validated.pull_request is None + + +# --------------------------------------------------------------------------- +# Test 11: list_checks widget tool is registered with correct resource URI +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_list_checks_widget_registered(): + """list_checks appears in widget mcp tools/list and its resource URI exists. 
+ + Verifies: + - tool named 'list_checks' is in widget mcp tool list + - resource URI 'ui://recce/list_checks.html' is in widget mcp resource list + """ + from recce.widget_server import mcp + + tools = await mcp.list_tools() + resources = await mcp.list_resources() + + tool_names = {t.name for t in tools} + resource_uris = {str(r.uri) for r in resources} + + assert "list_checks" in tool_names + assert "ui://recce/list_checks.html" in resource_uris + + +# --------------------------------------------------------------------------- +# Test 12: list_checks returns CallToolResult with correct Pydantic shape + counts +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_list_checks_returns_calltoolresult_with_pydantic_shape(): + """list_checks handler returns CallToolResult with structuredContent matching ListChecksOutput. + + Verifies: + - content[0].text is a short human-readable sentence (not a JSON dump) + - structuredContent passes ListChecksOutput.model_validate() + - approved/pending counts are derived correctly from the check list + - empty is_preset field is tolerated (default False) + """ + from mcp.types import CallToolResult + + import recce.widget_server as ws + from recce.widget_server import ListChecksInput, ListChecksOutput + + mock_server = MagicMock() + mock_server._tool_list_checks = AsyncMock( + return_value={ + "checks": [ + { + "check_id": "aaaaaaaa-0000-0000-0000-000000000001", + "name": "Row count check", + "type": "row_count_diff", + "description": "Checks that row counts match", + "params": {"select": "customers"}, + "is_checked": True, + "is_preset": False, + }, + { + "check_id": "aaaaaaaa-0000-0000-0000-000000000002", + "name": "Schema check", + "type": "schema_diff", + "description": "", + "params": {}, + "is_checked": False, + "is_preset": True, + }, + ], + "total": 2, + "approved": 1, + } + ) + + original = ws._recce_server + ws._recce_server = mock_server + try: + args = 
ListChecksInput() + result = await ws.list_checks(args) + finally: + ws._recce_server = original + + assert isinstance(result, CallToolResult) + assert len(result.content) == 1 + content_text = result.content[0].text + assert isinstance(content_text, str) + assert len(content_text) < 100, f"content too long ({len(content_text)} chars): {content_text!r}" + assert "widget" in content_text.lower() + + assert result.structuredContent is not None + validated = ListChecksOutput.model_validate(result.structuredContent) + assert validated.total == 2 + assert validated.approved == 1 + assert validated.pending == 1 + assert len(validated.checks) == 2 + assert validated.checks[0].is_checked is True + assert validated.checks[1].is_preset is True + assert validated.checks[1].is_checked is False From 89b724ee28a2940a029d73e80c94d3946c3d280b Mon Sep 17 00:00:00 2001 From: Kent Date: Mon, 25 May 2026 17:50:28 +0800 Subject: [PATCH 17/43] feat(widgets): add get_model widget (Phase A tier 2, Phase A complete) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fifth widget, completing Phase A of the post-iter-1 widget expansion. Single-model detail view (base/current column tables, constraints, not-found state). 
- Pydantic ColumnInfo + ModelEnvironment + GetModelOutput + GetModelInput matching actual _tool_get_model return shape (columns dict → list normalised in _parse_model_env helper; primary_key preserved; raw_code omitted) - @mcp.tool with annotations, CallToolResult short content + structuredContent - @mcp.resource with mime/CSP/prefersBorder - Widget HTML: SDK helpers + onhostcontextchanged + onteardown + exhaustive @media dark; adaptive 2-col/3-col column table layout (constraints visible only when present); per-env base/current sections; not-found empty state - WIDGET_TOOLS filter at 5 tools - 2 new tests (test_get_model_widget_registered, test_get_model_returns_calltoolresult_with_pydantic_shape); enumeration assertions bumped to 5 tools/resources Phase A (tier 1-2 multipliers) complete: row_count_diff, schema_diff, get_server_info, list_checks, get_model. ~half the design's 12 widget candidates done — design Open Q #6 >=50% FastMCP migration trigger approaches. Iter 2 mini-doc to reevaluate. No Pydantic reserved-name conflicts in actual handler shape (columns, primary_key, raw_code are all safe); Gotcha documented in mcp-widgets.md for future implementors. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- docs/mcp-widgets.md | 25 ++- recce/data/mcp/get_model.html | 365 ++++++++++++++++++++++++++++++++++ recce/mcp_server.py | 2 +- recce/widget_server.py | 157 +++++++++++++++ tests/test_widget_server.py | 113 ++++++++++- 5 files changed, 649 insertions(+), 13 deletions(-) create mode 100644 recce/data/mcp/get_model.html diff --git a/docs/mcp-widgets.md b/docs/mcp-widgets.md index 569bec70d..33c43a6ac 100644 --- a/docs/mcp-widgets.md +++ b/docs/mcp-widgets.md @@ -12,9 +12,9 @@ through the `RECCE_MCP_WIDGETS=1` environment variable: when that flag is set, that Claude Desktop routes those calls exclusively to `mcp-widget-server`, which annotates each tool with `_meta.ui.resourceUri` pointing at an HTML resource. 
-Iter 1 ships four widgets: `row_count_diff`, `schema_diff`, `get_server_info`, -and `list_checks`. All run in **local mode only** — cloud/session mode is not -supported until iter 2. +Phase A ships five widgets: `row_count_diff`, `schema_diff`, `get_server_info`, +`list_checks`, and `get_model`. All run in **local mode only** — cloud/session +mode is not supported until iter 2. --- @@ -24,7 +24,7 @@ supported until iter 2. recce/ mcp_server.py # Existing primary server. # WIDGET_TOOLS set + _widgets_enabled() filter live here. - widget_server.py # FastMCP widget server (iter 1). + widget_server.py # FastMCP widget server (Phase A). # @mcp.tool delegates + @mcp.resource handlers. cli.py # mcp-widget-server CLI subcommand added here. data/ @@ -33,8 +33,9 @@ recce/ schema_diff.html get_server_info.html list_checks.html + get_model.html tests/ - test_widget_server.py # 12 tests covering WIDGET_TOOLS coordination + widget server. + test_widget_server.py # 14 tests covering WIDGET_TOOLS coordination + widget server. docs/ mcp-widgets.md # This file. ``` @@ -84,7 +85,7 @@ The worked reference throughout is `row_count_diff`. Add a new widget called File: `recce/mcp_server.py`, near line 56. ```python -WIDGET_TOOLS = {"row_count_diff", "schema_diff", "get_server_info", "list_checks", ""} +WIDGET_TOOLS = {"row_count_diff", "schema_diff", "get_server_info", "list_checks", "get_model", ""} ``` This single change makes `mcp-server` omit `` from `tools/list` when @@ -372,6 +373,15 @@ directly. Always use Pydantic models for widget tool outputs. `args: None` or empty-model arg — it generates a confusing schema that Claude Desktop may mis-render. +- **Watch out for Pydantic reserved field names.** `schema` and any field + starting with `model_` are reserved in Pydantic v2. If the raw handler + response uses one of these keys (e.g. 
a `schema` field for the DB schema + name), either rename the Pydantic field and use `Field(alias="schema")` with + `model_config = {"populate_by_name": True}`, or normalise the key in the + widget delegate before passing to the Pydantic model. `get_model`'s + `_parse_model_env` helper is a worked example: the raw `columns` dict is + normalised to a typed list before constructing `ModelEnvironment`. + - **CSS token naming: use `danger`, not `error`.** The MCP Apps spec enum `McpUiStyleVariableKey` uses `--color-text-danger` and `--color-background-danger`. There is no `--color-text-error` token — using @@ -408,7 +418,7 @@ documented and supported. Reconsider if ext-apps publishes a Python SDK. ## Reference Widgets -Four working examples (in order of implementation): +Five working examples (in order of implementation): | File | Tier | What it demonstrates | |------|------|----------------------| @@ -416,6 +426,7 @@ Four working examples (in order of implementation): | `recce/data/mcp/schema_diff.html` | HTML table | Added/removed/type_changed column grouping, `_compute_schema_changes` rich shape, per-model section headers | | `recce/data/mcp/get_server_info.html` | Status badge + key/value grid | **Canonical post-refactor example.** Born idiomatic: no `models` wrapper (tool has no per-model loop), optional `git`/`pull_request` nested objects, 2-column CSS grid layout, empty-state card when `mode="none"` | | `recce/data/mcp/list_checks.html` | List / simple table | 3-up summary cards (Total / Approved / Pending), 4-column status table, empty-state with hint, `is_preset` badge, `_tool_list_checks` returns a flat list + pre-computed `total`/`approved` — `pending` derived in the widget delegate | +| `recce/data/mcp/get_model.html` | Single-item detail card | Per-environment column tables (base/current), adaptive 2-col/3-col layout when constraints present, PK + not-null + unique badges, not-found empty state, `columns` dict → list normalisation in delegate | 
`get_server_info` is the **recommended canonical example** for new widgets because it was written after the idiomatic pattern was established (Day 3 diff --git a/recce/data/mcp/get_model.html b/recce/data/mcp/get_model.html new file mode 100644 index 000000000..0de35db77 --- /dev/null +++ b/recce/data/mcp/get_model.html @@ -0,0 +1,365 @@ + + + + + Model Detail + + + +
Loading…
+ + + diff --git a/recce/mcp_server.py b/recce/mcp_server.py index a922130f1..52318ef14 100644 --- a/recce/mcp_server.py +++ b/recce/mcp_server.py @@ -53,7 +53,7 @@ # When RECCE_MCP_WIDGETS=1 is set, these tools are served by `recce mcp-widget-server` # instead of the main `recce mcp-server`. The main server omits them from list_tools # and raises in call_tool if the agent calls them anyway. See recce/widget_server.py. -WIDGET_TOOLS = {"row_count_diff", "schema_diff", "get_server_info", "list_checks"} +WIDGET_TOOLS = {"row_count_diff", "schema_diff", "get_server_info", "list_checks", "get_model"} def _widgets_enabled() -> bool: diff --git a/recce/widget_server.py b/recce/widget_server.py index 7f20af492..0b428c89a 100644 --- a/recce/widget_server.py +++ b/recce/widget_server.py @@ -466,6 +466,163 @@ def list_checks_resource() -> str: return _read_widget_html("list_checks") +# --------------------------------------------------------------------------- +# get_model widget tool + resource +# --------------------------------------------------------------------------- + + +class ColumnInfo(BaseModel): + """Shape of one column entry in a get_model response.""" + + name: str + type: Optional[str] = None + not_null: bool = False + unique: bool = False + + +class ModelEnvironment(BaseModel): + """Column details for one environment (base or current). + + ``columns`` is stored as a dict keyed by column name in the raw handler + response. The widget server normalises it to a list so the HTML can + iterate without Object.values() gymnastics. + """ + + columns: List[ColumnInfo] = [] + primary_key: Optional[str] = None + # raw_code intentionally omitted — widget shows schema, not SQL source + + +class GetModelOutput(BaseModel): + """Output model for the get_model widget tool. + + ``model_id`` echoes back the requested identifier for the widget header. + ``base`` / ``current`` hold per-environment column details. 
Either may be + None when the model exists in only one environment or is not found at all. + ``not_found`` is True only when neither environment has the model. + """ + + model_id: str + base: Optional[ModelEnvironment] = None + current: Optional[ModelEnvironment] = None + not_found: bool = False + + +class GetModelInput(BaseModel): + model_id: str = Field( + description=( + "The dbt unique node ID of the model " + "(e.g. 'model.jaffle_shop.customers'). " + "Use the full unique ID, not just the model name." + ) + ) + + +def _parse_model_env(raw: Optional[dict]) -> Optional[ModelEnvironment]: + """Convert raw get_model environment dict → ModelEnvironment Pydantic model. + + The raw dict has ``columns`` as a nested dict keyed by column name. + Each value is ``{name, type, not_null?, unique?}``. We normalise to a list + so the widget HTML can iterate in order. + """ + if not raw: + return None + raw_cols: dict = raw.get("columns") or {} + columns = [] + for col_name, col_data in raw_cols.items(): + columns.append( + ColumnInfo( + name=col_data.get("name", col_name), + type=col_data.get("type"), + not_null=col_data.get("not_null", False), + unique=col_data.get("unique", False), + ) + ) + return ModelEnvironment( + columns=columns, + primary_key=raw.get("primary_key"), + ) + + +@mcp.tool( + name="get_model", + annotations={ + "title": "Model Detail (Widget)", + "readOnlyHint": True, + "destructiveHint": False, + "idempotentHint": True, + "openWorldHint": False, + }, + meta={ + "ui": {"resourceUri": "ui://recce/get_model.html"}, + "ui/resourceUri": "ui://recce/get_model.html", + }, +) +async def get_model(args: GetModelInput) -> CallToolResult: + """Get column details for a single dbt model from base and current environments. + + Returns schema information (column names, types, constraints) rendered + as a model-detail card widget. The agent should not reproduce the column + table as plain text — the widget handles rendering. 
+ + Args: + model_id: Full dbt unique node ID (e.g. 'model.jaffle_shop.customers'). + Use the full ID, not just the short model name. + + Returns: + CallToolResult with structuredContent: GetModelOutput shape + {model_id, base: {columns, primary_key}?, current: {columns, primary_key}?, + not_found: bool} + + Use when: + - User asks "what columns does {model} have" / "schema of {model}" + - Need to verify column types or constraints before running a diff + - Comparing base vs current column layout for a single model + Don't use when: + - User wants column CHANGES across models — use schema_diff instead + - User wants ALL models — use lineage_diff for DAG scope + - Modifying anything — get_model is read-only + """ + raw = await _recce_server._tool_get_model({"model_id": args.model_id}) + # _tool_get_model returns {"model": {"base": {...}, "current": {...}}} + # It raises ValueError if neither env has the model, so raw is always a dict here. + model_data = raw.get("model", {}) if isinstance(raw, dict) else {} + base_raw = model_data.get("base") + curr_raw = model_data.get("current") + base_env = _parse_model_env(base_raw if isinstance(base_raw, dict) else None) + curr_env = _parse_model_env(curr_raw if isinstance(curr_raw, dict) else None) + not_found = base_env is None and curr_env is None + output = GetModelOutput( + model_id=args.model_id, + base=base_env, + current=curr_env, + not_found=not_found, + ) + short = ( + f"Model '{args.model_id}' detail rendered in widget." + if not not_found + else f"Model '{args.model_id}' not found." 
+ ) + return CallToolResult( + content=[TextContent(type="text", text=short)], + structuredContent=output.model_dump(), + ) + + +@mcp.resource( + uri="ui://recce/get_model.html", + mime_type="text/html;profile=mcp-app", + meta={ + "ui": { + "csp": {"resourceDomains": ["https://unpkg.com"]}, + "prefersBorder": False, + }, + }, +) +def get_model_resource() -> str: + return _read_widget_html("get_model") + + # --------------------------------------------------------------------------- # Entry point # --------------------------------------------------------------------------- diff --git a/tests/test_widget_server.py b/tests/test_widget_server.py index fbfc2d5fb..a85b87b86 100644 --- a/tests/test_widget_server.py +++ b/tests/test_widget_server.py @@ -72,18 +72,19 @@ async def test_mcp_server_filters_widget_tools_when_widgets_enabled(monkeypatch) assert "schema_diff" not in names assert "get_server_info" not in names assert "list_checks" not in names + assert "get_model" not in names # Other tools must still be present assert "lineage_diff" in names # --------------------------------------------------------------------------- -# Test 3: Widget server registers exactly 4 tools + 4 resources +# Test 3: Widget server registers exactly 5 tools + 5 resources # --------------------------------------------------------------------------- @pytest.mark.asyncio -async def test_widget_server_registers_four_tools_and_four_resources(): - """Widget FastMCP instance has exactly row_count_diff + schema_diff + get_server_info + list_checks tools/resources. +async def test_widget_server_registers_five_tools_and_five_resources(): + """Widget FastMCP instance has exactly 5 tools/resources (Phase A complete). Uses FastMCP public API: mcp.list_tools() and mcp.list_resources(). 
""" @@ -95,12 +96,13 @@ async def test_widget_server_registers_four_tools_and_four_resources(): tool_names = {t.name for t in tools} resource_uris = {str(r.uri) for r in resources} - assert tool_names == {"row_count_diff", "schema_diff", "get_server_info", "list_checks"} + assert tool_names == {"row_count_diff", "schema_diff", "get_server_info", "list_checks", "get_model"} assert resource_uris == { "ui://recce/row_count_diff.html", "ui://recce/schema_diff.html", "ui://recce/get_server_info.html", "ui://recce/list_checks.html", + "ui://recce/get_model.html", } @@ -263,7 +265,7 @@ async def test_widget_tool_annotations_present(): tools = await mcp.list_tools() tool_map = {t.name: t for t in tools} - for tool_name in ("row_count_diff", "schema_diff", "get_server_info", "list_checks"): + for tool_name in ("row_count_diff", "schema_diff", "get_server_info", "list_checks", "get_model"): assert tool_name in tool_map, f"{tool_name} not found in widget mcp tools" t = tool_map[tool_name] a = t.annotations @@ -453,3 +455,104 @@ async def test_list_checks_returns_calltoolresult_with_pydantic_shape(): assert validated.checks[0].is_checked is True assert validated.checks[1].is_preset is True assert validated.checks[1].is_checked is False + + +# --------------------------------------------------------------------------- +# Test 13: get_model widget tool is registered with correct resource URI +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_get_model_widget_registered(): + """get_model appears in widget mcp tools/list and its resource URI exists. 
+ + Verifies: + - tool named 'get_model' is in widget mcp tool list + - resource URI 'ui://recce/get_model.html' is in widget mcp resource list + """ + from recce.widget_server import mcp + + tools = await mcp.list_tools() + resources = await mcp.list_resources() + + tool_names = {t.name for t in tools} + resource_uris = {str(r.uri) for r in resources} + + assert "get_model" in tool_names + assert "ui://recce/get_model.html" in resource_uris + + +# --------------------------------------------------------------------------- +# Test 14: get_model returns CallToolResult with correct Pydantic shape +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_get_model_returns_calltoolresult_with_pydantic_shape(): + """get_model handler returns CallToolResult with structuredContent matching GetModelOutput. + + Verifies: + - content[0].text is a short human-readable sentence (not a JSON dump) + - structuredContent passes GetModelOutput.model_validate() + - columns are normalised from dict → list + - primary_key is preserved + - not_found is False when at least one env has data + """ + from mcp.types import CallToolResult + + import recce.widget_server as ws + from recce.widget_server import GetModelInput, GetModelOutput + + mock_server = MagicMock() + mock_server._tool_get_model = AsyncMock( + return_value={ + "model": { + "base": { + "columns": { + "id": {"name": "id", "type": "bigint", "unique": True}, + "name": {"name": "name", "type": "varchar", "not_null": True}, + "created_at": {"name": "created_at", "type": "timestamp"}, + }, + "primary_key": "id", + }, + "current": { + "columns": { + "id": {"name": "id", "type": "bigint", "unique": True}, + "name": {"name": "name", "type": "varchar", "not_null": True}, + "created_at": {"name": "created_at", "type": "timestamp"}, + "updated_at": {"name": "updated_at", "type": "timestamp"}, + }, + "primary_key": "id", + }, + } + } + ) + + original = ws._recce_server + 
ws._recce_server = mock_server + try: + args = GetModelInput(model_id="model.jaffle_shop.customers") + result = await ws.get_model(args) + finally: + ws._recce_server = original + + assert isinstance(result, CallToolResult) + assert len(result.content) == 1 + content_text = result.content[0].text + assert isinstance(content_text, str) + assert len(content_text) < 120, f"content too long ({len(content_text)} chars): {content_text!r}" + assert "widget" in content_text.lower() + + assert result.structuredContent is not None + validated = GetModelOutput.model_validate(result.structuredContent) + assert validated.model_id == "model.jaffle_shop.customers" + assert validated.not_found is False + # base: 3 columns, primary_key=id + assert validated.base is not None + assert len(validated.base.columns) == 3 + assert validated.base.primary_key == "id" + pk_col = next(c for c in validated.base.columns if c.name == "id") + assert pk_col.unique is True + # current: 4 columns (added updated_at) + assert validated.current is not None + assert len(validated.current.columns) == 4 From 0f579223aa6adec2808c3040d46638a9a928d714 Mon Sep 17 00:00:00 2001 From: Kent Date: Mon, 25 May 2026 23:43:21 +0800 Subject: [PATCH 18/43] fix(widgets): get_server_info support_tasks is Dict[str, bool] not List[str] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Captain hit Pydantic validation error in Claude Desktop: actual _tool_get_server_info returns support_tasks as {"query": True, ..., "change_analysis": True} (dict of task slug → enabled bool), not a list of enabled slugs. Fix the Pydantic field type and widget HTML iteration. 
- ServerInfoOutput.support_tasks: Dict[str, bool] (was List[str]) - Widget HTML iterates Object.entries(...).filter(([_,v]) => v) for enabled tasks; renders as info badges (same visual as before) - Test fixture updated to use dict shape Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- recce/data/mcp/get_server_info.html | 21 +++++++++++++-------- recce/widget_server.py | 2 +- tests/test_widget_server.py | 11 ++++++++++- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/recce/data/mcp/get_server_info.html b/recce/data/mcp/get_server_info.html index 85d36780e..975e187c0 100644 --- a/recce/data/mcp/get_server_info.html +++ b/recce/data/mcp/get_server_info.html @@ -321,14 +321,19 @@
${prLink}
`); } - // support_tasks (only if present and non-empty) - if (Array.isArray(data.support_tasks) && data.support_tasks.length > 0) { - const taskBadges = data.support_tasks - .map(t => `${escapeHtml(t)}`) - .join(" "); - rows.push(` -
Supported tasks
-
${taskBadges}
`); + // support_tasks — dict of {taskSlug: bool}; show only enabled tasks + if (data.support_tasks && typeof data.support_tasks === "object" && !Array.isArray(data.support_tasks)) { + const enabledTasks = Object.entries(data.support_tasks) + .filter(([, enabled]) => enabled) + .map(([name]) => name); + if (enabledTasks.length > 0) { + const taskBadges = enabledTasks + .map(t => `${escapeHtml(t)}`) + .join(" "); + rows.push(` +
Supported tasks
+
${taskBadges}
`); + } } document.getElementById("root").innerHTML = ` diff --git a/recce/widget_server.py b/recce/widget_server.py index 0b428c89a..70e10cbef 100644 --- a/recce/widget_server.py +++ b/recce/widget_server.py @@ -87,7 +87,7 @@ class ServerInfoOutput(BaseModel): mode: str = "local" # "local" | "cloud" | "none" adapter_type: Optional[str] = None review_mode: Optional[bool] = None - support_tasks: Optional[List[str]] = None + support_tasks: Optional[Dict[str, bool]] = None single_env: bool = False base_status: Optional[str] = None # "fresh"|"stale_time"|"stale_sha"|"missing"|"single_env"|"unknown" git: Optional[GitInfo] = None diff --git a/tests/test_widget_server.py b/tests/test_widget_server.py index a85b87b86..ff7aa9478 100644 --- a/tests/test_widget_server.py +++ b/tests/test_widget_server.py @@ -327,7 +327,16 @@ async def test_get_server_info_returns_calltoolresult_with_pydantic_shape(): "mode": "local", "adapter_type": "dbt", "review_mode": False, - "support_tasks": ["row_count_diff", "schema_diff"], + "support_tasks": { + "query": True, + "query_base": True, + "value_diff": True, + "profile_diff": True, + "row_count_diff": True, + "top_k_diff": True, + "histogram_diff": True, + "change_analysis": True, + }, "single_env": False, "base_status": "fresh", } From 24a0680354a8407325e32a7b685378a4ed94e288 Mon Sep 17 00:00:00 2001 From: Kent Date: Tue, 26 May 2026 00:23:58 +0800 Subject: [PATCH 19/43] fix(widgets): list_checks no-args pattern (empty Pydantic input broke invocation) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Captain hit Pydantic validation error in Claude Desktop: Error executing tool list_checks: 1 validation error for list_checksArguments args Field required [type=missing, input_value={}, input_type=dict] Root cause: list_checks delegate signature was `async def list_checks(args: ListChecksInput)` where ListChecksInput was an empty BaseModel (no real args to pass). 
FastMCP exposed this as an inputSchema requiring `args` as a top-level field — agent called with no params, MCP sent {}, validation failed. Fix: drop the empty ListChecksInput class and signature param. Match the no-args pattern of get_server_info. `_tool_list_checks` still receives {} internally (the handler accepts but ignores arguments). Confirmed only list_checks affected: row_count_diff / schema_diff have Optional-fields-only inputs (OK), get_server_info has no-args (OK), get_model has required model_id (OK). Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- recce/widget_server.py | 6 +----- tests/test_widget_server.py | 5 ++--- 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/recce/widget_server.py b/recce/widget_server.py index 70e10cbef..570287285 100644 --- a/recce/widget_server.py +++ b/recce/widget_server.py @@ -393,10 +393,6 @@ class ListChecksOutput(BaseModel): pending: int -class ListChecksInput(BaseModel): - pass # _tool_list_checks takes no arguments — list everything in the session - - @mcp.tool( name="list_checks", annotations={ @@ -411,7 +407,7 @@ class ListChecksInput(BaseModel): "ui/resourceUri": "ui://recce/list_checks.html", }, ) -async def list_checks(args: ListChecksInput) -> CallToolResult: +async def list_checks() -> CallToolResult: """List all saved Recce checks for this session. 
Returns a summary card (total / approved / pending) plus a status table diff --git a/tests/test_widget_server.py b/tests/test_widget_server.py index ff7aa9478..d68201acb 100644 --- a/tests/test_widget_server.py +++ b/tests/test_widget_server.py @@ -410,7 +410,7 @@ async def test_list_checks_returns_calltoolresult_with_pydantic_shape(): from mcp.types import CallToolResult import recce.widget_server as ws - from recce.widget_server import ListChecksInput, ListChecksOutput + from recce.widget_server import ListChecksOutput mock_server = MagicMock() mock_server._tool_list_checks = AsyncMock( @@ -443,8 +443,7 @@ async def test_list_checks_returns_calltoolresult_with_pydantic_shape(): original = ws._recce_server ws._recce_server = mock_server try: - args = ListChecksInput() - result = await ws.list_checks(args) + result = await ws.list_checks() finally: ws._recce_server = original From 46251d5be878551752ec67a3062991c6de16ebd6 Mon Sep 17 00:00:00 2001 From: Kent Date: Tue, 26 May 2026 00:32:17 +0800 Subject: [PATCH 20/43] =?UTF-8?q?feat(widgets):=20add=20query=20widget=20(?= =?UTF-8?q?Phase=20B=20tier=203=20=E2=80=94=20first=20data-table=20widget)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sixth widget; first tier-3 widget. Establishes the data-table rendering pattern for the other Phase B widgets (query_diff, value_diff, value_diff_detail, top_k_diff). 
- QueryInput / QueryOutput Pydantic models matching actual DataFrame.model_dump shape (READ from source — columns/data/limit/more/total_row_count) - Sticky-header scrollable table inside ~400px container - Cell type rendering (null → "—" italic secondary, number → tabular-nums right, string → truncated with title attr, bool → check/dash, date → display as-is) - Truncation badge when result is capped (DataFrame.more) - Empty state with SQL echo for debug context - @mcp.tool with annotations (openWorldHint=True since queries hit the warehouse) - CallToolResult short content + structuredContent - WIDGET_TOOLS filter expanded to 6 tools - 2 new tests (tests 15-16); enumeration assertions bumped to 6 - docs/mcp-widgets.md gains "Adding a tier-3 (data table) widget" section as template for the remaining Phase B widgets Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- docs/mcp-widgets.md | 137 ++++++++++- recce/data/mcp/query.html | 438 ++++++++++++++++++++++++++++++++++++ recce/mcp_server.py | 2 +- recce/widget_server.py | 118 ++++++++++ tests/test_widget_server.py | 160 ++++++++++++- 5 files changed, 841 insertions(+), 14 deletions(-) create mode 100644 recce/data/mcp/query.html diff --git a/docs/mcp-widgets.md b/docs/mcp-widgets.md index 33c43a6ac..f4c218358 100644 --- a/docs/mcp-widgets.md +++ b/docs/mcp-widgets.md @@ -13,8 +13,9 @@ that Claude Desktop routes those calls exclusively to `mcp-widget-server`, which annotates each tool with `_meta.ui.resourceUri` pointing at an HTML resource. Phase A ships five widgets: `row_count_diff`, `schema_diff`, `get_server_info`, -`list_checks`, and `get_model`. All run in **local mode only** — cloud/session -mode is not supported until iter 2. +`list_checks`, and `get_model`. Phase B iter 1 adds `query` (first tier-3 +data-table widget). All run in **local mode only** — cloud/session mode is +not supported until iter 2. 
--- @@ -34,8 +35,9 @@ recce/ get_server_info.html list_checks.html get_model.html + query.html # Phase B tier-3: scrollable SQL result table tests/ - test_widget_server.py # 14 tests covering WIDGET_TOOLS coordination + widget server. + test_widget_server.py # 16 tests covering WIDGET_TOOLS coordination + widget server. docs/ mcp-widgets.md # This file. ``` @@ -85,7 +87,7 @@ The worked reference throughout is `row_count_diff`. Add a new widget called File: `recce/mcp_server.py`, near line 56. ```python -WIDGET_TOOLS = {"row_count_diff", "schema_diff", "get_server_info", "list_checks", "get_model", ""} +WIDGET_TOOLS = {"row_count_diff", "schema_diff", "get_server_info", "list_checks", "get_model", "query", ""} ``` This single change makes `mcp-server` omit `` from `tools/list` when @@ -418,7 +420,7 @@ documented and supported. Reconsider if ext-apps publishes a Python SDK. ## Reference Widgets -Five working examples (in order of implementation): +Six working examples (in order of implementation): | File | Tier | What it demonstrates | |------|------|----------------------| @@ -427,6 +429,7 @@ Five working examples (in order of implementation): | `recce/data/mcp/get_server_info.html` | Status badge + key/value grid | **Canonical post-refactor example.** Born idiomatic: no `models` wrapper (tool has no per-model loop), optional `git`/`pull_request` nested objects, 2-column CSS grid layout, empty-state card when `mode="none"` | | `recce/data/mcp/list_checks.html` | List / simple table | 3-up summary cards (Total / Approved / Pending), 4-column status table, empty-state with hint, `is_preset` badge, `_tool_list_checks` returns a flat list + pre-computed `total`/`approved` — `pending` derived in the widget delegate | | `recce/data/mcp/get_model.html` | Single-item detail card | Per-environment column tables (base/current), adaptive 2-col/3-col layout when constraints present, PK + not-null + unique badges, not-found empty state, `columns` dict → list normalisation in 
delegate | +| `recce/data/mcp/query.html` | **Tier-3 data table** | **Template for Phase B.** Sticky-header scrollable table (400px cap), type-aware cell rendering, truncation badge, empty/error states. Use this as the base pattern for `query_diff`, `value_diff`, `value_diff_detail`, `top_k_diff` | `get_server_info` is the **recommended canonical example** for new widgets because it was written after the idiomatic pattern was established (Day 3 @@ -444,6 +447,130 @@ verify rendering without running a full MCP server. --- +## Adding a Tier-3 (Data Table) Widget + +Phase B widgets (`query`, `query_diff`, `value_diff`, `value_diff_detail`, `top_k_diff`) +render arbitrary columnar data. `recce/data/mcp/query.html` is the canonical example. + +### Data shape + +The underlying `DataFrame.model_dump(mode='json')` has this exact shape (confirmed from +`recce/tasks/dataframe.py`): + +```json +{ + "columns": [ + {"key": "id", "name": "id", "type": "integer"}, + {"key": "amount", "name": "amount", "type": "number"}, + {"key": "label", "name": "label", "type": "text"} + ], + "data": [[1, 99.9, "Alice"], [2, null, null]], + "limit": 2000, + "more": false, + "total_row_count": 2 +} +``` + +`DataFrameColumnType` enum values: `"integer"`, `"number"`, `"text"`, `"boolean"`, +`"date"`, `"datetime"`, `"timedelta"`, `"unknown"`. 
+ +### Pydantic models + +```python +class QueryColumnInfo(BaseModel): + key: Optional[str] = None + name: str + type: str # DataFrameColumnType enum value + +class QueryOutput(BaseModel): + columns: List[QueryColumnInfo] + data: List[List[Any]] + limit: Optional[int] = None + more: Optional[bool] = None + total_row_count: Optional[int] = None + sql_template: Optional[str] = None # echo input for context +``` + +### CSS mechanics for sticky-header scrollable table + +```css +/* Container caps height and scrolls in both axes */ +.table-wrap { + max-height: 400px; + overflow: auto; + border: 1px solid var(--color-border-primary, #e5e7eb); + border-radius: var(--border-radius-md, 8px); +} +/* Table sticky header works inside overflow:auto parent */ +.result-table thead th { + position: sticky; + top: 0; + z-index: 1; +} +``` + +This combination — `overflow: auto` on the container, `position: sticky; top: 0` +on `thead th` — is the pattern to use for all tier-3 table widgets. Do NOT use +`overflow: hidden` on the container (breaks scroll) or `position: fixed` on the +header (breaks column alignment). + +### `renderCell(value, type)` helper — canonical implementation + +```js +function renderCell(value, type) { + if (value === null || value === undefined) + return ``; + if (type === "boolean" || typeof value === "boolean") + return value ? `` : ``; + if (type === "integer" || type === "number") { + const formatted = typeof value === "number" + ? 
value.toLocaleString(undefined, { maximumFractionDigits: 6 }) + : escapeHtml(String(value)); + return `${formatted}`; + } + if (type === "date" || type === "datetime" || type === "timedelta") + return `${escapeHtml(String(value))}`; + // Text / unknown — truncate at 80 chars + const str = String(value); + if (str.length > 80) + return `${escapeHtml(str.slice(0, 80))}…`; + return escapeHtml(str); +} +``` + +CSS classes used: `.cell-null` (italic, secondary color), `.cell-num` (tabular-nums, +mono, right-aligned), `.cell-bool-true` (green), `.cell-bool-false` (gray). +All four classes need exhaustive `@media (prefers-color-scheme: dark)` overrides. + +### Truncation badge + +When `more === true`, show a warning badge above the table: + +```js +const truncatedBadge = more + ? `Truncated to ${limit ?? nRows} rows` + : ""; +``` + +### `openWorldHint` for warehouse-hitting tools + +Tools that execute SQL against the warehouse (all tier-3 tools) must set +`openWorldHint: True` in annotations — they perform real external I/O. +This contrasts with Phase A tools that only read dbt manifest/state: + +```python +@mcp.tool( + name="query", + annotations={ + ... + "openWorldHint": True, # hits the warehouse + }, + ... +) +``` + +--- + ## What Is NOT in Iter 1 These are deferred to iter 2 or later: diff --git a/recce/data/mcp/query.html b/recce/data/mcp/query.html new file mode 100644 index 000000000..96f12e72a --- /dev/null +++ b/recce/data/mcp/query.html @@ -0,0 +1,438 @@ + + + + + Query Result + + + +
Loading…
+ + + diff --git a/recce/mcp_server.py b/recce/mcp_server.py index 52318ef14..a58b70427 100644 --- a/recce/mcp_server.py +++ b/recce/mcp_server.py @@ -53,7 +53,7 @@ # When RECCE_MCP_WIDGETS=1 is set, these tools are served by `recce mcp-widget-server` # instead of the main `recce mcp-server`. The main server omits them from list_tools # and raises in call_tool if the agent calls them anyway. See recce/widget_server.py. -WIDGET_TOOLS = {"row_count_diff", "schema_diff", "get_server_info", "list_checks", "get_model"} +WIDGET_TOOLS = {"row_count_diff", "schema_diff", "get_server_info", "list_checks", "get_model", "query"} def _widgets_enabled() -> bool: diff --git a/recce/widget_server.py b/recce/widget_server.py index 570287285..744bba15f 100644 --- a/recce/widget_server.py +++ b/recce/widget_server.py @@ -619,6 +619,124 @@ def get_model_resource() -> str: return _read_widget_html("get_model") +# --------------------------------------------------------------------------- +# query widget tool + resource +# --------------------------------------------------------------------------- + + +class QueryColumnInfo(BaseModel): + """Shape of one column in a DataFrame result (from DataFrameColumn.model_dump).""" + + key: Optional[str] = None + name: str + type: str # DataFrameColumnType enum value: "integer", "text", "number", "boolean", + # "date", "datetime", "timedelta", "unknown" + + +class QueryOutput(BaseModel): + """Output model for the query widget tool. + + Fields mirror DataFrame.model_dump(mode='json') output, with sql_template + echoed back for context in the empty-state and debug display. + """ + + columns: List[QueryColumnInfo] + data: List[List[Any]] + limit: Optional[int] = None + more: Optional[bool] = None + total_row_count: Optional[int] = None + sql_template: Optional[str] = None # echoed from input for widget header/empty-state + + +class QueryInput(BaseModel): + sql_template: str = Field( + ..., + description=( + "SQL query with optional Jinja templating. 
" + "Use {{ ref('model_name') }} for dbt model references and other dbt macros." + ), + ) + base: bool = Field( + default=False, + description="If true, query the base environment (target-base); else current (target). Default false.", + ) + + +@mcp.tool( + name="query", + annotations={ + "title": "Query Result (Widget)", + "readOnlyHint": True, + "destructiveHint": False, + "idempotentHint": True, + "openWorldHint": True, # queries hit the warehouse (external I/O) + }, + meta={ + "ui": {"resourceUri": "ui://recce/query.html"}, + "ui/resourceUri": "ui://recce/query.html", + }, +) +async def query(args: QueryInput) -> CallToolResult: + """Execute an ad-hoc SQL query against the dbt environment. + + Returns a scrollable result-table widget with type-aware cell rendering. + The agent should not reproduce the table data as plain text — the widget + handles rendering. + + Args: + sql_template: SQL with Jinja (e.g. "SELECT count(*) FROM {{ ref('customers') }}") + base: If true, target base environment (target-base manifest); else current. Default false. + + Returns: + CallToolResult with structuredContent: QueryOutput shape + {columns: [{key, name, type}], data: [[...]], limit?, more?, + total_row_count?, sql_template} + + Use when: + - User asks "run SQL: ..." or "query the warehouse for ..." 
+ - Need ad-hoc data extraction (no pre-built check type covers it) + - Validating a hypothesis about specific values in the database + Don't use when: + - User wants row counts → use row_count_diff + - User wants column-level diff → use value_diff + - User wants the schema (columns/types) → use get_model or schema_diff + - User wants to compare base vs current → use query_diff + """ + raw = await _recce_server._tool_query(args.model_dump()) + # raw is DataFrame.model_dump(mode='json') — shape confirmed from source: + # {columns: [{key, name, type}], data: [[...]], limit?, more?, total_row_count?} + columns = [QueryColumnInfo(**c) for c in raw.get("columns", [])] + output = QueryOutput( + columns=columns, + data=raw.get("data", []), + limit=raw.get("limit"), + more=raw.get("more"), + total_row_count=raw.get("total_row_count"), + sql_template=args.sql_template, + ) + n_rows = len(output.data) + n_cols = len(output.columns) + text = f"Query result ({n_rows} row{'s' if n_rows != 1 else ''} × {n_cols} col{'s' if n_cols != 1 else ''}) rendered in widget." 
+ return CallToolResult( + content=[TextContent(type="text", text=text)], + structuredContent=output.model_dump(), + ) + + +@mcp.resource( + uri="ui://recce/query.html", + mime_type="text/html;profile=mcp-app", + meta={ + "ui": { + "csp": {"resourceDomains": ["https://unpkg.com"]}, + "prefersBorder": False, + }, + }, +) +def query_resource() -> str: + return _read_widget_html("query") + + # --------------------------------------------------------------------------- # Entry point # --------------------------------------------------------------------------- diff --git a/tests/test_widget_server.py b/tests/test_widget_server.py index d68201acb..6157f0258 100644 --- a/tests/test_widget_server.py +++ b/tests/test_widget_server.py @@ -73,6 +73,7 @@ async def test_mcp_server_filters_widget_tools_when_widgets_enabled(monkeypatch) assert "get_server_info" not in names assert "list_checks" not in names assert "get_model" not in names + assert "query" not in names # Other tools must still be present assert "lineage_diff" in names @@ -83,8 +84,8 @@ async def test_mcp_server_filters_widget_tools_when_widgets_enabled(monkeypatch) @pytest.mark.asyncio -async def test_widget_server_registers_five_tools_and_five_resources(): - """Widget FastMCP instance has exactly 5 tools/resources (Phase A complete). +async def test_widget_server_registers_six_tools_and_six_resources(): + """Widget FastMCP instance has exactly 6 tools/resources (Phase A + query widget). Uses FastMCP public API: mcp.list_tools() and mcp.list_resources(). 
""" @@ -96,13 +97,14 @@ async def test_widget_server_registers_five_tools_and_five_resources(): tool_names = {t.name for t in tools} resource_uris = {str(r.uri) for r in resources} - assert tool_names == {"row_count_diff", "schema_diff", "get_server_info", "list_checks", "get_model"} + assert tool_names == {"row_count_diff", "schema_diff", "get_server_info", "list_checks", "get_model", "query"} assert resource_uris == { "ui://recce/row_count_diff.html", "ui://recce/schema_diff.html", "ui://recce/get_server_info.html", "ui://recce/list_checks.html", "ui://recce/get_model.html", + "ui://recce/query.html", } @@ -255,17 +257,17 @@ async def test_structured_content_matches_pydantic_model(): @pytest.mark.asyncio async def test_widget_tool_annotations_present(): - """Both widget tools have required annotations per SDK idiom checklist. + """All widget tools have required annotations per SDK idiom checklist. - Asserts readOnlyHint=True, destructiveHint=False, idempotentHint=True, - openWorldHint=False, and title is set. + All tools: readOnlyHint=True, destructiveHint=False, idempotentHint=True, title set. + openWorldHint=False for all except 'query' (which hits the warehouse, openWorldHint=True). 
""" from recce.widget_server import mcp tools = await mcp.list_tools() tool_map = {t.name: t for t in tools} - for tool_name in ("row_count_diff", "schema_diff", "get_server_info", "list_checks", "get_model"): + for tool_name in ("row_count_diff", "schema_diff", "get_server_info", "list_checks", "get_model", "query"): assert tool_name in tool_map, f"{tool_name} not found in widget mcp tools" t = tool_map[tool_name] a = t.annotations @@ -273,9 +275,17 @@ async def test_widget_tool_annotations_present(): assert a.readOnlyHint is True, f"{tool_name}: expected readOnlyHint=True" assert a.destructiveHint is False, f"{tool_name}: expected destructiveHint=False" assert a.idempotentHint is True, f"{tool_name}: expected idempotentHint=True" - assert a.openWorldHint is False, f"{tool_name}: expected openWorldHint=False" assert a.title is not None and len(a.title) > 0, f"{tool_name}: title must be set" + # Closed-world tools (no external warehouse I/O) + closed_world_tools = ("row_count_diff", "schema_diff", "get_server_info", "list_checks", "get_model") + for tool_name in closed_world_tools: + t = tool_map[tool_name] + assert t.annotations.openWorldHint is False, f"{tool_name}: expected openWorldHint=False" + + # query hits the warehouse — openWorldHint must be True + assert tool_map["query"].annotations.openWorldHint is True, "query: expected openWorldHint=True" + # --------------------------------------------------------------------------- # Test 9: get_server_info widget tool is registered with correct resource URI @@ -564,3 +574,137 @@ async def test_get_model_returns_calltoolresult_with_pydantic_shape(): # current: 4 columns (added updated_at) assert validated.current is not None assert len(validated.current.columns) == 4 + + +# --------------------------------------------------------------------------- +# Test 15: query widget tool is registered with correct resource URI +# --------------------------------------------------------------------------- + + 
+@pytest.mark.asyncio +async def test_query_widget_registered(): + """query appears in widget mcp tools/list and its resource URI exists. + + Verifies: + - tool named 'query' is in widget mcp tool list + - resource URI 'ui://recce/query.html' is in widget mcp resource list + - sql_template is required in inputSchema + - base is optional (has default) + """ + from recce.widget_server import mcp + + tools = await mcp.list_tools() + resources = await mcp.list_resources() + + tool_names = {t.name for t in tools} + resource_uris = {str(r.uri) for r in resources} + + assert "query" in tool_names + assert "ui://recce/query.html" in resource_uris + + # Check inputSchema: sql_template required, base optional. + # FastMCP wraps the Pydantic model in an 'args' outer envelope + # (schema is {properties: {args: {$ref: ...}}, required: ["args"]}). + # The actual field requirements live inside the $defs/QueryInput sub-schema. + query_tool = next(t for t in tools if t.name == "query") + schema = query_tool.inputSchema + assert schema is not None + + # Navigate into the nested QueryInput definition + defs = schema.get("$defs", {}) + inner_schema = next(iter(defs.values()), schema) # first $def or top-level + inner_required = inner_schema.get("required", []) + inner_props = inner_schema.get("properties", {}) + assert "sql_template" in inner_required, "sql_template must be required" + assert "base" not in inner_required, "base must be optional (has default)" + assert "sql_template" in inner_props, "sql_template must be a property" + assert "base" in inner_props, "base must be a property" + + +# --------------------------------------------------------------------------- +# Test 16: query returns CallToolResult with correct Pydantic shape +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_query_returns_calltoolresult_with_pydantic_shape(): + """query handler returns CallToolResult with structuredContent matching 
QueryOutput. + + Uses a realistic DataFrame.model_dump shape (confirmed from source reading): + {columns: [{key, name, type}], data: [[...]], limit, more, total_row_count} + + Verifies: + - content[0].text is a short human-readable sentence (not a JSON dump) + - structuredContent passes QueryOutput.model_validate() + - columns are hydrated correctly into QueryColumnInfo list + - sql_template is echoed back in structuredContent + - more/limit/total_row_count are preserved + """ + from mcp.types import CallToolResult + + import recce.widget_server as ws + from recce.widget_server import QueryInput, QueryOutput + + mock_server = MagicMock() + # Realistic DataFrame.model_dump shape (shape verified from recce/tasks/dataframe.py) + mock_server._tool_query = AsyncMock( + return_value={ + "columns": [ + {"key": "id", "name": "id", "type": "integer"}, + {"key": "name", "name": "name", "type": "text"}, + {"key": "amount", "name": "amount", "type": "number"}, + {"key": "active", "name": "active", "type": "boolean"}, + {"key": "created_at", "name": "created_at", "type": "date"}, + ], + "data": [ + [1, "Alice", 99.9, True, "2024-01-01"], + [2, None, None, False, "2024-02-15"], + ], + "limit": 2000, + "more": False, + "total_row_count": 2, + } + ) + + original = ws._recce_server + ws._recce_server = mock_server + try: + args = QueryInput( + sql_template="SELECT id, name, amount, active, created_at FROM {{ ref('customers') }}", base=False + ) + result = await ws.query(args) + finally: + ws._recce_server = original + + assert isinstance(result, CallToolResult) + assert len(result.content) == 1 + content_text = result.content[0].text + assert isinstance(content_text, str) + assert len(content_text) < 120, f"content too long ({len(content_text)} chars): {content_text!r}" + assert "widget" in content_text.lower() + + assert result.structuredContent is not None + validated = QueryOutput.model_validate(result.structuredContent) + + # Columns + assert len(validated.columns) == 5 + 
assert validated.columns[0].name == "id" + assert validated.columns[0].type == "integer" + assert validated.columns[1].name == "name" + assert validated.columns[1].type == "text" + assert validated.columns[3].type == "boolean" + + # Data — 2 rows, nulls preserved + assert len(validated.data) == 2 + assert validated.data[0][0] == 1 + assert validated.data[1][1] is None # null name + assert validated.data[1][2] is None # null amount + + # Metadata + assert validated.limit == 2000 + assert validated.more is False + assert validated.total_row_count == 2 + + # sql_template echoed back + assert validated.sql_template is not None + assert "customers" in validated.sql_template From 1aaa8a2dfee343fb6083617329eb93debfcaa13c Mon Sep 17 00:00:00 2001 From: Kent Date: Tue, 26 May 2026 00:41:02 +0800 Subject: [PATCH 21/43] =?UTF-8?q?feat(widgets):=20add=20query=5Fdiff=20wid?= =?UTF-8?q?get=20(Phase=20B=20tier=203=20=E2=80=94=20two-env=20SQL=20compa?= =?UTF-8?q?rison)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Seventh widget. Builds on query widget (46251d5b)'s data-table pattern. Compares SQL execution against base + current environments with per-row status rendering. Handles both QueryDiffTask result shapes from source. 
- QueryDiffInput / QueryDiffOutput Pydantic models verified against actual QueryDiffTask.execute() return shape (QueryDiffResult) - Two render modes detected at runtime from structuredContent: * Shape A (no primary_keys): side-by-side base / current tables * Shape B (primary_keys): join-diff table with in_a/in_b stripped, status pills (Added/Removed), row tinting, filter buttons - _parse_dataframe() helper mirrors _parse_model_env pattern - _warning extracted to output.warning named field (consistent with row_count_diff Day 1.5 pattern) - Widget HTML: sticky-header scrollable tables + status pills + filter buttons (All / Added / Removed with counts) + dark-mode coverage - @mcp.tool with openWorldHint=True (queries hit warehouse) - WIDGET_TOOLS updated to 7 tools - 2 new tests (registration + both Shape A and B); enumeration bumped to 7 - test_widget_tool_annotations_present updated for query_diff open-world group - docs/mcp-widgets.md: widget count + reference table updated Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- docs/mcp-widgets.md | 12 +- recce/data/mcp/query_diff.html | 675 +++++++++++++++++++++++++++++++++ recce/mcp_server.py | 2 +- recce/widget_server.py | 169 +++++++++ tests/test_widget_server.py | 195 +++++++++- 5 files changed, 1043 insertions(+), 10 deletions(-) create mode 100644 recce/data/mcp/query_diff.html diff --git a/docs/mcp-widgets.md b/docs/mcp-widgets.md index f4c218358..6403a8d52 100644 --- a/docs/mcp-widgets.md +++ b/docs/mcp-widgets.md @@ -13,9 +13,9 @@ that Claude Desktop routes those calls exclusively to `mcp-widget-server`, which annotates each tool with `_meta.ui.resourceUri` pointing at an HTML resource. Phase A ships five widgets: `row_count_diff`, `schema_diff`, `get_server_info`, -`list_checks`, and `get_model`. Phase B iter 1 adds `query` (first tier-3 -data-table widget). All run in **local mode only** — cloud/session mode is -not supported until iter 2. +`list_checks`, and `get_model`. 
Phase B iter 1 adds `query` and `query_diff` +(tier-3 data-table widgets). All run in **local mode only** — cloud/session mode +is not supported until iter 2. --- @@ -36,8 +36,9 @@ recce/ list_checks.html get_model.html query.html # Phase B tier-3: scrollable SQL result table + query_diff.html # Phase B tier-3: two-env comparison with status pills + filters tests/ - test_widget_server.py # 16 tests covering WIDGET_TOOLS coordination + widget server. + test_widget_server.py # 18 tests covering WIDGET_TOOLS coordination + widget server. docs/ mcp-widgets.md # This file. ``` @@ -420,7 +421,7 @@ documented and supported. Reconsider if ext-apps publishes a Python SDK. ## Reference Widgets -Six working examples (in order of implementation): +Seven working examples (in order of implementation): | File | Tier | What it demonstrates | |------|------|----------------------| @@ -430,6 +431,7 @@ Six working examples (in order of implementation): | `recce/data/mcp/list_checks.html` | List / simple table | 3-up summary cards (Total / Approved / Pending), 4-column status table, empty-state with hint, `is_preset` badge, `_tool_list_checks` returns a flat list + pre-computed `total`/`approved` — `pending` derived in the widget delegate | | `recce/data/mcp/get_model.html` | Single-item detail card | Per-environment column tables (base/current), adaptive 2-col/3-col layout when constraints present, PK + not-null + unique badges, not-found empty state, `columns` dict → list normalisation in delegate | | `recce/data/mcp/query.html` | **Tier-3 data table** | **Template for Phase B.** Sticky-header scrollable table (400px cap), type-aware cell rendering, truncation badge, empty/error states. 
Use this as the base pattern for `query_diff`, `value_diff`, `value_diff_detail`, `top_k_diff` | +| `recce/data/mcp/query_diff.html` | **Tier-3 two-env comparison** | Two render modes: side-by-side (no primary_keys → base/current tables) and join-diff (primary_keys → single table with status pills + Added/Removed filter buttons). Row tinting (red=removed, green=added), `in_a`/`in_b` columns stripped from display. | `get_server_info` is the **recommended canonical example** for new widgets because it was written after the idiomatic pattern was established (Day 3 diff --git a/recce/data/mcp/query_diff.html b/recce/data/mcp/query_diff.html new file mode 100644 index 000000000..bcf8f6d08 --- /dev/null +++ b/recce/data/mcp/query_diff.html @@ -0,0 +1,675 @@ + + + + + Query Diff + + + +
Loading…
+ + + diff --git a/recce/mcp_server.py b/recce/mcp_server.py index a58b70427..a0d697e7b 100644 --- a/recce/mcp_server.py +++ b/recce/mcp_server.py @@ -53,7 +53,7 @@ # When RECCE_MCP_WIDGETS=1 is set, these tools are served by `recce mcp-widget-server` # instead of the main `recce mcp-server`. The main server omits them from list_tools # and raises in call_tool if the agent calls them anyway. See recce/widget_server.py. -WIDGET_TOOLS = {"row_count_diff", "schema_diff", "get_server_info", "list_checks", "get_model", "query"} +WIDGET_TOOLS = {"row_count_diff", "schema_diff", "get_server_info", "list_checks", "get_model", "query", "query_diff"} def _widgets_enabled() -> bool: diff --git a/recce/widget_server.py b/recce/widget_server.py index 744bba15f..1039a7f3c 100644 --- a/recce/widget_server.py +++ b/recce/widget_server.py @@ -737,6 +737,175 @@ def query_resource() -> str: return _read_widget_html("query") +# --------------------------------------------------------------------------- +# query_diff widget tool + resource +# --------------------------------------------------------------------------- + + +class QueryDiffInput(BaseModel): + sql_template: str = Field( + ..., + description=( + "SQL query with optional Jinja templating. " + "Use {{ ref('model_name') }} for dbt model references. " + "Runs against BOTH base and current environments (or produces a join diff when primary_keys supplied)." + ), + ) + base_sql_template: Optional[str] = Field( + default=None, + description="Alternative SQL template to run on the base environment (defaults to sql_template).", + ) + primary_keys: Optional[List[str]] = Field( + default=None, + description=( + "List of primary key column names for row-level join diff. " + "When provided, the tool runs a set-based diff (INTERSECT / EXCEPT) and returns rows " + "that differ between base and current, each tagged with in_a / in_b flags. " + "When omitted, both base and current result sets are returned side-by-side." 
+ ), + ) + + +class QueryDiffDataFrame(BaseModel): + """Serialised DataFrame — mirrors DataFrame.model_dump(mode='json').""" + + columns: List[QueryColumnInfo] + data: List[List[Any]] + limit: Optional[int] = None + more: Optional[bool] = None + total_row_count: Optional[int] = None + + +class QueryDiffOutput(BaseModel): + """Output model for the query_diff widget tool. + + QueryDiffTask.execute() returns a QueryDiffResult with two possible shapes: + + Shape A — side-by-side (no primary_keys): + base: DataFrame, current: DataFrame, diff: None + + Shape B — join diff (primary_keys provided): + base: None, current: None, diff: DataFrame + The diff DataFrame includes all data columns plus 'in_a' and 'in_b' + boolean columns. Only rows that differ are included. + + The widget renders both shapes; JS logic checks which fields are present. + sql_template is echoed from input for use in the widget header / empty-state. + """ + + base: Optional[QueryDiffDataFrame] = None + current: Optional[QueryDiffDataFrame] = None + diff: Optional[QueryDiffDataFrame] = None + sql_template: Optional[str] = None # echoed from input + warning: Optional[str] = None # from _maybe_add_single_env_warning + + +def _parse_dataframe(raw: Optional[dict]) -> Optional[QueryDiffDataFrame]: + """Convert a DataFrame.model_dump(mode='json') dict → QueryDiffDataFrame. + + Returns None when raw is None/empty so callers can check presence. 
+ """ + if not raw: + return None + columns = [QueryColumnInfo(**c) for c in raw.get("columns", [])] + return QueryDiffDataFrame( + columns=columns, + data=raw.get("data", []), + limit=raw.get("limit"), + more=raw.get("more"), + total_row_count=raw.get("total_row_count"), + ) + + +@mcp.tool( + name="query_diff", + annotations={ + "title": "Query Diff (Widget)", + "readOnlyHint": True, + "destructiveHint": False, + "idempotentHint": True, + "openWorldHint": True, # queries hit the warehouse (external I/O) + }, + meta={ + "ui": {"resourceUri": "ui://recce/query_diff.html"}, + "ui/resourceUri": "ui://recce/query_diff.html", + }, +) +async def query_diff(args: QueryDiffInput) -> CallToolResult: + """Run a SQL query against BOTH base and current environments and compare results. + + Returns a comparison widget. The agent should not reproduce the comparison as + plain text — the widget handles rendering. + + Two comparison modes depending on whether primary_keys is supplied: + + Side-by-side mode (no primary_keys): + Executes sql_template against base and current independently. + Returns two parallel result tables (base / current) displayed side-by-side. + + Join diff mode (primary_keys provided): + Runs a set-based SQL diff (INTERSECT / EXCEPT) to find rows that differ. + Returns a single table of differing rows tagged with in_a (in base) and + in_b (in current) boolean flags. Only changed rows are included. + + Args: + sql_template: SQL with Jinja (e.g. "SELECT * FROM {{ ref('customers') }}") + base_sql_template: Optional alternative SQL for the base env; defaults to sql_template. + primary_keys: Column names to use as join keys for row-level diff (optional). 
+ + Returns: + CallToolResult with structuredContent: QueryDiffOutput shape + Side-by-side: {base: {columns, data, ...}, current: {columns, data, ...}, diff: null, sql_template, warning?} + Join diff: {base: null, current: null, diff: {columns, data, ...}, sql_template, warning?} + + Use when: + - User asks "compare base vs current for this SQL" + - Investigating whether a dbt model change altered output values + - Row-level comparison with known primary key columns + Don't use when: + - Single environment only — use query instead + - Schema (column) changes — use schema_diff + - Row count only — use row_count_diff + """ + raw = await _recce_server._tool_query_diff(args.model_dump(exclude_none=True)) + warning = raw.pop("_warning", None) if isinstance(raw, dict) else None + output = QueryDiffOutput( + base=_parse_dataframe(raw.get("base") if isinstance(raw, dict) else None), + current=_parse_dataframe(raw.get("current") if isinstance(raw, dict) else None), + diff=_parse_dataframe(raw.get("diff") if isinstance(raw, dict) else None), + sql_template=args.sql_template, + warning=warning, + ) + # Build a short descriptive text based on which shape was returned + if output.diff is not None: + n = len(output.diff.data) + text = f"Query diff ({n} differing row{'s' if n != 1 else ''}) rendered in widget." + elif output.base is not None or output.current is not None: + base_n = len(output.base.data) if output.base else 0 + curr_n = len(output.current.data) if output.current else 0 + text = f"Query diff (base: {base_n} row{'s' if base_n != 1 else ''}, current: {curr_n} row{'s' if curr_n != 1 else ''}) rendered in widget." + else: + text = "Query diff rendered in widget." 
+ return CallToolResult( + content=[TextContent(type="text", text=text)], + structuredContent=output.model_dump(), + ) + + +@mcp.resource( + uri="ui://recce/query_diff.html", + mime_type="text/html;profile=mcp-app", + meta={ + "ui": { + "csp": {"resourceDomains": ["https://unpkg.com"]}, + "prefersBorder": False, + }, + }, +) +def query_diff_resource() -> str: + return _read_widget_html("query_diff") + + # --------------------------------------------------------------------------- # Entry point # --------------------------------------------------------------------------- diff --git a/tests/test_widget_server.py b/tests/test_widget_server.py index 6157f0258..dc22d2e97 100644 --- a/tests/test_widget_server.py +++ b/tests/test_widget_server.py @@ -85,7 +85,7 @@ async def test_mcp_server_filters_widget_tools_when_widgets_enabled(monkeypatch) @pytest.mark.asyncio async def test_widget_server_registers_six_tools_and_six_resources(): - """Widget FastMCP instance has exactly 6 tools/resources (Phase A + query widget). + """Widget FastMCP instance has exactly 7 tools/resources (Phase A + query + query_diff widgets). Uses FastMCP public API: mcp.list_tools() and mcp.list_resources(). 
""" @@ -97,7 +97,15 @@ async def test_widget_server_registers_six_tools_and_six_resources(): tool_names = {t.name for t in tools} resource_uris = {str(r.uri) for r in resources} - assert tool_names == {"row_count_diff", "schema_diff", "get_server_info", "list_checks", "get_model", "query"} + assert tool_names == { + "row_count_diff", + "schema_diff", + "get_server_info", + "list_checks", + "get_model", + "query", + "query_diff", + } assert resource_uris == { "ui://recce/row_count_diff.html", "ui://recce/schema_diff.html", @@ -105,6 +113,7 @@ async def test_widget_server_registers_six_tools_and_six_resources(): "ui://recce/list_checks.html", "ui://recce/get_model.html", "ui://recce/query.html", + "ui://recce/query_diff.html", } @@ -267,7 +276,15 @@ async def test_widget_tool_annotations_present(): tools = await mcp.list_tools() tool_map = {t.name: t for t in tools} - for tool_name in ("row_count_diff", "schema_diff", "get_server_info", "list_checks", "get_model", "query"): + for tool_name in ( + "row_count_diff", + "schema_diff", + "get_server_info", + "list_checks", + "get_model", + "query", + "query_diff", + ): assert tool_name in tool_map, f"{tool_name} not found in widget mcp tools" t = tool_map[tool_name] a = t.annotations @@ -283,8 +300,9 @@ async def test_widget_tool_annotations_present(): t = tool_map[tool_name] assert t.annotations.openWorldHint is False, f"{tool_name}: expected openWorldHint=False" - # query hits the warehouse — openWorldHint must be True + # query and query_diff hit the warehouse — openWorldHint must be True assert tool_map["query"].annotations.openWorldHint is True, "query: expected openWorldHint=True" + assert tool_map["query_diff"].annotations.openWorldHint is True, "query_diff: expected openWorldHint=True" # --------------------------------------------------------------------------- @@ -708,3 +726,172 @@ async def test_query_returns_calltoolresult_with_pydantic_shape(): # sql_template echoed back assert validated.sql_template is not 
None assert "customers" in validated.sql_template + + +# --------------------------------------------------------------------------- +# Test 17: query_diff widget tool is registered with correct resource URI +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_query_diff_widget_registered(): + """query_diff appears in widget mcp tools/list and its resource URI exists. + + Verifies: + - tool named 'query_diff' is in widget mcp tool list + - resource URI 'ui://recce/query_diff.html' is in widget mcp resource list + - sql_template is required; base_sql_template and primary_keys are optional + """ + from recce.widget_server import mcp + + tools = await mcp.list_tools() + resources = await mcp.list_resources() + + tool_names = {t.name for t in tools} + resource_uris = {str(r.uri) for r in resources} + + assert "query_diff" in tool_names + assert "ui://recce/query_diff.html" in resource_uris + + # Check inputSchema: sql_template required, others optional. + # FastMCP wraps the Pydantic model in an 'args' outer envelope. 
+ qd_tool = next(t for t in tools if t.name == "query_diff") + schema = qd_tool.inputSchema + assert schema is not None + + defs = schema.get("$defs", {}) + inner_schema = next(iter(defs.values()), schema) + inner_required = inner_schema.get("required", []) + inner_props = inner_schema.get("properties", {}) + assert "sql_template" in inner_required, "sql_template must be required" + assert "base_sql_template" not in inner_required, "base_sql_template must be optional" + assert "primary_keys" not in inner_required, "primary_keys must be optional" + assert "sql_template" in inner_props + assert "base_sql_template" in inner_props + assert "primary_keys" in inner_props + + +# --------------------------------------------------------------------------- +# Test 18: query_diff returns CallToolResult with correct Pydantic shape +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_query_diff_returns_calltoolresult_with_pydantic_shape(): + """query_diff handler returns CallToolResult with structuredContent matching QueryDiffOutput. 
+ + Tests both QueryDiffResult shapes: + A — side-by-side (no primary_keys → base+current DataFrames) + B — join diff (primary_keys provided → diff DataFrame with in_a/in_b) + + Verifies: + - content[0].text is a short human-readable sentence (not a JSON dump) + - structuredContent passes QueryDiffOutput.model_validate() for both shapes + - sql_template is echoed back + - _warning is extracted to output.warning named field + """ + from mcp.types import CallToolResult + + import recce.widget_server as ws + from recce.widget_server import QueryDiffInput, QueryDiffOutput + + # ── Shape A: side-by-side (no primary_keys) ────────────────────────── + mock_server = MagicMock() + base_df = { + "columns": [ + {"key": "id", "name": "id", "type": "integer"}, + {"key": "amount", "name": "amount", "type": "number"}, + ], + "data": [[1, 100.0], [2, 200.0]], + "limit": 2000, + "more": False, + "total_row_count": 2, + } + curr_df = { + "columns": [ + {"key": "id", "name": "id", "type": "integer"}, + {"key": "amount", "name": "amount", "type": "number"}, + ], + "data": [[1, 110.0], [2, 200.0], [3, 300.0]], + "limit": 2000, + "more": False, + "total_row_count": 3, + } + mock_server._tool_query_diff = AsyncMock( + return_value={ + "base": base_df, + "current": curr_df, + "diff": None, + "_warning": "Base environment not configured", + } + ) + + original = ws._recce_server + ws._recce_server = mock_server + try: + args = QueryDiffInput(sql_template="SELECT id, amount FROM {{ ref('orders') }}") + result = await ws.query_diff(args) + finally: + ws._recce_server = original + + assert isinstance(result, CallToolResult) + assert len(result.content) == 1 + content_text = result.content[0].text + assert isinstance(content_text, str) + assert len(content_text) < 140, f"content too long ({len(content_text)} chars): {content_text!r}" + assert "widget" in content_text.lower() + + assert result.structuredContent is not None + validated_a = QueryDiffOutput.model_validate(result.structuredContent) 
+ # Shape A: base + current present, diff absent + assert validated_a.base is not None + assert validated_a.current is not None + assert validated_a.diff is None + assert len(validated_a.base.columns) == 2 + assert len(validated_a.base.data) == 2 + assert len(validated_a.current.data) == 3 + # warning extracted from _warning key + assert validated_a.warning == "Base environment not configured" + # sql_template echoed back + assert validated_a.sql_template is not None + assert "orders" in validated_a.sql_template + + # ── Shape B: join diff (primary_keys → diff DataFrame with in_a/in_b) ── + diff_df = { + "columns": [ + {"key": "id", "name": "id", "type": "integer"}, + {"key": "amount", "name": "amount", "type": "number"}, + {"key": "in_a", "name": "in_a", "type": "boolean"}, + {"key": "in_b", "name": "in_b", "type": "boolean"}, + ], + "data": [ + [1, 100.0, True, False], # removed (only in base) + [3, 300.0, False, True], # added (only in current) + ], + "limit": 2000, + "more": False, + "total_row_count": None, + } + mock_server2 = MagicMock() + mock_server2._tool_query_diff = AsyncMock(return_value={"base": None, "current": None, "diff": diff_df}) + + ws._recce_server = mock_server2 + try: + args_b = QueryDiffInput( + sql_template="SELECT id, amount FROM {{ ref('orders') }}", + primary_keys=["id"], + ) + result_b = await ws.query_diff(args_b) + finally: + ws._recce_server = original + + assert isinstance(result_b, CallToolResult) + validated_b = QueryDiffOutput.model_validate(result_b.structuredContent) + # Shape B: diff present, base/current absent + assert validated_b.diff is not None + assert validated_b.base is None + assert validated_b.current is None + # diff DataFrame has 4 columns (including in_a/in_b) and 2 rows + assert len(validated_b.diff.columns) == 4 + assert len(validated_b.diff.data) == 2 + assert validated_b.warning is None From 7809a2f3dd07e6379e051f5274cf5651a3a25048 Mon Sep 17 00:00:00 2001 From: Kent Date: Tue, 26 May 2026 00:47:20 +0800 
Subject: [PATCH 22/43] =?UTF-8?q?feat(widgets):=20add=20value=5Fdiff=20wid?= =?UTF-8?q?get=20(Phase=20B=20tier=203=20=E2=80=94=20per-row=20value=20com?= =?UTF-8?q?parison)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Eighth widget. Aggregate stats + per-column match breakdown for value comparison across two environments (primary key matching). - ValueDiffInput / ValueDiffColumnRow / ValueDiffSummary / ValueDiffOutput Pydantic models matching actual ValueDiffTask return shape (verified from source): summary={total, added, removed}, data.data=[[col, matched, matched_p], ...] where matched_p is 0.0-1.0 fraction (not percentage) - _warning extracted to output.warning named field - 4-up summary cards (total/common/columns-affected/added+removed) + per-column table with inline bar visualization (CSS-only, no chart lib) - All-match empty state when every column is 100% identical - @mcp.tool with annotations (openWorldHint=True, queries warehouse) - WIDGET_TOOLS at 8 tools - 2 new tests (registration + Pydantic shape); enumeration bumped to 8 Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- recce/data/mcp/value_diff.html | 478 +++++++++++++++++++++++++++++++++ recce/mcp_server.py | 11 +- recce/widget_server.py | 167 +++++++++++- tests/test_widget_server.py | 158 ++++++++++- 4 files changed, 810 insertions(+), 4 deletions(-) create mode 100644 recce/data/mcp/value_diff.html diff --git a/recce/data/mcp/value_diff.html b/recce/data/mcp/value_diff.html new file mode 100644 index 000000000..f43eadebd --- /dev/null +++ b/recce/data/mcp/value_diff.html @@ -0,0 +1,478 @@ + + + + + Value Diff + + + +
Loading…
+ + + diff --git a/recce/mcp_server.py b/recce/mcp_server.py index a0d697e7b..6929f9996 100644 --- a/recce/mcp_server.py +++ b/recce/mcp_server.py @@ -53,7 +53,16 @@ # When RECCE_MCP_WIDGETS=1 is set, these tools are served by `recce mcp-widget-server` # instead of the main `recce mcp-server`. The main server omits them from list_tools # and raises in call_tool if the agent calls them anyway. See recce/widget_server.py. -WIDGET_TOOLS = {"row_count_diff", "schema_diff", "get_server_info", "list_checks", "get_model", "query", "query_diff"} +WIDGET_TOOLS = { + "row_count_diff", + "schema_diff", + "get_server_info", + "list_checks", + "get_model", + "query", + "query_diff", + "value_diff", +} def _widgets_enabled() -> bool: diff --git a/recce/widget_server.py b/recce/widget_server.py index 1039a7f3c..93b7783bf 100644 --- a/recce/widget_server.py +++ b/recce/widget_server.py @@ -12,7 +12,7 @@ import importlib.resources import logging import sys -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Union from mcp.server.fastmcp import FastMCP from mcp.types import CallToolResult, TextContent @@ -737,6 +737,171 @@ def query_resource() -> str: return _read_widget_html("query") +# --------------------------------------------------------------------------- +# value_diff widget tool + resource +# --------------------------------------------------------------------------- + + +class ValueDiffColumnRow(BaseModel): + """Per-column match statistics as returned from ValueDiffResult.data rows. + + ValueDiffTask returns a DataFrame with columns ["column", "matched", "matched_p"]. 
+ Each row encodes one model column's match stats: + column: str — the column name + matched: int — count of matched (identical) rows across common rows + matched_p: float — fraction 0.0–1.0 of common rows that matched (None if common==0) + """ + + column: str + matched: int = 0 + matched_p: Optional[float] = None # 0.0–1.0; None when total common rows == 0 + + +class ValueDiffSummary(BaseModel): + """Aggregate row statistics from ValueDiffResult.summary.""" + + total: int = 0 # total rows seen (union of base + current via PK join) + added: int = 0 # rows in current only (PK absent in base) + removed: int = 0 # rows in base only (PK absent in current) + + +class ValueDiffOutput(BaseModel): + """Output model for the value_diff widget tool. + + Mirrors ValueDiffResult.model_dump(mode='json') after normalisation: + - summary: {total, added, removed} + - columns: per-column match stats extracted from data.data rows + - model: echoed from input for widget header + - primary_key: echoed from input (str or list) + - warning: extracted from _warning key (single-env mode notice) + """ + + model: str + primary_key: Optional[Union[str, List[str]]] = None + summary: ValueDiffSummary + columns: List[ValueDiffColumnRow] + warning: Optional[str] = None + + +class ValueDiffInput(BaseModel): + model: str = Field(..., description="dbt model name to compare (e.g. 'customers')") + primary_key: Union[str, List[str]] = Field( + ..., + description=( + "Primary key column(s) for row matching. " + "Use a string for a single column (e.g. 'id'), " + "or a list for a composite key (e.g. ['order_id', 'line_id'])." 
+ ), + ) + columns: Optional[List[str]] = Field( + default=None, + description="Columns to compare (default: all common columns between base and current)", + ) + + +@mcp.tool( + name="value_diff", + annotations={ + "title": "Value Diff (Widget)", + "readOnlyHint": True, + "destructiveHint": False, + "idempotentHint": True, + "openWorldHint": True, # executes queries against the warehouse + }, + meta={ + "ui": {"resourceUri": "ui://recce/value_diff.html"}, + "ui/resourceUri": "ui://recce/value_diff.html", + }, +) +async def value_diff(args: ValueDiffInput) -> CallToolResult: + """Compare row-level values between base and current environments using primary key matching. + + Returns aggregate summary (total / added / removed rows) and per-column match + statistics rendered as a widget. The agent should not reproduce the column + breakdown as plain text — the widget handles rendering. + + Args: + model: dbt model name (e.g. 'customers') + primary_key: column(s) to match rows on; str for single col, list for composite key + columns: optional subset of columns to compare (default: all common columns) + + Returns: + CallToolResult with structuredContent: ValueDiffOutput shape + {model, primary_key, summary: {total, added, removed}, + columns: [{column, matched, matched_p}], warning?} + + Use when: + - User asks "are values consistent" / "did values shift" for a known model + - PR review needs row-level value validation after a schema_diff shows no changes + - Verifying data quality impact after a model refactor + Don't use when: + - Need row-level detail (which exact rows mismatched) → value_diff_detail + - Schema changed → schema_diff first to see column additions/removals + - No primary key available → query_diff with primary_keys param instead + - Single-environment only — tool warns but returns no useful comparison + """ + raw = await _recce_server._tool_value_diff(args.model_dump(exclude_none=True)) + warning = raw.pop("_warning", None) if isinstance(raw, dict) else 
None + + # Extract summary from raw dict + raw_summary = raw.get("summary", {}) if isinstance(raw, dict) else {} + summary = ValueDiffSummary( + total=raw_summary.get("total", 0), + added=raw_summary.get("added", 0), + removed=raw_summary.get("removed", 0), + ) + + # Extract per-column rows from data.data (list of [col_name, matched_count, matched_p]) + raw_data = raw.get("data", {}) if isinstance(raw, dict) else {} + data_rows = raw_data.get("data", []) if isinstance(raw_data, dict) else [] + columns_out: List[ValueDiffColumnRow] = [] + for row in data_rows: + if not isinstance(row, (list, tuple)) or len(row) < 3: + continue + col_name, matched_count, matched_p = row[0], row[1], row[2] + columns_out.append( + ValueDiffColumnRow( + column=str(col_name), + matched=int(matched_count) if matched_count is not None else 0, + matched_p=float(matched_p) if matched_p is not None else None, + ) + ) + + output = ValueDiffOutput( + model=args.model, + primary_key=args.primary_key, + summary=summary, + columns=columns_out, + warning=warning, + ) + + n_cols = len(columns_out) + mismatched = [c for c in columns_out if c.matched_p is not None and c.matched_p < 1.0] + text = ( + f"Value diff for '{args.model}': {summary.total} rows compared, " + f"{len(mismatched)} of {n_cols} column{'s' if n_cols != 1 else ''} have mismatches. " + "Rendered in widget." 
+ ) + return CallToolResult( + content=[TextContent(type="text", text=text)], + structuredContent=output.model_dump(), + ) + + +@mcp.resource( + uri="ui://recce/value_diff.html", + mime_type="text/html;profile=mcp-app", + meta={ + "ui": { + "csp": {"resourceDomains": ["https://unpkg.com"]}, + "prefersBorder": False, + }, + }, +) +def value_diff_resource() -> str: + return _read_widget_html("value_diff") + + # --------------------------------------------------------------------------- # query_diff widget tool + resource # --------------------------------------------------------------------------- diff --git a/tests/test_widget_server.py b/tests/test_widget_server.py index dc22d2e97..5b3824b11 100644 --- a/tests/test_widget_server.py +++ b/tests/test_widget_server.py @@ -85,7 +85,7 @@ async def test_mcp_server_filters_widget_tools_when_widgets_enabled(monkeypatch) @pytest.mark.asyncio async def test_widget_server_registers_six_tools_and_six_resources(): - """Widget FastMCP instance has exactly 7 tools/resources (Phase A + query + query_diff widgets). + """Widget FastMCP instance has exactly 8 tools/resources (Phase A + query + query_diff + value_diff widgets). Uses FastMCP public API: mcp.list_tools() and mcp.list_resources(). 
""" @@ -105,6 +105,7 @@ async def test_widget_server_registers_six_tools_and_six_resources(): "get_model", "query", "query_diff", + "value_diff", } assert resource_uris == { "ui://recce/row_count_diff.html", @@ -114,6 +115,7 @@ async def test_widget_server_registers_six_tools_and_six_resources(): "ui://recce/get_model.html", "ui://recce/query.html", "ui://recce/query_diff.html", + "ui://recce/value_diff.html", } @@ -284,6 +286,7 @@ async def test_widget_tool_annotations_present(): "get_model", "query", "query_diff", + "value_diff", ): assert tool_name in tool_map, f"{tool_name} not found in widget mcp tools" t = tool_map[tool_name] @@ -300,9 +303,10 @@ async def test_widget_tool_annotations_present(): t = tool_map[tool_name] assert t.annotations.openWorldHint is False, f"{tool_name}: expected openWorldHint=False" - # query and query_diff hit the warehouse — openWorldHint must be True + # query, query_diff, and value_diff hit the warehouse — openWorldHint must be True assert tool_map["query"].annotations.openWorldHint is True, "query: expected openWorldHint=True" assert tool_map["query_diff"].annotations.openWorldHint is True, "query_diff: expected openWorldHint=True" + assert tool_map["value_diff"].annotations.openWorldHint is True, "value_diff: expected openWorldHint=True" # --------------------------------------------------------------------------- @@ -895,3 +899,153 @@ async def test_query_diff_returns_calltoolresult_with_pydantic_shape(): assert len(validated_b.diff.columns) == 4 assert len(validated_b.diff.data) == 2 assert validated_b.warning is None + + +# --------------------------------------------------------------------------- +# Test 19: value_diff widget tool is registered with correct resource URI +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_value_diff_widget_registered(): + """value_diff appears in widget mcp tools/list and its resource URI exists. 
+ + Verifies: + - tool named 'value_diff' is in widget mcp tool list + - resource URI 'ui://recce/value_diff.html' is in widget mcp resource list + - model and primary_key are required in inputSchema; columns is optional + """ + from recce.widget_server import mcp + + tools = await mcp.list_tools() + resources = await mcp.list_resources() + + tool_names = {t.name for t in tools} + resource_uris = {str(r.uri) for r in resources} + + assert "value_diff" in tool_names + assert "ui://recce/value_diff.html" in resource_uris + + # Check inputSchema: model + primary_key required, columns optional. + # FastMCP wraps the Pydantic model in an 'args' outer envelope. + vd_tool = next(t for t in tools if t.name == "value_diff") + schema = vd_tool.inputSchema + assert schema is not None + + defs = schema.get("$defs", {}) + inner_schema = next(iter(defs.values()), schema) + inner_required = inner_schema.get("required", []) + inner_props = inner_schema.get("properties", {}) + assert "model" in inner_required, "model must be required" + assert "primary_key" in inner_required, "primary_key must be required" + assert "columns" not in inner_required, "columns must be optional" + assert "model" in inner_props + assert "primary_key" in inner_props + assert "columns" in inner_props + + +# --------------------------------------------------------------------------- +# Test 20: value_diff returns CallToolResult with correct Pydantic shape +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_value_diff_returns_calltoolresult_with_pydantic_shape(): + """value_diff handler returns CallToolResult with structuredContent matching ValueDiffOutput. 
+ + Uses the actual ValueDiffResult.model_dump(mode='json') shape verified from source: + { + "summary": {"total": N, "added": N, "removed": N}, + "data": { + "columns": [{"key": "column", "name": "column", "type": "text"}, + {"key": "matched", "name": "matched", "type": "number"}, + {"key": "matched_p", "name": "matched_p", "type": "number"}], + "data": [["col_name", matched_count, matched_percent_0_to_1], ...], + "limit": null, "more": null, "total_row_count": null + } + } + + Verifies: + - content[0].text is a short human-readable sentence (not a JSON dump) + - structuredContent passes ValueDiffOutput.model_validate() + - per-column rows are extracted correctly from data.data (list-of-lists) + - _warning is extracted to output.warning named field + - model and primary_key are echoed back + - matched_p is preserved as 0.0–1.0 fraction + """ + from mcp.types import CallToolResult + + import recce.widget_server as ws + from recce.widget_server import ValueDiffInput, ValueDiffOutput + + mock_server = MagicMock() + # Realistic ValueDiffResult.model_dump(mode='json') shape (verified from source) + mock_server._tool_value_diff = AsyncMock( + return_value={ + "summary": { + "total": 1000, + "added": 5, + "removed": 3, + }, + "data": { + "columns": [ + {"key": "column", "name": "column", "type": "text"}, + {"key": "matched", "name": "matched", "type": "number"}, + {"key": "matched_p", "name": "matched_p", "type": "number"}, + ], + "data": [ + ["customer_id", 992, 1.0], + ["name", 990, 0.9980], + ["amount", 750, 0.7560], + ["status", 992, 1.0], + ], + "limit": None, + "more": None, + "total_row_count": None, + }, + "_warning": "Base environment not configured — comparing current against itself.", + } + ) + + original = ws._recce_server + ws._recce_server = mock_server + try: + args = ValueDiffInput(model="customers", primary_key="customer_id") + result = await ws.value_diff(args) + finally: + ws._recce_server = original + + assert isinstance(result, CallToolResult) + 
assert len(result.content) == 1 + content_text = result.content[0].text + assert isinstance(content_text, str) + assert len(content_text) < 200, f"content too long ({len(content_text)} chars): {content_text!r}" + assert "widget" in content_text.lower() + + assert result.structuredContent is not None + validated = ValueDiffOutput.model_validate(result.structuredContent) + + # model + primary_key echoed back + assert validated.model == "customers" + assert validated.primary_key == "customer_id" + + # summary + assert validated.summary.total == 1000 + assert validated.summary.added == 5 + assert validated.summary.removed == 3 + + # per-column rows: 4 columns extracted from data.data list-of-lists + assert len(validated.columns) == 4 + # First column: customer_id — 100% match + col0 = validated.columns[0] + assert col0.column == "customer_id" + assert col0.matched == 992 + assert col0.matched_p == 1.0 + # Third column: amount — partial match + col2 = validated.columns[2] + assert col2.column == "amount" + assert col2.matched == 750 + assert abs(col2.matched_p - 0.7560) < 1e-6 + + # _warning extracted + assert validated.warning == "Base environment not configured — comparing current against itself." From 16b62230be888a8b9b397495c5ff87efc39d0f90 Mon Sep 17 00:00:00 2001 From: Kent Date: Tue, 26 May 2026 00:55:36 +0800 Subject: [PATCH 23/43] =?UTF-8?q?feat(widgets):=20add=20value=5Fdiff=5Fdet?= =?UTF-8?q?ail=20widget=20(Phase=20B=20tier=203=20=E2=80=94=20row-level=20?= =?UTF-8?q?inspection)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ninth widget. Row-level companion to value_diff. Shows the actual rows with mismatched values for investigation. 
- ValueDiffDetailInput / Output Pydantic models matching actual ValueDiffDetailTask return shape (plain DataFrame with in_a/in_b flags) - Scrollable table with sticky primary-key column (CSS sticky left), filter buttons (All/Removed/Added) with counts, status pill per row, row tinting (red=removed, green=added), cell type-aware rendering - _warning extracted to output.warning named field - @mcp.tool with annotations (openWorldHint=True) - WIDGET_TOOLS at 9 tools - 2 new tests (Tests 21-22); enumeration bumped to 9; 27 total passing Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- docs/mcp-widgets.md | 10 +- recce/data/mcp/value_diff_detail.html | 518 ++++++++++++++++++++++++++ recce/mcp_server.py | 1 + recce/widget_server.py | 150 ++++++++ tests/test_widget_server.py | 157 +++++++- 5 files changed, 830 insertions(+), 6 deletions(-) create mode 100644 recce/data/mcp/value_diff_detail.html diff --git a/docs/mcp-widgets.md b/docs/mcp-widgets.md index 6403a8d52..755c0cbfa 100644 --- a/docs/mcp-widgets.md +++ b/docs/mcp-widgets.md @@ -13,9 +13,9 @@ that Claude Desktop routes those calls exclusively to `mcp-widget-server`, which annotates each tool with `_meta.ui.resourceUri` pointing at an HTML resource. Phase A ships five widgets: `row_count_diff`, `schema_diff`, `get_server_info`, -`list_checks`, and `get_model`. Phase B iter 1 adds `query` and `query_diff` -(tier-3 data-table widgets). All run in **local mode only** — cloud/session mode -is not supported until iter 2. +`list_checks`, and `get_model`. Phase B iter 1 adds `query`, `query_diff`, +`value_diff`, and `value_diff_detail` (tier-3 data-table widgets). All run in +**local mode only** — cloud/session mode is not supported until iter 2. 
--- @@ -37,8 +37,10 @@ recce/ get_model.html query.html # Phase B tier-3: scrollable SQL result table query_diff.html # Phase B tier-3: two-env comparison with status pills + filters + value_diff.html # Phase B tier-3: column-level match stats + value_diff_detail.html # Phase B tier-3: row-level diff table with filter pills tests/ - test_widget_server.py # 18 tests covering WIDGET_TOOLS coordination + widget server. + test_widget_server.py # 22 tests covering WIDGET_TOOLS coordination + widget server. docs/ mcp-widgets.md # This file. ``` diff --git a/recce/data/mcp/value_diff_detail.html b/recce/data/mcp/value_diff_detail.html new file mode 100644 index 000000000..e321277b6 --- /dev/null +++ b/recce/data/mcp/value_diff_detail.html @@ -0,0 +1,518 @@ + + + + + Value Diff Detail + + + +
Loading…
+ + + diff --git a/recce/mcp_server.py b/recce/mcp_server.py index 6929f9996..f798f5bf5 100644 --- a/recce/mcp_server.py +++ b/recce/mcp_server.py @@ -62,6 +62,7 @@ "query", "query_diff", "value_diff", + "value_diff_detail", } diff --git a/recce/widget_server.py b/recce/widget_server.py index 93b7783bf..8a0fd5c55 100644 --- a/recce/widget_server.py +++ b/recce/widget_server.py @@ -902,6 +902,156 @@ def value_diff_resource() -> str: return _read_widget_html("value_diff") +# --------------------------------------------------------------------------- +# value_diff_detail widget tool + resource +# --------------------------------------------------------------------------- + + +class ValueDiffDetailOutput(BaseModel): + """Output model for the value_diff_detail widget tool. + + ValueDiffDetailTask.execute() returns ValueDiffDetailResult(DataFrame). + After model_dump(mode='json') it becomes a standard DataFrame dict: + {columns: [{key, name, type}], data: [[...]], limit, more, total_row_count} + + Columns include all data columns PLUS 'in_a' and 'in_b' boolean flags. + Rows where in_a=True, in_b=False are "removed" (only in base). + Rows where in_a=False, in_b=True are "added" (only in current). + (Both true cannot occur — only differing rows are returned.) + + primary_key and model are echoed from input for the widget header. + """ + + model: str + primary_key: Optional[Union[str, List[str]]] = None + columns: List[QueryColumnInfo] + data: List[List[Any]] + limit: Optional[int] = None + more: Optional[bool] = None + total_row_count: Optional[int] = None + warning: Optional[str] = None # from _maybe_add_single_env_warning + + +class ValueDiffDetailInput(BaseModel): + model: str = Field(..., description="dbt model name to inspect (e.g. 'customers')") + primary_key: Union[str, List[str]] = Field( + ..., + description=( + "Primary key column(s) for row matching. " + "Use a string for a single column (e.g. 'id'), " + "or a list for a composite key (e.g. 
['order_id', 'line_id'])." + ), + ) + columns: Optional[List[str]] = Field( + default=None, + description="Columns to inspect (default: all common columns between base and current)", + ) + + +@mcp.tool( + name="value_diff_detail", + annotations={ + "title": "Value Diff Detail (Widget)", + "readOnlyHint": True, + "destructiveHint": False, + "idempotentHint": True, + "openWorldHint": True, # executes queries against the warehouse + }, + meta={ + "ui": {"resourceUri": "ui://recce/value_diff_detail.html"}, + "ui/resourceUri": "ui://recce/value_diff_detail.html", + }, +) +async def value_diff_detail(args: ValueDiffDetailInput) -> CallToolResult: + """Show per-row detail of value differences (actual mismatched rows). + + Companion to value_diff (which shows aggregate stats). Returns the actual + rows with mismatched values, rendered as a scrollable table with filter + pills (All / Removed / Added). The agent should not enumerate the row list + as plain text — the widget handles rendering. + + The result DataFrame columns include all data columns plus 'in_a' and + 'in_b' boolean flags (in_a=True, in_b=False → "removed"; in_a=False, + in_b=True → "added"). Rows are capped at 1000 by the underlying task SQL. + + Args: + model: dbt model name (e.g. 
'customers') + primary_key: column(s) to match rows on; str for single col, list for composite key + columns: optional subset of columns to compare (default: all common columns) + + Returns: + CallToolResult with structuredContent: ValueDiffDetailOutput shape + {model, primary_key, columns: [{key, name, type}], data: [[...]], limit?, + more?, total_row_count?, warning?} + + Use when: + - User asks "which rows changed" / "show me the actual mismatches" + - Investigating specific records flagged by value_diff + - PR review needs row-level evidence of data changes + Don't use when: + - User wants aggregate stats → value_diff (faster, no row data) + - Need full row comparison without primary key → query_diff instead + - Single-environment only — tool warns but returns no useful comparison + """ + raw = await _recce_server._tool_value_diff_detail(args.model_dump(exclude_none=True)) + warning = raw.pop("_warning", None) if isinstance(raw, dict) else None + columns = [QueryColumnInfo(**c) for c in (raw.get("columns") or [])] + output = ValueDiffDetailOutput( + model=args.model, + primary_key=args.primary_key, + columns=columns, + data=raw.get("data") or [], + limit=raw.get("limit"), + more=raw.get("more"), + total_row_count=raw.get("total_row_count"), + warning=warning, + ) + n_rows = len(output.data) + # Classify rows by in_a/in_b to build a human-readable summary + in_a_idx = next((i for i, c in enumerate(columns) if c.name == "in_a"), None) + in_b_idx = next((i for i, c in enumerate(columns) if c.name == "in_b"), None) + if in_a_idx is not None and in_b_idx is not None: + removed = sum( + 1 + for row in output.data + if len(row) > max(in_a_idx, in_b_idx) + and (row[in_a_idx] is True or row[in_a_idx] == 1) + and not (row[in_b_idx] is True or row[in_b_idx] == 1) + ) + added = sum( + 1 + for row in output.data + if len(row) > max(in_a_idx, in_b_idx) + and not (row[in_a_idx] is True or row[in_a_idx] == 1) + and (row[in_b_idx] is True or row[in_b_idx] == 1) + ) + text = ( + 
f"Value diff detail for '{args.model}': " + f"{n_rows} differing row{'s' if n_rows != 1 else ''} " + f"(+{added} added, -{removed} removed). Rendered in widget." + ) + else: + text = f"Value diff detail for '{args.model}': {n_rows} row{'s' if n_rows != 1 else ''} rendered in widget." + return CallToolResult( + content=[TextContent(type="text", text=text)], + structuredContent=output.model_dump(), + ) + + +@mcp.resource( + uri="ui://recce/value_diff_detail.html", + mime_type="text/html;profile=mcp-app", + meta={ + "ui": { + "csp": {"resourceDomains": ["https://unpkg.com"]}, + "prefersBorder": False, + }, + }, +) +def value_diff_detail_resource() -> str: + return _read_widget_html("value_diff_detail") + + # --------------------------------------------------------------------------- # query_diff widget tool + resource # --------------------------------------------------------------------------- diff --git a/tests/test_widget_server.py b/tests/test_widget_server.py index 5b3824b11..bc6183071 100644 --- a/tests/test_widget_server.py +++ b/tests/test_widget_server.py @@ -85,7 +85,7 @@ async def test_mcp_server_filters_widget_tools_when_widgets_enabled(monkeypatch) @pytest.mark.asyncio async def test_widget_server_registers_six_tools_and_six_resources(): - """Widget FastMCP instance has exactly 8 tools/resources (Phase A + query + query_diff + value_diff widgets). + """Widget FastMCP instance has exactly 9 tools/resources (Phase A + query + query_diff + value_diff + value_diff_detail widgets). Uses FastMCP public API: mcp.list_tools() and mcp.list_resources(). 
""" @@ -106,6 +106,7 @@ async def test_widget_server_registers_six_tools_and_six_resources(): "query", "query_diff", "value_diff", + "value_diff_detail", } assert resource_uris == { "ui://recce/row_count_diff.html", @@ -116,6 +117,7 @@ async def test_widget_server_registers_six_tools_and_six_resources(): "ui://recce/query.html", "ui://recce/query_diff.html", "ui://recce/value_diff.html", + "ui://recce/value_diff_detail.html", } @@ -287,6 +289,7 @@ async def test_widget_tool_annotations_present(): "query", "query_diff", "value_diff", + "value_diff_detail", ): assert tool_name in tool_map, f"{tool_name} not found in widget mcp tools" t = tool_map[tool_name] @@ -303,10 +306,13 @@ async def test_widget_tool_annotations_present(): t = tool_map[tool_name] assert t.annotations.openWorldHint is False, f"{tool_name}: expected openWorldHint=False" - # query, query_diff, and value_diff hit the warehouse — openWorldHint must be True + # query, query_diff, value_diff, and value_diff_detail hit the warehouse — openWorldHint must be True assert tool_map["query"].annotations.openWorldHint is True, "query: expected openWorldHint=True" assert tool_map["query_diff"].annotations.openWorldHint is True, "query_diff: expected openWorldHint=True" assert tool_map["value_diff"].annotations.openWorldHint is True, "value_diff: expected openWorldHint=True" + assert ( + tool_map["value_diff_detail"].annotations.openWorldHint is True + ), "value_diff_detail: expected openWorldHint=True" # --------------------------------------------------------------------------- @@ -1049,3 +1055,150 @@ async def test_value_diff_returns_calltoolresult_with_pydantic_shape(): # _warning extracted assert validated.warning == "Base environment not configured — comparing current against itself." 
+ + +# --------------------------------------------------------------------------- +# Test 21: value_diff_detail widget tool is registered with correct resource URI +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_value_diff_detail_widget_registered(): + """value_diff_detail appears in widget mcp tools/list and its resource URI exists. + + Verifies: + - tool named 'value_diff_detail' is in widget mcp tool list + - resource URI 'ui://recce/value_diff_detail.html' is in widget mcp resource list + - model and primary_key are required in inputSchema; columns is optional + """ + from recce.widget_server import mcp + + tools = await mcp.list_tools() + resources = await mcp.list_resources() + + tool_names = {t.name for t in tools} + resource_uris = {str(r.uri) for r in resources} + + assert "value_diff_detail" in tool_names + assert "ui://recce/value_diff_detail.html" in resource_uris + + # Check inputSchema: model + primary_key required, columns optional. + # FastMCP wraps the Pydantic model in an 'args' outer envelope. 
+ vdd_tool = next(t for t in tools if t.name == "value_diff_detail") + schema = vdd_tool.inputSchema + assert schema is not None + + defs = schema.get("$defs", {}) + inner_schema = next(iter(defs.values()), schema) + inner_required = inner_schema.get("required", []) + inner_props = inner_schema.get("properties", {}) + assert "model" in inner_required, "model must be required" + assert "primary_key" in inner_required, "primary_key must be required" + assert "columns" not in inner_required, "columns must be optional" + assert "model" in inner_props + assert "primary_key" in inner_props + assert "columns" in inner_props + + +# --------------------------------------------------------------------------- +# Test 22: value_diff_detail returns CallToolResult with correct Pydantic shape +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_value_diff_detail_returns_calltoolresult_with_pydantic_shape(): + """value_diff_detail handler returns CallToolResult with structuredContent matching ValueDiffDetailOutput. + + Uses the actual ValueDiffDetailTask return shape — a plain DataFrame (confirmed from source): + ValueDiffDetailResult(DataFrame) → model_dump(mode='json') → + {columns: [{key, name, type}], data: [[...]], limit, more, total_row_count} + + Columns include all data columns PLUS in_a / in_b booleans. + Rows where in_a=True, in_b=False are "removed" (only in base). + Rows where in_a=False, in_b=True are "added" (only in current). 
+ + Verifies: + - content[0].text is a short human-readable sentence (not a JSON dump) + - structuredContent passes ValueDiffDetailOutput.model_validate() + - model and primary_key are echoed back + - columns include in_a / in_b (raw DataFrame shape preserved) + - data rows are preserved verbatim + - _warning is extracted to output.warning named field + """ + from mcp.types import CallToolResult + + import recce.widget_server as ws + from recce.widget_server import ValueDiffDetailInput, ValueDiffDetailOutput + + mock_server = MagicMock() + # Realistic ValueDiffDetailResult.model_dump(mode='json') shape (verified from source). + # Returns a DataFrame with all original data columns + in_a + in_b booleans. + mock_server._tool_value_diff_detail = AsyncMock( + return_value={ + "columns": [ + {"key": "customer_id", "name": "customer_id", "type": "integer"}, + {"key": "name", "name": "name", "type": "text"}, + {"key": "amount", "name": "amount", "type": "number"}, + {"key": "in_a", "name": "in_a", "type": "boolean"}, + {"key": "in_b", "name": "in_b", "type": "boolean"}, + ], + "data": [ + [1, "Alice", 100.0, True, False], # removed (only in base) + [2, "Bob", 250.0, True, False], # removed (only in base) + [5, "Carol", 310.0, False, True], # added (only in current) + ], + "limit": 1000, + "more": False, + "total_row_count": None, + "_warning": "Base environment not configured — comparing current against itself.", + } + ) + + original = ws._recce_server + ws._recce_server = mock_server + try: + args = ValueDiffDetailInput(model="customers", primary_key="customer_id") + result = await ws.value_diff_detail(args) + finally: + ws._recce_server = original + + assert isinstance(result, CallToolResult) + assert len(result.content) == 1 + content_text = result.content[0].text + assert isinstance(content_text, str) + assert len(content_text) < 200, f"content too long ({len(content_text)} chars): {content_text!r}" + assert "widget" in content_text.lower() + + assert 
result.structuredContent is not None + validated = ValueDiffDetailOutput.model_validate(result.structuredContent) + + # model + primary_key echoed back + assert validated.model == "customers" + assert validated.primary_key == "customer_id" + + # columns: 5 total (3 data cols + in_a + in_b) + assert len(validated.columns) == 5 + col_names = [c.name for c in validated.columns] + assert "customer_id" in col_names + assert "name" in col_names + assert "amount" in col_names + assert "in_a" in col_names + assert "in_b" in col_names + + # data: 3 rows preserved verbatim + assert len(validated.data) == 3 + # First row: customer_id=1, in_a=True, in_b=False (removed) + assert validated.data[0][0] == 1 + assert validated.data[0][3] is True # in_a + assert validated.data[0][4] is False # in_b + # Third row: customer_id=5, added + assert validated.data[2][0] == 5 + assert validated.data[2][3] is False # in_a + assert validated.data[2][4] is True # in_b + + # metadata + assert validated.limit == 1000 + assert validated.more is False + + # _warning extracted + assert validated.warning == "Base environment not configured — comparing current against itself." From 43eb6b375efcd49059a3fba966019764f0c21dd9 Mon Sep 17 00:00:00 2001 From: Kent Date: Tue, 26 May 2026 01:04:04 +0800 Subject: [PATCH 24/43] feat(widgets): add top_k_diff widget (Phase B tier 3, Phase B complete) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Tenth widget; closes Phase B (5 tier-3 widgets). Side-by-side ranked lists of top-K most frequent values across base + current envs. - TopKDiffInput / TopKEnvStats / TopKDiffOutput Pydantic models matching actual TopKDiffTask return shape (READ from source): base/current: {values[], counts[], valids, total} parallel lists. values[] is the SAME union list in both envs (SQL FULL OUTER JOIN order: curr_count DESC, base_count DESC). count=0 means absent from that env. Param is column_name (not column), default k=10 (not 50). 
- 2-column grid with ranked lists per env, inline bar viz, rank-change arrows (up/down), New/Gone badges for env-exclusive categories - _warning extracted to output.warning named field - @mcp.tool annotations (openWorldHint=True — executes warehouse SQL) - WIDGET_TOOLS at 10 tools — 50% coverage (10/20), design Open Q #6 FastMCP migration trigger threshold REACHED. iter 2 to reevaluate. - 2 new tests (23: registration + inputSchema, 24: Pydantic shape); enumeration bumped to 10 Phase B (tier-3 data tables): query, query_diff, value_diff, value_diff_detail, top_k_diff. Each hand-rolled its table/list layout because patterns diverged enough that a shared table component would need extreme flexibility. Iter 2 mini-doc: evaluate renderRankedList() JS helper extraction; full component abstraction deferred until 3+ widgets converge on same layout. Co-Authored-By: Claude Sonnet 4.6 Signed-off-by: Kent --- docs/mcp-widgets.md | 21 +- recce/data/mcp/top_k_diff.html | 493 +++++++++++++++++++++++++++++++++ recce/mcp_server.py | 1 + recce/widget_server.py | 153 ++++++++++ tests/test_widget_server.py | 148 +++++++++- 5 files changed, 811 insertions(+), 5 deletions(-) create mode 100644 recce/data/mcp/top_k_diff.html diff --git a/docs/mcp-widgets.md b/docs/mcp-widgets.md index 755c0cbfa..70c7b0bfe 100644 --- a/docs/mcp-widgets.md +++ b/docs/mcp-widgets.md @@ -14,7 +14,8 @@ annotates each tool with `_meta.ui.resourceUri` pointing at an HTML resource. Phase A ships five widgets: `row_count_diff`, `schema_diff`, `get_server_info`, `list_checks`, and `get_model`. Phase B iter 1 adds `query`, `query_diff`, -`value_diff`, and `value_diff_detail` (tier-3 data-table widgets). All run in +`value_diff`, `value_diff_detail`, and `top_k_diff` (five tier-3 data-table/list +widgets). Total: **10 of 20 planned widgets** (50% coverage). All run in **local mode only** — cloud/session mode is not supported until iter 2. 
--- @@ -25,7 +26,7 @@ Phase A ships five widgets: `row_count_diff`, `schema_diff`, `get_server_info`, recce/ mcp_server.py # Existing primary server. # WIDGET_TOOLS set + _widgets_enabled() filter live here. - widget_server.py # FastMCP widget server (Phase A). + widget_server.py # FastMCP widget server (Phase A + Phase B). # @mcp.tool delegates + @mcp.resource handlers. cli.py # mcp-widget-server CLI subcommand added here. data/ @@ -39,8 +40,9 @@ recce/ query_diff.html # Phase B tier-3: two-env comparison with status pills + filters value_diff.html # Phase B tier-3: column-level match stats value_diff_detail.html # Phase B tier-3: row-level diff table with filter pills + top_k_diff.html # Phase B tier-3: side-by-side ranked lists with inline bars tests/ - test_widget_server.py # 22 tests covering WIDGET_TOOLS coordination + widget server. + test_widget_server.py # 24 tests covering WIDGET_TOOLS coordination + widget server. docs/ mcp-widgets.md # This file. ``` @@ -434,6 +436,7 @@ Seven working examples (in order of implementation): | `recce/data/mcp/get_model.html` | Single-item detail card | Per-environment column tables (base/current), adaptive 2-col/3-col layout when constraints present, PK + not-null + unique badges, not-found empty state, `columns` dict → list normalisation in delegate | | `recce/data/mcp/query.html` | **Tier-3 data table** | **Template for Phase B.** Sticky-header scrollable table (400px cap), type-aware cell rendering, truncation badge, empty/error states. Use this as the base pattern for `query_diff`, `value_diff`, `value_diff_detail`, `top_k_diff` | | `recce/data/mcp/query_diff.html` | **Tier-3 two-env comparison** | Two render modes: side-by-side (no primary_keys → base/current tables) and join-diff (primary_keys → single table with status pills + Added/Removed filter buttons). Row tinting (red=removed, green=added), `in_a`/`in_b` columns stripped from display. 
| +| `recce/data/mcp/top_k_diff.html` | **Tier-3 side-by-side ranked lists** | Two-column grid (Base / Current) with ranked entries, inline bars, rank-change arrows (↑↓), and New/Gone badges for env-exclusive categories. Union of categories shown for both sides; count=0 entries denote absent categories. | `get_server_info` is the **recommended canonical example** for new widgets because it was written after the idiomatic pattern was established (Day 3 @@ -456,6 +459,18 @@ verify rendering without running a full MCP server. Phase B widgets (`query`, `query_diff`, `value_diff`, `value_diff_detail`, `top_k_diff`) render arbitrary columnar data. `recce/data/mcp/query.html` is the canonical example. +**Phase B table layout retrospective (iter 1):** All five Phase B widgets hand-rolled +their own table/list layout because the data shapes diverged enough that a shared +table component would have needed extreme flexibility: `query` is a plain +scrollable table; `query_diff` is two tables OR one flagged table; `value_diff` is a +stat-card grid + match-bar table; `value_diff_detail` is a sticky-left filtered row +table; `top_k_diff` is a side-by-side ranked-list grid with badges and inline bars. +A shared component would have handled all five only by accepting almost all rendering +decisions as parameters — essentially the same work. In iter 2, evaluate whether +extracting a `renderRankedList()` JS helper function (shared between `top_k_diff` and +any future histogram-bar widget) is worth the coordination cost. A full shared-table +abstraction is not recommended until at least 3 widgets converge on the same layout. + ### Data shape The underlying `DataFrame.model_dump(mode='json')` has this exact shape (confirmed from diff --git a/recce/data/mcp/top_k_diff.html b/recce/data/mcp/top_k_diff.html new file mode 100644 index 000000000..e2e0f6218 --- /dev/null +++ b/recce/data/mcp/top_k_diff.html @@ -0,0 +1,493 @@ + + + + + Top-K Diff + + + +
Loading…
+ + + diff --git a/recce/mcp_server.py b/recce/mcp_server.py index f798f5bf5..17366c646 100644 --- a/recce/mcp_server.py +++ b/recce/mcp_server.py @@ -63,6 +63,7 @@ "query_diff", "value_diff", "value_diff_detail", + "top_k_diff", } diff --git a/recce/widget_server.py b/recce/widget_server.py index 8a0fd5c55..038a70424 100644 --- a/recce/widget_server.py +++ b/recce/widget_server.py @@ -1221,6 +1221,159 @@ def query_diff_resource() -> str: return _read_widget_html("query_diff") +# --------------------------------------------------------------------------- +# top_k_diff widget tool + resource +# --------------------------------------------------------------------------- + + +class TopKDiffInput(BaseModel): + model: str = Field(..., description="dbt model name to analyze (e.g. 'customers')") + column_name: str = Field(..., description="Column name to get top-K most frequent values for") + k: Optional[int] = Field( + default=None, + description="Number of top values to return (default: 10)", + ) + + +class TopKEnvStats(BaseModel): + """Per-environment aggregate stats for the top-K diff. + + TopKDiffTask.execute() returns parallel lists: + values: List[str|None] — category labels (same order for base + current) + counts: List[int] — occurrence count for THIS environment + valids: int — count of non-null rows in THIS environment + total: int — total rows in THIS environment + """ + + values: List[Optional[str]] = [] # category labels (None means original null) + counts: List[int] = [] # count per category in this env + valids: int = 0 # non-null row count + total: int = 0 # total row count (including nulls) + + +class TopKDiffOutput(BaseModel): + """Output model for the top_k_diff widget tool. + + Mirrors TopKDiffTask.execute() return shape after _warning extraction: + base: {values, counts, valids, total} + current: {values, counts, valids, total} + + Note: values[] is the same list in both envs (union of top-K by curr_count desc, + base_count desc). 
counts[] are specific to each environment. + + model, column_name, k are echoed from input for the widget header. + warning is extracted from _warning key (single-env mode notice). + """ + + model: str + column_name: str + k: int = 10 + base: TopKEnvStats + current: TopKEnvStats + warning: Optional[str] = None + + +@mcp.tool( + name="top_k_diff", + annotations={ + "title": "Top-K Diff (Widget)", + "readOnlyHint": True, + "destructiveHint": False, + "idempotentHint": True, + "openWorldHint": True, # executes queries against the warehouse + }, + meta={ + "ui": {"resourceUri": "ui://recce/top_k_diff.html"}, + "ui/resourceUri": "ui://recce/top_k_diff.html", + }, +) +async def top_k_diff(args: TopKDiffInput) -> CallToolResult: + """Compare the top-K most frequent values of a column across base and current environments. + + Surfaces shifts in value distribution: new dominant values, retired values, + and count changes. Rendered as a side-by-side ranked list with inline bars. + The agent should not reproduce the ranked list as plain text — the widget + handles rendering. + + Top-K is computed as a SQL FULL OUTER JOIN of the top-K by current count + (desc) then base count (desc), so the same category list is shown for both + environments. Categories absent from one env show a count of 0. + + Args: + model: dbt model name (e.g. 'customers') + column_name: categorical column to analyze (e.g. 
'status') + k: number of top values to return (default: 10) + + Returns: + CallToolResult with structuredContent: TopKDiffOutput shape + {model, column_name, k, + base: {values, counts, valids, total}, + current: {values, counts, valids, total}, + warning?} + + Use when: + - User asks "what are the most common X" or "did the distribution of Y shift" + - Categorical column investigation during PR review + - Cardinality or value-shape change detection (new statuses, retired categories) + Don't use when: + - Need numeric distribution → histogram_diff + - Need row-level diff → value_diff_detail + - Need full value comparison across all columns → value_diff + - Continuous data without natural top-K semantics → profile_diff + """ + raw = await _recce_server._tool_top_k_diff(args.model_dump(exclude_none=True)) + warning = raw.pop("_warning", None) if isinstance(raw, dict) else None + + raw_base = raw.get("base", {}) if isinstance(raw, dict) else {} + raw_curr = raw.get("current", {}) if isinstance(raw, dict) else {} + + base_stats = TopKEnvStats( + values=raw_base.get("values") or [], + counts=raw_base.get("counts") or [], + valids=raw_base.get("valids") or 0, + total=raw_base.get("total") or 0, + ) + curr_stats = TopKEnvStats( + values=raw_curr.get("values") or [], + counts=raw_curr.get("counts") or [], + valids=raw_curr.get("valids") or 0, + total=raw_curr.get("total") or 0, + ) + + output = TopKDiffOutput( + model=args.model, + column_name=args.column_name, + k=args.k if args.k is not None else 10, + base=base_stats, + current=curr_stats, + warning=warning, + ) + + n = len(base_stats.values) + text = ( + f"Top-K diff for '{args.model}.{args.column_name}': " + f"{n} categor{'ies' if n != 1 else 'y'} compared. Rendered in widget." 
+ ) + return CallToolResult( + content=[TextContent(type="text", text=text)], + structuredContent=output.model_dump(), + ) + + +@mcp.resource( + uri="ui://recce/top_k_diff.html", + mime_type="text/html;profile=mcp-app", + meta={ + "ui": { + "csp": {"resourceDomains": ["https://unpkg.com"]}, + "prefersBorder": False, + }, + }, +) +def top_k_diff_resource() -> str: + return _read_widget_html("top_k_diff") + + # --------------------------------------------------------------------------- # Entry point # --------------------------------------------------------------------------- diff --git a/tests/test_widget_server.py b/tests/test_widget_server.py index bc6183071..9b06469b4 100644 --- a/tests/test_widget_server.py +++ b/tests/test_widget_server.py @@ -85,7 +85,7 @@ async def test_mcp_server_filters_widget_tools_when_widgets_enabled(monkeypatch) @pytest.mark.asyncio async def test_widget_server_registers_six_tools_and_six_resources(): - """Widget FastMCP instance has exactly 9 tools/resources (Phase A + query + query_diff + value_diff + value_diff_detail widgets). + """Widget FastMCP instance has exactly 10 tools/resources (Phase A + Phase B widgets). Uses FastMCP public API: mcp.list_tools() and mcp.list_resources(). 
""" @@ -107,6 +107,7 @@ async def test_widget_server_registers_six_tools_and_six_resources(): "query_diff", "value_diff", "value_diff_detail", + "top_k_diff", } assert resource_uris == { "ui://recce/row_count_diff.html", @@ -118,6 +119,7 @@ async def test_widget_server_registers_six_tools_and_six_resources(): "ui://recce/query_diff.html", "ui://recce/value_diff.html", "ui://recce/value_diff_detail.html", + "ui://recce/top_k_diff.html", } @@ -290,6 +292,7 @@ async def test_widget_tool_annotations_present(): "query_diff", "value_diff", "value_diff_detail", + "top_k_diff", ): assert tool_name in tool_map, f"{tool_name} not found in widget mcp tools" t = tool_map[tool_name] @@ -306,13 +309,14 @@ async def test_widget_tool_annotations_present(): t = tool_map[tool_name] assert t.annotations.openWorldHint is False, f"{tool_name}: expected openWorldHint=False" - # query, query_diff, value_diff, and value_diff_detail hit the warehouse — openWorldHint must be True + # query, query_diff, value_diff, value_diff_detail, and top_k_diff hit the warehouse — openWorldHint must be True assert tool_map["query"].annotations.openWorldHint is True, "query: expected openWorldHint=True" assert tool_map["query_diff"].annotations.openWorldHint is True, "query_diff: expected openWorldHint=True" assert tool_map["value_diff"].annotations.openWorldHint is True, "value_diff: expected openWorldHint=True" assert ( tool_map["value_diff_detail"].annotations.openWorldHint is True ), "value_diff_detail: expected openWorldHint=True" + assert tool_map["top_k_diff"].annotations.openWorldHint is True, "top_k_diff: expected openWorldHint=True" # --------------------------------------------------------------------------- @@ -1202,3 +1206,143 @@ async def test_value_diff_detail_returns_calltoolresult_with_pydantic_shape(): # _warning extracted assert validated.warning == "Base environment not configured — comparing current against itself." 
+ + +# --------------------------------------------------------------------------- +# Test 23: top_k_diff widget tool is registered with correct resource URI +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_top_k_diff_widget_registered(): + """top_k_diff appears in widget mcp tools/list and its resource URI exists. + + Verifies: + - tool named 'top_k_diff' is in widget mcp tool list + - resource URI 'ui://recce/top_k_diff.html' is in widget mcp resource list + - model and column_name are required in inputSchema; k is optional + """ + from recce.widget_server import mcp + + tools = await mcp.list_tools() + resources = await mcp.list_resources() + + tool_names = {t.name for t in tools} + resource_uris = {str(r.uri) for r in resources} + + assert "top_k_diff" in tool_names + assert "ui://recce/top_k_diff.html" in resource_uris + + # Check inputSchema: model + column_name required, k optional. + # FastMCP wraps the Pydantic model in an 'args' outer envelope. 
+ tk_tool = next(t for t in tools if t.name == "top_k_diff") + schema = tk_tool.inputSchema + assert schema is not None + + defs = schema.get("$defs", {}) + inner_schema = next(iter(defs.values()), schema) + inner_required = inner_schema.get("required", []) + inner_props = inner_schema.get("properties", {}) + assert "model" in inner_required, "model must be required" + assert "column_name" in inner_required, "column_name must be required" + assert "k" not in inner_required, "k must be optional" + assert "model" in inner_props + assert "column_name" in inner_props + assert "k" in inner_props + + +# --------------------------------------------------------------------------- +# Test 24: top_k_diff returns CallToolResult with correct Pydantic shape +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_top_k_diff_returns_calltoolresult_with_pydantic_shape(): + """top_k_diff handler returns CallToolResult with structuredContent matching TopKDiffOutput. + + Uses the actual TopKDiffTask.execute() return shape (verified from source): + { + "base": {"values": [...], "counts": [...], "valids": N, "total": N}, + "current": {"values": [...], "counts": [...], "valids": N, "total": N}, + } + + Note: values[] is the SAME list in both envs (union ordered by curr_count desc, + base_count desc). counts[] differ per env. Categories absent from an env have + count=0 in that env's counts list. 
+ + Verifies: + - content[0].text is a short human-readable sentence (not a JSON dump) + - structuredContent passes TopKDiffOutput.model_validate() + - model, column_name, k are echoed back + - base and current env stats are hydrated correctly + - _warning is extracted to output.warning named field + - categories with count=0 in an env represent absent entries (New/Gone in widget) + """ + from mcp.types import CallToolResult + + import recce.widget_server as ws + from recce.widget_server import TopKDiffInput, TopKDiffOutput + + mock_server = MagicMock() + # Realistic TopKDiffTask.execute() return shape (verified from recce/tasks/top_k.py). + # values[] is the union ordered by curr_count desc, base_count desc. + # Entries with base_count=0 are "new" (only in current); entries with curr_count=0 are "gone". + mock_server._tool_top_k_diff = AsyncMock( + return_value={ + "base": { + "values": ["active", "pending", "closed", "cancelled", None], + "counts": [500, 300, 200, 0, 10], # 'cancelled' absent in base + "valids": 1010, + "total": 1020, + }, + "current": { + "values": ["active", "pending", "closed", "cancelled", None], + "counts": [480, 320, 180, 50, 8], # 'cancelled' appeared in current + "valids": 1030, + "total": 1038, + }, + "_warning": "Base environment not configured — comparing current against itself.", + } + ) + + original = ws._recce_server + ws._recce_server = mock_server + try: + args = TopKDiffInput(model="orders", column_name="status", k=5) + result = await ws.top_k_diff(args) + finally: + ws._recce_server = original + + assert isinstance(result, CallToolResult) + assert len(result.content) == 1 + content_text = result.content[0].text + assert isinstance(content_text, str) + assert len(content_text) < 200, f"content too long ({len(content_text)} chars): {content_text!r}" + assert "widget" in content_text.lower() + + assert result.structuredContent is not None + validated = TopKDiffOutput.model_validate(result.structuredContent) + + # model, 
column_name, k echoed back + assert validated.model == "orders" + assert validated.column_name == "status" + assert validated.k == 5 + + # base env stats + assert len(validated.base.values) == 5 + assert validated.base.values[0] == "active" + assert validated.base.values[4] is None # null category + assert validated.base.counts[0] == 500 + assert validated.base.counts[3] == 0 # cancelled absent in base + assert validated.base.valids == 1010 + assert validated.base.total == 1020 + + # current env stats (same values list, different counts) + assert len(validated.current.values) == 5 + assert validated.current.counts[0] == 480 + assert validated.current.counts[3] == 50 # cancelled appeared in current + assert validated.current.valids == 1030 + assert validated.current.total == 1038 + + # _warning extracted + assert validated.warning == "Base environment not configured — comparing current against itself." From d37aba65b84e451d186247a276eb0cccda7d1091 Mon Sep 17 00:00:00 2001 From: Kent Date: Wed, 27 May 2026 15:48:10 +0800 Subject: [PATCH 25/43] =?UTF-8?q?feat(widgets):=20add=20histogram=5Fdiff?= =?UTF-8?q?=20widget=20(Phase=20C=20tier=204=20=E2=80=94=20first=20chart?= =?UTF-8?q?=20widget)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Eleventh widget; first tier-4 (statistics/chart) widget. Hand-rolled SVG bars — no new chart library, no CSP/CDN changes. Establishes the "tier-4 via hand-rolled SVG" pattern that profile_diff and any future chart widgets will follow. 
Actual HistogramDiffTask return shape (read from recce/tasks/histogram.py): base/current: {counts: List[int], total: int} (or {} if env failed) min/max: overall min/max across both envs (numeric or date object) bin_edges: N+1 edge values (int/float for numeric, date for datetime) labels: List[str] for numeric cols ("lo-hi" format); None for datetime _tool_histogram_diff auto-detects column_type from catalog — tool only needs model + column_name (+ optional num_bins). - HistogramDiffInput / HistogramEnvStats / HistogramDiffOutput Pydantic models matching actual HistogramDiffTask return shape - Date bin_edges serialised to ISO strings in widget delegate (date objects are not JSON-serialisable natively) - Hand-rolled SVG: viewBox 600x180, base bars (blue 45% opacity) behind current bars (green 70% opacity), x-axis label density auto-reduced (every Nth label for bins > 10), hover tooltip via mousemove - CSS tokens for bar colors with exhaustive @media dark fallback - _warning extracted to output.warning named field - @mcp.tool annotations (openWorldHint=True, hits the warehouse) - WIDGET_TOOLS at 11 tools - 2 new tests (25, 26); enumeration bumped to 11; annotations test extended to cover histogram_diff openWorldHint=True - docs/mcp-widgets.md: widget count 10→11, new "Tier-4 (Chart) Widget Architecture" section explaining hand-roll SVG approach + iter-2 upgrade criteria for swapping to a real chart library Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- docs/mcp-widgets.md | 43 ++- recce/data/mcp/histogram_diff.html | 560 +++++++++++++++++++++++++++++ recce/mcp_server.py | 1 + recce/widget_server.py | 162 +++++++++ tests/test_widget_server.py | 156 +++++++- 5 files changed, 915 insertions(+), 7 deletions(-) create mode 100644 recce/data/mcp/histogram_diff.html diff --git a/docs/mcp-widgets.md b/docs/mcp-widgets.md index 70c7b0bfe..63f0fb74a 100644 --- a/docs/mcp-widgets.md +++ b/docs/mcp-widgets.md @@ -15,7 +15,8 @@ annotates each tool with 
`_meta.ui.resourceUri` pointing at an HTML resource. Phase A ships five widgets: `row_count_diff`, `schema_diff`, `get_server_info`, `list_checks`, and `get_model`. Phase B iter 1 adds `query`, `query_diff`, `value_diff`, `value_diff_detail`, and `top_k_diff` (five tier-3 data-table/list -widgets). Total: **10 of 20 planned widgets** (50% coverage). All run in +widgets). Phase C widget 1 adds `histogram_diff` (first tier-4 chart widget — hand-rolled SVG bars, +no external chart library). Total: **11 of 20 planned widgets** (55% coverage). All run in **local mode only** — cloud/session mode is not supported until iter 2. --- @@ -41,8 +42,9 @@ recce/ value_diff.html # Phase B tier-3: column-level match stats value_diff_detail.html # Phase B tier-3: row-level diff table with filter pills top_k_diff.html # Phase B tier-3: side-by-side ranked lists with inline bars + histogram_diff.html # Phase C tier-4: hand-rolled SVG bar chart (base vs current bins) tests/ - test_widget_server.py # 24 tests covering WIDGET_TOOLS coordination + widget server. + test_widget_server.py # 26 tests covering WIDGET_TOOLS coordination + widget server. docs/ mcp-widgets.md # This file. ``` @@ -425,7 +427,7 @@ documented and supported. Reconsider if ext-apps publishes a Python SDK. ## Reference Widgets -Seven working examples (in order of implementation): +Eight working examples (in order of implementation): | File | Tier | What it demonstrates | |------|------|----------------------| @@ -437,6 +439,7 @@ Seven working examples (in order of implementation): | `recce/data/mcp/query.html` | **Tier-3 data table** | **Template for Phase B.** Sticky-header scrollable table (400px cap), type-aware cell rendering, truncation badge, empty/error states. 
Use this as the base pattern for `query_diff`, `value_diff`, `value_diff_detail`, `top_k_diff` | | `recce/data/mcp/query_diff.html` | **Tier-3 two-env comparison** | Two render modes: side-by-side (no primary_keys → base/current tables) and join-diff (primary_keys → single table with status pills + Added/Removed filter buttons). Row tinting (red=removed, green=added), `in_a`/`in_b` columns stripped from display. | | `recce/data/mcp/top_k_diff.html` | **Tier-3 side-by-side ranked lists** | Two-column grid (Base / Current) with ranked entries, inline bars, rank-change arrows (↑↓), and New/Gone badges for env-exclusive categories. Union of categories shown for both sides; count=0 entries denote absent categories. | +| `recce/data/mcp/histogram_diff.html` | **Tier-4 SVG bar chart** | **First chart widget.** Hand-rolled SVG (no external chart library). Base bars (blue, semi-transparent) overlaid with current bars (green) per bin. viewBox-scaled for responsiveness. Hover tooltip shows bin range + both counts. x-axis label density auto-reduced for dense bins. See "Tier-4 (Chart) Widget Architecture" below. | `get_server_info` is the **recommended canonical example** for new widgets because it was written after the idiomatic pattern was established (Day 3 @@ -590,12 +593,42 @@ This contrasts with Phase A tools that only read dbt manifest/state: --- +## Tier-4 (Chart) Widget Architecture + +Phase C introduces chart-tier widgets. The first is `histogram_diff`. Iter 1 uses **hand-rolled SVG bars** — no external chart library. This was a deliberate architectural decision: + +### Why hand-rolled SVG (not Chart.js / Vega-Lite / D3)? + +1. **CSP constraint** — `resourceDomains` in the `@mcp.resource` meta is currently `["https://unpkg.com"]`. Adding a chart CDN (e.g. `cdn.jsdelivr.net`, `cdn.skypack.dev`) requires validation against MCP Apps' content security policy sandbox. Avoiding a new CDN keeps CSP unchanged. +2. 
**No library lock-in** — iter 1 widgets are deliberately minimal. Committing to Chart.js shapes the data contract and HTML rendering for all future chart widgets. Hand-rolled SVG defers that commitment. +3. **Bundle size** — the widget HTML is self-contained (no npm, no build step). Chart.js alone is ~200KB. For a simple bar chart, the trade-off favours SVG primitives. + +### Hand-rolled SVG pattern (`histogram_diff.html`) + +- Single `<svg viewBox="0 0 600 180">` — responsive via `width: 100%` on the SVG element. +- Y-axis: 4 evenly-spaced ticks with grid lines; count labels right-justified. +- X-axis: bin labels rotated 35° to avoid overlap; only every Nth label shown when bins > 10. +- Bars: base `<rect>` (blue, 45% opacity) behind current `<rect>` (green, 70% opacity). Overlay layout — same x-position, tallest bar visible. +- Hover tooltip: a transparent `<rect>` overlay per bin triggers `mousemove` on the SVG; tooltip positioned relative to the containing `chart-wrap` div. +- Dark mode: `@media (prefers-color-scheme: dark)` overrides all SVG class fill colors and CSS token fallbacks exhaustively. + +### When to upgrade to a real chart library (iter 2 considerations) + +Consider Chart.js or Vega-Lite for future chart widgets when: +- Log-scale y-axis is needed (hand-rolled requires manual tick calculation) +- Interactive zoom/pan is required +- Multiple series with automatic legend management +- The chart type is complex (scatter, violin, heatmap) + +If upgrading, add the chosen CDN to `resourceDomains` in **all** widget `@mcp.resource` registrations (the list is per-server, shared). Validate with MCP Apps' CSP sandbox before shipping. + +--- + ## What Is NOT in Iter 1 These are deferred to iter 2 or later: -- **Chart-tier widgets** (bar charts, histograms) — requires a charting library - added to the CSP `resourceDomains` list and tested against MCP Apps sandbox. +- **Advanced chart interactions** — log-scale toggle, zoom/pan, downloadable PNG.
The `histogram_diff` widget provides the canonical SVG bar pattern; iter 2 can wrap it in a chart library if these are needed. - **Cloud/session mode** — `recce mcp-widget-server` raises immediately if `--cloud` or `--session` kwargs are passed. Cloud support requires state-loader plumbing not attempted in iter 1. diff --git a/recce/data/mcp/histogram_diff.html b/recce/data/mcp/histogram_diff.html new file mode 100644 index 000000000..8a45515af --- /dev/null +++ b/recce/data/mcp/histogram_diff.html @@ -0,0 +1,560 @@ + + + + + Histogram Diff + + + +
Loading…
+ + + diff --git a/recce/mcp_server.py b/recce/mcp_server.py index 17366c646..edee2df04 100644 --- a/recce/mcp_server.py +++ b/recce/mcp_server.py @@ -64,6 +64,7 @@ "value_diff", "value_diff_detail", "top_k_diff", + "histogram_diff", } diff --git a/recce/widget_server.py b/recce/widget_server.py index 038a70424..f4b8e99b3 100644 --- a/recce/widget_server.py +++ b/recce/widget_server.py @@ -1374,6 +1374,168 @@ def top_k_diff_resource() -> str: return _read_widget_html("top_k_diff") +# --------------------------------------------------------------------------- +# histogram_diff widget tool + resource +# --------------------------------------------------------------------------- + + +class HistogramDiffInput(BaseModel): + model: str = Field(..., description="dbt model name to analyze (e.g. 'customers')") + column_name: str = Field(..., description="Column name to generate histogram for (numeric or datetime)") + num_bins: Optional[int] = Field( + default=None, + description="Number of histogram bins (default: 50)", + ) + + +class HistogramEnvStats(BaseModel): + """Per-environment histogram counts. + + HistogramDiffTask.execute() returns per-env dicts: + counts: List[int] — count per bin (same length as num_bins) + total: int — total rows in this environment + + An empty dict {} is returned when the environment fails or produces no data. + """ + + counts: List[int] = [] # count per bin + total: Optional[int] = None # total rows (may be None if env produced empty dict) + + +class HistogramDiffOutput(BaseModel): + """Output model for the histogram_diff widget tool. 
+ + Mirrors HistogramDiffTask.execute() return shape after _warning extraction: + base: {counts, total} + current: {counts, total} + min: overall min value across both envs (numeric or ISO date string) + max: overall max value across both envs + bin_edges: list of bin boundary values (N+1 values for N bins) + labels: list of bin label strings for numeric cols; null for datetime cols + + model, column_name are echoed from input for the widget header. + warning is extracted from _warning key (single-env mode notice). + """ + + model: str + column_name: str + base: HistogramEnvStats + current: HistogramEnvStats + min: Optional[Any] = None + max: Optional[Any] = None + bin_edges: List[Any] = [] # List[int | float | date] + labels: Optional[List[str]] = None # None for datetime columns + warning: Optional[str] = None + + +@mcp.tool( + name="histogram_diff", + annotations={ + "title": "Histogram Diff (Widget)", + "readOnlyHint": True, + "destructiveHint": False, + "idempotentHint": True, + "openWorldHint": True, # executes queries against the warehouse + }, + meta={ + "ui": {"resourceUri": "ui://recce/histogram_diff.html"}, + "ui/resourceUri": "ui://recce/histogram_diff.html", + }, +) +async def histogram_diff(args: HistogramDiffInput) -> CallToolResult: + """Compare numeric or datetime column distributions across base and current environments. + + Renders an SVG bar chart widget — base bars and current bars overlaid per bin. + The agent should not enumerate bin counts as plain text — the widget handles + all rendering. + + Column type is auto-detected from the dbt catalog; no explicit column_type + argument is required. + + Args: + model: dbt model name (e.g. 'orders') + column_name: numeric or datetime column to bin (e.g. 
'amount', 'created_at') + num_bins: optional bin count (default: 50 for numeric; adaptive for datetime) + + Returns: + CallToolResult with structuredContent: HistogramDiffOutput shape + {model, column_name, + base: {counts, total}, + current: {counts, total}, + min, max, bin_edges, labels?, + warning?} + + Use when: + - User asks "how is X distributed" / "did the distribution shift" + - Numeric or continuous column investigation during PR review + - Detecting outliers or distribution skew between environments + Don't use when: + - Categorical column → use top_k_diff instead + - Need per-row diff → use value_diff_detail + - Stats summary only (min/max/stddev) → use profile_diff + - String / boolean columns (not supported) → use top_k_diff + """ + raw = await _recce_server._tool_histogram_diff(args.model_dump(exclude_none=True)) + warning = raw.pop("_warning", None) if isinstance(raw, dict) else None + + raw_base = raw.get("base", {}) if isinstance(raw, dict) else {} + raw_curr = raw.get("current", {}) if isinstance(raw, dict) else {} + + base_stats = HistogramEnvStats( + counts=raw_base.get("counts") or [], + total=raw_base.get("total"), + ) + curr_stats = HistogramEnvStats( + counts=raw_curr.get("counts") or [], + total=raw_curr.get("total"), + ) + + # bin_edges may contain date objects — convert to ISO strings for JSON serialisation + raw_edges = raw.get("bin_edges") or [] if isinstance(raw, dict) else [] + bin_edges = [e.isoformat() if hasattr(e, "isoformat") else e for e in raw_edges] + + raw_min = raw.get("min") if isinstance(raw, dict) else None + raw_max = raw.get("max") if isinstance(raw, dict) else None + min_val = raw_min.isoformat() if hasattr(raw_min, "isoformat") else raw_min + max_val = raw_max.isoformat() if hasattr(raw_max, "isoformat") else raw_max + + output = HistogramDiffOutput( + model=args.model, + column_name=args.column_name, + base=base_stats, + current=curr_stats, + min=min_val, + max=max_val, + bin_edges=bin_edges, + 
labels=raw.get("labels") if isinstance(raw, dict) else None, + warning=warning, + ) + + n_bins = len(base_stats.counts) or len(curr_stats.counts) + text = ( + f"Histogram diff for '{args.model}.{args.column_name}': " + f"{n_bins} bin{'s' if n_bins != 1 else ''} rendered in widget." + ) + return CallToolResult( + content=[TextContent(type="text", text=text)], + structuredContent=output.model_dump(), + ) + + +@mcp.resource( + uri="ui://recce/histogram_diff.html", + mime_type="text/html;profile=mcp-app", + meta={ + "ui": { + "csp": {"resourceDomains": ["https://unpkg.com"]}, + "prefersBorder": False, + }, + }, +) +def histogram_diff_resource() -> str: + return _read_widget_html("histogram_diff") + + # --------------------------------------------------------------------------- # Entry point # --------------------------------------------------------------------------- diff --git a/tests/test_widget_server.py b/tests/test_widget_server.py index 9b06469b4..27fe2b35e 100644 --- a/tests/test_widget_server.py +++ b/tests/test_widget_server.py @@ -85,7 +85,7 @@ async def test_mcp_server_filters_widget_tools_when_widgets_enabled(monkeypatch) @pytest.mark.asyncio async def test_widget_server_registers_six_tools_and_six_resources(): - """Widget FastMCP instance has exactly 10 tools/resources (Phase A + Phase B widgets). + """Widget FastMCP instance has exactly 11 tools/resources (Phase A + Phase B + Phase C widgets). Uses FastMCP public API: mcp.list_tools() and mcp.list_resources(). 
""" @@ -108,6 +108,7 @@ async def test_widget_server_registers_six_tools_and_six_resources(): "value_diff", "value_diff_detail", "top_k_diff", + "histogram_diff", } assert resource_uris == { "ui://recce/row_count_diff.html", @@ -120,6 +121,7 @@ async def test_widget_server_registers_six_tools_and_six_resources(): "ui://recce/value_diff.html", "ui://recce/value_diff_detail.html", "ui://recce/top_k_diff.html", + "ui://recce/histogram_diff.html", } @@ -293,6 +295,7 @@ async def test_widget_tool_annotations_present(): "value_diff", "value_diff_detail", "top_k_diff", + "histogram_diff", ): assert tool_name in tool_map, f"{tool_name} not found in widget mcp tools" t = tool_map[tool_name] @@ -309,7 +312,7 @@ async def test_widget_tool_annotations_present(): t = tool_map[tool_name] assert t.annotations.openWorldHint is False, f"{tool_name}: expected openWorldHint=False" - # query, query_diff, value_diff, value_diff_detail, and top_k_diff hit the warehouse — openWorldHint must be True + # query, query_diff, value_diff, value_diff_detail, top_k_diff, and histogram_diff hit the warehouse assert tool_map["query"].annotations.openWorldHint is True, "query: expected openWorldHint=True" assert tool_map["query_diff"].annotations.openWorldHint is True, "query_diff: expected openWorldHint=True" assert tool_map["value_diff"].annotations.openWorldHint is True, "value_diff: expected openWorldHint=True" @@ -317,6 +320,9 @@ async def test_widget_tool_annotations_present(): tool_map["value_diff_detail"].annotations.openWorldHint is True ), "value_diff_detail: expected openWorldHint=True" assert tool_map["top_k_diff"].annotations.openWorldHint is True, "top_k_diff: expected openWorldHint=True" + assert ( + tool_map["histogram_diff"].annotations.openWorldHint is True + ), "histogram_diff: expected openWorldHint=True" # --------------------------------------------------------------------------- @@ -1346,3 +1352,149 @@ async def test_top_k_diff_returns_calltoolresult_with_pydantic_shape(): # 
_warning extracted assert validated.warning == "Base environment not configured — comparing current against itself." + + +# --------------------------------------------------------------------------- +# Test 25: histogram_diff widget tool is registered with correct resource URI +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_histogram_diff_widget_registered(): + """histogram_diff appears in widget mcp tools/list and its resource URI exists. + + Verifies: + - tool named 'histogram_diff' is in widget mcp tool list + - resource URI 'ui://recce/histogram_diff.html' is in widget mcp resource list + - model and column_name are required in inputSchema; num_bins is optional + """ + from recce.widget_server import mcp + + tools = await mcp.list_tools() + resources = await mcp.list_resources() + + tool_names = {t.name for t in tools} + resource_uris = {str(r.uri) for r in resources} + + assert "histogram_diff" in tool_names + assert "ui://recce/histogram_diff.html" in resource_uris + + # Check inputSchema: model + column_name required, num_bins optional. + # FastMCP wraps the Pydantic model in an 'args' outer envelope. 
+ hd_tool = next(t for t in tools if t.name == "histogram_diff") + schema = hd_tool.inputSchema + assert schema is not None + + defs = schema.get("$defs", {}) + inner_schema = next(iter(defs.values()), schema) + inner_required = inner_schema.get("required", []) + inner_props = inner_schema.get("properties", {}) + assert "model" in inner_required, "model must be required" + assert "column_name" in inner_required, "column_name must be required" + assert "num_bins" not in inner_required, "num_bins must be optional" + assert "model" in inner_props + assert "column_name" in inner_props + assert "num_bins" in inner_props + + +# --------------------------------------------------------------------------- +# Test 26: histogram_diff returns CallToolResult with correct Pydantic shape +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_histogram_diff_returns_calltoolresult_with_pydantic_shape(): + """histogram_diff handler returns CallToolResult with structuredContent matching HistogramDiffOutput. + + Uses the actual HistogramDiffTask.execute() return shape (verified from source): + { + "base": {"counts": [int, ...], "total": int}, + "current": {"counts": [int, ...], "total": int}, + "min": <numeric or date>, + "max": <numeric or date>, + "bin_edges": [edge0, edge1, ..., edgeN], + "labels": ["lo-hi", ...] 
for numeric cols; None for datetime, + } + + Verifies: + - content[0].text is a short human-readable sentence (not a JSON dump) + - structuredContent passes HistogramDiffOutput.model_validate() + - base and current counts are hydrated correctly + - bin_edges and labels are preserved + - min/max are echoed from the raw result + - _warning is extracted to output.warning named field + - model and column_name are echoed from input + """ + from mcp.types import CallToolResult + + import recce.widget_server as ws + from recce.widget_server import HistogramDiffInput, HistogramDiffOutput + + mock_server = MagicMock() + # Realistic HistogramDiffTask.execute() return shape (confirmed from recce/tasks/histogram.py). + # Numeric column: 5 bins, shared bin_edges, labels from integer binning. + mock_server._tool_histogram_diff = AsyncMock( + return_value={ + "base": { + "counts": [120, 340, 210, 80, 15], + "total": 765, + }, + "current": { + "counts": [100, 360, 220, 90, 20], + "total": 790, + }, + "min": 0, + "max": 500, + "bin_edges": [0, 100, 200, 300, 400, 500], + "labels": ["0-100", "100-200", "200-300", "300-400", "400-500"], + "_warning": "Base environment not configured — comparing current against itself.", + } + ) + + original = ws._recce_server + ws._recce_server = mock_server + try: + args = HistogramDiffInput(model="orders", column_name="amount") + result = await ws.histogram_diff(args) + finally: + ws._recce_server = original + + assert isinstance(result, CallToolResult) + assert len(result.content) == 1 + content_text = result.content[0].text + assert isinstance(content_text, str) + assert len(content_text) < 200, f"content too long ({len(content_text)} chars): {content_text!r}" + assert "widget" in content_text.lower() + + assert result.structuredContent is not None + validated = HistogramDiffOutput.model_validate(result.structuredContent) + + # model + column_name echoed back + assert validated.model == "orders" + assert validated.column_name == "amount" + + # 
base env stats + assert len(validated.base.counts) == 5 + assert validated.base.counts[0] == 120 + assert validated.base.counts[1] == 340 + assert validated.base.total == 765 + + # current env stats + assert len(validated.current.counts) == 5 + assert validated.current.counts[1] == 360 + assert validated.current.total == 790 + + # bin_edges and labels preserved + assert len(validated.bin_edges) == 6 + assert validated.bin_edges[0] == 0 + assert validated.bin_edges[5] == 500 + assert validated.labels is not None + assert len(validated.labels) == 5 + assert validated.labels[0] == "0-100" + + # min/max + assert validated.min == 0 + assert validated.max == 500 + + # _warning extracted + assert validated.warning == "Base environment not configured — comparing current against itself." From be9e939b58f98a94d3dd6966616a2bbacef0f7b1 Mon Sep 17 00:00:00 2001 From: Kent Date: Wed, 27 May 2026 16:00:33 +0800 Subject: [PATCH 26/43] feat(widgets): add profile_diff widget (Phase C tier 4, Phase C complete) Twelfth widget; closes Phase C. Per-column statistical profile comparison (count, null proportion, distinct count, min, max, avg, median) across base + current envs rendered as a card grid. 
- ProfileColumnStats / ProfileColumnDiff / ProfileDiffOutput / ProfileDiffInput Pydantic models matching actual ProfileDiffResult return shape (base + current DataFrames with columns: column_name, data_type, row_count, not_null_proportion, distinct_proportion, distinct_count, is_unique, min, max, avg, median) - _parse_profile_dataframe() helper: DataFrame dict -> {col_name: ProfileColumnStats} with _to_float / _to_int / _to_str / _to_bool coercions for agate type variants - _parse_data_type_map() helper: DataFrame dict -> {col_name: data_type} - Union column ordering: base columns first, then current-only appended; columns absent from one env have base=None or current=None in the diff - min/max arrive as str (SQL CAST to text type in PROFILE_COLUMN_JINJA_TEMPLATE) -- no isoformat() conversion needed (unlike histogram_diff bin_edges) - _warning extracted to output.warning named field - @mcp.tool annotations: openWorldHint=True (warehouse queries) - WIDGET_TOOLS now 12 tools -- 60% coverage, Open Q #6 FastMCP migration trigger threshold EXCEEDED (was 50% hypothesis) - 2 new tests (27, 28); enumeration bumped to 12 in test 3; profile_diff added to annotation loop in test 8; openWorldHint assertion added - HTML: 521 lines, ~22KB; CSS grid card layout (260px min per card); stat mini-table per card with Base->Current columns; delta chips for numeric changes; pp (percentage-point) delta for proportions; type classification (numeric/text/date-time/boolean/other) for badge and stat visibility - No sparklines -- ProfileDiffTask returns aggregate stats only, no per-bin data Phase C (tier 4 chart/stats): histogram_diff (SVG bars), profile_diff (CSS grid cards). Both keep CSP at single unpkg origin. If iter 3+ needs richer chart types (stacked bar, line, heatmap), evaluate Chart.js / Vega-Lite then. 
Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- docs/mcp-widgets.md | 20 +- recce/data/mcp/profile_diff.html | 521 +++++++++++++++++++++++++++++++ recce/mcp_server.py | 1 + recce/widget_server.py | 278 +++++++++++++++++ tests/test_widget_server.py | 197 +++++++++++- 5 files changed, 1009 insertions(+), 8 deletions(-) create mode 100644 recce/data/mcp/profile_diff.html diff --git a/docs/mcp-widgets.md b/docs/mcp-widgets.md index 63f0fb74a..282f7acb0 100644 --- a/docs/mcp-widgets.md +++ b/docs/mcp-widgets.md @@ -15,8 +15,9 @@ annotates each tool with `_meta.ui.resourceUri` pointing at an HTML resource. Phase A ships five widgets: `row_count_diff`, `schema_diff`, `get_server_info`, `list_checks`, and `get_model`. Phase B iter 1 adds `query`, `query_diff`, `value_diff`, `value_diff_detail`, and `top_k_diff` (five tier-3 data-table/list -widgets). Phase C widget 1 adds `histogram_diff` (first tier-4 chart widget — hand-rolled SVG bars, -no external chart library). Total: **11 of 20 planned widgets** (55% coverage). All run in +widgets). Phase C adds two tier-4 chart widgets: `histogram_diff` (hand-rolled SVG +bar chart) and `profile_diff` (per-column statistical profile card grid). Phase C is +now complete. Total: **12 of 20 planned widgets** (60% coverage). All run in **local mode only** — cloud/session mode is not supported until iter 2. --- @@ -43,8 +44,9 @@ recce/ value_diff_detail.html # Phase B tier-3: row-level diff table with filter pills top_k_diff.html # Phase B tier-3: side-by-side ranked lists with inline bars histogram_diff.html # Phase C tier-4: hand-rolled SVG bar chart (base vs current bins) + profile_diff.html # Phase C tier-4: per-column profile card grid (count/null/distinct/min/max/avg/median) tests/ - test_widget_server.py # 26 tests covering WIDGET_TOOLS coordination + widget server. + test_widget_server.py # 28 tests covering WIDGET_TOOLS coordination + widget server. docs/ mcp-widgets.md # This file. 
``` @@ -440,6 +442,7 @@ Eight working examples (in order of implementation): | `recce/data/mcp/query_diff.html` | **Tier-3 two-env comparison** | Two render modes: side-by-side (no primary_keys → base/current tables) and join-diff (primary_keys → single table with status pills + Added/Removed filter buttons). Row tinting (red=removed, green=added), `in_a`/`in_b` columns stripped from display. | | `recce/data/mcp/top_k_diff.html` | **Tier-3 side-by-side ranked lists** | Two-column grid (Base / Current) with ranked entries, inline bars, rank-change arrows (↑↓), and New/Gone badges for env-exclusive categories. Union of categories shown for both sides; count=0 entries denote absent categories. | | `recce/data/mcp/histogram_diff.html` | **Tier-4 SVG bar chart** | **First chart widget.** Hand-rolled SVG (no external chart library). Base bars (blue, semi-transparent) overlaid with current bars (green) per bin. viewBox-scaled for responsiveness. Hover tooltip shows bin range + both counts. x-axis label density auto-reduced for dense bins. See "Tier-4 (Chart) Widget Architecture" below. | +| `recce/data/mcp/profile_diff.html` | **Tier-4 per-column profile card grid** | **Phase C complete.** Per-column statistical profile comparison. ProfileDiffResult base/current DataFrames merged by column_name into card grid. Stats: row_count, not_null_proportion, distinct_count, distinct_proportion, min/max (string, SQL-cast), avg, median. Delta chips (+N, -N) for numeric changes; proportions shown as percentages with pp delta. Columns absent from one env still shown (base or current is null). No sparklines — task returns no per-bin data. | `get_server_info` is the **recommended canonical example** for new widgets because it was written after the idiomatic pattern was established (Day 3 @@ -612,6 +615,15 @@ Phase C introduces chart-tier widgets. The first is `histogram_diff`. 
Iter 1 use - Hover tooltip: a transparent `<rect>` overlay per bin triggers `mousemove` on the SVG; tooltip positioned relative to the containing `chart-wrap` div. - Dark mode: `@media (prefers-color-scheme: dark)` overrides all SVG class fill colors and CSS token fallbacks exhaustively. +### Phase C retrospective — hand-roll SVG verdict + +Phase C shipped two chart widgets (`histogram_diff` and `profile_diff`) using hand-rolled SVG or plain CSS grid layouts. Neither required an external chart library. Key findings: + +- `histogram_diff`: SVG `<rect>` bars with viewBox scaling worked well for the overlaid base/current histogram. The hover tooltip and x-axis label density auto-reduction added ~80 lines of JS but no library dependency. +- `profile_diff`: Profile data is tabular (one row per column × one column per stat). A CSS grid card layout was more appropriate than SVG. No mini sparklines — `ProfileDiffTask` returns aggregate stats per column only, no per-bin data. +- **CSP stayed at single unpkg origin** throughout Phase C. Both widgets load only `@modelcontextprotocol/ext-apps@0.4.0` from unpkg. +- All 12 widgets so far use Claude design tokens (`var(--token, fallback)`) and exhaustive `@media (prefers-color-scheme: dark)` overrides. Pydantic models + `CallToolResult` with explicit `structuredContent` is the established pattern. + ### When to upgrade to a real chart library (iter 2 considerations) Consider Chart.js or Vega-Lite for future chart widgets when: @@ -620,7 +632,7 @@ Consider Chart.js or Vega-Lite for future chart widgets when: - Multiple series with automatic legend management - The chart type is complex (scatter, violin, heatmap) -If upgrading, add the chosen CDN to `resourceDomains` in **all** widget `@mcp.resource` registrations (the list is per-server, shared). Validate with MCP Apps' CSP sandbox before shipping. +**Trigger threshold exceeded**: 12/20 widgets (60%) now use the hand-roll SVG + Pydantic pattern.
If iter 2 introduces charts requiring stacked bars, line charts, or heatmaps, evaluate adopting Chart.js or Vega-Lite. Add the chosen CDN to `resourceDomains` in **all** widget `@mcp.resource` registrations (the list is per-server, shared). Validate with MCP Apps' CSP sandbox before shipping. --- diff --git a/recce/data/mcp/profile_diff.html b/recce/data/mcp/profile_diff.html new file mode 100644 index 000000000..86724ab62 --- /dev/null +++ b/recce/data/mcp/profile_diff.html @@ -0,0 +1,521 @@ + + + + + Profile Diff + + + +
+
+
Loading profile diff…
+
Waiting for profiling results.
+
+
+ + + + diff --git a/recce/mcp_server.py b/recce/mcp_server.py index edee2df04..7556e54ee 100644 --- a/recce/mcp_server.py +++ b/recce/mcp_server.py @@ -65,6 +65,7 @@ "value_diff_detail", "top_k_diff", "histogram_diff", + "profile_diff", } diff --git a/recce/widget_server.py b/recce/widget_server.py index f4b8e99b3..69cb42265 100644 --- a/recce/widget_server.py +++ b/recce/widget_server.py @@ -1536,6 +1536,284 @@ def histogram_diff_resource() -> str: return _read_widget_html("histogram_diff") +# --------------------------------------------------------------------------- +# profile_diff widget tool + resource +# --------------------------------------------------------------------------- + +# ProfileDiffResult.model_dump(mode='json') shape (from recce/tasks/profile.py): +# { +# "base": {"columns": [{"key", "name", "type"}, ...], "data": [[row_values], ...]}, +# "current": {"columns": [{"key", "name", "type"}, ...], "data": [[row_values], ...]} +# } +# +# Each row in data corresponds to one profiled column, with values for: +# column_name, data_type, row_count, not_null_proportion, distinct_proportion, +# distinct_count, is_unique, min, max, avg, median +# +# min/max are CAST TO STRING in the SQL template (`cast(min(...) as text_type)`), +# so they arrive as Python str (or None). avg/median are numeric floats or None. +# is_unique arrives as bool or None (NULL for empty tables, per template comment). + + +class ProfileColumnStats(BaseModel): + """Per-environment stats for a single profiled column. 
+ + All fields are Optional — not all stats apply to every column type: + - min/max: only for numeric and date/time columns (cast to str in SQL) + - avg: numeric + logical (boolean); None for text/struct + - median: numeric only; None otherwise + - is_unique: None for empty tables (SQL emits NULL by design) + """ + + row_count: Optional[int] = None + not_null_proportion: Optional[float] = None + distinct_proportion: Optional[float] = None + distinct_count: Optional[int] = None + is_unique: Optional[bool] = None + min: Optional[str] = None # always str — SQL CAST to text type + max: Optional[str] = None # always str — SQL CAST to text type + avg: Optional[float] = None + median: Optional[float] = None + + +class ProfileColumnDiff(BaseModel): + """Profile diff for one column: name, data_type, base stats, current stats.""" + + column_name: str + data_type: Optional[str] = None + base: Optional[ProfileColumnStats] = None + current: Optional[ProfileColumnStats] = None + + +class ProfileDiffOutput(BaseModel): + """Output model for the profile_diff widget tool. + + ProfileDiffResult.model_dump(mode='json') returns two DataFrames (base, current). + Each DataFrame has columns [column_name, data_type, row_count, not_null_proportion, + distinct_proportion, distinct_count, is_unique, min, max, avg, median] and data rows, + one row per profiled column. + + The delegate merges base + current rows by column_name into per-column ProfileColumnDiff + entries, then stores them in a list. + + model is echoed from input for the widget header. + warning is extracted from _warning key (single-env mode notice). + """ + + model: str + columns: List[ProfileColumnDiff] + warning: Optional[str] = None + + +class ProfileDiffInput(BaseModel): + model: str = Field(..., description="dbt model name to profile (e.g. 
'customers')") + columns: Optional[List[str]] = Field( + default=None, + description="Columns to profile (default: all columns in the model)", + ) + + +def _parse_profile_dataframe(raw_df: Optional[dict]) -> Dict[str, ProfileColumnStats]: + """Convert a ProfileDiffResult DataFrame dict → {column_name: ProfileColumnStats}. + + Returns empty dict when raw_df is None or missing columns/data. + The DataFrame columns list is used to build an index so row values are mapped + by position to the correct stat field. + """ + if not raw_df: + return {} + + col_meta = raw_df.get("columns") or [] + col_names = [c.get("name") or c.get("key", "") for c in col_meta] + rows = raw_df.get("data") or [] + + result: Dict[str, ProfileColumnStats] = {} + for row in rows: + if not isinstance(row, (list, tuple)) or len(row) < len(col_names): + continue + row_dict = dict(zip(col_names, row)) + + col_name = row_dict.get("column_name") + if not col_name: + continue + + def _to_float(v: Any) -> Optional[float]: + if v is None: + return None + # May arrive as a Decimal (from agate) serialised to string by model_dump + try: + return float(v) + except (TypeError, ValueError): + return None + + def _to_int(v: Any) -> Optional[int]: + if v is None: + return None + try: + return int(v) + except (TypeError, ValueError): + return None + + def _to_str(v: Any) -> Optional[str]: + if v is None: + return None + if hasattr(v, "isoformat"): + return v.isoformat() + return str(v) + + def _to_bool(v: Any) -> Optional[bool]: + if v is None: + return None + if isinstance(v, bool): + return v + if isinstance(v, int): + return bool(v) + if isinstance(v, str): + return v.lower() in ("true", "1", "t", "yes") + return None + + result[str(col_name)] = ProfileColumnStats( + row_count=_to_int(row_dict.get("row_count")), + not_null_proportion=_to_float(row_dict.get("not_null_proportion")), + distinct_proportion=_to_float(row_dict.get("distinct_proportion")), + distinct_count=_to_int(row_dict.get("distinct_count")), + 
is_unique=_to_bool(row_dict.get("is_unique")), + min=_to_str(row_dict.get("min")), + max=_to_str(row_dict.get("max")), + avg=_to_float(row_dict.get("avg")), + median=_to_float(row_dict.get("median")), + ) + return result + + +def _parse_data_type_map(raw_df: Optional[dict]) -> Dict[str, Optional[str]]: + """Extract {column_name: data_type} from a profile DataFrame dict.""" + if not raw_df: + return {} + col_meta = raw_df.get("columns") or [] + col_names = [c.get("name") or c.get("key", "") for c in col_meta] + rows = raw_df.get("data") or [] + + result: Dict[str, Optional[str]] = {} + for row in rows: + if not isinstance(row, (list, tuple)) or len(row) < len(col_names): + continue + row_dict = dict(zip(col_names, row)) + col_name = row_dict.get("column_name") + if col_name: + result[str(col_name)] = row_dict.get("data_type") + return result + + +@mcp.tool( + name="profile_diff", + annotations={ + "title": "Profile Diff (Widget)", + "readOnlyHint": True, + "destructiveHint": False, + "idempotentHint": True, + "openWorldHint": True, # executes queries against the warehouse + }, + meta={ + "ui": {"resourceUri": "ui://recce/profile_diff.html"}, + "ui/resourceUri": "ui://recce/profile_diff.html", + }, +) +async def profile_diff(args: ProfileDiffInput) -> CallToolResult: + """Compare per-column statistical profiles across base and current environments. + + Returns side-by-side stats (row count, null proportion, distinct count, min, max, + avg, median) for every profiled column, rendered as a card grid. The agent should + not enumerate the stats as plain text — the widget handles presentation. + + Column type is inferred by the profiling SQL: numeric columns get avg/median; + date/time columns get min/max as ISO strings; text columns show only count + and distinct stats. + + Args: + model: dbt model name (e.g. 
'customers') + columns: optional subset of columns to profile (default: all columns) + + Returns: + CallToolResult with structuredContent: ProfileDiffOutput shape + {model, columns: [{column_name, data_type, base: {stats}, current: {stats}}], + warning?} + + Use when: + - User asks "did the stats shift" / "any null count change" + - PR review needs distribution sanity check across columns + - Following up a row_count_diff showing changes — drill into which columns shifted + - Verifying numeric ranges (min/max) or distinct cardinality changed + Don't use when: + - Need value-level diff → value_diff or value_diff_detail + - Need distribution bars → histogram_diff (one column at a time) + - Need top-K most frequent values → top_k_diff + - Single-environment only — tool warns but returns no useful comparison + """ + raw = await _recce_server._tool_profile_diff(args.model_dump(exclude_none=True)) + warning = raw.pop("_warning", None) if isinstance(raw, dict) else None + + raw_base = raw.get("base") if isinstance(raw, dict) else None + raw_curr = raw.get("current") if isinstance(raw, dict) else None + + base_stats = _parse_profile_dataframe(raw_base) + curr_stats = _parse_profile_dataframe(raw_curr) + + # Build data_type map from whichever DataFrame has it (prefer current) + dtype_base = _parse_data_type_map(raw_base) + dtype_curr = _parse_data_type_map(raw_curr) + + # Union of all column names, preserving order (base first, then current-only) + all_columns: List[str] = [] + seen: set = set() + for col in list(base_stats.keys()) + list(curr_stats.keys()): + if col not in seen: + all_columns.append(col) + seen.add(col) + + col_diffs: List[ProfileColumnDiff] = [] + for col_name in all_columns: + data_type = dtype_curr.get(col_name) or dtype_base.get(col_name) + col_diffs.append( + ProfileColumnDiff( + column_name=col_name, + data_type=data_type, + base=base_stats.get(col_name), + current=curr_stats.get(col_name), + ) + ) + + output = ProfileDiffOutput( + model=args.model, 
+ columns=col_diffs, + warning=warning, + ) + + n_cols = len(col_diffs) + text = ( + f"Profile diff for '{args.model}': " + f"{n_cols} column{'s' if n_cols != 1 else ''} profiled. Rendered in widget." + ) + return CallToolResult( + content=[TextContent(type="text", text=text)], + structuredContent=output.model_dump(), + ) + + +@mcp.resource( + uri="ui://recce/profile_diff.html", + mime_type="text/html;profile=mcp-app", + meta={ + "ui": { + "csp": {"resourceDomains": ["https://unpkg.com"]}, + "prefersBorder": False, + }, + }, +) +def profile_diff_resource() -> str: + return _read_widget_html("profile_diff") + + # --------------------------------------------------------------------------- # Entry point # --------------------------------------------------------------------------- diff --git a/tests/test_widget_server.py b/tests/test_widget_server.py index 27fe2b35e..643f95c0d 100644 --- a/tests/test_widget_server.py +++ b/tests/test_widget_server.py @@ -85,7 +85,7 @@ async def test_mcp_server_filters_widget_tools_when_widgets_enabled(monkeypatch) @pytest.mark.asyncio async def test_widget_server_registers_six_tools_and_six_resources(): - """Widget FastMCP instance has exactly 11 tools/resources (Phase A + Phase B + Phase C widgets). + """Widget FastMCP instance has exactly 12 tools/resources (Phase A + Phase B + Phase C widgets). Uses FastMCP public API: mcp.list_tools() and mcp.list_resources(). 
""" @@ -109,6 +109,7 @@ async def test_widget_server_registers_six_tools_and_six_resources(): "value_diff_detail", "top_k_diff", "histogram_diff", + "profile_diff", } assert resource_uris == { "ui://recce/row_count_diff.html", @@ -122,6 +123,7 @@ async def test_widget_server_registers_six_tools_and_six_resources(): "ui://recce/value_diff_detail.html", "ui://recce/top_k_diff.html", "ui://recce/histogram_diff.html", + "ui://recce/profile_diff.html", } @@ -296,6 +298,7 @@ async def test_widget_tool_annotations_present(): "value_diff_detail", "top_k_diff", "histogram_diff", + "profile_diff", ): assert tool_name in tool_map, f"{tool_name} not found in widget mcp tools" t = tool_map[tool_name] @@ -320,9 +323,8 @@ async def test_widget_tool_annotations_present(): tool_map["value_diff_detail"].annotations.openWorldHint is True ), "value_diff_detail: expected openWorldHint=True" assert tool_map["top_k_diff"].annotations.openWorldHint is True, "top_k_diff: expected openWorldHint=True" - assert ( - tool_map["histogram_diff"].annotations.openWorldHint is True - ), "histogram_diff: expected openWorldHint=True" + assert tool_map["histogram_diff"].annotations.openWorldHint is True, "histogram_diff: expected openWorldHint=True" + assert tool_map["profile_diff"].annotations.openWorldHint is True, "profile_diff: expected openWorldHint=True" # --------------------------------------------------------------------------- @@ -1498,3 +1500,190 @@ async def test_histogram_diff_returns_calltoolresult_with_pydantic_shape(): # _warning extracted assert validated.warning == "Base environment not configured — comparing current against itself." 
+ + +# --------------------------------------------------------------------------- +# Test 27: profile_diff widget tool is registered with correct resource URI +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_profile_diff_widget_registered(): + """profile_diff appears in widget mcp tools/list and its resource URI exists. + + Verifies: + - tool named 'profile_diff' is in widget mcp tool list + - resource URI 'ui://recce/profile_diff.html' is in widget mcp resource list + - model is required in inputSchema; columns is optional + """ + from recce.widget_server import mcp + + tools = await mcp.list_tools() + resources = await mcp.list_resources() + + tool_names = {t.name for t in tools} + resource_uris = {str(r.uri) for r in resources} + + assert "profile_diff" in tool_names + assert "ui://recce/profile_diff.html" in resource_uris + + # Check inputSchema: model required, columns optional. + pd_tool = next(t for t in tools if t.name == "profile_diff") + schema = pd_tool.inputSchema + assert schema is not None + + defs = schema.get("$defs", {}) + inner_schema = next(iter(defs.values()), schema) + inner_required = inner_schema.get("required", []) + inner_props = inner_schema.get("properties", {}) + assert "model" in inner_required, "model must be required" + assert "columns" not in inner_required, "columns must be optional" + assert "model" in inner_props + assert "columns" in inner_props + + +# --------------------------------------------------------------------------- +# Test 28: profile_diff returns CallToolResult with correct Pydantic shape +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_profile_diff_returns_calltoolresult_with_pydantic_shape(): + """profile_diff handler returns CallToolResult with structuredContent matching ProfileDiffOutput. 
+ + Uses the actual ProfileDiffResult.model_dump(mode='json') shape (verified from source): + ProfileDiffResult has base: DataFrame, current: DataFrame. + Each DataFrame: {columns: [{key, name, type}], data: [[row_values], ...]} + Profile columns: column_name, data_type, row_count, not_null_proportion, + distinct_proportion, distinct_count, is_unique, min, max, avg, median. + + Verifies: + - content[0].text is a short human-readable sentence (not a JSON dump) + - structuredContent passes ProfileDiffOutput.model_validate() + - per-column diffs are built correctly from base + current DataFrames + - model is echoed from input + - _warning is extracted to output.warning named field + - numeric stats (row_count, distinct_count, avg, etc.) are correctly typed + - string stats (min, max) are preserved as str (SQL casts them to text) + - is_unique bool is parsed correctly + - columns absent from one env still appear in the output (union) + """ + from mcp.types import CallToolResult + + import recce.widget_server as ws + from recce.widget_server import ProfileDiffInput, ProfileDiffOutput + + # Profile DataFrame column metadata (matches PROFILE_COLUMN_JINJA_TEMPLATE output) + profile_col_meta = [ + {"key": "column_name", "name": "column_name", "type": "text"}, + {"key": "data_type", "name": "data_type", "type": "text"}, + {"key": "row_count", "name": "row_count", "type": "integer"}, + {"key": "not_null_proportion", "name": "not_null_proportion", "type": "number"}, + {"key": "distinct_proportion", "name": "distinct_proportion", "type": "number"}, + {"key": "distinct_count", "name": "distinct_count", "type": "integer"}, + {"key": "is_unique", "name": "is_unique", "type": "boolean"}, + {"key": "min", "name": "min", "type": "text"}, + {"key": "max", "name": "max", "type": "text"}, + {"key": "avg", "name": "avg", "type": "number"}, + {"key": "median", "name": "median", "type": "number"}, + ] + # Base: id (numeric), name (text), amount (numeric) + base_data = [ + ["id", "bigint", 
1000, 1.0, 1.0, 1000, True, "1", "1000", None, None], + ["name", "text", 1000, 0.98, 0.97, 970, False, None, None, None, None], + ["amount", "float", 1000, 0.995, 0.72, 720, False, "0.5", "999.9", 105.3, 87.2], + ] + # Current: id same, name has more nulls, amount shifted, new column "status" + curr_data = [ + ["id", "bigint", 1020, 1.0, 1.0, 1020, True, "1", "1020", None, None], + ["name", "text", 1020, 0.95, 0.96, 979, False, None, None, None, None], + ["amount", "float", 1020, 0.995, 0.70, 714, False, "0.5", "1099.9", 112.7, 91.4], + ["status", "text", 1020, 1.0, 0.05, 51, False, None, None, None, None], + ] + + mock_server = MagicMock() + mock_server._tool_profile_diff = AsyncMock( + return_value={ + "base": { + "columns": profile_col_meta, + "data": base_data, + "limit": None, + "more": None, + "total_row_count": None, + }, + "current": { + "columns": profile_col_meta, + "data": curr_data, + "limit": None, + "more": None, + "total_row_count": None, + }, + "_warning": "Base environment not configured — comparing current against itself.", + } + ) + + original = ws._recce_server + ws._recce_server = mock_server + try: + args = ProfileDiffInput(model="customers") + result = await ws.profile_diff(args) + finally: + ws._recce_server = original + + assert isinstance(result, CallToolResult) + assert len(result.content) == 1 + content_text = result.content[0].text + assert isinstance(content_text, str) + assert len(content_text) < 200, f"content too long ({len(content_text)} chars): {content_text!r}" + assert "widget" in content_text.lower() + + assert result.structuredContent is not None + validated = ProfileDiffOutput.model_validate(result.structuredContent) + + # model echoed back + assert validated.model == "customers" + + # Union of columns: id, name, amount from base + current; status only in current + col_names = [c.column_name for c in validated.columns] + assert "id" in col_names + assert "name" in col_names + assert "amount" in col_names + assert "status" in 
col_names # current-only column still appears + assert len(validated.columns) == 4 # id, name, amount, status + + # id column: base + current both present + id_col = next(c for c in validated.columns if c.column_name == "id") + assert id_col.data_type == "bigint" + assert id_col.base is not None + assert id_col.current is not None + assert id_col.base.row_count == 1000 + assert id_col.current.row_count == 1020 + assert id_col.base.distinct_count == 1000 + assert id_col.base.is_unique is True + assert id_col.base.not_null_proportion == 1.0 + + # amount column: numeric — avg and median present + amt_col = next(c for c in validated.columns if c.column_name == "amount") + assert amt_col.base is not None + assert amt_col.base.avg == 105.3 + assert amt_col.base.median == 87.2 + assert amt_col.base.min == "0.5" # SQL casts min to text + assert amt_col.base.max == "999.9" + assert amt_col.current is not None + assert amt_col.current.avg == 112.7 + + # name column: text — avg and min/max are None (not profiled for text cols) + name_col = next(c for c in validated.columns if c.column_name == "name") + assert name_col.base is not None + assert name_col.base.avg is None + assert name_col.base.min is None + assert name_col.base.not_null_proportion == 0.98 + + # status column: only in current — base is None + status_col = next(c for c in validated.columns if c.column_name == "status") + assert status_col.base is None + assert status_col.current is not None + assert status_col.current.row_count == 1020 + + # _warning extracted from _warning key + assert validated.warning == "Base environment not configured — comparing current against itself." 
From c7615a50f064b3efffe14018693b9dab74a4a790 Mon Sep 17 00:00:00 2001 From: Kent Date: Wed, 27 May 2026 16:13:21 +0800 Subject: [PATCH 27/43] =?UTF-8?q?feat(widgets):=20add=20get=5Fcll=20widget?= =?UTF-8?q?=20(Phase=20D=20tier=205=20=E2=80=94=20first=20mini-graph)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thirteenth widget; first tier-5 (mini graph) widget. Column-level lineage rendered as a hand-rolled SVG mini-DAG with layered layout (sources left, target middle, downstream right). Establishes the "tier-5 via hand-rolled SVG with bail-out" pattern. Actual CllData shape (from recce/models/types.py) uses nodes/columns dicts and parent_map/child_map sets — significantly different from the placeholder nodes+edges list shape in the mission. Column-to-column edges come from CllColumn.depends_on, not the top-level parent_map (which is node-level). Adapter logic in the delegate normalises sets → lists for JSON serialisation. - GetCllInput / GetCllColumnDep / GetCllColumnInfo / GetCllNodeInfo / GetCllOutput Pydantic models matching actual CllData shape - Hand-roll SVG: BFS layered layout, bezier edge routing via depends_on, model cards with per-column rows, target column highlighted in blue - Complexity bail-out: >12 nodes or >30 edges → text summary list with hint to use Recce web app lineage view for full DAG - @mcp.tool annotations (openWorldHint=False — manifest read, no warehouse) - WIDGET_TOOLS bumped to 13 tools - 2 new tests (enumeration bumped to 13, annotations test updated) - docs/mcp-widgets.md gains Tier-5 Widget Architecture section explaining layered layout, bezier routing, bail-out approach, iter-2 considerations Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- docs/mcp-widgets.md | 68 +++- recce/data/mcp/get_cll.html | 785 ++++++++++++++++++++++++++++++++++++ recce/mcp_server.py | 1 + recce/widget_server.py | 247 ++++++++++++ tests/test_widget_server.py | 242 ++++++++++- 5 files 
changed, 1337 insertions(+), 6 deletions(-) create mode 100644 recce/data/mcp/get_cll.html diff --git a/docs/mcp-widgets.md b/docs/mcp-widgets.md index 282f7acb0..93ef39293 100644 --- a/docs/mcp-widgets.md +++ b/docs/mcp-widgets.md @@ -16,8 +16,10 @@ Phase A ships five widgets: `row_count_diff`, `schema_diff`, `get_server_info`, `list_checks`, and `get_model`. Phase B iter 1 adds `query`, `query_diff`, `value_diff`, `value_diff_detail`, and `top_k_diff` (five tier-3 data-table/list widgets). Phase C adds two tier-4 chart widgets: `histogram_diff` (hand-rolled SVG -bar chart) and `profile_diff` (per-column statistical profile card grid). Phase C is -now complete. Total: **12 of 20 planned widgets** (60% coverage). All run in +bar chart) and `profile_diff` (per-column statistical profile card grid). Phase D +adds the first tier-5 (mini graph) widget: `get_cll` — column-level lineage +rendered as a hand-rolled SVG mini-DAG with layered layout (sources left, target +middle, downstream right). Total: **13 of 20 planned widgets** (65% coverage). All run in **local mode only** — cloud/session mode is not supported until iter 2. --- @@ -632,7 +634,67 @@ Consider Chart.js or Vega-Lite for future chart widgets when: - Multiple series with automatic legend management - The chart type is complex (scatter, violin, heatmap) -**Trigger threshold exceeded**: 12/20 widgets (60%) now use the hand-roll SVG + Pydantic pattern. If iter 2 introduces charts requiring stacked bars, line charts, or heatmaps, evaluate adopting Chart.js or Vega-Lite. Add the chosen CDN to `resourceDomains` in **all** widget `@mcp.resource` registrations (the list is per-server, shared). Validate with MCP Apps' CSP sandbox before shipping. +**Trigger threshold exceeded**: 13/20 widgets (65%) now use the hand-roll SVG + Pydantic pattern. If iter 2 introduces charts requiring stacked bars, line charts, or heatmaps, evaluate adopting Chart.js or Vega-Lite. 
Add the chosen CDN to `resourceDomains` in **all** widget `@mcp.resource` registrations (the list is per-server, shared). Validate with MCP Apps' CSP sandbox before shipping. + +--- + +## Tier-5 Widget Architecture (Mini Graphs) + +Phase D introduces the first tier-5 (mini graph) widget: `get_cll`. Unlike tier-4 +chart widgets that render bars/grids, tier-5 widgets render interactive graph diagrams +as hand-rolled SVGs with layout algorithms. + +### `get_cll` — Column-Level Lineage DAG + +`get_cll` reads `CllData` from the dbt adapter and renders it as a layered SVG DAG. +The actual `CllData` shape (from `recce/models/types.py`) uses: +- `nodes`: `Dict[str, CllNode]` — keyed by node_id; each node contains `columns: Dict[str, CllColumn]` +- `columns`: flat `Dict[str, CllColumn]` — keyed by `"{node_id}_{column_name}"` (aggregate index) +- `parent_map`: `Dict[str, Set[str]]` — child key → set of parent keys (edges) +- `child_map`: `Dict[str, Set[str]]` — parent key → set of child keys + +`CllColumn.depends_on` is a list of `CllColumnDep(node, column)` — these are the column-to-column +dependency edges used for bezier curve rendering between card rows. + +### Layout algorithm (simplified Sugiyama) + +1. BFS from the target node, assigning layers: target = 0, upstream = -N, downstream = +N. +2. Shift layers so min = 0 (left = most upstream). +3. Within each layer, sort nodes alphabetically by name and stack vertically. +4. Card width = 200px, column row height = 22px. Card height = header (32px) + N × 22px + 6px padding. +5. Layer gap = 80px horizontal. Node gap = 20px vertical. +6. SVG viewBox computed from total extent; `overflow-x: auto` on wrapper div for wide graphs. + +### Bezier edge routing + +For each column's `depends_on` entry, draw a cubic bezier from: +- Source: `right-edge` of source node card, at the y-center of the source column row. +- Target: `left-edge` of target node card, at the y-center of the target column row. 
+- Control points: `dx = (target_x - source_x) * 0.45` horizontal offset; same y as endpoints. + +This creates smooth S-curves without requiring a graph library. + +### Complexity bail-out + +If `node_count > 12` OR `edge_count > 30`, the widget skips the SVG layout and renders +a text summary card listing all node names with a hint to use the Recce web app for the +full interactive DAG. The `node_count` and `edge_count` fields are pre-computed in the +Python delegate (`GetCllOutput`) so the widget doesn't need to recompute them. + +### `openWorldHint=False` for `get_cll` + +`get_cll` reads the dbt manifest (local files) — it never hits the warehouse. This +contrasts with all tier-3 tools (`query`, `query_diff`, `value_diff`, etc.) which +set `openWorldHint=True`. Adding it to the `closed_world_tools` assertion in +`test_widget_server.py` enforces this distinction. + +### Iter 2 considerations for mini-graph widgets + +- **Cytoscape.js or D3** for larger graphs (>12 nodes): adds a CDN dependency but enables + interactive pan/zoom, auto-layout (Dagre), and click-to-focus interactions. +- **Depth limiting** instead of hard bail-out: show only N hops upstream/downstream. +- **Column filter**: highlight only the requested column's lineage path, greying out others. +- **Cross-environment diff overlay**: show base vs current columns side-by-side in the card. --- diff --git a/recce/data/mcp/get_cll.html b/recce/data/mcp/get_cll.html new file mode 100644 index 000000000..d707857f4 --- /dev/null +++ b/recce/data/mcp/get_cll.html @@ -0,0 +1,785 @@ + + + + + Column Lineage + + + +
+
+
Loading column lineage…
+
Waiting for CLL results.
+
+
+ + + + diff --git a/recce/mcp_server.py b/recce/mcp_server.py index 7556e54ee..ba84d6591 100644 --- a/recce/mcp_server.py +++ b/recce/mcp_server.py @@ -66,6 +66,7 @@ "top_k_diff", "histogram_diff", "profile_diff", + "get_cll", } diff --git a/recce/widget_server.py b/recce/widget_server.py index 69cb42265..e519d1230 100644 --- a/recce/widget_server.py +++ b/recce/widget_server.py @@ -1814,6 +1814,253 @@ def profile_diff_resource() -> str: return _read_widget_html("profile_diff") +# --------------------------------------------------------------------------- +# get_cll widget tool + resource +# --------------------------------------------------------------------------- + + +class GetCllInput(BaseModel): + node_id: str = Field(..., description="Full dbt node ID (e.g. 'model.jaffle_shop.customers')") + column: str = Field(..., description="Column name to trace lineage for") + change_analysis: bool = Field( + default=False, + description="Highlight columns whose transformation logic changed between base and current", + ) + + +class GetCllColumnDep(BaseModel): + """A single column-to-column dependency edge (source of column data).""" + + node: str # node_id of the source node + column: str # source column name + + +class GetCllColumnInfo(BaseModel): + """Per-column lineage info from CllColumn, adapted for the widget.""" + + id: Optional[str] = None + table_id: Optional[str] = None + name: Optional[str] = None + type: Optional[str] = None + transformation_type: str = "unknown" # source|passthrough|renamed|derived|unknown + change_status: Optional[str] = None + depends_on: List[GetCllColumnDep] = [] + + +class GetCllNodeInfo(BaseModel): + """Per-node info from CllNode, adapted for the widget.""" + + id: str + name: str + package_name: str + resource_type: str + source_name: Optional[str] = None + change_status: Optional[str] = None + change_category: Optional[str] = None + impacted: Optional[bool] = None + # columns dict: column_name → GetCllColumnInfo + columns: Dict[str, 
GetCllColumnInfo] = {} + + +class GetCllOutput(BaseModel): + """Output model for the get_cll widget tool. + + Mirrors CllData.model_dump(mode='json') after normalisation. + + CllData has: + nodes: Dict[str, CllNode] — keyed by node_id + columns: Dict[str, CllColumn] — keyed by "{node_id}_{column_name}" + parent_map: Dict[str, Set[str]] — child → set of parent keys + child_map: Dict[str, Set[str]] — parent → set of child keys + + The widget uses nodes/columns to draw cards and parent_map/child_map for edges. + We echo the query params so the widget header can show "{node}.{column}". + + node_count / edge_count enable the bail-out path in the widget without + the widget having to recompute them. + """ + + node_id: str # echoed from input + column: str # echoed from input + change_analysis: bool # echoed from input + nodes: Dict[str, GetCllNodeInfo] + columns: Dict[str, GetCllColumnInfo] + parent_map: Dict[str, List[str]] # Set serialises as list in JSON + child_map: Dict[str, List[str]] + node_count: int + edge_count: int # total directed edges (sum of len(parents) for each key) + warning: Optional[str] = None + + +@mcp.tool( + name="get_cll", + annotations={ + "title": "Column Lineage (Widget)", + "readOnlyHint": True, + "destructiveHint": False, + "idempotentHint": True, + "openWorldHint": False, # reads dbt manifest only, no warehouse I/O + }, + meta={ + "ui": {"resourceUri": "ui://recce/get_cll.html"}, + "ui/resourceUri": "ui://recce/get_cll.html", + }, +) +async def get_cll(args: GetCllInput) -> CallToolResult: + """Show column-level lineage — which upstream columns feed into a target column, and which downstream columns consume it. + + Rendered as a mini SVG DAG with layered layout: source nodes on the left, + the queried node in the middle, downstream nodes on the right. Column rows + are shown inside model "cards" (rectangles). Bezier edges connect source + column rows to target column rows. 
+ + For complex graphs (>12 nodes or >30 edges), falls back to a summary list + with a hint to use the Recce web app lineage view for the full DAG. + + Only available with dbt adapter (reads dbt manifest — no warehouse I/O). + + Args: + node_id: Full dbt node ID (e.g. 'model.jaffle_shop.customers') + column: Column name within that model to trace lineage for + change_analysis: True to highlight transformation-logic changes between base and current envs + + Returns: + CallToolResult with structuredContent: GetCllOutput shape + {node_id, column, change_analysis, nodes, columns, parent_map, child_map, + node_count, edge_count, warning?} + + Use when: + - User asks "where does column X come from / what uses it" + - Tracing data origin for a specific field during PR review + - Verifying a refactor preserved column semantics (with change_analysis=True) + Don't use when: + - Need full model-level DAG → lineage_diff (future widget) or Recce web app + - Need impact_analysis across changed models → impact_analysis + - Column doesn't exist in the model → use get_model to verify schema first + - Non-dbt adapter → tool raises immediately + """ + raw = await _recce_server._tool_get_cll(args.model_dump()) + # raw is CllData.model_dump(mode="json"): + # {nodes: {node_id: {id, name, ...}}, + # columns: {col_key: {id, name, ...}}, + # parent_map: {key: [parents...]}, + # child_map: {key: [children...]}} + warning = raw.pop("_warning", None) if isinstance(raw, dict) else None + + raw_nodes = raw.get("nodes", {}) if isinstance(raw, dict) else {} + raw_cols = raw.get("columns", {}) if isinstance(raw, dict) else {} + raw_parent_map = raw.get("parent_map", {}) if isinstance(raw, dict) else {} + raw_child_map = raw.get("child_map", {}) if isinstance(raw, dict) else {} + + # Normalise nodes + nodes_out: Dict[str, GetCllNodeInfo] = {} + for nid, ndata in raw_nodes.items(): + if not isinstance(ndata, dict): + continue + raw_node_cols = ndata.get("columns", {}) or {} + node_cols: Dict[str, 
GetCllColumnInfo] = {} + for cname, cdata in raw_node_cols.items(): + if isinstance(cdata, dict): + deps = [ + GetCllColumnDep(node=d["node"], column=d["column"]) + for d in (cdata.get("depends_on") or []) + if isinstance(d, dict) and "node" in d and "column" in d + ] + node_cols[cname] = GetCllColumnInfo( + id=cdata.get("id"), + table_id=cdata.get("table_id"), + name=cdata.get("name"), + type=cdata.get("type"), + transformation_type=cdata.get("transformation_type") or "unknown", + change_status=cdata.get("change_status"), + depends_on=deps, + ) + nodes_out[nid] = GetCllNodeInfo( + id=ndata.get("id", nid), + name=ndata.get("name", nid), + package_name=ndata.get("package_name", ""), + resource_type=ndata.get("resource_type", "model"), + source_name=ndata.get("source_name"), + change_status=ndata.get("change_status"), + change_category=ndata.get("change_category"), + impacted=ndata.get("impacted"), + columns=node_cols, + ) + + # Normalise flat columns dict (keyed by "{node_id}_{column_name}") + cols_out: Dict[str, GetCllColumnInfo] = {} + for col_key, cdata in raw_cols.items(): + if not isinstance(cdata, dict): + continue + deps = [ + GetCllColumnDep(node=d["node"], column=d["column"]) + for d in (cdata.get("depends_on") or []) + if isinstance(d, dict) and "node" in d and "column" in d + ] + cols_out[col_key] = GetCllColumnInfo( + id=cdata.get("id"), + table_id=cdata.get("table_id"), + name=cdata.get("name"), + type=cdata.get("type"), + transformation_type=cdata.get("transformation_type") or "unknown", + change_status=cdata.get("change_status"), + depends_on=deps, + ) + + # Normalise parent_map / child_map (sets serialise as lists in JSON) + parent_map_out: Dict[str, List[str]] = { + k: list(v) if isinstance(v, (list, set)) else [] for k, v in raw_parent_map.items() + } + child_map_out: Dict[str, List[str]] = { + k: list(v) if isinstance(v, (list, set)) else [] for k, v in raw_child_map.items() + } + + # Compute counts for bail-out logic + node_count = len(nodes_out) 
+ edge_count = sum(len(parents) for parents in parent_map_out.values()) + + output = GetCllOutput( + node_id=args.node_id, + column=args.column, + change_analysis=args.change_analysis, + nodes=nodes_out, + columns=cols_out, + parent_map=parent_map_out, + child_map=child_map_out, + node_count=node_count, + edge_count=edge_count, + warning=warning, + ) + + # Short content text — widget handles rendering + node_name = nodes_out.get( + args.node_id, GetCllNodeInfo(id=args.node_id, name=args.node_id, package_name="", resource_type="model") + ).name + text = ( + f"Column lineage for {node_name}.{args.column}: " + f"{node_count} node{'s' if node_count != 1 else ''}, " + f"{edge_count} edge{'s' if edge_count != 1 else ''}. Rendered in widget." + ) + return CallToolResult( + content=[TextContent(type="text", text=text)], + structuredContent=output.model_dump(), + ) + + +@mcp.resource( + uri="ui://recce/get_cll.html", + mime_type="text/html;profile=mcp-app", + meta={ + "ui": { + "csp": {"resourceDomains": ["https://unpkg.com"]}, + "prefersBorder": False, + }, + }, +) +def get_cll_resource() -> str: + return _read_widget_html("get_cll") + + # --------------------------------------------------------------------------- # Entry point # --------------------------------------------------------------------------- diff --git a/tests/test_widget_server.py b/tests/test_widget_server.py index 643f95c0d..43ba01a2d 100644 --- a/tests/test_widget_server.py +++ b/tests/test_widget_server.py @@ -85,7 +85,7 @@ async def test_mcp_server_filters_widget_tools_when_widgets_enabled(monkeypatch) @pytest.mark.asyncio async def test_widget_server_registers_six_tools_and_six_resources(): - """Widget FastMCP instance has exactly 12 tools/resources (Phase A + Phase B + Phase C widgets). + """Widget FastMCP instance has exactly 13 tools/resources (Phase A + Phase B + Phase C + Phase D widgets). Uses FastMCP public API: mcp.list_tools() and mcp.list_resources(). 
""" @@ -110,6 +110,7 @@ async def test_widget_server_registers_six_tools_and_six_resources(): "top_k_diff", "histogram_diff", "profile_diff", + "get_cll", } assert resource_uris == { "ui://recce/row_count_diff.html", @@ -124,6 +125,7 @@ async def test_widget_server_registers_six_tools_and_six_resources(): "ui://recce/top_k_diff.html", "ui://recce/histogram_diff.html", "ui://recce/profile_diff.html", + "ui://recce/get_cll.html", } @@ -299,6 +301,7 @@ async def test_widget_tool_annotations_present(): "top_k_diff", "histogram_diff", "profile_diff", + "get_cll", ): assert tool_name in tool_map, f"{tool_name} not found in widget mcp tools" t = tool_map[tool_name] @@ -309,8 +312,8 @@ async def test_widget_tool_annotations_present(): assert a.idempotentHint is True, f"{tool_name}: expected idempotentHint=True" assert a.title is not None and len(a.title) > 0, f"{tool_name}: title must be set" - # Closed-world tools (no external warehouse I/O) - closed_world_tools = ("row_count_diff", "schema_diff", "get_server_info", "list_checks", "get_model") + # Closed-world tools (no external warehouse I/O) — get_cll reads manifest only + closed_world_tools = ("row_count_diff", "schema_diff", "get_server_info", "list_checks", "get_model", "get_cll") for tool_name in closed_world_tools: t = tool_map[tool_name] assert t.annotations.openWorldHint is False, f"{tool_name}: expected openWorldHint=False" @@ -1687,3 +1690,236 @@ async def test_profile_diff_returns_calltoolresult_with_pydantic_shape(): # _warning extracted from _warning key assert validated.warning == "Base environment not configured — comparing current against itself." 
+ + +# --------------------------------------------------------------------------- +# Test 87: get_cll widget tool is registered with correct resource URI +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_get_cll_widget_registered(): + """get_cll appears in widget mcp tools/list and its resource URI exists. + + Verifies: + - tool named 'get_cll' is in widget mcp tool list + - resource URI 'ui://recce/get_cll.html' is in widget mcp resource list + - node_id and column are required; change_analysis is optional (default False) + - annotations: openWorldHint=False (reads manifest, no warehouse I/O) + """ + from recce.widget_server import mcp + + tools = await mcp.list_tools() + resources = await mcp.list_resources() + + tool_names = {t.name for t in tools} + resource_uris = {str(r.uri) for r in resources} + + assert "get_cll" in tool_names + assert "ui://recce/get_cll.html" in resource_uris + + # Verify inputSchema: node_id + column required; change_analysis optional + tool = next(t for t in tools if t.name == "get_cll") + schema = tool.inputSchema + assert schema is not None + defs = schema.get("$defs", {}) + inner_schema = next(iter(defs.values()), schema) + inner_required = inner_schema.get("required", []) + inner_props = inner_schema.get("properties", {}) + assert "node_id" in inner_required, "node_id must be required" + assert "column" in inner_required, "column must be required" + assert "change_analysis" not in inner_required, "change_analysis must be optional" + assert "node_id" in inner_props + assert "column" in inner_props + assert "change_analysis" in inner_props + + # openWorldHint must be False — CLL reads manifest only, no warehouse + a = tool.annotations + assert a is not None + assert a.openWorldHint is False, "get_cll: expected openWorldHint=False (manifest read, no warehouse)" + assert a.readOnlyHint is True + assert a.destructiveHint is False + + +# 
--------------------------------------------------------------------------- +# Test 88: get_cll returns CallToolResult with correct Pydantic shape + counts +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_get_cll_returns_calltoolresult_with_pydantic_shape(): + """get_cll handler returns CallToolResult with structuredContent matching GetCllOutput. + + Uses a realistic CllData.model_dump shape (verified from recce/models/types.py): + {nodes: {node_id: {id, name, package_name, resource_type, columns: {col_name: {…}}}}, + columns: {col_key: {…}}, + parent_map: {key: [parents]}, + child_map: {key: [children]}} + + Verifies: + - content[0].text is a short human-readable sentence (not a JSON dump) + - structuredContent passes GetCllOutput.model_validate() + - node_id, column, change_analysis echoed from input + - node_count and edge_count computed correctly + - CllColumn.depends_on hydrated into GetCllColumnDep list + - parent_map/child_map sets converted to lists + """ + from mcp.types import CallToolResult + + import recce.widget_server as ws + from recce.widget_server import GetCllInput, GetCllOutput + + # Realistic jaffle-shop-scale CLL output: + # orders.amount depends on raw_orders.amount (passthrough) + # stg_orders.amount depends on raw_orders.amount (passthrough) + # orders.amount depends on stg_orders.amount (passthrough) + mock_server = MagicMock() + mock_server._tool_get_cll = AsyncMock( + return_value={ + "nodes": { + "source.jaffle_shop.raw_orders": { + "id": "source.jaffle_shop.raw_orders", + "name": "raw_orders", + "package_name": "jaffle_shop", + "resource_type": "source", + "source_name": "jaffle_shop", + "change_status": None, + "change_category": None, + "impacted": None, + "columns": { + "amount": { + "id": "source.jaffle_shop.raw_orders_amount", + "table_id": "source.jaffle_shop.raw_orders", + "name": "amount", + "type": "numeric", + "transformation_type": "source", + 
"change_status": None, + "depends_on": [], + } + }, + }, + "model.jaffle_shop.stg_orders": { + "id": "model.jaffle_shop.stg_orders", + "name": "stg_orders", + "package_name": "jaffle_shop", + "resource_type": "model", + "source_name": None, + "change_status": None, + "change_category": None, + "impacted": None, + "columns": { + "amount": { + "id": "model.jaffle_shop.stg_orders_amount", + "table_id": "model.jaffle_shop.stg_orders", + "name": "amount", + "type": "numeric", + "transformation_type": "passthrough", + "change_status": None, + "depends_on": [{"node": "source.jaffle_shop.raw_orders", "column": "amount"}], + } + }, + }, + "model.jaffle_shop.orders": { + "id": "model.jaffle_shop.orders", + "name": "orders", + "package_name": "jaffle_shop", + "resource_type": "model", + "source_name": None, + "change_status": None, + "change_category": None, + "impacted": None, + "columns": { + "amount": { + "id": "model.jaffle_shop.orders_amount", + "table_id": "model.jaffle_shop.orders", + "name": "amount", + "type": "numeric", + "transformation_type": "passthrough", + "change_status": None, + "depends_on": [{"node": "model.jaffle_shop.stg_orders", "column": "amount"}], + } + }, + }, + }, + "columns": { + "source.jaffle_shop.raw_orders_amount": { + "id": "source.jaffle_shop.raw_orders_amount", + "table_id": "source.jaffle_shop.raw_orders", + "name": "amount", + "type": "numeric", + "transformation_type": "source", + "change_status": None, + "depends_on": [], + }, + }, + "parent_map": { + "model.jaffle_shop.stg_orders": ["source.jaffle_shop.raw_orders"], + "model.jaffle_shop.orders": ["model.jaffle_shop.stg_orders"], + }, + "child_map": { + "source.jaffle_shop.raw_orders": ["model.jaffle_shop.stg_orders"], + "model.jaffle_shop.stg_orders": ["model.jaffle_shop.orders"], + }, + } + ) + + original = ws._recce_server + ws._recce_server = mock_server + try: + args = GetCllInput( + node_id="model.jaffle_shop.orders", + column="amount", + change_analysis=False, + ) + result = await 
ws.get_cll(args) + finally: + ws._recce_server = original + + assert isinstance(result, CallToolResult) + assert len(result.content) == 1 + content_text = result.content[0].text + assert isinstance(content_text, str) + assert len(content_text) < 160, f"content too long ({len(content_text)} chars): {content_text!r}" + assert "widget" in content_text.lower() + + # structuredContent must round-trip through Pydantic + assert result.structuredContent is not None + validated = GetCllOutput.model_validate(result.structuredContent) + + # Input echoed back + assert validated.node_id == "model.jaffle_shop.orders" + assert validated.column == "amount" + assert validated.change_analysis is False + + # Counts + assert validated.node_count == 3 + assert validated.edge_count == 2 # parent_map has 2 entries each with 1 parent + + # Nodes normalised correctly + assert "model.jaffle_shop.orders" in validated.nodes + orders_node = validated.nodes["model.jaffle_shop.orders"] + assert orders_node.name == "orders" + assert orders_node.resource_type == "model" + assert "amount" in orders_node.columns + orders_amount = orders_node.columns["amount"] + assert orders_amount.transformation_type == "passthrough" + assert len(orders_amount.depends_on) == 1 + assert orders_amount.depends_on[0].node == "model.jaffle_shop.stg_orders" + assert orders_amount.depends_on[0].column == "amount" + + # Source node + assert "source.jaffle_shop.raw_orders" in validated.nodes + src_node = validated.nodes["source.jaffle_shop.raw_orders"] + assert src_node.resource_type == "source" + assert src_node.source_name == "jaffle_shop" + src_amount = src_node.columns["amount"] + assert src_amount.transformation_type == "source" + assert len(src_amount.depends_on) == 0 + + # parent_map / child_map are lists (not sets) + assert "model.jaffle_shop.orders" in validated.parent_map + assert isinstance(validated.parent_map["model.jaffle_shop.orders"], list) + assert "model.jaffle_shop.stg_orders" in 
validated.parent_map["model.jaffle_shop.orders"] + + # warning is None (not provided) + assert validated.warning is None From f8eea2c97887649b54ca06ad18d1b0d33a07dbef Mon Sep 17 00:00:00 2001 From: Kent Date: Wed, 27 May 2026 16:27:38 +0800 Subject: [PATCH 28/43] feat(widgets): add impact_analysis widget (Phase D tier 5, Phase D complete) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fourteenth widget; closes Phase D (2 mini-graph widgets). Model-level blast radius — confirmed/potential impact per downstream model with next-action hints. Hand-roll SVG mini-DAG + below-SVG actionable list. - ImpactedModel / NextAction / ImpactAnalysisOutput Pydantic models matching actual _tool_impact_analysis return shape (_guidance, row_count, value_diff summary, schema_changes, next_action per model) - ImpactValueDiffSummary (renamed to avoid shadowing existing ValueDiffSummary) - Hand-roll SVG: 2-layer BFS DAG (modified left, downstream right) with per-node impact badge chips, row-count/value-diff metric, next_action hint - 'What to investigate next' actionable list grouped by priority (high/medium/low) - Bail-out for >15 nodes: skip SVG, show summary counts + actionable list only - _warning extracted to output.warning, _guidance to output.guidance - WIDGET_TOOLS at 14 tools — 70% coverage (only lineage_diff in Phase E left) - 2 new tests; enumeration bumped to 14 (37 total pass) Phase D complete (get_cll, impact_analysis). Both use hand-roll SVG mini-DAG with BFS layered layout. Approach scales to ~15 model nodes — anything larger hits bail-out. Phase E (lineage_diff, full DAG with potentially hundreds of nodes) needs ReactFlow + the @datarecce/ui lineage borrow contract; can't be served by hand-roll SVG. Captain decision required before Phase E. 
Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- docs/mcp-widgets.md | 40 +- recce/data/mcp/impact_analysis.html | 808 ++++++++++++++++++++++++++++ recce/mcp_server.py | 1 + recce/widget_server.py | 251 +++++++++ tests/test_widget_server.py | 199 ++++++- 5 files changed, 1292 insertions(+), 7 deletions(-) create mode 100644 recce/data/mcp/impact_analysis.html diff --git a/docs/mcp-widgets.md b/docs/mcp-widgets.md index 93ef39293..55fcdcba4 100644 --- a/docs/mcp-widgets.md +++ b/docs/mcp-widgets.md @@ -17,9 +17,11 @@ Phase A ships five widgets: `row_count_diff`, `schema_diff`, `get_server_info`, `value_diff`, `value_diff_detail`, and `top_k_diff` (five tier-3 data-table/list widgets). Phase C adds two tier-4 chart widgets: `histogram_diff` (hand-rolled SVG bar chart) and `profile_diff` (per-column statistical profile card grid). Phase D -adds the first tier-5 (mini graph) widget: `get_cll` — column-level lineage -rendered as a hand-rolled SVG mini-DAG with layered layout (sources left, target -middle, downstream right). Total: **13 of 20 planned widgets** (65% coverage). All run in +adds two tier-5 (mini graph) widgets: `get_cll` — column-level lineage rendered as +a hand-rolled SVG mini-DAG — and `impact_analysis` — model-level blast-radius +dashboard with per-model impact badges, row-count/value-diff chips, SVG mini-DAG +(up to 15 nodes), and an actionable "What to investigate next" list. +Total: **14 of 20 planned widgets** (70% coverage). All run in **local mode only** — cloud/session mode is not supported until iter 2. --- @@ -688,13 +690,39 @@ contrasts with all tier-3 tools (`query`, `query_diff`, `value_diff`, etc.) whic set `openWorldHint=True`. Adding it to the `closed_world_tools` assertion in `test_widget_server.py` enforces this distinction. +### `impact_analysis` — Model-Level Blast Radius + +`impact_analysis` runs warehouse queries (row_count_diff + value_diff SQL) against +non-view models with a primary key. It renders: + +1. 
**Header** — explosion icon, "Impact analysis" title, impacted model count badge. +2. **Summary bar** — confirmed / potential / clean counts + max affected rows. +3. **SVG mini-DAG** (up to 15 models) — 2-layer layout: modified models left, downstream right. + Each model card shows impact badge (CONFIRMED/POTENTIAL/CLEAN), row-count delta chip, + and next-action hint. Bezier edges connect every modified node to every downstream node. +4. **"What to investigate next"** — actionable list of `next_action` items grouped by + priority (high / medium / low). Only models with `data_impact='potential'` have + `next_action`; confirmed and clean models need no follow-up. + +Bail-out at >15 models: skip SVG, show summary counts + actionable list only. + +`openWorldHint=True` — runs warehouse SQL (unlike `get_cll` which is manifest-only). + +### `openWorldHint` for impact_analysis + +`impact_analysis` queries the warehouse for row counts and value diffs, so it is added to +the open-world group (alongside `query`, `profile_diff`, etc.). It is NOT in +`closed_world_tools`. See the annotations assertion in `test_widget_server.py`. + ### Iter 2 considerations for mini-graph widgets -- **Cytoscape.js or D3** for larger graphs (>12 nodes): adds a CDN dependency but enables - interactive pan/zoom, auto-layout (Dagre), and click-to-focus interactions. +- **Cytoscape.js or D3** for larger graphs (>15 models / >12 nodes): adds a CDN dependency + but enables interactive pan/zoom, auto-layout (Dagre), and click-to-focus interactions. - **Depth limiting** instead of hard bail-out: show only N hops upstream/downstream. -- **Column filter**: highlight only the requested column's lineage path, greying out others. +- **Column filter** (get_cll): highlight only the requested column's lineage path. - **Cross-environment diff overlay**: show base vs current columns side-by-side in the card. +- **`impact_analysis` edge routing**: the current SVG mini-DAG draws the full modified×downstream edge matrix.
+ Iter 2 should use actual DAG parent/child links from lineage_diff to draw only real edges. --- diff --git a/recce/data/mcp/impact_analysis.html b/recce/data/mcp/impact_analysis.html new file mode 100644 index 000000000..09254b813 --- /dev/null +++ b/recce/data/mcp/impact_analysis.html @@ -0,0 +1,808 @@ + + + + + Impact Analysis + + + +
+
+
Loading impact analysis…
+
Waiting for results.
+
+
+ + + + diff --git a/recce/mcp_server.py b/recce/mcp_server.py index ba84d6591..fc919ff0b 100644 --- a/recce/mcp_server.py +++ b/recce/mcp_server.py @@ -67,6 +67,7 @@ "histogram_diff", "profile_diff", "get_cll", + "impact_analysis", } diff --git a/recce/widget_server.py b/recce/widget_server.py index e519d1230..72fdadacc 100644 --- a/recce/widget_server.py +++ b/recce/widget_server.py @@ -2061,6 +2061,257 @@ def get_cll_resource() -> str: return _read_widget_html("get_cll") +# --------------------------------------------------------------------------- +# impact_analysis widget tool + resource +# --------------------------------------------------------------------------- + + +class ImpactAnalysisInput(BaseModel): + select: Optional[str] = Field( + default=None, + description=( + "dbt selector syntax. Default: data-affecting changes only " + "(body + macros + contract and their downstream). " + "Use 'state:modified+' to include all changes including config." + ), + ) + skip_value_diff: bool = Field( + default=False, + description="Skip row-level value comparison on modified models.", + ) + skip_downstream_value_diff: bool = Field( + default=False, + description="Skip value comparison on downstream models (faster for large DAGs).", + ) + + +class RowCountSummary(BaseModel): + """Row count comparison between base and current environments.""" + + base: Optional[int] = None + current: Optional[int] = None + delta: Optional[int] = None + delta_pct: Optional[float] = None + + +class ImpactValueDiffSummary(BaseModel): + """Row-level value diff summary (PK join result) as returned by impact_analysis.""" + + affected_row_count: int = 0 + rows_added: int = 0 + rows_removed: int = 0 + rows_changed: int = 0 + columns: Optional[Dict[str, Any]] = None # column → {affected_row_count, base_mean, current_mean} + + +class NextAction(BaseModel): + """Suggested follow-up tool to investigate a model further.""" + + tool: str # "profile_diff" | "query_diff" | "row_count_diff" | etc. 
+ columns: Optional[List[str]] = None + reason: str + priority: str # "high" | "medium" | "low" + + +class SchemaChange(BaseModel): + """A single column-level schema change.""" + + column: str + change_status: str # "added" | "removed" | "modified" + + +class ImpactedModelEntry(BaseModel): + """Per-model impact record from _tool_impact_analysis.""" + + name: str + change_status: Optional[str] = None # "added" | "removed" | "modified" | None (downstream) + materialized: Optional[str] = None # "table" | "view" | "incremental" | etc. + row_count: Optional[RowCountSummary] = None + schema_changes: List[SchemaChange] = [] + value_diff: Optional[ImpactValueDiffSummary] = None + affected_row_count: Optional[int] = None + data_impact: Optional[str] = None # "confirmed" | "none" | "potential" + next_action: Optional[NextAction] = None + + +class ImpactAnalysisOutput(BaseModel): + """Output model for the impact_analysis widget tool. + + Mirrors _tool_impact_analysis return shape (without _guidance). + + Fields: + guidance: LLM-facing triage hint (from _guidance) + classification_source: always "lineage_dag" + max_affected_row_count: max across all confirmed models + confirmed_impacted_models: list of all blast-radius models with data_impact field + confirmed_not_impacted_models: list of model names confirmed clean + errors: list of per-step error dicts (step, message, model?) 
+ warning: single-env warning if present + """ + + guidance: Optional[str] = None + classification_source: Optional[str] = None + max_affected_row_count: int = 0 + confirmed_impacted_models: List[ImpactedModelEntry] = [] + confirmed_not_impacted_models: List[str] = [] + errors: List[Dict[str, Any]] = [] + warning: Optional[str] = None + + +@mcp.tool( + name="impact_analysis", + annotations={ + "title": "Impact Analysis (Widget)", + "readOnlyHint": True, + "destructiveHint": False, + "idempotentHint": True, + "openWorldHint": True, # runs row_count_diff + value_diff SQL against warehouse + }, + meta={ + "ui": {"resourceUri": "ui://recce/impact_analysis.html"}, + "ui/resourceUri": "ui://recce/impact_analysis.html", + }, +) +async def impact_analysis(args: ImpactAnalysisInput) -> CallToolResult: + """Show the blast radius of dbt model changes — which models are confirmed-impacted, clean, or need investigation. + + Rendered as a model-level impact dashboard: summary counts, optional SVG mini-DAG + of impacted models (up to 15 nodes), and an actionable "What to investigate next" + list extracted from each model's next_action field grouped by priority. + + Runs warehouse queries (row_count_diff + value_diff) for non-view models with a + primary key. View models and models without a PK receive data_impact='potential' + and a next_action hint instead. + + Args: + select: dbt selector syntax (default: data-affecting changes + downstream) + skip_value_diff: skip value comparison on all models + skip_downstream_value_diff: skip value comparison on downstream models only + + Returns: + CallToolResult with structuredContent: ImpactAnalysisOutput shape + {guidance, classification_source, max_affected_row_count, + confirmed_impacted_models, confirmed_not_impacted_models, errors, warning?} + + Use when: + - Starting a PR review: "what models are impacted by my changes?" 
+ - Triaging blast radius before deciding which diffs to run + - Building a structured change report for stakeholders + Don't use when: + - Need column-level lineage → get_cll + - Need detailed row-by-row diffs for a specific model → value_diff / value_diff_detail + - Already have impact results and need to drill in → profile_diff / query_diff + """ + raw = await _recce_server._tool_impact_analysis(args.model_dump(exclude_none=True)) + warning = raw.pop("_warning", None) if isinstance(raw, dict) else None + + guidance = raw.get("_guidance") if isinstance(raw, dict) else None + classification_source = raw.get("classification_source") if isinstance(raw, dict) else None + max_affected = raw.get("max_affected_row_count", 0) if isinstance(raw, dict) else 0 + raw_impacted = raw.get("confirmed_impacted_models", []) if isinstance(raw, dict) else [] + raw_not_impacted = raw.get("confirmed_not_impacted_models", []) if isinstance(raw, dict) else [] + raw_errors = raw.get("errors", []) if isinstance(raw, dict) else [] + + # Normalise impacted model entries + impacted_models: List[ImpactedModelEntry] = [] + for m in raw_impacted: + if not isinstance(m, dict): + continue + rc = m.get("row_count") + row_count = ( + RowCountSummary( + base=rc.get("base"), + current=rc.get("current"), + delta=rc.get("delta"), + delta_pct=rc.get("delta_pct"), + ) + if isinstance(rc, dict) + else None + ) + vd = m.get("value_diff") + value_diff = ( + ImpactValueDiffSummary( + affected_row_count=vd.get("affected_row_count", 0), + rows_added=vd.get("rows_added", 0), + rows_removed=vd.get("rows_removed", 0), + rows_changed=vd.get("rows_changed", 0), + columns=vd.get("columns"), + ) + if isinstance(vd, dict) + else None + ) + na = m.get("next_action") + next_action = ( + NextAction( + tool=na.get("tool", "profile_diff"), + columns=na.get("columns"), + reason=na.get("reason", ""), + priority=na.get("priority", "medium"), + ) + if isinstance(na, dict) + else None + ) + schema_changes = [ + 
SchemaChange(column=sc["column"], change_status=sc["change_status"]) + for sc in (m.get("schema_changes") or []) + if isinstance(sc, dict) and "column" in sc and "change_status" in sc + ] + impacted_models.append( + ImpactedModelEntry( + name=m.get("name", ""), + change_status=m.get("change_status"), + materialized=m.get("materialized"), + row_count=row_count, + schema_changes=schema_changes, + value_diff=value_diff, + affected_row_count=m.get("affected_row_count"), + data_impact=m.get("data_impact"), + next_action=next_action, + ) + ) + + # confirmed_not_impacted_models is a list of name strings + not_impacted: List[str] = [n for n in raw_not_impacted if isinstance(n, str)] + + output = ImpactAnalysisOutput( + guidance=guidance, + classification_source=classification_source, + max_affected_row_count=max_affected if isinstance(max_affected, int) else 0, + confirmed_impacted_models=impacted_models, + confirmed_not_impacted_models=not_impacted, + errors=[e for e in raw_errors if isinstance(e, dict)], + warning=warning, + ) + + n_confirmed = sum(1 for m in impacted_models if m.data_impact == "confirmed") + n_potential = sum(1 for m in impacted_models if m.data_impact == "potential") + n_none = sum(1 for m in impacted_models if m.data_impact == "none") + total_impacted = len(impacted_models) + text = ( + f"Impact analysis: {total_impacted} model{'s' if total_impacted != 1 else ''} in blast radius " + f"({n_confirmed} confirmed, {n_potential} potential, {n_none} no impact). " + f"Max affected rows: {max_affected:,}. Rendered in widget." 
+ ) + return CallToolResult( + content=[TextContent(type="text", text=text)], + structuredContent=output.model_dump(), + ) + + +@mcp.resource( + uri="ui://recce/impact_analysis.html", + mime_type="text/html;profile=mcp-app", + meta={ + "ui": { + "csp": {"resourceDomains": ["https://unpkg.com"]}, + "prefersBorder": False, + }, + }, +) +def impact_analysis_resource() -> str: + return _read_widget_html("impact_analysis") + + # --------------------------------------------------------------------------- # Entry point # --------------------------------------------------------------------------- diff --git a/tests/test_widget_server.py b/tests/test_widget_server.py index 43ba01a2d..c17536888 100644 --- a/tests/test_widget_server.py +++ b/tests/test_widget_server.py @@ -85,7 +85,7 @@ async def test_mcp_server_filters_widget_tools_when_widgets_enabled(monkeypatch) @pytest.mark.asyncio async def test_widget_server_registers_six_tools_and_six_resources(): - """Widget FastMCP instance has exactly 13 tools/resources (Phase A + Phase B + Phase C + Phase D widgets). + """Widget FastMCP instance has exactly 14 tools/resources (Phase A + Phase B + Phase C + Phase D widgets). Uses FastMCP public API: mcp.list_tools() and mcp.list_resources(). 
""" @@ -111,6 +111,7 @@ async def test_widget_server_registers_six_tools_and_six_resources(): "histogram_diff", "profile_diff", "get_cll", + "impact_analysis", } assert resource_uris == { "ui://recce/row_count_diff.html", @@ -126,6 +127,7 @@ async def test_widget_server_registers_six_tools_and_six_resources(): "ui://recce/histogram_diff.html", "ui://recce/profile_diff.html", "ui://recce/get_cll.html", + "ui://recce/impact_analysis.html", } @@ -302,6 +304,7 @@ async def test_widget_tool_annotations_present(): "histogram_diff", "profile_diff", "get_cll", + "impact_analysis", ): assert tool_name in tool_map, f"{tool_name} not found in widget mcp tools" t = tool_map[tool_name] @@ -328,6 +331,7 @@ async def test_widget_tool_annotations_present(): assert tool_map["top_k_diff"].annotations.openWorldHint is True, "top_k_diff: expected openWorldHint=True" assert tool_map["histogram_diff"].annotations.openWorldHint is True, "histogram_diff: expected openWorldHint=True" assert tool_map["profile_diff"].annotations.openWorldHint is True, "profile_diff: expected openWorldHint=True" + assert tool_map["impact_analysis"].annotations.openWorldHint is True, "impact_analysis: expected openWorldHint=True" # --------------------------------------------------------------------------- @@ -1923,3 +1927,196 @@ async def test_get_cll_returns_calltoolresult_with_pydantic_shape(): # warning is None (not provided) assert validated.warning is None + + +# --------------------------------------------------------------------------- +# Test 89: impact_analysis widget tool is registered with correct resource URI +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_impact_analysis_widget_registered(): + """impact_analysis appears in widget mcp tools/list and its resource URI exists. 
+ + Verifies: + - tool named 'impact_analysis' is in widget mcp tool list + - resource URI 'ui://recce/impact_analysis.html' is in widget mcp resource list + - select, skip_value_diff, skip_downstream_value_diff are all optional + - annotations: openWorldHint=True (runs row_count_diff + value_diff SQL) + """ + from recce.widget_server import mcp + + tools = await mcp.list_tools() + resources = await mcp.list_resources() + + tool_names = {t.name for t in tools} + resource_uris = {str(r.uri) for r in resources} + + assert "impact_analysis" in tool_names + assert "ui://recce/impact_analysis.html" in resource_uris + + # Verify inputSchema: all args are optional + tool = next(t for t in tools if t.name == "impact_analysis") + schema = tool.inputSchema + assert schema is not None + defs = schema.get("$defs", {}) + inner_schema = next(iter(defs.values()), schema) + inner_required = inner_schema.get("required", []) + inner_props = inner_schema.get("properties", {}) + assert "select" not in inner_required, "select must be optional" + assert "skip_value_diff" not in inner_required, "skip_value_diff must be optional" + assert "skip_downstream_value_diff" not in inner_required, "skip_downstream_value_diff must be optional" + assert "select" in inner_props + assert "skip_value_diff" in inner_props + assert "skip_downstream_value_diff" in inner_props + + # openWorldHint must be True — runs warehouse queries + a = tool.annotations + assert a is not None + assert a.openWorldHint is True, "impact_analysis: expected openWorldHint=True (runs warehouse queries)" + assert a.readOnlyHint is True + assert a.destructiveHint is False + + +# --------------------------------------------------------------------------- +# Test 90: impact_analysis returns CallToolResult with correct Pydantic shape +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_impact_analysis_returns_calltoolresult_with_pydantic_shape(): + 
"""impact_analysis handler returns CallToolResult with structuredContent matching ImpactAnalysisOutput. + + Uses a realistic _tool_impact_analysis return shape: + {_guidance, classification_source, max_affected_row_count, + confirmed_impacted_models: [{name, change_status, materialized, row_count, + schema_changes, value_diff, affected_row_count, data_impact, next_action}], + confirmed_not_impacted_models: [name_str, ...], + errors: []} + + Verifies: + - content[0].text is a short human-readable sentence (not a JSON dump) + - structuredContent passes ImpactAnalysisOutput.model_validate() + - confirmed/potential/none models normalised correctly + - next_action hydrated into NextAction shape + - _guidance extracted to guidance field (without underscore) + - _warning extracted to warning field + """ + from mcp.types import CallToolResult + + import recce.widget_server as ws + from recce.widget_server import ImpactAnalysisInput, ImpactAnalysisOutput + + mock_server = MagicMock() + mock_server._tool_impact_analysis = AsyncMock( + return_value={ + "_guidance": ( + "confirmed_impacted_models lists all models in the DAG blast radius. " "Use data_impact to triage." 
+ ), + "classification_source": "lineage_dag", + "max_affected_row_count": 150, + "confirmed_impacted_models": [ + { + "name": "orders", + "change_status": "modified", + "materialized": "table", + "row_count": {"base": 1000, "current": 1100, "delta": 100, "delta_pct": 10.0}, + "schema_changes": [], + "value_diff": { + "affected_row_count": 150, + "rows_added": 100, + "rows_removed": 0, + "rows_changed": 50, + "columns": {"amount": {"affected_row_count": 50, "base_mean": 80.0, "current_mean": 85.0}}, + }, + "affected_row_count": 150, + "data_impact": "confirmed", + "next_action": None, + }, + { + "name": "customers", + "change_status": None, + "materialized": "view", + "row_count": None, + "schema_changes": [], + "value_diff": None, + "affected_row_count": None, + "data_impact": "potential", + "next_action": { + "tool": "profile_diff", + "columns": None, + "reason": "downstream view, value_diff skipped", + "priority": "low", + }, + }, + ], + "confirmed_not_impacted_models": ["payments", "stg_orders"], + "errors": [], + } + ) + + original = ws._recce_server + ws._recce_server = mock_server + try: + args = ImpactAnalysisInput() + result = await ws.impact_analysis(args) + finally: + ws._recce_server = original + + assert isinstance(result, CallToolResult) + assert len(result.content) == 1 + content_text = result.content[0].text + assert isinstance(content_text, str) + assert len(content_text) < 200, f"content too long ({len(content_text)} chars): {content_text!r}" + assert "widget" in content_text.lower() + + # structuredContent must round-trip through Pydantic + assert result.structuredContent is not None + validated = ImpactAnalysisOutput.model_validate(result.structuredContent) + + # guidance echoed (from _guidance, without underscore) + assert validated.guidance is not None + assert "triage" in validated.guidance + + # classification_source + assert validated.classification_source == "lineage_dag" + + # max_affected_row_count + assert 
validated.max_affected_row_count == 150 + + # confirmed_impacted_models + assert len(validated.confirmed_impacted_models) == 2 + + orders = next(m for m in validated.confirmed_impacted_models if m.name == "orders") + assert orders.data_impact == "confirmed" + assert orders.change_status == "modified" + assert orders.materialized == "table" + assert orders.row_count is not None + assert orders.row_count.base == 1000 + assert orders.row_count.delta == 100 + assert orders.row_count.delta_pct == 10.0 + assert orders.value_diff is not None + assert orders.value_diff.affected_row_count == 150 + assert orders.value_diff.rows_added == 100 + assert orders.value_diff.rows_changed == 50 + assert orders.next_action is None + + customers = next(m for m in validated.confirmed_impacted_models if m.name == "customers") + assert customers.data_impact == "potential" + assert customers.change_status is None + assert customers.materialized == "view" + assert customers.row_count is None + assert customers.value_diff is None + assert customers.next_action is not None + assert customers.next_action.tool == "profile_diff" + assert customers.next_action.priority == "low" + assert customers.next_action.columns is None + + # confirmed_not_impacted_models + assert sorted(validated.confirmed_not_impacted_models) == ["payments", "stg_orders"] + + # errors empty + assert validated.errors == [] + + # warning None (not provided) + assert validated.warning is None From eb923cfeb1508792c8af2904ed030d452cac781d Mon Sep 17 00:00:00 2001 From: Kent Date: Thu, 28 May 2026 21:25:53 +0800 Subject: [PATCH 29/43] fix(widgets): resolve SchemaChange class name collision shadowing schema_diff Phase D impact_analysis introduced a second `class SchemaChange` at module top level, which silently shadowed the schema_diff model defined earlier in the file. 
Module-global binding meant schema_diff serialization tried to fit {added, removed, type_changed, unchanged_count} into the impact_analysis shape and failed Pydantic validation with "column / change_status Field required". Rename the impact_analysis variant to `ColumnSchemaChange` and add a regression test that pins both field surfaces so a future widget tool cannot reintroduce the shadow. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- recce/widget_server.py | 12 ++++++++---- tests/test_widget_server.py | 20 ++++++++++++++++++++ 2 files changed, 28 insertions(+), 4 deletions(-) diff --git a/recce/widget_server.py b/recce/widget_server.py index 72fdadacc..b94c94b65 100644 --- a/recce/widget_server.py +++ b/recce/widget_server.py @@ -2113,8 +2113,12 @@ class NextAction(BaseModel): priority: str # "high" | "medium" | "low" -class SchemaChange(BaseModel): - """A single column-level schema change.""" +class ColumnSchemaChange(BaseModel): + """A single column-level schema change used by impact_analysis. + + Distinct from the model-level SchemaChange defined for schema_diff + (which carries added/removed/type_changed lists per model). + """ column: str change_status: str # "added" | "removed" | "modified" @@ -2127,7 +2131,7 @@ class ImpactedModelEntry(BaseModel): change_status: Optional[str] = None # "added" | "removed" | "modified" | None (downstream) materialized: Optional[str] = None # "table" | "view" | "incremental" | etc. 
row_count: Optional[RowCountSummary] = None - schema_changes: List[SchemaChange] = [] + schema_changes: List[ColumnSchemaChange] = [] value_diff: Optional[ImpactValueDiffSummary] = None affected_row_count: Optional[int] = None data_impact: Optional[str] = None # "confirmed" | "none" | "potential" @@ -2252,7 +2256,7 @@ async def impact_analysis(args: ImpactAnalysisInput) -> CallToolResult: else None ) schema_changes = [ - SchemaChange(column=sc["column"], change_status=sc["change_status"]) + ColumnSchemaChange(column=sc["column"], change_status=sc["change_status"]) for sc in (m.get("schema_changes") or []) if isinstance(sc, dict) and "column" in sc and "change_status" in sc ] diff --git a/tests/test_widget_server.py b/tests/test_widget_server.py index c17536888..70b3b740e 100644 --- a/tests/test_widget_server.py +++ b/tests/test_widget_server.py @@ -2120,3 +2120,23 @@ async def test_impact_analysis_returns_calltoolresult_with_pydantic_shape(): # warning None (not provided) assert validated.warning is None + + +def test_schema_change_models_have_distinct_shapes(): + """Guard against name collisions between schema_diff and impact_analysis schema models. + + `SchemaChange` (model-level, for schema_diff) and `ColumnSchemaChange` + (column-level, for impact_analysis) used to share the same class name, + which silently shadowed the schema_diff model at module import time and + broke schema_diff serialization with a Pydantic "column / change_status + Field required" error. This test pins the field surfaces so any future + accidental shadow fails loudly. 
+ """ + from recce.widget_server import ColumnSchemaChange, SchemaChange + + schema_diff_fields = set(SchemaChange.model_fields.keys()) + impact_fields = set(ColumnSchemaChange.model_fields.keys()) + + assert schema_diff_fields == {"added", "removed", "type_changed", "unchanged_count"} + assert impact_fields == {"column", "change_status"} + assert SchemaChange is not ColumnSchemaChange From cc8e8691cde2f94ebdeb833b8e754675fe08c95f Mon Sep 17 00:00:00 2001 From: Kent Date: Thu, 28 May 2026 22:42:07 +0800 Subject: [PATCH 30/43] feat(widgets): add lineage_diff widget (Phase E first version, 10-node cap) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 15th widget tool. Hand-roll SVG mini-DAG (no Mermaid/ReactFlow dep) copying the impact_analysis BFS layout pattern. Hard cap MAX_INLINE_NODES=10 — over cap shows graceful empty-state message pointing to Recce web UI, no truncated view. Full ReactFlow plan from design Phase E remains deferred. Motivation: without a lineage_diff widget, Claude Desktop's agent falls back to rendering lineage as Mermaid text after seeing impact_analysis output, bypassing the widget rendering pipeline. Adding this widget keeps lineage visualization inside the same theming + interaction surface as the other 14 widgets and brings widget coverage to 15/20 (75%). Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- recce/data/mcp/lineage_diff.html | 424 +++++++++++++++++++++++++++++++ recce/mcp_server.py | 1 + recce/widget_server.py | 177 +++++++++++++ tests/test_widget_server.py | 171 ++++++++++++- 4 files changed, 770 insertions(+), 3 deletions(-) create mode 100644 recce/data/mcp/lineage_diff.html diff --git a/recce/data/mcp/lineage_diff.html b/recce/data/mcp/lineage_diff.html new file mode 100644 index 000000000..3c9cc4eb3 --- /dev/null +++ b/recce/data/mcp/lineage_diff.html @@ -0,0 +1,424 @@ + + + + + Lineage Diff + + + +
+
+
Loading lineage diff…
+
Waiting for results.
+
+
+ + + + diff --git a/recce/mcp_server.py b/recce/mcp_server.py index fc919ff0b..36f86198f 100644 --- a/recce/mcp_server.py +++ b/recce/mcp_server.py @@ -68,6 +68,7 @@ "profile_diff", "get_cll", "impact_analysis", + "lineage_diff", } diff --git a/recce/widget_server.py b/recce/widget_server.py index b94c94b65..53419fd2c 100644 --- a/recce/widget_server.py +++ b/recce/widget_server.py @@ -2316,6 +2316,183 @@ def impact_analysis_resource() -> str: return _read_widget_html("impact_analysis") +# --------------------------------------------------------------------------- +# lineage_diff widget tool + resource (Phase E first version, 10-node cap) +# --------------------------------------------------------------------------- + +MAX_INLINE_NODES = 10 + + +class LineageDiffInput(BaseModel): + select: Optional[str] = Field( + default=None, + description="dbt selector syntax (e.g. 'state:modified+', '1+state:modified')", + ) + exclude: Optional[str] = Field( + default=None, + description="dbt selector syntax for exclusion", + ) + packages: Optional[List[str]] = Field( + default=None, + description="Restrict to specific dbt packages by name", + ) + view_mode: Optional[str] = Field( + default="changed_models", + description="'all' (full lineage) or 'changed_models' (default, modified + downstream).", + ) + + +class LineageNode(BaseModel): + """One node in the lineage DAG, flattened from the DataFrame row format.""" + + idx: int + id: str + name: Optional[str] = None + resource_type: Optional[str] = None + materialized: Optional[str] = None + change_status: Optional[str] = None # "added" | "modified" | "removed" | None + impacted: bool = False + + +class LineageEdge(BaseModel): + """One directed edge in the lineage DAG (parent_idx -> child_idx). + + The underlying DataFrame uses 'from'/'to' column keys, which are Python + reserved words — Pydantic aliases let us accept those keys while exposing + idiomatic Python attribute names. 
+ """ + + from_idx: int = Field(alias="from") + to_idx: int = Field(alias="to") + + model_config = {"populate_by_name": True} + + +class LineageDiffOutput(BaseModel): + """Output model for the lineage_diff widget tool. + + First-version contract: when node_count > MAX_INLINE_NODES the widget + receives empty `nodes`/`edges` lists plus `exceeds_limit=True` so the HTML + can render a graceful skip message. No truncation, no toggle. + """ + + nodes: List[LineageNode] + edges: List[LineageEdge] + node_count: int + exceeds_limit: bool + max_inline_nodes: int + + +def _dataframe_rows(df: Dict[str, Any]) -> List[Dict[str, Any]]: + """Convert a serialized DataFrame ({columns:[{name,...}], data:[tuple,...]}) + into a flat list of {column_name: value} dicts. + + Returns [] when columns or data are missing/empty. + """ + columns = df.get("columns") or [] + rows = df.get("data") or [] + keys = [c.get("name") for c in columns if isinstance(c, dict)] + return [dict(zip(keys, row)) for row in rows] + + +@mcp.tool( + name="lineage_diff", + annotations={ + "title": "Lineage Diff (Widget)", + "readOnlyHint": True, + "destructiveHint": False, + "idempotentHint": True, + "openWorldHint": False, + }, + meta={ + "ui": {"resourceUri": "ui://recce/lineage_diff.html"}, + "ui/resourceUri": "ui://recce/lineage_diff.html", + }, +) +async def lineage_diff(args: LineageDiffInput) -> CallToolResult: + """Show the lineage DAG diff between base and current dbt environments. + + Renders an interactive SVG of modified models and their dependencies, hand-rolled + using the same BFS layered layout as the impact_analysis mini-DAG. First version + is capped at MAX_INLINE_NODES (10) inline nodes — larger graphs are skipped with + a graceful message pointing the user to the Recce web UI. + + Args: + select: dbt selector syntax (e.g. 
"state:modified+", "customers orders") + exclude: dbt selector for exclusion + packages: restrict to specific dbt packages by name + view_mode: 'all' or 'changed_models' (default) + + Returns: + CallToolResult with structuredContent: LineageDiffOutput shape + {nodes: [{idx, id, name, resource_type, materialized, change_status, impacted}], + edges: [{from, to}], node_count, exceeds_limit, max_inline_nodes} + + Use when: + - User asks "show me the lineage diff" / "what models depend on X" + - Visualizing the dependency graph of changed models in a small PR + Don't use when: + - Need column-level lineage → use get_cll + - Need data impact triage with row counts → use impact_analysis + - Lineage scope exceeds 10 nodes — widget will show a skip message and the + user should be directed to the Recce web UI + + Error Handling: + - Underlying _tool_lineage_diff raises on adapter/context failure + - >10 nodes returns exceeds_limit=True with empty nodes/edges (not an error) + """ + result = await _recce_server._tool_lineage_diff(args.model_dump(exclude_none=True)) + nodes_df = result.get("nodes", {}) if isinstance(result, dict) else {} + edges_df = result.get("edges", {}) if isinstance(result, dict) else {} + + raw_nodes = _dataframe_rows(nodes_df) + raw_edges = _dataframe_rows(edges_df) + + nodes = [LineageNode(**row) for row in raw_nodes] + edges = [LineageEdge(**row) for row in raw_edges] + + node_count = len(nodes) + exceeds = node_count > MAX_INLINE_NODES + + output = LineageDiffOutput( + nodes=[] if exceeds else nodes, + edges=[] if exceeds else edges, + node_count=node_count, + exceeds_limit=exceeds, + max_inline_nodes=MAX_INLINE_NODES, + ) + + if exceeds: + text = ( + f"Lineage diff: {node_count} nodes exceeds {MAX_INLINE_NODES}-node " + f"inline widget cap. Open the Recce web UI for the full view." + ) + else: + text = ( + f"Lineage diff: {node_count} node{'s' if node_count != 1 else ''}, " + f"{len(edges)} edge{'s' if len(edges) != 1 else ''}. Rendered in widget." 
+ ) + + return CallToolResult( + content=[TextContent(type="text", text=text)], + structuredContent=output.model_dump(by_alias=True), + ) + + +@mcp.resource( + uri="ui://recce/lineage_diff.html", + mime_type="text/html;profile=mcp-app", + meta={ + "ui": { + "csp": {"resourceDomains": ["https://unpkg.com"]}, + "prefersBorder": False, + }, + }, +) +def lineage_diff_resource() -> str: + return _read_widget_html("lineage_diff") + + # --------------------------------------------------------------------------- # Entry point # --------------------------------------------------------------------------- diff --git a/tests/test_widget_server.py b/tests/test_widget_server.py index 70b3b740e..3a5ef6e5d 100644 --- a/tests/test_widget_server.py +++ b/tests/test_widget_server.py @@ -74,8 +74,10 @@ async def test_mcp_server_filters_widget_tools_when_widgets_enabled(monkeypatch) assert "list_checks" not in names assert "get_model" not in names assert "query" not in names - # Other tools must still be present - assert "lineage_diff" in names + assert "lineage_diff" not in names + # Non-widget tools must still be present + assert "create_check" in names + assert "run_check" in names # --------------------------------------------------------------------------- @@ -85,7 +87,7 @@ async def test_mcp_server_filters_widget_tools_when_widgets_enabled(monkeypatch) @pytest.mark.asyncio async def test_widget_server_registers_six_tools_and_six_resources(): - """Widget FastMCP instance has exactly 14 tools/resources (Phase A + Phase B + Phase C + Phase D widgets). + """Widget FastMCP instance has exactly 15 tools/resources (Phase A + Phase B + Phase C + Phase D + lineage_diff). Uses FastMCP public API: mcp.list_tools() and mcp.list_resources(). 
""" @@ -112,6 +114,7 @@ async def test_widget_server_registers_six_tools_and_six_resources(): "profile_diff", "get_cll", "impact_analysis", + "lineage_diff", } assert resource_uris == { "ui://recce/row_count_diff.html", @@ -128,6 +131,7 @@ async def test_widget_server_registers_six_tools_and_six_resources(): "ui://recce/profile_diff.html", "ui://recce/get_cll.html", "ui://recce/impact_analysis.html", + "ui://recce/lineage_diff.html", } @@ -2140,3 +2144,164 @@ def test_schema_change_models_have_distinct_shapes(): assert schema_diff_fields == {"added", "removed", "type_changed", "unchanged_count"} assert impact_fields == {"column", "change_status"} assert SchemaChange is not ColumnSchemaChange + + +# --------------------------------------------------------------------------- +# Test 91: lineage_diff widget tool is registered with correct resource URI +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +async def test_lineage_diff_widget_registered(): + """lineage_diff appears in widget mcp tools/list and its resource URI exists. 
+ + Verifies: + - tool named 'lineage_diff' is in widget mcp tool list + - resource URI 'ui://recce/lineage_diff.html' is in widget mcp resource list + - select, exclude, packages, view_mode are all optional + - annotations: openWorldHint=False (no warehouse query, manifest-only) + """ + from recce.widget_server import mcp + + tools = await mcp.list_tools() + resources = await mcp.list_resources() + + tool_names = {t.name for t in tools} + resource_uris = {str(r.uri) for r in resources} + + assert "lineage_diff" in tool_names + assert "ui://recce/lineage_diff.html" in resource_uris + + tool = next(t for t in tools if t.name == "lineage_diff") + schema = tool.inputSchema + assert schema is not None + defs = schema.get("$defs", {}) + inner_schema = next(iter(defs.values()), schema) + inner_required = inner_schema.get("required", []) + inner_props = inner_schema.get("properties", {}) + assert "select" not in inner_required, "select must be optional" + assert "exclude" not in inner_required, "exclude must be optional" + assert "packages" not in inner_required, "packages must be optional" + assert "view_mode" not in inner_required, "view_mode must be optional" + for key in ("select", "exclude", "packages", "view_mode"): + assert key in inner_props, f"missing prop: {key}" + + a = tool.annotations + assert a is not None + assert a.readOnlyHint is True + assert a.destructiveHint is False + assert a.openWorldHint is False, "lineage_diff: expected openWorldHint=False (manifest-only, no warehouse query)" + + +# --------------------------------------------------------------------------- +# Test 92: lineage_diff returns CallToolResult with correct Pydantic shape +# covering both under-cap and over-cap (>MAX_INLINE_NODES) branches. +# --------------------------------------------------------------------------- + + +def _make_lineage_dataframe(node_count: int) -> dict: + """Build a realistic _tool_lineage_diff return shape with `node_count` nodes. 
+ + DataFrame format: {columns: [{key, name, type}, ...], data: [tuple, ...]}. + Edges chain node_0 → node_1 → node_2 → ... (linear DAG for predictable tests). + """ + node_cols = [ + {"key": "idx", "name": "idx", "type": "integer"}, + {"key": "id", "name": "id", "type": "text"}, + {"key": "name", "name": "name", "type": "text"}, + {"key": "resource_type", "name": "resource_type", "type": "text"}, + {"key": "materialized", "name": "materialized", "type": "text"}, + {"key": "change_status", "name": "change_status", "type": "text"}, + {"key": "impacted", "name": "impacted", "type": "boolean"}, + ] + nodes_data = [] + for i in range(node_count): + change_status = "modified" if i == 0 else None + impacted = i == 0 + nodes_data.append( + (i, f"model.recce.node_{i}", f"node_{i}", "model", "table", change_status, impacted) + ) + + edge_cols = [ + {"key": "from", "name": "from", "type": "integer"}, + {"key": "to", "name": "to", "type": "integer"}, + ] + edges_data = [(i, i + 1) for i in range(max(0, node_count - 1))] + + return { + "nodes": {"columns": node_cols, "data": nodes_data}, + "edges": {"columns": edge_cols, "data": edges_data}, + } + + +@pytest.mark.asyncio +async def test_lineage_diff_returns_calltoolresult_with_pydantic_shape(): + """lineage_diff handler returns CallToolResult with structuredContent matching LineageDiffOutput. + + Verifies BOTH branches: + - Under-cap (3 nodes): nodes + edges populated, exceeds_limit=False. + - Over-cap (11 nodes > MAX_INLINE_NODES=10): nodes=[], edges=[], exceeds_limit=True. 
+ """ + from mcp.types import CallToolResult + + import recce.widget_server as ws + from recce.widget_server import MAX_INLINE_NODES, LineageDiffInput, LineageDiffOutput + + # ── Under-cap branch (3 nodes, 2 edges) ────────────────────────── + mock_server = MagicMock() + mock_server._tool_lineage_diff = AsyncMock(return_value=_make_lineage_dataframe(3)) + + original = ws._recce_server + ws._recce_server = mock_server + try: + result = await ws.lineage_diff(LineageDiffInput()) + finally: + ws._recce_server = original + + assert isinstance(result, CallToolResult) + assert len(result.content) == 1 + text = result.content[0].text + assert isinstance(text, str) + assert len(text) < 200 + assert "widget" in text.lower() + + assert result.structuredContent is not None + validated = LineageDiffOutput.model_validate(result.structuredContent) + assert validated.node_count == 3 + assert validated.exceeds_limit is False + assert validated.max_inline_nodes == MAX_INLINE_NODES == 10 + assert len(validated.nodes) == 3 + assert len(validated.edges) == 2 + # First node is the modified root + first = next(n for n in validated.nodes if n.idx == 0) + assert first.id == "model.recce.node_0" + assert first.name == "node_0" + assert first.change_status == "modified" + assert first.impacted is True + # Edge alias round-trip — model_dump(by_alias=True) emits 'from'/'to' keys + raw_edges = result.structuredContent["edges"] + assert raw_edges[0] == {"from": 0, "to": 1} + # validated edges use python attribute names + assert validated.edges[0].from_idx == 0 + assert validated.edges[0].to_idx == 1 + + # ── Over-cap branch (11 > 10) ──────────────────────────────────── + mock_server2 = MagicMock() + mock_server2._tool_lineage_diff = AsyncMock(return_value=_make_lineage_dataframe(11)) + + ws._recce_server = mock_server2 + try: + result2 = await ws.lineage_diff(LineageDiffInput()) + finally: + ws._recce_server = original + + assert isinstance(result2, CallToolResult) + text2 = 
result2.content[0].text + assert "exceed" in text2.lower() or "cap" in text2.lower(), f"expected over-cap message, got: {text2!r}" + + validated2 = LineageDiffOutput.model_validate(result2.structuredContent) + assert validated2.node_count == 11 + assert validated2.exceeds_limit is True + assert validated2.max_inline_nodes == 10 + assert validated2.nodes == [], "over-cap must return empty nodes list" + assert validated2.edges == [], "over-cap must return empty edges list" From dd4e033300214423ede65dd51eb68e1a2786dd15 Mon Sep 17 00:00:00 2001 From: Kent Date: Fri, 29 May 2026 18:13:28 +0800 Subject: [PATCH 31/43] chore(claude): enable mcp-apps + mcp-server-dev plugins for widget dev These two Claude Code plugins back the MCP App widget work in this branch (ext-apps SDK reference, MCP server scaffolding). Enabling them in the repo settings keeps the dev environment consistent for anyone continuing the widget POC. Co-Authored-By: Claude Opus 4.7 (1M context) Signed-off-by: Kent --- .claude/settings.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.claude/settings.json b/.claude/settings.json index 2bfc7e60d..c3d031f3f 100644 --- a/.claude/settings.json +++ b/.claude/settings.json @@ -1,5 +1,7 @@ { "enabledPlugins": { - "hookify@claude-plugins-official": true + "hookify@claude-plugins-official": true, + "mcp-apps@claude-plugins-official": true, + "mcp-server-dev@claude-plugins-official": true } } From f692f086ba79cedc0d7444d461d1be45c65f7fe7 Mon Sep 17 00:00:00 2001 From: Kent Date: Fri, 29 May 2026 18:45:00 +0800 Subject: [PATCH 32/43] fix(mcp): intersect base+current columns in impact_analysis value_diff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The impact_analysis value_diff builder derived its per-column diff list from model_info["columns"], which reflects only the CURRENT relation (get_model -> get_columns(base=False)). 
It then applied `b."col" IS DISTINCT FROM c."col"` to BOTH the base (b) and current (c) relations. When a column had drifted — present in the current physical table but not the base — the warehouse binder failed hard (Snowflake: `Table "b" does not have a column named ""`). Restrict the diff to the intersection of base and current columns via get_model(node_id, base=True) (which manages its own warehouse connection), mirroring ValueDiffTask. Drifted columns are already reported via schema_changes. Also skip value_diff when the PK itself has drifted, since it is the FULL OUTER JOIN key. Adds a regression test asserting drifted columns never reach the generated SQL and that value_diff covers only common non-PK columns. Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Kent --- recce/mcp_server.py | 22 ++++++++++-- tests/test_mcp_server.py | 75 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 93 insertions(+), 4 deletions(-) diff --git a/recce/mcp_server.py b/recce/mcp_server.py index 36f86198f..9bdfe5817 100644 --- a/recce/mcp_server.py +++ b/recce/mcp_server.py @@ -1871,11 +1871,27 @@ async def _tool_impact_analysis(self, arguments: Dict[str, Any]) -> Dict[str, An if not pk: continue # no PK → value_diff stays null - # Get column info for building SQL + # Get column info for building SQL. model_info reflects the CURRENT + # relation only (get_model -> get_columns(base=False)). columns_info = model_info.get("columns", {}) - non_pk_cols = [c for c in columns_info if c != pk] + + # Restrict the diff to columns present in BOTH the base and current + # relations. The per-column expression `b."col" IS DISTINCT FROM c."col"` + # references the same column on both sides, so a column that has drifted + # (exists in one relation but not the other) fails the warehouse binder. + # Intersect both sides, mirroring ValueDiffTask; drifted columns are + # already reported via schema_changes (Step 2b). 
get_model(base=True) + # manages its own warehouse connection for the base-side introspection. + base_model_info = self.context.adapter.get_model(node_id, base=True) + common_cols = set(columns_info) & set(base_model_info.get("columns", {})) + + # The PK is the FULL OUTER JOIN key — it must exist on both sides. + if pk not in common_cols: + continue # PK drifted; cannot value-diff without a shared join key + + non_pk_cols = [c for c in columns_info if c != pk and c in common_cols] if not non_pk_cols: - continue # only PK column, no value diff to compute + continue # only PK column (or all non-PK columns drifted), no value diff # Build relations for base and current schemas base_rel = self.context.adapter.create_relation(model["name"], base=True) diff --git a/tests/test_mcp_server.py b/tests/test_mcp_server.py index 9457f6bf9..bb00172c4 100644 --- a/tests/test_mcp_server.py +++ b/tests/test_mcp_server.py @@ -2312,7 +2312,10 @@ def mock_select_nodes(select=""): adapter.select_nodes.side_effect = mock_select_nodes - def mock_get_model(node_id): + # base param mirrors the real adapter signature; the value_diff builder calls + # get_model(node_id, base=True) to intersect base columns. No drift here, so both + # sides return identical columns. + def mock_get_model(node_id, base=False): models = { "model.project.modified_model": { "primary_key": "id", @@ -2637,3 +2640,73 @@ async def test_response_uses_new_field_names(self, setup_impact_mocks): assert "max_affected_row_count" in result assert "total_affected_row_count" not in result assert "suggested_deep_dives" not in result + + @pytest.mark.asyncio + async def test_value_diff_excludes_schema_drifted_columns(self, mcp_server): + """Schema drift: a column present in CURRENT but absent in BASE must NOT appear + in the value_diff SQL. 
+ + Deriving the per-column diff list from a single (current) relation's + introspection produces ``b."" IS DISTINCT FROM c.""`` against a base + relation that lacks ```` → Snowflake Binder Error + ``Table "b" does not have a column named ""``. The diff must use only the + intersection of base and current columns; drifted columns are reported via + schema_changes, not fed into the per-column diff expression. + + Regression test for the impact_analysis value_diff stale-column Binder Error. + """ + server, mock_context = mcp_server + mock_context.get_lineage_diff.return_value = MagicMock( + model_dump=MagicMock(return_value=self.LINEAGE_DIFF_DATA) + ) + + adapter = self._make_mock_adapter() + + # CURRENT relation carries a stale `full_name` column that BASE lacks (schema drift). + # get_model(base=False) reflects CURRENT introspection (includes full_name); + # get_model(base=True) reflects BASE (no full_name). + def mock_get_model_drift(node_id, base=False): + modified_cols = {"id": {"type": "INTEGER"}, "amount": {"type": "DECIMAL"}} + if not base: + modified_cols["full_name"] = {"type": "VARCHAR"} # stale, current-only + models = { + "model.project.modified_model": {"primary_key": "id", "columns": modified_cols}, + "model.project.downstream_model": { + "primary_key": "id", + "columns": {"id": {"type": "INTEGER"}, "total": {"type": "DECIMAL"}}, + }, + } + return models.get(node_id, {}) + + adapter.get_model.side_effect = mock_get_model_drift + + captured_queries = [] + + def capture_execute(query, fetch=False): + captured_queries.append(str(query)) + # Row long enough for both pre-fix ([amount, full_name]) and post-fix + # ([amount]) column layouts so result parsing never crashes. 
+ row = [0, 0, 1, 1, 10.0, 12.0, 0] + table = MagicMock() + table.__len__ = MagicMock(return_value=1) + table.__getitem__ = MagicMock(side_effect=lambda i: row if i == 0 else None) + return (None, table) + + adapter.execute.side_effect = capture_execute + mock_context.adapter = adapter + + with ( + patch("recce.mcp_server.sentry_metrics", None), + patch.object(RowCountDiffTask, "execute", return_value={}), + ): + result = await self._call_impact_analysis(server) + + # The drifted column must never reach the SQL — neither the b. nor the c. side. + for q in captured_queries: + assert "full_name" not in q, f"drifted column leaked into value_diff SQL: {q}" + + # modified_model value_diff covers only the common non-PK column (amount). + models_by_name = {m["name"]: m for m in result["confirmed_impacted_models"]} + modified = models_by_name["modified_model"] + assert modified["value_diff"] is not None + assert set(modified["value_diff"]["columns"].keys()) == {"amount"} From 87f22fd30f322d56abfbf7588bcba6ab55f2f178 Mon Sep 17 00:00:00 2001 From: Kent Date: Fri, 29 May 2026 20:15:36 +0800 Subject: [PATCH 33/43] chore(gitignore): ignore dbt log dirs and hypothesis example cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `logs/` (dbt run logs) and `.hypothesis/` (hypothesis testing example cache) are runtime/test artifacts regenerated on every test run, but the existing rules only covered the root `dbt.log` file — not the `logs/` directory dbt writes to, nor `integration_tests/dbt/logs/`. They showed as persistent untracked changes in the workspace UI. Ignore the dir forms (no leading slash → matches at any depth). 
Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Kent --- .gitignore | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.gitignore b/.gitignore index 4c6e4f5fa..bd723e293 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,12 @@ build .noai .DS_Store +# Test / runtime artifacts (dbt run logs, hypothesis example cache). +# `logs/` (no leading slash) matches dbt log dirs at any depth, +# e.g. ./logs and integration_tests/dbt/logs. +logs/ +.hypothesis/ + # Dependabot CLI output deps-*.yml From bf5705ed83e099e113fec44d62ce91b705974611 Mon Sep 17 00:00:00 2001 From: Kent Date: Tue, 2 Jun 2026 17:53:15 +0800 Subject: [PATCH 34/43] fix(mcp,cli): address Codex review feedback Batch of correctness/robustness fixes from a Codex review pass: - mcp_server: default 30s timeout on cloud proxy requests so a stalled endpoint cannot hang the MCP server indefinitely. - mcp_server: include the effective base selection (single-env vs dual-env) in the set_backend local cache key, so a context loaded in single-env mode is reloaded once target-base/ later appears instead of serving a stale single-env context. - cli: mcp_config_install invokes `python -m recce.cli` (the package has no recce.__main__, so `-m recce` is not runnable). - cli: make check_base_freshness SHA comparison opt-in via expected_base_sha. Base-branch artifacts legitimately carry a DBT_GIT_SHA that differs from the feature-branch HEAD, so the previous unconditional compare produced false "stale_sha" results. - tests: cover cloud request timeout, single-env -> dual-env reload, the config-install module path, and the opt-in base-SHA freshness check. 
Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Kent --- recce/cli.py | 25 ++++++++------- recce/mcp_server.py | 35 ++++++++++++++------- tests/test_check_base.py | 21 +++++++++++-- tests/test_mcp_cloud_backend.py | 54 ++++++++++++++++++++++++++++++++ tests/test_mcp_config_install.py | 30 ++++++++++++++++++ 5 files changed, 140 insertions(+), 25 deletions(-) diff --git a/recce/cli.py b/recce/cli.py index 3dd13fda3..f64cb8384 100644 --- a/recce/cli.py +++ b/recce/cli.py @@ -3154,9 +3154,10 @@ def mcp_config_install(project_dir, claude_config, yes, dry_run): # ------------------------------------------------------------------ recce_bin = shutil.which("recce") if not recce_bin: - # Fallback: use the current Python executable with -m recce + # Fallback: use the current Python executable with the CLI module. + # The package has no recce.__main__, so `python -m recce` is not runnable. recce_bin = sys.executable - recce_base_args_prefix = ["-m", "recce"] + recce_base_args_prefix = ["-m", "recce.cli"] else: recce_base_args_prefix = [] @@ -3342,6 +3343,7 @@ def resolve_target_base_path( def check_base_freshness( target_base_path: str = "target-base", freshness_threshold_hours: float = 48.0, + expected_base_sha: str | None = None, ) -> dict: """ Check whether the base artifacts in target_base_path are fresh. 
@@ -3352,7 +3354,8 @@ def check_base_freshness( message: human-readable explanation artifact_age_hours: float or None base_sha: str or None (DBT_GIT_SHA from manifest metadata) - current_sha: str or None (current HEAD SHA) + current_sha: str or None (reserved for legacy callers) + expected_base_sha: str or None threshold_hours: float """ import json @@ -3369,6 +3372,7 @@ def check_base_freshness( "artifact_age_hours": None, "base_sha": None, "current_sha": None, + "expected_base_sha": expected_base_sha, "threshold_hours": freshness_threshold_hours, } @@ -3399,24 +3403,23 @@ def check_base_freshness( ) return result - # SHA-based freshness check (best-effort: skip if field absent or git unavailable) + # SHA-based freshness check is opt-in. In normal Recce usage, target-base + # artifacts are generated from the base branch, so DBT_GIT_SHA is expected + # to differ from the current feature-branch HEAD. Only compare when the + # caller provides the expected base SHA explicitly. try: with open(manifest_path) as f: manifest_data = json.load(f) base_sha = manifest_data.get("metadata", {}).get("env", {}).get("DBT_GIT_SHA") result["base_sha"] = base_sha - if base_sha is not None: - from recce.git import current_commit_hash - - current_sha = current_commit_hash() - result["current_sha"] = current_sha - if current_sha and base_sha != current_sha: + if base_sha is not None and expected_base_sha: + if base_sha != expected_base_sha: result["status"] = "stale_sha" result["recommendation"] = "docs_generate" result["message"] = ( f"Base artifacts are stale (generated at {base_sha[:7]}, " - f"current HEAD: {current_sha[:7]}). " + f"expected base: {expected_base_sha[:7]}). 
" f"Run: dbt docs generate --target-path {target_base_path}" ) return result diff --git a/recce/mcp_server.py b/recce/mcp_server.py index 56fb1c658..a60b08faa 100644 --- a/recce/mcp_server.py +++ b/recce/mcp_server.py @@ -71,6 +71,8 @@ "lineage_diff", } +DEFAULT_CLOUD_REQUEST_TIMEOUT = 30 + def _widgets_enabled() -> bool: """Read RECCE_MCP_WIDGETS env at call time (not import time) so tests can monkeypatch.""" @@ -120,6 +122,7 @@ async def _request(self, method: str, api_name: str, **kwargs): **kwargs.pop("headers", {}), "Authorization": f"Bearer {self.api_token}", } + kwargs.setdefault("timeout", DEFAULT_CLOUD_REQUEST_TIMEOUT) response = await asyncio.to_thread(requests.request, method, url, headers=headers, **kwargs) if response.status_code == 405: raise InstanceSpawningError() @@ -2307,7 +2310,22 @@ async def _tool_set_backend(self, arguments: Dict[str, Any]) -> Dict[str, Any]: project_dir = arguments.get("project_dir") target_path = arguments.get("target_path", "target") target_base_path = arguments.get("target_base_path", "target-base") - cache_key = (project_dir, target_path, target_base_path) + + # Check artifact presence on every local switch. A long-lived MCP + # can start in single-env mode, then the user may generate + # target-base/ and call set_backend(local) again with identical + # args. The cache key must include the effective base selection so + # that stale single-env contexts are not reused. + base_path = Path(project_dir or "./").joinpath(target_base_path) + target_dir = Path(project_dir or "./").joinpath(target_path) + effective_base = target_base_path + single_env = False + if target_dir.is_dir() and not base_path.is_dir(): + effective_base = target_path + single_env = True + else: + single_env = not base_path.is_dir() + cache_key = (project_dir, target_path, target_base_path, effective_base, single_env) if self.context is None or self._local_cache_key != cache_key: # Reset the global so RecceContext.load runs fresh against new params. 
@@ -2315,17 +2333,10 @@ async def _tool_set_backend(self, arguments: Dict[str, Any]) -> Dict[str, Any]: _core.recce_context = None - # Mirror CLI single-env fallback: if target/ exists but target-base/ - # doesn't, point both envs at target/ so load_context() doesn't fail - # on a missing base manifest. - base_path = Path(project_dir or "./").joinpath(target_base_path) - target_dir = Path(project_dir or "./").joinpath(target_path) - effective_base = target_base_path - if target_dir.is_dir() and not base_path.is_dir(): - effective_base = target_path - self.single_env = True - else: - self.single_env = not base_path.is_dir() + # Mirror CLI single-env fallback: if target/ exists but + # target-base/ doesn't, point both envs at target/ so + # load_context() doesn't fail on a missing base manifest. + self.single_env = single_env load_kwargs = { "target_path": target_path, diff --git a/tests/test_check_base.py b/tests/test_check_base.py index 51e4260c0..ac27ee2e8 100644 --- a/tests/test_check_base.py +++ b/tests/test_check_base.py @@ -98,16 +98,33 @@ def test_status_stale_time(old_manifest_dir): def test_status_stale_sha(fresh_manifest_dir): - """SHA in manifest differs from current HEAD → stale_sha, message contains 'stale'.""" + """SHA checking is opt-in: without expected_base_sha, a base-branch SHA + that differs from feature-branch HEAD is still fresh. + + In normal Recce usage, target-base artifacts are generated from the base + branch, so DBT_GIT_SHA is expected to differ from current feature HEAD. 
+ """ different_sha = "9999999deadbeef0000000000000000000000000" with patch("recce.git.current_commit_hash", return_value=different_sha): result = check_base_freshness( target_base_path=str(fresh_manifest_dir), freshness_threshold_hours=48.0, ) + assert result["status"] == "fresh" + assert result["recommendation"] == "reuse" + + +def test_status_stale_sha_when_expected_base_sha_mismatch(fresh_manifest_dir): + """When an expected base SHA is provided, mismatch → stale_sha.""" + expected_base_sha = "9999999deadbeef0000000000000000000000000" + result = check_base_freshness( + target_base_path=str(fresh_manifest_dir), + freshness_threshold_hours=48.0, + expected_base_sha=expected_base_sha, + ) assert result["status"] == "stale_sha" assert result["recommendation"] == "docs_generate" - assert "stale" in result["message"].lower() + assert result["expected_base_sha"] == expected_base_sha def test_status_missing(tmp_path): diff --git a/tests/test_mcp_cloud_backend.py b/tests/test_mcp_cloud_backend.py index 174d9391b..425712b04 100644 --- a/tests/test_mcp_cloud_backend.py +++ b/tests/test_mcp_cloud_backend.py @@ -45,6 +45,16 @@ async def test_cloud_backend_spawns_instance_without_inner_api_path(cloud_reques assert "/sessions/sess-123/api/" not in url +@pytest.mark.asyncio +async def test_cloud_backend_requests_use_timeout(cloud_requests): + """Cloud proxy calls must not hang forever on stalled endpoints.""" + cloud_requests.return_value = MockResponse(204) + + await CloudBackend.create(session_id="sess-123", api_token="token-abc") + + assert cloud_requests.call_args.kwargs["timeout"] == 30 + + @pytest.mark.asyncio async def test_cloud_backend_uses_session_proxy_paths_without_inner_api_segment(cloud_requests): cloud_requests.side_effect = [ @@ -736,6 +746,50 @@ async def test_set_backend_local_keeps_dual_env_when_base_dir_present(): assert mock_load.call_args.kwargs["target_base_path"] == "target-base" +@pytest.mark.asyncio +async def 
test_set_backend_local_reloads_when_base_dir_appears(): + """single-env cached context must be replaced once target-base/ appears.""" + single_env_context = MagicMock() + single_env_context.adapter_type = "dbt" + dual_env_context = MagicMock() + dual_env_context.adapter_type = "dbt" + + server = RecceMCPServer() + + with ( + patch("recce.mcp_server.load_context", side_effect=[single_env_context, dual_env_context]) as mock_load, + patch("recce.mcp_server.Path") as mock_path, + ): + base_missing = MagicMock() + base_missing.is_dir.return_value = False + target_present_1 = MagicMock() + target_present_1.is_dir.return_value = True + base_present = MagicMock() + base_present.is_dir.return_value = True + target_present_2 = MagicMock() + target_present_2.is_dir.return_value = True + mock_path.return_value.joinpath.side_effect = [ + base_missing, + target_present_1, + base_present, + target_present_2, + ] + + first = await server._tool_set_backend( + {"mode": "local", "project_dir": "/proj", "target_path": "target", "target_base_path": "target-base"} + ) + second = await server._tool_set_backend( + {"mode": "local", "project_dir": "/proj", "target_path": "target", "target_base_path": "target-base"} + ) + + assert first["single_env"] is True + assert second["single_env"] is False + assert server.context is dual_env_context + assert mock_load.call_count == 2 + assert mock_load.call_args_list[0].kwargs["target_base_path"] == "target" + assert mock_load.call_args_list[1].kwargs["target_base_path"] == "target-base" + + @pytest.mark.asyncio async def test_cloud_backend_routes_run_tool_types_through_run_backed(cloud_requests): """RUN_TOOL_TYPES tools (e.g., row_count_diff) dispatch via _tool_run_backed.""" diff --git a/tests/test_mcp_config_install.py b/tests/test_mcp_config_install.py index 64d92b3fd..032883bf7 100644 --- a/tests/test_mcp_config_install.py +++ b/tests/test_mcp_config_install.py @@ -9,6 +9,7 @@ """ import json +import shutil import sys from pathlib import Path @@ 
-225,3 +226,32 @@ def test_install_backup_created(tmp_path, monkeypatch): # Backup content matches original (pre-write snapshot) backup_content = json.loads(backup_path.read_text()) assert backup_content == {"mcpServers": {}}, "Backup content does not match original config" + + +def test_install_python_fallback_uses_cli_module(tmp_path, monkeypatch): + """When no recce executable is on PATH, fallback command must be runnable.""" + monkeypatch.setattr(sys, "platform", "darwin") + monkeypatch.setattr(shutil, "which", lambda _: None) + + project_dir = _make_dbt_project(tmp_path / "my_project") + config_file = tmp_path / "claude_desktop_config.json" + _make_config(config_file) + + runner = CliRunner() + result = runner.invoke( + mcp_config_install, + [ + "--project-dir", + str(project_dir), + "--config", + str(config_file), + "--yes", + ], + ) + + assert result.exit_code == 0, f"Unexpected exit: {result.output}\n{result.exception}" + written = json.loads(config_file.read_text()) + servers = written["mcpServers"] + assert servers["recce"]["command"] == sys.executable + assert servers["recce"]["args"][:2] == ["-m", "recce.cli"] + assert servers["recce-widgets"]["args"][:2] == ["-m", "recce.cli"] From 230af0f033f133996293a9fcb6b32890113db0a9 Mon Sep 17 00:00:00 2001 From: Kent Date: Tue, 2 Jun 2026 17:54:33 +0800 Subject: [PATCH 35/43] style(test): satisfy black + isort on test_widget_server.py Addresses the Code Review format-gate issue (Issue 1): black wraps an over-length call and isort splits the multi-line import block in the lineage_diff widget test. Pure formatting, no behavior change. 
Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Kent --- tests/test_widget_server.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/test_widget_server.py b/tests/test_widget_server.py index 3a5ef6e5d..ada6f1456 100644 --- a/tests/test_widget_server.py +++ b/tests/test_widget_server.py @@ -2218,9 +2218,7 @@ def _make_lineage_dataframe(node_count: int) -> dict: for i in range(node_count): change_status = "modified" if i == 0 else None impacted = i == 0 - nodes_data.append( - (i, f"model.recce.node_{i}", f"node_{i}", "model", "table", change_status, impacted) - ) + nodes_data.append((i, f"model.recce.node_{i}", f"node_{i}", "model", "table", change_status, impacted)) edge_cols = [ {"key": "from", "name": "from", "type": "integer"}, @@ -2245,7 +2243,11 @@ async def test_lineage_diff_returns_calltoolresult_with_pydantic_shape(): from mcp.types import CallToolResult import recce.widget_server as ws - from recce.widget_server import MAX_INLINE_NODES, LineageDiffInput, LineageDiffOutput + from recce.widget_server import ( + MAX_INLINE_NODES, + LineageDiffInput, + LineageDiffOutput, + ) # ── Under-cap branch (3 nodes, 2 edges) ────────────────────────── mock_server = MagicMock() From eeb5002d2a66aeea6965cc9743bcac9f7b24a697 Mon Sep 17 00:00:00 2001 From: Kent Date: Tue, 2 Jun 2026 18:05:58 +0800 Subject: [PATCH 36/43] fix(cli): preserve pristine config backup on mcp-config-install re-run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses Code Review Note 1. The backup was rewritten unconditionally on every run, so a second `recce mcp-config-install` clobbered the `.recce.bak` with the already-modified config — destroying the pristine pre-recce original and making the documented "restore from .recce.bak" undo path useless. Skip the copy when a backup already exists, and report "Existing backup preserved" instead of "Backup saved" in that case. 
Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Kent --- recce/cli.py | 12 ++++++++++-- tests/test_mcp_config_install.py | 27 +++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/recce/cli.py b/recce/cli.py index f64cb8384..102bc53c4 100644 --- a/recce/cli.py +++ b/recce/cli.py @@ -3211,8 +3211,13 @@ def _make_args(subcommand): # ------------------------------------------------------------------ # Backup existing config # ------------------------------------------------------------------ + # Skip if a backup already exists: a re-run would otherwise clobber the + # pristine pre-recce original with the already-modified config, making the + # documented "restore from .recce.bak" undo path useless. backup_path = config_path.with_suffix(config_path.suffix + ".recce.bak") - shutil.copy2(str(config_path), str(backup_path)) + backup_created = not backup_path.exists() + if backup_created: + shutil.copy2(str(config_path), str(backup_path)) # ------------------------------------------------------------------ # Merge entries and write @@ -3225,7 +3230,10 @@ def _make_args(subcommand): # ------------------------------------------------------------------ keys_written = ", ".join(new_entries.keys()) console.print(f"\n[green]✓[/green] Wrote {len(new_entries)} MCP server entries ({keys_written}) to {config_path}") - console.print(f"[green]✓[/green] Backup saved to {backup_path}") + if backup_created: + console.print(f"[green]✓[/green] Backup saved to {backup_path}") + else: + console.print(f"[green]✓[/green] Existing backup preserved at {backup_path}") console.print( "\nNext steps:\n" " 1. 
Cmd+Q to fully quit Claude Desktop " diff --git a/tests/test_mcp_config_install.py b/tests/test_mcp_config_install.py index 032883bf7..6978af99d 100644 --- a/tests/test_mcp_config_install.py +++ b/tests/test_mcp_config_install.py @@ -228,6 +228,33 @@ def test_install_backup_created(tmp_path, monkeypatch): assert backup_content == {"mcpServers": {}}, "Backup content does not match original config" +def test_install_backup_preserves_pristine_original_on_rerun(tmp_path, monkeypatch): + """Re-running install must NOT clobber the .recce.bak with the already-modified + config. The backup must keep the pristine pre-recce original so 'undo' is reliable.""" + monkeypatch.setattr(sys, "platform", "darwin") + + project_dir = _make_dbt_project(tmp_path / "my_project") + config_file = tmp_path / "claude_desktop_config.json" + # Pristine original: a user's third-party server, no recce entries. + pristine = {"mcpServers": {"other-server": {"command": "/usr/local/bin/other", "args": ["start"], "env": {}}}} + config_file.write_text(json.dumps(pristine, indent=2)) + + runner = CliRunner() + args = ["--project-dir", str(project_dir), "--config", str(config_file), "--yes"] + + r1 = runner.invoke(mcp_config_install, args) + assert r1.exit_code == 0, f"first run failed: {r1.output}\n{r1.exception}" + + # Second run: config_file already carries recce entries now. 
+ r2 = runner.invoke(mcp_config_install, args) + assert r2.exit_code == 0, f"second run failed: {r2.output}\n{r2.exception}" + + backup_path = config_file.with_suffix(config_file.suffix + ".recce.bak") + backup_content = json.loads(backup_path.read_text()) + assert backup_content == pristine, "Backup was clobbered with the modified config; pristine original lost" + assert "recce" not in backup_content["mcpServers"], "Backup must not contain recce entries" + + def test_install_python_fallback_uses_cli_module(tmp_path, monkeypatch): """When no recce executable is on PATH, fallback command must be runnable.""" monkeypatch.setattr(sys, "platform", "darwin") From 65ca0739adcec872f3d7377fdbc895f4dfeeaf61 Mon Sep 17 00:00:00 2001 From: Kent Date: Tue, 2 Jun 2026 22:24:44 +0800 Subject: [PATCH 37/43] fix(cli): address review - expand ~ and atomic write in mcp-config-install - expanduser() on --project-dir and --config so ~/path resolves (Copilot) - atomic config write (temp + os.replace) guarded by except OSError, so a mid-write failure leaves the existing claude_desktop_config.json intact instead of a truncated file (self-review) Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Kent --- recce/cli.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/recce/cli.py b/recce/cli.py index 102bc53c4..d798b44f6 100644 --- a/recce/cli.py +++ b/recce/cli.py @@ -3103,7 +3103,7 @@ def mcp_config_install(project_dir, claude_config, yes, dry_run): # ------------------------------------------------------------------ # Validate project dir # ------------------------------------------------------------------ - project_dir_path = Path(project_dir).resolve() + project_dir_path = Path(project_dir).expanduser().resolve() if not project_dir_path.exists(): console.print( f"[[red]Error[/red]] Project directory not found: {project_dir_path}\n" @@ -3123,7 +3123,7 @@ def mcp_config_install(project_dir, claude_config, yes, dry_run): # Resolve 
Claude Desktop config path # ------------------------------------------------------------------ if claude_config: - config_path = Path(claude_config).resolve() + config_path = Path(claude_config).expanduser().resolve() else: config_path = Path.home() / "Library" / "Application Support" / "Claude" / "claude_desktop_config.json" @@ -3220,10 +3220,28 @@ def _make_args(subcommand): shutil.copy2(str(config_path), str(backup_path)) # ------------------------------------------------------------------ - # Merge entries and write + # Merge entries and write (atomically) # ------------------------------------------------------------------ existing_config["mcpServers"].update(new_entries) - config_path.write_text(json.dumps(existing_config, indent=2), encoding="utf-8") + # Write to a temp file in the same directory, then os.replace() for an atomic + # swap. A crash mid-write leaves the existing config untouched rather than a + # truncated/corrupt claude_desktop_config.json. + tmp_path = config_path.with_suffix(config_path.suffix + ".recce.tmp") + try: + tmp_path.write_text(json.dumps(existing_config, indent=2), encoding="utf-8") + os.replace(str(tmp_path), str(config_path)) + except OSError as e: + # Best-effort cleanup of the temp file; the existing config is intact. + try: + if tmp_path.exists(): + tmp_path.unlink() + except OSError: + pass + console.print( + f"[[red]Error[/red]] Failed to write config: {e}\n" + f"Your existing config was left unchanged (a backup is at {backup_path})." 
+ ) + exit(1) # ------------------------------------------------------------------ # Success message From dbde32479af1f6ab59c5d77783631970839ccdbb Mon Sep 17 00:00:00 2001 From: Kent Date: Tue, 2 Jun 2026 22:25:07 +0800 Subject: [PATCH 38/43] fix(mcp,widgets): address review - widget coordination robustness - _widgets_enabled() now case-insensitive (accepts TRUE/True/true/1) (Copilot) - run_widget_server cloud/session guard checks cloud_session (the actual --session dest) in addition to session, so the guard is not silently dead (self-review) - _read_widget_html broadens except + logs on failure so a packaging defect that drops recce/data/mcp/*.html is visible in server logs (self-review) Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Kent --- recce/mcp_server.py | 2 +- recce/widget_server.py | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/recce/mcp_server.py b/recce/mcp_server.py index a60b08faa..d66eedced 100644 --- a/recce/mcp_server.py +++ b/recce/mcp_server.py @@ -76,7 +76,7 @@ def _widgets_enabled() -> bool: """Read RECCE_MCP_WIDGETS env at call time (not import time) so tests can monkeypatch.""" - return os.environ.get("RECCE_MCP_WIDGETS", "").strip() in ("1", "true", "True") + return os.environ.get("RECCE_MCP_WIDGETS", "").strip().lower() in ("1", "true") class InstanceSpawningError(RuntimeError): diff --git a/recce/widget_server.py b/recce/widget_server.py index 53419fd2c..842347392 100644 --- a/recce/widget_server.py +++ b/recce/widget_server.py @@ -143,8 +143,15 @@ def _read_widget_html(name: str) -> str: try: ref = importlib.resources.files("recce.data.mcp") / f"{name}.html" return ref.read_text(encoding="utf-8") - except (FileNotFoundError, TypeError, ModuleNotFoundError): - return f"Widget asset missing: {name}.html. Run pnpm run build." + except Exception as e: + # Broad except: importlib can raise OSError / PermissionError / + # UnicodeDecodeError beyond the file-missing case. 
Log it so a packaging + # defect (a built/installed recce that dropped recce/data/mcp/*.html) is + # visible in server logs, not only when a human opens the rendered widget. + # These are tracked SOURCE files, so a miss here means a broken + # install/package — not a forgotten frontend build. + logger.error("Failed to load widget asset %s.html: %s", name, e) + return f"Widget asset missing: {name}.html (broken recce install/package)." # --------------------------------------------------------------------------- @@ -2511,7 +2518,10 @@ def run_widget_server(**kwargs) -> None: """ global _recce_server - if kwargs.get("cloud") or kwargs.get("session"): + # `--session` binds to the dest `cloud_session` (see recce mcp-server CLI); + # check both that and a bare `session` key so the guard fires regardless of + # how the kwarg was supplied. + if kwargs.get("cloud") or kwargs.get("cloud_session") or kwargs.get("session"): raise ValueError( "recce mcp-widget-server does not support cloud/session mode in iter 1 " "— use recce mcp-server for cloud sessions" From fcd7a6c3a4b2a07fb0e07eb89762047a0fe1b6e9 Mon Sep 17 00:00:00 2001 From: Kent Date: Tue, 2 Jun 2026 22:25:24 +0800 Subject: [PATCH 39/43] fix(track): address review - route --debug traceback to stderr _show_error_message used Console() (stdout); the --debug path's print_exception corrupts the stdio JSON-RPC channel for MCP servers. Now Console(stderr=True), matching the already-fixed non-debug branch (Copilot, outdated thread). 
Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Kent --- recce/track.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/recce/track.py b/recce/track.py index 5d2658798..fdccf520d 100644 --- a/recce/track.py +++ b/recce/track.py @@ -54,7 +54,10 @@ def __init__( def _show_error_message(self, msg, params): from rich.console import Console - console = Console() + # stderr=True: stdio MCP servers use stdout as the JSON-RPC channel, so a + # traceback printed to stdout (e.g. the --debug path below) corrupts the + # protocol. Mirrors the non-debug branch which already writes to stderr. + console = Console(stderr=True) if params.get("debug"): console.print_exception(show_locals=True) else: From c6d3506e980a3f89faadaee8ce90078aa1c18e9a Mon Sep 17 00:00:00 2001 From: Kent Date: Tue, 2 Jun 2026 22:25:27 +0800 Subject: [PATCH 40/43] fix(widgets): address review - HTML escaping + link hardening - escapeHtml in all 15 widgets now also escapes single quotes (' -> '); defense-in-depth so a future single-quoted attribute stays safe (even-wei N2) - get_server_info PR link adds rel=noopener noreferrer to its target=_blank anchor (Copilot, even-wei) Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Kent --- recce/data/mcp/get_cll.html | 2 +- recce/data/mcp/get_model.html | 2 +- recce/data/mcp/get_server_info.html | 4 ++-- recce/data/mcp/histogram_diff.html | 2 +- recce/data/mcp/impact_analysis.html | 2 +- recce/data/mcp/lineage_diff.html | 2 +- recce/data/mcp/list_checks.html | 2 +- recce/data/mcp/profile_diff.html | 2 +- recce/data/mcp/query.html | 2 +- recce/data/mcp/query_diff.html | 2 +- recce/data/mcp/row_count_diff.html | 2 +- recce/data/mcp/schema_diff.html | 2 +- recce/data/mcp/top_k_diff.html | 2 +- recce/data/mcp/value_diff.html | 2 +- recce/data/mcp/value_diff_detail.html | 2 +- 15 files changed, 16 insertions(+), 16 deletions(-) diff --git a/recce/data/mcp/get_cll.html b/recce/data/mcp/get_cll.html index d707857f4..4cf3afb70 
100644 --- a/recce/data/mcp/get_cll.html +++ b/recce/data/mcp/get_cll.html @@ -239,7 +239,7 @@ .replace(/&/g, "&") .replace(//g, ">") - .replace(/"/g, """); + .replace(/"/g, """).replace(/'/g, "'"); } function svgEl(tag, attrs = {}) { diff --git a/recce/data/mcp/get_model.html b/recce/data/mcp/get_model.html index 0de35db77..66cab3bef 100644 --- a/recce/data/mcp/get_model.html +++ b/recce/data/mcp/get_model.html @@ -274,7 +274,7 @@ function escapeHtml(s) { return String(s ?? "") .replace(/&/g, "&").replace(//g, ">").replace(/"/g, """); + .replace(/>/g, ">").replace(/"/g, """).replace(/'/g, "'"); } // Shorten a node ID like "model.jaffle_shop.customers" → "customers" diff --git a/recce/data/mcp/get_server_info.html b/recce/data/mcp/get_server_info.html index 975e187c0..57d0c0ac7 100644 --- a/recce/data/mcp/get_server_info.html +++ b/recce/data/mcp/get_server_info.html @@ -210,7 +210,7 @@ function escapeHtml(s) { return String(s) .replace(/&/g, "&").replace(//g, ">").replace(/"/g, """); + .replace(/>/g, ">").replace(/"/g, """).replace(/'/g, "'"); } function modeBadge(mode) { @@ -314,7 +314,7 @@ if (data.pull_request && data.pull_request.title) { const pr = data.pull_request; const prLink = pr.url - ? `${escapeHtml(pr.title)}` + ? `${escapeHtml(pr.title)}` : escapeHtml(pr.title); rows.push(`
Pull request
diff --git a/recce/data/mcp/histogram_diff.html b/recce/data/mcp/histogram_diff.html index 8a45515af..e37f30ee8 100644 --- a/recce/data/mcp/histogram_diff.html +++ b/recce/data/mcp/histogram_diff.html @@ -229,7 +229,7 @@ function escapeHtml(s) { return String(s ?? "") .replace(/&/g, "&").replace(//g, ">").replace(/"/g, """); + .replace(/>/g, ">").replace(/"/g, """).replace(/'/g, "'"); } function fmtNum(n) { diff --git a/recce/data/mcp/impact_analysis.html b/recce/data/mcp/impact_analysis.html index 09254b813..9acf5296a 100644 --- a/recce/data/mcp/impact_analysis.html +++ b/recce/data/mcp/impact_analysis.html @@ -304,7 +304,7 @@ .replace(/&/g, "&") .replace(//g, ">") - .replace(/"/g, """); + .replace(/"/g, """).replace(/'/g, "'"); } function svgEl(tag, attrs = {}) { diff --git a/recce/data/mcp/lineage_diff.html b/recce/data/mcp/lineage_diff.html index 3c9cc4eb3..2bffd4702 100644 --- a/recce/data/mcp/lineage_diff.html +++ b/recce/data/mcp/lineage_diff.html @@ -162,7 +162,7 @@ .replace(/&/g, "&") .replace(//g, ">") - .replace(/"/g, """); + .replace(/"/g, """).replace(/'/g, "'"); } function svgEl(tag, attrs = {}) { diff --git a/recce/data/mcp/list_checks.html b/recce/data/mcp/list_checks.html index 57ce1568a..3a83d09ad 100644 --- a/recce/data/mcp/list_checks.html +++ b/recce/data/mcp/list_checks.html @@ -258,7 +258,7 @@ function escapeHtml(s) { return String(s ?? 
"") .replace(/&/g, "&").replace(//g, ">").replace(/"/g, """); + .replace(/>/g, ">").replace(/"/g, """).replace(/'/g, "'"); } function truncate(s, max) { diff --git a/recce/data/mcp/profile_diff.html b/recce/data/mcp/profile_diff.html index 86724ab62..a038f2a73 100644 --- a/recce/data/mcp/profile_diff.html +++ b/recce/data/mcp/profile_diff.html @@ -262,7 +262,7 @@ .replace(/&/g, "&") .replace(//g, ">") - .replace(/"/g, """); + .replace(/"/g, """).replace(/'/g, "'"); } /** diff --git a/recce/data/mcp/query.html b/recce/data/mcp/query.html index 96f12e72a..b7e3e6c12 100644 --- a/recce/data/mcp/query.html +++ b/recce/data/mcp/query.html @@ -295,7 +295,7 @@ function escapeHtml(s) { return String(s ?? "") .replace(/&/g, "&").replace(//g, ">").replace(/"/g, """); + .replace(/>/g, ">").replace(/"/g, """).replace(/'/g, "'"); } /** diff --git a/recce/data/mcp/query_diff.html b/recce/data/mcp/query_diff.html index bcf8f6d08..7a27402e6 100644 --- a/recce/data/mcp/query_diff.html +++ b/recce/data/mcp/query_diff.html @@ -357,7 +357,7 @@ function escapeHtml(s) { return String(s ?? 
"") .replace(/&/g, "&").replace(//g, ">").replace(/"/g, """); + .replace(/>/g, ">").replace(/"/g, """).replace(/'/g, "'"); } function renderCell(value, type) { diff --git a/recce/data/mcp/row_count_diff.html b/recce/data/mcp/row_count_diff.html index c7e5335fe..f3dcd0ad7 100644 --- a/recce/data/mcp/row_count_diff.html +++ b/recce/data/mcp/row_count_diff.html @@ -230,7 +230,7 @@ function escapeHtml(s) { return String(s) .replace(/&/g, "&").replace(//g, ">").replace(/"/g, """); + .replace(/>/g, ">").replace(/"/g, """).replace(/'/g, "'"); } function formatNa(meta) { diff --git a/recce/data/mcp/schema_diff.html b/recce/data/mcp/schema_diff.html index dcd6069c2..66b848bf9 100644 --- a/recce/data/mcp/schema_diff.html +++ b/recce/data/mcp/schema_diff.html @@ -195,7 +195,7 @@ function escapeHtml(s) { return String(s) .replace(/&/g, "&").replace(//g, ">").replace(/"/g, """); + .replace(/>/g, ">").replace(/"/g, """).replace(/'/g, "'"); } function renderModelBlock(nodeId, m) { diff --git a/recce/data/mcp/top_k_diff.html b/recce/data/mcp/top_k_diff.html index e2e0f6218..e418beed6 100644 --- a/recce/data/mcp/top_k_diff.html +++ b/recce/data/mcp/top_k_diff.html @@ -301,7 +301,7 @@ function escapeHtml(s) { return String(s ?? "") .replace(/&/g, "&").replace(//g, ">").replace(/"/g, """); + .replace(/>/g, ">").replace(/"/g, """).replace(/'/g, "'"); } function fmtNum(n) { diff --git a/recce/data/mcp/value_diff.html b/recce/data/mcp/value_diff.html index f43eadebd..c113c6ee4 100644 --- a/recce/data/mcp/value_diff.html +++ b/recce/data/mcp/value_diff.html @@ -300,7 +300,7 @@ function escapeHtml(s) { return String(s ?? 
"") .replace(/&/g, "&").replace(//g, ">").replace(/"/g, """); + .replace(/>/g, ">").replace(/"/g, """).replace(/'/g, "'"); } function fmtPct(p) { diff --git a/recce/data/mcp/value_diff_detail.html b/recce/data/mcp/value_diff_detail.html index e321277b6..09334c919 100644 --- a/recce/data/mcp/value_diff_detail.html +++ b/recce/data/mcp/value_diff_detail.html @@ -318,7 +318,7 @@ function escapeHtml(s) { return String(s ?? "") .replace(/&/g, "&").replace(//g, ">").replace(/"/g, """); + .replace(/>/g, ">").replace(/"/g, """).replace(/'/g, "'"); } function pkLabel(pk) { From c533be9336d0485734ad17543110a08308c1721e Mon Sep 17 00:00:00 2001 From: Kent Date: Tue, 2 Jun 2026 22:25:29 +0800 Subject: [PATCH 41/43] fix(build,docs): address review - robust recce/data gitignore + doc sync - .gitignore ignores recce/data/** then re-includes recce/data/mcp/ via negation, so nested build output (e.g. recce/data/lineage/index.html) is ignored while widget source is tracked; replaces the flat per-extension allowlist that missed nested dirs (self-review) - docs/mcp-widgets.md: gitignore gotcha rewritten for the negation scheme; widget test count 28 -> 35 (Copilot) - test_mcp_config_install docstring: add backup-pristine-on-rerun + python-fallback coverage lines (Copilot) Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Kent --- .gitignore | 27 +++++++++------------------ docs/mcp-widgets.md | 14 +++++++------- tests/test_mcp_config_install.py | 2 ++ 3 files changed, 18 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index bd723e293..bcb08ada8 100644 --- a/.gitignore +++ b/.gitignore @@ -28,24 +28,15 @@ deps-*.yml recce.yml STATUS.md -# Ignore build artifacts from frontend. -# recce/data/mcp/*.html are SOURCE files (widget HTML for MCP Apps) — not build output. -# git negation cannot un-ignore files under an ignored directory, so we ignore -# specific subdirs instead of the whole recce/data tree. 
-recce/data/.gitkeep -recce/data/_next/ -recce/data/chunks/ -recce/data/*.html -recce/data/*.js -recce/data/*.css -recce/data/*.json -recce/data/*.txt -recce/data/*.xml -recce/data/*.ico -recce/data/*.png -recce/data/*.svg -recce/data/*.woff -recce/data/*.woff2 +# Ignore the embedded frontend build output under recce/data/, EXCEPT +# recce/data/mcp/*.html which are tracked SOURCE files (hand-authored widget HTML +# for MCP Apps). Ignore the directory contents, then re-include the mcp/ source +# subdir via negation. This catches nested build output (e.g. +# recce/data/lineage/index.html) that a flat per-extension allowlist would miss. +recce/data/** +!recce/data/.gitkeep +!recce/data/mcp/ +!recce/data/mcp/** # ignore Claude logs and plans at any nesting **/docs/plans/**/*.md diff --git a/docs/mcp-widgets.md b/docs/mcp-widgets.md index 55fcdcba4..85aa24792 100644 --- a/docs/mcp-widgets.md +++ b/docs/mcp-widgets.md @@ -50,7 +50,7 @@ recce/ histogram_diff.html # Phase C tier-4: hand-rolled SVG bar chart (base vs current bins) profile_diff.html # Phase C tier-4: per-column profile card grid (count/null/distinct/min/max/avg/median) tests/ - test_widget_server.py # 28 tests covering WIDGET_TOOLS coordination + widget server. + test_widget_server.py # 35 tests covering WIDGET_TOOLS coordination + widget server. docs/ mcp-widgets.md # This file. ``` @@ -369,12 +369,12 @@ directly. Always use Pydantic models for widget tool outputs. Wrapping it raises `ValueError: a coroutine was expected, got None` (fixed in Day 1 cycle 1; see commit `bb6f1261`). -- **`recce/data/` is gitignored as build output.** Widget HTML files use a - per-extension allowlist in `.gitignore` to escape the broad `recce/data` - ignore rule. The allowlist currently covers `*.html`. If you add new file - types (`.css`, `.svg`, `.js`) under `recce/data/mcp/`, check `.gitignore` - and add an allowlist entry if needed; otherwise `git add` will silently skip - your file. 
+- **`recce/data/` is gitignored as build output.** `.gitignore` ignores the + directory contents (`recce/data/**`) and then re-includes the source subdir + via negation (`!recce/data/mcp/`, `!recce/data/mcp/**`). Any file type you add + under `recce/data/mcp/` (`.css`, `.svg`, `.js`, …) is tracked automatically — + no per-extension allowlist to maintain. Files placed elsewhere under + `recce/data/` remain ignored as build output. - **In stdio transport mode, stdout is JSON-RPC.** Any `print()` or `logging.info()` output written to stdout will corrupt the MCP framing. diff --git a/tests/test_mcp_config_install.py b/tests/test_mcp_config_install.py index 6978af99d..aadcda207 100644 --- a/tests/test_mcp_config_install.py +++ b/tests/test_mcp_config_install.py @@ -6,6 +6,8 @@ - Validates --project-dir (must contain dbt_project.yml) - Dry-run mode does not write to disk - Backup file is created before writing +- Backup preserves the pristine pre-recce original on re-run (not clobbered) +- Falls back to `python -m recce.cli` when the `recce` binary is not on PATH """ import json From d089e737a7517aa9665079cc6149a1f5bc0ab8ee Mon Sep 17 00:00:00 2001 From: Kent Date: Tue, 2 Jun 2026 23:13:48 +0800 Subject: [PATCH 42/43] fix(widgets): address review - anchor widget HTML load on recce package MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _read_widget_html resolved assets via importlib.resources.files("recce.data.mcp"), which relies on recce.data.mcp resolving as a namespace package — fragile across install layouts (Copilot flagged a potential ModuleNotFoundError). Anchor on the recce regular package (always importable) and traverse to data/mcp/ instead, so a pip-installed wheel reliably serves the widgets. Goal: widgets must work on any install, not just an editable/source checkout. 
Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Kent --- recce/widget_server.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/recce/widget_server.py b/recce/widget_server.py index 842347392..2a3df0c8e 100644 --- a/recce/widget_server.py +++ b/recce/widget_server.py @@ -141,7 +141,13 @@ class SchemaDiffInput(BaseModel): def _read_widget_html(name: str) -> str: """Read widget HTML from recce/data/mcp/{name}.html, returning an error stub if missing.""" try: - ref = importlib.resources.files("recce.data.mcp") / f"{name}.html" + # Anchor on the `recce` package (a regular package — always importable) + # and traverse to data/mcp/, rather than importing `recce.data.mcp` as a + # namespace package. Namespace-package resolution can differ between an + # editable/source checkout and an installed wheel; anchoring on `recce` + # + filesystem traversal works in both, so a `pip install recce` user + # reliably gets the widgets. + ref = importlib.resources.files("recce") / "data" / "mcp" / f"{name}.html" return ref.read_text(encoding="utf-8") except Exception as e: # Broad except: importlib can raise OSError / PermissionError / From d080819eaf92350969234ed378189d4374d97811 Mon Sep 17 00:00:00 2001 From: Kent Date: Tue, 2 Jun 2026 23:20:21 +0800 Subject: [PATCH 43/43] docs(widgets): add TODO markers for deferred review follow-ups Mark the deferred POC items (tracked in the PR description's follow-up section) with TODO(DRC-3526 follow-up) comments in code: - widget tools bypass central call_tool classifier/telemetry (DRC-2754) - schema_diff has no single-env warning - impact_analysis value_diff drift fix lacks a real-DuckDB e2e No behavior change. 
Co-Authored-By: Claude Opus 4.8 (1M context) Signed-off-by: Kent --- recce/mcp_server.py | 7 +++++++ recce/widget_server.py | 14 ++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/recce/mcp_server.py b/recce/mcp_server.py index d66eedced..91a6a905e 100644 --- a/recce/mcp_server.py +++ b/recce/mcp_server.py @@ -1928,6 +1928,13 @@ async def _tool_impact_analysis(self, arguments: Dict[str, Any]) -> Dict[str, An # Intersect both sides, mirroring ValueDiffTask; drifted columns are # already reported via schema_changes (Step 2b). get_model(base=True) # manages its own warehouse connection for the base-side introspection. + # TODO(DRC-3526 follow-up): this drift fix is covered by a mock + # test (asserts drifted columns never reach the generated SQL) + + # a manual Snowflake A/B, but has no real-adapter e2e. Add a + # DuckDB e2e fixture with a PK where `current` has a column absent + # in `base`; assert value_diff runs over the common non-PK columns + # and does not raise a binder error. Mocks cannot catch the + # missing-warehouse-connection class of bug. base_model_info = self.context.adapter.get_model(node_id, base=True) common_cols = set(columns_info) & set(base_model_info.get("columns", {})) diff --git a/recce/widget_server.py b/recce/widget_server.py index 2a3df0c8e..f42a91194 100644 --- a/recce/widget_server.py +++ b/recce/widget_server.py @@ -21,6 +21,15 @@ mcp = FastMCP("recce-widgets") # Forward ref — initialized in run_widget_server() to avoid eager import at module load. +# +# TODO(DRC-3526 follow-up): the widget tools below delegate directly to +# _recce_server._tool_*(), bypassing RecceMCPServer's central @server.call_tool +# handler (recce/mcp_server.py). That handler is where DRC-2754 error +# classification + Sentry "mcp.expected_error" metrics live, so expected DB errors +# (table_not_found / permission_denied) are not downgraded/telemetried for the 15 +# widget tools. 
Errors still surface (FastMCP wraps them) — this is deferred POC +# debt; route delegates through a shared classify/telemetry wrapper when the +# two-server architecture is consolidated. _recce_server: Optional[Any] = None logger = logging.getLogger(__name__) @@ -290,6 +299,11 @@ async def schema_diff(args: SchemaDiffInput) -> CallToolResult: exclude=args.exclude, packages=args.packages if args.packages is not None else None, ) + # TODO(DRC-3526 follow-up): SchemaDiffOutput has no `warning` field and this + # handler does not surface a single-env notice, unlike the other diff tools. + # In single-env mode the widget renders an empty "No models found" state that + # reads as a clean diff when there is simply no base to compare against. Add a + # `warning` field + single-env notice. output = SchemaDiffOutput( models={node_id: SchemaChange(**m) for node_id, m in rich_result.items()}, )