From 626e8ec75b4d4a89949085ad51a39080fa5772f4 Mon Sep 17 00:00:00 2001 From: Kaguya-19 Date: Tue, 2 Jun 2026 14:19:11 +0800 Subject: [PATCH 1/4] fix(mcp): marshal MCP ImageContent into inline tool result images Add marshalMcpContent() to convert MCP TextContent/ImageContent blocks into PilotDeck text/image result types, replacing the old single json blob. This enables MCP tool screenshots (e.g. Playwright) to render inline in the chat UI. Co-authored-by: Cursor --- src/mcp/runtime/PluginToToolBridge.ts | 54 ++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/src/mcp/runtime/PluginToToolBridge.ts b/src/mcp/runtime/PluginToToolBridge.ts index 89620e72..b17e6549 100644 --- a/src/mcp/runtime/PluginToToolBridge.ts +++ b/src/mcp/runtime/PluginToToolBridge.ts @@ -10,10 +10,11 @@ * reflected onto the PilotDeck tool flags so the permission * engine can decide whether to ask. * - * Result transformation (M14): we currently emit a single `json` result - * block. The existing `ToolRuntime` already truncates oversized payloads - * via `maxResultBytes`; deferring the persisted-large-blob path for now - * (recorded as `intentional_difference` in the parity table). + * Result transformation (M14): MCP ContentBlock types `text` and `image` + * are mapped to their PilotDeck equivalents so that images (e.g. Playwright + * screenshots) render inline in the chat UI. Remaining block types + * (`audio`, `resource`, `resource_link`) fall through as a single `json` + * block until the downstream pipeline supports them. 
*/ import { PilotDeckToolRuntimeError } from "../../tool/protocol/errors.js"; @@ -21,6 +22,7 @@ import type { PilotDeckToolDefinition, PilotDeckToolExecutionOutput, PilotDeckToolInputSchema, + PilotDeckToolResultContent, } from "../../tool/index.js"; import type { McpClient } from "../client/McpClient.js"; import type { McpRuntime } from "./McpRuntime.js"; @@ -85,7 +87,7 @@ function buildToolDefinition( ); } return { - content: [{ type: "json", value: content }], + content: marshalMcpContent(content), data: content, metadata: { mcp: { serverId: spec.serverId, toolName: spec.toolName, wireName: spec.wireName }, @@ -118,6 +120,48 @@ function buildToolDefinition( }; } +type McpContentBlock = { type: string; [key: string]: unknown }; + +/** + * Map MCP `ContentBlock[]` → `PilotDeckToolResultContent[]`. + * + * `TextContent` → `{ type: "text" }` + * `ImageContent` → `{ type: "image" }` (renders inline in chat) + * Everything else falls through as a single `json` block. + */ +function marshalMcpContent(raw: unknown): PilotDeckToolResultContent[] { + if (!Array.isArray(raw)) return [{ type: "json", value: raw }]; + + const result: PilotDeckToolResultContent[] = []; + const remainder: unknown[] = []; + + for (const block of raw as McpContentBlock[]) { + if (!block || typeof block !== "object" || typeof block.type !== "string") { + remainder.push(block); + continue; + } + if (block.type === "text" && typeof block.text === "string") { + result.push({ type: "text", text: block.text }); + } else if ( + block.type === "image" && + typeof block.data === "string" && + typeof block.mimeType === "string" + ) { + result.push({ type: "image", mimeType: block.mimeType as string, data: block.data as string }); + } else { + remainder.push(block); + } + } + + if (remainder.length > 0) { + result.push({ type: "json", value: remainder }); + } + if (result.length === 0) { + result.push({ type: "json", value: raw }); + } + return result; +} + function extractMcpErrorText( content: unknown, 
serverId: string, From e23a318bd8fa8e92b328d9df4a8881e7dfb0fb12 Mon Sep 17 00:00:00 2001 From: Kaguya-19 Date: Tue, 2 Jun 2026 14:19:22 +0800 Subject: [PATCH 2/4] fix(web): preserve tool result images when loading session history Extract image sub-blocks from tool_result content during JSONL session replay and attach them as WebMessage.images. Fixes screenshots disappearing on page refresh or session reload. Co-authored-by: Cursor --- src/web/server/readSessionMessages.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/web/server/readSessionMessages.ts b/src/web/server/readSessionMessages.ts index 51e13e31..3f6774ec 100644 --- a/src/web/server/readSessionMessages.ts +++ b/src/web/server/readSessionMessages.ts @@ -319,6 +319,12 @@ function flushBlock( const resultText = flattenToolResultBlockText(block); const errorCode = readToolResultErrorCode(block.raw); const planData = readPlanData(block.raw); + const resultImages: NonNullable = []; + for (const sub of block.content) { + if (sub.type === "image") { + resultImages.push(toWebMessageImage(sub)); + } + } out.push({ id: `${context.sessionKey}-tool-${block.toolCallId}-result`, sessionKey: context.sessionKey, @@ -332,6 +338,7 @@ function flushBlock( text: resultText, ...(errorCode ? { errorCode } : {}), ...(planData ? { payload: planData } : {}), + ...(resultImages.length > 0 ? { images: resultImages } : {}), source: "history", }); return; From f2b018613d3dfaccccb755bfad9d392dc79a1e3b Mon Sep 17 00:00:00 2001 From: Kaguya-19 Date: Tue, 2 Jun 2026 14:19:33 +0800 Subject: [PATCH 3/4] fix(ui): resolve relative image paths in Markdown via project files API Add projectName prop to the Markdown component and a resolveImageSrc() helper that rewrites relative paths (e.g. ./foo.png) to the backend file-serving endpoint /api/projects/:name/files/content. Wire projectName into all Markdown usages across V1 and V2 chat views. 
Co-authored-by: Cursor --- ui/src/components/chat-v2/MessageRowV2.tsx | 8 ++--- .../chat/view/subcomponents/Markdown.tsx | 31 +++++++++++++++++-- .../view/subcomponents/MessageComponent.tsx | 10 +++--- 3 files changed, 37 insertions(+), 12 deletions(-) diff --git a/ui/src/components/chat-v2/MessageRowV2.tsx b/ui/src/components/chat-v2/MessageRowV2.tsx index 13f76c2c..0d9e2221 100644 --- a/ui/src/components/chat-v2/MessageRowV2.tsx +++ b/ui/src/components/chat-v2/MessageRowV2.tsx @@ -272,7 +272,7 @@ function MessageRowV2({ ) : null} {formattedContent ? ( - {formattedContent} + {formattedContent} ) : null} )} @@ -296,7 +296,7 @@ function MessageRowV2({
- {formattedContent} + {formattedContent}
, ); @@ -312,7 +312,7 @@ function MessageRowV2({ {t('thinking.title', { defaultValue: 'Thinking...' })}
- {formattedContent} + {formattedContent}
, @@ -326,7 +326,7 @@ function MessageRowV2({ ) : ( <> - {formattedContent} + {formattedContent} {formattedContent.trim() && (!nextMessage || nextMessage.type === 'user' || nextMessage.type === 'error') ? (
diff --git a/ui/src/components/chat/view/subcomponents/Markdown.tsx b/ui/src/components/chat/view/subcomponents/Markdown.tsx index dfd85697..59c71904 100644 --- a/ui/src/components/chat/view/subcomponents/Markdown.tsx +++ b/ui/src/components/chat/view/subcomponents/Markdown.tsx @@ -12,6 +12,8 @@ import { copyTextToClipboard } from '../../../../utils/clipboard'; type MarkdownProps = { children: React.ReactNode; className?: string; + /** When set, relative image paths are resolved via the project files API. */ + projectName?: string; }; type CodeBlockProps = { @@ -116,7 +118,14 @@ const CodeBlock = ({ node, inline, className, children, ...props }: CodeBlockPro ); }; -const markdownComponents = { +function resolveImageSrc(src: string | undefined, projectName: string | undefined): string | undefined { + if (!src || !projectName) return src; + if (src.startsWith('data:') || src.startsWith('http://') || src.startsWith('https://')) return src; + const cleaned = src.replace(/^\.\//, ''); + return `/api/projects/${encodeURIComponent(projectName)}/files/content?path=${encodeURIComponent(cleaned)}`; +} + +const baseMarkdownComponents = { code: CodeBlock, blockquote: ({ children }: { children?: React.ReactNode }) => (
@@ -143,14 +152,30 @@ const markdownComponents = { ), }; -export function Markdown({ children, className }: MarkdownProps) { +export function Markdown({ children, className, projectName }: MarkdownProps) { const content = normalizeInlineCodeFences(String(children ?? '')); const remarkPlugins = useMemo(() => [remarkGfm, remarkMath], []); const rehypePlugins = useMemo(() => [rehypeKatex], []); + const components = useMemo(() => { + if (!projectName) return baseMarkdownComponents; + return { + ...baseMarkdownComponents, + img: ({ src, alt, ...rest }: React.ImgHTMLAttributes) => ( + {alt + ), + }; + }, [projectName]); + return (
- + {content}
diff --git a/ui/src/components/chat/view/subcomponents/MessageComponent.tsx b/ui/src/components/chat/view/subcomponents/MessageComponent.tsx index baa6f46b..9fffb250 100644 --- a/ui/src/components/chat/view/subcomponents/MessageComponent.tsx +++ b/ui/src/components/chat/view/subcomponents/MessageComponent.tsx @@ -323,7 +323,7 @@ const MessageComponent = memo(({ message, prevMessage, createDiff, onFileOpen, o <>
- + {String(message.displayText || '')}
@@ -392,7 +392,7 @@ const MessageComponent = memo(({ message, prevMessage, createDiff, onFileOpen, o toolCategory="default" autoExpandable={false} > - + {renderedErrorContent} @@ -427,7 +427,7 @@ const MessageComponent = memo(({ message, prevMessage, createDiff, onFileOpen, o ) : null}
- + {renderedErrorContent}
@@ -607,7 +607,7 @@ const MessageComponent = memo(({ message, prevMessage, createDiff, onFileOpen, o {t('thinking.emoji')}
- + {messageContent}
@@ -664,7 +664,7 @@ const MessageComponent = memo(({ message, prevMessage, createDiff, onFileOpen, o // Normal rendering for non-JSON content return message.type === 'assistant' ? ( - + {content} ) : ( From 07ccfa117621494e8993eaf60e11b945fa977bb0 Mon Sep 17 00:00:00 2001 From: Kaguya-19 Date: Tue, 2 Jun 2026 14:19:47 +0800 Subject: [PATCH 4/4] fix(model): gracefully downgrade unsupported media for non-multimodal models Add downgradeUnsupportedContent() that replaces image/pdf/audio blocks with descriptive text placeholders before assertContentSupported runs. Prevents unsupported_modality turn failures when a text-only model receives MCP screenshot results in tool_result content. Co-authored-by: Cursor --- src/model/index.ts | 1 + src/model/protocol/multimodal.ts | 69 ++++++++++++++++++++++- src/model/request/validateModelRequest.ts | 4 +- 3 files changed, 72 insertions(+), 2 deletions(-) diff --git a/src/model/index.ts b/src/model/index.ts index 1ae943bf..5f8c900d 100644 --- a/src/model/index.ts +++ b/src/model/index.ts @@ -67,6 +67,7 @@ export { type StructuredOutputExtractionError, } from "./structuredOutput/extractStructuredOutput.js"; export type { ModelCapabilities } from "./protocol/capabilities.js"; +export { downgradeUnsupportedContent } from "./protocol/multimodal.js"; export type { InputModality, MultimodalConstraints } from "./protocol/multimodal.js"; export { ModelConfigError, diff --git a/src/model/protocol/multimodal.ts b/src/model/protocol/multimodal.ts index 68a29c2c..c139ba27 100644 --- a/src/model/protocol/multimodal.ts +++ b/src/model/protocol/multimodal.ts @@ -1,5 +1,9 @@ import { ModelRequestError } from "./errors.js"; -import type { CanonicalContentBlock } from "./canonical.js"; +import type { + CanonicalContentBlock, + CanonicalMessage, + CanonicalToolResultContentBlock, +} from "./canonical.js"; export const SUPPORTED_INPUT_MODALITIES = ["text", "image", "pdf", "audio"] as const; @@ -24,6 +28,69 @@ export function isInputModality(value: 
unknown): value is InputModality { return typeof value === "string" && SUPPORTED_INPUT_MODALITIES.includes(value as InputModality); } +/** + * Pre-flight downgrade: replace media blocks the target model cannot accept + * with descriptive text placeholders. Mutates `messages` in-place so the + * caller's cloned request is updated without copying the entire array. + * + * Only tool_result sub-blocks and top-level content blocks are converted; + * tool_call / thinking blocks are left untouched. + */ +export function downgradeUnsupportedContent( + messages: CanonicalMessage[], + constraints: MultimodalConstraints, +): void { + const allowed = new Set(constraints.input); + if (allowed.has("image") && allowed.has("pdf") && allowed.has("audio")) return; + + for (const msg of messages) { + for (let i = 0; i < msg.content.length; i++) { + const block = msg.content[i]; + + if (block.type === "tool_result") { + let changed = false; + const newContent: CanonicalToolResultContentBlock[] = []; + for (const sub of block.content) { + const placeholder = mediaBlockToPlaceholder(sub, allowed); + if (placeholder) { + newContent.push({ type: "text", text: placeholder }); + changed = true; + } else { + newContent.push(sub); + } + } + if (changed) { + (block as { content: CanonicalToolResultContentBlock[] }).content = newContent; + } + continue; + } + + const placeholder = mediaBlockToPlaceholder(block, allowed); + if (placeholder) { + (msg.content as CanonicalContentBlock[])[i] = { type: "text", text: placeholder }; + } + } + } +} + +function mediaBlockToPlaceholder( + block: CanonicalContentBlock | CanonicalToolResultContentBlock, + allowed: Set, +): string | undefined { + if (block.type === "image" && !allowed.has("image")) { + const sizeHint = block.bytes ? 
`, ${Math.round(block.bytes / 1024)}KB` : ""; + return `[Image: ${block.mimeType}${sizeHint} — omitted, model does not support image input]`; + } + if (block.type === "pdf" && !allowed.has("pdf")) { + const pagesHint = block.pages ? `, ${block.pages} pages` : ""; + return `[PDF: ${block.mimeType}, ${Math.round(block.bytes / 1024)}KB${pagesHint} — omitted, model does not support PDF input]`; + } + if (block.type === "audio" && !allowed.has("audio")) { + return `[Audio: ${block.mimeType} — omitted, model does not support audio input]`; + } + return undefined; +} + export function contentBlockToInputModality(block: CanonicalContentBlock): InputModality | undefined { switch (block.type) { case "text": diff --git a/src/model/request/validateModelRequest.ts b/src/model/request/validateModelRequest.ts index fb7100a6..ca34e769 100644 --- a/src/model/request/validateModelRequest.ts +++ b/src/model/request/validateModelRequest.ts @@ -1,6 +1,6 @@ import type { CanonicalModelRequest, ModelConfig, ModelDefinition, ProviderConfig } from "../protocol/canonical.js"; import { ModelRequestError } from "../protocol/errors.js"; -import { assertContentSupported } from "../protocol/multimodal.js"; +import { assertContentSupported, downgradeUnsupportedContent } from "../protocol/multimodal.js"; export type ResolvedModelRequest = { provider: ProviderConfig; @@ -39,6 +39,8 @@ export function validateModelRequest( throw new ModelRequestError("unsupported_tool_use", `Model ${request.model} does not support tools.`); } + downgradeUnsupportedContent(request.messages, model.multimodal); + for (const message of request.messages) { assertContentSupported(message.content, model.multimodal); }