From 5f94a185b69be18b94a9077b53d2e9df44d9a639 Mon Sep 17 00:00:00 2001 From: suguanYang Date: Thu, 4 Jun 2026 03:19:17 +0000 Subject: [PATCH 1/2] Add image asset citations to chat --- src/components/chat-message-list.test.ts | 40 ++++ src/components/chat-message-list.tsx | 86 +++++++- src/domains/chat/contracts.ts | 4 +- src/domains/chat/index.test.ts | 89 +++++++++ src/domains/chat/index.ts | 28 ++- src/domains/chat/media-assets.test.ts | 109 +++++++++++ src/domains/chat/media-assets.ts | 239 +++++++++++++++++++++++ src/domains/chat/prompt.ts | 15 +- src/domains/chat/route-answer.ts | 3 + src/domains/chat/service.ts | 3 + 10 files changed, 604 insertions(+), 12 deletions(-) create mode 100644 src/domains/chat/media-assets.test.ts create mode 100644 src/domains/chat/media-assets.ts diff --git a/src/components/chat-message-list.test.ts b/src/components/chat-message-list.test.ts index 03d3342..fef61c4 100644 --- a/src/components/chat-message-list.test.ts +++ b/src/components/chat-message-list.test.ts @@ -59,6 +59,46 @@ describe("ChatMessageList", () => { ).toBeTruthy(); }); + it("renders image citations as viewable image attachments", () => { + render( + React.createElement(ChatMessageList, { + messages: [ + { + id: "assistant_1", + role: "assistant", + content: "Here is the launch image.", + citations: [ + { + chunkType: "image", + score: 0.9, + assetUrl: "https://blob.example/images/launch.jpg", + source: { + documentId: "doc_1", + sourceFileName: "spacex-s1.pdf", + sectionPath: "Assets / images / launch.jpg", + }, + }, + ], + }, + ], + }), + ); + + const image = screen.getByRole("img", { + name: "spacex-s1.pdf · Assets / images / launch.jpg", + }); + expect(image.getAttribute("src")).toBe( + "https://blob.example/images/launch.jpg", + ); + + const link = screen.getByRole("link", { + name: "https://blob.example/images/launch.jpg", + }); + expect(link.getAttribute("href")).toBe( + "https://blob.example/images/launch.jpg", + ); + }); + it("shows thinking progress after existing messages while sending", () => { render( React.createElement(ChatMessageList, { diff --git a/src/components/chat-message-list.tsx b/src/components/chat-message-list.tsx index 721fa0b..d49c8a7 100644 --- a/src/components/chat-message-list.tsx +++ b/src/components/chat-message-list.tsx @@ -2,7 +2,7 @@ import { type CSSProperties, type ReactElement } from "react"; import { type VirtualItem } from "@tanstack/react-virtual"; -import { MessageCircle } from "lucide-react"; +import { ImageIcon, MessageCircle } from "lucide-react"; import { useChatMessageListWorkflow } from "@/components/chat-message-list-workflow"; import { chatPanelModel } from "@/components/chat-panel-model"; @@ -19,6 +19,10 @@ type DisplayCitation = { readonly label: string; }; +type DisplayImageCitation = DisplayCitation & { + readonly assetUrl: string; +}; + export type ChatMessageListProps = { readonly isDisabled?: boolean; readonly isSending?: boolean; @@ -240,11 +244,48 @@ function MessageBubble({ message, sourceTitlesByDocumentId, ); + const displayImageCitations = getDisplayImageCitations(displayCitations); return (

{message.content}

+ {displayImageCitations.length > 0 && ( +
+

+ + Images +

+
+ {displayImageCitations.map(({ assetUrl, citationId, label }) => ( +
+ {/* eslint-disable-next-line @next/next/no-img-element -- Chat image citation dimensions are not known before render. */} + {label} +
+ + {label} + + + {assetUrl} + +
+
+ ))} +
+
+ )} {displayCitations.length > 0 && (

@@ -302,6 +343,49 @@ function getDisplayCitations( return displayCitations; } +function getDisplayImageCitations( + citations: readonly DisplayCitation[], +): readonly DisplayImageCitation[] { + const seenAssetUrls = new Set(); + const imageCitations: DisplayImageCitation[] = []; + + for (const citation of citations) { + const assetUrl = getTrimmedCitationField(citation.citation.assetUrl); + if (!assetUrl || !isImageCitation(citation.citation, assetUrl)) continue; + if (seenAssetUrls.has(assetUrl)) continue; + + seenAssetUrls.add(assetUrl); + imageCitations.push({ ...citation, assetUrl }); + } + + return imageCitations; +} + +function isImageCitation( + citation: ChatCitationView, + assetUrl: string, +): boolean { + return ( + citation.chunkType.toLowerCase() === "image" || + hasImageFileExtension(assetUrl) + ); +} + +function hasImageFileExtension(assetUrl: string): boolean { + const pathname = getUrlPathname(assetUrl).toLowerCase(); + return [".jpg", ".jpeg", ".png", ".gif", ".webp", ".svg"].some( + (extension) => pathname.endsWith(extension), + ); +} + +function getUrlPathname(assetUrl: string): string { + try { + return new URL(assetUrl).pathname; + } catch { + return assetUrl.split("?")[0] ?? assetUrl; + } +} + function getCitationDisplayKey( citation: ChatCitationView, label: string, diff --git a/src/domains/chat/contracts.ts b/src/domains/chat/contracts.ts index 49d8bc1..5205ed0 100644 --- a/src/domains/chat/contracts.ts +++ b/src/domains/chat/contracts.ts @@ -2,6 +2,7 @@ import type { RetrievalQueryParams, RetrievalQueryResponse } from "@ontos-ai/kno import type { Source } from "@/infrastructure/db/schema" import type { ChatCitationView } from "@/domains/chat/types" +import type { LoadSourceAssetUrls } from "./media-assets" export type RetrievalClient = { query(params: RetrievalQueryParams): Promise @@ -25,6 +26,7 @@ export type GenerateAnswer = (input: { retrievalQuery: string messages: readonly ChatHistoryMessage[] evidenceText: string + mediaAssetContext?: string }) => Promise export type AnswerQuestionInput = { @@ -35,6 +37,7 @@ export type AnswerQuestionInput = { retrieval: RetrievalClient generateRetrievalQuery: GenerateRetrievalQuery generateAnswer: GenerateAnswer + loadSourceAssetUrls?: LoadSourceAssetUrls messages: readonly ChatHistoryMessage[] } @@ -42,4 +45,3 @@ export type AnswerQuestionResult = { answer: string citations: ChatCitationView[] } - diff --git a/src/domains/chat/index.test.ts b/src/domains/chat/index.test.ts index ba7a4bd..8438e71 100644 --- a/src/domains/chat/index.test.ts +++ b/src/domains/chat/index.test.ts @@ -188,6 +188,77 @@ describe("answerQuestionWithRetrieval", () => { ]); }); + it("passes retrieved image asset URLs to the answer prompt and citations", async () => { + const result = makeRetrievalResult({ + chunkType: "image", + source: { + documentId: "doc_spacex", + sourceFileName: "document-generated.pdf", + sectionPath: "Assets / images / image-9-Night Rocket Launch.jpg", + }, + }); + const retrieval = { + query: vi.fn().mockResolvedValue({ + results: [result], + evidenceText: "A SpaceX rocket launches at night.", + referencedChunks: [], + namespace: "notebook-workspace", + query: "SpaceX rocket photos", + routerUsed: "workflow_single_step", + answerText: null, + }), + }; + const generateAnswer = vi.fn().mockResolvedValue("Use this launch photo."); + const generateRetrievalQuery = vi.fn().mockResolvedValue("SpaceX rocket photos"); + const loadSourceAssetUrls = vi.fn().mockResolvedValue({ + "images/image-9-Night Rocket Launch.jpg": + "https://blob.example/images/image-9-Night%20Rocket%20Launch.jpg", + }); + + const answer = await Effect.runPromise( + answerQuestionWithRetrieval({ + question: "Show me the SpaceX rocket photos.", + namespace: "notebook-workspace", + sources: [ + makeSource({ + id: "source_spacex", + title: "spacex-s1.pdf", + knowhereDocumentId: "doc_spacex", + }), + ], + excludedSourceIds: [], + retrieval, + generateRetrievalQuery, + generateAnswer, + loadSourceAssetUrls, + messages: [], + }), + ); + + expect(loadSourceAssetUrls).toHaveBeenCalledWith( + expect.objectContaining({ id: "source_spacex" }), + ); + expect(generateAnswer).toHaveBeenCalledWith({ + question: "Show me the SpaceX rocket photos.", + retrievalQuery: "SpaceX rocket photos", + messages: [], + evidenceText: "A SpaceX rocket launches at night.", + mediaAssetContext: + "- spacex-s1.pdf / Assets / images / image-9-Night Rocket Launch.jpg: https://blob.example/images/image-9-Night%20Rocket%20Launch.jpg", + }); + expect(answer.citations).toEqual([ + { + ...result, + assetUrl: + "https://blob.example/images/image-9-Night%20Rocket%20Launch.jpg", + source: { + ...result.source, + sourceFileName: "spacex-s1.pdf", + }, + }, + ]); + }); + it("returns a deterministic no-results answer without calling the model", async () => { const retrieval = { query: vi.fn().mockResolvedValue({ @@ -374,6 +445,24 @@ describe("buildGroundedPrompt", () => { "If the sources are related but incomplete, answer what you can and briefly say what is not covered.", ); }); + + it("includes retrieved media asset URLs when they are available", () => { + const prompt = buildGroundedPrompt({ + question: "Show me the launch image.", + evidenceText: "A launch image was retrieved.", + mediaAssetContext: + "- spacex-s1.pdf / Assets / images / launch.jpg: https://blob.example/images/launch.jpg", + }); + + expect(prompt).toContain("Retrieved media asset URLs:"); + expect(prompt).toContain( + "When retrieved image or table asset URLs are relevant to the user's request, include the URL next to the matching source label.", + ); + expect(prompt).toContain( + "Do not invent asset URLs; use only the retrieved media asset URLs listed below.", + ); + expect(prompt).toContain("https://blob.example/images/launch.jpg"); + }); }); describe("buildRetrievalQueryPrompt", () => { diff --git a/src/domains/chat/index.ts b/src/domains/chat/index.ts index eea0889..2b6cd6b 100644 --- a/src/domains/chat/index.ts +++ b/src/domains/chat/index.ts @@ -10,6 +10,10 @@ import { excludeDocuments, normalizeRetrievalQuery, } from "./retrieval" +import { + enrichRetrievalResultsWithAssetUrls, + formatRetrievedMediaAssetContext, +} from "./media-assets" const DEFAULT_TOP_K = 8 const NO_RESULTS_ANSWER = "I couldn't find that in your sources." @@ -65,18 +69,26 @@ export const answerQuestionWithRetrieval = ( return { answer: NO_RESULTS_ANSWER, citations: [] as ChatCitationView[] } } - const results = useNotebookSourceTitles(response.results, input.sources) - const answer = yield* Effect.tryPromise(() => - input.generateAnswer({ - question, - retrievalQuery: query, - messages: input.messages, - evidenceText, + const results = yield* Effect.tryPromise(() => + enrichRetrievalResultsWithAssetUrls({ + results: useNotebookSourceTitles(response.results, input.sources), + sources: input.sources, + loadSourceAssetUrls: input.loadSourceAssetUrls, }), ) + const mediaAssetContext = formatRetrievedMediaAssetContext(results) + const generateAnswerInput = { + question, + retrievalQuery: query, + messages: input.messages, + evidenceText, + ...(mediaAssetContext ? { mediaAssetContext } : {}), + } + const answer = yield* Effect.tryPromise(() => + input.generateAnswer(generateAnswerInput), + ) return { answer, citations: toChatCitationViews(results, answer), } }) - diff --git a/src/domains/chat/media-assets.test.ts b/src/domains/chat/media-assets.test.ts new file mode 100644 index 0000000..f7163bc --- /dev/null +++ b/src/domains/chat/media-assets.test.ts @@ -0,0 +1,109 @@ +import { describe, expect, it, vi } from "vitest" +import type { RetrievalResult } from "@ontos-ai/knowhere-sdk" + +import { + enrichRetrievalResultsWithAssetUrls, + formatRetrievedMediaAssetContext, + isImageAssetUrl, +} from "./media-assets" +import type { Source } from "@/infrastructure/db/schema" + +describe("chat media assets", () => { + it("enriches retrieved image chunks from Notebook parsed asset URLs", async () => { + const loadSourceAssetUrls = vi.fn().mockResolvedValue({ + "images/image-9-Night Rocket Launch.jpg": + "https://blob.example/images/image-9-Night%20Rocket%20Launch.jpg", + }) + + const [result] = await enrichRetrievalResultsWithAssetUrls({ + results: [ + makeRetrievalResult({ + chunkType: "image", + source: { + documentId: "doc_spacex", + sourceFileName: "spacex-s1.pdf", + sectionPath: "Assets / images / image-9-Night Rocket Launch.jpg", + }, + }), + ], + sources: [ + makeSource({ + id: "source_spacex", + knowhereDocumentId: "doc_spacex", + }), + ], + loadSourceAssetUrls, + }) + + expect(loadSourceAssetUrls).toHaveBeenCalledTimes(1) + expect(result?.assetUrl).toBe( + "https://blob.example/images/image-9-Night%20Rocket%20Launch.jpg", + ) + }) + + it("formats a bounded media asset context for the grounded prompt", () => { + const context = formatRetrievedMediaAssetContext([ + makeRetrievalResult({ + chunkType: "image", + assetUrl: "https://blob.example/images/launch.jpg", + source: { + documentId: "doc_spacex", + sourceFileName: "spacex-s1.pdf", + sectionPath: "Assets / images / launch.jpg", + }, + }), + ]) + + expect(context).toBe( + "- spacex-s1.pdf / Assets / images / launch.jpg: https://blob.example/images/launch.jpg", + ) + }) + + it("recognizes image asset URLs with query strings", () => { + expect( + isImageAssetUrl("https://blob.example/images/launch.jpg?download=1"), + ).toBe(true) + expect(isImageAssetUrl("https://blob.example/tables/table-1.html")).toBe( + false, + ) + }) +}) + +function makeRetrievalResult( + overrides: Partial = {}, +): RetrievalResult { + return { + content: "Image evidence", + chunkType: "text", + score: 0.9, + source: { + documentId: "doc_1", + sourceFileName: "source.pdf", + sectionPath: "Root", + }, + ...overrides, + } +} + +function makeSource(overrides: Partial = {}): Source { + return { + id: "source_1", + workspaceId: "workspace_1", + title: "source.pdf", + mimeType: "application/pdf", + sizeBytes: 100, + status: "ready", + failureReason: null, + knowhereJobId: "job_1", + knowhereDocumentId: "doc_1", + stagedBlobPathname: null, + stagedBlobUrl: null, + originalBlobPathname: null, + originalBlobUrl: null, + demoKey: null, + createdAt: new Date("2026-06-04T00:00:00Z"), + updatedAt: new Date("2026-06-04T00:00:00Z"), + deletedAt: null, + ...overrides, + } +} diff --git a/src/domains/chat/media-assets.ts b/src/domains/chat/media-assets.ts new file mode 100644 index 0000000..c38fe86 --- /dev/null +++ b/src/domains/chat/media-assets.ts @@ -0,0 +1,239 @@ +import type { RetrievalResult } from "@ontos-ai/knowhere-sdk" + +import type { Source } from "@/infrastructure/db/schema" + +const retrievedMediaAssetLimit = 6 +const imageExtensions = [".jpg", ".jpeg", ".png", ".gif", ".webp", ".svg"] as const + +export type LoadSourceAssetUrls = ( + source: Source, +) => Promise>> + +export type RetrievalResultAssetInput = { + readonly results: readonly RetrievalResult[] + readonly sources: readonly Source[] + readonly loadSourceAssetUrls?: LoadSourceAssetUrls +} + +export async function enrichRetrievalResultsWithAssetUrls({ + results, + sources, + loadSourceAssetUrls, +}: RetrievalResultAssetInput): Promise { + if (!loadSourceAssetUrls || results.length === 0) return [...results] + + const sourcesByDocumentId = new Map( + sources.flatMap((source): readonly [string, Source][] => + source.knowhereDocumentId ? [[source.knowhereDocumentId, source]] : [], + ), + ) + const assetUrlsBySourceId = new Map< + string, + Promise>> + >() + + return Promise.all( + results.map(async (result): Promise => { + if (getTrimmedString(result.assetUrl)) return result + + const documentId = getTrimmedString(result.source.documentId) + const source = documentId ? sourcesByDocumentId.get(documentId) : undefined + if (!source) return result + + const assetUrls = await getCachedSourceAssetUrls( + source, + loadSourceAssetUrls, + assetUrlsBySourceId, + ) + const assetUrl = resolveResultAssetUrl(result, assetUrls) + return assetUrl ? { ...result, assetUrl } : result + }), + ) +} + +export function formatRetrievedMediaAssetContext( + results: readonly RetrievalResult[], +): string | undefined { + const lines: string[] = [] + const seen = new Set() + + for (const result of results) { + const assetUrl = getTrimmedString(result.assetUrl) + if (!assetUrl || !isRenderableMediaAsset(result, assetUrl)) continue + + const label = formatResultAssetLabel(result) + const key = `${label}\0${assetUrl}` + if (seen.has(key)) continue + + seen.add(key) + lines.push(`- ${label}: ${assetUrl}`) + if (lines.length >= retrievedMediaAssetLimit) break + } + + return lines.length > 0 ? lines.join("\n") : undefined +} + +export function isImageAssetUrl(assetUrl: string): boolean { + const pathname = getUrlPathname(assetUrl).toLowerCase() + return imageExtensions.some((extension) => pathname.endsWith(extension)) +} + +async function getCachedSourceAssetUrls( + source: Source, + loadSourceAssetUrls: LoadSourceAssetUrls, + cache: Map>>>, +): Promise>> { + let cached = cache.get(source.id) + if (!cached) { + cached = loadSourceAssetUrls(source).catch(() => ({})) + cache.set(source.id, cached) + } + return cached +} + +function resolveResultAssetUrl( + result: RetrievalResult, + assetUrlsByFilePath: Readonly>, +): string | null { + const normalizedHaystacks = [ + result.source.sectionPath, + result.content, + ].flatMap((value): string[] => { + const normalized = normalizeAssetLookupText(value) + return normalized ? [normalized] : [] + }) + if (normalizedHaystacks.length === 0) return null + + const basenameCounts = getNormalizedBasenameCounts(assetUrlsByFilePath) + const matches = Object.entries(assetUrlsByFilePath) + .flatMap(([assetPath, assetUrl]): readonly AssetReferenceMatch[] => { + const trimmedUrl = getTrimmedString(assetUrl) + if (!trimmedUrl || !isSupportedAssetPath(assetPath)) return [] + + const index = getAssetReferenceIndex( + normalizedHaystacks, + assetPath, + basenameCounts, + ) + return index === null ? [] : [{ assetPath, assetUrl: trimmedUrl, index }] + }) + .sort(compareAssetReferenceMatches) + + return matches[0]?.assetUrl ?? null +} + +type AssetReferenceMatch = { + readonly assetPath: string + readonly assetUrl: string + readonly index: number +} + +function compareAssetReferenceMatches( + left: AssetReferenceMatch, + right: AssetReferenceMatch, +): number { + return left.index - right.index || left.assetPath.localeCompare(right.assetPath) +} + +function getAssetReferenceIndex( + normalizedHaystacks: readonly string[], + assetPath: string, + basenameCounts: ReadonlyMap, +): number | null { + const normalizedPath = normalizeAssetLookupText(assetPath) + if (!normalizedPath) return null + + const directIndex = getFirstIndex(normalizedHaystacks, normalizedPath) + if (directIndex !== null) return directIndex + + const basename = getNormalizedBasename(assetPath) + if (!basename || basenameCounts.get(basename) !== 1) return null + + return getFirstIndex(normalizedHaystacks, basename) +} + +function getFirstIndex( + normalizedHaystacks: readonly string[], + needle: string, +): number | null { + const indexes = normalizedHaystacks + .map((haystack): number => haystack.indexOf(needle)) + .filter((index): index is number => index >= 0) + + return indexes.length > 0 ? Math.min(...indexes) : null +} + +function getNormalizedBasenameCounts( + assetUrlsByFilePath: Readonly>, +): ReadonlyMap { + const counts = new Map() + for (const assetPath of Object.keys(assetUrlsByFilePath)) { + const basename = getNormalizedBasename(assetPath) + if (!basename) continue + counts.set(basename, (counts.get(basename) ?? 0) + 1) + } + return counts +} + +function getNormalizedBasename(assetPath: string): string | null { + const basename = assetPath.replaceAll("\\", "/").split("/").pop() + return normalizeAssetLookupText(basename) +} + +function normalizeAssetLookupText(value: string | null | undefined): string | null { + const trimmedValue = getTrimmedString(value) + if (!trimmedValue) return null + + const normalized = decodeUrlText(trimmedValue) + .replaceAll("\\", "/") + .replace(/\s*\/\s*/g, "/") + .replace(/\s+/g, " ") + .trim() + .toLowerCase() + + return normalized.length > 0 ? normalized : null +} + +function decodeUrlText(value: string): string { + try { + return decodeURIComponent(value) + } catch { + return value + } +} + +function isSupportedAssetPath(assetPath: string): boolean { + const normalizedPath = normalizeAssetLookupText(assetPath) + return ( + normalizedPath?.startsWith("images/") === true || + normalizedPath?.startsWith("tables/") === true + ) +} + +function isRenderableMediaAsset( + result: RetrievalResult, + assetUrl: string, +): boolean { + const chunkType = result.chunkType.toLowerCase() + return chunkType === "image" || chunkType === "table" || isImageAssetUrl(assetUrl) +} + +function formatResultAssetLabel(result: RetrievalResult): string { + const sourceFileName = getTrimmedString(result.source.sourceFileName) + const sectionPath = getTrimmedString(result.source.sectionPath) + const label = [sourceFileName, sectionPath].filter(Boolean).join(" / ") + return label || "Retrieved media asset" +} + +function getUrlPathname(assetUrl: string): string { + try { + return new URL(assetUrl).pathname + } catch { + return assetUrl.split("?")[0] ?? assetUrl + } +} + +function getTrimmedString(value: string | null | undefined): string | null { + const trimmedValue = value?.trim() ?? "" + return trimmedValue.length > 0 ? trimmedValue : null +} diff --git a/src/domains/chat/prompt.ts b/src/domains/chat/prompt.ts index cec9fa6..9ffed07 100644 --- a/src/domains/chat/prompt.ts +++ b/src/domains/chat/prompt.ts @@ -23,6 +23,7 @@ type GenerateGroundedAnswerInput = { retrievalQuery: string messages: readonly ChatHistoryMessage[] evidenceText: string + mediaAssetContext?: string } type BuildGroundedPromptInput = { @@ -30,6 +31,7 @@ type BuildGroundedPromptInput = { retrievalQuery?: string messages?: readonly ChatHistoryMessage[] evidenceText: string + mediaAssetContext?: string } export const generateContextualRetrievalQueryEffect = ( @@ -126,11 +128,14 @@ export function buildRetrievalQueryPrompt( export function buildGroundedPrompt(input: BuildGroundedPromptInput): string { const retrievalQuery = input.retrievalQuery?.trim() || input.question const conversationContext = formatConversationContext(input.messages ?? []) + const mediaAssetContext = input.mediaAssetContext?.trim() - return [ + const promptLines = [ "You answer user questions.", "Use the retrieved evidence as your primary context.", "Cite document sections (e.g. [文档名 / 章节名]) when they support a claim.", + "When retrieved image or table asset URLs are relevant to the user's request, include the URL next to the matching source label.", + "Do not invent asset URLs; use only the retrieved media asset URLs listed below.", "If the sources are related but incomplete, answer what you can and briefly say what is not covered.", "Do not invent document-specific facts that are not in the sources.", "Use the recent conversation only to resolve references like \"this document\"; do not use it as factual evidence.", @@ -148,7 +153,13 @@ export function buildGroundedPrompt(input: BuildGroundedPromptInput): string { "", "Retrieved evidence:", input.evidenceText, - ].join("\n") + ] + + if (mediaAssetContext) { + promptLines.push("", "Retrieved media asset URLs:", mediaAssetContext) + } + + return promptLines.join("\n") } function formatSourceContext( diff --git a/src/domains/chat/route-answer.ts b/src/domains/chat/route-answer.ts index a7599d8..17946d8 100644 --- a/src/domains/chat/route-answer.ts +++ b/src/domains/chat/route-answer.ts @@ -11,6 +11,7 @@ import { } from "@/domains/chat/service" import { chatTurnPersistence } from "@/domains/chat/chat-turn-persistence" import { reconcileSourcesForWorkspace } from "@/domains/sources/reconcile" +import { sourceService } from "@/domains/sources/service" import { notebookRequestContext } from "@/domains/workspace/request-context" import { routeResult, type RouteResult } from "@/lib/route-result" @@ -58,6 +59,8 @@ const answerChatEffect = (input: AnswerChatInput) => retrieval: client.retrieval, generateRetrievalQuery: generateContextualRetrievalQuery, generateAnswer: generateGroundedAnswer, + loadSourceAssetUrls: (source) => + sourceService.getParseAssetUrls(workspace.id, source.id), repository: chatTurnPersistence.createRepository(), }), ).pipe( diff --git a/src/domains/chat/service.ts b/src/domains/chat/service.ts index 4f1fcd2..edf31f8 100644 --- a/src/domains/chat/service.ts +++ b/src/domains/chat/service.ts @@ -2,6 +2,7 @@ import { Effect, Either } from "effect" import { answerQuestionWithRetrieval, + type AnswerQuestionInput, type ChatHistoryMessage, type GenerateAnswer, type GenerateRetrievalQuery, @@ -62,6 +63,7 @@ type ChatTurnInput = { retrieval: RetrievalClient generateRetrievalQuery: GenerateRetrievalQuery generateAnswer: GenerateAnswer + loadSourceAssetUrls?: AnswerQuestionInput["loadSourceAssetUrls"] repository: ChatRepository } @@ -119,6 +121,7 @@ export const handleChatTurnEffect = (input: ChatTurnInput) => retrieval: input.retrieval, generateRetrievalQuery: input.generateRetrievalQuery, generateAnswer: input.generateAnswer, + loadSourceAssetUrls: input.loadSourceAssetUrls, messages: chatHistoryMessages, }).pipe(Effect.catchAllCause(Effect.die)) From b28ae2f7dde6ee03ee54fa97d65bb7561b09342b Mon Sep 17 00:00:00 2001 From: suguanYang Date: Thu, 4 Jun 2026 04:36:41 +0000 Subject: [PATCH 2/2] Hide raw image asset URLs in chat --- src/components/chat-message-list.test.ts | 15 +++---- src/components/chat-message-list.tsx | 8 ---- src/domains/chat/index.test.ts | 17 +++++--- src/domains/chat/index.ts | 4 +- src/domains/chat/media-assets.test.ts | 19 +++++++++ src/domains/chat/media-assets.ts | 53 ++++++++++++++++++++++++ src/domains/chat/prompt.ts | 11 +++-- 7 files changed, 103 insertions(+), 24 deletions(-) diff --git a/src/components/chat-message-list.test.ts b/src/components/chat-message-list.test.ts index fef61c4..2c0ca80 100644 --- a/src/components/chat-message-list.test.ts +++ b/src/components/chat-message-list.test.ts @@ -90,13 +90,14 @@ describe("ChatMessageList", () => { expect(image.getAttribute("src")).toBe( "https://blob.example/images/launch.jpg", ); - - const link = screen.getByRole("link", { - name: "https://blob.example/images/launch.jpg", - }); - expect(link.getAttribute("href")).toBe( - "https://blob.example/images/launch.jpg", - ); + expect( + screen.queryByRole("link", { + name: "https://blob.example/images/launch.jpg", + }), + ).toBeNull(); + expect( + screen.queryByText("https://blob.example/images/launch.jpg"), + ).toBeNull(); }); it("shows thinking progress after existing messages while sending", () => { diff --git a/src/components/chat-message-list.tsx b/src/components/chat-message-list.tsx index d49c8a7..1f81c74 100644 --- a/src/components/chat-message-list.tsx +++ b/src/components/chat-message-list.tsx @@ -272,14 +272,6 @@ function MessageBubble({ {label} - - {assetUrl} - ))} diff --git a/src/domains/chat/index.test.ts b/src/domains/chat/index.test.ts index 8438e71..c4048d7 100644 --- a/src/domains/chat/index.test.ts +++ b/src/domains/chat/index.test.ts @@ -208,7 +208,11 @@ describe("answerQuestionWithRetrieval", () => { answerText: null, }), }; - const generateAnswer = vi.fn().mockResolvedValue("Use this launch photo."); + const generateAnswer = vi + .fn() + .mockResolvedValue( + "Use this launch photo. https://blob.example/images/image-9-Night%20Rocket%20Launch.jpg", + ); const generateRetrievalQuery = vi.fn().mockResolvedValue("SpaceX rocket photos"); const loadSourceAssetUrls = vi.fn().mockResolvedValue({ "images/image-9-Night Rocket Launch.jpg": @@ -246,6 +250,7 @@ describe("answerQuestionWithRetrieval", () => { mediaAssetContext: "- spacex-s1.pdf / Assets / images / image-9-Night Rocket Launch.jpg: https://blob.example/images/image-9-Night%20Rocket%20Launch.jpg", }); + expect(answer.answer).toBe("Use this launch photo."); expect(answer.citations).toEqual([ { ...result, @@ -446,7 +451,7 @@ describe("buildGroundedPrompt", () => { ); }); - it("includes retrieved media asset URLs when they are available", () => { + it("includes retrieved media asset references as internal metadata", () => { const prompt = buildGroundedPrompt({ question: "Show me the launch image.", evidenceText: "A launch image was retrieved.", @@ -454,12 +459,14 @@ describe("buildGroundedPrompt", () => { "- spacex-s1.pdf / Assets / images / launch.jpg: https://blob.example/images/launch.jpg", }); - expect(prompt).toContain("Retrieved media asset URLs:"); expect(prompt).toContain( - "When retrieved image or table asset URLs are relevant to the user's request, include the URL next to the matching source label.", + "Retrieved media asset references (internal; do not quote raw URLs):", + ); + expect(prompt).toContain( + "When retrieved image or table asset references are relevant to the user's request, cite the matching source label; the UI renders media from citation metadata.", ); expect(prompt).toContain( - "Do not invent asset URLs; use only the retrieved media asset URLs listed below.", + "Do not write raw media asset URLs in the answer. They are internal metadata only.", ); expect(prompt).toContain("https://blob.example/images/launch.jpg"); }); diff --git a/src/domains/chat/index.ts b/src/domains/chat/index.ts index 2b6cd6b..e84ec89 100644 --- a/src/domains/chat/index.ts +++ b/src/domains/chat/index.ts @@ -13,6 +13,7 @@ import { import { enrichRetrievalResultsWithAssetUrls, formatRetrievedMediaAssetContext, + removeRetrievedMediaAssetUrls, } from "./media-assets" const DEFAULT_TOP_K = 8 @@ -84,9 +85,10 @@ export const answerQuestionWithRetrieval = ( evidenceText, ...(mediaAssetContext ? { mediaAssetContext } : {}), } - const answer = yield* Effect.tryPromise(() => + const generatedAnswer = yield* Effect.tryPromise(() => input.generateAnswer(generateAnswerInput), ) + const answer = removeRetrievedMediaAssetUrls(generatedAnswer, results) return { answer, citations: toChatCitationViews(results, answer), diff --git a/src/domains/chat/media-assets.test.ts b/src/domains/chat/media-assets.test.ts index f7163bc..95d9398 100644 --- a/src/domains/chat/media-assets.test.ts +++ b/src/domains/chat/media-assets.test.ts @@ -5,6 +5,7 @@ import { enrichRetrievalResultsWithAssetUrls, formatRetrievedMediaAssetContext, isImageAssetUrl, + removeRetrievedMediaAssetUrls, } from "./media-assets" import type { Source } from "@/infrastructure/db/schema" @@ -67,6 +68,24 @@ describe("chat media assets", () => { false, ) }) + + it("removes retrieved raw asset URLs from generated answer text", () => { + const answer = removeRetrievedMediaAssetUrls( + "Use this launch photo. [Open image](https://blob.example/images/image-9-Night%20Rocket%20Launch.jpg) It is from the filing.", + [ + makeRetrievalResult({ + chunkType: "image", + assetUrl: + "https://blob.example/images/image-9-Night%20Rocket%20Launch.jpg", + }), + ], + ) + + expect(answer).toBe( + "Use this launch photo. Open image It is from the filing.", + ) + expect(answer).not.toContain("https://blob.example") + }) }) function makeRetrievalResult( diff --git a/src/domains/chat/media-assets.ts b/src/domains/chat/media-assets.ts index c38fe86..00b05f5 100644 --- a/src/domains/chat/media-assets.ts +++ b/src/domains/chat/media-assets.ts @@ -78,6 +78,26 @@ export function isImageAssetUrl(assetUrl: string): boolean { return imageExtensions.some((extension) => pathname.endsWith(extension)) } +export function removeRetrievedMediaAssetUrls( + answer: string, + results: readonly RetrievalResult[], +): string { + const assetUrls = Array.from( + new Set( + results + .map((result): string | null => getTrimmedString(result.assetUrl)) + .filter((assetUrl): assetUrl is string => assetUrl !== null), + ), + ) + if (assetUrls.length === 0) return answer + + const sanitizedAnswer = assetUrls + .flatMap(getAssetUrlTextVariants) + .reduce(removeAssetUrlFromAnswer, answer) + + return cleanSanitizedAnswer(sanitizedAnswer) +} + async function getCachedSourceAssetUrls( source: Source, loadSourceAssetUrls: LoadSourceAssetUrls, @@ -237,3 +257,36 @@ function getTrimmedString(value: string | null | undefined): string | null { const trimmedValue = value?.trim() ?? "" return trimmedValue.length > 0 ? trimmedValue : null } + +function getAssetUrlTextVariants(assetUrl: string): string[] { + return Array.from(new Set([assetUrl, decodeUrlText(assetUrl)])) +} + +function removeAssetUrlFromAnswer(answer: string, assetUrl: string): string { + const escapedAssetUrl = escapeRegExp(assetUrl) + return answer + .replace( + new RegExp(`\\[([^\\]]+)\\]\\(\\s*${escapedAssetUrl}\\s*\\)`, "g"), + "$1", + ) + .replace(new RegExp(`<\\s*${escapedAssetUrl}\\s*>`, "g"), "") + .replace(new RegExp(escapedAssetUrl, "g"), "") +} + +function cleanSanitizedAnswer(answer: string): string { + const cleanedAnswer = answer + .replace(/[ \t]+([,.;:!?])/g, "$1") + .replace(/\(\s*\)/g, "") + .replace(/\[\s*\]/g, "") + .replace(/[ \t]{2,}/g, " ") + .replace(/\s+([,.;!?])/g, "$1") + .replace(/\s*[::]\s*$/u, ".") + .replace(/\n{3,}/g, "\n\n") + .trim() + + return cleanedAnswer || "I found the relevant media asset in the sources." +} + +function escapeRegExp(value: string): string { + return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&") +} diff --git a/src/domains/chat/prompt.ts b/src/domains/chat/prompt.ts index 9ffed07..e07e7df 100644 --- a/src/domains/chat/prompt.ts +++ b/src/domains/chat/prompt.ts @@ -134,8 +134,9 @@ export function buildGroundedPrompt(input: BuildGroundedPromptInput): string { "You answer user questions.", "Use the retrieved evidence as your primary context.", "Cite document sections (e.g. [文档名 / 章节名]) when they support a claim.", - "When retrieved image or table asset URLs are relevant to the user's request, include the URL next to the matching source label.", - "Do not invent asset URLs; use only the retrieved media asset URLs listed below.", + "When retrieved image or table asset references are relevant to the user's request, cite the matching source label; the UI renders media from citation metadata.", + "Do not write raw media asset URLs in the answer. They are internal metadata only.", + "Do not invent asset URLs; use only the retrieved media asset references listed below.", "If the sources are related but incomplete, answer what you can and briefly say what is not covered.", "Do not invent document-specific facts that are not in the sources.", "Use the recent conversation only to resolve references like \"this document\"; do not use it as factual evidence.", @@ -156,7 +157,11 @@ export function buildGroundedPrompt(input: BuildGroundedPromptInput): string { ] if (mediaAssetContext) { - promptLines.push("", "Retrieved media asset URLs:", mediaAssetContext) + promptLines.push( + "", + "Retrieved media asset references (internal; do not quote raw URLs):", + mediaAssetContext, + ) } return promptLines.join("\n")