From 17d0d0a9dac5574fec5882f3d318dbba71afca02 Mon Sep 17 00:00:00 2001 From: suguanYang Date: Thu, 4 Jun 2026 08:01:01 +0000 Subject: [PATCH] Render image references from retrieved evidence --- src/components/chat-message-list.test.ts | 46 ++++++++ src/components/chat-message-list.tsx | 21 +++- src/domains/chat/index.test.ts | 71 ++++++++++++ src/domains/chat/index.ts | 1 + src/domains/chat/media-assets.test.ts | 47 ++++++++ src/domains/chat/media-assets.ts | 142 ++++++++++++++++++++--- 6 files changed, 309 insertions(+), 19 deletions(-) diff --git a/src/components/chat-message-list.test.ts b/src/components/chat-message-list.test.ts index 2c0ca80..c515976 100644 --- a/src/components/chat-message-list.test.ts +++ b/src/components/chat-message-list.test.ts @@ -100,6 +100,52 @@ describe("ChatMessageList", () => { ).toBeNull(); }); + it("does not hide image cards when source links dedupe the same section", () => { + render( + React.createElement(ChatMessageList, { + messages: [ + { + id: "assistant_1", + role: "assistant", + content: "这里是相关身份证明图片。", + citations: [ + { + chunkType: "text", + score: 0.9, + source: { + documentId: "doc_1", + sourceFileName: "商务标文件.pdf", + sectionPath: "二、法定代表人身份证明", + }, + }, + { + chunkType: "image", + score: 0.9, + assetUrl: "https://blob.example/images/image-6-id-front.jpg", + source: { + documentId: "doc_1", + sourceFileName: "商务标文件.pdf", + sectionPath: "二、法定代表人身份证明", + }, + }, + ], + }, + ], + }), + ); + + expect( + screen.getByRole("img", { + name: "商务标文件.pdf · 二、法定代表人身份证明", + }), + ).toBeTruthy(); + expect( + screen.getAllByRole("button", { + name: "Open source 商务标文件.pdf · 二、法定代表人身份证明", + }), + ).toHaveLength(1); + }); + it("shows thinking progress after existing messages while sending", () => { render( React.createElement(ChatMessageList, { diff --git a/src/components/chat-message-list.tsx b/src/components/chat-message-list.tsx index 1f81c74..fe2f85f 100644 --- a/src/components/chat-message-list.tsx +++ b/src/components/chat-message-list.tsx @@ -244,7 +244,10 @@ function MessageBubble({ message, sourceTitlesByDocumentId, ); - const displayImageCitations = getDisplayImageCitations(displayCitations); + const displayImageCitations = getDisplayImageCitations( + message, + sourceTitlesByDocumentId, + ); return (
@@ -336,18 +339,24 @@ function getDisplayCitations( } function getDisplayImageCitations( - citations: readonly DisplayCitation[], + message: ChatMessageView, + sourceTitlesByDocumentId: Readonly>, ): readonly DisplayImageCitation[] { const seenAssetUrls = new Set(); const imageCitations: DisplayImageCitation[] = []; - for (const citation of citations) { - const assetUrl = getTrimmedCitationField(citation.citation.assetUrl); - if (!assetUrl || !isImageCitation(citation.citation, assetUrl)) continue; + for (const [index, citation] of (message.citations ?? []).entries()) { + const assetUrl = getTrimmedCitationField(citation.assetUrl); + if (!assetUrl || !isImageCitation(citation, assetUrl)) continue; if (seenAssetUrls.has(assetUrl)) continue; seenAssetUrls.add(assetUrl); - imageCitations.push({ ...citation, assetUrl }); + imageCitations.push({ + citation, + citationId: chatPanelModel.getCitationId(message.id, index), + label: chatPanelModel.getCitationLabel(citation, sourceTitlesByDocumentId), + assetUrl, + }); } return imageCitations; diff --git a/src/domains/chat/index.test.ts b/src/domains/chat/index.test.ts index c4048d7..6eaf598 100644 --- a/src/domains/chat/index.test.ts +++ b/src/domains/chat/index.test.ts @@ -264,6 +264,77 @@ describe("answerQuestionWithRetrieval", () => { ]); }); + it("turns retrieved evidence image filenames into image citations", async () => { + const result = makeRetrievalResult({ + content: "This section contains identity proof attachments.", + source: { + documentId: "doc_identity", + sourceFileName: "document-generated.pdf", + sectionPath: "二、法定代表人身份证明", + }, + }); + const retrieval = { + query: vi.fn().mockResolvedValue({ + results: [result], + evidenceText: + "[image-6-中华人民共和国居民身份证.jpg]\n[image-7-中国居民身份证.jpg]", + referencedChunks: [], + namespace: "notebook-workspace", + query: "公民身份证明 图片", + routerUsed: "workflow_single_step", + answerText: null, + }), + }; + const generateAnswer = vi.fn().mockResolvedValue("这里是相关身份证明图片。"); + const generateRetrievalQuery = vi.fn().mockResolvedValue("公民身份证明 图片"); + const loadSourceAssetUrls = vi.fn().mockResolvedValue({ + "images/image-6-中华人民共和国居民身份证.jpg": + "https://blob.example/images/image-6-id-front.jpg", + "images/image-7-中国居民身份证.jpg": + "https://blob.example/images/image-7-id-back.jpg", + }); + + const answer = await Effect.runPromise( + answerQuestionWithRetrieval({ + question: "请发送几张关于公民身份的图片给我", + namespace: "notebook-workspace", + sources: [ + makeSource({ + id: "source_identity", + title: "商务标文件.pdf", + knowhereDocumentId: "doc_identity", + }), + ], + excludedSourceIds: [], + retrieval, + generateRetrievalQuery, + generateAnswer, + loadSourceAssetUrls, + messages: [], + }), + ); + + expect(generateAnswer).toHaveBeenCalledWith({ + question: "请发送几张关于公民身份的图片给我", + retrievalQuery: "公民身份证明 图片", + messages: [], + evidenceText: + "[image-6-中华人民共和国居民身份证.jpg]\n[image-7-中国居民身份证.jpg]", + mediaAssetContext: + "- 商务标文件.pdf / images/image-6-中华人民共和国居民身份证.jpg: https://blob.example/images/image-6-id-front.jpg\n" + + "- 商务标文件.pdf / images/image-7-中国居民身份证.jpg: https://blob.example/images/image-7-id-back.jpg", + }); + expect(answer.citations.map((citation) => citation.assetUrl)).toEqual([ + undefined, + "https://blob.example/images/image-6-id-front.jpg", + "https://blob.example/images/image-7-id-back.jpg", + ]); + expect(answer.citations.slice(1).map((citation) => citation.chunkType)).toEqual([ + "image", + "image", + ]); + }); + it("returns a deterministic no-results answer without calling the model", async () => { const retrieval = { query: vi.fn().mockResolvedValue({ diff --git a/src/domains/chat/index.ts b/src/domains/chat/index.ts index e84ec89..d903ab2 100644 --- a/src/domains/chat/index.ts +++ b/src/domains/chat/index.ts @@ -75,6 +75,7 @@ export const answerQuestionWithRetrieval = ( results: useNotebookSourceTitles(response.results, input.sources), sources: input.sources, loadSourceAssetUrls: input.loadSourceAssetUrls, + evidenceText, }), ) const mediaAssetContext = formatRetrievedMediaAssetContext(results) diff --git a/src/domains/chat/media-assets.test.ts b/src/domains/chat/media-assets.test.ts index 95d9398..9ad4b3c 100644 --- a/src/domains/chat/media-assets.test.ts +++ b/src/domains/chat/media-assets.test.ts @@ -42,6 +42,53 @@ describe("chat media assets", () => { ) }) + it("adds image citation results for asset filenames that only appear in evidence text", async () => { + const loadSourceAssetUrls = vi.fn().mockResolvedValue({ + "images/image-6-中华人民共和国居民身份证.jpg": + "https://blob.example/images/image-6-id-front.jpg", + "images/image-7-中国居民身份证.jpg": + "https://blob.example/images/image-7-id-back.jpg", + }) + + const results = await enrichRetrievalResultsWithAssetUrls({ + results: [ + makeRetrievalResult({ + content: "The section contains citizen identity proof copies.", + source: { + documentId: "doc_identity", + sourceFileName: "商务标文件.pdf", + sectionPath: "二、法定代表人身份证明", + }, + }), + ], + sources: [ + makeSource({ + id: "source_identity", + title: "商务标文件.pdf", + knowhereDocumentId: "doc_identity", + }), + ], + loadSourceAssetUrls, + evidenceText: + "[image-6-中华人民共和国居民身份证.jpg]\n[image-7-中国居民身份证.jpg]", + }) + + expect(results).toHaveLength(3) + expect(results[0]?.assetUrl).toBeUndefined() + expect(results.slice(1).map((result) => result.assetUrl)).toEqual([ + "https://blob.example/images/image-6-id-front.jpg", + "https://blob.example/images/image-7-id-back.jpg", + ]) + expect(results.slice(1).map((result) => result.chunkType)).toEqual([ + "image", + "image", + ]) + expect(results.slice(1).map((result) => result.source.sectionPath)).toEqual([ + "images/image-6-中华人民共和国居民身份证.jpg", + "images/image-7-中国居民身份证.jpg", + ]) + }) + it("formats a bounded media asset context for the grounded prompt", () => { const context = formatRetrievedMediaAssetContext([ makeRetrievalResult({ diff --git a/src/domains/chat/media-assets.ts b/src/domains/chat/media-assets.ts index 00b05f5..1832019 100644 --- a/src/domains/chat/media-assets.ts +++ b/src/domains/chat/media-assets.ts @@ -13,12 +13,14 @@ export type RetrievalResultAssetInput = { readonly results: readonly RetrievalResult[] readonly sources: readonly Source[] readonly loadSourceAssetUrls?: LoadSourceAssetUrls + readonly evidenceText?: string } export async function enrichRetrievalResultsWithAssetUrls({ results, sources, loadSourceAssetUrls, + evidenceText, }: RetrievalResultAssetInput): Promise { if (!loadSourceAssetUrls || results.length === 0) return [...results] @@ -32,23 +34,22 @@ export async function enrichRetrievalResultsWithAssetUrls({ Promise>> >() - return Promise.all( - results.map(async (result): Promise => { - if (getTrimmedString(result.assetUrl)) return result - + const enrichedResults = await Promise.all( + results.map(async (result): Promise => { const documentId = getTrimmedString(result.source.documentId) const source = documentId ? sourcesByDocumentId.get(documentId) : undefined - if (!source) return result + if (!source) return [result] const assetUrls = await getCachedSourceAssetUrls( source, loadSourceAssetUrls, assetUrlsBySourceId, ) - const assetUrl = resolveResultAssetUrl(result, assetUrls) - return assetUrl ? { ...result, assetUrl } : result + return addAssetCitationResults(result, assetUrls, evidenceText) }), ) + + return enrichedResults.flat() } export function formatRetrievedMediaAssetContext( @@ -111,10 +112,104 @@ async function getCachedSourceAssetUrls( return cached } -function resolveResultAssetUrl( +function addAssetCitationResults( + result: RetrievalResult, + assetUrlsByFilePath: Readonly>, + evidenceText: string | undefined, +): readonly RetrievalResult[] { + const existingAssetUrl = getTrimmedString(result.assetUrl) + const resultMatches = resolveAssetReferenceMatches(result, assetUrlsByFilePath) + const evidenceMatches = resolveAssetReferenceMatchesFromText( + evidenceText, + assetUrlsByFilePath, + ) + const seenAssetUrls = new Set() + const output: RetrievalResult[] = [] + + if (existingAssetUrl) { + seenAssetUrls.add(existingAssetUrl) + output.push(result) + } else if (resultMatches.length > 0) { + const [firstMatch, ...remainingMatches] = resultMatches + seenAssetUrls.add(firstMatch.assetUrl) + output.push(toAssetResult(result, firstMatch)) + for (const match of remainingMatches) { + if (seenAssetUrls.has(match.assetUrl)) continue + seenAssetUrls.add(match.assetUrl) + output.push(toAssetResult(result, match)) + } + } else { + output.push(result) + } + + for (const match of evidenceMatches) { + if (seenAssetUrls.has(match.assetUrl)) continue + seenAssetUrls.add(match.assetUrl) + output.push(toAssetResult(result, match)) + } + + return output +} + +function toAssetResult( + result: RetrievalResult, + match: AssetReferenceMatch, +): RetrievalResult { + return { + ...result, + assetUrl: match.assetUrl, + chunkType: getAssetChunkType(match, result.chunkType), + source: { + ...result.source, + sectionPath: getAssetSectionPath(result, match.assetPath), + }, + } +} + +function getAssetSectionPath( + result: RetrievalResult, + assetPath: string, +): string | null | undefined { + const sectionPath = getTrimmedString(result.source.sectionPath) + if (!sectionPath) return assetPath + + const normalizedSectionPath = normalizeAssetLookupText(sectionPath) + const normalizedAssetPath = normalizeAssetLookupText(assetPath) + const assetBasename = getNormalizedBasename(assetPath) + if ( + normalizedAssetPath && + normalizedSectionPath?.includes(normalizedAssetPath) + ) { + return sectionPath + } + if (assetBasename && normalizedSectionPath?.includes(assetBasename)) { + return sectionPath + } + + return assetPath +} + +function getAssetChunkType( + match: AssetReferenceMatch, + fallback: RetrievalResult["chunkType"], +): RetrievalResult["chunkType"] { + const normalizedAssetPath = normalizeAssetLookupText(match.assetPath) + if ( + normalizedAssetPath?.startsWith("images/") || + isImageAssetUrl(match.assetUrl) + ) { + return "image" + } + if (normalizedAssetPath?.startsWith("tables/")) { + return "table" + } + return fallback +} + +function resolveAssetReferenceMatches( result: RetrievalResult, assetUrlsByFilePath: Readonly>, -): string | null { +): readonly AssetReferenceMatch[] { const normalizedHaystacks = [ result.source.sectionPath, result.content, @@ -122,10 +217,33 @@ function resolveResultAssetUrl( const normalized = normalizeAssetLookupText(value) return normalized ? [normalized] : [] }) - if (normalizedHaystacks.length === 0) return null + if (normalizedHaystacks.length === 0) return [] + return resolveAssetReferenceMatchesFromHaystacks( + normalizedHaystacks, + assetUrlsByFilePath, + ) +} + +function resolveAssetReferenceMatchesFromText( + value: string | null | undefined, + assetUrlsByFilePath: Readonly>, +): readonly AssetReferenceMatch[] { + const normalized = normalizeAssetLookupText(value) + if (!normalized) return [] + + return resolveAssetReferenceMatchesFromHaystacks( + [normalized], + assetUrlsByFilePath, + ) +} + +function resolveAssetReferenceMatchesFromHaystacks( + normalizedHaystacks: readonly string[], + assetUrlsByFilePath: Readonly>, +): readonly AssetReferenceMatch[] { const basenameCounts = getNormalizedBasenameCounts(assetUrlsByFilePath) - const matches = Object.entries(assetUrlsByFilePath) + return Object.entries(assetUrlsByFilePath) .flatMap(([assetPath, assetUrl]): readonly AssetReferenceMatch[] => { const trimmedUrl = getTrimmedString(assetUrl) if (!trimmedUrl || !isSupportedAssetPath(assetPath)) return [] @@ -138,8 +256,6 @@ function resolveResultAssetUrl( return index === null ? [] : [{ assetPath, assetUrl: trimmedUrl, index }] }) .sort(compareAssetReferenceMatches) - - return matches[0]?.assetUrl ?? null } type AssetReferenceMatch = {