From 4e03be545f314a5ddbf393912d7eb177e3f5cede Mon Sep 17 00:00:00 2001 From: suguanYang Date: Thu, 4 Jun 2026 08:38:19 +0000 Subject: [PATCH 01/13] Use agentic retrieval loop for chat --- src/domains/chat/contracts.ts | 26 +- src/domains/chat/index.test.ts | 331 ++++++++++++++++++++----- src/domains/chat/index.ts | 157 +++++++++--- src/domains/chat/prompt.ts | 296 +++++++++++++++++++++- src/domains/chat/route-answer.ts | 6 +- src/domains/chat/route-service.test.ts | 9 +- src/domains/chat/service.test.ts | 57 ++--- src/domains/chat/service.ts | 3 - 8 files changed, 732 insertions(+), 153 deletions(-) diff --git a/src/domains/chat/contracts.ts b/src/domains/chat/contracts.ts index 5205ed0..505955a 100644 --- a/src/domains/chat/contracts.ts +++ b/src/domains/chat/contracts.ts @@ -14,19 +14,26 @@ export type ChatHistoryMessage = { citations?: readonly ChatCitationView[] } -export type GenerateRetrievalQuery = (input: { - question: string - messages: readonly ChatHistoryMessage[] - sources: readonly Source[] - excludedSourceIds: readonly string[] -}) => Promise +export type AgenticRetrievalQuery = Pick< + RetrievalQueryParams, + | "query" + | "topK" + | "dataType" + | "signalPaths" + | "filterMode" + | "threshold" +> + +export type SearchSources = ( + input: AgenticRetrievalQuery, +) => Promise export type GenerateAnswer = (input: { question: string - retrievalQuery: string messages: readonly ChatHistoryMessage[] - evidenceText: string - mediaAssetContext?: string + sources: readonly Source[] + excludedSourceIds: readonly string[] + searchSources: SearchSources }) => Promise export type AnswerQuestionInput = { @@ -35,7 +42,6 @@ export type AnswerQuestionInput = { sources: readonly Source[] excludedSourceIds: readonly string[] retrieval: RetrievalClient - generateRetrievalQuery: GenerateRetrievalQuery generateAnswer: GenerateAnswer loadSourceAssetUrls?: LoadSourceAssetUrls messages: readonly ChatHistoryMessage[] diff --git a/src/domains/chat/index.test.ts b/src/domains/chat/index.test.ts index c4048d7..1591460 100644 --- a/src/domains/chat/index.test.ts +++ b/src/domains/chat/index.test.ts @@ -5,15 +5,18 @@ import { generateText } from "ai" import { answerQuestionWithRetrieval, + buildAgenticChatSystemPrompt, buildGroundedPrompt, buildRetrievalQueryPrompt, + generateAgenticGroundedAnswer, generateContextualRetrievalQuery, generateGroundedAnswer, parseChatRequestBody, } from "." import type { Source } from "@/infrastructure/db/schema" -vi.mock("ai", () => ({ +vi.mock("ai", async (importOriginal) => ({ + ...(await importOriginal()), generateText: vi.fn(), })); @@ -24,9 +27,10 @@ afterEach(() => { describe("answerQuestionWithRetrieval", () => { it("queries the workspace namespace and excludes unchecked ready documents", async () => { + const result = makeRetrievalResult(); const retrieval = { query: vi.fn().mockResolvedValue({ - results: [makeRetrievalResult()], + results: [result], evidenceText: "Grounding content from evidence tree", referencedChunks: [], namespace: "notebook-workspace", @@ -35,22 +39,22 @@ describe("answerQuestionWithRetrieval", () => { answerText: null, }), }; - const generateAnswer = vi.fn().mockResolvedValue("The answer is grounded."); - const generateRetrievalQuery = vi - .fn() - .mockResolvedValue("What does the document say?"); + const generateAnswer = vi.fn(async ({ searchSources }) => { + await searchSources({ query: "What does the document say?" }); + return "The answer is grounded."; + }); + const sources = [ + makeSource({ knowhereDocumentId: "doc_included" }), + makeSource({ id: "source_2", knowhereDocumentId: "doc_excluded" }), + ]; const answer = await Effect.runPromise( answerQuestionWithRetrieval({ question: "What does the document say?", namespace: "notebook-workspace", - sources: [ - makeSource({ knowhereDocumentId: "doc_included" }), - makeSource({ id: "source_2", knowhereDocumentId: "doc_excluded" }), - ], + sources, excludedSourceIds: ["source_2"], retrieval, - generateRetrievalQuery, generateAnswer, messages: [], }), @@ -65,13 +69,14 @@ describe("answerQuestionWithRetrieval", () => { }); expect(generateAnswer).toHaveBeenCalledWith({ question: "What does the document say?", - retrievalQuery: "What does the document say?", messages: [], - evidenceText: "Grounding content from evidence tree", + sources, + excludedSourceIds: ["source_2"], + searchSources: expect.any(Function), }); expect(answer).toEqual({ answer: "The answer is grounded.", - citations: [makeRetrievalResult()], + citations: [result], }); }); @@ -102,12 +107,10 @@ describe("answerQuestionWithRetrieval", () => { answerText: null, }), }; - const generateAnswer = vi - .fn() - .mockResolvedValue( - "Revenue improved [Source 1: revenue growth]. Margins expanded [Source 2: margin expansion].", - ); - const generateRetrievalQuery = vi.fn().mockResolvedValue("What improved?"); + const generateAnswer = vi.fn(async ({ searchSources }) => { + await searchSources({ query: "What improved?" }); + return "Revenue improved [Source 1: revenue growth]. Margins expanded [Source 2: margin expansion]."; + }); const answer = await Effect.runPromise( answerQuestionWithRetrieval({ @@ -116,7 +119,6 @@ describe("answerQuestionWithRetrieval", () => { sources: [makeSource()], excludedSourceIds: [], retrieval, - generateRetrievalQuery, generateAnswer, messages: [], }), @@ -147,24 +149,24 @@ describe("answerQuestionWithRetrieval", () => { answerText: null, }), }; - const generateAnswer = vi - .fn() - .mockResolvedValue("Tesla invested in xAI [Source 1: xAI investment]."); - const generateRetrievalQuery = vi.fn().mockResolvedValue("Tesla xAI investment"); + const generateAnswer = vi.fn(async ({ searchSources }) => { + await searchSources({ query: "Tesla xAI investment" }); + return "Tesla invested in xAI [Source 1: xAI investment]."; + }); + const sources = [ + makeSource({ + title: "TSLA-Q4-2025-Update.pdf", + knowhereDocumentId: "doc_tesla", + }), + ]; const answer = await Effect.runPromise( answerQuestionWithRetrieval({ question: "What does the document say about xAI?", namespace: "notebook-workspace", - sources: [ - makeSource({ - title: "TSLA-Q4-2025-Update.pdf", - knowhereDocumentId: "doc_tesla", - }), - ], + sources, excludedSourceIds: [], retrieval, - generateRetrievalQuery, generateAnswer, messages: [], }), @@ -172,9 +174,10 @@ describe("answerQuestionWithRetrieval", () => { expect(generateAnswer).toHaveBeenCalledWith({ question: "What does the document say about xAI?", - retrievalQuery: "Tesla xAI investment", messages: [], - evidenceText: "Tesla invested in xAI.", + sources, + excludedSourceIds: [], + searchSources: expect.any(Function), }); const expectedResult = { ...result, @@ -208,12 +211,10 @@ describe("answerQuestionWithRetrieval", () => { answerText: null, }), }; - const generateAnswer = vi - .fn() - .mockResolvedValue( - "Use this launch photo. https://blob.example/images/image-9-Night%20Rocket%20Launch.jpg", - ); - const generateRetrievalQuery = vi.fn().mockResolvedValue("SpaceX rocket photos"); + const generateAnswer = vi.fn(async ({ searchSources }) => { + await searchSources({ query: "SpaceX rocket photos", dataType: 3 }); + return "Use this launch photo. https://blob.example/images/image-9-Night%20Rocket%20Launch.jpg"; + }); const loadSourceAssetUrls = vi.fn().mockResolvedValue({ "images/image-9-Night Rocket Launch.jpg": "https://blob.example/images/image-9-Night%20Rocket%20Launch.jpg", @@ -232,7 +233,6 @@ describe("answerQuestionWithRetrieval", () => { ], excludedSourceIds: [], retrieval, - generateRetrievalQuery, generateAnswer, loadSourceAssetUrls, messages: [], @@ -242,13 +242,12 @@ describe("answerQuestionWithRetrieval", () => { expect(loadSourceAssetUrls).toHaveBeenCalledWith( expect.objectContaining({ id: "source_spacex" }), ); - expect(generateAnswer).toHaveBeenCalledWith({ - question: "Show me the SpaceX rocket photos.", - retrievalQuery: "SpaceX rocket photos", - messages: [], - evidenceText: "A SpaceX rocket launches at night.", - mediaAssetContext: - "- spacex-s1.pdf / Assets / images / image-9-Night Rocket Launch.jpg: https://blob.example/images/image-9-Night%20Rocket%20Launch.jpg", + expect(retrieval.query).toHaveBeenCalledWith({ + namespace: "notebook-workspace", + query: "SpaceX rocket photos", + topK: 8, + useAgentic: true, + dataType: 3, }); expect(answer.answer).toBe("Use this launch photo."); expect(answer.citations).toEqual([ @@ -264,7 +263,7 @@ describe("answerQuestionWithRetrieval", () => { ]); }); - it("returns a deterministic no-results answer without calling the model", async () => { + it("returns the agent answer without citations when retrieval has no results", async () => { const retrieval = { query: vi.fn().mockResolvedValue({ results: [], @@ -276,8 +275,10 @@ describe("answerQuestionWithRetrieval", () => { answerText: null, }), }; - const generateAnswer = vi.fn(); - const generateRetrievalQuery = vi.fn().mockResolvedValue("Missing fact?"); + const generateAnswer = vi.fn(async ({ searchSources }) => { + await searchSources({ query: "Missing fact?" }); + return "I couldn't find that in your sources."; + }); const answer = await Effect.runPromise( answerQuestionWithRetrieval({ @@ -286,20 +287,18 @@ describe("answerQuestionWithRetrieval", () => { sources: [makeSource()], excludedSourceIds: [], retrieval, - generateRetrievalQuery, generateAnswer, messages: [], }), ); - expect(generateAnswer).not.toHaveBeenCalled(); expect(answer).toEqual({ answer: "I couldn't find that in your sources.", citations: [], }); }); - it("uses an LLM-contextualized query while answering the user's original question", async () => { + it("lets the agent issue contextual retrieval queries while answering the original question", async () => { const retrieval = { query: vi.fn().mockResolvedValue({ results: [makeRetrievalResult()], @@ -311,12 +310,12 @@ describe("answerQuestionWithRetrieval", () => { answerText: null, }), }; - const generateRetrievalQuery = vi - .fn() - .mockResolvedValue( - "Tesla Q4 2025 Update energy generation and storage deployments", - ); - const generateAnswer = vi.fn().mockResolvedValue("Energy storage grew."); + const generateAnswer = vi.fn(async ({ searchSources }) => { + await searchSources({ + query: "Tesla Q4 2025 Update energy generation and storage deployments", + }); + return "Energy storage grew."; + }); const messages = [ { role: "user" as const, @@ -335,7 +334,6 @@ describe("answerQuestionWithRetrieval", () => { sources: [makeSource({ title: "TSLA-Q4-2025-Update.pdf" })], excludedSourceIds: [], retrieval, - generateRetrievalQuery, generateAnswer, messages, }), @@ -349,11 +347,70 @@ describe("answerQuestionWithRetrieval", () => { }); expect(generateAnswer).toHaveBeenCalledWith({ question: "What about energy storage in this document?", - retrievalQuery: - "Tesla Q4 2025 Update energy generation and storage deployments", messages, - evidenceText: "Energy storage deployments grew significantly.", + sources: [makeSource({ title: "TSLA-Q4-2025-Update.pdf" })], + excludedSourceIds: [], + searchSources: expect.any(Function), + }); + }); + + it("uses structured referenced chunks from RetrievalQueryResponse as citations", async () => { + const retrieval = { + query: vi.fn().mockResolvedValue({ + results: [], + evidenceText: "A launch image was referenced.", + referencedChunks: [ + { + chunkId: "chunk_1", + documentId: "doc_spacex", + chunkType: "image", + sectionPath: "Assets / images / launch.jpg", + filePath: "images/launch.jpg", + jobId: "job_1", + assetUrl: "https://blob.example/images/launch.jpg", + }, + ], + namespace: "notebook-workspace", + query: "SpaceX launch image", + routerUsed: "workflow_single_step", + answerText: null, + }), + }; + const generateAnswer = vi.fn(async ({ searchSources }) => { + await searchSources({ query: "SpaceX launch image", dataType: 3 }); + return "Here is the launch image."; }); + + const answer = await Effect.runPromise( + answerQuestionWithRetrieval({ + question: "Show me the launch image.", + namespace: "notebook-workspace", + sources: [ + makeSource({ + title: "spacex-s1.pdf", + knowhereDocumentId: "doc_spacex", + }), + ], + excludedSourceIds: [], + retrieval, + generateAnswer, + messages: [], + }), + ); + + expect(answer.citations).toEqual([ + { + content: "", + chunkType: "image", + score: null, + assetUrl: "https://blob.example/images/launch.jpg", + source: { + documentId: "doc_spacex", + sourceFileName: "spacex-s1.pdf", + sectionPath: "Assets / images / launch.jpg", + }, + }, + ]); }); }); @@ -418,6 +475,132 @@ describe("generateGroundedAnswer", () => { }); }); +describe("generateAgenticGroundedAnswer", () => { + it("builds a Vercel AI SDK tool loop around Knowhere retrieval", async () => { + process.env.AI_GATEWAY_API_KEY = "test_gateway_key"; + vi.mocked(generateText).mockResolvedValue({ + text: "Here are the requested identity images.", + } as Awaited>); + const searchSources = vi.fn().mockResolvedValue({ + results: [ + makeRetrievalResult({ + content: "Identity card image front side.", + chunkType: "image", + assetUrl: "https://blob.example/images/id-front.jpg", + source: { + documentId: "doc_identity", + sourceFileName: "document-generated.pdf", + sectionPath: "Assets / images / id-front.jpg", + }, + }), + ], + evidenceText: + "Identity image evidence. https://blob.example/images/id-front.jpg", + referencedChunks: [ + { + chunkId: "chunk_identity_1", + documentId: "doc_identity", + chunkType: "image", + sectionPath: "Assets / images / id-front.jpg", + filePath: "images/id-front.jpg", + jobId: "job_1", + assetUrl: "https://blob.example/images/id-front.jpg", + }, + ], + namespace: "notebook-workspace", + query: "公民身份证 图片", + routerUsed: "workflow_single_step", + answerText: + "The source includes identity card images. https://blob.example/images/id-front.jpg", + stopReason: "answer_done", + failureReason: null, + decisionTrace: [ + { + step: "final", + stop: "answer_done", + assetUrl: "https://blob.example/images/id-front.jpg", + }, + ], + }); + + const answer = await generateAgenticGroundedAnswer({ + question: "请发送几张关于公民身份的图片给我", + messages: [], + sources: [ + makeSource({ + title: "商务标文件.pdf", + knowhereDocumentId: "doc_identity", + }), + ], + excludedSourceIds: [], + searchSources, + }); + + expect(answer).toBe("Here are the requested identity images."); + const generateTextInput = vi.mocked(generateText).mock.calls[0]?.[0] as unknown as { + readonly system: string + readonly messages: readonly { readonly role: string; readonly content: string }[] + readonly tools: { + readonly searchSources: { + readonly execute: (input: { + readonly query: string + readonly dataType?: number + }) => Promise + } + } + readonly prepareStep: (input: { readonly stepNumber: number }) => unknown + } + expect(generateTextInput.system).toContain("RetrievalQueryResponse") + expect(generateTextInput.system).toContain("dataType=3") + expect(generateTextInput.messages.at(-1)).toEqual({ + role: "user", + content: "请发送几张关于公民身份的图片给我", + }) + expect(generateTextInput.prepareStep({ stepNumber: 0 })).toMatchObject({ + toolChoice: { type: "tool", toolName: "searchSources" }, + activeTools: ["searchSources"], + }) + + const toolOutput = await generateTextInput.tools.searchSources.execute({ + query: "公民身份证 图片", + dataType: 3, + }); + + expect(searchSources).toHaveBeenCalledWith({ + query: "公民身份证 图片", + dataType: 3, + }); + expect(toolOutput).toMatchObject({ + query: "公民身份证 图片", + routerUsed: "workflow_single_step", + stopReason: "answer_done", + failureReason: null, + answerText: + "The source includes identity card images. [media asset URL hidden]", + resultCount: 1, + referencedChunkCount: 1, + hasEvidenceText: true, + results: [ + expect.objectContaining({ + chunkType: "image", + hasAssetUrl: true, + content: "Identity card image front side.", + }), + ], + referencedChunks: [ + expect.objectContaining({ + chunkId: "chunk_identity_1", + chunkType: "image", + filePath: "images/id-front.jpg", + hasAssetUrl: true, + }), + ], + agentGuidance: expect.stringContaining("Use this evidence"), + }); + expect(JSON.stringify(toolOutput)).not.toContain("https://blob.example"); + }); +}); + describe("buildGroundedPrompt", () => { it("includes evidence text and uses evidence-based citation format", () => { const prompt = buildGroundedPrompt({ @@ -472,6 +655,26 @@ describe("buildGroundedPrompt", () => { }); }); +describe("buildAgenticChatSystemPrompt", () => { + it("instructs the agent how to continue or stop from retrieval responses", () => { + const prompt = buildAgenticChatSystemPrompt({ + messages: [], + sources: [makeSource({ title: "商务标文件.pdf" })], + excludedSourceIds: [], + }); + + expect(prompt).toContain("Always call searchSources") + expect(prompt).toContain("evidenceText") + expect(prompt).toContain("failureReason") + expect(prompt).toContain("decisionTrace") + expect(prompt).toContain("remote source index") + expect(prompt).toContain("person or section but not an image asset") + expect(prompt).toContain("身份证") + expect(prompt).toContain("For image requests use dataType=3") + expect(prompt).toContain("商务标文件.pdf") + }); +}); + describe("buildRetrievalQueryPrompt", () => { it("includes source and history context for stateless retrieval", () => { const prompt = buildRetrievalQueryPrompt({ diff --git a/src/domains/chat/index.ts b/src/domains/chat/index.ts index e84ec89..fcf7059 100644 --- a/src/domains/chat/index.ts +++ b/src/domains/chat/index.ts @@ -1,22 +1,32 @@ import { Effect } from "effect" +import type { + RetrievalQueryParams, + RetrievalQueryResponse, + RetrievalResult, +} from "@ontos-ai/knowhere-sdk" import type { ChatCitationView } from "@/domains/chat/types" import { toChatCitationViews, useNotebookSourceTitles, } from "./citations" -import type { AnswerQuestionInput, AnswerQuestionResult } from "./contracts" +import type { + AgenticRetrievalQuery, + AnswerQuestionInput, + AnswerQuestionResult, +} from "./contracts" import { excludeDocuments, normalizeRetrievalQuery, } from "./retrieval" import { enrichRetrievalResultsWithAssetUrls, - formatRetrievedMediaAssetContext, removeRetrievedMediaAssetUrls, } from "./media-assets" const DEFAULT_TOP_K = 8 +const MAX_AGENTIC_TOP_K = 12 +const MAX_CITATION_RESULTS = 20 const NO_RESULTS_ANSWER = "I couldn't find that in your sources." export type { @@ -24,12 +34,15 @@ export type { AnswerQuestionResult, ChatHistoryMessage, GenerateAnswer, - GenerateRetrievalQuery, RetrievalClient, + SearchSources, } from "./contracts" export { + buildAgenticChatSystemPrompt, buildGroundedPrompt, buildRetrievalQueryPrompt, + generateAgenticGroundedAnswer, + generateAgenticGroundedAnswerEffect, generateContextualRetrievalQuery, generateContextualRetrievalQueryEffect, generateGroundedAnswer, @@ -46,51 +59,137 @@ export const answerQuestionWithRetrieval = ( ): Effect.Effect => Effect.gen(function* () { const question = input.question.trim() - const generatedQuery = yield* Effect.tryPromise(() => - input.generateRetrievalQuery({ + const retrievalResponses: RetrievalQueryResponse[] = [] + + const searchSources = async ( + queryInput: AgenticRetrievalQuery, + ): Promise => { + const response = await input.retrieval.query( + buildRetrievalQueryParams({ + input: queryInput, + fallbackQuestion: question, + namespace: input.namespace, + sources: input.sources, + excludedSourceIds: input.excludedSourceIds, + }), + ) + retrievalResponses.push(response) + return response + } + + const generatedAnswer = yield* Effect.tryPromise(() => + input.generateAnswer({ question, messages: input.messages, sources: input.sources, excludedSourceIds: input.excludedSourceIds, - }), - ) - const query = normalizeRetrievalQuery(generatedQuery, question) - const response = yield* Effect.tryPromise(() => - input.retrieval.query({ - namespace: input.namespace, - query, - topK: DEFAULT_TOP_K, - useAgentic: true, - ...excludeDocuments(input.sources, input.excludedSourceIds), + searchSources, }), ) - const evidenceText = response.evidenceText ?? "" - if (response.results.length === 0 && !evidenceText) { + const rawResults = collectRetrievalResults(retrievalResponses, input.sources) + if (rawResults.length === 0 && generatedAnswer.trim().length === 0) { return { answer: NO_RESULTS_ANSWER, citations: [] as ChatCitationView[] } } const results = yield* Effect.tryPromise(() => enrichRetrievalResultsWithAssetUrls({ - results: useNotebookSourceTitles(response.results, input.sources), + results: useNotebookSourceTitles(rawResults, input.sources), sources: input.sources, loadSourceAssetUrls: input.loadSourceAssetUrls, }), ) - const mediaAssetContext = formatRetrievedMediaAssetContext(results) - const generateAnswerInput = { - question, - retrievalQuery: query, - messages: input.messages, - evidenceText, - ...(mediaAssetContext ? { mediaAssetContext } : {}), - } - const generatedAnswer = yield* Effect.tryPromise(() => - input.generateAnswer(generateAnswerInput), - ) const answer = removeRetrievedMediaAssetUrls(generatedAnswer, results) return { answer, citations: toChatCitationViews(results, answer), } }) + +function buildRetrievalQueryParams(input: { + readonly input: AgenticRetrievalQuery + readonly fallbackQuestion: string + readonly namespace: string + readonly sources: AnswerQuestionInput["sources"] + readonly excludedSourceIds: readonly string[] +}): RetrievalQueryParams { + const query = normalizeRetrievalQuery( + input.input.query, + input.fallbackQuestion, + ) + return { + namespace: input.namespace, + query, + topK: normalizeTopK(input.input.topK), + useAgentic: true, + ...(input.input.dataType ? { dataType: input.input.dataType } : {}), + ...(input.input.signalPaths && input.input.signalPaths.length > 0 + ? { signalPaths: input.input.signalPaths } + : {}), + ...(input.input.filterMode ? { filterMode: input.input.filterMode } : {}), + ...(typeof input.input.threshold === "number" + ? { threshold: input.input.threshold } + : {}), + ...excludeDocuments(input.sources, input.excludedSourceIds), + } +} + +function normalizeTopK(value: number | undefined): number { + if (typeof value !== "number" || !Number.isSafeInteger(value)) { + return DEFAULT_TOP_K + } + return Math.min(Math.max(value, 1), MAX_AGENTIC_TOP_K) +} + +function collectRetrievalResults( + responses: readonly RetrievalQueryResponse[], + sources: readonly AnswerQuestionInput["sources"][number][], +): RetrievalResult[] { + const results: RetrievalResult[] = [] + const seenKeys = new Set() + const sourceTitlesByDocumentId = new Map( + sources.flatMap((source): readonly [string, string][] => + source.knowhereDocumentId ? [[source.knowhereDocumentId, source.title]] : [], + ), + ) + + for (const response of responses) { + for (const result of [ + ...response.results, + ...response.referencedChunks.map((chunk): RetrievalResult => ({ + content: "", + chunkType: chunk.chunkType, + score: null, + ...(chunk.assetUrl ? { assetUrl: chunk.assetUrl } : {}), + source: { + documentId: chunk.documentId, + sourceFileName: sourceTitlesByDocumentId.get(chunk.documentId), + sectionPath: chunk.sectionPath, + }, + })), + ]) { + const key = getRetrievalResultKey(result) + if (seenKeys.has(key)) continue + + seenKeys.add(key) + results.push(result) + if (results.length >= MAX_CITATION_RESULTS) return results + } + } + + return results +} + +function getRetrievalResultKey(result: RetrievalResult): string { + const source = result.source + return [ + source.documentId ?? "", + source.sourceFileName ?? "", + source.sectionPath ?? "", + result.chunkType, + result.assetUrl ?? "", + result.content.slice(0, 500), + ] + .map((part) => `${part.length}:${part}`) + .join("|") +} diff --git a/src/domains/chat/prompt.ts b/src/domains/chat/prompt.ts index e07e7df..da9e49f 100644 --- a/src/domains/chat/prompt.ts +++ b/src/domains/chat/prompt.ts @@ -1,15 +1,28 @@ -import { generateText } from "ai" +import { generateText, stepCountIs, tool, type ModelMessage } from "ai" import { Effect } from "effect" +import type { RetrievalQueryResponse, RetrievalResult } from "@ontos-ai/knowhere-sdk" +import { z } from "zod" import { CHAT_MODEL } from "@/lib/ai" import type { Source } from "@/infrastructure/db/schema" import type { ChatCitationView } from "@/domains/chat/types" -import type { ChatHistoryMessage } from "./contracts" +import type { + AgenticRetrievalQuery, + ChatHistoryMessage, + SearchSources, +} from "./contracts" import { normalizeRetrievalQuery } from "./retrieval" const RECENT_CONTEXT_MESSAGE_LIMIT = 8 const CONTEXT_CONTENT_CHAR_LIMIT = 900 const SOURCE_CONTEXT_LIMIT = 12 +const AGENTIC_SEARCH_STEP_LIMIT = 5 +const TOOL_EVIDENCE_CHAR_LIMIT = 6_000 +const TOOL_RESULT_CONTENT_CHAR_LIMIT = 700 +const TOOL_RESULT_LIMIT = 8 +const TOOL_REFERENCED_CHUNK_LIMIT = 12 +const RAW_URL_PATTERN = /https?:\/\/[^\s)\]}>"']+/g +const REDACTED_MEDIA_URL = "[media asset URL hidden]" type GenerateContextualRetrievalQueryInput = { question: string @@ -34,6 +47,14 @@ type BuildGroundedPromptInput = { mediaAssetContext?: string } +type GenerateAgenticGroundedAnswerInput = { + question: string + messages: readonly ChatHistoryMessage[] + sources: readonly Source[] + excludedSourceIds: readonly string[] + searchSources: SearchSources +} + export const generateContextualRetrievalQueryEffect = ( input: GenerateContextualRetrievalQueryInput, ): Effect.Effect => @@ -64,7 +85,7 @@ export const generateContextualRetrievalQueryEffect = ( return normalizeRetrievalQuery(response.text, question) }) -/** Async wrapper matching the GenerateRetrievalQuery signature. */ +/** Async wrapper for the legacy single-query retrieval flow. */ export async function generateContextualRetrievalQuery( input: GenerateContextualRetrievalQueryInput, ): Promise { @@ -93,13 +114,111 @@ export const generateGroundedAnswerEffect = ( return response.text.trim() }) -/** Async wrapper matching the GenerateAnswer signature. */ +/** Async wrapper for the legacy single-response answer flow. */ export async function generateGroundedAnswer( input: GenerateGroundedAnswerInput, ): Promise { return Effect.runPromise(generateGroundedAnswerEffect(input)) } +export const generateAgenticGroundedAnswerEffect = ( + input: GenerateAgenticGroundedAnswerInput, +): Effect.Effect => + Effect.gen(function* () { + if (!process.env.AI_GATEWAY_API_KEY) { + return yield* Effect.die( + new Error( + "AI_GATEWAY_API_KEY environment variable is required. " + + "Set it in your .env.local file.", + ), + ) + } + + const response = yield* Effect.tryPromise(() => + generateText({ + model: CHAT_MODEL, + system: buildAgenticChatSystemPrompt(input), + messages: buildAgenticChatMessages(input), + tools: { + searchSources: tool({ + description: + "Search the user's Notebook sources through Knowhere retrieval. " + + "Treat each response as external context from a remote source index. " + + "Use it before answering, and call it again with refined text, media, " + + "or section-path queries when the RetrievalQueryResponse says evidence is missing or weak.", + inputSchema: z.object({ + query: z + .string() + .min(1) + .describe( + "A self-contained retrieval query. Include document, topic, person, date, or section context from the conversation when needed.", + ), + topK: z + .number() + .int() + .min(1) + .max(12) + .optional() + .describe("Number of chunks to return. Defaults to 8."), + dataType: z + .union([ + z.literal(1), + z.literal(2), + z.literal(3), + z.literal(4), + z.literal(5), + z.literal(6), + ]) + .optional() + .describe( + "Optional chunk type filter: 1=all, 2=text, 3=image, 4=table, 5=text+image, 6=text+table.", + ), + signalPaths: z + .array(z.string().min(1)) + .max(8) + .optional() + .describe( + "Optional section/path keywords when a previous result points to a useful section.", + ), + filterMode: z + .enum(["keep", "delete"]) + .optional() + .describe( + "How to apply signalPaths. Use keep to focus on matching paths, delete to exclude them.", + ), + threshold: z + .number() + .min(0) + .max(1) + .optional() + .describe("Optional minimum retrieval score threshold."), + }), + execute: async (queryInput: AgenticRetrievalQuery) => + buildRetrievalToolOutput(await input.searchSources(queryInput)), + }), + }, + stopWhen: stepCountIs(AGENTIC_SEARCH_STEP_LIMIT), + prepareStep: ({ stepNumber }) => + stepNumber === 0 + ? { + toolChoice: { + type: "tool" as const, + toolName: "searchSources" as const, + }, + activeTools: ["searchSources" as const], + } + : undefined, + }), + ) + return response.text.trim() + }) + +export async function generateAgenticGroundedAnswer( + input: GenerateAgenticGroundedAnswerInput, +): Promise { + return Effect.runPromise(generateAgenticGroundedAnswerEffect(input)) +} + export function buildRetrievalQueryPrompt( input: GenerateContextualRetrievalQueryInput, ): string { @@ -167,6 +286,148 @@ export function buildGroundedPrompt(input: BuildGroundedPromptInput): string { return promptLines.join("\n") } +export function buildAgenticChatSystemPrompt( + input: Pick< + GenerateAgenticGroundedAnswerInput, + "messages" | "sources" | "excludedSourceIds" + >, +): string { + const sourceContext = formatSourceContext(input.sources, input.excludedSourceIds) + const conversationContext = formatConversationContext(input.messages) + + return [ + "You are a Notebook research agent that answers user questions from their uploaded sources.", + "You have one tool: searchSources. It runs Knowhere retrieval and returns a RetrievalQueryResponse summary.", + "Treat each tool result like external context from a remote source index: inspect it, reason over it, then decide whether to retrieve again.", + "", + "Agent loop rules:", + "1. Always call searchSources before writing a final answer.", + "2. Read the tool output fields: evidenceText, answerText, results, referencedChunks, stopReason, failureReason, and decisionTrace.", + "3. Use one response to guide the next query: carry forward discovered people, organizations, document names, section paths, file paths, chunk types, and failure reasons.", + "4. If evidenceText/results/referencedChunks directly support the answer, stop searching and answer.", + "5. If failureReason is present, result counts are zero, or evidence does not cover the user's requested entity/topic/media, call searchSources again with a more specific or broader query.", + "6. For image requests use dataType=3 or dataType=5. If an initial text result identifies a relevant person or section but not an image asset, query again with that person/section plus the requested image concept, e.g. identity card / 身份证 / 公民身份证明.", + "7. For table requests use dataType=4 or dataType=6.", + "8. Stop after enough evidence or when further searches are unlikely to help; then clearly say what was not found and what retrieval context was missing.", + "", + "Answering rules:", + "Use retrieved evidence as the factual source of truth.", + "Do not invent document-specific facts.", + "Use the recent conversation only to resolve references like \"this document\" or \"those images\".", + "Cite document sections in the answer, e.g. [文档名 / 章节名].", + "When retrieved image or table assets are relevant, cite the matching source label; the UI renders media from citation metadata.", + "Do not write raw media asset URLs in the answer. They are internal metadata only.", + "Start with the answer first. Keep answers concise unless the user asks for detail.", + "", + "Searchable sources:", + sourceContext, + "", + "Recent conversation summary:", + conversationContext, + ].join("\n") +} + +function buildAgenticChatMessages( + input: Pick, +): ModelMessage[] { + const recentMessages = input.messages.slice(-RECENT_CONTEXT_MESSAGE_LIMIT) + return [ + ...recentMessages.map((message): ModelMessage => ({ + role: message.role, + content: message.content, + })), + { role: "user", content: input.question }, + ] +} + +function buildRetrievalToolOutput(response: RetrievalQueryResponse): object { + return { + namespace: response.namespace, + query: response.query, + routerUsed: response.routerUsed, + stopReason: response.stopReason ?? null, + failureReason: response.failureReason ?? null, + answerText: response.answerText + ? redactRawUrls(response.answerText) + : response.answerText, + resultCount: response.results.length, + referencedChunkCount: response.referencedChunks.length, + hasEvidenceText: Boolean(response.evidenceText?.trim()), + evidenceText: truncateSafeContextTextToLimit( + response.evidenceText ?? "", + TOOL_EVIDENCE_CHAR_LIMIT, + ), + results: response.results.slice(0, TOOL_RESULT_LIMIT).map(formatToolResult), + referencedChunks: response.referencedChunks + .slice(0, TOOL_REFERENCED_CHUNK_LIMIT) + .map((chunk) => ({ + chunkId: chunk.chunkId, + documentId: chunk.documentId, + chunkType: chunk.chunkType, + sectionPath: chunk.sectionPath, + filePath: chunk.filePath ? redactRawUrls(chunk.filePath) : null, + hasAssetUrl: Boolean(chunk.assetUrl), + })), + decisionTrace: + response.decisionTrace + ?.slice(-6) + .map((trace) => redactRawUrlsFromUnknown(trace)) ?? [], + agentGuidance: getRetrievalResponseGuidance(response), + } +} + +function formatToolResult(result: RetrievalResult): object { + return { + chunkType: result.chunkType, + score: result.score, + hasAssetUrl: Boolean(result.assetUrl), + source: { + documentId: result.source.documentId ?? null, + sourceFileName: result.source.sourceFileName + ? redactRawUrls(result.source.sourceFileName) + : null, + sectionPath: result.source.sectionPath + ? redactRawUrls(result.source.sectionPath) + : null, + }, + content: truncateSafeContextTextToLimit( + result.content, + TOOL_RESULT_CONTENT_CHAR_LIMIT, + ), + } +} + +function getRetrievalResponseGuidance( + response: RetrievalQueryResponse, +): string { + const hasEvidence = Boolean(response.evidenceText?.trim()) + const hasResults = + response.results.length > 0 || response.referencedChunks.length > 0 + + if (response.failureReason) { + return ( + "Retrieval reported a semantic failure. If the user question is still answerable, " + + "try one refined query; otherwise say the sources do not contain enough support." + ) + } + if (!hasEvidence && !hasResults) { + return ( + "No useful evidence was returned. Try a broader query, a different wording, " + + "or a media/table dataType filter if the user asked for images or tables." + ) + } + if (response.stopReason && response.stopReason !== "answer_done") { + return ( + `Retrieval stopped with stopReason=${response.stopReason}. Inspect evidence; ` + + "if it does not directly answer the user, query again with a better target." + ) + } + return ( + "Use this evidence if it directly answers the user. Query again only if an " + + "important requested detail, source, image, table, person, date, or section is missing." + ) +} + function formatSourceContext( sources: readonly Source[], excludedSourceIds: readonly string[], @@ -220,3 +481,30 @@ function truncateContextText(value: string): string { if (normalized.length <= CONTEXT_CONTENT_CHAR_LIMIT) return normalized return `${normalized.slice(0, CONTEXT_CONTENT_CHAR_LIMIT)}...` } + +function truncateContextTextToLimit(value: string, limit: number): string { + const normalized = value.replace(/\s+/g, " ").trim() + if (normalized.length <= limit) return normalized + return `${normalized.slice(0, limit)}...` +} + +function truncateSafeContextTextToLimit(value: string, limit: number): string { + return truncateContextTextToLimit(redactRawUrls(value), limit) +} + +function redactRawUrls(value: string): string { + return value.replace(RAW_URL_PATTERN, REDACTED_MEDIA_URL) +} + +function redactRawUrlsFromUnknown(value: unknown): unknown { + if (typeof value === "string") return redactRawUrls(value) + if (Array.isArray(value)) return value.map(redactRawUrlsFromUnknown) + if (!value || typeof value !== "object") return value + + return Object.fromEntries( + Object.entries(value).map(([key, nestedValue]) => [ + key, + redactRawUrlsFromUnknown(nestedValue), + ]), + ) +} diff --git a/src/domains/chat/route-answer.ts b/src/domains/chat/route-answer.ts index 17946d8..d20a264 100644 --- a/src/domains/chat/route-answer.ts +++ b/src/domains/chat/route-answer.ts @@ -1,8 +1,7 @@ import { Effect, Either } from "effect" import { - generateContextualRetrievalQuery, - generateGroundedAnswer, + generateAgenticGroundedAnswer, parseChatRequestBody, } from "@/domains/chat" import { @@ -57,8 +56,7 @@ const answerChatEffect = (input: AnswerChatInput) => threadId: body.value.threadId, excludedSourceIds: body.value.excludedSourceIds, retrieval: client.retrieval, - generateRetrievalQuery: generateContextualRetrievalQuery, - generateAnswer: generateGroundedAnswer, + generateAnswer: generateAgenticGroundedAnswer, loadSourceAssetUrls: (source) => sourceService.getParseAssetUrls(workspace.id, source.id), repository: chatTurnPersistence.createRepository(), diff --git a/src/domains/chat/route-service.test.ts b/src/domains/chat/route-service.test.ts index 8fcc9cf..6796455 100644 --- a/src/domains/chat/route-service.test.ts +++ b/src/domains/chat/route-service.test.ts @@ -8,8 +8,7 @@ const mocks = vi.hoisted(() => ({ createChatThread: vi.fn(), ensureDefaultChatThread: vi.fn(), findChatThreadInWorkspace: vi.fn(), - generateContextualRetrievalQuery: vi.fn(), - generateGroundedAnswer: vi.fn(), + generateAgenticGroundedAnswer: vi.fn(), getAuthenticated: vi.fn(), getAuthenticatedWithClient: vi.fn(), handleChatTurn: vi.fn(), @@ -23,8 +22,7 @@ vi.mock("@/domains/chat", async (importOriginal) => { const original = await importOriginal() return { ...original, - generateContextualRetrievalQuery: mocks.generateContextualRetrievalQuery, - generateGroundedAnswer: mocks.generateGroundedAnswer, + generateAgenticGroundedAnswer: mocks.generateAgenticGroundedAnswer, } }) @@ -114,8 +112,7 @@ describe("chat route services", () => { threadId: "thread_1", excludedSourceIds: ["source_skipped"], retrieval: client.retrieval, - generateRetrievalQuery: mocks.generateContextualRetrievalQuery, - generateAnswer: mocks.generateGroundedAnswer, + generateAnswer: mocks.generateAgenticGroundedAnswer, repository: expect.objectContaining({ appendMessageToThread: expect.any(Function), ensureDefaultChatThread: expect.any(Function), diff --git a/src/domains/chat/service.test.ts b/src/domains/chat/service.test.ts index 4aea9a5..0150ca2 100644 --- a/src/domains/chat/service.test.ts +++ b/src/domains/chat/service.test.ts @@ -19,21 +19,21 @@ describe("handleChatTurn", () => { }), }; const repository = makeRepository(); - const generateRetrievalQuery = vi - .fn() - .mockResolvedValue("What does the document say?"); - const generateAnswer = vi.fn().mockResolvedValue("Grounded answer."); + const generateAnswer = vi.fn(async ({ searchSources }) => { + await searchSources({ query: "What does the document say?" }); + return "Grounded answer."; + }); + const sources = [ + makeSource({ id: "source_included", knowhereDocumentId: "doc_included" }), + makeSource({ id: "source_excluded", knowhereDocumentId: "doc_excluded" }), + ]; const result = await handleChatTurn({ workspace: makeWorkspace(), - sources: [ - makeSource({ id: "source_included", knowhereDocumentId: "doc_included" }), - makeSource({ id: "source_excluded", knowhereDocumentId: "doc_excluded" }), - ], + sources, question: "What does the document say?", excludedSourceIds: ["source_excluded"], retrieval, - generateRetrievalQuery, generateAnswer, repository, }); @@ -59,20 +59,12 @@ describe("handleChatTurn", () => { useAgentic: true, excludeDocumentIds: ["doc_excluded"], }); - expect(generateRetrievalQuery).toHaveBeenCalledWith({ - question: "What does the document say?", - messages: [], - sources: [ - makeSource({ id: "source_included", knowhereDocumentId: "doc_included" }), - makeSource({ id: "source_excluded", knowhereDocumentId: "doc_excluded" }), - ], - excludedSourceIds: ["source_excluded"], - }); expect(generateAnswer).toHaveBeenCalledWith({ question: "What does the document say?", - retrievalQuery: "What does the document say?", messages: [], - evidenceText: "Grounding content", + sources, + excludedSourceIds: ["source_excluded"], + searchSources: expect.any(Function), }); expect(repository.appendMessageToThread).toHaveBeenNthCalledWith(1, "workspace_1", { threadId: "thread_1", @@ -97,7 +89,6 @@ describe("handleChatTurn", () => { question: "Can I ask yet?", excludedSourceIds: [], retrieval, - generateRetrievalQuery: vi.fn(), generateAnswer: vi.fn(), repository, }); @@ -125,7 +116,6 @@ describe("handleChatTurn", () => { threadId: "thread_from_other_workspace", excludedSourceIds: [], retrieval: { query: vi.fn() }, - generateRetrievalQuery: vi.fn(), generateAnswer: vi.fn(), repository, }); @@ -140,7 +130,7 @@ describe("handleChatTurn", () => { expect(repository.appendMessageToThread).not.toHaveBeenCalled(); }); - it("passes prior thread messages to the stateless retrieval query planner", async () => { + it("passes prior thread messages to the agentic answer generator", async () => { const retrieval = { query: vi.fn().mockResolvedValue({ results: [makeRetrievalResult()], @@ -176,27 +166,27 @@ describe("handleChatTurn", () => { const repository = makeRepository({ listMessagesForThread: vi.fn().mockResolvedValue(previousMessages), }); - const generateRetrievalQuery = vi - .fn() - .mockResolvedValue( - "Tesla Q4 2025 Update energy generation and storage deployments", - ); - const generateAnswer = vi.fn().mockResolvedValue("Grounded answer."); + const generateAnswer = vi.fn(async ({ searchSources }) => { + await searchSources({ + query: "Tesla Q4 2025 Update energy generation and storage deployments", + }); + return "Grounded answer."; + }); + const sources = [makeSource({ title: "TSLA-Q4-2025-Update.pdf" })]; const result = await handleChatTurn({ workspace: makeWorkspace(), - sources: [makeSource({ title: "TSLA-Q4-2025-Update.pdf" })], + sources, question: "What about energy storage in this document?", threadId: "thread_1", excludedSourceIds: [], retrieval, - generateRetrievalQuery, generateAnswer, repository, }); expect(Either.isRight(result)).toBe(true); - expect(generateRetrievalQuery).toHaveBeenCalledWith({ + expect(generateAnswer).toHaveBeenCalledWith({ question: "What about energy storage in this document?", messages: [ { @@ -220,8 +210,9 @@ describe("handleChatTurn", () => { ], }, ], - sources: [makeSource({ title: "TSLA-Q4-2025-Update.pdf" })], + sources, excludedSourceIds: [], + searchSources: expect.any(Function), }); expect(retrieval.query).toHaveBeenCalledWith({ namespace: "notebook-namespace", diff --git a/src/domains/chat/service.ts b/src/domains/chat/service.ts index edf31f8..0bc663a 100644 --- a/src/domains/chat/service.ts +++ b/src/domains/chat/service.ts @@ -5,7 +5,6 @@ import { type AnswerQuestionInput, type ChatHistoryMessage, type GenerateAnswer, - type GenerateRetrievalQuery, type RetrievalClient, } from "." import { toChatMessageView } from "./view" @@ -61,7 +60,6 @@ type ChatTurnInput = { threadId?: string excludedSourceIds: readonly string[] retrieval: RetrievalClient - generateRetrievalQuery: GenerateRetrievalQuery generateAnswer: GenerateAnswer loadSourceAssetUrls?: AnswerQuestionInput["loadSourceAssetUrls"] repository: ChatRepository @@ -119,7 +117,6 @@ export const handleChatTurnEffect = (input: ChatTurnInput) => sources: readySources, excludedSourceIds: input.excludedSourceIds, retrieval: input.retrieval, - generateRetrievalQuery: input.generateRetrievalQuery, generateAnswer: input.generateAnswer, loadSourceAssetUrls: input.loadSourceAssetUrls, messages: chatHistoryMessages, From 68682273572e2727f843ece91cc46c2779484de3 Mon Sep 17 00:00:00 2001 From: suguanYang Date: Thu, 4 Jun 2026 08:01:01 +0000 Subject: [PATCH 02/13] Render image references from retrieved evidence --- src/components/chat-message-list.test.ts | 46 ++++++++ src/components/chat-message-list.tsx | 21 +++- src/domains/chat/index.test.ts | 77 ++++++++++++ src/domains/chat/index.ts | 12 ++ src/domains/chat/media-assets.test.ts | 47 ++++++++ src/domains/chat/media-assets.ts | 142 ++++++++++++++++++++--- 6 files changed, 326 insertions(+), 19 deletions(-) diff --git a/src/components/chat-message-list.test.ts b/src/components/chat-message-list.test.ts index 2c0ca80..c515976 100644 --- a/src/components/chat-message-list.test.ts +++ b/src/components/chat-message-list.test.ts @@ -100,6 +100,52 @@ describe("ChatMessageList", () => { ).toBeNull(); }); + it("does not hide image cards when source links dedupe the same section", () => { + render( + React.createElement(ChatMessageList, { + messages: [ + { + id: "assistant_1", + role: "assistant", + content: "这里是相关身份证明图片。", + citations: [ + { + chunkType: "text", + score: 0.9, + source: { + documentId: "doc_1", + sourceFileName: "商务标文件.pdf", + sectionPath: "二、法定代表人身份证明", + }, + }, + { + chunkType: "image", + score: 0.9, + assetUrl: "https://blob.example/images/image-6-id-front.jpg", + source: { + documentId: "doc_1", + sourceFileName: "商务标文件.pdf", + sectionPath: "二、法定代表人身份证明", + }, + }, + ], + }, + ], + }), + ); + + expect( + screen.getByRole("img", { + name: "商务标文件.pdf · 二、法定代表人身份证明", + }), + ).toBeTruthy(); + expect( + screen.getAllByRole("button", { + name: "Open source 商务标文件.pdf · 二、法定代表人身份证明", + }), + ).toHaveLength(1); + }); + it("shows thinking progress after existing messages while sending", () => { render( React.createElement(ChatMessageList, { diff --git a/src/components/chat-message-list.tsx b/src/components/chat-message-list.tsx index 1f81c74..fe2f85f 100644 --- a/src/components/chat-message-list.tsx +++ b/src/components/chat-message-list.tsx @@ -244,7 +244,10 @@ function MessageBubble({ message, sourceTitlesByDocumentId, ); - const displayImageCitations = getDisplayImageCitations(displayCitations); + const displayImageCitations = getDisplayImageCitations( + message, + sourceTitlesByDocumentId, + ); return (
@@ -336,18 +339,24 @@ function getDisplayCitations( } function getDisplayImageCitations( - citations: readonly DisplayCitation[], + message: ChatMessageView, + sourceTitlesByDocumentId: Readonly>, ): readonly DisplayImageCitation[] { const seenAssetUrls = new Set(); const imageCitations: DisplayImageCitation[] = []; - for (const citation of citations) { - const assetUrl = getTrimmedCitationField(citation.citation.assetUrl); - if (!assetUrl || !isImageCitation(citation.citation, assetUrl)) continue; + for (const [index, citation] of (message.citations ?? []).entries()) { + const assetUrl = getTrimmedCitationField(citation.assetUrl); + if (!assetUrl || !isImageCitation(citation, assetUrl)) continue; if (seenAssetUrls.has(assetUrl)) continue; seenAssetUrls.add(assetUrl); - imageCitations.push({ ...citation, assetUrl }); + imageCitations.push({ + citation, + citationId: chatPanelModel.getCitationId(message.id, index), + label: chatPanelModel.getCitationLabel(citation, sourceTitlesByDocumentId), + assetUrl, + }); } return imageCitations; diff --git a/src/domains/chat/index.test.ts b/src/domains/chat/index.test.ts index 1591460..cc2bb59 100644 --- a/src/domains/chat/index.test.ts +++ b/src/domains/chat/index.test.ts @@ -263,6 +263,83 @@ describe("answerQuestionWithRetrieval", () => { ]); }); + it("turns retrieved evidence image filenames into image citations", async () => { + const result = makeRetrievalResult({ + content: "This section contains identity proof attachments.", + source: { + documentId: "doc_identity", + sourceFileName: "document-generated.pdf", + sectionPath: "二、法定代表人身份证明", + }, + }); + const retrieval = { + query: vi.fn().mockResolvedValue({ + results: [result], + evidenceText: + "[image-6-中华人民共和国居民身份证.jpg]\n[image-7-中国居民身份证.jpg]", + referencedChunks: [], + namespace: "notebook-workspace", + query: "公民身份证明 图片", + routerUsed: "workflow_single_step", + answerText: null, + }), + }; + const generateAnswer = vi.fn(async ({ searchSources }) => { + await searchSources({ query: "公民身份证明 图片", dataType: 3 }); + return "这里是相关身份证明图片。"; + }); + const loadSourceAssetUrls = vi.fn().mockResolvedValue({ + "images/image-6-中华人民共和国居民身份证.jpg": + "https://blob.example/images/image-6-id-front.jpg", + "images/image-7-中国居民身份证.jpg": + "https://blob.example/images/image-7-id-back.jpg", + }); + const sources = [ + makeSource({ + id: "source_identity", + title: "商务标文件.pdf", + knowhereDocumentId: "doc_identity", + }), + ]; + + const answer = await Effect.runPromise( + answerQuestionWithRetrieval({ + question: "请发送几张关于公民身份的图片给我", + namespace: "notebook-workspace", + sources, + excludedSourceIds: [], + retrieval, + generateAnswer, + loadSourceAssetUrls, + messages: [], + }), + ); + + expect(generateAnswer).toHaveBeenCalledWith({ + question: "请发送几张关于公民身份的图片给我", + messages: [], + sources, + excludedSourceIds: [], + searchSources: expect.any(Function), + }); + expect(retrieval.query).toHaveBeenCalledWith({ + namespace: "notebook-workspace", + query: "公民身份证明 图片", + topK: 8, + useAgentic: true, + dataType: 3, + }); + expect(answer.citations.map((citation) => citation.assetUrl)).toEqual([ + undefined, + "https://blob.example/images/image-6-id-front.jpg", + "https://blob.example/images/image-7-id-back.jpg", + ]); + expect(answer.citations.slice(1).map((citation) => citation.chunkType)).toEqual([ + "image", + "image", + ]); + }); + it("returns the agent answer without citations when retrieval has no results", async () => { const retrieval = { query: vi.fn().mockResolvedValue({ diff --git a/src/domains/chat/index.ts b/src/domains/chat/index.ts index fcf7059..442e62a 100644 --- a/src/domains/chat/index.ts +++ b/src/domains/chat/index.ts @@ -97,6 +97,7 @@ export const answerQuestionWithRetrieval = ( results: useNotebookSourceTitles(rawResults, input.sources), sources: input.sources, loadSourceAssetUrls: input.loadSourceAssetUrls, + evidenceText: formatRetrievalEvidenceText(retrievalResponses), }), ) const answer = removeRetrievedMediaAssetUrls(generatedAnswer, results) @@ -180,6 +181,17 @@ function collectRetrievalResults( return results } +function formatRetrievalEvidenceText( + responses: readonly RetrievalQueryResponse[], +): string | undefined { + const evidenceText = responses + .map((response): string => response.evidenceText?.trim() ?? "") + .filter((value): boolean => value.length > 0) + .join("\n") + + return evidenceText || undefined +} + function getRetrievalResultKey(result: RetrievalResult): string { const source = result.source return [ diff --git a/src/domains/chat/media-assets.test.ts b/src/domains/chat/media-assets.test.ts index 95d9398..9ad4b3c 100644 --- a/src/domains/chat/media-assets.test.ts +++ b/src/domains/chat/media-assets.test.ts @@ -42,6 +42,53 @@ describe("chat media assets", () => { ) }) + it("adds image citation results for asset filenames that only appear in evidence text", async () => { + const loadSourceAssetUrls = vi.fn().mockResolvedValue({ + "images/image-6-中华人民共和国居民身份证.jpg": + "https://blob.example/images/image-6-id-front.jpg", + "images/image-7-中国居民身份证.jpg": + "https://blob.example/images/image-7-id-back.jpg", + }) + + const results = await enrichRetrievalResultsWithAssetUrls({ + results: [ + makeRetrievalResult({ + content: "The section contains citizen identity proof copies.", + source: { + documentId: "doc_identity", + sourceFileName: "商务标文件.pdf", + sectionPath: "二、法定代表人身份证明", + }, + }), + ], + sources: [ + makeSource({ + id: "source_identity", + title: "商务标文件.pdf", + knowhereDocumentId: "doc_identity", + }), + ], + loadSourceAssetUrls, + evidenceText: + "[image-6-中华人民共和国居民身份证.jpg]\n[image-7-中国居民身份证.jpg]", + }) + + expect(results).toHaveLength(3) + expect(results[0]?.assetUrl).toBeUndefined() + expect(results.slice(1).map((result) => result.assetUrl)).toEqual([ + "https://blob.example/images/image-6-id-front.jpg", + "https://blob.example/images/image-7-id-back.jpg", + ]) + expect(results.slice(1).map((result) => result.chunkType)).toEqual([ + "image", + "image", + ]) + expect(results.slice(1).map((result) => result.source.sectionPath)).toEqual([ + "images/image-6-中华人民共和国居民身份证.jpg", + "images/image-7-中国居民身份证.jpg", + ]) + }) + it("formats a bounded media asset context for the grounded prompt", () => { const context = formatRetrievedMediaAssetContext([ makeRetrievalResult({ diff --git a/src/domains/chat/media-assets.ts b/src/domains/chat/media-assets.ts index 00b05f5..1832019 100644 --- a/src/domains/chat/media-assets.ts +++ b/src/domains/chat/media-assets.ts @@ -13,12 +13,14 @@ export type RetrievalResultAssetInput = { readonly results: readonly RetrievalResult[] readonly sources: readonly Source[] readonly loadSourceAssetUrls?: LoadSourceAssetUrls + readonly evidenceText?: string } export async function enrichRetrievalResultsWithAssetUrls({ results, sources, loadSourceAssetUrls, + evidenceText, }: RetrievalResultAssetInput): Promise { if (!loadSourceAssetUrls || results.length === 0) return [...results] @@ -32,23 +34,22 @@ export async function enrichRetrievalResultsWithAssetUrls({ Promise>> >() - return Promise.all( - results.map(async (result): Promise => { - if (getTrimmedString(result.assetUrl)) return result - + const enrichedResults = await Promise.all( + results.map(async (result): Promise => { const documentId = getTrimmedString(result.source.documentId) const source = documentId ? sourcesByDocumentId.get(documentId) : undefined - if (!source) return result + if (!source) return [result] const assetUrls = await getCachedSourceAssetUrls( source, loadSourceAssetUrls, assetUrlsBySourceId, ) - const assetUrl = resolveResultAssetUrl(result, assetUrls) - return assetUrl ? { ...result, assetUrl } : result + return addAssetCitationResults(result, assetUrls, evidenceText) }), ) + + return enrichedResults.flat() } export function formatRetrievedMediaAssetContext( @@ -111,10 +112,104 @@ async function getCachedSourceAssetUrls( return cached } -function resolveResultAssetUrl( +function addAssetCitationResults( + result: RetrievalResult, + assetUrlsByFilePath: Readonly>, + evidenceText: string | undefined, +): readonly RetrievalResult[] { + const existingAssetUrl = getTrimmedString(result.assetUrl) + const resultMatches = resolveAssetReferenceMatches(result, assetUrlsByFilePath) + const evidenceMatches = resolveAssetReferenceMatchesFromText( + evidenceText, + assetUrlsByFilePath, + ) + const seenAssetUrls = new Set() + const output: RetrievalResult[] = [] + + if (existingAssetUrl) { + seenAssetUrls.add(existingAssetUrl) + output.push(result) + } else if (resultMatches.length > 0) { + const [firstMatch, ...remainingMatches] = resultMatches + seenAssetUrls.add(firstMatch.assetUrl) + output.push(toAssetResult(result, firstMatch)) + for (const match of remainingMatches) { + if (seenAssetUrls.has(match.assetUrl)) continue + seenAssetUrls.add(match.assetUrl) + output.push(toAssetResult(result, match)) + } + } else { + output.push(result) + } + + for (const match of evidenceMatches) { + if (seenAssetUrls.has(match.assetUrl)) continue + seenAssetUrls.add(match.assetUrl) + output.push(toAssetResult(result, match)) + } + + return output +} + +function toAssetResult( + result: RetrievalResult, + match: AssetReferenceMatch, +): RetrievalResult { + return { + ...result, + assetUrl: match.assetUrl, + chunkType: getAssetChunkType(match, result.chunkType), + source: { + ...result.source, + sectionPath: getAssetSectionPath(result, match.assetPath), + }, + } +} + +function getAssetSectionPath( + result: RetrievalResult, + assetPath: string, +): string | null | undefined { + const sectionPath = getTrimmedString(result.source.sectionPath) + if (!sectionPath) return assetPath + + const normalizedSectionPath = normalizeAssetLookupText(sectionPath) + const normalizedAssetPath = normalizeAssetLookupText(assetPath) + const assetBasename = getNormalizedBasename(assetPath) + if ( + normalizedAssetPath && + normalizedSectionPath?.includes(normalizedAssetPath) + ) { + return sectionPath + } + if (assetBasename && normalizedSectionPath?.includes(assetBasename)) { + return sectionPath + } + + return assetPath +} + +function getAssetChunkType( + match: AssetReferenceMatch, + fallback: RetrievalResult["chunkType"], +): RetrievalResult["chunkType"] { + const normalizedAssetPath = normalizeAssetLookupText(match.assetPath) + if ( + normalizedAssetPath?.startsWith("images/") || + isImageAssetUrl(match.assetUrl) + ) { + return "image" + } + if (normalizedAssetPath?.startsWith("tables/")) { + return "table" + } + return fallback +} + +function resolveAssetReferenceMatches( result: RetrievalResult, assetUrlsByFilePath: Readonly>, -): string | null { +): readonly AssetReferenceMatch[] { const normalizedHaystacks = [ result.source.sectionPath, result.content, @@ -122,10 +217,33 @@ function resolveResultAssetUrl( const normalized = normalizeAssetLookupText(value) return normalized ? [normalized] : [] }) - if (normalizedHaystacks.length === 0) return null + if (normalizedHaystacks.length === 0) return [] + return resolveAssetReferenceMatchesFromHaystacks( + normalizedHaystacks, + assetUrlsByFilePath, + ) +} + +function resolveAssetReferenceMatchesFromText( + value: string | null | undefined, + assetUrlsByFilePath: Readonly>, +): readonly AssetReferenceMatch[] { + const normalized = normalizeAssetLookupText(value) + if (!normalized) return [] + + return resolveAssetReferenceMatchesFromHaystacks( + [normalized], + assetUrlsByFilePath, + ) +} + +function resolveAssetReferenceMatchesFromHaystacks( + normalizedHaystacks: readonly string[], + assetUrlsByFilePath: Readonly>, +): readonly AssetReferenceMatch[] { const basenameCounts = getNormalizedBasenameCounts(assetUrlsByFilePath) - const matches = Object.entries(assetUrlsByFilePath) + return Object.entries(assetUrlsByFilePath) .flatMap(([assetPath, assetUrl]): readonly AssetReferenceMatch[] => { const trimmedUrl = getTrimmedString(assetUrl) if (!trimmedUrl || !isSupportedAssetPath(assetPath)) return [] @@ -138,8 +256,6 @@ function resolveResultAssetUrl( return index === null ? [] : [{ assetPath, assetUrl: trimmedUrl, index }] }) .sort(compareAssetReferenceMatches) - - return matches[0]?.assetUrl ?? null } type AssetReferenceMatch = { From 109d981efc158ee334da6bc4901403f888b34bd8 Mon Sep 17 00:00:00 2001 From: suguanYang Date: Thu, 4 Jun 2026 09:44:56 +0000 Subject: [PATCH 03/13] Use ToolLoopAgent for retrieval chat --- src/domains/chat/index.test.ts | 201 ++++++++++++++++++--- src/domains/chat/prompt.ts | 307 ++++++++++++++++++++++++--------- 2 files changed, 402 insertions(+), 106 deletions(-) diff --git a/src/domains/chat/index.test.ts b/src/domains/chat/index.test.ts index cc2bb59..b1e5429 100644 --- a/src/domains/chat/index.test.ts +++ b/src/domains/chat/index.test.ts @@ -1,7 +1,7 @@ import { afterEach, describe, expect, it, vi } from "vitest" import type { RetrievalResult } from "@ontos-ai/knowhere-sdk" import { Effect } from "effect" -import { generateText } from "ai" +import { generateText, ToolLoopAgent, type ModelMessage } from "ai" import { answerQuestionWithRetrieval, @@ -21,6 +21,7 @@ vi.mock("ai", async (importOriginal) => ({ })); afterEach(() => { + vi.restoreAllMocks(); vi.mocked(generateText).mockReset(); delete process.env.AI_GATEWAY_API_KEY; }); @@ -431,6 +432,57 @@ describe("answerQuestionWithRetrieval", () => { }); }); + it("does not append chat history to Knowhere tool queries", async () => { + const retrieval = { + query: vi.fn().mockResolvedValue({ + results: [makeRetrievalResult()], + evidenceText: "Energy storage deployments grew.", + referencedChunks: [], + namespace: "notebook-workspace", + query: "Tesla energy storage deployments", + routerUsed: "workflow_single_step", + answerText: null, + }), + }; + const generateAnswer = vi.fn(async ({ searchSources }) => { + await searchSources({ query: "Tesla energy storage deployments" }); + return "Energy storage grew."; + }); + const messages = [ + { + role: "user" as const, + content: "do-not-append-this-history-to-query", + }, + { + role: "assistant" as const, + content: "This older answer should not be concatenated into retrieval.", + }, + ]; + + await Effect.runPromise( + answerQuestionWithRetrieval({ + question: "What about it?", + namespace: "notebook-workspace", + sources: [makeSource()], + excludedSourceIds: [], + retrieval, + generateAnswer, + messages, + }), + ); + + const queryInput = retrieval.query.mock.calls[0]?.[0]; + expect(queryInput).toMatchObject({ + namespace: "notebook-workspace", + query: "Tesla energy storage deployments", + topK: 8, + useAgentic: true, + }); + expect(JSON.stringify(queryInput)).not.toContain( + "do-not-append-this-history-to-query", + ); + }); + it("uses structured referenced chunks from RetrievalQueryResponse as citations", async () => { const retrieval = { query: vi.fn().mockResolvedValue({ @@ -555,9 +607,19 @@ describe("generateGroundedAnswer", () => { describe("generateAgenticGroundedAnswer", () => { it("builds a Vercel AI SDK tool loop around Knowhere retrieval", async () => { process.env.AI_GATEWAY_API_KEY = "test_gateway_key"; - vi.mocked(generateText).mockResolvedValue({ - text: "Here are the requested identity images.", - } as Awaited>); + let capturedGenerateInput: + | Parameters[0] + | undefined; + const generateSpy = vi + .spyOn(ToolLoopAgent.prototype, "generate") + .mockImplementation(( + input: Parameters[0], + ): ReturnType => { + capturedGenerateInput = input; + return Promise.resolve({ + text: "Here are the requested identity images.", + } as Awaited>); + }); const searchSources = vi.fn().mockResolvedValue({ results: [ makeRetrievalResult({ @@ -614,31 +676,33 @@ describe("generateAgenticGroundedAnswer", () => { }); expect(answer).toBe("Here are the requested identity images."); - const generateTextInput = vi.mocked(generateText).mock.calls[0]?.[0] as unknown as { - readonly system: string - readonly messages: readonly { readonly role: string; readonly content: string }[] - readonly tools: { - readonly searchSources: { - readonly execute: (input: { - readonly query: string - readonly dataType?: number - }) => Promise - } - } - readonly prepareStep: (input: { readonly stepNumber: number }) => unknown - } - expect(generateTextInput.system).toContain("RetrievalQueryResponse") - expect(generateTextInput.system).toContain("dataType=3") - expect(generateTextInput.messages.at(-1)).toEqual({ + expect(generateSpy).toHaveBeenCalledWith({ + messages: expect.any(Array), + }); + const agent = getCapturedAgent(generateSpy.mock.contexts[0]); + const settings = getCapturedAgentSettings(agent); + const generateInput = getCapturedGenerateInput(capturedGenerateInput); + + expect(settings.instructions).toContain("RetrievalQueryResponse") + expect(settings.instructions).toContain("dataType=3") + expect(settings.instructions).toContain( + "Do not paste raw prior messages into searchSources.query", + ) + expect(generateInput.messages.at(-1)).toEqual({ role: "user", content: "请发送几张关于公民身份的图片给我", }) - expect(generateTextInput.prepareStep({ stepNumber: 0 })).toMatchObject({ + expect( + settings.prepareStep({ + stepNumber: 0, + messages: [...generateInput.messages], + }), + ).toMatchObject({ toolChoice: { type: "tool", toolName: "searchSources" }, activeTools: ["searchSources"], }) - const toolOutput = await generateTextInput.tools.searchSources.execute({ + const toolOutput = await getCapturedAgentTools(agent).searchSources.execute({ query: "公民身份证 图片", dataType: 3, }); @@ -676,6 +740,60 @@ describe("generateAgenticGroundedAnswer", () => { }); expect(JSON.stringify(toolOutput)).not.toContain("https://blob.example"); }); + + it("uses managed context for stored history and loop steps", async () => { + process.env.AI_GATEWAY_API_KEY = "test_gateway_key"; + let capturedGenerateInput: + | Parameters[0] + | undefined; + const generateSpy = vi + .spyOn(ToolLoopAgent.prototype, "generate") + .mockImplementation(( + input: Parameters[0], + ): ReturnType => { + capturedGenerateInput = input; + return Promise.resolve({ + text: "The answer is grounded.", + } as Awaited>); + }); + const messages = Array.from({ length: 24 }, (_, index) => ({ + role: index % 2 === 0 ? ("user" as const) : ("assistant" as const), + content: `history-message-${index} ${"context ".repeat(80)}`, + })); + + await generateAgenticGroundedAnswer({ + question: "What should I know now?", + messages, + sources: [makeSource()], + excludedSourceIds: [], + searchSources: vi.fn(), + }); + + const generateInput = getCapturedGenerateInput(capturedGenerateInput); + const serializedMessages = JSON.stringify(generateInput.messages); + expect(generateInput.messages[0]).toMatchObject({ + role: "system", + content: expect.stringContaining("Compacted earlier conversation"), + }); + expect(serializedMessages).not.toContain("history-message-0"); + expect(serializedMessages).toContain("history-message-23"); + + const settings = getCapturedAgentSettings( + getCapturedAgent(generateSpy.mock.contexts[0]), + ); + const oversizedLoopMessages = Array.from({ length: 25 }, (_, index) => ({ + role: "user" as const, + content: `loop-message-${index}`, + })); + const preparedStep = settings.prepareStep({ + stepNumber: 1, + messages: oversizedLoopMessages, + }) as { readonly messages: readonly ModelMessage[] }; + + expect(preparedStep.messages.length).toBeLessThanOrEqual(12); + expect(JSON.stringify(preparedStep.messages)).not.toContain("loop-message-0"); + expect(JSON.stringify(preparedStep.messages)).toContain("loop-message-24"); + }); }); describe("buildGroundedPrompt", () => { @@ -746,6 +864,7 @@ describe("buildAgenticChatSystemPrompt", () => { expect(prompt).toContain("decisionTrace") expect(prompt).toContain("remote source index") expect(prompt).toContain("person or section but not an image asset") + expect(prompt).toContain("Do not paste raw prior messages") expect(prompt).toContain("身份证") expect(prompt).toContain("For image requests use dataType=3") expect(prompt).toContain("商务标文件.pdf") @@ -861,3 +980,41 @@ function makeSource(overrides: Partial = {}): Source { ...overrides, }; } + +type CapturedAgentSettings = { + readonly instructions: string + readonly prepareStep: (input: { + readonly stepNumber: number + readonly messages: ModelMessage[] + }) => unknown +} + +type CapturedAgentTools = { + readonly searchSources: { + readonly execute: (input: { + readonly query: string + readonly dataType?: number + }) => Promise + } +} + +function getCapturedAgent(agent: unknown): ToolLoopAgent { + expect(agent).toBeInstanceOf(ToolLoopAgent) + return agent as ToolLoopAgent +} + +function getCapturedGenerateInput( + input: Parameters[0] | undefined, +): { readonly messages: ModelMessage[] } { + expect(input).toBeDefined() + return input as { readonly messages: ModelMessage[] } +} + +function getCapturedAgentSettings(agent: ToolLoopAgent): CapturedAgentSettings { + return (agent as unknown as { readonly settings: CapturedAgentSettings }) + .settings +} + +function getCapturedAgentTools(agent: ToolLoopAgent): CapturedAgentTools { + return agent.tools as unknown as CapturedAgentTools +} diff --git a/src/domains/chat/prompt.ts b/src/domains/chat/prompt.ts index da9e49f..7d635c7 100644 --- a/src/domains/chat/prompt.ts +++ b/src/domains/chat/prompt.ts @@ -1,4 +1,12 @@ -import { generateText, stepCountIs, tool, type ModelMessage } from "ai" +import { + generateText, + pruneMessages, + stepCountIs, + ToolLoopAgent, + tool, + type ModelMessage, + type PrepareStepFunction, +} from "ai" import { Effect } from "effect" import type { RetrievalQueryResponse, RetrievalResult } from "@ontos-ai/knowhere-sdk" import { z } from "zod" @@ -15,6 +23,13 @@ import { normalizeRetrievalQuery } from "./retrieval" const RECENT_CONTEXT_MESSAGE_LIMIT = 8 const CONTEXT_CONTENT_CHAR_LIMIT = 900 +const COMPACTED_HISTORY_MESSAGE_LIMIT = 12 +const COMPACTED_HISTORY_CONTENT_CHAR_LIMIT = 500 +const STORED_HISTORY_MESSAGE_LIMIT = 20 +const STORED_HISTORY_CHAR_BUDGET = 12_000 +const AGENT_STEP_MESSAGE_LIMIT = 20 +const AGENT_STEP_RECENT_MESSAGE_LIMIT = 12 +const AGENT_STEP_CONTEXT_CHAR_BUDGET = 16_000 const SOURCE_CONTEXT_LIMIT = 12 const AGENTIC_SEARCH_STEP_LIMIT = 5 const TOOL_EVIDENCE_CHAR_LIMIT = 6_000 @@ -55,6 +70,8 @@ type GenerateAgenticGroundedAnswerInput = { searchSources: SearchSources } +type AgenticChatTools = ReturnType + export const generateContextualRetrievalQueryEffect = ( input: GenerateContextualRetrievalQueryInput, ): Effect.Effect => @@ -134,80 +151,10 @@ export const generateAgenticGroundedAnswerEffect = ( ) } + const agent = buildAgenticChatAgent(input) const response = yield* Effect.tryPromise(() => - generateText({ - model: CHAT_MODEL, - system: buildAgenticChatSystemPrompt(input), + agent.generate({ messages: buildAgenticChatMessages(input), - tools: { - searchSources: tool({ - description: - "Search the user's Notebook sources through Knowhere retrieval. " + - "Treat each response as external context from a remote source index. " + - "Use it before answering, and call it again with refined text, media, " + - "or section-path queries when the RetrievalQueryResponse says evidence is missing or weak.", - inputSchema: z.object({ - query: z - .string() - .min(1) - .describe( - "A self-contained retrieval query. Include document, topic, person, date, or section context from the conversation when needed.", - ), - topK: z - .number() - .int() - .min(1) - .max(12) - .optional() - .describe("Number of chunks to return. Defaults to 8."), - dataType: z - .union([ - z.literal(1), - z.literal(2), - z.literal(3), - z.literal(4), - z.literal(5), - z.literal(6), - ]) - .optional() - .describe( - "Optional chunk type filter: 1=all, 2=text, 3=image, 4=table, 5=text+image, 6=text+table.", - ), - signalPaths: z - .array(z.string().min(1)) - .max(8) - .optional() - .describe( - "Optional section/path keywords when a previous result points to a useful section.", - ), - filterMode: z - .enum(["keep", "delete"]) - .optional() - .describe( - "How to apply signalPaths. Use keep to focus on matching paths, delete to exclude them.", - ), - threshold: z - .number() - .min(0) - .max(1) - .optional() - .describe("Optional minimum retrieval score threshold."), - }), - execute: async (queryInput: AgenticRetrievalQuery) => - buildRetrievalToolOutput(await input.searchSources(queryInput)), - }), - }, - stopWhen: stepCountIs(AGENTIC_SEARCH_STEP_LIMIT), - prepareStep: ({ stepNumber }) => - stepNumber === 0 - ? { - toolChoice: { - type: "tool" as const, - toolName: "searchSources" as const, - }, - activeTools: ["searchSources" as const], - } - : undefined, }), ) return response.text.trim() @@ -293,7 +240,6 @@ export function buildAgenticChatSystemPrompt( >, ): string { const sourceContext = formatSourceContext(input.sources, input.excludedSourceIds) - const conversationContext = formatConversationContext(input.messages) return [ "You are a Notebook research agent that answers user questions from their uploaded sources.", @@ -308,12 +254,13 @@ export function buildAgenticChatSystemPrompt( "5. If failureReason is present, result counts are zero, or evidence does not cover the user's requested entity/topic/media, call searchSources again with a more specific or broader query.", "6. For image requests use dataType=3 or dataType=5. If an initial text result identifies a relevant person or section but not an image asset, query again with that person/section plus the requested image concept, e.g. identity card / 身份证 / 公民身份证明.", "7. For table requests use dataType=4 or dataType=6.", - "8. Stop after enough evidence or when further searches are unlikely to help; then clearly say what was not found and what retrieval context was missing.", + "8. Do not paste raw prior messages into searchSources.query. The query must be concise and contain only distilled search terms such as document title, person, topic, date, section path, or asset kind.", + "9. Stop after enough evidence or when further searches are unlikely to help; then clearly say what was not found and what retrieval context was missing.", "", "Answering rules:", "Use retrieved evidence as the factual source of truth.", "Do not invent document-specific facts.", - "Use the recent conversation only to resolve references like \"this document\" or \"those images\".", + "Conversation context is supplied as managed model messages. Use it only to resolve references like \"this document\" or \"those images\".", "Cite document sections in the answer, e.g. [文档名 / 章节名].", "When retrieved image or table assets are relevant, cite the matching source label; the UI renders media from citation metadata.", "Do not write raw media asset URLs in the answer. They are internal metadata only.", @@ -321,25 +268,217 @@ export function buildAgenticChatSystemPrompt( "", "Searchable sources:", sourceContext, - "", - "Recent conversation summary:", - conversationContext, ].join("\n") } +function buildAgenticChatAgent( + input: GenerateAgenticGroundedAnswerInput, +): ToolLoopAgent { + return new ToolLoopAgent({ + model: CHAT_MODEL, + instructions: buildAgenticChatSystemPrompt(input), + tools: buildAgenticChatTools(input), + stopWhen: stepCountIs(AGENTIC_SEARCH_STEP_LIMIT), + prepareStep: buildAgenticPrepareStep(), + }) +} + +function buildAgenticChatTools( + input: Pick, +) { + return { + searchSources: tool({ + description: + "Search the user's Notebook sources through Knowhere retrieval. " + + "Treat each response as external context from a remote source index. " + + "Use it before answering, and call it again with refined text, media, " + + "or section-path queries when the RetrievalQueryResponse says evidence is missing or weak.", + inputSchema: z.object({ + query: z + .string() + .min(1) + .describe( + "A concise, self-contained retrieval query. Do not paste raw chat history or previous messages. Use only distilled terms such as document title, person, topic, date, section path, or asset kind when needed.", + ), + topK: z + .number() + .int() + .min(1) + .max(12) + .optional() + .describe("Number of chunks to return. Defaults to 8."), + dataType: z + .union([ + z.literal(1), + z.literal(2), + z.literal(3), + z.literal(4), + z.literal(5), + z.literal(6), + ]) + .optional() + .describe( + "Optional chunk type filter: 1=all, 2=text, 3=image, 4=table, 5=text+image, 6=text+table.", + ), + signalPaths: z + .array(z.string().min(1)) + .max(8) + .optional() + .describe( + "Optional section/path keywords when a previous result points to a useful section.", + ), + filterMode: z + .enum(["keep", "delete"]) + .optional() + .describe( + "How to apply signalPaths. Use keep to focus on matching paths, delete to exclude them.", + ), + threshold: z + .number() + .min(0) + .max(1) + .optional() + .describe("Optional minimum retrieval score threshold."), + }), + execute: async (queryInput: AgenticRetrievalQuery) => + buildRetrievalToolOutput(await input.searchSources(queryInput)), + }), + } as const +} + +function buildAgenticPrepareStep(): PrepareStepFunction { + return ({ stepNumber, messages }) => { + const managedMessages = buildAgentStepMessages(messages) + if (stepNumber === 0) { + return { + messages: managedMessages, + toolChoice: { + type: "tool" as const, + toolName: "searchSources" as const, + }, + activeTools: ["searchSources" as const], + } + } + + return { messages: managedMessages } + } +} + function buildAgenticChatMessages( input: Pick, ): ModelMessage[] { - const recentMessages = input.messages.slice(-RECENT_CONTEXT_MESSAGE_LIMIT) return [ - ...recentMessages.map((message): ModelMessage => ({ - role: message.role, - content: message.content, - })), + ...buildManagedStoredHistoryMessages(input.messages), { role: "user", content: input.question }, ] } +function buildManagedStoredHistoryMessages( + messages: readonly ChatHistoryMessage[], +): ModelMessage[] { + const exactMessages = messages.map(toModelMessage) + if ( + exactMessages.length <= STORED_HISTORY_MESSAGE_LIMIT && + getModelMessagesCharLength(exactMessages) <= STORED_HISTORY_CHAR_BUDGET + ) { + return exactMessages + } + + const recentMessages = messages.slice(-RECENT_CONTEXT_MESSAGE_LIMIT) + const olderMessages = messages.slice(0, -RECENT_CONTEXT_MESSAGE_LIMIT) + const compactedHistoryContext = formatCompactedHistoryContext(olderMessages) + + return [ + ...(compactedHistoryContext + ? [ + { + role: "system" as const, + content: compactedHistoryContext, + }, + ] + : []), + ...recentMessages.map(toModelMessage), + ] +} + +function buildAgentStepMessages(messages: ModelMessage[]): ModelMessage[] { + const prunedMessages = pruneMessages({ + messages: [...messages], + reasoning: "before-last-message", + toolCalls: [{ type: "before-last-4-messages", tools: ["searchSources"] }], + emptyMessages: "remove", + }) + + if ( + prunedMessages.length <= AGENT_STEP_MESSAGE_LIMIT && + getModelMessagesCharLength(prunedMessages) <= AGENT_STEP_CONTEXT_CHAR_BUDGET + ) { + return prunedMessages + } + + const systemMessages = prunedMessages.filter( + (message): boolean => message.role === "system", + ) + const nonSystemMessages = prunedMessages.filter( + (message): boolean => message.role !== "system", + ) + + return [ + ...systemMessages, + ...nonSystemMessages.slice(-AGENT_STEP_RECENT_MESSAGE_LIMIT), + ] +} + +function toModelMessage(message: ChatHistoryMessage): ModelMessage { + return { + role: message.role, + content: message.content, + } +} + +function formatCompactedHistoryContext( + messages: readonly ChatHistoryMessage[], +): string { + if (messages.length === 0) return "" + + const selectedMessages = messages.slice(-COMPACTED_HISTORY_MESSAGE_LIMIT) + const omittedMessageCount = messages.length - selectedMessages.length + const lines = selectedMessages.map((message): string => { + const content = truncateContextTextToLimit( + message.content, + COMPACTED_HISTORY_CONTENT_CHAR_LIMIT, + ) + const citationContext = formatCitationContext(message.citations ?? []) + return citationContext + ? `- ${message.role}: ${content}\n citations: ${citationContext}` + : `- ${message.role}: ${content}` + }) + + return [ + "Compacted earlier conversation for context. This is not a retrieval query and must not be pasted into searchSources.query.", + omittedMessageCount > 0 + ? `${omittedMessageCount} earlier messages were omitted before this compacted context.` + : "", + ...lines, + ] + .filter((line): boolean => line.length > 0) + .join("\n") +} + +function getModelMessagesCharLength(messages: readonly ModelMessage[]): number { + return messages.reduce( + (totalLength, message): number => + totalLength + getUnknownTextLength(message.content), + 0, + ) +} + +function getUnknownTextLength(value: unknown): number { + if (typeof value === "string") return value.length + if (value === null || value === undefined) return 0 + return JSON.stringify(value).length +} + function buildRetrievalToolOutput(response: RetrievalQueryResponse): object { return { namespace: response.namespace, From 05636b0807881229f3fefd6e3ee0c821a19fb4de Mon Sep 17 00:00:00 2001 From: suguanYang Date: Thu, 4 Jun 2026 10:21:29 +0000 Subject: [PATCH 04/13] Add retrieved chunk read tool --- src/domains/chat/contracts.ts | 53 +++++- src/domains/chat/index.test.ts | 143 +++++++++++++++++ src/domains/chat/index.ts | 266 +++++++++++++++++++++++++++++-- src/domains/chat/prompt.ts | 204 ++++++++++++++++++++---- src/domains/chat/service.test.ts | 2 + 5 files changed, 624 insertions(+), 44 deletions(-) diff --git a/src/domains/chat/contracts.ts b/src/domains/chat/contracts.ts index 505955a..234cf4d 100644 --- a/src/domains/chat/contracts.ts +++ b/src/domains/chat/contracts.ts @@ -1,4 +1,8 @@ -import type { RetrievalQueryParams, RetrievalQueryResponse } from "@ontos-ai/knowhere-sdk" +import type { + RetrievalQueryParams, + RetrievalQueryResponse, + RetrievalSource, +} from "@ontos-ai/knowhere-sdk" import type { Source } from "@/infrastructure/db/schema" import type { ChatCitationView } from "@/domains/chat/types" @@ -24,9 +28,53 @@ export type AgenticRetrievalQuery = Pick< | "threshold" > +export type RetrievedChunkReference = { + id: string + chunkId: string | null + kind: "result" | "referencedChunk" + resultIndex: number | null + chunkType: string + score: number | null + source: RetrievalSource + hasAssetUrl: boolean + contentLength: number + contentPreview: string + contentTruncated: boolean +} + +export type AgenticRetrievalResponse = RetrievalQueryResponse & { + chunkReferences: readonly RetrievedChunkReference[] +} + export type SearchSources = ( input: AgenticRetrievalQuery, -) => Promise +) => Promise + +export type ReadRetrievedChunkInput = { + id: string + offset?: number + limit?: number +} + +export type ReadRetrievedChunkResult = { + id: string + chunkId: string | null + found: boolean + chunkType: string | null + score: number | null + source: RetrievalSource | null + hasAssetUrl: boolean + offset: number + limit: number + contentLength: number + contentSlice: string + hasMoreContent: boolean + nextOffset: number | null +} + +export type ReadRetrievedChunk = ( + input: ReadRetrievedChunkInput, +) => Promise export type GenerateAnswer = (input: { question: string @@ -34,6 +82,7 @@ export type GenerateAnswer = (input: { sources: readonly Source[] excludedSourceIds: readonly string[] searchSources: SearchSources + readRetrievedChunk: ReadRetrievedChunk }) => Promise export type AnswerQuestionInput = { diff --git a/src/domains/chat/index.test.ts b/src/domains/chat/index.test.ts index b1e5429..208f768 100644 --- a/src/domains/chat/index.test.ts +++ b/src/domains/chat/index.test.ts @@ -74,6 +74,7 @@ describe("answerQuestionWithRetrieval", () => { sources, excludedSourceIds: ["source_2"], searchSources: expect.any(Function), + readRetrievedChunk: expect.any(Function), }); expect(answer).toEqual({ answer: "The answer is grounded.", @@ -179,6 +180,7 @@ describe("answerQuestionWithRetrieval", () => { sources, excludedSourceIds: [], searchSources: expect.any(Function), + readRetrievedChunk: expect.any(Function), }); const expectedResult = { ...result, @@ -322,6 +324,7 @@ describe("answerQuestionWithRetrieval", () => { sources, excludedSourceIds: [], searchSources: expect.any(Function), + readRetrievedChunk: expect.any(Function), }); expect(retrieval.query).toHaveBeenCalledWith({ namespace: "notebook-workspace", @@ -429,6 +432,7 @@ describe("answerQuestionWithRetrieval", () => { sources: [makeSource({ title: "TSLA-Q4-2025-Update.pdf" })], excludedSourceIds: [], searchSources: expect.any(Function), + readRetrievedChunk: expect.any(Function), }); }); @@ -483,6 +487,77 @@ describe("answerQuestionWithRetrieval", () => { ); }); + it("lets the agent read untruncated content from returned chunk ids", async () => { + const longContent = `${"Earlier context. ".repeat(160)}Critical obligation: retain source receipts.`; + const result = { + ...makeRetrievalResult({ + content: longContent, + source: { + documentId: "doc_contract", + sourceFileName: "contract.pdf", + sectionPath: "Obligations", + }, + }), + chunkId: "chunk_contract_1", + } as RetrievalResult & { readonly chunkId: string }; + const retrieval = { + query: vi.fn().mockResolvedValue({ + results: [result], + evidenceText: "Contract obligations were retrieved.", + referencedChunks: [], + namespace: "notebook-workspace", + query: "contract obligations", + routerUsed: "workflow_single_step", + answerText: null, + }), + }; + const generateAnswer = vi.fn( + async ({ searchSources, readRetrievedChunk }) => { + const response = await searchSources({ query: "contract obligations" }); + expect(response.chunkReferences[0]).toMatchObject({ + id: "chunk_contract_1", + chunkId: "chunk_contract_1", + contentTruncated: true, + contentLength: longContent.length, + }); + + const detail = await readRetrievedChunk({ + id: "chunk_contract_1", + offset: 2_000, + limit: 80, + }); + + expect(detail).toMatchObject({ + id: "chunk_contract_1", + found: true, + offset: 2_000, + limit: 80, + contentLength: longContent.length, + }); + return detail.contentSlice; + }, + ); + + const answer = await Effect.runPromise( + answerQuestionWithRetrieval({ + question: "What obligation matters?", + namespace: "notebook-workspace", + sources: [ + makeSource({ + title: "contract.pdf", + knowhereDocumentId: "doc_contract", + }), + ], + excludedSourceIds: [], + retrieval, + generateAnswer, + messages: [], + }), + ); + + expect(answer.answer).toBe(longContent.slice(2_000, 2_080)); + }); + it("uses structured referenced chunks from RetrievalQueryResponse as citations", async () => { const retrieval = { query: vi.fn().mockResolvedValue({ @@ -649,6 +724,25 @@ describe("generateAgenticGroundedAnswer", () => { namespace: "notebook-workspace", query: "公民身份证 图片", routerUsed: "workflow_single_step", + chunkReferences: [ + { + id: "chunk_identity_1", + chunkId: "chunk_identity_1", + kind: "result", + resultIndex: 1, + chunkType: "image", + score: 0.9, + source: { + documentId: "doc_identity", + sourceFileName: "document-generated.pdf", + sectionPath: "Assets / images / id-front.jpg", + }, + hasAssetUrl: true, + contentLength: "Identity card image front side.".length, + contentPreview: "Identity card image front side.", + contentTruncated: false, + }, + ], answerText: "The source includes identity card images. https://blob.example/images/id-front.jpg", stopReason: "answer_done", @@ -661,6 +755,26 @@ describe("generateAgenticGroundedAnswer", () => { }, ], }); + const readRetrievedChunk = vi.fn().mockResolvedValue({ + id: "chunk_identity_1", + chunkId: "chunk_identity_1", + found: true, + chunkType: "image", + score: 0.9, + source: { + documentId: "doc_identity", + sourceFileName: "document-generated.pdf", + sectionPath: "Assets / images / id-front.jpg", + }, + hasAssetUrl: true, + offset: 0, + limit: 80, + contentLength: 96, + contentSlice: + "Full identity card text. https://blob.example/images/id-front.jpg", + hasMoreContent: false, + nextOffset: null, + }); const answer = await generateAgenticGroundedAnswer({ question: "请发送几张关于公民身份的图片给我", @@ -673,6 +787,7 @@ describe("generateAgenticGroundedAnswer", () => { ], excludedSourceIds: [], searchSources, + readRetrievedChunk, }); expect(answer).toBe("Here are the requested identity images."); @@ -739,6 +854,25 @@ describe("generateAgenticGroundedAnswer", () => { agentGuidance: expect.stringContaining("Use this evidence"), }); expect(JSON.stringify(toolOutput)).not.toContain("https://blob.example"); + + const chunkOutput = await getCapturedAgentTools(agent).readRetrievedChunk.execute({ + id: "chunk_identity_1", + offset: 0, + limit: 80, + }); + + expect(readRetrievedChunk).toHaveBeenCalledWith({ + id: "chunk_identity_1", + offset: 0, + limit: 80, + }); + expect(chunkOutput).toMatchObject({ + id: "chunk_identity_1", + found: true, + contentSlice: "Full identity card text. [media asset URL hidden]", + hasMoreContent: false, + }); + expect(JSON.stringify(chunkOutput)).not.toContain("https://blob.example"); }); it("uses managed context for stored history and loop steps", async () => { @@ -767,6 +901,7 @@ describe("generateAgenticGroundedAnswer", () => { sources: [makeSource()], excludedSourceIds: [], searchSources: vi.fn(), + readRetrievedChunk: vi.fn(), }); const generateInput = getCapturedGenerateInput(capturedGenerateInput); @@ -859,6 +994,7 @@ describe("buildAgenticChatSystemPrompt", () => { }); expect(prompt).toContain("Always call searchSources") + expect(prompt).toContain("readRetrievedChunk") expect(prompt).toContain("evidenceText") expect(prompt).toContain("failureReason") expect(prompt).toContain("decisionTrace") @@ -996,6 +1132,13 @@ type CapturedAgentTools = { readonly dataType?: number }) => Promise } + readonly readRetrievedChunk: { + readonly execute: (input: { + readonly id: string + readonly offset?: number + readonly limit?: number + }) => Promise + } } function getCapturedAgent(agent: unknown): ToolLoopAgent { diff --git a/src/domains/chat/index.ts b/src/domains/chat/index.ts index 442e62a..081068e 100644 --- a/src/domains/chat/index.ts +++ b/src/domains/chat/index.ts @@ -3,8 +3,10 @@ import type { RetrievalQueryParams, RetrievalQueryResponse, RetrievalResult, + RetrievalSource, } from "@ontos-ai/knowhere-sdk" +import { logger } from "@/lib/logger" import type { ChatCitationView } from "@/domains/chat/types" import { toChatCitationViews, @@ -12,8 +14,12 @@ import { } from "./citations" import type { AgenticRetrievalQuery, + AgenticRetrievalResponse, AnswerQuestionInput, AnswerQuestionResult, + ReadRetrievedChunkInput, + ReadRetrievedChunkResult, + RetrievedChunkReference, } from "./contracts" import { excludeDocuments, @@ -27,8 +33,22 @@ import { const DEFAULT_TOP_K = 8 const MAX_AGENTIC_TOP_K = 12 const MAX_CITATION_RESULTS = 20 +const DEFAULT_CHUNK_READ_LIMIT = 2_000 +const MAX_CHUNK_READ_LIMIT = 4_000 const NO_RESULTS_ANSWER = "I couldn't find that in your sources." +type StoredRetrievedChunk = { + id: string + chunkId: string | null + kind: RetrievedChunkReference["kind"] + resultIndex: number | null + content: string + chunkType: string + score: number | null + source: RetrievalSource + hasAssetUrl: boolean +} + export type { AnswerQuestionInput, AnswerQuestionResult, @@ -60,21 +80,80 @@ export const answerQuestionWithRetrieval = ( Effect.gen(function* () { const question = input.question.trim() const retrievalResponses: RetrievalQueryResponse[] = [] + const retrievedChunkContext = createRetrievedChunkContext() + + logger.info("chat-agent: answer start", { + questionLength: question.length, + sourceCount: input.sources.length, + excludedSourceCount: input.excludedSourceIds.length, + messageCount: input.messages.length, + }) const searchSources = async ( queryInput: AgenticRetrievalQuery, - ): Promise => { - const response = await input.retrieval.query( - buildRetrievalQueryParams({ - input: queryInput, - fallbackQuestion: question, - namespace: input.namespace, - sources: input.sources, - excludedSourceIds: input.excludedSourceIds, - }), - ) - retrievalResponses.push(response) - return response + ): Promise => { + const startedAt = Date.now() + const retrievalQueryParams = buildRetrievalQueryParams({ + input: queryInput, + fallbackQuestion: question, + namespace: input.namespace, + sources: input.sources, + excludedSourceIds: input.excludedSourceIds, + }) + logger.info("chat-agent: searchSources start", { + query: retrievalQueryParams.query, + topK: retrievalQueryParams.topK, + dataType: retrievalQueryParams.dataType ?? null, + signalPathCount: retrievalQueryParams.signalPaths?.length ?? 0, + filterMode: retrievalQueryParams.filterMode ?? null, + threshold: retrievalQueryParams.threshold ?? null, + }) + + try { + const response = await input.retrieval.query(retrievalQueryParams) + retrievalResponses.push(response) + const chunkReferences = retrievedChunkContext.registerResponse({ + response, + responseIndex: retrievalResponses.length, + }) + logger.info("chat-agent: searchSources ok", { + query: response.query, + durationMs: Date.now() - startedAt, + resultCount: response.results.length, + referencedChunkCount: response.referencedChunks.length, + readableChunkCount: chunkReferences.length, + truncatedChunkCount: chunkReferences.filter( + (reference): boolean => reference.contentTruncated, + ).length, + stopReason: response.stopReason ?? null, + failureReason: response.failureReason ?? null, + }) + return { ...response, chunkReferences } + } catch (error) { + logger.error("chat-agent: searchSources failed", { + query: retrievalQueryParams.query, + durationMs: Date.now() - startedAt, + error: error instanceof Error ? error.message : String(error), + }) + throw error + } + } + + const readRetrievedChunk = async ( + readInput: ReadRetrievedChunkInput, + ): Promise => { + const result = retrievedChunkContext.read(readInput) + logger.info("chat-agent: readRetrievedChunk", { + id: result.id, + found: result.found, + offset: result.offset, + limit: result.limit, + contentLength: result.contentLength, + returnedLength: result.contentSlice.length, + hasMoreContent: result.hasMoreContent, + nextOffset: result.nextOffset, + }) + return result } const generatedAnswer = yield* Effect.tryPromise(() => @@ -84,9 +163,16 @@ export const answerQuestionWithRetrieval = ( sources: input.sources, excludedSourceIds: input.excludedSourceIds, searchSources, + readRetrievedChunk, }), ) + logger.info("chat-agent: answer generated", { + answerLength: generatedAnswer.length, + retrievalCallCount: retrievalResponses.length, + registeredChunkCount: retrievedChunkContext.size(), + }) + const rawResults = collectRetrievalResults(retrievalResponses, input.sources) if (rawResults.length === 0 && generatedAnswer.trim().length === 0) { return { answer: NO_RESULTS_ANSWER, citations: [] as ChatCitationView[] } @@ -101,6 +187,10 @@ export const answerQuestionWithRetrieval = ( }), ) const answer = removeRetrievedMediaAssetUrls(generatedAnswer, results) + logger.info("chat-agent: answer complete", { + answerLength: answer.length, + citationCount: results.length, + }) return { answer, citations: toChatCitationViews(results, answer), @@ -135,6 +225,158 @@ function buildRetrievalQueryParams(input: { } } +function createRetrievedChunkContext(): { + registerResponse(input: { + readonly response: RetrievalQueryResponse + readonly responseIndex: number + }): readonly RetrievedChunkReference[] + read(input: ReadRetrievedChunkInput): ReadRetrievedChunkResult + size(): number +} { + const chunksById = new Map() + + function storeChunk(chunk: StoredRetrievedChunk): void { + chunksById.set(chunk.id, chunk) + if (chunk.chunkId && chunk.chunkId !== chunk.id) { + chunksById.set(chunk.chunkId, chunk) + } + } + + return { + registerResponse(input): readonly RetrievedChunkReference[] { + const references: RetrievedChunkReference[] = [] + input.response.results.forEach((result, index): void => { + const resultIndex = index + 1 + const chunkId = getRetrievalResultChunkId(result) + const id = chunkId ?? `search_${input.responseIndex}_result_${resultIndex}` + const storedChunk: StoredRetrievedChunk = { + id, + chunkId, + kind: "result", + resultIndex, + content: result.content, + chunkType: result.chunkType, + score: result.score, + source: result.source, + hasAssetUrl: Boolean(result.assetUrl), + } + storeChunk(storedChunk) + references.push(toRetrievedChunkReference(storedChunk)) + }) + + input.response.referencedChunks.forEach((chunk, index): void => { + const id = chunk.chunkId || `search_${input.responseIndex}_reference_${index + 1}` + const existingChunk = chunksById.get(id) + if (existingChunk) { + references.push(toRetrievedChunkReference(existingChunk)) + return + } + + const storedChunk: StoredRetrievedChunk = { + id, + chunkId: chunk.chunkId || null, + kind: "referencedChunk", + resultIndex: null, + content: "", + chunkType: chunk.chunkType, + score: null, + source: { + documentId: chunk.documentId, + sourceFileName: null, + sectionPath: chunk.sectionPath, + }, + hasAssetUrl: Boolean(chunk.assetUrl), + } + storeChunk(storedChunk) + references.push(toRetrievedChunkReference(storedChunk)) + }) + + return references + }, + read(input): ReadRetrievedChunkResult { + const offset = normalizeChunkReadOffset(input.offset) + const limit = normalizeChunkReadLimit(input.limit) + const chunk = chunksById.get(input.id) + if (!chunk) { + return { + id: input.id, + chunkId: null, + found: false, + chunkType: null, + score: null, + source: null, + hasAssetUrl: false, + offset, + limit, + contentLength: 0, + contentSlice: "", + hasMoreContent: false, + nextOffset: null, + } + } + + const boundedOffset = Math.min(offset, chunk.content.length) + const endOffset = Math.min(boundedOffset + limit, chunk.content.length) + return { + id: chunk.id, + chunkId: chunk.chunkId, + found: true, + chunkType: chunk.chunkType, + score: chunk.score, + source: chunk.source, + hasAssetUrl: chunk.hasAssetUrl, + offset: boundedOffset, + limit, + contentLength: chunk.content.length, + contentSlice: chunk.content.slice(boundedOffset, endOffset), + hasMoreContent: endOffset < chunk.content.length, + nextOffset: endOffset < chunk.content.length ? endOffset : null, + } + }, + size(): number { + return chunksById.size + }, + } +} + +function toRetrievedChunkReference( + chunk: StoredRetrievedChunk, +): RetrievedChunkReference { + const contentPreview = chunk.content.slice(0, DEFAULT_CHUNK_READ_LIMIT) + return { + id: chunk.id, + chunkId: chunk.chunkId, + kind: chunk.kind, + resultIndex: chunk.resultIndex, + chunkType: chunk.chunkType, + score: chunk.score, + source: chunk.source, + hasAssetUrl: chunk.hasAssetUrl, + contentLength: chunk.content.length, + contentPreview, + contentTruncated: contentPreview.length < chunk.content.length, + } +} + +function getRetrievalResultChunkId(result: RetrievalResult): string | null { + const resultWithChunkId = result as RetrievalResult & { + readonly chunkId?: string | null + } + return resultWithChunkId.chunkId?.trim() || null +} + +function normalizeChunkReadOffset(value: number | undefined): number { + if (typeof value !== "number" || !Number.isSafeInteger(value)) return 0 + return Math.max(value, 0) +} + +function normalizeChunkReadLimit(value: number | undefined): number { + if (typeof value !== "number" || !Number.isSafeInteger(value)) { + return DEFAULT_CHUNK_READ_LIMIT + } + return Math.min(Math.max(value, 1), MAX_CHUNK_READ_LIMIT) +} + function normalizeTopK(value: number | undefined): number { if (typeof value !== "number" || !Number.isSafeInteger(value)) { return DEFAULT_TOP_K diff --git a/src/domains/chat/prompt.ts b/src/domains/chat/prompt.ts index 7d635c7..6501622 100644 --- a/src/domains/chat/prompt.ts +++ b/src/domains/chat/prompt.ts @@ -8,15 +8,24 @@ import { type PrepareStepFunction, } from "ai" import { Effect } from "effect" -import type { RetrievalQueryResponse, RetrievalResult } from "@ontos-ai/knowhere-sdk" +import type { + RetrievalQueryResponse, + RetrievalReferencedChunk, +} from "@ontos-ai/knowhere-sdk" import { z } from "zod" import { CHAT_MODEL } from "@/lib/ai" +import { logger } from "@/lib/logger" import type { Source } from "@/infrastructure/db/schema" import type { ChatCitationView } from "@/domains/chat/types" import type { AgenticRetrievalQuery, + AgenticRetrievalResponse, ChatHistoryMessage, + ReadRetrievedChunk, + ReadRetrievedChunkInput, + ReadRetrievedChunkResult, + RetrievedChunkReference, SearchSources, } from "./contracts" import { normalizeRetrievalQuery } from "./retrieval" @@ -34,8 +43,8 @@ const SOURCE_CONTEXT_LIMIT = 12 const AGENTIC_SEARCH_STEP_LIMIT = 5 const TOOL_EVIDENCE_CHAR_LIMIT = 6_000 const TOOL_RESULT_CONTENT_CHAR_LIMIT = 700 -const TOOL_RESULT_LIMIT = 8 -const TOOL_REFERENCED_CHUNK_LIMIT = 12 +const TOOL_CHUNK_READ_LIMIT_DEFAULT = 2_000 +const TOOL_CHUNK_READ_LIMIT_MAX = 4_000 const RAW_URL_PATTERN = /https?:\/\/[^\s)\]}>"']+/g const REDACTED_MEDIA_URL = "[media asset URL hidden]" @@ -68,6 +77,7 @@ type GenerateAgenticGroundedAnswerInput = { sources: readonly Source[] excludedSourceIds: readonly string[] searchSources: SearchSources + readRetrievedChunk: ReadRetrievedChunk } type AgenticChatTools = ReturnType @@ -243,19 +253,23 @@ export function buildAgenticChatSystemPrompt( return [ "You are a Notebook research agent that answers user questions from their uploaded sources.", - "You have one tool: searchSources. It runs Knowhere retrieval and returns a RetrievalQueryResponse summary.", - "Treat each tool result like external context from a remote source index: inspect it, reason over it, then decide whether to retrieve again.", + "You have two tools: searchSources and readRetrievedChunk.", + "searchSources runs Knowhere retrieval and returns a RetrievalQueryResponse summary with compact previews and request-local chunk ids.", + "readRetrievedChunk reads more content from a chunk id returned by searchSources in this same answer run.", + "Treat each tool result like external context from a remote source index: inspect it, reason over it, then decide whether to retrieve again or read more from a returned chunk.", "", "Agent loop rules:", "1. Always call searchSources before writing a final answer.", - "2. Read the tool output fields: evidenceText, answerText, results, referencedChunks, stopReason, failureReason, and decisionTrace.", + "2. Read the tool output fields: evidenceText, answerText, results, referencedChunks, chunkReferences, stopReason, failureReason, and decisionTrace.", "3. Use one response to guide the next query: carry forward discovered people, organizations, document names, section paths, file paths, chunk types, and failure reasons.", "4. If evidenceText/results/referencedChunks directly support the answer, stop searching and answer.", "5. If failureReason is present, result counts are zero, or evidence does not cover the user's requested entity/topic/media, call searchSources again with a more specific or broader query.", "6. For image requests use dataType=3 or dataType=5. If an initial text result identifies a relevant person or section but not an image asset, query again with that person/section plus the requested image concept, e.g. identity card / 身份证 / 公民身份证明.", "7. For table requests use dataType=4 or dataType=6.", "8. Do not paste raw prior messages into searchSources.query. The query must be concise and contain only distilled search terms such as document title, person, topic, date, section path, or asset kind.", - "9. Stop after enough evidence or when further searches are unlikely to help; then clearly say what was not found and what retrieval context was missing.", + "9. If a returned chunk preview looks relevant but you want more data before answering, call readRetrievedChunk with that chunk id plus offset/limit. If hasMoreContent is true and the next slice is still needed, call readRetrievedChunk again with nextOffset.", + "10. Use readRetrievedChunk selectively; do not read every chunk when the previews already answer the question.", + "11. Stop after enough evidence or when further searches are unlikely to help; then clearly say what was not found and what retrieval context was missing.", "", "Answering rules:", "Use retrieved evidence as the factual source of truth.", @@ -280,11 +294,39 @@ function buildAgenticChatAgent( tools: buildAgenticChatTools(input), stopWhen: stepCountIs(AGENTIC_SEARCH_STEP_LIMIT), prepareStep: buildAgenticPrepareStep(), + onStepFinish: (event) => { + logger.info("chat-agent: loop step finished", { + stepNumber: event.stepNumber, + finishReason: event.finishReason, + textLength: event.text.length, + toolCalls: event.toolCalls.map((toolCall) => toolCall.toolName), + toolResultCount: event.toolResults.length, + inputTokens: event.usage.inputTokens, + outputTokens: event.usage.outputTokens, + totalTokens: event.usage.totalTokens, + }) + }, + onFinish: (event) => { + logger.info("chat-agent: loop finished", { + stepCount: event.steps.length, + finishReason: event.finishReason, + textLength: event.text.length, + toolCalls: event.steps.flatMap((step) => + step.toolCalls.map((toolCall) => toolCall.toolName), + ), + inputTokens: event.totalUsage.inputTokens, + outputTokens: event.totalUsage.outputTokens, + totalTokens: event.totalUsage.totalTokens, + }) + }, }) } function buildAgenticChatTools( - input: Pick, + input: Pick< + GenerateAgenticGroundedAnswerInput, + "searchSources" | "readRetrievedChunk" + >, ) { return { searchSources: tool({ @@ -343,6 +385,39 @@ function buildAgenticChatTools( execute: async (queryInput: AgenticRetrievalQuery) => buildRetrievalToolOutput(await input.searchSources(queryInput)), }), + readRetrievedChunk: tool({ + description: + "Read an offset/limit content slice from a request-local chunk id " + + "returned by searchSources. Use this when a returned chunk preview is relevant " + + "and you want more data before answering.", + inputSchema: z.object({ + id: z + .string() + .min(1) + .describe( + "The request-local id or chunkId from searchSources.results, searchSources.referencedChunks, or searchSources.chunkReferences.", + ), + offset: z + .number() + .int() + .min(0) + .optional() + .describe("Character offset to start reading from. Defaults to 0."), + limit: z + .number() + .int() + .min(1) + .max(TOOL_CHUNK_READ_LIMIT_MAX) + .optional() + .describe( + `Maximum characters to return. Defaults to ${TOOL_CHUNK_READ_LIMIT_DEFAULT}; max ${TOOL_CHUNK_READ_LIMIT_MAX}.`, + ), + }), + execute: async (readInput: ReadRetrievedChunkInput) => + buildRetrievedChunkToolOutput( + await input.readRetrievedChunk(readInput), + ), + }), } as const } @@ -479,7 +554,7 @@ function getUnknownTextLength(value: unknown): number { return JSON.stringify(value).length } -function buildRetrievalToolOutput(response: RetrievalQueryResponse): object { +function buildRetrievalToolOutput(response: AgenticRetrievalResponse): object { return { namespace: response.namespace, query: response.query, @@ -491,22 +566,17 @@ function buildRetrievalToolOutput(response: RetrievalQueryResponse): object { : response.answerText, resultCount: response.results.length, referencedChunkCount: response.referencedChunks.length, + readableChunkCount: response.chunkReferences.length, hasEvidenceText: Boolean(response.evidenceText?.trim()), evidenceText: truncateSafeContextTextToLimit( response.evidenceText ?? "", TOOL_EVIDENCE_CHAR_LIMIT, ), - results: response.results.slice(0, TOOL_RESULT_LIMIT).map(formatToolResult), - referencedChunks: response.referencedChunks - .slice(0, TOOL_REFERENCED_CHUNK_LIMIT) - .map((chunk) => ({ - chunkId: chunk.chunkId, - documentId: chunk.documentId, - chunkType: chunk.chunkType, - sectionPath: chunk.sectionPath, - filePath: chunk.filePath ? redactRawUrls(chunk.filePath) : null, - hasAssetUrl: Boolean(chunk.assetUrl), - })), + results: response.chunkReferences + .filter((reference): boolean => reference.kind === "result") + .map(formatToolResultReference), + referencedChunks: response.referencedChunks.map(formatToolReferencedChunk), + chunkReferences: response.chunkReferences.map(formatToolChunkReference), decisionTrace: response.decisionTrace ?.slice(-6) @@ -515,27 +585,101 @@ function buildRetrievalToolOutput(response: RetrievalQueryResponse): object { } } -function formatToolResult(result: RetrievalResult): object { +function formatToolResultReference(reference: RetrievedChunkReference): object { return { - chunkType: result.chunkType, - score: result.score, - hasAssetUrl: Boolean(result.assetUrl), + id: reference.id, + chunkId: reference.chunkId, + resultIndex: reference.resultIndex, + chunkType: reference.chunkType, + score: reference.score, + hasAssetUrl: reference.hasAssetUrl, + contentLength: reference.contentLength, + contentTruncated: reference.contentTruncated, source: { - documentId: result.source.documentId ?? null, - sourceFileName: result.source.sourceFileName - ? redactRawUrls(result.source.sourceFileName) + documentId: reference.source.documentId ?? null, + sourceFileName: reference.source.sourceFileName + ? redactRawUrls(reference.source.sourceFileName) : null, - sectionPath: result.source.sectionPath - ? redactRawUrls(result.source.sectionPath) + sectionPath: reference.source.sectionPath + ? redactRawUrls(reference.source.sectionPath) : null, }, + contentPreview: truncateSafeContextTextToLimit( + reference.contentPreview, + TOOL_RESULT_CONTENT_CHAR_LIMIT, + ), content: truncateSafeContextTextToLimit( - result.content, + reference.contentPreview, TOOL_RESULT_CONTENT_CHAR_LIMIT, ), } } +function formatToolReferencedChunk(chunk: RetrievalReferencedChunk): object { + return { + id: chunk.chunkId, + chunkId: chunk.chunkId, + documentId: chunk.documentId, + chunkType: chunk.chunkType, + sectionPath: redactRawUrls(chunk.sectionPath), + filePath: chunk.filePath ? redactRawUrls(chunk.filePath) : null, + hasAssetUrl: Boolean(chunk.assetUrl), + } +} + +function formatToolChunkReference(reference: RetrievedChunkReference): object { + return { + id: reference.id, + chunkId: reference.chunkId, + kind: reference.kind, + resultIndex: reference.resultIndex, + chunkType: reference.chunkType, + score: reference.score, + hasAssetUrl: reference.hasAssetUrl, + contentLength: reference.contentLength, + contentTruncated: reference.contentTruncated, + source: { + documentId: reference.source.documentId ?? null, + sourceFileName: reference.source.sourceFileName + ? redactRawUrls(reference.source.sourceFileName) + : null, + sectionPath: reference.source.sectionPath + ? redactRawUrls(reference.source.sectionPath) + : null, + }, + } +} + +function buildRetrievedChunkToolOutput( + result: ReadRetrievedChunkResult, +): object { + return { + id: result.id, + chunkId: result.chunkId, + found: result.found, + chunkType: result.chunkType, + score: result.score, + source: result.source + ? { + documentId: result.source.documentId ?? null, + sourceFileName: result.source.sourceFileName + ? redactRawUrls(result.source.sourceFileName) + : null, + sectionPath: result.source.sectionPath + ? redactRawUrls(result.source.sectionPath) + : null, + } + : null, + hasAssetUrl: result.hasAssetUrl, + offset: result.offset, + limit: result.limit, + contentLength: result.contentLength, + contentSlice: redactRawUrls(result.contentSlice), + hasMoreContent: result.hasMoreContent, + nextOffset: result.nextOffset, + } +} + function getRetrievalResponseGuidance( response: RetrievalQueryResponse, ): string { diff --git a/src/domains/chat/service.test.ts b/src/domains/chat/service.test.ts index 0150ca2..5120a68 100644 --- a/src/domains/chat/service.test.ts +++ b/src/domains/chat/service.test.ts @@ -65,6 +65,7 @@ describe("handleChatTurn", () => { sources, excludedSourceIds: ["source_excluded"], searchSources: expect.any(Function), + readRetrievedChunk: expect.any(Function), }); expect(repository.appendMessageToThread).toHaveBeenNthCalledWith(1, "workspace_1", { threadId: "thread_1", @@ -213,6 +214,7 @@ describe("handleChatTurn", () => { sources, excludedSourceIds: [], searchSources: expect.any(Function), + readRetrievedChunk: expect.any(Function), }); expect(retrieval.query).toHaveBeenCalledWith({ namespace: "notebook-namespace", From dac93e362ebae68e5c850539129f9b5e87666f8e Mon Sep 17 00:00:00 2001 From: suguanYang Date: Thu, 4 Jun 2026 10:47:35 +0000 Subject: [PATCH 05/13] Add typed retrieval planning to chat agent --- src/domains/chat/contracts.ts | 22 ++++++++++++- src/domains/chat/index.test.ts | 44 ++++++++++++++++++------- src/domains/chat/index.ts | 44 +++++++++++++++++++++++-- src/domains/chat/prompt.ts | 60 +++++++++++++++++++++++++++------- 4 files changed, 144 insertions(+), 26 deletions(-) diff --git a/src/domains/chat/contracts.ts b/src/domains/chat/contracts.ts index 234cf4d..8da1707 100644 --- a/src/domains/chat/contracts.ts +++ b/src/domains/chat/contracts.ts @@ -18,6 +18,21 @@ export type ChatHistoryMessage = { citations?: readonly ChatCitationView[] } +export type AgenticRetrievalIntent = + | "overview" + | "entity" + | "section" + | "image" + | "table" + | "detail" + | "citation" + +export type AgenticRetrievalPlan = { + intent: AgenticRetrievalIntent | null + purpose: string | null + priority: number | null +} + export type AgenticRetrievalQuery = Pick< RetrievalQueryParams, | "query" @@ -26,7 +41,11 @@ export type AgenticRetrievalQuery = Pick< | "signalPaths" | "filterMode" | "threshold" -> +> & { + intent?: AgenticRetrievalIntent + purpose?: string + priority?: number +} export type RetrievedChunkReference = { id: string @@ -44,6 +63,7 @@ export type RetrievedChunkReference = { export type AgenticRetrievalResponse = RetrievalQueryResponse & { chunkReferences: readonly RetrievedChunkReference[] + retrievalPlan?: AgenticRetrievalPlan } export type SearchSources = ( diff --git a/src/domains/chat/index.test.ts b/src/domains/chat/index.test.ts index 208f768..c695cd8 100644 --- a/src/domains/chat/index.test.ts +++ b/src/domains/chat/index.test.ts @@ -14,6 +14,10 @@ import { parseChatRequestBody, } from "." import type { Source } from "@/infrastructure/db/schema" +import type { + AgenticRetrievalQuery, + ReadRetrievedChunkInput, +} from "./contracts" vi.mock("ai", async (importOriginal) => ({ ...(await importOriginal()), @@ -215,7 +219,12 @@ describe("answerQuestionWithRetrieval", () => { }), }; const generateAnswer = vi.fn(async ({ searchSources }) => { - await searchSources({ query: "SpaceX rocket photos", dataType: 3 }); + await searchSources({ + query: "SpaceX rocket photos", + intent: "image", + purpose: "Find visual rocket launch chunks.", + priority: 5, + }); return "Use this launch photo. https://blob.example/images/image-9-Night%20Rocket%20Launch.jpg"; }); const loadSourceAssetUrls = vi.fn().mockResolvedValue({ @@ -724,6 +733,11 @@ describe("generateAgenticGroundedAnswer", () => { namespace: "notebook-workspace", query: "公民身份证 图片", routerUsed: "workflow_single_step", + retrievalPlan: { + intent: "image", + purpose: "Find identity-card image evidence.", + priority: 5, + }, chunkReferences: [ { id: "chunk_identity_1", @@ -799,6 +813,8 @@ describe("generateAgenticGroundedAnswer", () => { const generateInput = getCapturedGenerateInput(capturedGenerateInput); expect(settings.instructions).toContain("RetrievalQueryResponse") + expect(settings.instructions).toContain("L0/L1 retrieval") + expect(settings.instructions).toContain("typed retrieval plan") expect(settings.instructions).toContain("dataType=3") expect(settings.instructions).toContain( "Do not paste raw prior messages into searchSources.query", @@ -819,15 +835,26 @@ describe("generateAgenticGroundedAnswer", () => { const toolOutput = await getCapturedAgentTools(agent).searchSources.execute({ query: "公民身份证 图片", + intent: "image", + purpose: "Find identity-card image evidence.", + priority: 5, dataType: 3, }); expect(searchSources).toHaveBeenCalledWith({ query: "公民身份证 图片", + intent: "image", + purpose: "Find identity-card image evidence.", + priority: 5, dataType: 3, }); expect(toolOutput).toMatchObject({ query: "公民身份证 图片", + retrievalPlan: { + intent: "image", + purpose: "Find identity-card image evidence.", + priority: 5, + }, routerUsed: "workflow_single_step", stopReason: "answer_done", failureReason: null, @@ -995,6 +1022,8 @@ describe("buildAgenticChatSystemPrompt", () => { expect(prompt).toContain("Always call searchSources") expect(prompt).toContain("readRetrievedChunk") + expect(prompt).toContain("L0/L1 retrieval") + expect(prompt).toContain("typed retrieval plan") expect(prompt).toContain("evidenceText") expect(prompt).toContain("failureReason") expect(prompt).toContain("decisionTrace") @@ -1002,7 +1031,7 @@ describe("buildAgenticChatSystemPrompt", () => { expect(prompt).toContain("person or section but not an image asset") expect(prompt).toContain("Do not paste raw prior messages") expect(prompt).toContain("身份证") - expect(prompt).toContain("For image requests use dataType=3") + expect(prompt).toContain("For image requests use intent=image") expect(prompt).toContain("商务标文件.pdf") }); }); @@ -1127,17 +1156,10 @@ type CapturedAgentSettings = { type CapturedAgentTools = { readonly searchSources: { - readonly execute: (input: { - readonly query: string - readonly dataType?: number - }) => Promise + readonly execute: (input: AgenticRetrievalQuery) => Promise } readonly readRetrievedChunk: { - readonly execute: (input: { - readonly id: string - readonly offset?: number - readonly limit?: number - }) => Promise + readonly execute: (input: ReadRetrievedChunkInput) => Promise } } diff --git a/src/domains/chat/index.ts b/src/domains/chat/index.ts index 081068e..e45efbd 100644 --- a/src/domains/chat/index.ts +++ b/src/domains/chat/index.ts @@ -14,6 +14,7 @@ import { } from "./citations" import type { AgenticRetrievalQuery, + AgenticRetrievalPlan, AgenticRetrievalResponse, AnswerQuestionInput, AnswerQuestionResult, @@ -93,6 +94,7 @@ export const answerQuestionWithRetrieval = ( queryInput: AgenticRetrievalQuery, ): Promise => { const startedAt = Date.now() + const retrievalPlan = toAgenticRetrievalPlan(queryInput) const retrievalQueryParams = buildRetrievalQueryParams({ input: queryInput, fallbackQuestion: question, @@ -107,6 +109,9 @@ export const answerQuestionWithRetrieval = ( signalPathCount: retrievalQueryParams.signalPaths?.length ?? 0, filterMode: retrievalQueryParams.filterMode ?? null, threshold: retrievalQueryParams.threshold ?? null, + intent: retrievalPlan.intent, + purpose: retrievalPlan.purpose, + priority: retrievalPlan.priority, }) try { @@ -127,13 +132,17 @@ export const answerQuestionWithRetrieval = ( ).length, stopReason: response.stopReason ?? null, failureReason: response.failureReason ?? null, + intent: retrievalPlan.intent, + priority: retrievalPlan.priority, }) - return { ...response, chunkReferences } + return { ...response, chunkReferences, retrievalPlan } } catch (error) { logger.error("chat-agent: searchSources failed", { query: retrievalQueryParams.query, durationMs: Date.now() - startedAt, error: error instanceof Error ? error.message : String(error), + intent: retrievalPlan.intent, + priority: retrievalPlan.priority, }) throw error } @@ -208,12 +217,13 @@ function buildRetrievalQueryParams(input: { input.input.query, input.fallbackQuestion, ) + const dataType = normalizeRetrievalDataType(input.input) return { namespace: input.namespace, query, topK: normalizeTopK(input.input.topK), useAgentic: true, - ...(input.input.dataType ? { dataType: input.input.dataType } : {}), + ...(dataType ? { dataType } : {}), ...(input.input.signalPaths && input.input.signalPaths.length > 0 ? { signalPaths: input.input.signalPaths } : {}), @@ -225,6 +235,36 @@ function buildRetrievalQueryParams(input: { } } +function toAgenticRetrievalPlan( + input: AgenticRetrievalQuery, +): AgenticRetrievalPlan { + return { + intent: input.intent ?? null, + purpose: normalizeRetrievalPurpose(input.purpose), + priority: normalizeRetrievalPriority(input.priority), + } +} + +function normalizeRetrievalPurpose(value: string | undefined): string | null { + const normalized = value?.replace(/\s+/g, " ").trim() + if (!normalized) return null + return normalized.slice(0, 240) +} + +function normalizeRetrievalPriority(value: number | undefined): number | null { + if (typeof value !== "number" || !Number.isSafeInteger(value)) return null + return Math.min(Math.max(value, 1), 5) +} + +function normalizeRetrievalDataType( + input: AgenticRetrievalQuery, +): RetrievalQueryParams["dataType"] | undefined { + if (input.dataType) return input.dataType + if (input.intent === "image") return 3 + if (input.intent === "table") return 4 + return undefined +} + function createRetrievedChunkContext(): { registerResponse(input: { readonly response: RetrievalQueryResponse diff --git a/src/domains/chat/prompt.ts b/src/domains/chat/prompt.ts index 6501622..63ba3ec 100644 --- a/src/domains/chat/prompt.ts +++ b/src/domains/chat/prompt.ts @@ -257,19 +257,22 @@ export function buildAgenticChatSystemPrompt( "searchSources runs Knowhere retrieval and returns a RetrievalQueryResponse summary with compact previews and request-local chunk ids.", "readRetrievedChunk reads more content from a chunk id returned by searchSources in this same answer run.", "Treat each tool result like external context from a remote source index: inspect it, reason over it, then decide whether to retrieve again or read more from a returned chunk.", + "Use searchSources like L0/L1 retrieval: compact previews are for quick relevance, navigation, and rerank-style selection. Use readRetrievedChunk like L2 detail: full content slices are loaded only after a returned chunk looks relevant.", "", "Agent loop rules:", "1. Always call searchSources before writing a final answer.", - "2. Read the tool output fields: evidenceText, answerText, results, referencedChunks, chunkReferences, stopReason, failureReason, and decisionTrace.", - "3. Use one response to guide the next query: carry forward discovered people, organizations, document names, section paths, file paths, chunk types, and failure reasons.", - "4. If evidenceText/results/referencedChunks directly support the answer, stop searching and answer.", - "5. If failureReason is present, result counts are zero, or evidence does not cover the user's requested entity/topic/media, call searchSources again with a more specific or broader query.", - "6. For image requests use dataType=3 or dataType=5. If an initial text result identifies a relevant person or section but not an image asset, query again with that person/section plus the requested image concept, e.g. identity card / 身份证 / 公民身份证明.", - "7. For table requests use dataType=4 or dataType=6.", - "8. Do not paste raw prior messages into searchSources.query. The query must be concise and contain only distilled search terms such as document title, person, topic, date, section path, or asset kind.", - "9. If a returned chunk preview looks relevant but you want more data before answering, call readRetrievedChunk with that chunk id plus offset/limit. If hasMoreContent is true and the next slice is still needed, call readRetrievedChunk again with nextOffset.", - "10. Use readRetrievedChunk selectively; do not read every chunk when the previews already answer the question.", - "11. Stop after enough evidence or when further searches are unlikely to help; then clearly say what was not found and what retrieval context was missing.", + "2. Before each searchSources call, choose a typed retrieval plan: intent, purpose, and priority. This is Notebook-side intent analysis for the agent loop.", + "3. Use intent=overview for broad discovery, entity for people/organizations, section for located headings/paths, image for visual assets, table for tabular evidence, detail for precise facts, and citation for source verification.", + "4. Read the tool output fields: retrievalPlan, evidenceText, answerText, results, referencedChunks, chunkReferences, stopReason, failureReason, and decisionTrace.", + "5. Use one response to guide the next query: carry forward discovered people, organizations, document names, section paths, file paths, chunk types, and failure reasons.", + "6. If evidenceText/results/referencedChunks directly support the answer, stop searching and answer.", + "7. If failureReason is present, result counts are zero, or evidence does not cover the user's requested entity/topic/media, call searchSources again with a more specific or broader query.", + "8. For image requests use intent=image and dataType=3 or dataType=5. If an initial text result identifies a relevant person or section but not an image asset, query again with that person/section plus the requested image concept, e.g. identity card / 身份证 / 公民身份证明.", + "9. For table requests use intent=table and dataType=4 or dataType=6.", + "10. Do not paste raw prior messages into searchSources.query. The query must be concise and contain only distilled search terms such as document title, person, topic, date, section path, or asset kind.", + "11. If a returned chunk preview looks relevant but you want more data before answering, call readRetrievedChunk with that chunk id plus offset/limit. If hasMoreContent is true and the next slice is still needed, call readRetrievedChunk again with nextOffset.", + "12. Use readRetrievedChunk selectively; do not read every chunk when the previews already answer the question.", + "13. Stop after enough evidence or when further searches are unlikely to help; then clearly say what was not found and what retrieval context was missing.", "", "Answering rules:", "Use retrieved evidence as the factual source of truth.", @@ -333,8 +336,9 @@ function buildAgenticChatTools( description: "Search the user's Notebook sources through Knowhere retrieval. " + "Treat each response as external context from a remote source index. " + - "Use it before answering, and call it again with refined text, media, " + - "or section-path queries when the RetrievalQueryResponse says evidence is missing or weak.", + "Use it before answering, include a typed retrieval plan, and call it " + + "again with refined text, media, or section-path queries when the " + + "RetrievalQueryResponse says evidence is missing or weak.", inputSchema: z.object({ query: z .string() @@ -342,6 +346,37 @@ function buildAgenticChatTools( .describe( "A concise, self-contained retrieval query. Do not paste raw chat history or previous messages. Use only distilled terms such as document title, person, topic, date, section path, or asset kind when needed.", ), + intent: z + .enum([ + "overview", + "entity", + "section", + "image", + "table", + "detail", + "citation", + ]) + .optional() + .describe( + "Typed retrieval intent for the agent loop: overview, entity, section, image, table, detail, or citation. Use image/table for visual or tabular requests.", + ), + purpose: z + .string() + .min(1) + .max(240) + .optional() + .describe( + "Short reason this query is needed, such as finding an entity, locating an image asset, or verifying a citation.", + ), + priority: z + .number() + .int() + .min(1) + .max(5) + .optional() + .describe( + "Planner priority from 1-5. Use 5 for required evidence and lower values for exploratory follow-up.", + ), topK: z .number() .int() @@ -558,6 +593,7 @@ function buildRetrievalToolOutput(response: AgenticRetrievalResponse): object { return { namespace: response.namespace, query: response.query, + retrievalPlan: response.retrievalPlan ?? null, routerUsed: response.routerUsed, stopReason: response.stopReason ?? null, failureReason: response.failureReason ?? null, From d380992e2630b7e32aaab83fdc564f6694cc227b Mon Sep 17 00:00:00 2001 From: suguanYang Date: Thu, 4 Jun 2026 21:50:51 +0800 Subject: [PATCH 06/13] fix(chat): surface generation failures explicitly --- src/domains/chat/contracts.ts | 16 ++-- src/domains/chat/index.test.ts | 53 ++++++++++- src/domains/chat/index.ts | 13 ++- src/domains/chat/prompt.ts | 12 +-- src/domains/chat/route-answer.ts | 127 ++++++++++++++++++++----- src/domains/chat/route-service.test.ts | 78 +++++++++++++++ 6 files changed, 257 insertions(+), 42 deletions(-) diff --git a/src/domains/chat/contracts.ts b/src/domains/chat/contracts.ts index 8da1707..46ec50a 100644 --- a/src/domains/chat/contracts.ts +++ b/src/domains/chat/contracts.ts @@ -33,18 +33,16 @@ export type AgenticRetrievalPlan = { priority: number | null } +export type AgenticRetrievalDataTypeInput = number + export type AgenticRetrievalQuery = Pick< RetrievalQueryParams, - | "query" - | "topK" - | "dataType" - | "signalPaths" - | "filterMode" - | "threshold" + "query" | "topK" | "signalPaths" | "filterMode" | "threshold" > & { - intent?: AgenticRetrievalIntent - purpose?: string - priority?: number + readonly dataType?: AgenticRetrievalDataTypeInput + readonly intent?: AgenticRetrievalIntent + readonly purpose?: string + readonly priority?: number } export type RetrievedChunkReference = { diff --git a/src/domains/chat/index.test.ts b/src/domains/chat/index.test.ts index c695cd8..a4d0dd1 100644 --- a/src/domains/chat/index.test.ts +++ b/src/domains/chat/index.test.ts @@ -833,7 +833,25 @@ describe("generateAgenticGroundedAnswer", () => { activeTools: ["searchSources"], }) - const toolOutput = await getCapturedAgentTools(agent).searchSources.execute({ + const searchSourcesTool = getCapturedAgentTools(agent).searchSources + expect( + getSearchSourcesDataTypeSchema(searchSourcesTool)._def?.innerType?._def + ?.type, + ).toBe("number") + expect( + searchSourcesTool.inputSchema.safeParse({ + query: "公民身份证 图片", + dataType: 3, + }).success, + ).toBe(true) + expect( + searchSourcesTool.inputSchema.safeParse({ + query: "公民身份证 图片", + dataType: 7, + }).success, + ).toBe(false) + + const toolOutput = await searchSourcesTool.execute({ query: "公民身份证 图片", intent: "image", purpose: "Find identity-card image evidence.", @@ -1156,6 +1174,15 @@ type CapturedAgentSettings = { type CapturedAgentTools = { readonly searchSources: { + readonly inputSchema: { + readonly _def?: { + readonly type?: string + readonly shape?: + | Record + | (() => Record) + } + readonly safeParse: (value: unknown) => { readonly success: boolean } + } readonly execute: (input: AgenticRetrievalQuery) => Promise } readonly readRetrievedChunk: { @@ -1163,6 +1190,13 @@ type CapturedAgentTools = { } } +type CapturedZodSchema = { + readonly _def?: { + readonly type?: string + readonly innerType?: CapturedZodSchema + } +} + function getCapturedAgent(agent: unknown): ToolLoopAgent { expect(agent).toBeInstanceOf(ToolLoopAgent) return agent as ToolLoopAgent @@ -1183,3 +1217,20 @@ function getCapturedAgentSettings(agent: ToolLoopAgent): CapturedAgentSettings { function getCapturedAgentTools(agent: ToolLoopAgent): CapturedAgentTools { return agent.tools as unknown as CapturedAgentTools } + +function getSearchSourcesDataTypeSchema( + tool: CapturedAgentTools["searchSources"], +): CapturedZodSchema { + const shape = tool.inputSchema._def?.shape + const fields = typeof shape === "function" ? shape() : shape + if (!fields) { + throw new Error("searchSources input schema should expose fields.") + } + + const dataTypeSchema = fields.dataType + if (!dataTypeSchema) { + throw new Error("searchSources input schema should include dataType.") + } + + return dataTypeSchema +} diff --git a/src/domains/chat/index.ts b/src/domains/chat/index.ts index e45efbd..82b1ebc 100644 --- a/src/domains/chat/index.ts +++ b/src/domains/chat/index.ts @@ -38,6 +38,8 @@ const DEFAULT_CHUNK_READ_LIMIT = 2_000 const MAX_CHUNK_READ_LIMIT = 4_000 const NO_RESULTS_ANSWER = "I couldn't find that in your sources." +type RetrievalDataType = NonNullable + type StoredRetrievedChunk = { id: string chunkId: string | null @@ -259,12 +261,21 @@ function normalizeRetrievalPriority(value: number | undefined): number | null { function normalizeRetrievalDataType( input: AgenticRetrievalQuery, ): RetrievalQueryParams["dataType"] | undefined { - if (input.dataType) return input.dataType + if (isRetrievalDataType(input.dataType)) return input.dataType if (input.intent === "image") return 3 if (input.intent === "table") return 4 return undefined } +function isRetrievalDataType(value: unknown): value is RetrievalDataType { + return ( + typeof value === "number" && + Number.isSafeInteger(value) && + value >= 1 && + value <= 6 + ) +} + function createRetrievedChunkContext(): { registerResponse(input: { readonly response: RetrievalQueryResponse diff --git a/src/domains/chat/prompt.ts b/src/domains/chat/prompt.ts index 63ba3ec..6e7d6f5 100644 --- a/src/domains/chat/prompt.ts +++ b/src/domains/chat/prompt.ts @@ -385,14 +385,10 @@ function buildAgenticChatTools( .optional() .describe("Number of chunks to return. Defaults to 8."), dataType: z - .union([ - z.literal(1), - z.literal(2), - z.literal(3), - z.literal(4), - z.literal(5), - z.literal(6), - ]) + .number() + .int() + .min(1) + .max(6) .optional() .describe( "Optional chunk type filter: 1=all, 2=text, 3=image, 4=table, 5=text+image, 6=text+table.", diff --git a/src/domains/chat/route-answer.ts b/src/domains/chat/route-answer.ts index d20a264..741749b 100644 --- a/src/domains/chat/route-answer.ts +++ b/src/domains/chat/route-answer.ts @@ -1,4 +1,4 @@ -import { Effect, Either } from "effect" +import { Cause, Effect, Either, Option } from "effect" import { generateAgenticGroundedAnswer, @@ -6,12 +6,16 @@ import { } from "@/domains/chat" import { handleChatTurn, + type ChatTurnError, type ChatTurnValue, } from "@/domains/chat/service" import { chatTurnPersistence } from "@/domains/chat/chat-turn-persistence" import { reconcileSourcesForWorkspace } from "@/domains/sources/reconcile" import { sourceService } from "@/domains/sources/service" import { notebookRequestContext } from "@/domains/workspace/request-context" +import { isAuthError } from "@/integrations/dashboard/api-key-service" +import { summarizeUnknownError } from "@/lib/format-log-value" +import { logger } from "@/lib/logger" import { routeResult, type RouteResult } from "@/lib/route-result" type RouteResponse = RouteResult @@ -20,6 +24,13 @@ type MessageBody = { readonly message: string } +type ChatRouteFailure = { + readonly status: 401 | 502 + readonly message: string +} + +type ChatAnswerFailure = ChatTurnError | ChatRouteFailure + type AnswerChatInput = { readonly body: unknown } @@ -48,29 +59,50 @@ const answerChatEffect = (input: AnswerChatInput) => reconcileSourcesForWorkspace(workspace, client), ) - const result = yield* Effect.tryPromise(() => - handleChatTurn({ - workspace, - sources, - question: body.value.question, - threadId: body.value.threadId, - excludedSourceIds: body.value.excludedSourceIds, - retrieval: client.retrieval, - generateAnswer: generateAgenticGroundedAnswer, - loadSourceAssetUrls: (source) => - sourceService.getParseAssetUrls(workspace.id, source.id), - repository: chatTurnPersistence.createRepository(), - }), - ).pipe( - Effect.catchAll(() => - Effect.succeed( - Either.left({ - status: 401, - message: "Your session may have expired. Please refresh the page.", - }), + const result: Either.Either = + yield* Effect.tryPromise(() => + handleChatTurn({ + workspace, + sources, + question: body.value.question, + threadId: body.value.threadId, + excludedSourceIds: body.value.excludedSourceIds, + retrieval: client.retrieval, + generateAnswer: generateAgenticGroundedAnswer, + loadSourceAssetUrls: (source) => + sourceService.getParseAssetUrls(workspace.id, source.id), + repository: chatTurnPersistence.createRepository(), + }), + ).pipe( + Effect.catchAllCause( + ( + cause, + ): Effect.Effect< + Either.Either + > => + Effect.gen(function* () { + const detail = getCauseSummary(cause) + const prettyCause = Cause.pretty(cause).slice(0, 2_000) + const failure = toChatRouteFailure(detail) + yield* Effect.logError("chat: answer failed").pipe( + Effect.annotateLogs({ + status: failure.status, + detail, + cause: prettyCause, + }), + ) + yield* Effect.sync(() => + logger.error("chat: answer failed", { + status: failure.status, + detail, + cause: prettyCause, + }), + ) + + return Either.left(failure) + }), ), - ), - ) + ) return Either.match(result, { onLeft: (error): RouteResponse => @@ -92,3 +124,52 @@ async function answerChat( export const chatAnswerRouteService: ChatAnswerRouteService = { answerChat, } + +function toChatRouteFailure(detail: string): ChatRouteFailure { + const routeDetail = getSafeRouteDetail(detail) + if (isAuthError({ message: routeDetail })) { + return { + status: 401, + message: `Chat authentication failed: ${routeDetail}`, + } + } + + return { + status: 502, + message: `Chat generation failed: ${routeDetail}`, + } +} + +function getCauseSummary(cause: Cause.Cause): string { + const failure = Cause.failureOption(cause) + const failureSummary = Option.isSome(failure) + ? summarizeUnknownError(failure.value) + : null + if (failureSummary && isMeaningfulSummary(failureSummary)) { + return failureSummary + } + + for (const defect of Cause.defects(cause)) { + const defectSummary = summarizeUnknownError(defect) + if (isMeaningfulSummary(defectSummary)) return defectSummary + } + + const squashedSummary = summarizeUnknownError(Cause.squash(cause)) + if (isMeaningfulSummary(squashedSummary)) return squashedSummary + + return Cause.pretty(cause) +} + +function getSafeRouteDetail(detail: string): string { + const normalized = detail.replace(/\s+/g, " ").trim() + if (normalized.length === 0) return "Unexpected chat generation failure." + return normalized.slice(0, 800) +} + +function isMeaningfulSummary(value: string): boolean { + const normalized = value.trim() + return ( + normalized.length > 0 && + normalized !== "An unknown error occurred in Effect.tryPromise" + ) +} diff --git a/src/domains/chat/route-service.test.ts b/src/domains/chat/route-service.test.ts index 6796455..b2b9ef4 100644 --- a/src/domains/chat/route-service.test.ts +++ b/src/domains/chat/route-service.test.ts @@ -14,6 +14,9 @@ const mocks = vi.hoisted(() => ({ handleChatTurn: vi.fn(), listChatThreadsForWorkspace: vi.fn(), listMessagesForThread: vi.fn(), + loggerError: vi.fn(), + loggerInfo: vi.fn(), + loggerWarn: vi.fn(), reconcileSourcesForWorkspace: vi.fn(), softDeleteChatThread: vi.fn(), })) @@ -53,6 +56,14 @@ vi.mock("@/domains/chat/thread-service", () => ({ }, })) +vi.mock("@/lib/logger", () => ({ + logger: { + error: mocks.loggerError, + info: mocks.loggerInfo, + warn: mocks.loggerWarn, + }, +})) + import { chatAnswerRouteService } from "./route-answer" import { chatThreadRouteService } from "./route-threads" @@ -123,6 +134,73 @@ describe("chat route services", () => { ) }) + it("returns an explicit generation failure instead of a fake session error", async () => { + const workspace = makeWorkspace() + const client = { retrieval: { query: vi.fn() } } + mocks.getAuthenticatedWithClient.mockResolvedValue({ + user: { id: "user_1" }, + workspace, + apiKey: "jwt_123", + client, + }) + mocks.reconcileSourcesForWorkspace.mockResolvedValue([makeSource()]) + mocks.handleChatTurn.mockRejectedValue( + new Error("Gateway rejected tool schema: dataType enum invalid"), + ) + + const result = await chatAnswerRouteService.answerChat({ + body: { message: "Summarize it" }, + }) + + expect(result).toEqual({ + status: 502, + body: { + message: + "Chat generation failed: Gateway rejected tool schema: dataType enum invalid", + }, + }) + expect(mocks.loggerError).toHaveBeenCalledWith( + "chat: answer failed", + expect.objectContaining({ + status: 502, + detail: "Gateway rejected tool schema: dataType enum invalid", + }), + ) + }) + + it("returns an explicit authentication failure for auth-shaped chat errors", async () => { + const workspace = makeWorkspace() + const client = { retrieval: { query: vi.fn() } } + mocks.getAuthenticatedWithClient.mockResolvedValue({ + user: { id: "user_1" }, + workspace, + apiKey: "jwt_123", + client, + }) + mocks.reconcileSourcesForWorkspace.mockResolvedValue([makeSource()]) + mocks.handleChatTurn.mockRejectedValue( + new Error("HTTP 401: invalid API key"), + ) + + const result = await chatAnswerRouteService.answerChat({ + body: { message: "Summarize it" }, + }) + + expect(result).toEqual({ + status: 401, + body: { + message: "Chat authentication failed: HTTP 401: invalid API key", + }, + }) + expect(mocks.loggerError).toHaveBeenCalledWith( + "chat: answer failed", + expect.objectContaining({ + status: 401, + detail: "HTTP 401: invalid API key", + }), + ) + }) + it("lists chat threads as route-ready view data", async () => { mocks.getAuthenticated.mockResolvedValue({ workspace: makeWorkspace() }) mocks.listChatThreadsForWorkspace.mockResolvedValue([ From 8e86be21c01712b596005f8ba41566921e687152 Mon Sep 17 00:00:00 2001 From: suguanYang Date: Thu, 4 Jun 2026 22:57:19 +0800 Subject: [PATCH 07/13] fix(chunks): dedupe repeated parsed chunks --- src/components/chunks-panel-state.test.ts | 57 +++++++++++++++++++++++ src/components/chunks-panel-state.ts | 33 ++++++++++--- src/components/chunks-panel.test.ts | 42 +++++++++++++++++ 3 files changed, 126 insertions(+), 6 deletions(-) diff --git a/src/components/chunks-panel-state.test.ts b/src/components/chunks-panel-state.test.ts index a95c98e..bcee0f0 100644 --- a/src/components/chunks-panel-state.test.ts +++ b/src/components/chunks-panel-state.test.ts @@ -82,6 +82,63 @@ describe("chunksPanelState", () => { ]) }) + it("deduplicates repeated chunk ids before ordering and building the section tree", () => { + type TestSectionTreeNode = { + readonly chunkCount: number + readonly chunks: readonly ParsedChunkView[] + readonly children: readonly TestSectionTreeNode[] + } + const buildSectionTree = ( + chunksPanelState as typeof chunksPanelState & { + readonly buildSectionTree?: ( + chunks: readonly ParsedChunkView[], + sourceTitle: string, + ) => TestSectionTreeNode + } + ).buildSectionTree + const chunks: ParsedChunkView[] = [ + { + chunkId: "duplicate_chunk", + type: "text", + content: "First copy.", + sectionPath: "manual.pdf/Overview", + sourceTitle: "manual.pdf", + pageNums: [1], + }, + { + chunkId: "other_chunk", + type: "text", + content: "Other chunk.", + sectionPath: "manual.pdf/Overview", + sourceTitle: "manual.pdf", + pageNums: [2], + }, + { + chunkId: "duplicate_chunk", + type: "text", + content: "Duplicate copy.", + sectionPath: "manual.pdf/Overview", + sourceTitle: "manual.pdf", + pageNums: [3], + }, + ] + + expect( + chunksPanelState + .getChunksWithFocusedFirst(chunks, null) + .map((chunk) => chunk.content), + ).toEqual(["First copy.", "Other chunk."]) + + const tree = buildSectionTree?.(chunks, "manual.pdf") + const overviewSection = tree?.children[0] + + expect(tree?.chunkCount).toBe(2) + expect(overviewSection?.chunks.map((chunk) => chunk.content)).toEqual([ + "First copy.", + "Other chunk.", + ]) + }) + it("formats Knowhere section paths and reference labels for display", () => { expect( chunksPanelState.formatChunkSectionPath( diff --git a/src/components/chunks-panel-state.ts b/src/components/chunks-panel-state.ts index 83ea12b..16aaa33 100644 --- a/src/components/chunks-panel-state.ts +++ b/src/components/chunks-panel-state.ts @@ -55,7 +55,9 @@ function getChunksWithFocusedFirst( chunks: readonly ParsedChunkView[], focusedChunkId: string | null, ): readonly ParsedChunkView[] { - const orderedChunks = getChunksOrderedByPageNumber(chunks) + const orderedChunks = getChunksOrderedByPageNumber( + dedupeChunksById(chunks), + ) if (!focusedChunkId) return orderedChunks const focusedIndex = orderedChunks.findIndex( @@ -98,20 +100,23 @@ function buildSectionTree( chunks: readonly ParsedChunkView[], sourceTitle: string, ): ChunkSectionTreeNode { + const uniqueChunks = dedupeChunksById(chunks) const root = createMutableSectionTreeNode({ id: "root", kind: "root", label: sourceTitle.trim() || "Parsed Chunks", }) const chunksByParserChunkId = new Map( - chunks + uniqueChunks .filter((chunk) => chunk.parserChunkId) .map((chunk) => [chunk.parserChunkId!, chunk]), ) - const chunksByChunkId = new Map(chunks.map((chunk) => [chunk.chunkId, chunk])) + const chunksByChunkId = new Map( + uniqueChunks.map((chunk) => [chunk.chunkId, chunk]), + ) const sectionSegmentsByChunkId = new Map() - chunks.forEach((chunk) => { + uniqueChunks.forEach((chunk) => { const sectionSegments = getChunkSectionSegments(chunk, sourceTitle) if (sectionSegments.length > 0) { sectionSegmentsByChunkId.set(chunk.chunkId, sectionSegments) @@ -119,13 +124,13 @@ function buildSectionTree( }) const embeddedSectionSegmentsByChunkId = getEmbeddedSectionSegmentsByChunkId({ - chunks, + chunks: uniqueChunks, chunksByChunkId, chunksByParserChunkId, sectionSegmentsByChunkId, }) - chunks.forEach((chunk) => { + uniqueChunks.forEach((chunk) => { const sectionSegments = embeddedSectionSegmentsByChunkId.get(chunk.chunkId) ?? sectionSegmentsByChunkId.get(chunk.chunkId) ?? @@ -136,6 +141,22 @@ function buildSectionTree( return toReadonlySectionTreeNode(root) } +function dedupeChunksById( + chunks: readonly ParsedChunkView[], +): readonly ParsedChunkView[] { + const seenChunkIds = new Set() + const uniqueChunks: ParsedChunkView[] = [] + + chunks.forEach((chunk) => { + if (seenChunkIds.has(chunk.chunkId)) return + + seenChunkIds.add(chunk.chunkId) + uniqueChunks.push(chunk) + }) + + return uniqueChunks +} + function createMutableSectionTreeNode(input: { readonly id: string readonly kind: ChunkSectionTreeNodeKind diff --git a/src/components/chunks-panel.test.ts b/src/components/chunks-panel.test.ts index 3e110ac..c3e87aa 100644 --- a/src/components/chunks-panel.test.ts +++ b/src/components/chunks-panel.test.ts @@ -117,6 +117,48 @@ describe("ChunksPanel", () => { ).toBeTruthy(); }); + it("deduplicates repeated chunks before rendering section tree keys", () => { + const consoleError = vi + .spyOn(console, "error") + .mockImplementation(() => undefined); + + render( + React.createElement(C, { + chunks: [ + { + chunkId: "duplicate_chunk", + type: "text", + content: "Overview text", + sectionPath: "manual.pdf/Overview", + sourceTitle: "manual.pdf", + }, + { + chunkId: "duplicate_chunk", + type: "text", + content: "Duplicate overview text", + sectionPath: "manual.pdf/Overview", + sourceTitle: "manual.pdf", + }, + ], + selectedSource: "manual.pdf", + }), + ); + + expect( + screen.getByRole("treeitem", { + name: "Overview section with 1 chunk", + }), + ).toBeTruthy(); + expect( + screen.getAllByRole("treeitem", { name: "Overview text Text" }), + ).toHaveLength(1); + expect( + consoleError.mock.calls.some((call) => + String(call[0]).includes("Encountered two children with the same key"), + ), + ).toBe(false); + }); + it("requests the full chunk list before showing the section tree", async () => { const user = userEvent.setup(); const handleLoadAllChunks = vi.fn(); From af8c674581fb00ef5b0d6798fa995f6f5b4ee7ef Mon Sep 17 00:00:00 2001 From: suguanYang Date: Thu, 4 Jun 2026 22:57:44 +0800 Subject: [PATCH 08/13] chore(chat): add verbose agent loop logging --- src/domains/chat/index.test.ts | 354 ++++++++++++++++++++++ src/domains/chat/index.ts | 89 ++++++ src/domains/chat/prompt.ts | 526 +++++++++++++++++++++++++++++++-- src/lib/logger.ts | 34 ++- 4 files changed, 976 insertions(+), 27 deletions(-) diff --git a/src/domains/chat/index.test.ts b/src/domains/chat/index.test.ts index a4d0dd1..bb42787 100644 --- a/src/domains/chat/index.test.ts +++ b/src/domains/chat/index.test.ts @@ -19,14 +19,31 @@ import type { ReadRetrievedChunkInput, } from "./contracts" +const loggerMock = vi.hoisted(() => ({ + info: vi.fn(), + warn: vi.fn(), + error: vi.fn(), +})); + vi.mock("ai", async (importOriginal) => ({ ...(await importOriginal()), generateText: vi.fn(), })); +vi.mock("@/lib/logger", () => ({ + logger: { + info: loggerMock.info, + warn: loggerMock.warn, + error: loggerMock.error, + }, +})); + afterEach(() => { vi.restoreAllMocks(); vi.mocked(generateText).mockReset(); + loggerMock.info.mockReset(); + loggerMock.warn.mockReset(); + loggerMock.error.mockReset(); delete process.env.AI_GATEWAY_API_KEY; }); @@ -86,6 +103,87 @@ describe("answerQuestionWithRetrieval", () => { }); }); + it("logs bounded Knowhere query response chunks", async () => { + const result = makeRetrievalResult({ + chunkType: "image", + content: `Identity card front image https://blob.example/id.jpg ${"content ".repeat( + 80, + )}`, + }); + const retrieval = { + query: vi.fn().mockResolvedValue({ + results: [result], + evidenceText: `Evidence https://blob.example/evidence.jpg ${"evidence ".repeat( + 80, + )}`, + referencedChunks: [ + { + chunkId: "chunk_identity_1", + documentId: "doc_identity", + chunkType: "image", + sectionPath: `Assets / images / identity card front ${"summary ".repeat( + 80, + )}`, + filePath: "images/id-front.jpg", + jobId: "job_1", + assetUrl: "https://blob.example/id.jpg", + }, + ], + namespace: "notebook-workspace", + query: "冯荣洲 身份证 ID card", + routerUsed: "workflow_single_step", + answerText: `Matched identity card image ${"answer ".repeat(80)}`, + stopReason: "answer_done", + failureReason: null, + }), + }; + const generateAnswer = vi.fn(async ({ searchSources }) => { + await searchSources({ + query: "冯荣洲 身份证 ID card", + intent: "image", + dataType: 3, + }); + return "Matched identity card image."; + }); + + await Effect.runPromise( + answerQuestionWithRetrieval({ + question: "请将 冯荣洲 的身份证图片发给我", + namespace: "notebook-workspace", + sources: [makeSource({ knowhereDocumentId: "doc_identity" })], + excludedSourceIds: [], + retrieval, + generateAnswer, + messages: [], + }), + ); + + const meta = getLoggerInfoMeta("chat-agent: knowhere query response"); + const response = meta.response as KnowhereQueryResponseLogMeta; + expect(response).toMatchObject({ + query: "冯荣洲 身份证 ID card", + resultCount: 1, + referencedChunkCount: 1, + results: [ + { + chunkType: "image", + }, + ], + referencedChunks: [ + { + chunkType: "image", + }, + ], + }); + expect(response.answerText.length).toBeLessThanOrEqual(203); + expect(response.evidenceText.length).toBeLessThanOrEqual(203); + expect(response.results[0]?.content.length).toBeLessThanOrEqual(103); + expect(response.referencedChunks[0]?.summary.length).toBeLessThanOrEqual( + 103, + ); + expect(JSON.stringify(meta)).not.toContain("https://blob.example"); + }); + it("attaches citation descriptions from generated source labels", async () => { const firstResult = makeRetrievalResult({ source: { @@ -685,6 +783,18 @@ describe("generateGroundedAnswer", () => { prompt: expect.stringContaining("PR-E wires chat to Knowhere retrieval."), }); expect(answer).toBe("PR-E wires chat to retrieval."); + expect(getLoggerInfoMeta("chat-agent: llm request")).toMatchObject({ + operation: "generateGroundedAnswer", + model: "google/gemini-3-flash", + promptType: "text", + prompt: expect.stringContaining("PR-E wires chat to Knowhere retrieval."), + }); + expect(getLoggerInfoMeta("chat-agent: llm response")).toMatchObject({ + operation: "generateGroundedAnswer", + model: "google/gemini-3-flash", + responseText: "PR-E wires chat to retrieval.", + responseTextCharLength: "PR-E wires chat to retrieval.".length, + }); }); }); @@ -973,6 +1083,185 @@ describe("generateAgenticGroundedAnswer", () => { expect(preparedStep.messages.length).toBeLessThanOrEqual(12); expect(JSON.stringify(preparedStep.messages)).not.toContain("loop-message-0"); expect(JSON.stringify(preparedStep.messages)).toContain("loop-message-24"); + expect(getLoggerInfoMeta("chat-agent: llm request")).toMatchObject({ + operation: "generateAgenticGroundedAnswer.step", + model: "google/gemini-3-flash", + promptType: "messages", + stepNumber: 1, + instructions: expect.stringContaining("Notebook research agent"), + messageCount: preparedStep.messages.length, + messages: expect.arrayContaining([ + expect.objectContaining({ + role: "user", + content: "loop-message-24", + }), + ]), + }); + }); + + it("logs bounded tool call and tool result previews for each loop step", async () => { + process.env.AI_GATEWAY_API_KEY = "test_gateway_key"; + const generateSpy = vi + .spyOn(ToolLoopAgent.prototype, "generate") + .mockResolvedValue({ + text: "The answer is grounded.", + } as Awaited>); + + await generateAgenticGroundedAnswer({ + question: "请将 冯荣洲 的身份证图片发给我", + messages: [], + sources: [makeSource()], + excludedSourceIds: [], + searchSources: vi.fn(), + readRetrievedChunk: vi.fn(), + }); + + const settings = getCapturedAgentSettings( + getCapturedAgent(generateSpy.mock.contexts[0]), + ); + loggerMock.info.mockClear(); + + settings.onStepFinish({ + stepNumber: 1, + finishReason: "tool-calls", + text: `Inspecting identity image candidates. ${"reason ".repeat(200)}`, + toolCalls: [ + { + toolName: "searchSources", + toolCallId: "call_1", + input: { + query: "冯荣洲 身份证 ID card", + purpose: `Find the matching identity card image. ${"input ".repeat( + 300, + )}`, + dataType: 3, + }, + }, + ], + toolResults: [ + { + toolName: "searchSources", + toolCallId: "call_1", + output: { + query: "冯荣洲 身份证 ID card", + routerUsed: "workflow_single_step", + resultCount: 6, + referencedChunkCount: 2, + readableChunkCount: 6, + evidenceText: `Image evidence https://blob.example/id-front.jpg ${"evidence ".repeat( + 600, + )}`, + results: [ + { + chunkType: "image", + content: `Identity image content ${"result ".repeat(80)}`, + }, + ], + referencedChunks: [ + { + chunkType: "image", + sectionPath: `Assets / identity card ${"reference ".repeat(80)}`, + }, + ], + }, + }, + ], + usage: { + inputTokens: 11, + outputTokens: 22, + totalTokens: 33, + }, + }); + + const stepMeta = getLoggerInfoMeta("chat-agent: llm response"); + const stepLog = stepMeta as unknown as AgentLoopStepLogMeta; + expect(stepLog).toMatchObject({ + operation: "generateAgenticGroundedAnswer.step", + model: "google/gemini-3-flash", + stepNumber: 1, + finishReason: "tool-calls", + toolCallCount: 1, + toolResultCount: 1, + inputTokens: 11, + outputTokens: 22, + totalTokens: 33, + }); + expect(stepLog.responseText).toContain("Inspecting identity image candidates."); + expect(stepLog.toolCalls[0]?.input.truncated).toBe(true); + expect(stepLog.toolResults[0]?.output).toMatchObject({ + kind: "searchSources", + query: "冯荣洲 身份证 ID card", + resultCount: 6, + results: [ + { + chunkType: "image", + }, + ], + referencedChunks: [ + { + chunkType: "image", + }, + ], + }); + const searchSourcesOutput = stepLog.toolResults[0] + ?.output as SearchSourcesToolOutputLogMeta; + expect(searchSourcesOutput.evidenceText.length).toBeLessThanOrEqual(203); + expect(searchSourcesOutput.results[0]?.content.length).toBeLessThanOrEqual( + 103, + ); + expect( + searchSourcesOutput.referencedChunks[0]?.summary.length, + ).toBeLessThanOrEqual(103); + expect(JSON.stringify(stepMeta)).not.toContain("https://blob.example"); + + settings.onFinish({ + steps: [ + { + stepNumber: 1, + finishReason: "tool-calls", + text: "Read tool result.", + toolCalls: [ + { + toolName: "searchSources", + toolCallId: "call_1", + input: { query: "冯荣洲 身份证 ID card" }, + }, + ], + toolResults: [ + { + toolName: "searchSources", + toolCallId: "call_1", + output: { evidenceText: "Matched image evidence." }, + }, + ], + }, + ], + finishReason: "stop", + text: "Here is the matched identity card image.", + totalUsage: { + inputTokens: 40, + outputTokens: 20, + totalTokens: 60, + }, + }); + + const finishMeta = getLoggerInfoMeta("chat-agent: loop finished"); + expect(finishMeta).toMatchObject({ + stepCount: 1, + finishReason: "stop", + responseText: "Here is the matched identity card image.", + toolNames: ["searchSources"], + steps: [ + expect.objectContaining({ + stepNumber: 1, + toolCallCount: 1, + toolResultCount: 1, + }), + ], + inputTokens: 40, + outputTokens: 20, + totalTokens: 60, + }); }); }); @@ -1170,6 +1459,8 @@ type CapturedAgentSettings = { readonly stepNumber: number readonly messages: ModelMessage[] }) => unknown + readonly onStepFinish: (input: unknown) => void + readonly onFinish: (input: unknown) => void } type CapturedAgentTools = { @@ -1197,6 +1488,57 @@ type CapturedZodSchema = { } } +type AgentLoopStepLogMeta = { + readonly operation: string + readonly model: string + readonly stepNumber: number + readonly finishReason: string + readonly responseText: string + readonly toolCallCount: number + readonly toolCalls: readonly { + readonly input: AgentLoopLogPreviewMeta + }[] + readonly toolResultCount: number + readonly toolResults: readonly { + readonly output: SearchSourcesToolOutputLogMeta | AgentLoopLogPreviewMeta + }[] + readonly inputTokens: number + readonly outputTokens: number + readonly totalTokens: number +} + +type SearchSourcesToolOutputLogMeta = { + readonly kind: "searchSources" + readonly evidenceText: string + readonly results: readonly { + readonly content: string + }[] + readonly referencedChunks: readonly { + readonly summary: string + }[] +} + +type AgentLoopLogPreviewMeta = { + readonly truncated: boolean + readonly preview: string +} + +type KnowhereQueryResponseLogMeta = { + readonly query: string + readonly resultCount: number + readonly referencedChunkCount: number + readonly answerText: string + readonly evidenceText: string + readonly results: readonly { + readonly chunkType: string + readonly content: string + }[] + readonly referencedChunks: readonly { + readonly chunkType: string + readonly summary: string + }[] +} + function getCapturedAgent(agent: unknown): ToolLoopAgent { expect(agent).toBeInstanceOf(ToolLoopAgent) return agent as ToolLoopAgent @@ -1234,3 +1576,15 @@ function getSearchSourcesDataTypeSchema( return dataTypeSchema } + +function getLoggerInfoMeta(message: string): Record { + const calls = loggerMock.info.mock.calls as unknown as readonly (readonly [ + string, + Record | undefined, + ])[] + const call = calls.findLast(([currentMessage]) => currentMessage === message) + expect(call).toBeDefined() + const meta = call?.[1] + expect(meta).toBeDefined() + return meta ?? {} +} diff --git a/src/domains/chat/index.ts b/src/domains/chat/index.ts index 82b1ebc..f258077 100644 --- a/src/domains/chat/index.ts +++ b/src/domains/chat/index.ts @@ -36,7 +36,11 @@ const MAX_AGENTIC_TOP_K = 12 const MAX_CITATION_RESULTS = 20 const DEFAULT_CHUNK_READ_LIMIT = 2_000 const MAX_CHUNK_READ_LIMIT = 4_000 +const KNOWHERE_RESPONSE_TEXT_LOG_LIMIT = 200 +const KNOWHERE_CHUNK_LOG_LIMIT = 100 const NO_RESULTS_ANSWER = "I couldn't find that in your sources." +const RAW_URL_PATTERN = /https?:\/\/[^\s)\]}>"']+/g +const REDACTED_MEDIA_URL = "[media asset URL hidden]" type RetrievalDataType = NonNullable @@ -52,6 +56,30 @@ type StoredRetrievedChunk = { hasAssetUrl: boolean } +type KnowhereQueryResponseLog = { + readonly namespace: string + readonly query: string + readonly routerUsed: string | null | undefined + readonly stopReason: string | null | undefined + readonly failureReason: string | null | undefined + readonly resultCount: number + readonly referencedChunkCount: number + readonly answerText: string + readonly evidenceText: string + readonly results: readonly KnowhereResultChunkLog[] + readonly referencedChunks: readonly KnowhereReferencedChunkLog[] +} + +type KnowhereResultChunkLog = { + readonly chunkType: string + readonly content: string +} + +type KnowhereReferencedChunkLog = { + readonly chunkType: string + readonly summary: string +} + export type { AnswerQuestionInput, AnswerQuestionResult, @@ -137,6 +165,10 @@ export const answerQuestionWithRetrieval = ( intent: retrievalPlan.intent, priority: retrievalPlan.priority, }) + logger.info("chat-agent: knowhere query response", { + durationMs: Date.now() - startedAt, + response: formatKnowhereQueryResponseForLog(response), + }) return { ...response, chunkReferences, retrievalPlan } } catch (error) { logger.error("chat-agent: searchSources failed", { @@ -208,6 +240,63 @@ export const answerQuestionWithRetrieval = ( } }) +function formatKnowhereQueryResponseForLog( + response: RetrievalQueryResponse, +): KnowhereQueryResponseLog { + return { + namespace: response.namespace, + query: response.query, + routerUsed: response.routerUsed, + stopReason: response.stopReason, + failureReason: response.failureReason, + resultCount: response.results.length, + referencedChunkCount: response.referencedChunks.length, + answerText: truncateLogText( + response.answerText ?? "", + KNOWHERE_RESPONSE_TEXT_LOG_LIMIT, + ), + evidenceText: truncateLogText( + response.evidenceText ?? "", + KNOWHERE_RESPONSE_TEXT_LOG_LIMIT, + ), + results: response.results.map(formatKnowhereResultChunkForLog), + referencedChunks: response.referencedChunks.map( + formatKnowhereReferencedChunkForLog, + ), + } +} + +function formatKnowhereResultChunkForLog( + result: RetrievalResult, +): KnowhereResultChunkLog { + return { + chunkType: result.chunkType, + content: truncateLogText(result.content, KNOWHERE_CHUNK_LOG_LIMIT), + } +} + +function formatKnowhereReferencedChunkForLog( + chunk: RetrievalQueryResponse["referencedChunks"][number], +): KnowhereReferencedChunkLog { + return { + chunkType: chunk.chunkType, + summary: truncateLogText( + chunk.sectionPath || chunk.filePath || chunk.chunkId, + KNOWHERE_CHUNK_LOG_LIMIT, + ), + } +} + +function truncateLogText(value: string, limit: number): string { + const normalized = redactRawUrls(value).replace(/\s+/g, " ").trim() + if (normalized.length <= limit) return normalized + return `${normalized.slice(0, limit)}...` +} + +function redactRawUrls(value: string): string { + return value.replace(RAW_URL_PATTERN, REDACTED_MEDIA_URL) +} + function buildRetrievalQueryParams(input: { readonly input: AgenticRetrievalQuery readonly fallbackQuestion: string diff --git a/src/domains/chat/prompt.ts b/src/domains/chat/prompt.ts index 6e7d6f5..c88773c 100644 --- a/src/domains/chat/prompt.ts +++ b/src/domains/chat/prompt.ts @@ -45,6 +45,11 @@ const TOOL_EVIDENCE_CHAR_LIMIT = 6_000 const TOOL_RESULT_CONTENT_CHAR_LIMIT = 700 const TOOL_CHUNK_READ_LIMIT_DEFAULT = 2_000 const TOOL_CHUNK_READ_LIMIT_MAX = 4_000 +const AGENT_LOOP_TOOL_INPUT_LOG_LIMIT = 1_200 +const AGENT_LOOP_TOOL_OUTPUT_LOG_LIMIT = 2_400 +const AGENT_LOOP_TOOL_LOG_ENTRY_LIMIT = 4 +const KNOWHERE_TOOL_TEXT_LOG_LIMIT = 200 +const KNOWHERE_TOOL_CHUNK_LOG_LIMIT = 100 const RAW_URL_PATTERN = /https?:\/\/[^\s)\]}>"']+/g const REDACTED_MEDIA_URL = "[media asset URL hidden]" @@ -82,6 +87,91 @@ type GenerateAgenticGroundedAnswerInput = { type AgenticChatTools = ReturnType +type AgentLoopLogPreview = { + readonly charLength: number + readonly truncated: boolean + readonly preview: string +} + +type AgentLoopToolCallLog = { + readonly toolName: string + readonly toolCallId: string | null + readonly input: AgentLoopLogPreview +} + +type AgentLoopToolOutputLog = + | AgentLoopLogPreview + | AgentLoopSearchSourcesOutputLog + | AgentLoopReadChunkOutputLog + +type AgentLoopToolResultLog = { + readonly toolName: string + readonly toolCallId: string | null + readonly output: AgentLoopToolOutputLog +} + +type AgentLoopStepLog = { + readonly stepNumber: number + readonly finishReason: string | null + readonly responseText: string + readonly responseTextCharLength: number + readonly toolCallCount: number + readonly toolCalls: readonly AgentLoopToolCallLog[] + readonly toolCallsOmitted: number + readonly toolResultCount: number + readonly toolResults: readonly AgentLoopToolResultLog[] + readonly toolResultsOmitted: number +} + +type AgentLoopSearchSourcesOutputLog = { + readonly kind: "searchSources" + readonly query: string | null + readonly routerUsed: string | null + readonly stopReason: string | null + readonly failureReason: string | null + readonly resultCount: number | null + readonly referencedChunkCount: number | null + readonly readableChunkCount: number | null + readonly answerText: string | null + readonly evidenceText: string + readonly results: readonly AgentLoopChunkContentLog[] + readonly referencedChunks: readonly AgentLoopChunkSummaryLog[] + readonly chunkReferences: readonly AgentLoopChunkSummaryLog[] +} + +type AgentLoopReadChunkOutputLog = { + readonly kind: "readRetrievedChunk" + readonly found: boolean | null + readonly chunkType: string | null + readonly offset: number | null + readonly limit: number | null + readonly contentLength: number | null + readonly contentSlice: string + readonly hasMoreContent: boolean | null + readonly nextOffset: number | null +} + +type AgentLoopChunkContentLog = { + readonly chunkType: string | null + readonly content: string +} + +type AgentLoopChunkSummaryLog = { + readonly chunkType: string | null + readonly summary: string +} + +type LlmModelMessageLog = { + readonly role: string + readonly contentCharLength: number + readonly content: unknown +} + +type GenerateLoggedTextInput = { + readonly operation: string + readonly prompt: string +} + export const generateContextualRetrievalQueryEffect = ( input: GenerateContextualRetrievalQueryInput, ): Effect.Effect => @@ -98,15 +188,16 @@ export const generateContextualRetrievalQueryEffect = ( ) } + const prompt = buildRetrievalQueryPrompt({ + question, + messages: input.messages, + sources: input.sources, + excludedSourceIds: input.excludedSourceIds, + }) const response = yield* Effect.tryPromise(() => - generateText({ - model: CHAT_MODEL, - prompt: buildRetrievalQueryPrompt({ - question, - messages: input.messages, - sources: input.sources, - excludedSourceIds: input.excludedSourceIds, - }), + generateLoggedText({ + operation: "generateContextualRetrievalQuery", + prompt, }), ) return normalizeRetrievalQuery(response.text, question) @@ -133,8 +224,8 @@ export const generateGroundedAnswerEffect = ( } const response = yield* Effect.tryPromise(() => - generateText({ - model: CHAT_MODEL, + generateLoggedText({ + operation: "generateGroundedAnswer", prompt: buildGroundedPrompt(input), }), ) @@ -162,11 +253,24 @@ export const generateAgenticGroundedAnswerEffect = ( } const agent = buildAgenticChatAgent(input) - const response = yield* Effect.tryPromise(() => - agent.generate({ - messages: buildAgenticChatMessages(input), - }), - ) + const messages = buildAgenticChatMessages(input) + logger.info("chat-agent: llm request", { + operation: "generateAgenticGroundedAnswer.initial", + model: CHAT_MODEL, + promptType: "messages", + messageCount: messages.length, + messages: formatModelMessagesForLlmLog(messages), + }) + const response = yield* Effect.tryPromise(async () => { + const generationResponse = await agent.generate({ messages }) + logger.info("chat-agent: llm response", { + operation: "generateAgenticGroundedAnswer.final", + model: CHAT_MODEL, + responseTextCharLength: generationResponse.text.length, + responseText: redactRawUrls(generationResponse.text), + }) + return generationResponse + }) return response.text.trim() }) @@ -176,6 +280,29 @@ export async function generateAgenticGroundedAnswer( return Effect.runPromise(generateAgenticGroundedAnswerEffect(input)) } +async function generateLoggedText( + input: GenerateLoggedTextInput, +): Promise>> { + logger.info("chat-agent: llm request", { + operation: input.operation, + model: CHAT_MODEL, + promptType: "text", + promptCharLength: input.prompt.length, + prompt: redactRawUrls(input.prompt), + }) + const response = await generateText({ + model: CHAT_MODEL, + prompt: input.prompt, + }) + logger.info("chat-agent: llm response", { + operation: input.operation, + model: CHAT_MODEL, + responseTextCharLength: response.text.length, + responseText: redactRawUrls(response.text), + }) + return response +} + export function buildRetrievalQueryPrompt( input: GenerateContextualRetrievalQueryInput, ): string { @@ -291,19 +418,27 @@ export function buildAgenticChatSystemPrompt( function buildAgenticChatAgent( input: GenerateAgenticGroundedAnswerInput, ): ToolLoopAgent { + const instructions = buildAgenticChatSystemPrompt(input) return new ToolLoopAgent({ model: CHAT_MODEL, - instructions: buildAgenticChatSystemPrompt(input), + instructions, tools: buildAgenticChatTools(input), stopWhen: stepCountIs(AGENTIC_SEARCH_STEP_LIMIT), - prepareStep: buildAgenticPrepareStep(), + prepareStep: buildAgenticPrepareStep(instructions), onStepFinish: (event) => { - logger.info("chat-agent: loop step finished", { + logger.info("chat-agent: llm response", { + operation: "generateAgenticGroundedAnswer.step", + model: CHAT_MODEL, stepNumber: event.stepNumber, finishReason: event.finishReason, - textLength: event.text.length, - toolCalls: event.toolCalls.map((toolCall) => toolCall.toolName), + responseTextCharLength: event.text.length, + responseText: redactRawUrls(event.text), + toolCallCount: event.toolCalls.length, + toolCalls: formatAgentLoopToolCalls(event.toolCalls), + toolCallsOmitted: getOmittedAgentLoopEntryCount(event.toolCalls), toolResultCount: event.toolResults.length, + toolResults: formatAgentLoopToolResults(event.toolResults), + toolResultsOmitted: getOmittedAgentLoopEntryCount(event.toolResults), inputTokens: event.usage.inputTokens, outputTokens: event.usage.outputTokens, totalTokens: event.usage.totalTokens, @@ -313,9 +448,15 @@ function buildAgenticChatAgent( logger.info("chat-agent: loop finished", { stepCount: event.steps.length, finishReason: event.finishReason, - textLength: event.text.length, - toolCalls: event.steps.flatMap((step) => - step.toolCalls.map((toolCall) => toolCall.toolName), + responseTextCharLength: event.text.length, + responseText: redactRawUrls(event.text), + steps: event.steps.map(formatAgentLoopStep), + toolNames: Array.from( + new Set( + event.steps.flatMap((step) => + step.toolCalls.map((toolCall) => toolCall.toolName), + ), + ), ), inputTokens: event.totalUsage.inputTokens, outputTokens: event.totalUsage.outputTokens, @@ -452,11 +593,13 @@ function buildAgenticChatTools( } as const } -function buildAgenticPrepareStep(): PrepareStepFunction { +function buildAgenticPrepareStep( + instructions: string, +): PrepareStepFunction { return ({ stepNumber, messages }) => { const managedMessages = buildAgentStepMessages(messages) if (stepNumber === 0) { - return { + const stepInput = { messages: managedMessages, toolChoice: { type: "tool" as const, @@ -464,12 +607,345 @@ function buildAgenticPrepareStep(): PrepareStepFunction { }, activeTools: ["searchSources" as const], } + logAgentStepLlmRequest({ + stepNumber, + instructions, + messages: managedMessages, + toolChoice: stepInput.toolChoice, + activeTools: stepInput.activeTools, + }) + return stepInput } + logAgentStepLlmRequest({ + stepNumber, + instructions, + messages: managedMessages, + toolChoice: null, + activeTools: null, + }) return { messages: managedMessages } } } +function formatAgentLoopStep(step: unknown, index: number): AgentLoopStepLog { + const record = getRecordFromUnknown(step) + const toolCalls = getRecordArray(record, "toolCalls") + const toolResults = getRecordArray(record, "toolResults") + const responseText = getRecordString(record, "text") ?? "" + return { + stepNumber: + getRecordNumber(record, "stepNumber") ?? + getRecordNumber(record, "stepIndex") ?? + index + 1, + finishReason: getRecordString(record, "finishReason"), + responseText: redactRawUrls(responseText), + responseTextCharLength: responseText.length, + toolCallCount: toolCalls.length, + toolCalls: formatAgentLoopToolCalls(toolCalls), + toolCallsOmitted: getOmittedAgentLoopEntryCount(toolCalls), + toolResultCount: toolResults.length, + toolResults: formatAgentLoopToolResults(toolResults), + toolResultsOmitted: getOmittedAgentLoopEntryCount(toolResults), + } +} + +function formatAgentLoopToolCalls( + toolCalls: readonly unknown[], +): readonly AgentLoopToolCallLog[] { + return toolCalls + .slice(0, AGENT_LOOP_TOOL_LOG_ENTRY_LIMIT) + .map(formatAgentLoopToolCall) +} + +function formatAgentLoopToolCall(toolCall: unknown): AgentLoopToolCallLog { + const record = getRecordFromUnknown(toolCall) + return { + toolName: getRecordString(record, "toolName") ?? "unknown", + toolCallId: getRecordString(record, "toolCallId"), + input: buildAgentLoopPreview( + getFirstRecordValue(record, ["input", "args", "arguments"]), + AGENT_LOOP_TOOL_INPUT_LOG_LIMIT, + ), + } +} + +function formatAgentLoopToolResults( + toolResults: readonly unknown[], +): readonly AgentLoopToolResultLog[] { + return toolResults + .slice(0, AGENT_LOOP_TOOL_LOG_ENTRY_LIMIT) + .map(formatAgentLoopToolResult) +} + +function formatAgentLoopToolResult(toolResult: unknown): AgentLoopToolResultLog { + const record = getRecordFromUnknown(toolResult) + const toolName = getRecordString(record, "toolName") ?? "unknown" + return { + toolName, + toolCallId: getRecordString(record, "toolCallId"), + output: formatAgentLoopToolOutput( + toolName, + getFirstRecordValue(record, ["output", "result", "content"]), + ), + } +} + +function formatAgentLoopToolOutput( + toolName: string, + output: unknown, +): AgentLoopToolOutputLog { + if (toolName === "searchSources") { + return formatSearchSourcesToolOutput(output) + } + if (toolName === "readRetrievedChunk") { + return formatReadRetrievedChunkToolOutput(output) + } + return buildAgentLoopPreview(output, AGENT_LOOP_TOOL_OUTPUT_LOG_LIMIT) +} + +function formatSearchSourcesToolOutput( + output: unknown, +): AgentLoopSearchSourcesOutputLog { + const record = getRecordFromUnknown(output) + return { + kind: "searchSources", + query: getRecordString(record, "query"), + routerUsed: getRecordString(record, "routerUsed"), + stopReason: getRecordString(record, "stopReason"), + failureReason: getRecordString(record, "failureReason"), + resultCount: getRecordNumber(record, "resultCount"), + referencedChunkCount: getRecordNumber(record, "referencedChunkCount"), + readableChunkCount: getRecordNumber(record, "readableChunkCount"), + answerText: truncateAgentLoopLogTextOrNull( + getRecordString(record, "answerText"), + KNOWHERE_TOOL_TEXT_LOG_LIMIT, + ), + evidenceText: truncateAgentLoopLogText( + getRecordString(record, "evidenceText") ?? "", + KNOWHERE_TOOL_TEXT_LOG_LIMIT, + ), + results: getRecordArray(record, "results").map(formatToolOutputChunkContent), + referencedChunks: getRecordArray(record, "referencedChunks").map( + formatToolOutputChunkSummary, + ), + chunkReferences: getRecordArray(record, "chunkReferences").map( + formatToolOutputChunkSummary, + ), + } +} + +function formatReadRetrievedChunkToolOutput( + output: unknown, +): AgentLoopReadChunkOutputLog { + const record = getRecordFromUnknown(output) + return { + kind: "readRetrievedChunk", + found: getRecordBoolean(record, "found"), + chunkType: getRecordString(record, "chunkType"), + offset: getRecordNumber(record, "offset"), + limit: getRecordNumber(record, "limit"), + contentLength: getRecordNumber(record, "contentLength"), + contentSlice: truncateAgentLoopLogText( + getRecordString(record, "contentSlice") ?? "", + KNOWHERE_TOOL_CHUNK_LOG_LIMIT, + ), + hasMoreContent: getRecordBoolean(record, "hasMoreContent"), + nextOffset: getRecordNumber(record, "nextOffset"), + } +} + +function formatToolOutputChunkContent( + value: unknown, +): AgentLoopChunkContentLog { + const record = getRecordFromUnknown(value) + return { + chunkType: getRecordString(record, "chunkType"), + content: truncateAgentLoopLogText( + getFirstRecordString(record, ["content", "contentPreview"]), + KNOWHERE_TOOL_CHUNK_LOG_LIMIT, + ), + } +} + +function formatToolOutputChunkSummary( + value: unknown, +): AgentLoopChunkSummaryLog { + const record = getRecordFromUnknown(value) + const source = getRecordFromUnknown(record?.source) + return { + chunkType: getRecordString(record, "chunkType"), + summary: truncateAgentLoopLogText( + getFirstRecordString(record, [ + "summary", + "sectionPath", + "filePath", + "content", + "contentPreview", + ]) || getRecordString(source, "sectionPath") || "", + KNOWHERE_TOOL_CHUNK_LOG_LIMIT, + ), + } +} + +function getOmittedAgentLoopEntryCount(entries: readonly unknown[]): number { + return Math.max(0, entries.length - AGENT_LOOP_TOOL_LOG_ENTRY_LIMIT) +} + +function logAgentStepLlmRequest(input: { + readonly stepNumber: number + readonly instructions: string + readonly messages: readonly ModelMessage[] + readonly toolChoice: unknown + readonly activeTools: readonly string[] | null +}): void { + logger.info("chat-agent: llm request", { + operation: "generateAgenticGroundedAnswer.step", + model: CHAT_MODEL, + promptType: "messages", + stepNumber: input.stepNumber, + instructionsCharLength: input.instructions.length, + instructions: redactRawUrls(input.instructions), + messageCount: input.messages.length, + messages: formatModelMessagesForLlmLog(input.messages), + toolChoice: input.toolChoice, + activeTools: input.activeTools, + }) +} + +function formatModelMessagesForLlmLog( + messages: readonly ModelMessage[], +): readonly LlmModelMessageLog[] { + return messages.map(formatModelMessageForLlmLog) +} + +function formatModelMessageForLlmLog(message: ModelMessage): LlmModelMessageLog { + return { + role: message.role, + contentCharLength: getUnknownTextLength(message.content), + content: redactRawUrlsFromUnknown(message.content), + } +} + +function buildAgentLoopPreview( + value: unknown, + limit: number, +): AgentLoopLogPreview { + const normalized = redactRawUrls(stringifyAgentLoopLogValue(value)) + .replace(/\s+/g, " ") + .trim() + const truncated = normalized.length > limit + return { + charLength: normalized.length, + truncated, + preview: truncated ? `${normalized.slice(0, limit)}...` : normalized, + } +} + +function stringifyAgentLoopLogValue(value: unknown): string { + if (typeof value === "string") return value + if (value === undefined) return "undefined" + if (typeof value === "function") { + return `[Function ${value.name || "anonymous"}]` + } + if (typeof value === "symbol") return value.toString() + + const json = JSON.stringify(value, createAgentLoopLogJsonReplacer()) + return json ?? String(value) +} + +function createAgentLoopLogJsonReplacer(): ( + key: string, + value: unknown, +) => unknown { + const seenObjects = new WeakSet() + return (_key: string, value: unknown): unknown => { + if (typeof value === "bigint") return value.toString() + if (typeof value === "function") { + return `[Function ${value.name || "anonymous"}]` + } + if (typeof value === "symbol") return value.toString() + if (value instanceof Error) { + return { + name: value.name, + message: value.message, + } + } + if (!value || typeof value !== "object") return value + if (seenObjects.has(value)) return "[Circular]" + seenObjects.add(value) + return value + } +} + +function truncateAgentLoopLogText(value: string, limit: number): string { + const normalized = redactRawUrls(value).replace(/\s+/g, " ").trim() + if (normalized.length <= limit) return normalized + return `${normalized.slice(0, limit)}...` +} + +function truncateAgentLoopLogTextOrNull( + value: string | null, + limit: number, +): string | null { + return value === null ? null : truncateAgentLoopLogText(value, limit) +} + +function getRecordFromUnknown( + value: unknown, +): Readonly> | null { + if (!value || typeof value !== "object" || Array.isArray(value)) return null + return value as Readonly> +} + +function getRecordString( + record: Readonly> | null, + key: string, +): string | null { + const value = record?.[key] + return typeof value === "string" ? value : null +} + +function getRecordNumber( + record: Readonly> | null, + key: string, +): number | null { + const value = record?.[key] + return typeof value === "number" ? value : null +} + +function getRecordBoolean( + record: Readonly> | null, + key: string, +): boolean | null { + const value = record?.[key] + return typeof value === "boolean" ? value : null +} + +function getRecordArray( + record: Readonly> | null, + key: string, +): readonly unknown[] { + const value = record?.[key] + return Array.isArray(value) ? value : [] +} + +function getFirstRecordValue( + record: Readonly> | null, + keys: readonly string[], +): unknown { + const matchingKey = keys.find((key): boolean => record?.[key] !== undefined) + return matchingKey ? record?.[matchingKey] : undefined +} + +function getFirstRecordString( + record: Readonly> | null, + keys: readonly string[], +): string { + const value = getFirstRecordValue(record, keys) + return typeof value === "string" ? value : "" +} + function buildAgenticChatMessages( input: Pick, ): ModelMessage[] { diff --git a/src/lib/logger.ts b/src/lib/logger.ts index 89dc1d9..1edc512 100644 --- a/src/lib/logger.ts +++ b/src/lib/logger.ts @@ -1,6 +1,7 @@ import "server-only" type LogLevel = "info" | "warn" | "error" +const LOG_JSON_INDENT = 2 interface LogEntry { ts: string @@ -18,10 +19,39 @@ function formatLog(entry: LogEntry): string { ([key]) => key !== "ts" && key !== "level" && key !== "msg", ), ) - const metaStr = Object.keys(meta).length > 0 ? " " + JSON.stringify(meta) : "" + const metaStr = + Object.keys(meta).length > 0 + ? `\n${stringifyLogJson(meta, LOG_JSON_INDENT)}` + : "" return `${entry.ts} ${prefix} ${entry.msg}${metaStr}` } - return JSON.stringify(entry) + return stringifyLogJson(entry, LOG_JSON_INDENT) +} + +function stringifyLogJson(value: unknown, space: number): string { + return JSON.stringify(value, createLogJsonReplacer(), space) ?? String(value) +} + +function createLogJsonReplacer(): (key: string, value: unknown) => unknown { + const seenObjects = new WeakSet() + return (_key: string, value: unknown): unknown => { + if (typeof value === "bigint") return value.toString() + if (typeof value === "function") { + return `[Function ${value.name || "anonymous"}]` + } + if (typeof value === "symbol") return value.toString() + if (value instanceof Error) { + return { + name: value.name, + message: value.message, + stack: value.stack, + } + } + if (!value || typeof value !== "object") return value + if (seenObjects.has(value)) return "[Circular]" + seenObjects.add(value) + return value + } } function log(level: LogLevel, msg: string, meta?: Record): void { From 9e97be40d3ca3cafe9f10920c2f68cd5f4bdd769 Mon Sep 17 00:00:00 2001 From: suguanYang Date: Fri, 5 Jun 2026 01:15:18 +0800 Subject: [PATCH 09/13] feat(chat): improve retrieval answers and markdown rendering --- src/app/globals.css | 122 ++++ src/components/chat-message-list.test.ts | 57 ++ src/components/chat-message-list.tsx | 28 +- src/domains/chat/contracts.ts | 20 +- src/domains/chat/index.test.ts | 359 ++++++++---- src/domains/chat/index.ts | 54 +- src/domains/chat/media-assets.test.ts | 72 +++ src/domains/chat/media-assets.ts | 187 +++++- src/domains/chat/prompt.ts | 540 +++++++----------- src/domains/chat/service.test.ts | 2 + .../sources/route-upload-request.test.ts | 23 +- 11 files changed, 963 insertions(+), 501 deletions(-) diff --git a/src/app/globals.css b/src/app/globals.css index 0fab70e..42112a3 100644 --- a/src/app/globals.css +++ b/src/app/globals.css @@ -206,6 +206,128 @@ } @layer utilities { + .chat-markdown-content { + color: var(--foreground); + font-size: 14px; + line-height: 1.55; + overflow-wrap: break-word; + } + + .chat-markdown-content > :first-child { + margin-top: 0; + } + + .chat-markdown-content > :last-child { + margin-bottom: 0; + } + + .chat-markdown-content h1, + .chat-markdown-content h2, + .chat-markdown-content h3, + .chat-markdown-content h4, + .chat-markdown-content h5, + .chat-markdown-content h6 { + margin-top: 12px; + margin-bottom: 6px; + font-weight: 700; + line-height: 1.3; + } + + .chat-markdown-content h1, + .chat-markdown-content h2 { + font-size: 1.05em; + } + + .chat-markdown-content h3, + .chat-markdown-content h4, + .chat-markdown-content h5, + .chat-markdown-content h6 { + font-size: 1em; + } + + .chat-markdown-content p, + .chat-markdown-content blockquote, + .chat-markdown-content ul, + .chat-markdown-content ol, + .chat-markdown-content table, + .chat-markdown-content pre { + margin-top: 0; + margin-bottom: 10px; + } + + .chat-markdown-content a { + color: var(--primary); + text-decoration: underline; + text-underline-offset: 2px; + } + + .chat-markdown-content strong { + font-weight: 700; + } + + .chat-markdown-content ul, + .chat-markdown-content ol { + padding-left: 1.35em; + } + + .chat-markdown-content li + li { + margin-top: 0.2em; + } + + .chat-markdown-content blockquote { + border-left: 3px solid var(--border); + color: var(--muted-foreground); + padding-left: 0.85em; + } + + .chat-markdown-content table { + border-collapse: collapse; + display: block; + font-size: 12px; + max-width: 100%; + overflow-x: auto; + width: max-content; + } + + .chat-markdown-content th, + .chat-markdown-content td { + border: 1px solid var(--border); + padding: 5px 8px; + text-align: left; + vertical-align: top; + } + + .chat-markdown-content th { + background-color: color-mix(in srgb, var(--muted) 60%, transparent); + font-weight: 700; + } + + .chat-markdown-content code { + background-color: color-mix(in srgb, var(--muted) 72%, transparent); + border-radius: 4px; + font-family: var(--font-mono); + font-size: 85%; + padding: 0.15em 0.35em; + } + + .chat-markdown-content pre { + background-color: color-mix(in srgb, var(--muted) 80%, transparent); + border: 1px solid var(--border); + border-radius: 6px; + line-height: 1.45; + max-width: 100%; + overflow-x: auto; + padding: 10px; + } + + .chat-markdown-content pre code { + background: transparent; + border-radius: 0; + font-size: 100%; + padding: 0; + white-space: pre; + } + .original-markdown-preview { color: var(--foreground); font-size: 14px; diff --git a/src/components/chat-message-list.test.ts b/src/components/chat-message-list.test.ts index c515976..22f89b7 100644 --- a/src/components/chat-message-list.test.ts +++ b/src/components/chat-message-list.test.ts @@ -100,6 +100,63 @@ describe("ChatMessageList", () => { ).toBeNull(); }); + it("renders assistant markdown with GitHub-flavored tables", () => { + render( + React.createElement(ChatMessageList, { + messages: [ + { + id: "assistant_1", + role: "assistant", + content: + "### Summary\n\n- **Deadline:** Monday\n\n| Item | Status |\n| --- | --- |\n| Draft | Ready |", + }, + ], + }), + ); + + expect( + screen.getByRole("heading", { name: "Summary", level: 3 }), + ).toBeTruthy(); + expect(screen.getByRole("listitem").textContent).toContain("Deadline:"); + expect(screen.getByRole("table")).toBeTruthy(); + expect(screen.getByRole("columnheader", { name: "Item" })).toBeTruthy(); + expect(screen.getByRole("cell", { name: "Ready" })).toBeTruthy(); + }); + + it("keeps user markdown-looking text literal", () => { + render( + React.createElement(ChatMessageList, { + messages: [ + { + id: "user_1", + role: "user", + content: "**Do not render this as bold**", + }, + ], + }), + ); + + expect(screen.getByText("**Do not render this as bold**")).toBeTruthy(); + expect(screen.queryByText("Do not render this as bold")).toBeNull(); + }); + + it("skips assistant inline HTML while rendering markdown text", () => { + render( + React.createElement(ChatMessageList, { + messages: [ + { + id: "assistant_1", + role: "assistant", + content: "Visible **text** \"hidden", + }, + ], + }), + ); + + expect(screen.getByText("text")).toBeTruthy(); + expect(screen.queryByAltText("hidden image")).toBeNull(); + }); + it("does not hide image cards when source links dedupe the same section", () => { render( React.createElement(ChatMessageList, { diff --git a/src/components/chat-message-list.tsx b/src/components/chat-message-list.tsx index fe2f85f..937da4e 100644 --- a/src/components/chat-message-list.tsx +++ b/src/components/chat-message-list.tsx @@ -3,6 +3,8 @@ import { type CSSProperties, type ReactElement } from "react"; import { type VirtualItem } from "@tanstack/react-virtual"; import { ImageIcon, MessageCircle } from "lucide-react"; +import ReactMarkdown, { type Components } from "react-markdown"; +import remarkGfm from "remark-gfm"; import { useChatMessageListWorkflow } from "@/components/chat-message-list-workflow"; import { chatPanelModel } from "@/components/chat-panel-model"; @@ -23,6 +25,12 @@ type DisplayImageCitation = DisplayCitation & { readonly assetUrl: string; }; +const assistantMarkdownComponents: Components = { + p: ({ children }) => ( +

{children}

+ ), +}; + export type ChatMessageListProps = { readonly isDisabled?: boolean; readonly isSending?: boolean; @@ -252,7 +260,7 @@ function MessageBubble({ return (
-

{message.content}

+ {displayImageCitations.length > 0 && (

@@ -312,6 +320,24 @@ function MessageBubble({ ); } +function AssistantMessageContent({ + content, +}: { + readonly content: string; +}): ReactElement { + return ( +

+ + {content} + +
+ ); +} + function getDisplayCitations( message: ChatMessageView, sourceTitlesByDocumentId: Readonly>, diff --git a/src/domains/chat/contracts.ts b/src/domains/chat/contracts.ts index 46ec50a..0bef342 100644 --- a/src/domains/chat/contracts.ts +++ b/src/domains/chat/contracts.ts @@ -18,31 +18,25 @@ export type ChatHistoryMessage = { citations?: readonly ChatCitationView[] } -export type AgenticRetrievalIntent = - | "overview" - | "entity" - | "section" +export type AgenticRetrievalTargetContent = + | "all" + | "text" | "image" | "table" - | "detail" - | "citation" + | "text_image" + | "text_table" export type AgenticRetrievalPlan = { - intent: AgenticRetrievalIntent | null + targetContent: AgenticRetrievalTargetContent purpose: string | null - priority: number | null } -export type AgenticRetrievalDataTypeInput = number - export type AgenticRetrievalQuery = Pick< RetrievalQueryParams, "query" | "topK" | "signalPaths" | "filterMode" | "threshold" > & { - readonly dataType?: AgenticRetrievalDataTypeInput - readonly intent?: AgenticRetrievalIntent + readonly targetContent?: AgenticRetrievalTargetContent readonly purpose?: string - readonly priority?: number } export type RetrievedChunkReference = { diff --git a/src/domains/chat/index.test.ts b/src/domains/chat/index.test.ts index bb42787..2643fb7 100644 --- a/src/domains/chat/index.test.ts +++ b/src/domains/chat/index.test.ts @@ -87,6 +87,7 @@ describe("answerQuestionWithRetrieval", () => { query: "What does the document say?", topK: 8, useAgentic: true, + dataType: 1, excludeDocumentIds: ["doc_excluded"], }); expect(generateAnswer).toHaveBeenCalledWith({ @@ -140,8 +141,7 @@ describe("answerQuestionWithRetrieval", () => { const generateAnswer = vi.fn(async ({ searchSources }) => { await searchSources({ query: "冯荣洲 身份证 ID card", - intent: "image", - dataType: 3, + targetContent: "image", }); return "Matched identity card image."; }); @@ -319,9 +319,8 @@ describe("answerQuestionWithRetrieval", () => { const generateAnswer = vi.fn(async ({ searchSources }) => { await searchSources({ query: "SpaceX rocket photos", - intent: "image", + targetContent: "image", purpose: "Find visual rocket launch chunks.", - priority: 5, }); return "Use this launch photo. https://blob.example/images/image-9-Night%20Rocket%20Launch.jpg"; }); @@ -395,7 +394,10 @@ describe("answerQuestionWithRetrieval", () => { }), }; const generateAnswer = vi.fn(async ({ searchSources }) => { - await searchSources({ query: "公民身份证明 图片", dataType: 3 }); + await searchSources({ + query: "公民身份证明 图片", + targetContent: "image", + }); return "这里是相关身份证明图片。"; }); const loadSourceAssetUrls = vi.fn().mockResolvedValue({ @@ -451,6 +453,132 @@ describe("answerQuestionWithRetrieval", () => { ]); }); + it("sends requested identity-card images without exposing internal media metadata", async () => { + const frontAssetUrl = "https://blob.example/images/feng-rongzhou-id-front.jpg"; + const backAssetUrl = "https://blob.example/images/feng-rongzhou-id-back.jpg"; + const textResult = makeRetrievalResult({ + content: "冯荣洲的法定代表人身份证明页包含居民身份证图片。", + source: { + documentId: "doc_identity", + sourceFileName: "document-generated.pdf", + sectionPath: "二、法定代表人身份证明", + }, + }); + const duplicateFrontResult = { + ...makeRetrievalResult({ + chunkType: "image", + content: "冯荣洲居民身份证正面图片。", + assetUrl: frontAssetUrl, + source: { + documentId: "doc_identity", + sourceFileName: "document-generated.pdf", + sectionPath: "images/feng-rongzhou-id-front.jpg", + }, + }), + chunkId: "chunk_front_direct", + } as RetrievalResult & { readonly chunkId: string }; + const richerDuplicateFrontResult = { + ...makeRetrievalResult({ + chunkType: "image", + content: "冯荣洲居民身份证正面图片,来源于身份证明章节。", + assetUrl: frontAssetUrl, + source: { + documentId: "doc_identity", + sourceFileName: "document-generated.pdf", + sectionPath: "二、法定代表人身份证明 / 身份证正面", + }, + }), + chunkId: "chunk_front_richer", + } as RetrievalResult & { readonly chunkId: string }; + const backResult = { + ...makeRetrievalResult({ + chunkType: "image", + content: "冯荣洲居民身份证反面图片。", + assetUrl: backAssetUrl, + source: { + documentId: "doc_identity", + sourceFileName: "document-generated.pdf", + sectionPath: "二、法定代表人身份证明 / 身份证反面", + }, + }), + chunkId: "chunk_back", + } as RetrievalResult & { readonly chunkId: string }; + const retrieval = { + query: vi.fn().mockResolvedValue({ + results: [ + textResult, + duplicateFrontResult, + richerDuplicateFrontResult, + backResult, + ], + evidenceText: "冯荣洲 身份证 图片", + referencedChunks: [], + namespace: "notebook-workspace", + query: "冯荣洲 身份证 图片", + routerUsed: "workflow_single_step", + answerText: null, + }), + }; + const generateAnswer = vi.fn(async ({ searchSources }) => { + await searchSources({ + query: "冯荣洲 身份证 图片", + targetContent: "image", + purpose: "查找冯荣洲的身份证图片。", + }); + return [ + "已找到冯荣洲的身份证图片。[商务标文件.pdf / 二、法定代表人身份证明]", + `{"asset_id":"asset_front","assetUrl":"${frontAssetUrl}","chunkId":"chunk_front_direct"}`, + ].join("\n"); + }); + const sources = [ + makeSource({ + id: "source_identity", + title: "商务标文件.pdf", + knowhereDocumentId: "doc_identity", + }), + ]; + + const answer = await Effect.runPromise( + answerQuestionWithRetrieval({ + question: "请将 冯荣洲 的身份证图片发给我", + namespace: "notebook-workspace", + sources, + excludedSourceIds: [], + retrieval, + generateAnswer, + messages: [], + }), + ); + + expect(retrieval.query).toHaveBeenCalledWith({ + namespace: "notebook-workspace", + query: "冯荣洲 身份证 图片", + topK: 8, + useAgentic: true, + dataType: 3, + }); + expect(answer.answer).toBe( + "已找到冯荣洲的身份证图片。[商务标文件.pdf / 二、法定代表人身份证明]", + ); + expect(answer.answer).not.toMatch( + /asset_id|assetUrl|asset_url|chunkId|chunk_id|https?:\/\//, + ); + expect(answer.citations.map((citation) => citation.assetUrl)).toEqual([ + undefined, + frontAssetUrl, + backAssetUrl, + ]); + expect( + answer.citations.filter( + (citation) => citation.assetUrl === frontAssetUrl, + ), + ).toHaveLength(1); + expect(answer.citations[1]?.source).toMatchObject({ + sourceFileName: "商务标文件.pdf", + sectionPath: "二、法定代表人身份证明 / 身份证正面", + }); + }); + it("returns the agent answer without citations when retrieval has no results", async () => { const retrieval = { query: vi.fn().mockResolvedValue({ @@ -532,6 +660,7 @@ describe("answerQuestionWithRetrieval", () => { query: "Tesla Q4 2025 Update energy generation and storage deployments", topK: 8, useAgentic: true, + dataType: 1, }); expect(generateAnswer).toHaveBeenCalledWith({ question: "What about energy storage in this document?", @@ -588,6 +717,7 @@ describe("answerQuestionWithRetrieval", () => { query: "Tesla energy storage deployments", topK: 8, useAgentic: true, + dataType: 1, }); expect(JSON.stringify(queryInput)).not.toContain( "do-not-append-this-history-to-query", @@ -688,7 +818,10 @@ describe("answerQuestionWithRetrieval", () => { }), }; const generateAnswer = vi.fn(async ({ searchSources }) => { - await searchSources({ query: "SpaceX launch image", dataType: 3 }); + await searchSources({ + query: "SpaceX launch image", + targetContent: "image", + }); return "Here is the launch image."; }); @@ -844,9 +977,8 @@ describe("generateAgenticGroundedAnswer", () => { query: "公民身份证 图片", routerUsed: "workflow_single_step", retrievalPlan: { - intent: "image", + targetContent: "image", purpose: "Find identity-card image evidence.", - priority: 5, }, chunkReferences: [ { @@ -922,10 +1054,9 @@ describe("generateAgenticGroundedAnswer", () => { const settings = getCapturedAgentSettings(agent); const generateInput = getCapturedGenerateInput(capturedGenerateInput); - expect(settings.instructions).toContain("RetrievalQueryResponse") - expect(settings.instructions).toContain("L0/L1 retrieval") - expect(settings.instructions).toContain("typed retrieval plan") - expect(settings.instructions).toContain("dataType=3") + expect(settings.instructions).toContain("markdown output gives guidance") + expect(settings.instructions).toContain("image or text+image search") + expect(settings.instructions).toContain("Read IDs") expect(settings.instructions).toContain( "Do not paste raw prior messages into searchSources.query", ) @@ -945,70 +1076,56 @@ describe("generateAgenticGroundedAnswer", () => { const searchSourcesTool = getCapturedAgentTools(agent).searchSources expect( - getSearchSourcesDataTypeSchema(searchSourcesTool)._def?.innerType?._def - ?.type, - ).toBe("number") + getSearchSourcesTargetContentSchema(searchSourcesTool)._def?.innerType + ?._def?.type, + ).toBe("enum") expect( searchSourcesTool.inputSchema.safeParse({ query: "公民身份证 图片", - dataType: 3, + targetContent: "image", }).success, ).toBe(true) expect( searchSourcesTool.inputSchema.safeParse({ query: "公民身份证 图片", - dataType: 7, + targetContent: "video", }).success, ).toBe(false) const toolOutput = await searchSourcesTool.execute({ query: "公民身份证 图片", - intent: "image", + targetContent: "image", purpose: "Find identity-card image evidence.", - priority: 5, - dataType: 3, }); expect(searchSources).toHaveBeenCalledWith({ query: "公民身份证 图片", - intent: "image", + targetContent: "image", purpose: "Find identity-card image evidence.", - priority: 5, - dataType: 3, }); - expect(toolOutput).toMatchObject({ - query: "公民身份证 图片", - retrievalPlan: { - intent: "image", - purpose: "Find identity-card image evidence.", - priority: 5, - }, - routerUsed: "workflow_single_step", - stopReason: "answer_done", - failureReason: null, - answerText: - "The source includes identity card images. [media asset URL hidden]", - resultCount: 1, - referencedChunkCount: 1, - hasEvidenceText: true, - results: [ - expect.objectContaining({ - chunkType: "image", - hasAssetUrl: true, - content: "Identity card image front side.", - }), - ], - referencedChunks: [ - expect.objectContaining({ - chunkId: "chunk_identity_1", - chunkType: "image", - filePath: "images/id-front.jpg", - hasAssetUrl: true, - }), - ], - agentGuidance: expect.stringContaining("Use this evidence"), + expect(toolOutput).toEqual(expect.any(String)); + expect(toolOutput).toContain("## Retrieval Result"); + expect(toolOutput).toContain("Status: useful_evidence_found"); + expect(toolOutput).toContain("Guidance: Use this evidence"); + expect(toolOutput).toContain("## Evidence"); + expect(toolOutput).toContain("### Result 1"); + expect(toolOutput).toContain("Type: image"); + expect(toolOutput).toContain( + "Source: document-generated.pdf / Assets / images / id-front.jpg", + ); + expect(toolOutput).toContain("Media: image available"); + expect(toolOutput).toContain("Read ID: chunk_identity_1"); + expect(toolOutput).toContain("Identity card image front side."); + expect(toolOutput).not.toContain("https://blob.example"); + expect(toolOutput).not.toContain("assetUrl"); + expect(toolOutput).not.toContain("retrievalPlan"); + expect(toolOutput).not.toContain("decisionTrace"); + expect(getLoggerInfoMeta("chat-agent: tool output")).toMatchObject({ + toolName: "searchSources", + output: expect.objectContaining({ + preview: expect.stringContaining("## Retrieval Result"), + }), }); - expect(JSON.stringify(toolOutput)).not.toContain("https://blob.example"); const chunkOutput = await getCapturedAgentTools(agent).readRetrievedChunk.execute({ id: "chunk_identity_1", @@ -1021,13 +1138,24 @@ describe("generateAgenticGroundedAnswer", () => { offset: 0, limit: 80, }); - expect(chunkOutput).toMatchObject({ - id: "chunk_identity_1", - found: true, - contentSlice: "Full identity card text. [media asset URL hidden]", - hasMoreContent: false, + expect(chunkOutput).toEqual(expect.any(String)); + expect(chunkOutput).toContain("## Retrieved Content"); + expect(chunkOutput).toContain("Status: found"); + expect(chunkOutput).toContain("Read ID: chunk_identity_1"); + expect(chunkOutput).toContain( + "Source: document-generated.pdf / Assets / images / id-front.jpg", + ); + expect(chunkOutput).toContain( + "Full identity card text. [media asset URL hidden]", + ); + expect(chunkOutput).not.toContain("https://blob.example"); + expect(chunkOutput).not.toContain("chunkId"); + expect(getLoggerInfoMeta("chat-agent: tool output")).toMatchObject({ + toolName: "readRetrievedChunk", + output: expect.objectContaining({ + preview: expect.stringContaining("## Retrieved Content"), + }), }); - expect(JSON.stringify(chunkOutput)).not.toContain("https://blob.example"); }); it("uses managed context for stored history and loop steps", async () => { @@ -1134,7 +1262,7 @@ describe("generateAgenticGroundedAnswer", () => { purpose: `Find the matching identity card image. ${"input ".repeat( 300, )}`, - dataType: 3, + targetContent: "image", }, }, ], @@ -1142,28 +1270,28 @@ describe("generateAgenticGroundedAnswer", () => { { toolName: "searchSources", toolCallId: "call_1", - output: { - query: "冯荣洲 身份证 ID card", - routerUsed: "workflow_single_step", - resultCount: 6, - referencedChunkCount: 2, - readableChunkCount: 6, - evidenceText: `Image evidence https://blob.example/id-front.jpg ${"evidence ".repeat( + output: [ + "## Retrieval Result", + "", + "Status: useful_evidence_found", + "Query: 冯荣洲 身份证 ID card", + "Guidance: Use this evidence if it directly answers the user.", + "", + "## Evidence", + `Image evidence https://blob.example/id-front.jpg ${"evidence ".repeat( 600, )}`, - results: [ - { - chunkType: "image", - content: `Identity image content ${"result ".repeat(80)}`, - }, - ], - referencedChunks: [ - { - chunkType: "image", - sectionPath: `Assets / identity card ${"reference ".repeat(80)}`, - }, - ], - }, + "", + "## Results", + "### Result 1", + "Type: image", + "Source: 商务标文件.pdf / 二、法定代表人身份证明", + "Media: image available", + "Read ID: chunk_identity_1", + "", + "Preview:", + `Identity image content ${"result ".repeat(80)}`, + ].join("\n"), }, ], usage: { @@ -1190,28 +1318,19 @@ describe("generateAgenticGroundedAnswer", () => { expect(stepLog.toolCalls[0]?.input.truncated).toBe(true); expect(stepLog.toolResults[0]?.output).toMatchObject({ kind: "searchSources", - query: "冯荣洲 身份证 ID card", - resultCount: 6, - results: [ - { - chunkType: "image", - }, - ], - referencedChunks: [ - { - chunkType: "image", - }, - ], + output: { + truncated: true, + }, }); const searchSourcesOutput = stepLog.toolResults[0] ?.output as SearchSourcesToolOutputLogMeta; - expect(searchSourcesOutput.evidenceText.length).toBeLessThanOrEqual(203); - expect(searchSourcesOutput.results[0]?.content.length).toBeLessThanOrEqual( - 103, + expect(searchSourcesOutput.output.preview).toContain("## Retrieval Result"); + expect(searchSourcesOutput.output.preview).toContain( + "Query: 冯荣洲 身份证 ID card", + ); + expect(searchSourcesOutput.output.preview).toContain( + "[media asset URL hidden]", ); - expect( - searchSourcesOutput.referencedChunks[0]?.summary.length, - ).toBeLessThanOrEqual(103); expect(JSON.stringify(stepMeta)).not.toContain("https://blob.example"); settings.onFinish({ @@ -1231,7 +1350,7 @@ describe("generateAgenticGroundedAnswer", () => { { toolName: "searchSources", toolCallId: "call_1", - output: { evidenceText: "Matched image evidence." }, + output: "## Evidence\nMatched image evidence.", }, ], }, @@ -1290,6 +1409,7 @@ describe("buildGroundedPrompt", () => { }); expect(prompt).toContain("Answer in a natural, friendly, and direct tone."); + expect(prompt).toContain("Use GitHub-flavored Markdown when it improves readability"); expect(prompt).toContain("Start with the answer first."); expect(prompt).toContain("Avoid meta phrases like \"Based on the sources\""); expect(prompt).toContain("Keep answers concise by default"); @@ -1315,6 +1435,8 @@ describe("buildGroundedPrompt", () => { expect(prompt).toContain( "Do not write raw media asset URLs in the answer. They are internal metadata only.", ); + expect(prompt).toContain("Never output JSON metadata blocks"); + expect(prompt).toContain("Never mention asset_id, assetUrl"); expect(prompt).toContain("https://blob.example/images/launch.jpg"); }); }); @@ -1329,16 +1451,19 @@ describe("buildAgenticChatSystemPrompt", () => { expect(prompt).toContain("Always call searchSources") expect(prompt).toContain("readRetrievedChunk") - expect(prompt).toContain("L0/L1 retrieval") - expect(prompt).toContain("typed retrieval plan") - expect(prompt).toContain("evidenceText") - expect(prompt).toContain("failureReason") - expect(prompt).toContain("decisionTrace") - expect(prompt).toContain("remote source index") + expect(prompt).toContain("markdown output gives guidance") + expect(prompt).toContain("Read IDs") + expect(prompt).toContain("image or text+image search") + expect(prompt).toContain("remote index") expect(prompt).toContain("person or section but not an image asset") expect(prompt).toContain("Do not paste raw prior messages") expect(prompt).toContain("身份证") - expect(prompt).toContain("For image requests use intent=image") + expect(prompt).toContain("For image requests, search visual content directly") + expect(prompt).toContain("Never output JSON metadata blocks") + expect(prompt).toContain("Use GitHub-flavored Markdown when it improves readability") + expect(prompt).not.toContain("targetContent maps") + expect(prompt).not.toContain("Read the tool output fields") + expect(prompt).not.toContain("intent=overview") expect(prompt).toContain("商务标文件.pdf") }); }); @@ -1509,13 +1634,7 @@ type AgentLoopStepLogMeta = { type SearchSourcesToolOutputLogMeta = { readonly kind: "searchSources" - readonly evidenceText: string - readonly results: readonly { - readonly content: string - }[] - readonly referencedChunks: readonly { - readonly summary: string - }[] + readonly output: AgentLoopLogPreviewMeta } type AgentLoopLogPreviewMeta = { @@ -1560,7 +1679,7 @@ function getCapturedAgentTools(agent: ToolLoopAgent): CapturedAgentTools { return agent.tools as unknown as CapturedAgentTools } -function getSearchSourcesDataTypeSchema( +function getSearchSourcesTargetContentSchema( tool: CapturedAgentTools["searchSources"], ): CapturedZodSchema { const shape = tool.inputSchema._def?.shape @@ -1569,12 +1688,12 @@ function getSearchSourcesDataTypeSchema( throw new Error("searchSources input schema should expose fields.") } - const dataTypeSchema = fields.dataType - if (!dataTypeSchema) { - throw new Error("searchSources input schema should include dataType.") + const targetContentSchema = fields.targetContent + if (!targetContentSchema) { + throw new Error("searchSources input schema should include targetContent.") } - return dataTypeSchema + return targetContentSchema } function getLoggerInfoMeta(message: string): Record { diff --git a/src/domains/chat/index.ts b/src/domains/chat/index.ts index f258077..940782a 100644 --- a/src/domains/chat/index.ts +++ b/src/domains/chat/index.ts @@ -15,6 +15,7 @@ import { import type { AgenticRetrievalQuery, AgenticRetrievalPlan, + AgenticRetrievalTargetContent, AgenticRetrievalResponse, AnswerQuestionInput, AnswerQuestionResult, @@ -41,6 +42,16 @@ const KNOWHERE_CHUNK_LOG_LIMIT = 100 const NO_RESULTS_ANSWER = "I couldn't find that in your sources." const RAW_URL_PATTERN = /https?:\/\/[^\s)\]}>"']+/g const REDACTED_MEDIA_URL = "[media asset URL hidden]" +const RETRIEVAL_TARGET_CONTENT_DATA_TYPES: Readonly< + Record +> = { + all: 1, + text: 2, + image: 3, + table: 4, + text_image: 5, + text_table: 6, +} as const type RetrievalDataType = NonNullable @@ -139,9 +150,8 @@ export const answerQuestionWithRetrieval = ( signalPathCount: retrievalQueryParams.signalPaths?.length ?? 0, filterMode: retrievalQueryParams.filterMode ?? null, threshold: retrievalQueryParams.threshold ?? null, - intent: retrievalPlan.intent, + targetContent: retrievalPlan.targetContent, purpose: retrievalPlan.purpose, - priority: retrievalPlan.priority, }) try { @@ -162,8 +172,7 @@ export const answerQuestionWithRetrieval = ( ).length, stopReason: response.stopReason ?? null, failureReason: response.failureReason ?? null, - intent: retrievalPlan.intent, - priority: retrievalPlan.priority, + targetContent: retrievalPlan.targetContent, }) logger.info("chat-agent: knowhere query response", { durationMs: Date.now() - startedAt, @@ -175,8 +184,7 @@ export const answerQuestionWithRetrieval = ( query: retrievalQueryParams.query, durationMs: Date.now() - startedAt, error: error instanceof Error ? error.message : String(error), - intent: retrievalPlan.intent, - priority: retrievalPlan.priority, + targetContent: retrievalPlan.targetContent, }) throw error } @@ -308,13 +316,13 @@ function buildRetrievalQueryParams(input: { input.input.query, input.fallbackQuestion, ) - const dataType = normalizeRetrievalDataType(input.input) + const dataType = normalizeRetrievalDataType(input.input.targetContent) return { namespace: input.namespace, query, topK: normalizeTopK(input.input.topK), useAgentic: true, - ...(dataType ? { dataType } : {}), + dataType, ...(input.input.signalPaths && input.input.signalPaths.length > 0 ? { signalPaths: input.input.signalPaths } : {}), @@ -330,9 +338,8 @@ function toAgenticRetrievalPlan( input: AgenticRetrievalQuery, ): AgenticRetrievalPlan { return { - intent: input.intent ?? null, + targetContent: normalizeRetrievalTargetContent(input.targetContent), purpose: normalizeRetrievalPurpose(input.purpose), - priority: normalizeRetrievalPriority(input.priority), } } @@ -342,27 +349,18 @@ function normalizeRetrievalPurpose(value: string | undefined): string | null { return normalized.slice(0, 240) } -function normalizeRetrievalPriority(value: number | undefined): number | null { - if (typeof value !== "number" || !Number.isSafeInteger(value)) return null - return Math.min(Math.max(value, 1), 5) -} - function normalizeRetrievalDataType( - input: AgenticRetrievalQuery, -): RetrievalQueryParams["dataType"] | undefined { - if (isRetrievalDataType(input.dataType)) return input.dataType - if (input.intent === "image") return 3 - if (input.intent === "table") return 4 - return undefined + targetContent: AgenticRetrievalTargetContent | undefined, +): RetrievalDataType { + return RETRIEVAL_TARGET_CONTENT_DATA_TYPES[ + normalizeRetrievalTargetContent(targetContent) + ] } -function isRetrievalDataType(value: unknown): value is RetrievalDataType { - return ( - typeof value === "number" && - Number.isSafeInteger(value) && - value >= 1 && - value <= 6 - ) +function normalizeRetrievalTargetContent( + value: AgenticRetrievalTargetContent | undefined, +): AgenticRetrievalTargetContent { + return value ?? "all" } function createRetrievedChunkContext(): { diff --git a/src/domains/chat/media-assets.test.ts b/src/domains/chat/media-assets.test.ts index 9ad4b3c..32eb409 100644 --- a/src/domains/chat/media-assets.test.ts +++ b/src/domains/chat/media-assets.test.ts @@ -89,6 +89,51 @@ describe("chat media assets", () => { ]) }) + it("deduplicates media citation assets globally by asset URL", async () => { + const assetUrl = "https://blob.example/images/id-front.jpg" + + const results = await enrichRetrievalResultsWithAssetUrls({ + results: [ + makeRetrievalResult({ + chunkType: "image", + assetUrl, + source: { + documentId: "doc_identity", + sourceFileName: "商务标文件.pdf", + sectionPath: "images/id-front.jpg", + }, + }), + makeRetrievalResult({ + chunkType: "image", + assetUrl, + source: { + documentId: "doc_identity", + sourceFileName: "商务标文件.pdf", + sectionPath: "二、法定代表人身份证明 / 身份证正面", + }, + }), + makeRetrievalResult({ + chunkType: "image", + assetUrl: "https://blob.example/images/id-back.jpg", + source: { + documentId: "doc_identity", + sourceFileName: "商务标文件.pdf", + sectionPath: "二、法定代表人身份证明 / 身份证反面", + }, + }), + ], + sources: [], + }) + + expect(results.map((result) => result.assetUrl)).toEqual([ + assetUrl, + "https://blob.example/images/id-back.jpg", + ]) + expect(results[0]?.source.sectionPath).toBe( + "二、法定代表人身份证明 / 身份证正面", + ) + }) + it("formats a bounded media asset context for the grounded prompt", () => { const context = formatRetrievedMediaAssetContext([ makeRetrievalResult({ @@ -133,6 +178,33 @@ describe("chat media assets", () => { ) expect(answer).not.toContain("https://blob.example") }) + + it("removes internal media JSON blocks from generated answer text", () => { + const answer = removeRetrievedMediaAssetUrls( + [ + "这里是相关身份证图片。", + "{\"asset_id\":\"asset_front\",\"assetUrl\":\"https://blob.example/images/id-front.jpg\",\"chunk_id\":\"chunk_front\"}", + ].join("\n"), + [ + makeRetrievalResult({ + chunkType: "image", + assetUrl: "https://blob.example/images/id-front.jpg", + }), + ], + ) + + expect(answer).toBe("这里是相关身份证图片。") + expect(answer).not.toMatch(/asset_id|assetUrl|chunk_id|https?:\/\//) + }) + + it("preserves ordinary JSON answers that do not expose internal metadata", () => { + const answer = removeRetrievedMediaAssetUrls( + "{\"name\":\"冯荣洲\",\"status\":\"matched\"}", + [], + ) + + expect(answer).toBe("{\"name\":\"冯荣洲\",\"status\":\"matched\"}") + }) }) function makeRetrievalResult( diff --git a/src/domains/chat/media-assets.ts b/src/domains/chat/media-assets.ts index 1832019..ce54ca2 100644 --- a/src/domains/chat/media-assets.ts +++ b/src/domains/chat/media-assets.ts @@ -4,6 +4,13 @@ import type { Source } from "@/infrastructure/db/schema" const retrievedMediaAssetLimit = 6 const imageExtensions = [".jpg", ".jpeg", ".png", ".gif", ".webp", ".svg"] as const +const internalMetadataKeys = new Set([ + "asset_id", + "assetUrl", + "asset_url", + "chunkId", + "chunk_id", +]) export type LoadSourceAssetUrls = ( source: Source, @@ -22,7 +29,9 @@ export async function enrichRetrievalResultsWithAssetUrls({ loadSourceAssetUrls, evidenceText, }: RetrievalResultAssetInput): Promise { - if (!loadSourceAssetUrls || results.length === 0) return [...results] + if (!loadSourceAssetUrls || results.length === 0) { + return dedupeMediaCitationResults(results) + } const sourcesByDocumentId = new Map( sources.flatMap((source): readonly [string, Source][] => @@ -49,7 +58,39 @@ export async function enrichRetrievalResultsWithAssetUrls({ }), ) - return enrichedResults.flat() + return dedupeMediaCitationResults(enrichedResults.flat()) +} + +export function dedupeMediaCitationResults( + results: readonly RetrievalResult[], +): RetrievalResult[] { + const dedupedResults: RetrievalResult[] = [] + const resultIndexesByAssetUrl = new Map() + + for (const result of results) { + const assetUrl = getTrimmedString(result.assetUrl) + if (!assetUrl || !isRenderableMediaAsset(result, assetUrl)) { + dedupedResults.push(result) + continue + } + + const existingIndex = resultIndexesByAssetUrl.get(assetUrl) + if (existingIndex === undefined) { + resultIndexesByAssetUrl.set(assetUrl, dedupedResults.length) + dedupedResults.push(result) + continue + } + + const existingResult = dedupedResults[existingIndex] + if ( + existingResult && + compareMediaCitationResult(result, existingResult, assetUrl) > 0 + ) { + dedupedResults[existingIndex] = result + } + } + + return dedupedResults } export function formatRetrievedMediaAssetContext( @@ -90,15 +131,54 @@ export function removeRetrievedMediaAssetUrls( .filter((assetUrl): assetUrl is string => assetUrl !== null), ), ) - if (assetUrls.length === 0) return answer - - const sanitizedAnswer = assetUrls - .flatMap(getAssetUrlTextVariants) - .reduce(removeAssetUrlFromAnswer, answer) + const urlSanitizedAnswer = + assetUrls.length > 0 + ? assetUrls + .flatMap(getAssetUrlTextVariants) + .reduce(removeAssetUrlFromAnswer, answer) + : answer + const sanitizedAnswer = removeInternalMetadataJsonBlocks(urlSanitizedAnswer) return cleanSanitizedAnswer(sanitizedAnswer) } +function compareMediaCitationResult( + candidate: RetrievalResult, + current: RetrievalResult, + assetUrl: string, +): number { + return ( + getMediaCitationResultScore(candidate, assetUrl) - + getMediaCitationResultScore(current, assetUrl) + ) +} + +function getMediaCitationResultScore( + result: RetrievalResult, + assetUrl: string, +): number { + const chunkType = result.chunkType.toLowerCase() + const isImageAsset = isImageAssetUrl(assetUrl) + const isTableAsset = chunkType === "table" + const source = result.source + let score = 0 + + if (chunkType === "image" && isImageAsset) score += 100 + if (isTableAsset) score += 90 + if (chunkType === "image" || chunkType === "table") score += 30 + if (getTrimmedString(source.documentId)) score += 10 + if (getTrimmedString(source.sourceFileName)) score += 20 + + const sectionPath = getTrimmedString(source.sectionPath) + if (sectionPath) { + score += 30 + if (!isAssetFilePath(sectionPath)) score += 15 + score += Math.min(sectionPath.length, 120) / 12 + } + + return score +} + async function getCachedSourceAssetUrls( source: Source, loadSourceAssetUrls: LoadSourceAssetUrls, @@ -206,6 +286,11 @@ function getAssetChunkType( return fallback } +function isAssetFilePath(value: string): boolean { + const normalizedPath = normalizeAssetLookupText(value) + return normalizedPath ? /^(images|tables)\//.test(normalizedPath) : false +} + function resolveAssetReferenceMatches( result: RetrievalResult, assetUrlsByFilePath: Readonly>, @@ -389,6 +474,94 @@ function removeAssetUrlFromAnswer(answer: string, assetUrl: string): string { .replace(new RegExp(escapedAssetUrl, "g"), "") } +function removeInternalMetadataJsonBlocks(answer: string): string { + let output = "" + let index = 0 + + while (index < answer.length) { + if (answer[index] !== "{") { + output += answer[index] + index += 1 + continue + } + + const objectEndIndex = findJsonObjectEndIndex(answer, index) + if (objectEndIndex === null) { + output += answer[index] + index += 1 + continue + } + + const objectText = answer.slice(index, objectEndIndex + 1) + if (isInternalMetadataJsonObject(objectText)) { + index = objectEndIndex + 1 + continue + } + + output += objectText + index = objectEndIndex + 1 + } + + return output +} + +function findJsonObjectEndIndex(value: string, startIndex: number): number | null { + let depth = 0 + let isInsideString = false + let isEscaped = false + + for (let index = startIndex; index < value.length; index += 1) { + const char = value[index] + if (isInsideString) { + if (isEscaped) { + isEscaped = false + continue + } + if (char === "\\") { + isEscaped = true + continue + } + if (char === "\"") { + isInsideString = false + } + continue + } + + if (char === "\"") { + isInsideString = true + continue + } + if (char === "{") { + depth += 1 + continue + } + if (char === "}") { + depth -= 1 + if (depth === 0) return index + } + } + + return null +} + +function isInternalMetadataJsonObject(value: string): boolean { + try { + return hasInternalMetadataKey(JSON.parse(value)) + } catch { + return false + } +} + +function hasInternalMetadataKey(value: unknown): boolean { + if (!value || typeof value !== "object") return false + if (Array.isArray(value)) return value.some(hasInternalMetadataKey) + + return Object.entries(value).some( + ([key, nestedValue]): boolean => + internalMetadataKeys.has(key) || hasInternalMetadataKey(nestedValue), + ) +} + function cleanSanitizedAnswer(answer: string): string { const cleanedAnswer = answer .replace(/[ \t]+([,.;:!?])/g, "$1") diff --git a/src/domains/chat/prompt.ts b/src/domains/chat/prompt.ts index c88773c..e00c3d7 100644 --- a/src/domains/chat/prompt.ts +++ b/src/domains/chat/prompt.ts @@ -10,7 +10,7 @@ import { import { Effect } from "effect" import type { RetrievalQueryResponse, - RetrievalReferencedChunk, + RetrievalSource, } from "@ontos-ai/knowhere-sdk" import { z } from "zod" @@ -48,8 +48,6 @@ const TOOL_CHUNK_READ_LIMIT_MAX = 4_000 const AGENT_LOOP_TOOL_INPUT_LOG_LIMIT = 1_200 const AGENT_LOOP_TOOL_OUTPUT_LOG_LIMIT = 2_400 const AGENT_LOOP_TOOL_LOG_ENTRY_LIMIT = 4 -const KNOWHERE_TOOL_TEXT_LOG_LIMIT = 200 -const KNOWHERE_TOOL_CHUNK_LOG_LIMIT = 100 const RAW_URL_PATTERN = /https?:\/\/[^\s)\]}>"']+/g const REDACTED_MEDIA_URL = "[media asset URL hidden]" @@ -125,40 +123,12 @@ type AgentLoopStepLog = { type AgentLoopSearchSourcesOutputLog = { readonly kind: "searchSources" - readonly query: string | null - readonly routerUsed: string | null - readonly stopReason: string | null - readonly failureReason: string | null - readonly resultCount: number | null - readonly referencedChunkCount: number | null - readonly readableChunkCount: number | null - readonly answerText: string | null - readonly evidenceText: string - readonly results: readonly AgentLoopChunkContentLog[] - readonly referencedChunks: readonly AgentLoopChunkSummaryLog[] - readonly chunkReferences: readonly AgentLoopChunkSummaryLog[] + readonly output: AgentLoopLogPreview } type AgentLoopReadChunkOutputLog = { readonly kind: "readRetrievedChunk" - readonly found: boolean | null - readonly chunkType: string | null - readonly offset: number | null - readonly limit: number | null - readonly contentLength: number | null - readonly contentSlice: string - readonly hasMoreContent: boolean | null - readonly nextOffset: number | null -} - -type AgentLoopChunkContentLog = { - readonly chunkType: string | null - readonly content: string -} - -type AgentLoopChunkSummaryLog = { - readonly chunkType: string | null - readonly summary: string + readonly output: AgentLoopLogPreview } type LlmModelMessageLog = { @@ -339,11 +309,15 @@ export function buildGroundedPrompt(input: BuildGroundedPromptInput): string { "Cite document sections (e.g. [文档名 / 章节名]) when they support a claim.", "When retrieved image or table asset references are relevant to the user's request, cite the matching source label; the UI renders media from citation metadata.", "Do not write raw media asset URLs in the answer. They are internal metadata only.", + "Never output JSON metadata blocks for citations, images, tables, or media.", + "Never mention asset_id, assetUrl, raw URLs, chunk ids, request-local ids, or retrieval internals.", + "For image requests, answer briefly and let the UI render images from citation metadata.", "Do not invent asset URLs; use only the retrieved media asset references listed below.", "If the sources are related but incomplete, answer what you can and briefly say what is not covered.", "Do not invent document-specific facts that are not in the sources.", "Use the recent conversation only to resolve references like \"this document\"; do not use it as factual evidence.", "Answer in a natural, friendly, and direct tone.", + "Use GitHub-flavored Markdown when it improves readability, such as short lists, tables, or code blocks. Keep simple answers as plain sentences.", "Start with the answer first. Avoid meta phrases like \"Based on the sources\" or \"Based on the source excerpts\" unless the user asks how you know.", "Use plain language.", "Keep answers concise by default: 1-3 short paragraphs unless the user asks for detail.", @@ -379,38 +353,44 @@ export function buildAgenticChatSystemPrompt( const sourceContext = formatSourceContext(input.sources, input.excludedSourceIds) return [ + "Role", "You are a Notebook research agent that answers user questions from their uploaded sources.", + "Use retrieved source evidence as the factual source of truth. Do not invent document-specific facts.", + "", + "Retrieval strategy", "You have two tools: searchSources and readRetrievedChunk.", - "searchSources runs Knowhere retrieval and returns a RetrievalQueryResponse summary with compact previews and request-local chunk ids.", - "readRetrievedChunk reads more content from a chunk id returned by searchSources in this same answer run.", - "Treat each tool result like external context from a remote source index: inspect it, reason over it, then decide whether to retrieve again or read more from a returned chunk.", - "Use searchSources like L0/L1 retrieval: compact previews are for quick relevance, navigation, and rerank-style selection. Use readRetrievedChunk like L2 detail: full content slices are loaded only after a returned chunk looks relevant.", + "Use searchSources for source discovery. Its markdown output gives guidance, evidence, result previews, and Read IDs.", + "Use readRetrievedChunk only when a relevant search result preview is too short and the markdown output shows a Read ID.", + "Treat tool output like source notes from a remote index: inspect it, reason over it, then decide whether to answer, search again, or read more.", "", - "Agent loop rules:", + "Tool use rules", "1. Always call searchSources before writing a final answer.", - "2. Before each searchSources call, choose a typed retrieval plan: intent, purpose, and priority. This is Notebook-side intent analysis for the agent loop.", - "3. Use intent=overview for broad discovery, entity for people/organizations, section for located headings/paths, image for visual assets, table for tabular evidence, detail for precise facts, and citation for source verification.", - "4. Read the tool output fields: retrievalPlan, evidenceText, answerText, results, referencedChunks, chunkReferences, stopReason, failureReason, and decisionTrace.", - "5. Use one response to guide the next query: carry forward discovered people, organizations, document names, section paths, file paths, chunk types, and failure reasons.", - "6. If evidenceText/results/referencedChunks directly support the answer, stop searching and answer.", - "7. If failureReason is present, result counts are zero, or evidence does not cover the user's requested entity/topic/media, call searchSources again with a more specific or broader query.", - "8. For image requests use intent=image and dataType=3 or dataType=5. If an initial text result identifies a relevant person or section but not an image asset, query again with that person/section plus the requested image concept, e.g. identity card / 身份证 / 公民身份证明.", - "9. For table requests use intent=table and dataType=4 or dataType=6.", - "10. Do not paste raw prior messages into searchSources.query. The query must be concise and contain only distilled search terms such as document title, person, topic, date, section path, or asset kind.", - "11. If a returned chunk preview looks relevant but you want more data before answering, call readRetrievedChunk with that chunk id plus offset/limit. If hasMoreContent is true and the next slice is still needed, call readRetrievedChunk again with nextOffset.", - "12. Use readRetrievedChunk selectively; do not read every chunk when the previews already answer the question.", - "13. Stop after enough evidence or when further searches are unlikely to help; then clearly say what was not found and what retrieval context was missing.", + "2. Choose the content target from the user's request: broad questions use broad or text-only search, image requests use image or text+image search, and table requests use table or text+table search.", + "3. Do not paste raw prior messages into searchSources.query. The query must be concise and contain only distilled search terms such as document title, person, topic, date, section path, or asset kind.", + "4. Use one response to guide the next query: carry forward discovered people, organizations, document names, section paths, file paths, content types, and failure hints.", + "5. If the markdown guidance says the evidence is useful and the evidence/results directly support the answer, stop searching and answer.", + "6. If results are missing, weak, or do not cover the requested entity/topic/media/table, search again with a broader or more specific query.", + "7. Use readRetrievedChunk selectively; do not read every result when the previews already answer the question.", + "8. Stop after enough evidence or when further searches are unlikely to help; then clearly say what was not found and what retrieval context was missing.", "", - "Answering rules:", - "Use retrieved evidence as the factual source of truth.", - "Do not invent document-specific facts.", + "Media/table handling", + "For image requests, search visual content directly or combine text and image evidence. If an initial text result identifies a relevant person or section but not an image asset, query again with that person/section plus the requested image concept, e.g. identity card / 身份证 / 公民身份证明.", + "For table requests, search table content directly or combine text and table evidence.", + "When retrieved image or table assets are relevant, cite the matching source label; the UI renders media from citation metadata.", + "Do not invent asset URLs or describe hidden asset metadata.", + "", + "Final answer contract", "Conversation context is supplied as managed model messages. Use it only to resolve references like \"this document\" or \"those images\".", "Cite document sections in the answer, e.g. [文档名 / 章节名].", - "When retrieved image or table assets are relevant, cite the matching source label; the UI renders media from citation metadata.", - "Do not write raw media asset URLs in the answer. They are internal metadata only.", + "Use existing [Source N: label] labels only when they are the clearest available citation form.", + "Never output JSON metadata blocks for citations, images, tables, or media.", + "Never mention asset_id, assetUrl, raw URLs, chunk ids, Read IDs, tool parameters, or retrieval internals.", + "For image requests, answer briefly and let the UI render images from citation metadata.", + "Do not add unrelated personal details for send/show image requests unless the user asks.", + "Use GitHub-flavored Markdown when it improves readability, such as short lists, tables, or code blocks. Keep simple answers as plain sentences.", "Start with the answer first. Keep answers concise unless the user asks for detail.", "", - "Searchable sources:", + "Searchable sources", sourceContext, ].join("\n") } @@ -476,10 +456,9 @@ function buildAgenticChatTools( searchSources: tool({ description: "Search the user's Notebook sources through Knowhere retrieval. " + - "Treat each response as external context from a remote source index. " + - "Use it before answering, include a typed retrieval plan, and call it " + - "again with refined text, media, or section-path queries when the " + - "RetrievalQueryResponse says evidence is missing or weak.", + "It returns markdown source notes with guidance, evidence, previews, " + + "and Read IDs for follow-up reads. Use it before answering and call it " + + "again with refined text, media, or section-path queries when evidence is missing or weak.", inputSchema: z.object({ query: z .string() @@ -487,19 +466,18 @@ function buildAgenticChatTools( .describe( "A concise, self-contained retrieval query. Do not paste raw chat history or previous messages. Use only distilled terms such as document title, person, topic, date, section path, or asset kind when needed.", ), - intent: z + targetContent: z .enum([ - "overview", - "entity", - "section", + "all", + "text", "image", "table", - "detail", - "citation", + "text_image", + "text_table", ]) .optional() .describe( - "Typed retrieval intent for the agent loop: overview, entity, section, image, table, detail, or citation. Use image/table for visual or tabular requests.", + "The content type to retrieve: all, text, image, table, text_image, or text_table. Omit only when all content types are useful.", ), purpose: z .string() @@ -509,15 +487,6 @@ function buildAgenticChatTools( .describe( "Short reason this query is needed, such as finding an entity, locating an image asset, or verifying a citation.", ), - priority: z - .number() - .int() - .min(1) - .max(5) - .optional() - .describe( - "Planner priority from 1-5. Use 5 for required evidence and lower values for exploratory follow-up.", - ), topK: z .number() .int() @@ -525,15 +494,6 @@ function buildAgenticChatTools( .max(12) .optional() .describe("Number of chunks to return. Defaults to 8."), - dataType: z - .number() - .int() - .min(1) - .max(6) - .optional() - .describe( - "Optional chunk type filter: 1=all, 2=text, 3=image, 4=table, 5=text+image, 6=text+table.", - ), signalPaths: z .array(z.string().min(1)) .max(8) @@ -554,20 +514,24 @@ function buildAgenticChatTools( .optional() .describe("Optional minimum retrieval score threshold."), }), - execute: async (queryInput: AgenticRetrievalQuery) => - buildRetrievalToolOutput(await input.searchSources(queryInput)), + execute: async (queryInput: AgenticRetrievalQuery) => { + const output = buildRetrievalToolOutput( + await input.searchSources(queryInput), + ) + logToolMarkdownOutput("searchSources", output) + return output + }, }), readRetrievedChunk: tool({ description: - "Read an offset/limit content slice from a request-local chunk id " + - "returned by searchSources. Use this when a returned chunk preview is relevant " + - "and you want more data before answering.", + "Read an offset/limit content slice from a Read ID shown in searchSources markdown. " + + "Use this when a returned result preview is relevant and you want more data before answering.", inputSchema: z.object({ id: z .string() .min(1) .describe( - "The request-local id or chunkId from searchSources.results, searchSources.referencedChunks, or searchSources.chunkReferences.", + "The Read ID shown in searchSources markdown for a relevant result.", ), offset: z .number() @@ -585,10 +549,13 @@ function buildAgenticChatTools( `Maximum characters to return. Defaults to ${TOOL_CHUNK_READ_LIMIT_DEFAULT}; max ${TOOL_CHUNK_READ_LIMIT_MAX}.`, ), }), - execute: async (readInput: ReadRetrievedChunkInput) => - buildRetrievedChunkToolOutput( + execute: async (readInput: ReadRetrievedChunkInput) => { + const output = buildRetrievedChunkToolOutput( await input.readRetrievedChunk(readInput), - ), + ) + logToolMarkdownOutput("readRetrievedChunk", output) + return output + }, }), } as const } @@ -696,98 +663,20 @@ function formatAgentLoopToolOutput( output: unknown, ): AgentLoopToolOutputLog { if (toolName === "searchSources") { - return formatSearchSourcesToolOutput(output) + return { + kind: "searchSources", + output: buildAgentLoopPreview(output, AGENT_LOOP_TOOL_OUTPUT_LOG_LIMIT), + } } if (toolName === "readRetrievedChunk") { - return formatReadRetrievedChunkToolOutput(output) + return { + kind: "readRetrievedChunk", + output: buildAgentLoopPreview(output, AGENT_LOOP_TOOL_OUTPUT_LOG_LIMIT), + } } return buildAgentLoopPreview(output, AGENT_LOOP_TOOL_OUTPUT_LOG_LIMIT) } -function formatSearchSourcesToolOutput( - output: unknown, -): AgentLoopSearchSourcesOutputLog { - const record = getRecordFromUnknown(output) - return { - kind: "searchSources", - query: getRecordString(record, "query"), - routerUsed: getRecordString(record, "routerUsed"), - stopReason: getRecordString(record, "stopReason"), - failureReason: getRecordString(record, "failureReason"), - resultCount: getRecordNumber(record, "resultCount"), - referencedChunkCount: getRecordNumber(record, "referencedChunkCount"), - readableChunkCount: getRecordNumber(record, "readableChunkCount"), - answerText: truncateAgentLoopLogTextOrNull( - getRecordString(record, "answerText"), - KNOWHERE_TOOL_TEXT_LOG_LIMIT, - ), - evidenceText: truncateAgentLoopLogText( - getRecordString(record, "evidenceText") ?? "", - KNOWHERE_TOOL_TEXT_LOG_LIMIT, - ), - results: getRecordArray(record, "results").map(formatToolOutputChunkContent), - referencedChunks: getRecordArray(record, "referencedChunks").map( - formatToolOutputChunkSummary, - ), - chunkReferences: getRecordArray(record, "chunkReferences").map( - formatToolOutputChunkSummary, - ), - } -} - -function formatReadRetrievedChunkToolOutput( - output: unknown, -): AgentLoopReadChunkOutputLog { - const record = getRecordFromUnknown(output) - return { - kind: "readRetrievedChunk", - found: getRecordBoolean(record, "found"), - chunkType: getRecordString(record, "chunkType"), - offset: getRecordNumber(record, "offset"), - limit: getRecordNumber(record, "limit"), - contentLength: getRecordNumber(record, "contentLength"), - contentSlice: truncateAgentLoopLogText( - getRecordString(record, "contentSlice") ?? "", - KNOWHERE_TOOL_CHUNK_LOG_LIMIT, - ), - hasMoreContent: getRecordBoolean(record, "hasMoreContent"), - nextOffset: getRecordNumber(record, "nextOffset"), - } -} - -function formatToolOutputChunkContent( - value: unknown, -): AgentLoopChunkContentLog { - const record = getRecordFromUnknown(value) - return { - chunkType: getRecordString(record, "chunkType"), - content: truncateAgentLoopLogText( - getFirstRecordString(record, ["content", "contentPreview"]), - KNOWHERE_TOOL_CHUNK_LOG_LIMIT, - ), - } -} - -function formatToolOutputChunkSummary( - value: unknown, -): AgentLoopChunkSummaryLog { - const record = getRecordFromUnknown(value) - const source = getRecordFromUnknown(record?.source) - return { - chunkType: getRecordString(record, "chunkType"), - summary: truncateAgentLoopLogText( - getFirstRecordString(record, [ - "summary", - "sectionPath", - "filePath", - "content", - "contentPreview", - ]) || getRecordString(source, "sectionPath") || "", - KNOWHERE_TOOL_CHUNK_LOG_LIMIT, - ), - } -} - function getOmittedAgentLoopEntryCount(entries: readonly unknown[]): number { return Math.max(0, entries.length - AGENT_LOOP_TOOL_LOG_ENTRY_LIMIT) } @@ -878,19 +767,6 @@ function createAgentLoopLogJsonReplacer(): ( } } -function truncateAgentLoopLogText(value: string, limit: number): string { - const normalized = redactRawUrls(value).replace(/\s+/g, " ").trim() - if (normalized.length <= limit) return normalized - return `${normalized.slice(0, limit)}...` -} - -function truncateAgentLoopLogTextOrNull( - value: string | null, - limit: number, -): string | null { - return value === null ? null : truncateAgentLoopLogText(value, limit) -} - function getRecordFromUnknown( value: unknown, ): Readonly> | null { @@ -914,14 +790,6 @@ function getRecordNumber( return typeof value === "number" ? value : null } -function getRecordBoolean( - record: Readonly> | null, - key: string, -): boolean | null { - const value = record?.[key] - return typeof value === "boolean" ? value : null -} - function getRecordArray( record: Readonly> | null, key: string, @@ -938,14 +806,6 @@ function getFirstRecordValue( return matchingKey ? record?.[matchingKey] : undefined } -function getFirstRecordString( - record: Readonly> | null, - keys: readonly string[], -): string { - const value = getFirstRecordValue(record, keys) - return typeof value === "string" ? value : "" -} - function buildAgenticChatMessages( input: Pick, ): ModelMessage[] { @@ -1061,131 +921,163 @@ function getUnknownTextLength(value: unknown): number { return JSON.stringify(value).length } -function buildRetrievalToolOutput(response: AgenticRetrievalResponse): object { - return { - namespace: response.namespace, - query: response.query, - retrievalPlan: response.retrievalPlan ?? null, - routerUsed: response.routerUsed, - stopReason: response.stopReason ?? null, - failureReason: response.failureReason ?? null, - answerText: response.answerText - ? redactRawUrls(response.answerText) - : response.answerText, - resultCount: response.results.length, - referencedChunkCount: response.referencedChunks.length, - readableChunkCount: response.chunkReferences.length, - hasEvidenceText: Boolean(response.evidenceText?.trim()), - evidenceText: truncateSafeContextTextToLimit( - response.evidenceText ?? "", - TOOL_EVIDENCE_CHAR_LIMIT, - ), - results: response.chunkReferences - .filter((reference): boolean => reference.kind === "result") - .map(formatToolResultReference), - referencedChunks: response.referencedChunks.map(formatToolReferencedChunk), - chunkReferences: response.chunkReferences.map(formatToolChunkReference), - decisionTrace: - response.decisionTrace - ?.slice(-6) - .map((trace) => redactRawUrlsFromUnknown(trace)) ?? [], - agentGuidance: getRetrievalResponseGuidance(response), - } +function buildRetrievalToolOutput(response: AgenticRetrievalResponse): string { + const resultReferences = response.chunkReferences.filter( + (reference): boolean => reference.kind === "result", + ) + const relatedReferences = response.chunkReferences.filter( + (reference): boolean => reference.kind === "referencedChunk", + ) + const lines = [ + "## Retrieval Result", + "", + `Status: ${getRetrievalResponseStatus(response)}`, + `Query: ${redactRawUrls(response.query)}`, + `Guidance: ${getRetrievalResponseGuidance(response)}`, + "", + "## Summary", + formatOptionalMarkdownText(response.answerText, "No answer summary returned."), + "", + "## Evidence", + formatOptionalMarkdownText(response.evidenceText, "No evidence text returned."), + "", + "## Results", + ...formatResultReferencesMarkdown(resultReferences), + "", + "## Related Sources", + ...formatRelatedReferencesMarkdown(relatedReferences), + ] + + return lines.join("\n") } -function formatToolResultReference(reference: RetrievedChunkReference): object { - return { - id: reference.id, - chunkId: reference.chunkId, - resultIndex: reference.resultIndex, - chunkType: reference.chunkType, - score: reference.score, - hasAssetUrl: reference.hasAssetUrl, - contentLength: reference.contentLength, - contentTruncated: reference.contentTruncated, - source: { - documentId: reference.source.documentId ?? null, - sourceFileName: reference.source.sourceFileName - ? redactRawUrls(reference.source.sourceFileName) - : null, - sectionPath: reference.source.sectionPath - ? redactRawUrls(reference.source.sectionPath) - : null, - }, - contentPreview: truncateSafeContextTextToLimit( - reference.contentPreview, - TOOL_RESULT_CONTENT_CHAR_LIMIT, - ), - content: truncateSafeContextTextToLimit( - reference.contentPreview, - TOOL_RESULT_CONTENT_CHAR_LIMIT, +function formatResultReferencesMarkdown( + references: readonly RetrievedChunkReference[], +): readonly string[] { + if (references.length === 0) return ["- No direct results returned."] + + return references.flatMap((reference, index): readonly string[] => [ + `### Result ${reference.resultIndex ?? index + 1}`, + `Type: ${reference.chunkType}`, + `Source: ${formatToolSourceLabel(reference.source)}`, + `Media: ${formatMediaAvailability(reference)}`, + `Read ID: ${reference.id}`, + `More content available: ${reference.contentTruncated ? "yes" : "no"}`, + "", + "Preview:", + formatMarkdownCodeBlock( + truncateSafeContextTextToLimit( + reference.contentPreview, + TOOL_RESULT_CONTENT_CHAR_LIMIT, + ) || "No preview text returned.", ), - } + "", + ]) } -function formatToolReferencedChunk(chunk: RetrievalReferencedChunk): object { - return { - id: chunk.chunkId, - chunkId: chunk.chunkId, - documentId: chunk.documentId, - chunkType: chunk.chunkType, - sectionPath: redactRawUrls(chunk.sectionPath), - filePath: chunk.filePath ? redactRawUrls(chunk.filePath) : null, - hasAssetUrl: Boolean(chunk.assetUrl), - } -} +function formatRelatedReferencesMarkdown( + references: readonly RetrievedChunkReference[], +): readonly string[] { + if (references.length === 0) return ["- No related sources returned."] -function formatToolChunkReference(reference: RetrievedChunkReference): object { - return { - id: reference.id, - chunkId: reference.chunkId, - kind: reference.kind, - resultIndex: reference.resultIndex, - chunkType: reference.chunkType, - score: reference.score, - hasAssetUrl: reference.hasAssetUrl, - contentLength: reference.contentLength, - contentTruncated: reference.contentTruncated, - source: { - documentId: reference.source.documentId ?? null, - sourceFileName: reference.source.sourceFileName - ? redactRawUrls(reference.source.sourceFileName) - : null, - sectionPath: reference.source.sectionPath - ? redactRawUrls(reference.source.sectionPath) - : null, - }, - } + return references.flatMap((reference, index): readonly string[] => [ + `### Related Source ${index + 1}`, + `Type: ${reference.chunkType}`, + `Source: ${formatToolSourceLabel(reference.source)}`, + `Media: ${formatMediaAvailability(reference)}`, + "", + ]) } function buildRetrievedChunkToolOutput( result: ReadRetrievedChunkResult, -): object { - return { - id: result.id, - chunkId: result.chunkId, - found: result.found, - chunkType: result.chunkType, - score: result.score, - source: result.source - ? { - documentId: result.source.documentId ?? null, - sourceFileName: result.source.sourceFileName - ? redactRawUrls(result.source.sourceFileName) - : null, - sectionPath: result.source.sectionPath - ? redactRawUrls(result.source.sectionPath) - : null, - } - : null, - hasAssetUrl: result.hasAssetUrl, - offset: result.offset, - limit: result.limit, - contentLength: result.contentLength, - contentSlice: redactRawUrls(result.contentSlice), - hasMoreContent: result.hasMoreContent, - nextOffset: result.nextOffset, +): string { + if (!result.found) { + return [ + "## Retrieved Content", + "", + "Status: not_found", + `Read ID: ${result.id}`, + "Guidance: The requested Read ID was not found. Search again or use a Read ID shown in the latest retrieval result.", + ].join("\n") + } + + return [ + "## Retrieved Content", + "", + "Status: found", + `Read ID: ${result.id}`, + `Type: ${result.chunkType ?? "unknown"}`, + `Source: ${result.source ? formatToolSourceLabel(result.source) : "Unknown source"}`, + `Media: ${result.hasAssetUrl ? "available" : "none"}`, + `Returned range: ${result.offset}-${result.offset + result.contentSlice.length} of ${result.contentLength} characters`, + `More content available: ${result.hasMoreContent ? "yes" : "no"}`, + ...(result.nextOffset === null ? [] : [`Next offset: ${result.nextOffset}`]), + "", + "## Content", + formatMarkdownCodeBlock(redactRawUrls(result.contentSlice)), + ].join("\n") +} + +function getRetrievalResponseStatus( + response: RetrievalQueryResponse, +): + | "useful_evidence_found" + | "needs_refinement" + | "needs_review" + | "no_results" { + const hasEvidence = Boolean(response.evidenceText?.trim()) + const hasResults = + response.results.length > 0 || response.referencedChunks.length > 0 + + if (response.failureReason) return "needs_refinement" + if (!hasEvidence && !hasResults) return "no_results" + if (response.stopReason && response.stopReason !== "answer_done") { + return "needs_review" } + return "useful_evidence_found" +} + +function formatOptionalMarkdownText( + value: string | null | undefined, + fallback: string, +): string { + const normalized = truncateSafeContextTextToLimit( + value ?? "", + TOOL_EVIDENCE_CHAR_LIMIT, + ) + return formatMarkdownCodeBlock(normalized || fallback) +} + +function formatMarkdownCodeBlock(value: string): string { + return ["```text", value.replaceAll("```", "'''"), "```"].join("\n") +} + +function formatToolSourceLabel(source: RetrievalSource): string { + const label = [ + source.sourceFileName ? redactRawUrls(source.sourceFileName) : null, + source.sectionPath ? redactRawUrls(source.sectionPath) : null, + ] + .filter((value): value is string => Boolean(value?.trim())) + .join(" / ") + + return label || "Unknown source" +} + +function formatMediaAvailability(reference: RetrievedChunkReference): string { + if (!reference.hasAssetUrl) return "none" + + const chunkType = reference.chunkType.toLowerCase() + if (chunkType === "image") return "image available" + if (chunkType === "table") return "table available" + return "media available" +} + +function logToolMarkdownOutput(toolName: string, output: string): void { + logger.info("chat-agent: tool output", { + toolName, + output: buildAgentLoopPreview(output, AGENT_LOOP_TOOL_OUTPUT_LOG_LIMIT), + }) } function getRetrievalResponseGuidance( @@ -1204,7 +1096,7 @@ function getRetrievalResponseGuidance( if (!hasEvidence && !hasResults) { return ( "No useful evidence was returned. Try a broader query, a different wording, " + - "or a media/table dataType filter if the user asked for images or tables." + "or an image/table-focused content target if the user asked for images or tables." ) } if (response.stopReason && response.stopReason !== "answer_done") { diff --git a/src/domains/chat/service.test.ts b/src/domains/chat/service.test.ts index 5120a68..e449c5d 100644 --- a/src/domains/chat/service.test.ts +++ b/src/domains/chat/service.test.ts @@ -57,6 +57,7 @@ describe("handleChatTurn", () => { query: "What does the document say?", topK: 8, useAgentic: true, + dataType: 1, excludeDocumentIds: ["doc_excluded"], }); expect(generateAnswer).toHaveBeenCalledWith({ @@ -221,6 +222,7 @@ describe("handleChatTurn", () => { query: "Tesla Q4 2025 Update energy generation and storage deployments", topK: 8, useAgentic: true, + dataType: 1, }); }); }); diff --git a/src/domains/sources/route-upload-request.test.ts b/src/domains/sources/route-upload-request.test.ts index 1379e42..7acc944 100644 --- a/src/domains/sources/route-upload-request.test.ts +++ b/src/domains/sources/route-upload-request.test.ts @@ -10,14 +10,21 @@ describe("sourceRouteUploadRequest", () => { }) formData.set("file", file) - await expect( - sourceRouteUploadRequest.read( - new Request("http://localhost/api/sources", { - method: "POST", - body: formData, - }), - ), - ).resolves.toEqual({ type: "file", file }) + const result = await sourceRouteUploadRequest.read( + new Request("http://localhost/api/sources", { + method: "POST", + body: formData, + }), + ) + + expect(result.type).toBe("file") + if (result.type !== "file") { + throw new Error("Expected multipart upload request to return a file.") + } + expect(result.file.name).toBe("notes.pdf") + expect(result.file.type).toBe("application/pdf") + expect(result.file.size).toBe(5) + await expect(result.file.text()).resolves.toBe("hello") }) it("reads Blob-backed Source Upload handoff bodies", async () => { From fcabaefc4c1fb598059e5db3e71de915598ac023 Mon Sep 17 00:00:00 2001 From: suguanYang Date: Fri, 5 Jun 2026 01:55:30 +0800 Subject: [PATCH 10/13] fix(chat): clean image answers and media citations --- src/domains/chat/index.test.ts | 41 ++++-- src/domains/chat/index.ts | 183 +++++++++++++++++++++++++- src/domains/chat/media-assets.test.ts | 33 +++++ src/domains/chat/media-assets.ts | 71 ++++++++-- src/domains/chat/prompt.ts | 30 ++++- 5 files changed, 334 insertions(+), 24 deletions(-) diff --git a/src/domains/chat/index.test.ts b/src/domains/chat/index.test.ts index 2643fb7..ef1901c 100644 --- a/src/domains/chat/index.test.ts +++ b/src/domains/chat/index.test.ts @@ -443,11 +443,10 @@ describe("answerQuestionWithRetrieval", () => { dataType: 3, }); expect(answer.citations.map((citation) => citation.assetUrl)).toEqual([ - undefined, "https://blob.example/images/image-6-id-front.jpg", "https://blob.example/images/image-7-id-back.jpg", ]); - expect(answer.citations.slice(1).map((citation) => citation.chunkType)).toEqual([ + expect(answer.citations.map((citation) => citation.chunkType)).toEqual([ "image", "image", ]); @@ -456,6 +455,7 @@ describe("answerQuestionWithRetrieval", () => { it("sends requested identity-card images without exposing internal media metadata", async () => { const frontAssetUrl = "https://blob.example/images/feng-rongzhou-id-front.jpg"; const backAssetUrl = "https://blob.example/images/feng-rongzhou-id-back.jpg"; + const unrelatedAssetUrl = "https://blob.example/images/company-license.jpg"; const textResult = makeRetrievalResult({ content: "冯荣洲的法定代表人身份证明页包含居民身份证图片。", source: { @@ -503,6 +503,19 @@ describe("answerQuestionWithRetrieval", () => { }), chunkId: "chunk_back", } as RetrievalResult & { readonly chunkId: string }; + const unrelatedImageResult = { + ...makeRetrievalResult({ + chunkType: "image", + content: "公司证照图片。", + assetUrl: unrelatedAssetUrl, + source: { + documentId: "doc_identity", + sourceFileName: "document-generated.pdf", + sectionPath: "images/company-license.jpg", + }, + }), + chunkId: "chunk_company_license", + } as RetrievalResult & { readonly chunkId: string }; const retrieval = { query: vi.fn().mockResolvedValue({ results: [ @@ -510,6 +523,7 @@ describe("answerQuestionWithRetrieval", () => { duplicateFrontResult, richerDuplicateFrontResult, backResult, + unrelatedImageResult, ], evidenceText: "冯荣洲 身份证 图片", referencedChunks: [], @@ -526,7 +540,11 @@ describe("answerQuestionWithRetrieval", () => { purpose: "查找冯荣洲的身份证图片。", }); return [ - "已找到冯荣洲的身份证图片。[商务标文件.pdf / 二、法定代表人身份证明]", + "为您找到冯荣洲的居民身份证图片,相关信息如下:", + "- **姓名**:冯荣洲", + "- **公民身份号码**:123456789012345678", + "- **签发机关**:某公安局", + "- **有效期限**:长期", `{"asset_id":"asset_front","assetUrl":"${frontAssetUrl}","chunkId":"chunk_front_direct"}`, ].join("\n"); }); @@ -557,14 +575,14 @@ describe("answerQuestionWithRetrieval", () => { useAgentic: true, dataType: 3, }); - expect(answer.answer).toBe( - "已找到冯荣洲的身份证图片。[商务标文件.pdf / 二、法定代表人身份证明]", - ); + expect(answer.answer).toBe("已找到相关身份证图片,见下方图片。"); expect(answer.answer).not.toMatch( /asset_id|assetUrl|asset_url|chunkId|chunk_id|https?:\/\//, ); + expect(answer.answer).not.toMatch( + /姓名|公民身份号码|签发机关|有效期限|123456789012345678/, + ); expect(answer.citations.map((citation) => citation.assetUrl)).toEqual([ - undefined, frontAssetUrl, backAssetUrl, ]); @@ -573,7 +591,7 @@ describe("answerQuestionWithRetrieval", () => { (citation) => citation.assetUrl === frontAssetUrl, ), ).toHaveLength(1); - expect(answer.citations[1]?.source).toMatchObject({ + expect(answer.citations[0]?.source).toMatchObject({ sourceFileName: "商务标文件.pdf", sectionPath: "二、法定代表人身份证明 / 身份证正面", }); @@ -1123,7 +1141,7 @@ describe("generateAgenticGroundedAnswer", () => { expect(getLoggerInfoMeta("chat-agent: tool output")).toMatchObject({ toolName: "searchSources", output: expect.objectContaining({ - preview: expect.stringContaining("## Retrieval Result"), + preview: expect.stringContaining("## Retrieval Result\n\nStatus"), }), }); @@ -1153,7 +1171,7 @@ describe("generateAgenticGroundedAnswer", () => { expect(getLoggerInfoMeta("chat-agent: tool output")).toMatchObject({ toolName: "readRetrievedChunk", output: expect.objectContaining({ - preview: expect.stringContaining("## Retrieved Content"), + preview: expect.stringContaining("## Retrieved Content\n\nStatus"), }), }); }); @@ -1325,6 +1343,7 @@ describe("generateAgenticGroundedAnswer", () => { const searchSourcesOutput = stepLog.toolResults[0] ?.output as SearchSourcesToolOutputLogMeta; expect(searchSourcesOutput.output.preview).toContain("## Retrieval Result"); + expect(searchSourcesOutput.output.preview).toContain("\n\n## Evidence"); expect(searchSourcesOutput.output.preview).toContain( "Query: 冯荣洲 身份证 ID card", ); @@ -1437,6 +1456,7 @@ describe("buildGroundedPrompt", () => { ); expect(prompt).toContain("Never output JSON metadata blocks"); expect(prompt).toContain("Never mention asset_id, assetUrl"); + expect(prompt).toContain("do not transcribe personal details"); expect(prompt).toContain("https://blob.example/images/launch.jpg"); }); }); @@ -1461,6 +1481,7 @@ describe("buildAgenticChatSystemPrompt", () => { expect(prompt).toContain("For image requests, search visual content directly") expect(prompt).toContain("Never output JSON metadata blocks") expect(prompt).toContain("Use GitHub-flavored Markdown when it improves readability") + expect(prompt).toContain("do not transcribe personal details") expect(prompt).not.toContain("targetContent maps") expect(prompt).not.toContain("Read the tool output fields") expect(prompt).not.toContain("intent=overview") diff --git a/src/domains/chat/index.ts b/src/domains/chat/index.ts index 940782a..c7266aa 100644 --- a/src/domains/chat/index.ts +++ b/src/domains/chat/index.ts @@ -29,6 +29,7 @@ import { } from "./retrieval" import { enrichRetrievalResultsWithAssetUrls, + isImageAssetUrl, removeRetrievedMediaAssetUrls, } from "./media-assets" @@ -237,17 +238,193 @@ export const answerQuestionWithRetrieval = ( evidenceText: formatRetrievalEvidenceText(retrievalResponses), }), ) - const answer = removeRetrievedMediaAssetUrls(generatedAnswer, results) + const answer = sanitizeGeneratedAnswer({ + answer: generatedAnswer, + question, + results, + }) + const citationResults = selectCitationResultsForAnswer({ + question, + results, + }) logger.info("chat-agent: answer complete", { answerLength: answer.length, - citationCount: results.length, + citationCount: citationResults.length, }) return { answer, - citations: toChatCitationViews(results, answer), + citations: toChatCitationViews(citationResults, answer), } }) +type GeneratedAnswerSanitizerInput = { + readonly answer: string + readonly question: string + readonly results: readonly RetrievalResult[] +} + +function sanitizeGeneratedAnswer({ + answer, + question, + results, +}: GeneratedAnswerSanitizerInput): string { + const sanitizedAnswer = removeRetrievedMediaAssetUrls(answer, results) + + if ( + shouldUseConciseImageRequestAnswer({ + answer: sanitizedAnswer, + question, + results, + }) + ) { + return buildConciseImageRequestAnswer(question) + } + + return sanitizedAnswer +} + +function shouldUseConciseImageRequestAnswer({ + answer, + question, + results, +}: GeneratedAnswerSanitizerInput): boolean { + return ( + isShowOrSendImageRequest(question) && + !isExplicitPersonalDetailRequest(question) && + hasImageCitationResult(results) && + shouldSimplifyImageRequestAnswer(answer) + ) +} + +function selectCitationResultsForAnswer(input: { + readonly question: string + readonly results: readonly RetrievalResult[] +}): readonly RetrievalResult[] { + if (!isShowOrSendImageRequest(input.question)) return input.results + + const imageResults = input.results.filter(isImageCitationResult) + if (imageResults.length === 0) return input.results + + const focusedImageResults = filterFocusedImageCitationResults( + input.question, + imageResults, + ) + return focusedImageResults.length > 0 ? focusedImageResults : imageResults +} + +function hasImageCitationResult(results: readonly RetrievalResult[]): boolean { + return results.some(isImageCitationResult) +} + +function isImageCitationResult(result: RetrievalResult): boolean { + const assetUrl = result.assetUrl?.trim() + if (!assetUrl) return false + + return result.chunkType.toLowerCase() === "image" || isImageAssetUrl(assetUrl) +} + +function filterFocusedImageCitationResults( + question: string, + results: readonly RetrievalResult[], +): readonly RetrievalResult[] { + const labelPattern = getFocusedImageCitationLabelPattern(question) + if (!labelPattern) return results + + return results.filter((result): boolean => + labelPattern.test(getImageCitationLabel(result)), + ) +} + +function getFocusedImageCitationLabelPattern(question: string): RegExp | null { + if (/身份证|公民身份|居民身份证|\bid card\b|\bidentity card\b/iu.test(question)) { + return /身份证|居民身份证|\bid card\b|\bidentity card\b/iu + } + + return null +} + +function getImageCitationLabel(result: RetrievalResult): string { + return [ + result.source.sourceFileName, + result.source.sectionPath, + getAssetPathFromCitationUrl(result.assetUrl), + ] + .filter((value): value is string => Boolean(value?.trim())) + .join(" ") +} + +function getAssetPathFromCitationUrl(assetUrl: string | undefined): string | null { + if (!assetUrl) return null + + try { + return decodeURIComponent(new URL(assetUrl).pathname) + } catch { + return assetUrl + } +} + +function isShowOrSendImageRequest(question: string): boolean { + const normalizedQuestion = question.toLowerCase() + const hasImageTerm = + /图片|照片|图像|截图|身份证|\bimage\b|\bimages\b|\bphoto\b|\bphotos\b|\bpicture\b|\bpictures\b|\bscreenshot\b|\bid card\b|\bidentity card\b/u.test( + normalizedQuestion, + ) + const hasActionTerm = + /请将|请把|发送|发给我|发来|给我看|展示|显示|看一下|\bshow\b|\bsend\b|\bdisplay\b|\battach\b|\bgive me\b/u.test( + normalizedQuestion, + ) + + return hasImageTerm && hasActionTerm +} + +function isExplicitPersonalDetailRequest(question: string): boolean { + return /号码|身份证号|身份号码|住址|地址|出生|有效期限|签发机关|姓名|是什么|多少|\bid number\b|\bidentity number\b|\baddress\b|\bbirth\b|\bissuer\b|\bvalid/u.test( + question.toLowerCase(), + ) +} + +function containsPersonalDetailField(answer: string): boolean { + return /公民身份号码|身份号码|身份证号|身份证号码|住址|地址|出生日期|出生|有效期限|签发机关|性别|民族|姓名|\bid number\b|\bidentity number\b|\baddress\b|\bdate of birth\b|\bbirth date\b|\bissuer\b|\bissuing authority\b|\bvalid until\b|\bvalid through\b/i.test( + answer, + ) +} + +function shouldSimplifyImageRequestAnswer(answer: string): boolean { + const trimmedAnswer = answer.trim() + return ( + containsPersonalDetailField(trimmedAnswer) || + containsMarkdownList(trimmedAnswer) || + containsSourceIndexReference(trimmedAnswer) || + trimmedAnswer.length > getConciseImageAnswerLengthLimit(trimmedAnswer) + ) +} + +function containsMarkdownList(value: string): boolean { + return /\n\s*[-*]\s+/u.test(value) +} + +function containsSourceIndexReference(value: string): boolean { + return /\bSource\s+\d+\b/iu.test(value) +} + +function getConciseImageAnswerLengthLimit(answer: string): number { + return containsCjkText(answer) ? 120 : 220 +} + +function buildConciseImageRequestAnswer(question: string): string { + if (containsCjkText(question)) { + return question.includes("身份证") + ? "已找到相关身份证图片,见下方图片。" + : "已找到相关图片,见下方图片。" + } + + return "I found the relevant image. See the image below." +} + +function containsCjkText(value: string): boolean { + return /[\u3400-\u9fff]/u.test(value) +} + function formatKnowhereQueryResponseForLog( response: RetrievalQueryResponse, ): KnowhereQueryResponseLog { diff --git a/src/domains/chat/media-assets.test.ts b/src/domains/chat/media-assets.test.ts index 32eb409..ceded86 100644 --- a/src/domains/chat/media-assets.test.ts +++ b/src/domains/chat/media-assets.test.ts @@ -134,6 +134,39 @@ describe("chat media assets", () => { ) }) + it("deduplicates equivalent media assets served from different URLs", async () => { + const results = await enrichRetrievalResultsWithAssetUrls({ + results: [ + makeRetrievalResult({ + chunkType: "image", + assetUrl: + "https://knowhere-storage.example/results/job_1/images/id-front.jpg?AWSAccessKeyId=test", + source: { + documentId: "doc_identity", + sourceFileName: "商务标文件.pdf", + sectionPath: "Root", + }, + }), + makeRetrievalResult({ + chunkType: "image", + assetUrl: + "https://blob.example/workspaces/workspace_1/sources/source_1/parsed-result/images/id-front.jpg", + source: { + documentId: "doc_identity", + sourceFileName: "商务标文件.pdf", + sectionPath: "images/id-front.jpg", + }, + }), + ], + sources: [], + }) + + expect(results.map((result) => result.assetUrl)).toEqual([ + "https://blob.example/workspaces/workspace_1/sources/source_1/parsed-result/images/id-front.jpg", + ]) + expect(results[0]?.source.sectionPath).toBe("images/id-front.jpg") + }) + it("formats a bounded media asset context for the grounded prompt", () => { const context = formatRetrievedMediaAssetContext([ makeRetrievalResult({ diff --git a/src/domains/chat/media-assets.ts b/src/domains/chat/media-assets.ts index ce54ca2..e87c655 100644 --- a/src/domains/chat/media-assets.ts +++ b/src/domains/chat/media-assets.ts @@ -65,7 +65,7 @@ export function dedupeMediaCitationResults( results: readonly RetrievalResult[], ): RetrievalResult[] { const dedupedResults: RetrievalResult[] = [] - const resultIndexesByAssetUrl = new Map() + const resultIndexesByAssetKey = new Map() for (const result of results) { const assetUrl = getTrimmedString(result.assetUrl) @@ -74,17 +74,25 @@ export function dedupeMediaCitationResults( continue } - const existingIndex = resultIndexesByAssetUrl.get(assetUrl) + const assetKey = getMediaCitationDedupeKey(result, assetUrl) + const existingIndex = resultIndexesByAssetKey.get(assetKey) if (existingIndex === undefined) { - resultIndexesByAssetUrl.set(assetUrl, dedupedResults.length) + resultIndexesByAssetKey.set(assetKey, dedupedResults.length) dedupedResults.push(result) continue } const existingResult = dedupedResults[existingIndex] + const existingAssetUrl = getTrimmedString(existingResult?.assetUrl) if ( existingResult && - compareMediaCitationResult(result, existingResult, assetUrl) > 0 + existingAssetUrl && + compareMediaCitationResult( + result, + existingResult, + assetUrl, + existingAssetUrl, + ) > 0 ) { dedupedResults[existingIndex] = result } @@ -145,11 +153,12 @@ export function removeRetrievedMediaAssetUrls( function compareMediaCitationResult( candidate: RetrievalResult, current: RetrievalResult, - assetUrl: string, + candidateAssetUrl: string, + currentAssetUrl: string, ): number { return ( - getMediaCitationResultScore(candidate, assetUrl) - - getMediaCitationResultScore(current, assetUrl) + getMediaCitationResultScore(candidate, candidateAssetUrl) - + getMediaCitationResultScore(current, currentAssetUrl) ) } @@ -170,15 +179,61 @@ function getMediaCitationResultScore( if (getTrimmedString(source.sourceFileName)) score += 20 const sectionPath = getTrimmedString(source.sectionPath) - if (sectionPath) { + if (sectionPath && !isGenericSectionPath(sectionPath)) { score += 30 if (!isAssetFilePath(sectionPath)) score += 15 score += Math.min(sectionPath.length, 120) / 12 } + if (isNotebookParsedAssetUrl(assetUrl)) score += 25 return score } +function getMediaCitationDedupeKey( + result: RetrievalResult, + assetUrl: string, +): string { + const documentKey = + getTrimmedString(result.source.documentId) ?? + getTrimmedString(result.source.sourceFileName) ?? + "unknown" + const assetPath = + getCanonicalAssetPathFromSource(result) ?? getCanonicalAssetPathFromUrl(assetUrl) + + return assetPath ? `asset:${documentKey}:${assetPath}` : `url:${assetUrl}` +} + +function getCanonicalAssetPathFromSource( + result: RetrievalResult, +): string | null { + const sectionPath = normalizeAssetLookupText(result.source.sectionPath) + return sectionPath && isSupportedAssetPath(sectionPath) ? sectionPath : null +} + +function getCanonicalAssetPathFromUrl(assetUrl: string): string | null { + const normalizedPath = normalizeAssetLookupText(getUrlPathname(assetUrl)) + if (!normalizedPath) return null + + const pathMatch = /(?:^|\/)((?:images|tables)\/[^?#]+)$/.exec(normalizedPath) + if (pathMatch?.[1]) return pathMatch[1] + + const basename = getNormalizedBasename(normalizedPath) + if (!basename) return null + + if (isImageAssetUrl(assetUrl)) return `images/${basename}` + return null +} + +function isGenericSectionPath(value: string): boolean { + return ["root", "unknown source"].includes(value.trim().toLowerCase()) +} + +function isNotebookParsedAssetUrl(assetUrl: string): boolean { + return normalizeAssetLookupText(getUrlPathname(assetUrl))?.includes( + "/parsed-result/", + ) === true +} + async function getCachedSourceAssetUrls( source: Source, loadSourceAssetUrls: LoadSourceAssetUrls, diff --git a/src/domains/chat/prompt.ts b/src/domains/chat/prompt.ts index e00c3d7..6d36814 100644 --- a/src/domains/chat/prompt.ts +++ b/src/domains/chat/prompt.ts @@ -312,6 +312,7 @@ export function buildGroundedPrompt(input: BuildGroundedPromptInput): string { "Never output JSON metadata blocks for citations, images, tables, or media.", "Never mention asset_id, assetUrl, raw URLs, chunk ids, request-local ids, or retrieval internals.", "For image requests, answer briefly and let the UI render images from citation metadata.", + "For send/show image requests, do not transcribe personal details from the image; do not list identity numbers, addresses, birth dates, or document fields unless the user explicitly asks for those details.", "Do not invent asset URLs; use only the retrieved media asset references listed below.", "If the sources are related but incomplete, answer what you can and briefly say what is not covered.", "Do not invent document-specific facts that are not in the sources.", @@ -386,6 +387,7 @@ export function buildAgenticChatSystemPrompt( "Never output JSON metadata blocks for citations, images, tables, or media.", "Never mention asset_id, assetUrl, raw URLs, chunk ids, Read IDs, tool parameters, or retrieval internals.", "For image requests, answer briefly and let the UI render images from citation metadata.", + "For send/show image requests, do not transcribe personal details from the image; do not list identity numbers, addresses, birth dates, or document fields unless the user explicitly asks for those details.", "Do not add unrelated personal details for send/show image requests unless the user asks.", "Use GitHub-flavored Markdown when it improves readability, such as short lists, tables, or code blocks. Keep simple answers as plain sentences.", "Start with the answer first. Keep answers concise unless the user asks for detail.", @@ -665,13 +667,19 @@ function formatAgentLoopToolOutput( if (toolName === "searchSources") { return { kind: "searchSources", - output: buildAgentLoopPreview(output, AGENT_LOOP_TOOL_OUTPUT_LOG_LIMIT), + output: buildAgentLoopMarkdownPreview( + output, + AGENT_LOOP_TOOL_OUTPUT_LOG_LIMIT, + ), } } if (toolName === "readRetrievedChunk") { return { kind: "readRetrievedChunk", - output: buildAgentLoopPreview(output, AGENT_LOOP_TOOL_OUTPUT_LOG_LIMIT), + output: buildAgentLoopMarkdownPreview( + output, + AGENT_LOOP_TOOL_OUTPUT_LOG_LIMIT, + ), } } return buildAgentLoopPreview(output, AGENT_LOOP_TOOL_OUTPUT_LOG_LIMIT) @@ -731,6 +739,19 @@ function buildAgentLoopPreview( } } +function buildAgentLoopMarkdownPreview( + value: unknown, + limit: number, +): AgentLoopLogPreview { + const normalized = redactRawUrls(stringifyAgentLoopLogValue(value)).trim() + const truncated = normalized.length > limit + return { + charLength: normalized.length, + truncated, + preview: truncated ? `${normalized.slice(0, limit)}...` : normalized, + } +} + function stringifyAgentLoopLogValue(value: unknown): string { if (typeof value === "string") return value if (value === undefined) return "undefined" @@ -1076,7 +1097,10 @@ function formatMediaAvailability(reference: RetrievedChunkReference): string { function logToolMarkdownOutput(toolName: string, output: string): void { logger.info("chat-agent: tool output", { toolName, - output: buildAgentLoopPreview(output, AGENT_LOOP_TOOL_OUTPUT_LOG_LIMIT), + output: buildAgentLoopMarkdownPreview( + output, + AGENT_LOOP_TOOL_OUTPUT_LOG_LIMIT, + ), }) } From 6ca6ef1b58d7a5bd4a7d0417432abe96b23939f7 Mon Sep 17 00:00:00 2001 From: suguanYang Date: Fri, 5 Jun 2026 02:46:42 +0800 Subject: [PATCH 11/13] feat(chat): tune agent context budgets --- src/domains/chat/index.test.ts | 102 ++++++++++++++++++++++++++++++--- src/domains/chat/index.ts | 4 +- src/domains/chat/prompt.ts | 48 +++++++++++++--- 3 files changed, 136 insertions(+), 18 deletions(-) diff --git a/src/domains/chat/index.test.ts b/src/domains/chat/index.test.ts index ef1901c..07407f8 100644 --- a/src/domains/chat/index.test.ts +++ b/src/domains/chat/index.test.ts @@ -743,7 +743,7 @@ describe("answerQuestionWithRetrieval", () => { }); it("lets the agent read untruncated content from returned chunk ids", async () => { - const longContent = `${"Earlier context. ".repeat(160)}Critical obligation: retain source receipts.`; + const longContent = `${"Earlier context. ".repeat(300)}Critical obligation: retain source receipts.`; const result = { ...makeRetrievalResult({ content: longContent, @@ -778,14 +778,14 @@ describe("answerQuestionWithRetrieval", () => { const detail = await readRetrievedChunk({ id: "chunk_contract_1", - offset: 2_000, + offset: 4_000, limit: 80, }); expect(detail).toMatchObject({ id: "chunk_contract_1", found: true, - offset: 2_000, + offset: 4_000, limit: 80, contentLength: longContent.length, }); @@ -810,7 +810,7 @@ describe("answerQuestionWithRetrieval", () => { }), ); - expect(answer.answer).toBe(longContent.slice(2_000, 2_080)); + expect(answer.answer).toBe(longContent.slice(4_000, 4_080).trim()); }); it("uses structured referenced chunks from RetrievalQueryResponse as citations", async () => { @@ -965,10 +965,19 @@ describe("generateAgenticGroundedAnswer", () => { text: "Here are the requested identity images.", } as Awaited>); }); + const previewWithinNewLimitMarker = "within-new-preview-limit"; + const previewAfterNewLimitMarker = "after-new-preview-limit"; + const longIdentityPreview = [ + "Identity card image front side.", + "preview ".repeat(170), + previewWithinNewLimitMarker, + "preview ".repeat(70), + previewAfterNewLimitMarker, + ].join(" "); const searchSources = vi.fn().mockResolvedValue({ results: [ makeRetrievalResult({ - content: "Identity card image front side.", + content: longIdentityPreview, chunkType: "image", assetUrl: "https://blob.example/images/id-front.jpg", source: { @@ -1012,9 +1021,9 @@ describe("generateAgenticGroundedAnswer", () => { sectionPath: "Assets / images / id-front.jpg", }, hasAssetUrl: true, - contentLength: "Identity card image front side.".length, - contentPreview: "Identity card image front side.", - contentTruncated: false, + contentLength: longIdentityPreview.length, + contentPreview: longIdentityPreview, + contentTruncated: true, }, ], answerText: @@ -1134,6 +1143,8 @@ describe("generateAgenticGroundedAnswer", () => { expect(toolOutput).toContain("Media: image available"); expect(toolOutput).toContain("Read ID: chunk_identity_1"); expect(toolOutput).toContain("Identity card image front side."); + expect(toolOutput).toContain(previewWithinNewLimitMarker); + expect(toolOutput).not.toContain(previewAfterNewLimitMarker); expect(toolOutput).not.toContain("https://blob.example"); expect(toolOutput).not.toContain("assetUrl"); expect(toolOutput).not.toContain("retrievalPlan"); @@ -1145,7 +1156,21 @@ describe("generateAgenticGroundedAnswer", () => { }), }); - const chunkOutput = await getCapturedAgentTools(agent).readRetrievedChunk.execute({ + const readRetrievedChunkTool = getCapturedAgentTools(agent).readRetrievedChunk; + expect( + readRetrievedChunkTool.inputSchema.safeParse({ + id: "chunk_identity_1", + limit: 8_000, + }).success, + ).toBe(true); + expect( + readRetrievedChunkTool.inputSchema.safeParse({ + id: "chunk_identity_1", + limit: 8_001, + }).success, + ).toBe(false); + + const chunkOutput = await readRetrievedChunkTool.execute({ id: "chunk_identity_1", offset: 0, limit: 80, @@ -1243,6 +1268,34 @@ describe("generateAgenticGroundedAnswer", () => { }), ]), }); + + const hugeLoopMessages = [ + { + role: "user" as const, + content: `huge-loop-message ${"context ".repeat(9_000)}`, + }, + { + role: "assistant" as const, + content: "middle-loop-message", + }, + { + role: "user" as const, + content: "latest-loop-message", + }, + ]; + const preparedHugeStep = settings.prepareStep({ + stepNumber: 1, + messages: hugeLoopMessages, + }) as { readonly messages: readonly ModelMessage[] }; + const serializedHugeStepMessages = JSON.stringify( + preparedHugeStep.messages, + ); + + expect(getTestModelMessagesCharLength(preparedHugeStep.messages)).toBeLessThanOrEqual( + 64_000, + ); + expect(serializedHugeStepMessages).not.toContain("huge-loop-message"); + expect(serializedHugeStepMessages).toContain("latest-loop-message"); }); it("logs bounded tool call and tool result previews for each loop step", async () => { @@ -1623,6 +1676,9 @@ type CapturedAgentTools = { readonly execute: (input: AgenticRetrievalQuery) => Promise } readonly readRetrievedChunk: { + readonly inputSchema: { + readonly safeParse: (value: unknown) => { readonly success: boolean } + } readonly execute: (input: ReadRetrievedChunkInput) => Promise } } @@ -1700,6 +1756,34 @@ function getCapturedAgentTools(agent: ToolLoopAgent): CapturedAgentTools { return agent.tools as unknown as CapturedAgentTools } +function getTestModelMessagesCharLength( + messages: readonly ModelMessage[], +): number { + return messages.reduce( + (totalLength, message): number => + totalLength + getTestUnknownTextLength(message.content), + 0, + ) +} + +function getTestUnknownTextLength(value: unknown): number { + if (typeof value === "string") return value.length + if (Array.isArray(value)) { + return value.reduce( + (totalLength, nestedValue): number => + totalLength + getTestUnknownTextLength(nestedValue), + 0, + ) + } + if (!value || typeof value !== "object") return 0 + + return Object.values(value as Record).reduce( + (totalLength, nestedValue): number => + totalLength + getTestUnknownTextLength(nestedValue), + 0, + ) +} + function getSearchSourcesTargetContentSchema( tool: CapturedAgentTools["searchSources"], ): CapturedZodSchema { diff --git a/src/domains/chat/index.ts b/src/domains/chat/index.ts index c7266aa..b764648 100644 --- a/src/domains/chat/index.ts +++ b/src/domains/chat/index.ts @@ -36,8 +36,8 @@ import { const DEFAULT_TOP_K = 8 const MAX_AGENTIC_TOP_K = 12 const MAX_CITATION_RESULTS = 20 -const DEFAULT_CHUNK_READ_LIMIT = 2_000 -const MAX_CHUNK_READ_LIMIT = 4_000 +const DEFAULT_CHUNK_READ_LIMIT = 4_000 +const MAX_CHUNK_READ_LIMIT = 8_000 const KNOWHERE_RESPONSE_TEXT_LOG_LIMIT = 200 const KNOWHERE_CHUNK_LOG_LIMIT = 100 const NO_RESULTS_ANSWER = "I couldn't find that in your sources." diff --git a/src/domains/chat/prompt.ts b/src/domains/chat/prompt.ts index 6d36814..b12bd26 100644 --- a/src/domains/chat/prompt.ts +++ b/src/domains/chat/prompt.ts @@ -35,16 +35,16 @@ const CONTEXT_CONTENT_CHAR_LIMIT = 900 const COMPACTED_HISTORY_MESSAGE_LIMIT = 12 const COMPACTED_HISTORY_CONTENT_CHAR_LIMIT = 500 const STORED_HISTORY_MESSAGE_LIMIT = 20 -const STORED_HISTORY_CHAR_BUDGET = 12_000 +const STORED_HISTORY_CHAR_BUDGET = 32_000 const AGENT_STEP_MESSAGE_LIMIT = 20 const AGENT_STEP_RECENT_MESSAGE_LIMIT = 12 -const AGENT_STEP_CONTEXT_CHAR_BUDGET = 16_000 +const AGENT_STEP_CONTEXT_CHAR_BUDGET = 64_000 const SOURCE_CONTEXT_LIMIT = 12 const AGENTIC_SEARCH_STEP_LIMIT = 5 -const TOOL_EVIDENCE_CHAR_LIMIT = 6_000 -const TOOL_RESULT_CONTENT_CHAR_LIMIT = 700 -const TOOL_CHUNK_READ_LIMIT_DEFAULT = 2_000 -const TOOL_CHUNK_READ_LIMIT_MAX = 4_000 +const TOOL_EVIDENCE_CHAR_LIMIT = 12_000 +const TOOL_RESULT_CONTENT_CHAR_LIMIT = 1_500 +const TOOL_CHUNK_READ_LIMIT_DEFAULT = 4_000 +const TOOL_CHUNK_READ_LIMIT_MAX = 8_000 const AGENT_LOOP_TOOL_INPUT_LOG_LIMIT = 1_200 const AGENT_LOOP_TOOL_OUTPUT_LOG_LIMIT = 2_400 const AGENT_LOOP_TOOL_LOG_ENTRY_LIMIT = 4 @@ -888,10 +888,44 @@ function buildAgentStepMessages(messages: ModelMessage[]): ModelMessage[] { return [ ...systemMessages, - ...nonSystemMessages.slice(-AGENT_STEP_RECENT_MESSAGE_LIMIT), + ...selectRecentMessagesWithinBudget({ + messages: nonSystemMessages, + reservedCharLength: getModelMessagesCharLength(systemMessages), + charBudget: AGENT_STEP_CONTEXT_CHAR_BUDGET, + messageLimit: AGENT_STEP_RECENT_MESSAGE_LIMIT, + }), ] } +function selectRecentMessagesWithinBudget(input: { + readonly messages: readonly ModelMessage[] + readonly reservedCharLength: number + readonly charBudget: number + readonly messageLimit: number +}): ModelMessage[] { + const selectedMessages: ModelMessage[] = [] + const remainingCharBudget = Math.max( + input.charBudget - input.reservedCharLength, + 0, + ) + let selectedCharLength = 0 + + for (const message of [...input.messages].reverse()) { + if (selectedMessages.length >= input.messageLimit) break + + const messageCharLength = getUnknownTextLength(message.content) + const isLatestMessage = selectedMessages.length === 0 + const canFitWithinBudget = + selectedCharLength + messageCharLength <= remainingCharBudget + if (!isLatestMessage && !canFitWithinBudget) continue + + selectedMessages.push(message) + selectedCharLength += messageCharLength + } + + return selectedMessages.reverse() +} + function toModelMessage(message: ChatHistoryMessage): ModelMessage { return { role: message.role, From c1cff36533664f94c25f50345282770fddfe43de Mon Sep 17 00:00:00 2001 From: suguanYang Date: Fri, 5 Jun 2026 11:13:59 +0800 Subject: [PATCH 12/13] fix(chat): remove redundant status and summary from retrieval tool output --- src/domains/chat/prompt.ts | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/domains/chat/prompt.ts b/src/domains/chat/prompt.ts index b12bd26..36a2d61 100644 --- a/src/domains/chat/prompt.ts +++ b/src/domains/chat/prompt.ts @@ -986,13 +986,9 @@ function buildRetrievalToolOutput(response: AgenticRetrievalResponse): string { const lines = [ "## Retrieval Result", "", - `Status: ${getRetrievalResponseStatus(response)}`, `Query: ${redactRawUrls(response.query)}`, `Guidance: ${getRetrievalResponseGuidance(response)}`, "", - "## Summary", - formatOptionalMarkdownText(response.answerText, "No answer summary returned."), - "", "## Evidence", formatOptionalMarkdownText(response.evidenceText, "No evidence text returned."), "", From 015c6f58d91c0896497db29fa3251f26b8bc992b Mon Sep 17 00:00:00 2001 From: suguanYang Date: Fri, 5 Jun 2026 11:28:08 +0800 Subject: [PATCH 13/13] feat(chat): improve retrieval tool context --- src/domains/chat/index.test.ts | 109 ++++++++++++----- src/domains/chat/prompt.ts | 211 +++++++++++++++++++++++++-------- 2 files changed, 239 insertions(+), 81 deletions(-) diff --git a/src/domains/chat/index.test.ts b/src/domains/chat/index.test.ts index 07407f8..a60f3c1 100644 --- a/src/domains/chat/index.test.ts +++ b/src/domains/chat/index.test.ts @@ -967,6 +967,14 @@ describe("generateAgenticGroundedAnswer", () => { }); const previewWithinNewLimitMarker = "within-new-preview-limit"; const previewAfterNewLimitMarker = "after-new-preview-limit"; + const fullToolOutputEvidenceMarker = "full-tool-output-evidence-end"; + const evidenceTreeText = [ + "Identity image evidence. https://blob.example/images/id-front.jpg", + "[Document] document-generated.pdf", + "▸ [L1] Assets", + " ▸ [L2] images / id-front.jpg", + ` ┈ ${"evidence ".repeat(500)}${fullToolOutputEvidenceMarker}`, + ].join("\n"); const longIdentityPreview = [ "Identity card image front side.", "preview ".repeat(170), @@ -987,8 +995,7 @@ describe("generateAgenticGroundedAnswer", () => { }, }), ], - evidenceText: - "Identity image evidence. https://blob.example/images/id-front.jpg", + evidenceText: evidenceTreeText, referencedChunks: [ { chunkId: "chunk_identity_1", @@ -1100,6 +1107,23 @@ describe("generateAgenticGroundedAnswer", () => { toolChoice: { type: "tool", toolName: "searchSources" }, activeTools: ["searchSources"], }) + expect( + settings.prepareStep({ + stepNumber: 1, + messages: [...generateInput.messages], + }), + ).toMatchObject({ + toolChoice: { type: "tool", toolName: "searchSources" }, + activeTools: ["searchSources"], + }) + expect( + settings.prepareStep({ + stepNumber: 2, + messages: [...generateInput.messages], + }), + ).toMatchObject({ + messages: expect.any(Array), + }) const searchSourcesTool = getCapturedAgentTools(agent).searchSources expect( @@ -1132,9 +1156,20 @@ describe("generateAgenticGroundedAnswer", () => { }); expect(toolOutput).toEqual(expect.any(String)); expect(toolOutput).toContain("## Retrieval Result"); - expect(toolOutput).toContain("Status: useful_evidence_found"); + expect(toolOutput).toContain("Query: 公民身份证 图片"); expect(toolOutput).toContain("Guidance: Use this evidence"); expect(toolOutput).toContain("## Evidence"); + expect(toolOutput).toContain( + "[Document] document-generated.pdf\n▸ [L1] Assets\n ▸ [L2] images / id-front.jpg", + ); + expect(toolOutput).toContain(fullToolOutputEvidenceMarker); + expect(toolOutput).not.toContain( + "[Document] document-generated.pdf ▸ [L1] Assets", + ); + expect(toolOutput).toContain("## Decision Trace"); + expect(toolOutput).toContain("- Step 1:"); + expect(toolOutput).toContain("- step: final"); + expect(toolOutput).toContain("- stop: answer_done"); expect(toolOutput).toContain("### Result 1"); expect(toolOutput).toContain("Type: image"); expect(toolOutput).toContain( @@ -1152,7 +1187,8 @@ describe("generateAgenticGroundedAnswer", () => { expect(getLoggerInfoMeta("chat-agent: tool output")).toMatchObject({ toolName: "searchSources", output: expect.objectContaining({ - preview: expect.stringContaining("## Retrieval Result\n\nStatus"), + truncated: false, + preview: expect.stringContaining(fullToolOutputEvidenceMarker), }), }); @@ -1196,6 +1232,7 @@ describe("generateAgenticGroundedAnswer", () => { expect(getLoggerInfoMeta("chat-agent: tool output")).toMatchObject({ toolName: "readRetrievedChunk", output: expect.objectContaining({ + truncated: false, preview: expect.stringContaining("## Retrieved Content\n\nStatus"), }), }); @@ -1247,7 +1284,7 @@ describe("generateAgenticGroundedAnswer", () => { content: `loop-message-${index}`, })); const preparedStep = settings.prepareStep({ - stepNumber: 1, + stepNumber: 2, messages: oversizedLoopMessages, }) as { readonly messages: readonly ModelMessage[] }; @@ -1258,7 +1295,7 @@ describe("generateAgenticGroundedAnswer", () => { operation: "generateAgenticGroundedAnswer.step", model: "google/gemini-3-flash", promptType: "messages", - stepNumber: 1, + stepNumber: 2, instructions: expect.stringContaining("Notebook research agent"), messageCount: preparedStep.messages.length, messages: expect.arrayContaining([ @@ -1284,7 +1321,7 @@ describe("generateAgenticGroundedAnswer", () => { }, ]; const preparedHugeStep = settings.prepareStep({ - stepNumber: 1, + stepNumber: 2, messages: hugeLoopMessages, }) as { readonly messages: readonly ModelMessage[] }; const serializedHugeStepMessages = JSON.stringify( @@ -1298,8 +1335,32 @@ describe("generateAgenticGroundedAnswer", () => { expect(serializedHugeStepMessages).toContain("latest-loop-message"); }); - it("logs bounded tool call and tool result previews for each loop step", async () => { + it("logs bounded tool calls and complete tool results for each loop step", async () => { process.env.AI_GATEWAY_API_KEY = "test_gateway_key"; + const fullStepToolOutputMarker = "full-step-tool-output-end"; + const fullStepToolOutput = `\n${[ + "## Retrieval Result", + "", + "Status: useful_evidence_found", + "Query: 冯荣洲 身份证 ID card", + "Guidance: Use this evidence if it directly answers the user.", + "", + "## Evidence", + `Image evidence https://blob.example/id-front.jpg ${"evidence ".repeat( + 600, + )}`, + "", + "## Results", + "### Result 1", + "Type: image", + "Source: 商务标文件.pdf / 二、法定代表人身份证明", + "Media: image available", + "Read ID: chunk_identity_1", + "", + "Preview:", + `Identity image content ${"result ".repeat(80)}`, + fullStepToolOutputMarker, + ].join("\n")}\n `; const generateSpy = vi .spyOn(ToolLoopAgent.prototype, "generate") .mockResolvedValue({ @@ -1341,28 +1402,7 @@ describe("generateAgenticGroundedAnswer", () => { { toolName: "searchSources", toolCallId: "call_1", - output: [ - "## Retrieval Result", - "", - "Status: useful_evidence_found", - "Query: 冯荣洲 身份证 ID card", - "Guidance: Use this evidence if it directly answers the user.", - "", - "## Evidence", - `Image evidence https://blob.example/id-front.jpg ${"evidence ".repeat( - 600, - )}`, - "", - "## Results", - "### Result 1", - "Type: image", - "Source: 商务标文件.pdf / 二、法定代表人身份证明", - "Media: image available", - "Read ID: chunk_identity_1", - "", - "Preview:", - `Identity image content ${"result ".repeat(80)}`, - ].join("\n"), + output: fullStepToolOutput, }, ], usage: { @@ -1390,11 +1430,14 @@ describe("generateAgenticGroundedAnswer", () => { expect(stepLog.toolResults[0]?.output).toMatchObject({ kind: "searchSources", output: { - truncated: true, + truncated: false, }, }); const searchSourcesOutput = stepLog.toolResults[0] ?.output as SearchSourcesToolOutputLogMeta; + expect(searchSourcesOutput.output.preview.startsWith("\n## Retrieval Result")) + .toBe(true); + expect(searchSourcesOutput.output.preview.endsWith("\n ")).toBe(true); expect(searchSourcesOutput.output.preview).toContain("## Retrieval Result"); expect(searchSourcesOutput.output.preview).toContain("\n\n## Evidence"); expect(searchSourcesOutput.output.preview).toContain( @@ -1403,6 +1446,9 @@ describe("generateAgenticGroundedAnswer", () => { expect(searchSourcesOutput.output.preview).toContain( "[media asset URL hidden]", ); + expect(searchSourcesOutput.output.preview).toContain( + fullStepToolOutputMarker, + ); expect(JSON.stringify(stepMeta)).not.toContain("https://blob.example"); settings.onFinish({ @@ -1523,6 +1569,7 @@ describe("buildAgenticChatSystemPrompt", () => { }); expect(prompt).toContain("Always call searchSources") + expect(prompt).toContain("Make a second searchSources call") expect(prompt).toContain("readRetrievedChunk") expect(prompt).toContain("markdown output gives guidance") expect(prompt).toContain("Read IDs") diff --git a/src/domains/chat/prompt.ts b/src/domains/chat/prompt.ts index 36a2d61..eba12ad 100644 --- a/src/domains/chat/prompt.ts +++ b/src/domains/chat/prompt.ts @@ -46,8 +46,8 @@ const TOOL_RESULT_CONTENT_CHAR_LIMIT = 1_500 const TOOL_CHUNK_READ_LIMIT_DEFAULT = 4_000 const TOOL_CHUNK_READ_LIMIT_MAX = 8_000 const AGENT_LOOP_TOOL_INPUT_LOG_LIMIT = 1_200 -const AGENT_LOOP_TOOL_OUTPUT_LOG_LIMIT = 2_400 const AGENT_LOOP_TOOL_LOG_ENTRY_LIMIT = 4 +const AGENT_REQUIRED_SEARCH_STEP_COUNT = 2 const RAW_URL_PATTERN = /https?:\/\/[^\s)\]}>"']+/g const REDACTED_MEDIA_URL = "[media asset URL hidden]" @@ -137,6 +137,12 @@ type LlmModelMessageLog = { readonly content: unknown } +type RetrievalResponseWithDecisionData = AgenticRetrievalResponse & { + readonly decision_trace?: unknown + readonly decisionTree?: unknown + readonly decision_tree?: unknown +} + type GenerateLoggedTextInput = { readonly operation: string readonly prompt: string @@ -366,13 +372,14 @@ export function buildAgenticChatSystemPrompt( "", "Tool use rules", "1. Always call searchSources before writing a final answer.", - "2. Choose the content target from the user's request: broad questions use broad or text-only search, image requests use image or text+image search, and table requests use table or text+table search.", - "3. Do not paste raw prior messages into searchSources.query. The query must be concise and contain only distilled search terms such as document title, person, topic, date, section path, or asset kind.", - "4. Use one response to guide the next query: carry forward discovered people, organizations, document names, section paths, file paths, content types, and failure hints.", - "5. If the markdown guidance says the evidence is useful and the evidence/results directly support the answer, stop searching and answer.", - "6. If results are missing, weak, or do not cover the requested entity/topic/media/table, search again with a broader or more specific query.", - "7. Use readRetrievedChunk selectively; do not read every result when the previews already answer the question.", - "8. Stop after enough evidence or when further searches are unlikely to help; then clearly say what was not found and what retrieval context was missing.", + "2. Make a second searchSources call before answering to double-check the retrieved data. Reuse the same core query or refine it with entities, document names, section paths, file paths, content types, or failure hints from the first output.", + "3. Choose the content target from the user's request: broad questions use broad or text-only search, image requests use image or text+image search, and table requests use table or text+table search.", + "4. Do not paste raw prior messages into searchSources.query. The query must be concise and contain only distilled search terms such as document title, person, topic, date, section path, or asset kind.", + "5. Use one response to guide the next query: carry forward discovered people, organizations, document names, section paths, file paths, content types, and failure hints.", + "6. After the verification search, if the markdown guidance says the evidence is useful and the evidence/results directly support the answer, stop searching and answer.", + "7. If results are missing, weak, or do not cover the requested entity/topic/media/table, search again with a broader or more specific query.", + "8. Use readRetrievedChunk selectively; do not read every result when the previews already answer the question.", + "9. Stop after enough evidence or when further searches are unlikely to help; then clearly say what was not found and what retrieval context was missing.", "", "Media/table handling", "For image requests, search visual content directly or combine text and image evidence. If an initial text result identifies a relevant person or section but not an image asset, query again with that person/section plus the requested image concept, e.g. identity card / 身份证 / 公民身份证明.", @@ -567,7 +574,7 @@ function buildAgenticPrepareStep( ): PrepareStepFunction { return ({ stepNumber, messages }) => { const managedMessages = buildAgentStepMessages(messages) - if (stepNumber === 0) { + if (stepNumber < AGENT_REQUIRED_SEARCH_STEP_COUNT) { const stepInput = { messages: managedMessages, toolChoice: { @@ -667,22 +674,16 @@ function formatAgentLoopToolOutput( if (toolName === "searchSources") { return { kind: "searchSources", - output: buildAgentLoopMarkdownPreview( - output, - AGENT_LOOP_TOOL_OUTPUT_LOG_LIMIT, - ), + output: buildAgentLoopFullMarkdownPreview(output), } } if (toolName === "readRetrievedChunk") { return { kind: "readRetrievedChunk", - output: buildAgentLoopMarkdownPreview( - output, - AGENT_LOOP_TOOL_OUTPUT_LOG_LIMIT, - ), + output: buildAgentLoopFullMarkdownPreview(output), } } - return buildAgentLoopPreview(output, AGENT_LOOP_TOOL_OUTPUT_LOG_LIMIT) + return buildAgentLoopFullPreview(output) } function getOmittedAgentLoopEntryCount(entries: readonly unknown[]): number { @@ -739,16 +740,21 @@ function buildAgentLoopPreview( } } -function buildAgentLoopMarkdownPreview( - value: unknown, - limit: number, -): AgentLoopLogPreview { - const normalized = redactRawUrls(stringifyAgentLoopLogValue(value)).trim() - const truncated = normalized.length > limit +function buildAgentLoopFullPreview(value: unknown): AgentLoopLogPreview { + const normalized = redactRawUrls(stringifyAgentLoopLogValue(value)) return { charLength: normalized.length, - truncated, - preview: truncated ? `${normalized.slice(0, limit)}...` : normalized, + truncated: false, + preview: normalized, + } +} + +function buildAgentLoopFullMarkdownPreview(value: unknown): AgentLoopLogPreview { + const normalized = redactRawUrls(stringifyAgentLoopLogValue(value)) + return { + charLength: normalized.length, + truncated: false, + preview: normalized, } } @@ -992,6 +998,7 @@ function buildRetrievalToolOutput(response: AgenticRetrievalResponse): string { "## Evidence", formatOptionalMarkdownText(response.evidenceText, "No evidence text returned."), "", + ...formatDecisionTraceMarkdown(response), "## Results", ...formatResultReferencesMarkdown(resultReferences), "", @@ -1002,6 +1009,125 @@ function buildRetrievalToolOutput(response: AgenticRetrievalResponse): string { return lines.join("\n") } +function formatDecisionTraceMarkdown( + response: AgenticRetrievalResponse, +): readonly string[] { + const decisionData = getDecisionTraceData(response) + if (!decisionData) return [] + + return [ + "## Decision Trace", + ...formatDecisionValueMarkdown(decisionData, 0), + "", + ] +} + +function getDecisionTraceData(response: AgenticRetrievalResponse): unknown | null { + const record = response as RetrievalResponseWithDecisionData + const candidates = [ + record.decisionTrace, + record.decision_trace, + record.decisionTree, + record.decision_tree, + ] + + return candidates.find(hasRenderableDecisionData) ?? null +} + +function hasRenderableDecisionData(value: unknown): boolean { + if (Array.isArray(value)) return value.length > 0 + if (typeof value === "string") return value.trim().length > 0 + return Boolean(value && typeof value === "object") +} + +function formatDecisionValueMarkdown( + value: unknown, + depth: number, +): readonly string[] { + if (Array.isArray(value)) return formatDecisionArrayMarkdown(value, depth) + if (value && typeof value === "object") { + return formatDecisionRecordMarkdown(value as Record, depth) + } + + return [`${getDecisionIndent(depth)}- ${formatDecisionScalar(value)}`] +} + +function formatDecisionArrayMarkdown( + values: readonly unknown[], + depth: number, +): readonly string[] { + if (values.length === 0) return [`${getDecisionIndent(depth)}- none`] + + return values.flatMap((value, index): readonly string[] => { + const label = depth === 0 ? `Step ${index + 1}` : `Item ${index + 1}` + if (value && typeof value === "object") { + return [ + `${getDecisionIndent(depth)}- ${label}:`, + ...formatDecisionValueMarkdown(value, depth + 1), + ] + } + return [ + `${getDecisionIndent(depth)}- ${label}: ${formatDecisionScalar(value)}`, + ] + }) +} + +function formatDecisionRecordMarkdown( + record: Record, + depth: number, +): readonly string[] { + const entries = Object.entries(record).filter( + ([key, value]): boolean => shouldRenderDecisionEntry(key, value), + ) + if (entries.length === 0) return [`${getDecisionIndent(depth)}- none`] + + return entries.flatMap(([key, value]): readonly string[] => { + if (Array.isArray(value) || (value && typeof value === "object")) { + return [ + `${getDecisionIndent(depth)}- ${key}:`, + ...formatDecisionValueMarkdown(value, depth + 1), + ] + } + return [ + `${getDecisionIndent(depth)}- ${key}: ${formatDecisionScalar(value)}`, + ] + }) +} + +function shouldRenderDecisionEntry(key: string, value: unknown): boolean { + if (value === null || value === undefined) return false + if (typeof value === "string" && value.trim().length === 0) return false + + return !isInternalDecisionField(key) +} + +function isInternalDecisionField(key: string): boolean { + return [ + "assetId", + "asset_id", + "assetUrl", + "asset_url", + "rawUrl", + "raw_url", + "presignedUrl", + "presigned_url", + ].includes(key) +} + +function formatDecisionScalar(value: unknown): string { + if (typeof value === "string") { + return redactRawUrls(value).replace(/\s+/g, " ").trim() + } + if (typeof value === "number" || typeof value === "boolean") { + return String(value) + } + return redactRawUrls(String(value)).replace(/\s+/g, " ").trim() +} + +function getDecisionIndent(depth: number): string { + return " ".repeat(depth) +} + function formatResultReferencesMarkdown( references: readonly RetrievedChunkReference[], ): readonly string[] { @@ -1070,30 +1196,11 @@ function buildRetrievedChunkToolOutput( ].join("\n") } -function getRetrievalResponseStatus( - response: RetrievalQueryResponse, -): - | "useful_evidence_found" - | "needs_refinement" - | "needs_review" - | "no_results" { - const hasEvidence = Boolean(response.evidenceText?.trim()) - const hasResults = - response.results.length > 0 || response.referencedChunks.length > 0 - - if (response.failureReason) return "needs_refinement" - if (!hasEvidence && !hasResults) return "no_results" - if (response.stopReason && response.stopReason !== "answer_done") { - return "needs_review" - } - return "useful_evidence_found" -} - function formatOptionalMarkdownText( value: string | null | undefined, fallback: string, ): string { - const normalized = truncateSafeContextTextToLimit( + const normalized = truncateSafeMarkdownTextToLimit( value ?? "", TOOL_EVIDENCE_CHAR_LIMIT, ) @@ -1127,10 +1234,7 @@ function formatMediaAvailability(reference: RetrievedChunkReference): string { function logToolMarkdownOutput(toolName: string, output: string): void { logger.info("chat-agent: tool output", { toolName, - output: buildAgentLoopMarkdownPreview( - output, - AGENT_LOOP_TOOL_OUTPUT_LOG_LIMIT, - ), + output: buildAgentLoopFullMarkdownPreview(output), }) } @@ -1229,6 +1333,13 @@ function truncateSafeContextTextToLimit(value: string, limit: number): string { return truncateContextTextToLimit(redactRawUrls(value), limit) } +function truncateSafeMarkdownTextToLimit(value: string, limit: number): string { + const normalized = redactRawUrls(value).replace(/\r\n?/g, "\n") + if (normalized.trim().length === 0) return "" + if (normalized.length <= limit) return normalized + return `${normalized.slice(0, limit)}...` +} + function redactRawUrls(value: string): string { return value.replace(RAW_URL_PATTERN, REDACTED_MEDIA_URL) }