From 715af484ef51ed2af5d43335aac6fbceb688bcbe Mon Sep 17 00:00:00 2001 From: chengke <404835780@qq.com> Date: Fri, 12 Jun 2026 17:59:28 +0800 Subject: [PATCH 1/3] feat(chat): add support for chat artifacts in message handling and retrieval --- .../knowhere_agent_harness_03093158.plan.md | 111 ++ drizzle/0008_chat_message_artifacts.sql | 1 + drizzle/meta/0008_snapshot.json | 694 ++++++++ drizzle/meta/_journal.json | 9 +- src/agent-harness/index.ts | 4 + src/agent-harness/ledger.test.ts | 91 + src/agent-harness/ledger.ts | 255 +++ src/agent-harness/runtime.test.ts | 239 +++ src/agent-harness/runtime.ts | 412 +++++ src/agent-harness/types.ts | 160 ++ src/agent-harness/validator.test.ts | 172 ++ src/agent-harness/validator.ts | 137 ++ src/components/chat-message-list.test.ts | 67 + src/components/chat-message-list.tsx | 54 +- src/domains/chat/chat-citation-persistence.ts | 26 + src/domains/chat/chat-message-repository.ts | 5 + .../chat/chat-turn-persistence.test.ts | 1 + src/domains/chat/chat-turn-persistence.ts | 2 + src/domains/chat/contracts.ts | 52 +- src/domains/chat/index.test.ts | 1496 +++++------------ src/domains/chat/index.ts | 569 +++---- src/domains/chat/prompt.ts | 1347 +-------------- src/domains/chat/route-answer.ts | 4 +- src/domains/chat/route-service.test.ts | 8 +- src/domains/chat/service.test.ts | 32 +- src/domains/chat/service.ts | 10 +- src/domains/chat/thread-service.ts | 2 + src/domains/chat/types.ts | 11 + src/domains/chat/view.ts | 49 + src/domains/workspace/initial-state.test.ts | 1 + src/infrastructure/db/schema.ts | 7 + 31 files changed, 3270 insertions(+), 2758 deletions(-) create mode 100644 .cursor/plans/knowhere_agent_harness_03093158.plan.md create mode 100644 drizzle/0008_chat_message_artifacts.sql create mode 100644 drizzle/meta/0008_snapshot.json create mode 100644 src/agent-harness/index.ts create mode 100644 src/agent-harness/ledger.test.ts create mode 100644 src/agent-harness/ledger.ts create mode 100644 
src/agent-harness/runtime.test.ts create mode 100644 src/agent-harness/runtime.ts create mode 100644 src/agent-harness/types.ts create mode 100644 src/agent-harness/validator.test.ts create mode 100644 src/agent-harness/validator.ts diff --git a/.cursor/plans/knowhere_agent_harness_03093158.plan.md b/.cursor/plans/knowhere_agent_harness_03093158.plan.md new file mode 100644 index 0000000..5261f9e --- /dev/null +++ b/.cursor/plans/knowhere_agent_harness_03093158.plan.md @@ -0,0 +1,111 @@ +--- +name: knowhere agent harness +overview: 把 notebook 问答与 typing compose 重构成一套共享的、推理驱动的 Knowhere Agent Harness:KNOWHERE 只提供证据,agent 通过结构化工作记忆(意图/上下文策略/计划/证据账本/输出清单)+ 工具 + 合约校验完成理解、规划、证据选择与交付,彻底去掉硬编码 pipeline。 +todos: + - id: harness-core + content: 新建 knowhere-agent-harness 规范源目录与同步脚本;实现核心类型(IntentFrame/ContextPolicy/RetrievalPlan/EvidenceLedger/OutputManifest)、工具集、单推理循环与合约校验器,model/retrieval 注入接口 + 单测与评测集骨架 + status: pending + - id: notebook-adapter + content: Notebook 接入 harness:重写 answerQuestionWithRetrieval/prompt/retrieval/service,暴露 RetrievalCapability,chat_messages 增 artifacts/metadata jsonb,前端按 manifest 渲染,删除 legacy query/answer 函数 + status: pending + - id: typing-adapter + content: Typing compose 迁入同一 harness:compose.ts/retrieval.ts/protocol.ts/tools.ts 改造,extractIntent/planQueries/setMode 收敛为 harness 工具,补发 meta 事件,validateComposeProtocol 升级为 typing 校验 profile + status: pending + - id: eval-cleanup + content: 跑 harness 回归评测集(无关多轮/纠错/图片数量/混合资产/NOT_FOUND/typing continue/generic rewrite);清理 legacy 代码与 notebook lockfile SDK 版本漂移 + status: pending +isProject: false +--- + +# Knowhere Agent Harness 重构方案 + +## 0. 
目标与不变量 + +- KNOWHERE-MAIN **不改动**。它已是纯 evidence provider(`POST /v1/retrieval/query` 永远 agentic、`answer_text` 恒空,返回 `evidence_text` / `results` / `referenced_chunks` / `decision_trace` / `stop_reason`)。所有理解、判断、选图、回答归外围 agent。 +- 一套核心、两个 surface:`notebook_chat` 与 `typing_compose`(及 `typing_quick_ask`)。 +- 架构哲学(2A):**单推理循环 deep agent**。harness 提供工作记忆 + 工具 + 合约校验,不写业务 if 分支。 +- 代码共享(1C):新建顶层规范源目录,脚本同步进两个仓库,后续可平滑升级为 npm 包。 + +## 1. 共享与分发(1C) + +- 新建规范源:`/Users/wuchengke/Desktop/knowhere/knowhere-agent-harness/`(纯 TS,仅依赖 `ai`、`zod`,与两边版本兼容)。 +- 同步脚本:`scripts/sync-harness.sh`(copy 到目标 + 写 `HARNESS_REV` 哈希),两仓库各放一份反向校验脚本;CI/dev 比对哈希检测漂移。 + - notebook 落点:`src/agent-harness/` + - typing 落点:`sidecar/knowhere-agent/src/harness/` +- 两边运行时都能消费:Next.js(Node)与 Bun `--compile` 都可 bundle 纯 TS。 + +## 2. 核心抽象(surface 无关) + +工作记忆全部由 agent 经工具读写,代码只做状态管理与守护: + +- `IntentFrame`: task、dependsOnPreviousTurn、retrievalNeeded、targetModalities、constraints{desiredCount,maxCount,language,outputStyle,citationRequired}、groundingPolicy。"要2张图" = agent 推理出的 `constraints.desiredCount=2`,非正则。 +- `ContextPolicy`: carryHistory("none" | "referential_only" | "full_recent" | "repair_previous")、reason、activePriorTurnIds。解决"第二个无关问题返回第一轮答案"——由 agent 判断,不靠代码塞历史。 +- `RetrievalPlan`: 可日志化步骤(retrieve / read_more / select_artifacts / compose)。 +- `EvidenceLedger`: 跨多次 retrieve 累积的 chunks / assets / decisionTrace / stopReason / failureReason。KNOWHERE 给的是候选,不等于最终输出。 +- `OutputManifest`: text、citations[]、artifacts[{type,ref,display,reason}]、unresolved[]。**最终回答不再是裸 markdown 字符串**。 + +```mermaid +flowchart LR + In["AgentTurnInput
<br/>(surface,userText,recentTurns,localContext,caps)"] --> Loop + subgraph Loop [单一 ToolLoopAgent 推理循环] + DI["declareIntent → IntentFrame"] + CP["setContextPolicy → ContextPolicy"] + RT["retrieve → KNOWHERE → EvidenceLedger"] + RD["readEvidence(本地账本)"] + SA["selectArtifacts → OutputManifest.artifacts"] + FN["finalize → OutputManifest"] + DI --> CP --> RT --> RD --> SA --> FN + end + FN --> VAL["Validator/Critic<br/>
(校验 agent 自声明合约)"] + VAL -->|"违约"| Loop + VAL -->|"通过"| Out["surface adapter 交付"] +``` + +## 3. 工具集(agent 唯一的行动方式) + +- `declareIntent(IntentFrame)`:必须最先调用(harness 门控,合约级而非话题级)。 +- `setContextPolicy(ContextPolicy)`:agent 决定是否/如何带历史。 +- `retrieve({query,modalities,topK,signalPaths,filterMode,threshold})`:封装 KNOWHERE,query 由 agent 拟定(替代 notebook `searchSources`、typing `planQueries+retrieveKnowledge`),结果进 EvidenceLedger。 +- `readEvidence({ref,offset,limit})`:读账本里某 chunk 更多内容(本地,等价 notebook `readRetrievedChunk`,无需再打 KNOWHERE)。 +- `selectArtifacts({refs[],reason})`:agent 显式声明展示哪些资产 → 这才是"只发2张图"的来源。 +- `finalize({text,citations,artifacts,unresolved})`:产出 OutputManifest,结束循环。 + +门控只在**合约层**:`declareIntent` 必须在前;`retrieve` 仅在意图声明后可用;若 `groundingPolicy=must_use_sources`,无证据时禁止 `finalize`。不再硬编码"前两步必须搜索"。 + +## 4. 合约校验器(守护而非业务硬编码) + +`finalize` 后对照 agent **自己声明的 IntentFrame** 校验: +- artifacts 数量满足 agent 声明的 desiredCount/maxCount。 +- `groundingPolicy=must_use_sources` → 必须有 citations/evidence。 +- `carryHistory=none` → 输出不得引用上一轮主题(软校验)。 +- KNOWHERE `stop_reason=no_documents_selected`/空 → 禁止编造。 +- `surface=typing_compose` → 文本必须是纯插入文本(无 markdown/meta)。 + +违约 → 回灌结构化反馈让 agent 修订一次;再不行 → 优雅返回"证据不足/需澄清"。 + +## 5. 
Surface 适配器 + +- Notebook(`notebook_chat`):thread 历史 → recentTurns;retrieval 绑 workspace namespace + excludedSourceIds;OutputManifest → assistant message{content,citations,artifacts};**前端只渲染 `message.artifacts`**,不再遍历全部 image citation。 + - 改造点:[src/domains/chat/index.ts](knowhere-notebook/src/domains/chat/index.ts)(`answerQuestionWithRetrieval` 改为调用 harness)、[src/domains/chat/prompt.ts](knowhere-notebook/src/domains/chat/prompt.ts)(prompt/agent 逻辑迁入 harness,删除 legacy `generateContextualRetrievalQuery`/`generateGroundedAnswer`)、[src/domains/chat/retrieval.ts](knowhere-notebook/src/domains/chat/retrieval.ts)(query 规范化迁入 retrieve 工具)、[src/domains/chat/service.ts](knowhere-notebook/src/domains/chat/service.ts)、[src/integrations/knowhere.ts](knowhere-notebook/src/integrations/knowhere.ts)(暴露 RetrievalCapability)、[src/components/chat-message-list.tsx](knowhere-notebook/src/components/chat-message-list.tsx)(按 artifacts 渲染)。 + - 数据模型:`chat_messages` 增 `artifacts` jsonb + `metadata` jsonb(存 intent/plan/trace 供调试),见 schema。 +- Typing(`typing_compose`):focusedSnapshot → localContext + userText;outputCapabilities={text,inlineInsertion};OutputManifest.text → 纯插入文本;保留 stdio NDJSON,并补发此前未实现的 `meta` 事件。 + - 改造点:[sidecar/knowhere-agent/src/compose.ts](knowhere-typing/sidecar/knowhere-agent/src/compose.ts)(改为 harness 驱动,extractIntent/planQueries/setMode 收敛为 harness 工具)、[sidecar/knowhere-agent/src/retrieval.ts](knowhere-typing/sidecar/knowhere-agent/src/retrieval.ts)、[sidecar/knowhere-agent/src/protocol.ts](knowhere-typing/sidecar/knowhere-agent/src/protocol.ts)、[sidecar/knowhere-agent/src/tools.ts](knowhere-typing/sidecar/knowhere-agent/src/tools.ts);`validateComposeProtocol` 升级为 harness 的 typing 校验 profile。 +- 模型注入:notebook 传 AI Gateway 模型(`CHAT_MODEL`),typing 传 OpenAI-compatible 模型;harness 自身 model-agnostic。 + +## 6. 
解决你提的三个具体问题(均由 agent 推理 + 合约保证) + +- 无关多轮污染 → `ContextPolicy` 由 agent 判定 `carryHistory=none`,并记录 reason 供调试。 +- 只发指定数量图 → agent `declareIntent.desiredCount` + `selectArtifacts`,UI 仅渲染 manifest;校验器兜底数量一致。 +- 输出不智能 → 显式 intent/plan/evidence/critic 闭环 + 可观测 trace,落到 message.metadata,出问题可还原 agent 当时搜了什么、选了什么。 +- 附带小卫生项:同步 notebook `package-lock.json` 残留 `@ontos-ai/knowhere-sdk ^0.4.0` 与 pnpm-lock 的 0.6.0。 + +## 7. 回归评测集(harness regression) + +unrelated follow-up / correction turn / image-count intent / text+image mixed / NOT_FOUND 不编造 / typing continue 必检索 / generic rewrite 不检索。作为 harness 单测与两 surface 集成测试。 + +## 8. 阶段划分 + +- Phase 1:建规范源目录 + 同步脚本 + 核心类型与工具 + 校验器 + 单测/评测集骨架。 +- Phase 2:notebook 适配器接入,artifacts 持久化 + 前端 manifest 渲染,/api/chat 响应向后兼容。 +- Phase 3:typing compose 迁入同一 harness,补 `meta` 事件,校验 profile 化。 +- Phase 4:跑回归评测,清理 legacy 代码与 lockfile。 \ No newline at end of file diff --git a/drizzle/0008_chat_message_artifacts.sql b/drizzle/0008_chat_message_artifacts.sql new file mode 100644 index 0000000..b60b97f --- /dev/null +++ b/drizzle/0008_chat_message_artifacts.sql @@ -0,0 +1 @@ +ALTER TABLE "chat_messages" ADD COLUMN "artifacts" jsonb; \ No newline at end of file diff --git a/drizzle/meta/0008_snapshot.json b/drizzle/meta/0008_snapshot.json new file mode 100644 index 0000000..4f83ce0 --- /dev/null +++ b/drizzle/meta/0008_snapshot.json @@ -0,0 +1,694 @@ +{ + "id": "eefd2e20-20fc-4fa0-afbd-7f6d46a37095", + "prevId": "a3d7f7f1-7afe-48ab-ae22-15c6dfbab98d", + "version": "7", + "dialect": "postgresql", + "tables": { + "public.chat_messages": { + "name": "chat_messages", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "thread_id": { + "name": "thread_id", + "type": "uuid", + "primaryKey": false, + "notNull": true + }, + "role": { + "name": "role", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "content": { + "name": "content", + "type": 
"text", + "primaryKey": false, + "notNull": true + }, + "citations": { + "name": "citations", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "artifacts": { + "name": "artifacts", + "type": "jsonb", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "chat_messages_thread_created_idx": { + "name": "chat_messages_thread_created_idx", + "columns": [ + { + "expression": "thread_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "chat_messages_thread_id_chat_threads_id_fk": { + "name": "chat_messages_thread_id_chat_threads_id_fk", + "tableFrom": "chat_messages", + "tableTo": "chat_threads", + "columnsFrom": [ + "thread_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.chat_threads": { + "name": "chat_threads", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "workspace_id": { + "name": "workspace_id", + "type": "uuid", + "primaryKey": false, + "notNull": true + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "demo_key": { + "name": "demo_key", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with 
time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "deleted_at": { + "name": "deleted_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "chat_threads_workspace_updated_idx": { + "name": "chat_threads_workspace_updated_idx", + "columns": [ + { + "expression": "workspace_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "updated_at", + "isExpression": false, + "asc": false, + "nulls": "last" + } + ], + "isUnique": false, + "where": "deleted_at IS NULL", + "concurrently": false, + "method": "btree", + "with": {} + }, + "chat_threads_workspace_demo_key_idx": { + "name": "chat_threads_workspace_demo_key_idx", + "columns": [ + { + "expression": "workspace_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "demo_key", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "chat_threads_workspace_id_workspaces_id_fk": { + "name": "chat_threads_workspace_id_workspaces_id_fk", + "tableFrom": "chat_threads", + "tableTo": "workspaces", + "columnsFrom": [ + "workspace_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.demo_source_visibilities": { + "name": "demo_source_visibilities", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "workspace_id": { + "name": "workspace_id", + "type": "uuid", + "primaryKey": false, + "notNull": true + }, + "demo_source_id": { + "name": "demo_source_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "hidden_at": { + "name": "hidden_at", + "type": 
"timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "deleted_at": { + "name": "deleted_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "demo_source_visibilities_workspace_source_idx": { + "name": "demo_source_visibilities_workspace_source_idx", + "columns": [ + { + "expression": "workspace_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "demo_source_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + }, + "demo_source_visibilities_workspace_idx": { + "name": "demo_source_visibilities_workspace_idx", + "columns": [ + { + "expression": "workspace_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "demo_source_visibilities_workspace_id_workspaces_id_fk": { + "name": "demo_source_visibilities_workspace_id_workspaces_id_fk", + "tableFrom": "demo_source_visibilities", + "tableTo": "workspaces", + "columnsFrom": [ + "workspace_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.source_parse_results": { + "name": "source_parse_results", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "source_id": { + "name": "source_id", + "type": "uuid", + 
"primaryKey": false, + "notNull": true + }, + "result_blob_url": { + "name": "result_blob_url", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "asset_urls": { + "name": "asset_urls", + "type": "jsonb", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "source_parse_results_source_id_idx": { + "name": "source_parse_results_source_id_idx", + "columns": [ + { + "expression": "source_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "source_parse_results_source_id_sources_id_fk": { + "name": "source_parse_results_source_id_sources_id_fk", + "tableFrom": "source_parse_results", + "tableTo": "sources", + "columnsFrom": [ + "source_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "source_parse_results_source_id_unique": { + "name": "source_parse_results_source_id_unique", + "nullsNotDistinct": false, + "columns": [ + "source_id" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.sources": { + "name": "sources", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "workspace_id": { + "name": "workspace_id", + "type": "uuid", + "primaryKey": false, + "notNull": true + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "mime_type": { + "name": "mime_type", + "type": "text", + "primaryKey": false, + "notNull": true + }, + 
"size_bytes": { + "name": "size_bytes", + "type": "bigint", + "primaryKey": false, + "notNull": true + }, + "status": { + "name": "status", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "failure_reason": { + "name": "failure_reason", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "knowhere_job_id": { + "name": "knowhere_job_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "knowhere_document_id": { + "name": "knowhere_document_id", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "staged_blob_pathname": { + "name": "staged_blob_pathname", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "staged_blob_url": { + "name": "staged_blob_url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "original_blob_pathname": { + "name": "original_blob_pathname", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "original_blob_url": { + "name": "original_blob_url", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "demo_key": { + "name": "demo_key", + "type": "text", + "primaryKey": false, + "notNull": false + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "updated_at": { + "name": "updated_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + }, + "deleted_at": { + "name": "deleted_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": false + } + }, + "indexes": { + "sources_workspace_created_idx": { + "name": "sources_workspace_created_idx", + "columns": [ + { + "expression": "workspace_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "created_at", + "isExpression": false, + "asc": false, + "nulls": "last" + } + ], + "isUnique": false, + "where": "deleted_at IS NULL", + "concurrently": false, + "method": "btree", 
+ "with": {} + }, + "sources_workspace_status_idx": { + "name": "sources_workspace_status_idx", + "columns": [ + { + "expression": "workspace_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "status", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": false, + "method": "btree", + "with": {} + }, + "sources_workspace_demo_key_idx": { + "name": "sources_workspace_demo_key_idx", + "columns": [ + { + "expression": "workspace_id", + "isExpression": false, + "asc": true, + "nulls": "last" + }, + { + "expression": "demo_key", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": true, + "concurrently": false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": { + "sources_workspace_id_workspaces_id_fk": { + "name": "sources_workspace_id_workspaces_id_fk", + "tableFrom": "sources", + "tableTo": "workspaces", + "columnsFrom": [ + "workspace_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "cascade", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + }, + "public.workspaces": { + "name": "workspaces", + "schema": "", + "columns": { + "id": { + "name": "id", + "type": "uuid", + "primaryKey": true, + "notNull": true, + "default": "gen_random_uuid()" + }, + "user_id": { + "name": "user_id", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "namespace": { + "name": "namespace", + "type": "text", + "primaryKey": false, + "notNull": true + }, + "created_at": { + "name": "created_at", + "type": "timestamp with time zone", + "primaryKey": false, + "notNull": true, + "default": "now()" + } + }, + "indexes": { + "workspaces_user_id_idx": { + "name": "workspaces_user_id_idx", + "columns": [ + { + "expression": "user_id", + "isExpression": false, + "asc": true, + "nulls": "last" + } + ], + "isUnique": false, + "concurrently": 
false, + "method": "btree", + "with": {} + } + }, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": { + "workspaces_user_id_unique": { + "name": "workspaces_user_id_unique", + "nullsNotDistinct": false, + "columns": [ + "user_id" + ] + }, + "workspaces_namespace_unique": { + "name": "workspaces_namespace_unique", + "nullsNotDistinct": false, + "columns": [ + "namespace" + ] + } + }, + "policies": {}, + "checkConstraints": {}, + "isRLSEnabled": false + } + }, + "enums": {}, + "schemas": {}, + "sequences": {}, + "roles": {}, + "policies": {}, + "views": {}, + "_meta": { + "columns": {}, + "schemas": {}, + "tables": {} + } +} \ No newline at end of file diff --git a/drizzle/meta/_journal.json b/drizzle/meta/_journal.json index c14aca0..5823c91 100644 --- a/drizzle/meta/_journal.json +++ b/drizzle/meta/_journal.json @@ -57,6 +57,13 @@ "when": 1778569500000, "tag": "0007_normalize_legacy_demo_sources", "breakpoints": true + }, + { + "idx": 8, + "version": "7", + "when": 1781257344080, + "tag": "0008_chat_message_artifacts", + "breakpoints": true } ] -} +} \ No newline at end of file diff --git a/src/agent-harness/index.ts b/src/agent-harness/index.ts new file mode 100644 index 0000000..bca503f --- /dev/null +++ b/src/agent-harness/index.ts @@ -0,0 +1,4 @@ +export * from "./ledger" +export * from "./runtime" +export * from "./types" +export * from "./validator" diff --git a/src/agent-harness/ledger.test.ts b/src/agent-harness/ledger.test.ts new file mode 100644 index 0000000..5685bc3 --- /dev/null +++ b/src/agent-harness/ledger.test.ts @@ -0,0 +1,91 @@ +import { describe, expect, it } from "vitest" +import type { RetrievalQueryResponse } from "@ontos-ai/knowhere-sdk" + +import { createEvidenceLedger } from "./ledger" +/** Suite for the evidence ledger: checks chunk ref numbering, image asset extraction, and reads by ref. */ +describe("createEvidenceLedger", () => { + it("normalizes retrieval chunks and media assets without treating candidates as final output", () => { + const ledger = createEvidenceLedger() + + const snapshot = 
ledger.addRetrievalResponse(makeRetrievalResponse()) + + expect(snapshot.retrievalCount).toBe(1) + expect(snapshot.chunks.map((chunk) => chunk.ref)).toEqual([ + "r1:result:1", + "r1:result:2", + "r1:referenced:1", + ]) + expect(snapshot.assets).toEqual([ + expect.objectContaining({ + ref: "asset:r1:result:2", + chunkRef: "r1:result:2", + type: "image", + }), + expect.objectContaining({ + ref: "asset:r1:referenced:1", + chunkRef: "r1:referenced:1", + type: "image", + }), + ]) + }) + + it("reads only evidence already returned by retrieval", () => { + const ledger = createEvidenceLedger() + ledger.addRetrievalResponse(makeRetrievalResponse()) + + expect(ledger.read("r1:result:1", 0, 7)).toMatchObject({ + found: true, + contentSlice: "Revenue", + hasMoreContent: true, + }) + expect(ledger.read("missing")).toMatchObject({ + found: false, + contentSlice: "", + }) + }) +}) +/** Fixture: a retrieval response with one text result, one image result carrying an asset URL, and one referenced image chunk. */ +function makeRetrievalResponse(): RetrievalQueryResponse { + return { + namespace: "notebook", + query: "q4 revenue images", + routerUsed: "workflow_single_step", + answerText: null, + evidenceText: "Evidence tree", + stopReason: "answer_done", + failureReason: null, + decisionTrace: [{ step: "search" }], + results: [ + { + content: "Revenue increased in Q4.", + chunkType: "text", + score: 0.9, + source: { + documentId: "doc_1", + sourceFileName: "report.pdf", + sectionPath: "Q4", + }, + }, + { + content: "", + chunkType: "image", + score: 0.8, + assetUrl: "https://assets.example/images/chart.png", + source: { + documentId: "doc_1", + sourceFileName: "report.pdf", + sectionPath: "images/chart.png", + }, + }, + ], + referencedChunks: [ + { + chunkId: "chunk_image", + documentId: "doc_1", + chunkType: "image", + sectionPath: "images/photo.jpg", + assetUrl: "https://assets.example/images/photo.jpg", + }, + ], + } +} diff --git a/src/agent-harness/ledger.ts b/src/agent-harness/ledger.ts new file mode 100644 index 0000000..da197e8 --- /dev/null +++ b/src/agent-harness/ledger.ts @@ -0,0 +1,255 @@ +import 
type { + RetrievalQueryResponse, + RetrievalResult, +} from "@ontos-ai/knowhere-sdk" + +import type { + EvidenceAsset, + EvidenceChunk, + EvidenceLedgerSnapshot, +} from "./types" + +const contentPreviewLimit = 1_200 +const imageExtensions = [".jpg", ".jpeg", ".png", ".gif", ".webp", ".svg"] as const + +type MutableLedger = { + retrievalCount: number + chunks: EvidenceChunk[] + assets: EvidenceAsset[] + evidenceText: string[] + stopReasons: string[] + failureReasons: string[] + decisionTraces: unknown[] +} + +export type EvidenceLedger = ReturnType + +export function createEvidenceLedger() { + const ledger: MutableLedger = { + retrievalCount: 0, + chunks: [], + assets: [], + evidenceText: [], + stopReasons: [], + failureReasons: [], + decisionTraces: [], + } + + return { + addRetrievalResponse(response: RetrievalQueryResponse): EvidenceLedgerSnapshot { + ledger.retrievalCount += 1 + const retrievalIndex = ledger.retrievalCount + + const evidenceText = response.evidenceText?.trim() + if (evidenceText) ledger.evidenceText.push(evidenceText) + + const stopReason = response.stopReason?.trim() + if (stopReason) ledger.stopReasons.push(stopReason) + + const failureReason = response.failureReason?.trim() + if (failureReason) ledger.failureReasons.push(failureReason) + + const decisionTrace = getDecisionTrace(response) + if (decisionTrace) ledger.decisionTraces.push(decisionTrace) + + response.results.forEach((result, index) => { + addChunkFromResult({ + ledger, + result, + ref: `r${retrievalIndex}:result:${index + 1}`, + kind: "result", + }) + }) + + response.referencedChunks.forEach((chunk, index) => { + const content = "" + addChunk({ + ledger, + chunk: { + ref: `r${retrievalIndex}:referenced:${index + 1}`, + kind: "referenced_chunk", + content, + contentPreview: content, + chunkType: chunk.chunkType, + score: null, + source: { + documentId: chunk.documentId, + sourceFileName: null, + sectionPath: chunk.sectionPath, + }, + ...(chunk.assetUrl ? 
{ assetUrl: chunk.assetUrl } : {}), + }, + }) + }) + + return snapshot(ledger) + }, + + read(ref: string, offset = 0, limit = 4_000) { + const chunk = ledger.chunks.find((candidate) => candidate.ref === ref) + if (!chunk) { + return { + found: false as const, + ref, + contentSlice: "", + contentLength: 0, + offset: 0, + limit, + hasMoreContent: false, + } + } + + const boundedOffset = Math.max(0, Math.min(offset, chunk.content.length)) + const boundedLimit = Math.max(1, limit) + const end = Math.min(boundedOffset + boundedLimit, chunk.content.length) + return { + found: true as const, + ref, + chunk, + contentSlice: chunk.content.slice(boundedOffset, end), + contentLength: chunk.content.length, + offset: boundedOffset, + limit: boundedLimit, + hasMoreContent: end < chunk.content.length, + } + }, + + hasEvidence(): boolean { + return ledger.chunks.length > 0 || ledger.evidenceText.length > 0 + }, + + hasRef(ref: string): boolean { + return ( + ledger.chunks.some((chunk) => chunk.ref === ref) || + ledger.assets.some((asset) => asset.ref === ref) + ) + }, + + snapshot(): EvidenceLedgerSnapshot { + return snapshot(ledger) + }, + } +} + +function addChunkFromResult(input: { + readonly ledger: MutableLedger + readonly result: RetrievalResult + readonly ref: string + readonly kind: EvidenceChunk["kind"] +}): void { + addChunk({ + ledger: input.ledger, + chunk: { + ref: input.ref, + kind: input.kind, + content: input.result.content, + contentPreview: buildContentPreview(input.result.content), + chunkType: input.result.chunkType, + score: input.result.score, + source: { + documentId: input.result.source.documentId, + sourceFileName: input.result.source.sourceFileName, + sectionPath: input.result.source.sectionPath, + }, + ...(input.result.assetUrl ? 
{ assetUrl: input.result.assetUrl } : {}), + }, + }) +} + +function addChunk(input: { + readonly ledger: MutableLedger + readonly chunk: Omit +}): void { + const assetUrl = input.chunk.assetUrl?.trim() + if (!assetUrl || !isRenderableAsset(input.chunk.chunkType, assetUrl)) { + input.ledger.chunks.push(input.chunk) + return + } + + const type = getAssetType(input.chunk.chunkType, assetUrl) + const assetRef = `asset:${input.chunk.ref}` + const chunk: EvidenceChunk = { + ...input.chunk, + assetRef, + } + input.ledger.chunks.push(chunk) + input.ledger.assets.push({ + ref: assetRef, + chunkRef: chunk.ref, + type, + assetUrl, + source: chunk.source, + label: formatAssetLabel(chunk), + }) +} + +function buildContentPreview(content: string): string { + const normalized = content.replace(/\s+/g, " ").trim() + if (normalized.length <= contentPreviewLimit) return normalized + return `${normalized.slice(0, contentPreviewLimit)}...` +} + +function isRenderableAsset(chunkType: string, assetUrl: string): boolean { + const normalizedChunkType = chunkType.toLowerCase() + return ( + normalizedChunkType === "image" || + normalizedChunkType === "table" || + isImageAssetUrl(assetUrl) + ) +} + +function getAssetType(chunkType: string, assetUrl: string): "image" | "table" { + return chunkType.toLowerCase() === "table" && !isImageAssetUrl(assetUrl) + ? "table" + : "image" +} + +function isImageAssetUrl(assetUrl: string): boolean { + const pathname = getUrlPathname(assetUrl).toLowerCase() + return imageExtensions.some((extension) => pathname.endsWith(extension)) +} + +function getUrlPathname(assetUrl: string): string { + try { + return new URL(assetUrl).pathname + } catch { + return assetUrl.split("?")[0] ?? 
assetUrl + } +} + +function formatAssetLabel(chunk: EvidenceChunk): string { + return [ + chunk.source.sourceFileName, + chunk.source.sectionPath, + chunk.chunkType, + ] + .filter((value): value is string => Boolean(value?.trim())) + .join(" / ") +} + +function snapshot(ledger: MutableLedger): EvidenceLedgerSnapshot { + return { + retrievalCount: ledger.retrievalCount, + chunks: [...ledger.chunks], + assets: [...ledger.assets], + evidenceText: [...ledger.evidenceText], + stopReasons: [...ledger.stopReasons], + failureReasons: [...ledger.failureReasons], + decisionTraces: [...ledger.decisionTraces], + } +} + +function getDecisionTrace(response: RetrievalQueryResponse): unknown | null { + const record = response as RetrievalQueryResponse & { + readonly decision_trace?: unknown + readonly decisionTree?: unknown + readonly decision_tree?: unknown + } + return ( + response.decisionTrace ?? + record.decision_trace ?? + record.decisionTree ?? + record.decision_tree ?? + null + ) +} diff --git a/src/agent-harness/runtime.test.ts b/src/agent-harness/runtime.test.ts new file mode 100644 index 0000000..09cd630 --- /dev/null +++ b/src/agent-harness/runtime.test.ts @@ -0,0 +1,239 @@ +import { describe, expect, it, vi } from "vitest" +import type { RetrievalQueryResponse } from "@ontos-ai/knowhere-sdk" + +import { + buildHarnessMessages, + buildHarnessSystemPrompt, + createHarnessTools, +} from "./runtime" +import { createEvidenceLedger } from "./ledger" +import type { + AgentTurnInput, + ContextPolicy, + IntentFrame, + RetrievalCapability, +} from "./types" + +describe("agent harness runtime", () => { + it("keeps KNOWHERE as an evidence provider instead of exposing internal navigation", () => { + const prompt = buildHarnessSystemPrompt(makeTurnInput()) + + expect(prompt).toContain("KNOWHERE is only an evidence provider") + expect(prompt).toContain("Do not infer or control its internal navigation") + expect(prompt).not.toContain("LegalAction") + 
expect(prompt).not.toContain("navigation action") + }) + + it("passes only outer retrieval parameters to KNOWHERE after intent and context policy are declared", async () => { + const query = vi.fn().mockResolvedValue( + makeRetrievalResponse(), + ) + const state: { + intent?: IntentFrame + contextPolicy?: ContextPolicy + } = {} + const tools = createHarnessTools({ + state, + ledger: createEvidenceLedger(), + retrieval: { query }, + recentTurns: [], + }) + + expect(await executeTool(tools.retrieve, { query: "q4 chart" })).toEqual({ + ok: false, + message: "declareIntent must be called before retrieve.", + }) + + await executeTool(tools.declareIntent, { + task: "show_media", + dependsOnPreviousTurn: false, + retrievalNeeded: "yes", + targetModalities: ["image"], + constraints: { desiredCount: 2, maxCount: 2 }, + groundingPolicy: "must_use_sources", + }) + expect(await executeTool(tools.retrieve, { query: "q4 chart" })).toEqual({ + ok: false, + message: "setContextPolicy must be called before retrieve.", + }) + + await executeTool(tools.setContextPolicy, { + carryHistory: "none", + reason: "The current request is unrelated to previous turns.", + activePriorTurnIds: [], + }) + const result = await executeTool(tools.retrieve, { + query: "q4 chart", + modalities: ["image"], + topK: 2, + purpose: "Find the two requested charts.", + }) + + expect(result).toMatchObject({ + ok: true, + retrievalCount: 1, + }) + expect(query).toHaveBeenCalledWith({ + query: "q4 chart", + modalities: ["image"], + topK: 2, + purpose: "Find the two requested charts.", + signalPaths: undefined, + filterMode: undefined, + threshold: undefined, + }) + expect(JSON.stringify(query.mock.calls[0]?.[0])).not.toContain( + "LegalAction", + ) + }) + + it("blocks finalize until intent and context policy are declared", async () => { + const state: { + intent?: IntentFrame + contextPolicy?: ContextPolicy + finalizedManifest?: unknown + } = {} + const tools = createHarnessTools({ + state, + ledger: 
createEvidenceLedger(), + retrieval: { query: vi.fn() }, + recentTurns: [], + }) + + const manifest = { + text: "Answer.", + citations: [], + artifacts: [], + unresolved: [], + } + + expect(await executeTool(tools.finalize, manifest)).toEqual({ + ok: false, + message: "declareIntent must be called before finalize.", + }) + expect(state.finalizedManifest).toBeUndefined() + + await executeTool(tools.declareIntent, { + task: "answer_question", + dependsOnPreviousTurn: false, + retrievalNeeded: "no", + targetModalities: ["text"], + constraints: {}, + groundingPolicy: "may_use_sources", + }) + expect(await executeTool(tools.finalize, manifest)).toEqual({ + ok: false, + message: "setContextPolicy must be called before finalize.", + }) + expect(state.finalizedManifest).toBeUndefined() + + await executeTool(tools.setContextPolicy, { + carryHistory: "none", + reason: "Self-contained request.", + activePriorTurnIds: [], + }) + expect(await executeTool(tools.finalize, manifest)).toMatchObject({ + ok: true, + text: "Answer.", + }) + expect(state.finalizedManifest).toEqual(manifest) + }) + + it("exposes full prior-turn content on demand through readPriorTurn", async () => { + const tools = createHarnessTools({ + state: {}, + ledger: createEvidenceLedger(), + retrieval: { query: vi.fn() }, + recentTurns: [ + { + id: "turn_1", + role: "assistant", + contentPreview: "Truncated preview...", + content: "The full earlier answer about the tax filing deadline.", + citationLabels: ["tax.pdf / deadline"], + }, + ], + }) + + expect(await executeTool(tools.readPriorTurn, { id: "turn_1" })).toEqual({ + found: true, + id: "turn_1", + role: "assistant", + content: "The full earlier answer about the tax filing deadline.", + citationLabels: ["tax.pdf / deadline"], + }) + expect(await executeTool(tools.readPriorTurn, { id: "missing" })).toEqual({ + found: false, + id: "missing", + message: "No prior turn with that id is available.", + }) + }) + + it("summarizes recent turns as an index instead 
of pasting full history as query context", () => { + const messages = buildHarnessMessages( + makeTurnInput({ + recentTurns: [ + { + id: "turn_1", + role: "assistant", + contentPreview: "First answer about tax filing.", + citationLabels: ["tax.pdf / deadline"], + }, + ], + }), + ) + + expect(messages).toEqual([ + { + role: "user", + content: expect.stringContaining("Recent turn index:"), + }, + ]) + expect(JSON.stringify(messages)).toContain("id=turn_1 role=assistant") + expect(JSON.stringify(messages)).not.toContain("searchSources.query") + }) +}) + +function executeTool(tool: unknown, input: unknown): Promise { + return (tool as { execute: (input: unknown) => Promise }).execute(input) +} + +function makeTurnInput(overrides: Partial = {}): AgentTurnInput { + return { + surface: "notebook_chat", + userText: "Show me two Q4 chart images.", + recentTurns: [], + outputCapabilities: { + text: true, + image: true, + table: true, + }, + ...overrides, + } +} + +function makeRetrievalResponse(): RetrievalQueryResponse { + return { + namespace: "notebook", + query: "q4 chart", + routerUsed: "workflow_single_step", + answerText: null, + evidenceText: "Chart evidence", + stopReason: "answer_done", + failureReason: null, + results: [ + { + content: "", + chunkType: "image", + score: 0.9, + assetUrl: "https://assets.example/chart.png", + source: { + documentId: "doc_1", + sourceFileName: "report.pdf", + sectionPath: "images/chart.png", + }, + }, + ], + referencedChunks: [], + } +} diff --git a/src/agent-harness/runtime.ts b/src/agent-harness/runtime.ts new file mode 100644 index 0000000..e952c55 --- /dev/null +++ b/src/agent-harness/runtime.ts @@ -0,0 +1,412 @@ +import { stepCountIs, ToolLoopAgent, tool, type ModelMessage } from "ai" +import { z } from "zod" + +import { createEvidenceLedger } from "./ledger" +import type { + AgentTurn, + AgentTurnInput, + ContextPolicy, + HarnessRunResult, + HarnessTrace, + IntentFrame, + OutputManifest, + RetrievalCapability, + TargetModality, 
+} from "./types" +import { validateOutputManifest } from "./validator" + +const defaultMaxSteps = 10 +const defaultMaxRevisions = 1 + +type ToolLoopAgentSettings = ConstructorParameters[0] + +export type AgentHarnessModel = ToolLoopAgentSettings["model"] + +export type RunAgentHarnessInput = { + readonly model: AgentHarnessModel + readonly turn: AgentTurnInput + readonly retrieval: RetrievalCapability + readonly maxSteps?: number + /** + * How many times the agent may revise after a failed validation pass before + * the harness gives up and returns the last manifest with recorded errors. + */ + readonly maxRevisions?: number +} + +type HarnessToolState = { + intent?: IntentFrame + contextPolicy?: ContextPolicy + finalizedManifest?: OutputManifest +} + +const targetModalitySchema = z.enum(["text", "image", "table"]) + +const intentFrameSchema = z.object({ + task: z.enum([ + "answer", + "show_media", + "summarize", + "compare", + "continue_writing", + "rewrite", + "translate", + "correct_previous", + "clarify", + ]), + dependsOnPreviousTurn: z.boolean(), + retrievalNeeded: z.enum(["yes", "no", "maybe"]), + targetModalities: z.array(targetModalitySchema).default(["text"]), + constraints: z + .object({ + desiredCount: z.number().int().positive().optional(), + maxCount: z.number().int().positive().optional(), + language: z.string().optional(), + outputStyle: z.string().optional(), + citationRequired: z.boolean().optional(), + }) + .default({}), + groundingPolicy: z.enum([ + "must_use_sources", + "can_use_context", + "no_retrieval", + ]), +}) + +const contextPolicySchema = z.object({ + carryHistory: z.enum([ + "none", + "referential_only", + "full_recent", + "repair_previous", + ]), + reason: z.string().min(1), + activePriorTurnIds: z.array(z.string()).default([]), +}) + +const outputCitationSchema = z.object({ + ref: z.string().min(1), + label: z.string().min(1), + source: z.object({ + documentId: z.string().nullable().optional(), + sourceFileName: 
z.string().nullable().optional(), + sectionPath: z.string().nullable().optional(), + }), +}) + +const outputArtifactSchema = z.object({ + type: z.enum(["image", "table"]), + ref: z.string().min(1), + display: z.boolean(), + reason: z.string().min(1), +}) + +const outputManifestSchema = z.object({ + text: z.string(), + citations: z.array(outputCitationSchema).default([]), + artifacts: z.array(outputArtifactSchema).default([]), + unresolved: z.array(z.string()).default([]), +}) + +export async function runAgentHarness( + input: RunAgentHarnessInput, +): Promise { + const state: HarnessToolState = {} + const ledger = createEvidenceLedger() + const tools = createHarnessTools({ + state, + ledger, + retrieval: input.retrieval, + recentTurns: input.turn.recentTurns, + }) + const agent = new ToolLoopAgent({ + model: input.model, + instructions: buildHarnessSystemPrompt(input.turn), + tools, + stopWhen: stepCountIs(input.maxSteps ?? defaultMaxSteps), + }) + + const maxRevisions = input.maxRevisions ?? defaultMaxRevisions + let messages = buildHarnessMessages(input.turn) + let manifest = buildFallbackManifest("") + let validationErrors: readonly string[] = [] + let revisionsUsed = 0 + + for (let attempt = 0; ; attempt += 1) { + const response = await agent.generate({ messages }) + manifest = + state.finalizedManifest ?? buildFallbackManifest(response.text.trim()) + + const validation = validateOutputManifest({ + manifest, + intent: state.intent, + contextPolicy: state.contextPolicy, + ledger: ledger.snapshot(), + surface: input.turn.surface, + }) + validationErrors = validation.errors + + if (validation.ok || attempt >= maxRevisions) break + + // Self-correction (reflexion): continue the same conversation with + // structured validator feedback and require a fresh finalize so the agent + // can repair its own contract violations instead of shipping them. 
+ revisionsUsed += 1 + state.finalizedManifest = undefined + messages = [ + ...messages, + ...(response.response.messages as ModelMessage[]), + { + role: "user", + content: buildRevisionFeedback(validation.errors), + }, + ] + } + + const ledgerSnapshot = ledger.snapshot() + return { + manifest, + trace: { + intent: state.intent, + contextPolicy: state.contextPolicy, + ledger: ledgerSnapshot, + validationErrors, + revisionsUsed, + }, + } +} + +function buildRevisionFeedback(errors: readonly string[]): string { + return [ + "Your finalize output did not satisfy the output contract:", + ...errors.map((error) => `- ${error}`), + "", + "Fix every issue and call finalize again with a corrected manifest.", + "Do not exceed the user's requested artifact count, only cite or display", + "evidence refs that exist in the evidence ledger, and do not fabricate", + "facts when evidence is missing.", + ].join("\n") +} + +export function createHarnessTools(input: { + readonly state: HarnessToolState + readonly ledger: ReturnType + readonly retrieval: RetrievalCapability + readonly recentTurns: readonly AgentTurn[] +}) { + return { + declareIntent: tool({ + description: + "Declare the user's intent before any other action. This is working memory, not a final answer.", + inputSchema: intentFrameSchema, + execute: async (intent): Promise => { + input.state.intent = intent + return intent + }, + }), + + setContextPolicy: tool({ + description: + "Decide whether prior turns should influence this turn. Use none for unrelated follow-ups.", + inputSchema: contextPolicySchema, + execute: async (policy): Promise => { + input.state.contextPolicy = policy + return policy + }, + }), + + retrieve: tool({ + description: + "Ask KNOWHERE for evidence context. 
KNOWHERE handles internal navigation; this tool only submits a concise query and records returned evidence.", + inputSchema: z.object({ + query: z.string().min(1), + modalities: z.array(targetModalitySchema).default(["text"]), + purpose: z.string().optional(), + topK: z.number().int().min(1).max(12).optional(), + signalPaths: z.array(z.string().min(1)).max(8).optional(), + filterMode: z.enum(["keep", "delete"]).optional(), + threshold: z.number().min(0).max(1).optional(), + }), + execute: async (request) => { + if (!input.state.intent) { + return { + ok: false, + message: "declareIntent must be called before retrieve.", + } + } + if (!input.state.contextPolicy) { + return { + ok: false, + message: "setContextPolicy must be called before retrieve.", + } + } + + const response = await input.retrieval.query({ + query: request.query, + modalities: request.modalities as TargetModality[], + purpose: request.purpose, + topK: request.topK, + signalPaths: request.signalPaths, + filterMode: request.filterMode, + threshold: request.threshold, + }) + const snapshot = input.ledger.addRetrievalResponse(response) + return { + ok: true, + retrievalCount: snapshot.retrievalCount, + evidenceText: response.evidenceText ?? "", + stopReason: response.stopReason ?? null, + failureReason: response.failureReason ?? 
null, + chunks: snapshot.chunks.map((chunk) => ({ + ref: chunk.ref, + kind: chunk.kind, + type: chunk.chunkType, + preview: chunk.contentPreview, + source: chunk.source, + assetRef: chunk.assetRef, + })), + assets: snapshot.assets.map((asset) => ({ + ref: asset.ref, + type: asset.type, + label: asset.label, + source: asset.source, + })), + } + }, + }), + + readEvidence: tool({ + description: + "Read more text from an evidence chunk already returned by KNOWHERE.", + inputSchema: z.object({ + ref: z.string().min(1), + offset: z.number().int().min(0).optional(), + limit: z.number().int().min(1).max(8_000).optional(), + }), + execute: async ({ ref, offset = 0, limit = 4_000 }) => + input.ledger.read(ref, offset, limit), + }), + + readPriorTurn: tool({ + description: + "Read the full text and citation labels of a specific prior turn by id " + + "(ids come from the recent turn index). Use this only when the current " + + "request depends on, references, or corrects a previous turn.", + inputSchema: z.object({ + id: z.string().min(1), + }), + execute: async ({ id }) => { + const priorTurn = input.recentTurns.find((turn) => turn.id === id) + if (!priorTurn) { + return { + found: false as const, + id, + message: "No prior turn with that id is available.", + } + } + return { + found: true as const, + id, + role: priorTurn.role, + content: priorTurn.content ?? priorTurn.contentPreview, + citationLabels: priorTurn.citationLabels ?? [], + } + }, + }), + + finalize: tool({ + description: + "Finalize the user-facing output manifest. This is the only final answer " + + "contract. 
Artifacts listed here with display=true are the exact set of " + + "images/tables shown to the user; cite only refs from the evidence ledger.", + inputSchema: outputManifestSchema, + execute: async (manifest) => { + if (!input.state.intent) { + return { + ok: false as const, + message: "declareIntent must be called before finalize.", + } + } + if (!input.state.contextPolicy) { + return { + ok: false as const, + message: "setContextPolicy must be called before finalize.", + } + } + input.state.finalizedManifest = manifest + return { ok: true as const, ...manifest } + }, + }), + } as const +} + +export function buildHarnessSystemPrompt(turn: AgentTurnInput): string { + return [ + "You are the outer Knowhere Agent Harness.", + "KNOWHERE is only an evidence provider. Do not infer or control its internal navigation algorithm.", + "Your job is to understand intent, decide context use, optionally retrieve evidence, select evidence/artifacts, and finalize an output manifest.", + "", + "Required workflow:", + "1. Call declareIntent first. Capture constraints like a requested image/table count in constraints.desiredCount.", + "2. Call setContextPolicy next, deciding how prior turns should influence this turn.", + "3. When the policy needs prior-turn detail (references or corrections), call readPriorTurn for the relevant ids.", + "4. Call retrieve only when evidence is needed. The query must be concise and self-contained.", + "5. Use readEvidence only for chunk refs already in the evidence ledger.", + "6. Call finalize with text, citations, artifacts, and unresolved issues. 
finalize requires declareIntent and setContextPolicy first.", + "", + "Context rules:", + "- If the current user request is unrelated to prior turns, set carryHistory to none and do not reuse prior topics.", + "- If the user corrects a previous answer, set carryHistory to repair_previous, read the relevant prior turn, then re-retrieve and re-answer using the correction.", + "- If the user uses references like this document, that image, or the previous answer, choose referential_only or full_recent and read the prior turn you depend on.", + "", + "Output rules:", + "- Final output is the OutputManifest passed to finalize, not freeform tool JSON or trailing text.", + "- artifacts with display=true are the exact images/tables shown. Never display every candidate; honor constraints.desiredCount / maxCount.", + "- citations and artifacts may only reference refs returned by retrieve (in the evidence ledger).", + "- If evidence is insufficient, list it in unresolved instead of fabricating facts.", + "- After a validation-feedback message, fix all listed issues and call finalize again.", + `Surface: ${turn.surface}`, + `Output capabilities: ${JSON.stringify(turn.outputCapabilities)}`, + turn.sourceContext ? `Searchable source context:\n${turn.sourceContext}` : "", + ] + .filter((line): line is string => line.length > 0) + .join("\n") +} + +export function buildHarnessMessages(turn: AgentTurnInput): ModelMessage[] { + return [ + { + role: "user", + content: [ + `Current user request:\n${turn.userText}`, + turn.localContext ? `Local context:\n${turn.localContext}` : "", + formatRecentTurnIndex(turn), + ] + .filter((part) => part.length > 0) + .join("\n\n"), + }, + ] +} + +function formatRecentTurnIndex(turn: AgentTurnInput): string { + if (turn.recentTurns.length === 0) return "Recent turn index: none" + + const lines = turn.recentTurns.map((recentTurn) => { + const citationSuffix = recentTurn.citationLabels?.length + ? 
` citations=${recentTurn.citationLabels.join("; ")}` + : "" + return `- id=${recentTurn.id} role=${recentTurn.role}${citationSuffix} preview=${JSON.stringify(recentTurn.contentPreview)}` + }) + return ["Recent turn index:", ...lines].join("\n") +} + +function buildFallbackManifest(text: string): OutputManifest { + return { + text, + citations: [], + artifacts: [], + unresolved: text ? [] : ["The agent did not finalize an output manifest."], + } +} + +export type { HarnessTrace } diff --git a/src/agent-harness/types.ts b/src/agent-harness/types.ts new file mode 100644 index 0000000..1f4cad3 --- /dev/null +++ b/src/agent-harness/types.ts @@ -0,0 +1,160 @@ +import type { + RetrievalQueryParams, + RetrievalQueryResponse, +} from "@ontos-ai/knowhere-sdk" + +export type AgentSurface = "notebook_chat" | "typing_compose" | "typing_quick_ask" + +export type AgentTask = + | "answer" + | "show_media" + | "summarize" + | "compare" + | "continue_writing" + | "rewrite" + | "translate" + | "correct_previous" + | "clarify" + +export type TargetModality = "text" | "image" | "table" + +export type GroundingPolicy = + | "must_use_sources" + | "can_use_context" + | "no_retrieval" + +export type HistoryCarryMode = + | "none" + | "referential_only" + | "full_recent" + | "repair_previous" + +export type IntentFrame = { + readonly task: AgentTask + readonly dependsOnPreviousTurn: boolean + readonly retrievalNeeded: "yes" | "no" | "maybe" + readonly targetModalities: readonly TargetModality[] + readonly constraints: { + readonly desiredCount?: number + readonly maxCount?: number + readonly language?: string + readonly outputStyle?: string + readonly citationRequired?: boolean + } + readonly groundingPolicy: GroundingPolicy +} + +export type ContextPolicy = { + readonly carryHistory: HistoryCarryMode + readonly reason: string + readonly activePriorTurnIds: readonly string[] +} + +export type AgentTurn = { + readonly id: string + readonly role: "user" | "assistant" + readonly contentPreview: 
string + /** + * Full turn text. Withheld from the default model context (only the preview + * is shown) to avoid cross-turn pollution, and exposed on demand through the + * readPriorTurn tool so repair/correction turns can act on real history. + */ + readonly content?: string + readonly citationLabels?: readonly string[] +} + +export type AgentTurnInput = { + readonly surface: AgentSurface + readonly userText: string + readonly recentTurns: readonly AgentTurn[] + readonly localContext?: string + readonly sourceContext?: string + readonly outputCapabilities: { + readonly text: boolean + readonly image: boolean + readonly table: boolean + readonly inlineInsertion?: boolean + } +} + +export type HarnessRetrievalRequest = Pick< + RetrievalQueryParams, + "query" | "topK" | "signalPaths" | "filterMode" | "threshold" +> & { + readonly modalities: readonly TargetModality[] + readonly purpose?: string +} + +export type RetrievalCapability = { + readonly query: ( + input: HarnessRetrievalRequest, + ) => Promise +} + +export type EvidenceChunk = { + readonly ref: string + readonly kind: "result" | "referenced_chunk" + readonly content: string + readonly contentPreview: string + readonly chunkType: string + readonly score: number | null + readonly source: { + readonly documentId?: string | null + readonly sourceFileName?: string | null + readonly sectionPath?: string | null + } + readonly assetRef?: string + readonly assetUrl?: string +} + +export type EvidenceAsset = { + readonly ref: string + readonly chunkRef: string + readonly type: "image" | "table" + readonly assetUrl: string + readonly source: EvidenceChunk["source"] + readonly label: string +} + +export type EvidenceLedgerSnapshot = { + readonly retrievalCount: number + readonly chunks: readonly EvidenceChunk[] + readonly assets: readonly EvidenceAsset[] + readonly evidenceText: readonly string[] + readonly stopReasons: readonly string[] + readonly failureReasons: readonly string[] + readonly decisionTraces: readonly 
unknown[] +} + +export type OutputCitation = { + readonly ref: string + readonly label: string + readonly source: EvidenceChunk["source"] +} + +export type OutputArtifact = { + readonly type: "image" | "table" + readonly ref: string + readonly display: boolean + readonly reason: string +} + +export type OutputManifest = { + readonly text: string + readonly citations: readonly OutputCitation[] + readonly artifacts: readonly OutputArtifact[] + readonly unresolved: readonly string[] +} + +export type HarnessTrace = { + readonly intent?: IntentFrame + readonly contextPolicy?: ContextPolicy + readonly ledger: EvidenceLedgerSnapshot + readonly validationErrors: readonly string[] + readonly revisionsUsed: number +} + +export type HarnessRunResult = { + readonly manifest: OutputManifest + readonly trace: HarnessTrace +} diff --git a/src/agent-harness/validator.test.ts b/src/agent-harness/validator.test.ts new file mode 100644 index 0000000..70a7ea9 --- /dev/null +++ b/src/agent-harness/validator.test.ts @@ -0,0 +1,172 @@ +import { describe, expect, it } from "vitest" + +import { validateOutputManifest } from "./validator" +import type { + ContextPolicy, + EvidenceLedgerSnapshot, + IntentFrame, + OutputManifest, +} from "./types" + +describe("validateOutputManifest", () => { + it("requires the agent to declare intent and context policy before finalizing", () => { + const validation = validateOutputManifest({ + manifest: makeManifest({ text: "Answer." 
}), + ledger: emptyLedger, + surface: "notebook_chat", + }) + + expect(validation.errors).toContain( + "Agent must declare intent before finalizing.", + ) + expect(validation.errors).toContain( + "Agent must set context policy before finalizing.", + ) + }) + + it("limits displayed artifacts using the declared intent instead of hard-coded media rules", () => { + const validation = validateOutputManifest({ + manifest: makeManifest({ + artifacts: [ + { + type: "image", + ref: "asset:r1:result:1", + display: true, + reason: "front", + }, + { + type: "image", + ref: "asset:r1:result:2", + display: true, + reason: "back", + }, + { + type: "image", + ref: "asset:r1:result:3", + display: true, + reason: "extra candidate", + }, + ], + }), + intent: makeIntent({ desiredCount: 2, maxCount: 2 }), + contextPolicy: unrelatedContextPolicy, + ledger: { + ...emptyLedger, + assets: [ + makeAsset("asset:r1:result:1"), + makeAsset("asset:r1:result:2"), + makeAsset("asset:r1:result:3"), + ], + }, + surface: "notebook_chat", + }) + + expect(validation.errors).toContain( + "Displayed artifact count 3 exceeds desired count 2.", + ) + expect(validation.errors).toContain( + "Displayed artifact count 3 exceeds maximum count 2.", + ) + }) + + it("rejects grounded answers that use evidence without citations or selected artifacts", () => { + const validation = validateOutputManifest({ + manifest: makeManifest({ text: "Revenue increased." 
}), + intent: makeIntent({}), + contextPolicy: unrelatedContextPolicy, + ledger: { + ...emptyLedger, + chunks: [ + { + ref: "r1:result:1", + kind: "result", + content: "Revenue increased.", + contentPreview: "Revenue increased.", + chunkType: "text", + score: 0.9, + source: { + documentId: "doc_1", + sourceFileName: "report.pdf", + sectionPath: "Q4", + }, + }, + ], + }, + surface: "notebook_chat", + }) + + expect(validation.errors).toContain( + "Grounded output used evidence but did not cite or display any selected evidence.", + ) + }) + + it("keeps typing compose output insertion-ready", () => { + const validation = validateOutputManifest({ + manifest: makeManifest({ text: "- bullet\n- list" }), + intent: makeIntent({ groundingPolicy: "no_retrieval" }), + contextPolicy: unrelatedContextPolicy, + ledger: emptyLedger, + surface: "typing_compose", + }) + + expect(validation.errors).toContain( + "Typing compose output must be insertion-ready plain text.", + ) + }) +}) + +const emptyLedger: EvidenceLedgerSnapshot = { + retrievalCount: 0, + chunks: [], + assets: [], + evidenceText: [], + stopReasons: [], + failureReasons: [], + decisionTraces: [], +} + +const unrelatedContextPolicy: ContextPolicy = { + carryHistory: "none", + reason: "The current turn is self-contained.", + activePriorTurnIds: [], +} + +function makeIntent( + constraints: IntentFrame["constraints"] & { + readonly groundingPolicy?: IntentFrame["groundingPolicy"] + }, +): IntentFrame { + return { + task: "answer", + dependsOnPreviousTurn: false, + retrievalNeeded: constraints.groundingPolicy === "no_retrieval" ? "no" : "yes", + targetModalities: ["text"], + constraints, + groundingPolicy: constraints.groundingPolicy ?? 
"must_use_sources", + } +} + +function makeManifest(overrides: Partial): OutputManifest { + return { + text: "", + citations: [], + artifacts: [], + unresolved: [], + ...overrides, + } +} + +function makeAsset(ref: string): EvidenceLedgerSnapshot["assets"][number] { + return { + ref, + chunkRef: ref.replace("asset:", ""), + type: "image", + assetUrl: `https://assets.example/${ref}.png`, + label: ref, + source: { + documentId: "doc_1", + sourceFileName: "report.pdf", + sectionPath: ref, + }, + } +} diff --git a/src/agent-harness/validator.ts b/src/agent-harness/validator.ts new file mode 100644 index 0000000..195241d --- /dev/null +++ b/src/agent-harness/validator.ts @@ -0,0 +1,137 @@ +import type { + ContextPolicy, + EvidenceLedgerSnapshot, + IntentFrame, + OutputManifest, +} from "./types" + +export type ManifestValidationInput = { + readonly manifest: OutputManifest + readonly intent?: IntentFrame + readonly contextPolicy?: ContextPolicy + readonly ledger: EvidenceLedgerSnapshot + readonly surface: "notebook_chat" | "typing_compose" | "typing_quick_ask" +} + +export type ManifestValidationResult = { + readonly ok: boolean + readonly errors: readonly string[] +} + +export function validateOutputManifest( + input: ManifestValidationInput, +): ManifestValidationResult { + const errors: string[] = [] + const text = input.manifest.text.trim() + + if (!text && input.manifest.artifacts.every((artifact) => !artifact.display)) { + errors.push("Final output must contain text or at least one displayed artifact.") + } + + validateWorkflow(input, errors) + validateArtifactRefs(input, errors) + validateArtifactCounts(input, errors) + validateGrounding(input, errors) + validateTypingText(input, errors) + + return { + ok: errors.length === 0, + errors, + } +} + +function validateWorkflow( + input: ManifestValidationInput, + errors: string[], +): void { + if (!input.intent) { + errors.push("Agent must declare intent before finalizing.") + } + + if (!input.contextPolicy) { + 
errors.push("Agent must set context policy before finalizing.") + } +} + +function validateArtifactRefs( + input: ManifestValidationInput, + errors: string[], +): void { + const knownRefs = new Set([ + ...input.ledger.chunks.map((chunk) => chunk.ref), + ...input.ledger.assets.map((asset) => asset.ref), + ]) + + for (const artifact of input.manifest.artifacts) { + if (!knownRefs.has(artifact.ref)) { + errors.push(`Artifact ref '${artifact.ref}' was not found in the evidence ledger.`) + } + } + + for (const citation of input.manifest.citations) { + if (!knownRefs.has(citation.ref)) { + errors.push(`Citation ref '${citation.ref}' was not found in the evidence ledger.`) + } + } +} + +function validateArtifactCounts( + input: ManifestValidationInput, + errors: string[], +): void { + const displayedCount = input.manifest.artifacts.filter( + (artifact) => artifact.display, + ).length + const desiredCount = input.intent?.constraints.desiredCount + const maxCount = input.intent?.constraints.maxCount + + if (typeof desiredCount === "number" && displayedCount > desiredCount) { + errors.push( + `Displayed artifact count ${displayedCount} exceeds desired count ${desiredCount}.`, + ) + } + + if (typeof maxCount === "number" && displayedCount > maxCount) { + errors.push( + `Displayed artifact count ${displayedCount} exceeds maximum count ${maxCount}.`, + ) + } +} + +function validateGrounding( + input: ManifestValidationInput, + errors: string[], +): void { + if (input.intent?.groundingPolicy !== "must_use_sources") return + + const hasLedgerEvidence = + input.ledger.chunks.length > 0 || input.ledger.evidenceText.length > 0 + const hasOutputEvidence = + input.manifest.citations.length > 0 || + input.manifest.artifacts.some((artifact) => artifact.display) + const hasUnresolved = input.manifest.unresolved.length > 0 + + if (!hasLedgerEvidence && !hasUnresolved) { + errors.push( + "Grounded output requires evidence or an explicit unresolved reason.", + ) + } + + if 
(hasLedgerEvidence && !hasOutputEvidence && !hasUnresolved) { + errors.push( + "Grounded output used evidence but did not cite or display any selected evidence.", + ) + } +} + +function validateTypingText( + input: ManifestValidationInput, + errors: string[], +): void { + if (input.surface !== "typing_compose") return + + const text = input.manifest.text + if (/```|^\s*#{1,6}\s|^\s*[-*]\s/m.test(text)) { + errors.push("Typing compose output must be insertion-ready plain text.") + } +} diff --git a/src/components/chat-message-list.test.ts b/src/components/chat-message-list.test.ts index 22f89b7..fa1a181 100644 --- a/src/components/chat-message-list.test.ts +++ b/src/components/chat-message-list.test.ts @@ -100,6 +100,73 @@ describe("ChatMessageList", () => { ).toBeNull(); }); + it("renders selected image artifacts instead of every retrieved image citation", () => { + render( + React.createElement(ChatMessageList, { + messages: [ + { + id: "assistant_1", + role: "assistant", + content: "已找到相关图片,见下方图片。", + citations: [ + { + chunkType: "image", + score: 0.9, + assetUrl: "https://blob.example/images/front.jpg", + source: { + documentId: "doc_1", + sourceFileName: "商务标文件.pdf", + sectionPath: "身份证正面", + }, + }, + { + chunkType: "image", + score: 0.88, + assetUrl: "https://blob.example/images/back.jpg", + source: { + documentId: "doc_1", + sourceFileName: "商务标文件.pdf", + sectionPath: "身份证反面", + }, + }, + { + chunkType: "image", + score: 0.7, + assetUrl: "https://blob.example/images/extra.jpg", + source: { + documentId: "doc_1", + sourceFileName: "商务标文件.pdf", + sectionPath: "其他候选图片", + }, + }, + ], + artifacts: [ + { + type: "image", + display: true, + assetUrl: "https://blob.example/images/front.jpg", + label: "身份证正面", + }, + { + type: "image", + display: true, + assetUrl: "https://blob.example/images/back.jpg", + label: "身份证反面", + }, + ], + }, + ], + }), + ); + + const images = screen.getAllByRole("img"); + expect(images.map((image) => 
image.getAttribute("src"))).toEqual([ + "https://blob.example/images/front.jpg", + "https://blob.example/images/back.jpg", + ]); + expect(screen.queryByRole("img", { name: "其他候选图片" })).toBeNull(); + }); + it("renders assistant markdown with GitHub-flavored tables", () => { render( React.createElement(ChatMessageList, { diff --git a/src/components/chat-message-list.tsx b/src/components/chat-message-list.tsx index 937da4e..c112314 100644 --- a/src/components/chat-message-list.tsx +++ b/src/components/chat-message-list.tsx @@ -11,6 +11,7 @@ import { chatPanelModel } from "@/components/chat-panel-model"; import { ScrollArea } from "@/components/ui/scroll-area"; import { Spinner } from "@/components/ui/spinner"; import type { + ChatArtifactView, ChatCitationView, ChatMessageView, } from "@/domains/chat/types"; @@ -25,6 +26,12 @@ type DisplayImageCitation = DisplayCitation & { readonly assetUrl: string; }; +type DisplayImageArtifact = { + readonly assetUrl: string; + readonly citationId: string; + readonly label: string; +}; + const assistantMarkdownComponents: Components = { p: ({ children }) => (

{children}

@@ -252,10 +259,14 @@ function MessageBubble({ message, sourceTitlesByDocumentId, ); - const displayImageCitations = getDisplayImageCitations( + const displayImageArtifacts = getDisplayImageArtifacts( message, sourceTitlesByDocumentId, ); + const displayImageCitations = + displayImageArtifacts.length > 0 + ? displayImageArtifacts + : getDisplayImageCitations(message, sourceTitlesByDocumentId); return (
@@ -388,6 +399,47 @@ function getDisplayImageCitations( return imageCitations; } +function getDisplayImageArtifacts( + message: ChatMessageView, + sourceTitlesByDocumentId: Readonly>, +): readonly DisplayImageArtifact[] { + const seenAssetUrls = new Set(); + const imageArtifacts: DisplayImageArtifact[] = []; + + for (const [index, artifact] of (message.artifacts ?? []).entries()) { + if (artifact.display === false || artifact.type !== "image") continue; + + const assetUrl = getTrimmedCitationField(artifact.assetUrl); + if (!assetUrl || seenAssetUrls.has(assetUrl)) continue; + + seenAssetUrls.add(assetUrl); + imageArtifacts.push({ + assetUrl, + citationId: `${message.id}:artifact:${index}`, + label: getArtifactLabel(artifact, sourceTitlesByDocumentId), + }); + } + + return imageArtifacts; +} + +function getArtifactLabel( + artifact: ChatArtifactView, + sourceTitlesByDocumentId: Readonly>, +): string { + const label = getTrimmedCitationField(artifact.label); + if (label) return label; + + if (artifact.citation) { + return chatPanelModel.getCitationLabel( + artifact.citation, + sourceTitlesByDocumentId, + ); + } + + return getTrimmedCitationField(artifact.reason) ?? 
"Selected image"; +} + function isImageCitation( citation: ChatCitationView, assetUrl: string, diff --git a/src/domains/chat/chat-citation-persistence.ts b/src/domains/chat/chat-citation-persistence.ts index a705362..1b160f0 100644 --- a/src/domains/chat/chat-citation-persistence.ts +++ b/src/domains/chat/chat-citation-persistence.ts @@ -1,4 +1,5 @@ import type { + ChatArtifactView, ChatCitationView, CitationView, RetrievalResultView, @@ -11,6 +12,9 @@ type ChatCitationPersistence = { | null | undefined, ) => CitationView[] | null + readonly normalizeArtifacts: ( + artifacts: readonly ChatArtifactView[] | null | undefined, + ) => ChatArtifactView[] | null readonly replaceDemoCitationDocumentId: ( citations: readonly ChatCitationView[] | undefined, documentIdMap: ReadonlyMap, @@ -27,6 +31,27 @@ function normalizeCitations( return citations.map(toCitationView) } +function normalizeArtifacts( + artifacts: readonly ChatArtifactView[] | null | undefined, +): ChatArtifactView[] | null { + if (!artifacts || artifacts.length === 0) return null + return artifacts.map(toArtifactView) +} + +function toArtifactView(artifact: ChatArtifactView): ChatArtifactView { + return { + type: artifact.type, + ref: artifact.ref, + assetUrl: artifact.assetUrl, + label: artifact.label, + display: artifact.display, + reason: artifact.reason, + citation: artifact.citation + ? 
toCitationView(artifact.citation) + : undefined, + } +} + function replaceDemoCitationDocumentId( citations: readonly ChatCitationView[] | undefined, documentIdMap: ReadonlyMap, @@ -67,5 +92,6 @@ function toCitationView( export const chatCitationPersistence: ChatCitationPersistence = { normalizeCitations, + normalizeArtifacts, replaceDemoCitationDocumentId, } diff --git a/src/domains/chat/chat-message-repository.ts b/src/domains/chat/chat-message-repository.ts index 4cc04f1..e0f2584 100644 --- a/src/domains/chat/chat-message-repository.ts +++ b/src/domains/chat/chat-message-repository.ts @@ -9,6 +9,7 @@ import { deriveChatThreadTitle } from "./title" import { DbClient } from "@/infrastructure/db" import { chatMessages, chatThreads, type ChatMessage } from "@/infrastructure/db/schema" import type { + ChatArtifactView, ChatCitationView, CitationView, RetrievalResultView, @@ -21,6 +22,7 @@ type AppendChatMessageInput = { readonly citations?: | readonly (ChatCitationView | CitationView | RetrievalResultView)[] | null + readonly artifacts?: readonly ChatArtifactView[] | null } type ChatMessageRepository = { @@ -78,6 +80,9 @@ const appendMessageToThreadEffect: ChatMessageRepository["appendMessageToThreadE citations: chatCitationPersistence.normalizeCitations( input.citations, ), + artifacts: chatCitationPersistence.normalizeArtifacts( + input.artifacts, + ), }) .returning() diff --git a/src/domains/chat/chat-turn-persistence.test.ts b/src/domains/chat/chat-turn-persistence.test.ts index de8b3b9..3dbb85c 100644 --- a/src/domains/chat/chat-turn-persistence.test.ts +++ b/src/domains/chat/chat-turn-persistence.test.ts @@ -61,6 +61,7 @@ function makeMessage(id: string): ChatMessage { role: "user", content: "Question", citations: null, + artifacts: null, createdAt: new Date("2026-05-10T00:00:00.000Z"), }; } diff --git a/src/domains/chat/chat-turn-persistence.ts b/src/domains/chat/chat-turn-persistence.ts index fbb759c..7d70b78 100644 --- 
a/src/domains/chat/chat-turn-persistence.ts +++ b/src/domains/chat/chat-turn-persistence.ts @@ -4,6 +4,7 @@ import { chatThreadService } from "./thread-service" import type { ChatRepository } from "./service" import type { ChatMessage, ChatThread } from "@/infrastructure/db/schema" import type { + ChatArtifactView, ChatCitationView, CitationView, RetrievalResultView, @@ -16,6 +17,7 @@ type AppendMessageInput = { readonly citations?: | readonly (ChatCitationView | CitationView | RetrievalResultView)[] | null + readonly artifacts?: readonly ChatArtifactView[] | null } type ChatThreadPersistenceAdapter = { diff --git a/src/domains/chat/contracts.ts b/src/domains/chat/contracts.ts index 0bef342..eaf5f57 100644 --- a/src/domains/chat/contracts.ts +++ b/src/domains/chat/contracts.ts @@ -1,11 +1,14 @@ import type { RetrievalQueryParams, RetrievalQueryResponse, - RetrievalSource, } from "@ontos-ai/knowhere-sdk" import type { Source } from "@/infrastructure/db/schema" -import type { ChatCitationView } from "@/domains/chat/types" +import type { HarnessRunResult } from "@/agent-harness" +import type { + ChatArtifactView, + ChatCitationView, +} from "@/domains/chat/types" import type { LoadSourceAssetUrls } from "./media-assets" export type RetrievalClient = { @@ -39,22 +42,7 @@ export type AgenticRetrievalQuery = Pick< readonly purpose?: string } -export type RetrievedChunkReference = { - id: string - chunkId: string | null - kind: "result" | "referencedChunk" - resultIndex: number | null - chunkType: string - score: number | null - source: RetrievalSource - hasAssetUrl: boolean - contentLength: number - contentPreview: string - contentTruncated: boolean -} - export type AgenticRetrievalResponse = RetrievalQueryResponse & { - chunkReferences: readonly RetrievedChunkReference[] retrievalPlan?: AgenticRetrievalPlan } @@ -62,40 +50,13 @@ export type SearchSources = ( input: AgenticRetrievalQuery, ) => Promise -export type ReadRetrievedChunkInput = { - id: string - offset?: 
number - limit?: number -} - -export type ReadRetrievedChunkResult = { - id: string - chunkId: string | null - found: boolean - chunkType: string | null - score: number | null - source: RetrievalSource | null - hasAssetUrl: boolean - offset: number - limit: number - contentLength: number - contentSlice: string - hasMoreContent: boolean - nextOffset: number | null -} - -export type ReadRetrievedChunk = ( - input: ReadRetrievedChunkInput, -) => Promise - export type GenerateAnswer = (input: { question: string messages: readonly ChatHistoryMessage[] sources: readonly Source[] excludedSourceIds: readonly string[] searchSources: SearchSources - readRetrievedChunk: ReadRetrievedChunk -}) => Promise +}) => Promise export type AnswerQuestionInput = { question: string @@ -111,4 +72,5 @@ export type AnswerQuestionInput = { export type AnswerQuestionResult = { answer: string citations: ChatCitationView[] + artifacts?: ChatArtifactView[] } diff --git a/src/domains/chat/index.test.ts b/src/domains/chat/index.test.ts index a60f3c1..0640de2 100644 --- a/src/domains/chat/index.test.ts +++ b/src/domains/chat/index.test.ts @@ -1,23 +1,15 @@ import { afterEach, describe, expect, it, vi } from "vitest" import type { RetrievalResult } from "@ontos-ai/knowhere-sdk" import { Effect } from "effect" -import { generateText, ToolLoopAgent, type ModelMessage } from "ai" +import { ToolLoopAgent } from "ai" +import type { HarnessRunResult } from "@/agent-harness" import { answerQuestionWithRetrieval, - buildAgenticChatSystemPrompt, - buildGroundedPrompt, - buildRetrievalQueryPrompt, - generateAgenticGroundedAnswer, - generateContextualRetrievalQuery, - generateGroundedAnswer, + generateAgenticOutputManifest, parseChatRequestBody, } from "." 
import type { Source } from "@/infrastructure/db/schema" -import type { - AgenticRetrievalQuery, - ReadRetrievedChunkInput, -} from "./contracts" const loggerMock = vi.hoisted(() => ({ info: vi.fn(), @@ -25,11 +17,6 @@ const loggerMock = vi.hoisted(() => ({ error: vi.fn(), })); -vi.mock("ai", async (importOriginal) => ({ - ...(await importOriginal()), - generateText: vi.fn(), -})); - vi.mock("@/lib/logger", () => ({ logger: { info: loggerMock.info, @@ -40,7 +27,6 @@ vi.mock("@/lib/logger", () => ({ afterEach(() => { vi.restoreAllMocks(); - vi.mocked(generateText).mockReset(); loggerMock.info.mockReset(); loggerMock.warn.mockReset(); loggerMock.error.mockReset(); @@ -63,7 +49,7 @@ describe("answerQuestionWithRetrieval", () => { }; const generateAnswer = vi.fn(async ({ searchSources }) => { await searchSources({ query: "What does the document say?" }); - return "The answer is grounded."; + return makeHarnessRunResult("The answer is grounded."); }); const sources = [ makeSource({ knowhereDocumentId: "doc_included" }), @@ -96,7 +82,6 @@ describe("answerQuestionWithRetrieval", () => { sources, excludedSourceIds: ["source_2"], searchSources: expect.any(Function), - readRetrievedChunk: expect.any(Function), }); expect(answer).toEqual({ answer: "The answer is grounded.", @@ -143,7 +128,7 @@ describe("answerQuestionWithRetrieval", () => { query: "冯荣洲 身份证 ID card", targetContent: "image", }); - return "Matched identity card image."; + return makeHarnessRunResult("Matched identity card image."); }); await Effect.runPromise( @@ -213,7 +198,9 @@ describe("answerQuestionWithRetrieval", () => { }; const generateAnswer = vi.fn(async ({ searchSources }) => { await searchSources({ query: "What improved?" }); - return "Revenue improved [Source 1: revenue growth]. Margins expanded [Source 2: margin expansion]."; + return makeHarnessRunResult( + "Revenue improved [Source 1: revenue growth]. 
Margins expanded [Source 2: margin expansion].", + ); }); const answer = await Effect.runPromise( @@ -255,7 +242,9 @@ describe("answerQuestionWithRetrieval", () => { }; const generateAnswer = vi.fn(async ({ searchSources }) => { await searchSources({ query: "Tesla xAI investment" }); - return "Tesla invested in xAI [Source 1: xAI investment]."; + return makeHarnessRunResult( + "Tesla invested in xAI [Source 1: xAI investment].", + ); }); const sources = [ makeSource({ @@ -282,7 +271,6 @@ describe("answerQuestionWithRetrieval", () => { sources, excludedSourceIds: [], searchSources: expect.any(Function), - readRetrievedChunk: expect.any(Function), }); const expectedResult = { ...result, @@ -322,7 +310,9 @@ describe("answerQuestionWithRetrieval", () => { targetContent: "image", purpose: "Find visual rocket launch chunks.", }); - return "Use this launch photo. https://blob.example/images/image-9-Night%20Rocket%20Launch.jpg"; + return makeHarnessRunResult( + "Use this launch photo. https://blob.example/images/image-9-Night%20Rocket%20Launch.jpg", + ); }); const loadSourceAssetUrls = vi.fn().mockResolvedValue({ "images/image-9-Night Rocket Launch.jpg": @@ -372,6 +362,184 @@ describe("answerQuestionWithRetrieval", () => { ]); }); + it("returns only harness-selected artifacts when retrieval has extra media candidates", async () => { + const frontAssetUrl = "https://blob.example/images/id-front.jpg"; + const backAssetUrl = "https://blob.example/images/id-back.jpg"; + const extraAssetUrl = "https://blob.example/images/extra.jpg"; + const retrieval = { + query: vi.fn().mockResolvedValue({ + results: [ + makeRetrievalResult({ + chunkType: "image", + assetUrl: frontAssetUrl, + source: { + documentId: "doc_identity", + sourceFileName: "document-generated.pdf", + sectionPath: "身份证正面", + }, + }), + makeRetrievalResult({ + chunkType: "image", + assetUrl: backAssetUrl, + source: { + documentId: "doc_identity", + sourceFileName: "document-generated.pdf", + sectionPath: "身份证反面", + }, 
+ }), + makeRetrievalResult({ + chunkType: "image", + assetUrl: extraAssetUrl, + source: { + documentId: "doc_identity", + sourceFileName: "document-generated.pdf", + sectionPath: "营业执照", + }, + }), + ], + evidenceText: "Identity image candidates.", + referencedChunks: [], + namespace: "notebook-workspace", + query: "冯荣洲 身份证 图片", + routerUsed: "workflow_single_step", + answerText: null, + }), + }; + const generateAnswer = vi.fn(async ({ searchSources }) => { + await searchSources({ + query: "冯荣洲 身份证 图片", + targetContent: "image", + }); + const harnessResult: HarnessRunResult = { + manifest: { + text: "已找到相关身份证图片,见下方图片。", + citations: [], + artifacts: [ + { + type: "image", + ref: "asset:r1:result:1", + display: true, + reason: "身份证正面", + }, + { + type: "image", + ref: "asset:r1:result:2", + display: true, + reason: "身份证反面", + }, + { + type: "image", + ref: "asset:r1:result:3", + display: true, + reason: "多余候选图片", + }, + ], + unresolved: [], + }, + trace: { + ledger: { + retrievalCount: 1, + evidenceText: ["Identity image candidates."], + stopReasons: [], + failureReasons: [], + decisionTraces: [], + chunks: [], + assets: [ + { + ref: "asset:r1:result:1", + chunkRef: "r1:result:1", + type: "image", + assetUrl: frontAssetUrl, + label: "document-generated.pdf / 身份证正面 / image", + source: { + documentId: "doc_identity", + sourceFileName: "document-generated.pdf", + sectionPath: "身份证正面", + }, + }, + { + ref: "asset:r1:result:2", + chunkRef: "r1:result:2", + type: "image", + assetUrl: backAssetUrl, + label: "document-generated.pdf / 身份证反面 / image", + source: { + documentId: "doc_identity", + sourceFileName: "document-generated.pdf", + sectionPath: "身份证反面", + }, + }, + { + ref: "asset:r1:result:3", + chunkRef: "r1:result:3", + type: "image", + assetUrl: extraAssetUrl, + label: "document-generated.pdf / 营业执照 / image", + source: { + documentId: "doc_identity", + sourceFileName: "document-generated.pdf", + sectionPath: "营业执照", + }, + }, + ], + }, + validationErrors: [], + 
revisionsUsed: 0, + intent: { + task: "show_media", + dependsOnPreviousTurn: false, + retrievalNeeded: "yes", + targetModalities: ["image"], + constraints: { desiredCount: 2, maxCount: 2 }, + groundingPolicy: "must_use_sources", + }, + contextPolicy: { + carryHistory: "none", + reason: "The current turn is self-contained.", + activePriorTurnIds: [], + }, + }, + }; + return harnessResult; + }); + + const answer = await Effect.runPromise( + answerQuestionWithRetrieval({ + question: "请只返回冯荣洲的 2 张身份证图片", + namespace: "notebook-workspace", + sources: [ + makeSource({ + title: "商务标文件.pdf", + knowhereDocumentId: "doc_identity", + }), + ], + excludedSourceIds: [], + retrieval, + generateAnswer, + messages: [], + }), + ); + + expect(answer.artifacts?.map((artifact) => artifact.assetUrl)).toEqual([ + frontAssetUrl, + backAssetUrl, + ]); + expect(answer.artifacts?.map((artifact) => artifact.citation?.source)).toEqual( + [ + { + documentId: "doc_identity", + sourceFileName: "商务标文件.pdf", + sectionPath: "身份证正面", + }, + { + documentId: "doc_identity", + sourceFileName: "商务标文件.pdf", + sectionPath: "身份证反面", + }, + ], + ); + }); + it("turns retrieved evidence image filenames into image citations", async () => { const result = makeRetrievalResult({ content: "This section contains identity proof attachments.", @@ -398,7 +566,7 @@ describe("answerQuestionWithRetrieval", () => { query: "公民身份证明 图片", targetContent: "image", }); - return "这里是相关身份证明图片。"; + return makeHarnessRunResult("这里是相关身份证明图片。"); }); const loadSourceAssetUrls = vi.fn().mockResolvedValue({ "images/image-6-中华人民共和国居民身份证.jpg": @@ -433,7 +601,6 @@ describe("answerQuestionWithRetrieval", () => { sources, excludedSourceIds: [], searchSources: expect.any(Function), - readRetrievedChunk: expect.any(Function), }); expect(retrieval.query).toHaveBeenCalledWith({ namespace: "notebook-workspace", @@ -442,161 +609,19 @@ describe("answerQuestionWithRetrieval", () => { useAgentic: true, dataType: 3, }); - 
expect(answer.citations.map((citation) => citation.assetUrl)).toEqual([ + const imageCitations = answer.citations.filter( + (citation) => citation.assetUrl, + ) + expect(imageCitations.map((citation) => citation.assetUrl)).toEqual([ "https://blob.example/images/image-6-id-front.jpg", "https://blob.example/images/image-7-id-back.jpg", ]); - expect(answer.citations.map((citation) => citation.chunkType)).toEqual([ + expect(imageCitations.map((citation) => citation.chunkType)).toEqual([ "image", "image", ]); }); - it("sends requested identity-card images without exposing internal media metadata", async () => { - const frontAssetUrl = "https://blob.example/images/feng-rongzhou-id-front.jpg"; - const backAssetUrl = "https://blob.example/images/feng-rongzhou-id-back.jpg"; - const unrelatedAssetUrl = "https://blob.example/images/company-license.jpg"; - const textResult = makeRetrievalResult({ - content: "冯荣洲的法定代表人身份证明页包含居民身份证图片。", - source: { - documentId: "doc_identity", - sourceFileName: "document-generated.pdf", - sectionPath: "二、法定代表人身份证明", - }, - }); - const duplicateFrontResult = { - ...makeRetrievalResult({ - chunkType: "image", - content: "冯荣洲居民身份证正面图片。", - assetUrl: frontAssetUrl, - source: { - documentId: "doc_identity", - sourceFileName: "document-generated.pdf", - sectionPath: "images/feng-rongzhou-id-front.jpg", - }, - }), - chunkId: "chunk_front_direct", - } as RetrievalResult & { readonly chunkId: string }; - const richerDuplicateFrontResult = { - ...makeRetrievalResult({ - chunkType: "image", - content: "冯荣洲居民身份证正面图片,来源于身份证明章节。", - assetUrl: frontAssetUrl, - source: { - documentId: "doc_identity", - sourceFileName: "document-generated.pdf", - sectionPath: "二、法定代表人身份证明 / 身份证正面", - }, - }), - chunkId: "chunk_front_richer", - } as RetrievalResult & { readonly chunkId: string }; - const backResult = { - ...makeRetrievalResult({ - chunkType: "image", - content: "冯荣洲居民身份证反面图片。", - assetUrl: backAssetUrl, - source: { - documentId: "doc_identity", - sourceFileName: 
"document-generated.pdf", - sectionPath: "二、法定代表人身份证明 / 身份证反面", - }, - }), - chunkId: "chunk_back", - } as RetrievalResult & { readonly chunkId: string }; - const unrelatedImageResult = { - ...makeRetrievalResult({ - chunkType: "image", - content: "公司证照图片。", - assetUrl: unrelatedAssetUrl, - source: { - documentId: "doc_identity", - sourceFileName: "document-generated.pdf", - sectionPath: "images/company-license.jpg", - }, - }), - chunkId: "chunk_company_license", - } as RetrievalResult & { readonly chunkId: string }; - const retrieval = { - query: vi.fn().mockResolvedValue({ - results: [ - textResult, - duplicateFrontResult, - richerDuplicateFrontResult, - backResult, - unrelatedImageResult, - ], - evidenceText: "冯荣洲 身份证 图片", - referencedChunks: [], - namespace: "notebook-workspace", - query: "冯荣洲 身份证 图片", - routerUsed: "workflow_single_step", - answerText: null, - }), - }; - const generateAnswer = vi.fn(async ({ searchSources }) => { - await searchSources({ - query: "冯荣洲 身份证 图片", - targetContent: "image", - purpose: "查找冯荣洲的身份证图片。", - }); - return [ - "为您找到冯荣洲的居民身份证图片,相关信息如下:", - "- **姓名**:冯荣洲", - "- **公民身份号码**:123456789012345678", - "- **签发机关**:某公安局", - "- **有效期限**:长期", - `{"asset_id":"asset_front","assetUrl":"${frontAssetUrl}","chunkId":"chunk_front_direct"}`, - ].join("\n"); - }); - const sources = [ - makeSource({ - id: "source_identity", - title: "商务标文件.pdf", - knowhereDocumentId: "doc_identity", - }), - ]; - - const answer = await Effect.runPromise( - answerQuestionWithRetrieval({ - question: "请将 冯荣洲 的身份证图片发给我", - namespace: "notebook-workspace", - sources, - excludedSourceIds: [], - retrieval, - generateAnswer, - messages: [], - }), - ); - - expect(retrieval.query).toHaveBeenCalledWith({ - namespace: "notebook-workspace", - query: "冯荣洲 身份证 图片", - topK: 8, - useAgentic: true, - dataType: 3, - }); - expect(answer.answer).toBe("已找到相关身份证图片,见下方图片。"); - expect(answer.answer).not.toMatch( - /asset_id|assetUrl|asset_url|chunkId|chunk_id|https?:\/\//, - ); - 
expect(answer.answer).not.toMatch( - /姓名|公民身份号码|签发机关|有效期限|123456789012345678/, - ); - expect(answer.citations.map((citation) => citation.assetUrl)).toEqual([ - frontAssetUrl, - backAssetUrl, - ]); - expect( - answer.citations.filter( - (citation) => citation.assetUrl === frontAssetUrl, - ), - ).toHaveLength(1); - expect(answer.citations[0]?.source).toMatchObject({ - sourceFileName: "商务标文件.pdf", - sectionPath: "二、法定代表人身份证明 / 身份证正面", - }); - }); - it("returns the agent answer without citations when retrieval has no results", async () => { const retrieval = { query: vi.fn().mockResolvedValue({ @@ -611,7 +636,7 @@ describe("answerQuestionWithRetrieval", () => { }; const generateAnswer = vi.fn(async ({ searchSources }) => { await searchSources({ query: "Missing fact?" }); - return "I couldn't find that in your sources."; + return makeHarnessRunResult("I couldn't find that in your sources."); }); const answer = await Effect.runPromise( @@ -648,7 +673,7 @@ describe("answerQuestionWithRetrieval", () => { await searchSources({ query: "Tesla Q4 2025 Update energy generation and storage deployments", }); - return "Energy storage grew."; + return makeHarnessRunResult("Energy storage grew."); }); const messages = [ { @@ -686,7 +711,6 @@ describe("answerQuestionWithRetrieval", () => { sources: [makeSource({ title: "TSLA-Q4-2025-Update.pdf" })], excludedSourceIds: [], searchSources: expect.any(Function), - readRetrievedChunk: expect.any(Function), }); }); @@ -704,7 +728,7 @@ describe("answerQuestionWithRetrieval", () => { }; const generateAnswer = vi.fn(async ({ searchSources }) => { await searchSources({ query: "Tesla energy storage deployments" }); - return "Energy storage grew."; + return makeHarnessRunResult("Energy storage grew."); }); const messages = [ { @@ -742,77 +766,6 @@ describe("answerQuestionWithRetrieval", () => { ); }); - it("lets the agent read untruncated content from returned chunk ids", async () => { - const longContent = `${"Earlier context. 
".repeat(300)}Critical obligation: retain source receipts.`; - const result = { - ...makeRetrievalResult({ - content: longContent, - source: { - documentId: "doc_contract", - sourceFileName: "contract.pdf", - sectionPath: "Obligations", - }, - }), - chunkId: "chunk_contract_1", - } as RetrievalResult & { readonly chunkId: string }; - const retrieval = { - query: vi.fn().mockResolvedValue({ - results: [result], - evidenceText: "Contract obligations were retrieved.", - referencedChunks: [], - namespace: "notebook-workspace", - query: "contract obligations", - routerUsed: "workflow_single_step", - answerText: null, - }), - }; - const generateAnswer = vi.fn( - async ({ searchSources, readRetrievedChunk }) => { - const response = await searchSources({ query: "contract obligations" }); - expect(response.chunkReferences[0]).toMatchObject({ - id: "chunk_contract_1", - chunkId: "chunk_contract_1", - contentTruncated: true, - contentLength: longContent.length, - }); - - const detail = await readRetrievedChunk({ - id: "chunk_contract_1", - offset: 4_000, - limit: 80, - }); - - expect(detail).toMatchObject({ - id: "chunk_contract_1", - found: true, - offset: 4_000, - limit: 80, - contentLength: longContent.length, - }); - return detail.contentSlice; - }, - ); - - const answer = await Effect.runPromise( - answerQuestionWithRetrieval({ - question: "What obligation matters?", - namespace: "notebook-workspace", - sources: [ - makeSource({ - title: "contract.pdf", - knowhereDocumentId: "doc_contract", - }), - ], - excludedSourceIds: [], - retrieval, - generateAnswer, - messages: [], - }), - ); - - expect(answer.answer).toBe(longContent.slice(4_000, 4_080).trim()); - }); - it("uses structured referenced chunks from RetrievalQueryResponse as citations", async () => { const retrieval = { query: vi.fn().mockResolvedValue({ @@ -840,7 +793,7 @@ describe("answerQuestionWithRetrieval", () => { query: "SpaceX launch image", targetContent: "image", }); - return "Here is the launch image."; + 
return makeHarnessRunResult("Here is the launch image."); }); const answer = await Effect.runPromise( @@ -876,199 +829,113 @@ describe("answerQuestionWithRetrieval", () => { }); }); -describe("generateContextualRetrievalQuery", () => { - it("uses the latest question directly when there is no chat history", async () => { - const query = await generateContextualRetrievalQuery({ - question: "What does Tesla say about energy storage?", - messages: [], - sources: [makeSource({ title: "TSLA-Q4-2025-Update.pdf" })], - excludedSourceIds: [], - }); - - expect(generateText).not.toHaveBeenCalled(); - expect(query).toBe("What does Tesla say about energy storage?"); - }); - - it("asks the model to produce a stateless Knowhere query from chat context", async () => { - process.env.AI_GATEWAY_API_KEY = "test_gateway_key"; - vi.mocked(generateText).mockResolvedValue({ - text: "Query: Tesla Q4 2025 Update energy storage deployments", - } as Awaited>); - - const query = await generateContextualRetrievalQuery({ - question: "What about energy storage in this document?", - messages: [ - { - role: "user", - content: "Tell me about Tesla's Q4 2025 update.", - }, - ], - sources: [makeSource({ title: "TSLA-Q4-2025-Update.pdf" })], - excludedSourceIds: [], - }); - - expect(generateText).toHaveBeenCalledWith({ - model: "google/gemini-3-flash", - prompt: expect.stringContaining("Knowhere retrieval is stateless"), - }); - expect(query).toBe("Tesla Q4 2025 Update energy storage deployments"); - }); -}); - -describe("generateGroundedAnswer", () => { - it("routes grounded prompts through Vercel AI Gateway model strings", async () => { - process.env.AI_GATEWAY_API_KEY = "test_gateway_key"; - vi.mocked(generateText).mockResolvedValue({ - text: "PR-E wires chat to retrieval.", - } as Awaited>); - - const answer = await generateGroundedAnswer({ - question: "What is PR-E?", - retrievalQuery: "PR-E retrieval", - messages: [], - evidenceText: "PR-E wires chat to Knowhere retrieval.", - }); - - 
expect(generateText).toHaveBeenCalledWith({ - model: "google/gemini-3-flash", - prompt: expect.stringContaining("PR-E wires chat to Knowhere retrieval."), - }); - expect(answer).toBe("PR-E wires chat to retrieval."); - expect(getLoggerInfoMeta("chat-agent: llm request")).toMatchObject({ - operation: "generateGroundedAnswer", - model: "google/gemini-3-flash", - promptType: "text", - prompt: expect.stringContaining("PR-E wires chat to Knowhere retrieval."), - }); - expect(getLoggerInfoMeta("chat-agent: llm response")).toMatchObject({ - operation: "generateGroundedAnswer", - model: "google/gemini-3-flash", - responseText: "PR-E wires chat to retrieval.", - responseTextCharLength: "PR-E wires chat to retrieval.".length, - }); - }); -}); - -describe("generateAgenticGroundedAnswer", () => { - it("builds a Vercel AI SDK tool loop around Knowhere retrieval", async () => { +describe("generateAgenticOutputManifest", () => { + it("runs the outer harness workflow around Knowhere retrieval", async () => { process.env.AI_GATEWAY_API_KEY = "test_gateway_key"; let capturedGenerateInput: | Parameters[0] | undefined; - const generateSpy = vi - .spyOn(ToolLoopAgent.prototype, "generate") - .mockImplementation(( + vi.spyOn(ToolLoopAgent.prototype, "generate").mockImplementation( + async function mockGenerate( + this: ToolLoopAgent, input: Parameters[0], - ): ReturnType => { + ): ReturnType { capturedGenerateInput = input; - return Promise.resolve({ - text: "Here are the requested identity images.", - } as Awaited>); - }); - const previewWithinNewLimitMarker = "within-new-preview-limit"; - const previewAfterNewLimitMarker = "after-new-preview-limit"; - const fullToolOutputEvidenceMarker = "full-tool-output-evidence-end"; - const evidenceTreeText = [ - "Identity image evidence. 
https://blob.example/images/id-front.jpg", - "[Document] document-generated.pdf", - "▸ [L1] Assets", - " ▸ [L2] images / id-front.jpg", - ` ┈ ${"evidence ".repeat(500)}${fullToolOutputEvidenceMarker}`, - ].join("\n"); - const longIdentityPreview = [ - "Identity card image front side.", - "preview ".repeat(170), - previewWithinNewLimitMarker, - "preview ".repeat(70), - previewAfterNewLimitMarker, - ].join(" "); + const tools = this.tools as unknown as Record< + string, + { execute: (input: unknown) => Promise } + >; + + await tools.declareIntent?.execute({ + task: "show_media", + dependsOnPreviousTurn: false, + retrievalNeeded: "yes", + targetModalities: ["text", "image"], + constraints: { desiredCount: 2, maxCount: 2 }, + groundingPolicy: "must_use_sources", + }); + await tools.setContextPolicy?.execute({ + carryHistory: "none", + reason: "The current request is self-contained.", + activePriorTurnIds: [], + }); + await tools.retrieve?.execute({ + query: "冯荣洲 身份证 图片", + modalities: ["text", "image"], + topK: 2, + purpose: "Find exactly the requested identity-card images.", + }); + await tools.finalize?.execute({ + text: "已找到相关身份证图片,见下方图片。", + citations: [ + { + ref: "r1:result:1", + label: "商务标文件.pdf / 身份证正面", + source: { + documentId: "doc_identity", + sourceFileName: "商务标文件.pdf", + sectionPath: "身份证正面", + }, + }, + ], + artifacts: [ + { + type: "image", + ref: "asset:r1:result:1", + display: true, + reason: "身份证正面", + }, + ], + unresolved: [], + }); + + return { + text: "This freeform text should be ignored.", + } as Awaited>; + }, + ); const searchSources = vi.fn().mockResolvedValue({ results: [ makeRetrievalResult({ - content: longIdentityPreview, chunkType: "image", assetUrl: "https://blob.example/images/id-front.jpg", source: { documentId: "doc_identity", sourceFileName: "document-generated.pdf", - sectionPath: "Assets / images / id-front.jpg", + sectionPath: "身份证正面", }, }), ], - evidenceText: evidenceTreeText, - referencedChunks: [ - { - chunkId: 
"chunk_identity_1", - documentId: "doc_identity", - chunkType: "image", - sectionPath: "Assets / images / id-front.jpg", - filePath: "images/id-front.jpg", - jobId: "job_1", - assetUrl: "https://blob.example/images/id-front.jpg", - }, - ], + evidenceText: "Identity image evidence.", + referencedChunks: [], namespace: "notebook-workspace", - query: "公民身份证 图片", + query: "冯荣洲 身份证 图片", routerUsed: "workflow_single_step", - retrievalPlan: { - targetContent: "image", - purpose: "Find identity-card image evidence.", - }, - chunkReferences: [ - { - id: "chunk_identity_1", - chunkId: "chunk_identity_1", - kind: "result", - resultIndex: 1, - chunkType: "image", - score: 0.9, - source: { - documentId: "doc_identity", - sourceFileName: "document-generated.pdf", - sectionPath: "Assets / images / id-front.jpg", - }, - hasAssetUrl: true, - contentLength: longIdentityPreview.length, - contentPreview: longIdentityPreview, - contentTruncated: true, - }, - ], - answerText: - "The source includes identity card images. 
https://blob.example/images/id-front.jpg", + chunkReferences: [], + answerText: null, stopReason: "answer_done", failureReason: null, - decisionTrace: [ + }); + + const result = await generateAgenticOutputManifest({ + question: "请只返回冯荣洲的 2 张身份证图片", + messages: [ { - step: "final", - stop: "answer_done", - assetUrl: "https://blob.example/images/id-front.jpg", + role: "assistant", + content: "上一轮是完全不同的税务问题。", + citations: [ + { + chunkType: "text", + score: 0.9, + source: { + documentId: "doc_tax", + sourceFileName: "tax.pdf", + sectionPath: "deadline", + }, + }, + ], }, ], - }); - const readRetrievedChunk = vi.fn().mockResolvedValue({ - id: "chunk_identity_1", - chunkId: "chunk_identity_1", - found: true, - chunkType: "image", - score: 0.9, - source: { - documentId: "doc_identity", - sourceFileName: "document-generated.pdf", - sectionPath: "Assets / images / id-front.jpg", - }, - hasAssetUrl: true, - offset: 0, - limit: 80, - contentLength: 96, - contentSlice: - "Full identity card text. 
https://blob.example/images/id-front.jpg", - hasMoreContent: false, - nextOffset: null, - }); - - const answer = await generateAgenticGroundedAnswer({ - question: "请发送几张关于公民身份的图片给我", - messages: [], sources: [ makeSource({ title: "商务标文件.pdf", @@ -1077,559 +944,133 @@ describe("generateAgenticGroundedAnswer", () => { ], excludedSourceIds: [], searchSources, - readRetrievedChunk, }); - expect(answer).toBe("Here are the requested identity images."); - expect(generateSpy).toHaveBeenCalledWith({ - messages: expect.any(Array), + expect(result.manifest.text).toBe("已找到相关身份证图片,见下方图片。"); + expect(result.trace.intent).toMatchObject({ + task: "show_media", + constraints: { desiredCount: 2, maxCount: 2 }, }); - const agent = getCapturedAgent(generateSpy.mock.contexts[0]); - const settings = getCapturedAgentSettings(agent); - const generateInput = getCapturedGenerateInput(capturedGenerateInput); - - expect(settings.instructions).toContain("markdown output gives guidance") - expect(settings.instructions).toContain("image or text+image search") - expect(settings.instructions).toContain("Read IDs") - expect(settings.instructions).toContain( - "Do not paste raw prior messages into searchSources.query", - ) - expect(generateInput.messages.at(-1)).toEqual({ - role: "user", - content: "请发送几张关于公民身份的图片给我", - }) - expect( - settings.prepareStep({ - stepNumber: 0, - messages: [...generateInput.messages], - }), - ).toMatchObject({ - toolChoice: { type: "tool", toolName: "searchSources" }, - activeTools: ["searchSources"], - }) - expect( - settings.prepareStep({ - stepNumber: 1, - messages: [...generateInput.messages], - }), - ).toMatchObject({ - toolChoice: { type: "tool", toolName: "searchSources" }, - activeTools: ["searchSources"], - }) - expect( - settings.prepareStep({ - stepNumber: 2, - messages: [...generateInput.messages], - }), - ).toMatchObject({ - messages: expect.any(Array), - }) - - const searchSourcesTool = getCapturedAgentTools(agent).searchSources - expect( - 
getSearchSourcesTargetContentSchema(searchSourcesTool)._def?.innerType - ?._def?.type, - ).toBe("enum") - expect( - searchSourcesTool.inputSchema.safeParse({ - query: "公民身份证 图片", - targetContent: "image", - }).success, - ).toBe(true) - expect( - searchSourcesTool.inputSchema.safeParse({ - query: "公民身份证 图片", - targetContent: "video", - }).success, - ).toBe(false) - - const toolOutput = await searchSourcesTool.execute({ - query: "公民身份证 图片", - targetContent: "image", - purpose: "Find identity-card image evidence.", + expect(result.trace.contextPolicy).toMatchObject({ + carryHistory: "none", }); - + expect(result.trace.validationErrors).toEqual([]); expect(searchSources).toHaveBeenCalledWith({ - query: "公民身份证 图片", - targetContent: "image", - purpose: "Find identity-card image evidence.", - }); - expect(toolOutput).toEqual(expect.any(String)); - expect(toolOutput).toContain("## Retrieval Result"); - expect(toolOutput).toContain("Query: 公民身份证 图片"); - expect(toolOutput).toContain("Guidance: Use this evidence"); - expect(toolOutput).toContain("## Evidence"); - expect(toolOutput).toContain( - "[Document] document-generated.pdf\n▸ [L1] Assets\n ▸ [L2] images / id-front.jpg", - ); - expect(toolOutput).toContain(fullToolOutputEvidenceMarker); - expect(toolOutput).not.toContain( - "[Document] document-generated.pdf ▸ [L1] Assets", - ); - expect(toolOutput).toContain("## Decision Trace"); - expect(toolOutput).toContain("- Step 1:"); - expect(toolOutput).toContain("- step: final"); - expect(toolOutput).toContain("- stop: answer_done"); - expect(toolOutput).toContain("### Result 1"); - expect(toolOutput).toContain("Type: image"); - expect(toolOutput).toContain( - "Source: document-generated.pdf / Assets / images / id-front.jpg", - ); - expect(toolOutput).toContain("Media: image available"); - expect(toolOutput).toContain("Read ID: chunk_identity_1"); - expect(toolOutput).toContain("Identity card image front side."); - expect(toolOutput).toContain(previewWithinNewLimitMarker); - 
expect(toolOutput).not.toContain(previewAfterNewLimitMarker); - expect(toolOutput).not.toContain("https://blob.example"); - expect(toolOutput).not.toContain("assetUrl"); - expect(toolOutput).not.toContain("retrievalPlan"); - expect(toolOutput).not.toContain("decisionTrace"); - expect(getLoggerInfoMeta("chat-agent: tool output")).toMatchObject({ - toolName: "searchSources", - output: expect.objectContaining({ - truncated: false, - preview: expect.stringContaining(fullToolOutputEvidenceMarker), - }), - }); - - const readRetrievedChunkTool = getCapturedAgentTools(agent).readRetrievedChunk; - expect( - readRetrievedChunkTool.inputSchema.safeParse({ - id: "chunk_identity_1", - limit: 8_000, - }).success, - ).toBe(true); - expect( - readRetrievedChunkTool.inputSchema.safeParse({ - id: "chunk_identity_1", - limit: 8_001, - }).success, - ).toBe(false); - - const chunkOutput = await readRetrievedChunkTool.execute({ - id: "chunk_identity_1", - offset: 0, - limit: 80, - }); - - expect(readRetrievedChunk).toHaveBeenCalledWith({ - id: "chunk_identity_1", - offset: 0, - limit: 80, - }); - expect(chunkOutput).toEqual(expect.any(String)); - expect(chunkOutput).toContain("## Retrieved Content"); - expect(chunkOutput).toContain("Status: found"); - expect(chunkOutput).toContain("Read ID: chunk_identity_1"); - expect(chunkOutput).toContain( - "Source: document-generated.pdf / Assets / images / id-front.jpg", - ); - expect(chunkOutput).toContain( - "Full identity card text. 
[media asset URL hidden]", - ); - expect(chunkOutput).not.toContain("https://blob.example"); - expect(chunkOutput).not.toContain("chunkId"); - expect(getLoggerInfoMeta("chat-agent: tool output")).toMatchObject({ - toolName: "readRetrievedChunk", - output: expect.objectContaining({ - truncated: false, - preview: expect.stringContaining("## Retrieved Content\n\nStatus"), - }), - }); + query: "冯荣洲 身份证 图片", + targetContent: "text_image", + purpose: "Find exactly the requested identity-card images.", + topK: 2, + signalPaths: undefined, + filterMode: undefined, + threshold: undefined, + }); + expect(JSON.stringify(capturedGenerateInput)).toContain("Recent turn index"); + expect(JSON.stringify(capturedGenerateInput)).toContain("tax.pdf / deadline"); }); - it("uses managed context for stored history and loop steps", async () => { + it("self-corrects an over-budget manifest via a validation-feedback revision", async () => { process.env.AI_GATEWAY_API_KEY = "test_gateway_key"; - let capturedGenerateInput: - | Parameters[0] - | undefined; - const generateSpy = vi - .spyOn(ToolLoopAgent.prototype, "generate") - .mockImplementation(( - input: Parameters[0], - ): ReturnType => { - capturedGenerateInput = input; - return Promise.resolve({ - text: "The answer is grounded.", - } as Awaited>); - }); - const messages = Array.from({ length: 24 }, (_, index) => ({ - role: index % 2 === 0 ? 
("user" as const) : ("assistant" as const), - content: `history-message-${index} ${"context ".repeat(80)}`, - })); - - await generateAgenticGroundedAnswer({ - question: "What should I know now?", - messages, - sources: [makeSource()], - excludedSourceIds: [], - searchSources: vi.fn(), - readRetrievedChunk: vi.fn(), - }); - - const generateInput = getCapturedGenerateInput(capturedGenerateInput); - const serializedMessages = JSON.stringify(generateInput.messages); - expect(generateInput.messages[0]).toMatchObject({ - role: "system", - content: expect.stringContaining("Compacted earlier conversation"), - }); - expect(serializedMessages).not.toContain("history-message-0"); - expect(serializedMessages).toContain("history-message-23"); - - const settings = getCapturedAgentSettings( - getCapturedAgent(generateSpy.mock.contexts[0]), - ); - const oversizedLoopMessages = Array.from({ length: 25 }, (_, index) => ({ - role: "user" as const, - content: `loop-message-${index}`, - })); - const preparedStep = settings.prepareStep({ - stepNumber: 2, - messages: oversizedLoopMessages, - }) as { readonly messages: readonly ModelMessage[] }; - - expect(preparedStep.messages.length).toBeLessThanOrEqual(12); - expect(JSON.stringify(preparedStep.messages)).not.toContain("loop-message-0"); - expect(JSON.stringify(preparedStep.messages)).toContain("loop-message-24"); - expect(getLoggerInfoMeta("chat-agent: llm request")).toMatchObject({ - operation: "generateAgenticGroundedAnswer.step", - model: "google/gemini-3-flash", - promptType: "messages", - stepNumber: 2, - instructions: expect.stringContaining("Notebook research agent"), - messageCount: preparedStep.messages.length, - messages: expect.arrayContaining([ - expect.objectContaining({ - role: "user", - content: "loop-message-24", - }), - ]), - }); - - const hugeLoopMessages = [ - { - role: "user" as const, - content: `huge-loop-message ${"context ".repeat(9_000)}`, - }, - { - role: "assistant" as const, - content: "middle-loop-message", 
+ let generateCallCount = 0; + vi.spyOn(ToolLoopAgent.prototype, "generate").mockImplementation( + async function mockGenerate( + this: ToolLoopAgent, + ): ReturnType { + generateCallCount += 1; + const tools = this.tools as unknown as Record< + string, + { execute: (input: unknown) => Promise } + >; + + if (generateCallCount === 1) { + await tools.declareIntent?.execute({ + task: "show_media", + dependsOnPreviousTurn: false, + retrievalNeeded: "yes", + targetModalities: ["image"], + constraints: { desiredCount: 2, maxCount: 2 }, + groundingPolicy: "must_use_sources", + }); + await tools.setContextPolicy?.execute({ + carryHistory: "none", + reason: "Self-contained request.", + activePriorTurnIds: [], + }); + await tools.retrieve?.execute({ + query: "身份证 图片", + modalities: ["image"], + topK: 3, + purpose: "Find requested identity images.", + }); + await tools.finalize?.execute({ + text: "见下方图片。", + citations: [{ ref: "r1:result:1", label: "id" }], + artifacts: [1, 2, 3].map((index) => ({ + type: "image", + ref: `asset:r1:result:${index}`, + display: true, + reason: "candidate", + })), + unresolved: [], + }); + } else { + await tools.finalize?.execute({ + text: "见下方图片。", + citations: [{ ref: "r1:result:1", label: "id" }], + artifacts: [1, 2].map((index) => ({ + type: "image", + ref: `asset:r1:result:${index}`, + display: true, + reason: "selected", + })), + unresolved: [], + }); + } + + return { + text: "ignored", + response: { messages: [] }, + } as unknown as Awaited>; }, - { - role: "user" as const, - content: "latest-loop-message", - }, - ]; - const preparedHugeStep = settings.prepareStep({ - stepNumber: 2, - messages: hugeLoopMessages, - }) as { readonly messages: readonly ModelMessage[] }; - const serializedHugeStepMessages = JSON.stringify( - preparedHugeStep.messages, ); - expect(getTestModelMessagesCharLength(preparedHugeStep.messages)).toBeLessThanOrEqual( - 64_000, - ); - expect(serializedHugeStepMessages).not.toContain("huge-loop-message"); - 
expect(serializedHugeStepMessages).toContain("latest-loop-message"); - }); - - it("logs bounded tool calls and complete tool results for each loop step", async () => { - process.env.AI_GATEWAY_API_KEY = "test_gateway_key"; - const fullStepToolOutputMarker = "full-step-tool-output-end"; - const fullStepToolOutput = `\n${[ - "## Retrieval Result", - "", - "Status: useful_evidence_found", - "Query: 冯荣洲 身份证 ID card", - "Guidance: Use this evidence if it directly answers the user.", - "", - "## Evidence", - `Image evidence https://blob.example/id-front.jpg ${"evidence ".repeat( - 600, - )}`, - "", - "## Results", - "### Result 1", - "Type: image", - "Source: 商务标文件.pdf / 二、法定代表人身份证明", - "Media: image available", - "Read ID: chunk_identity_1", - "", - "Preview:", - `Identity image content ${"result ".repeat(80)}`, - fullStepToolOutputMarker, - ].join("\n")}\n `; - const generateSpy = vi - .spyOn(ToolLoopAgent.prototype, "generate") - .mockResolvedValue({ - text: "The answer is grounded.", - } as Awaited>); - - await generateAgenticGroundedAnswer({ - question: "请将 冯荣洲 的身份证图片发给我", - messages: [], - sources: [makeSource()], - excludedSourceIds: [], - searchSources: vi.fn(), - readRetrievedChunk: vi.fn(), - }); - - const settings = getCapturedAgentSettings( - getCapturedAgent(generateSpy.mock.contexts[0]), - ); - loggerMock.info.mockClear(); - - settings.onStepFinish({ - stepNumber: 1, - finishReason: "tool-calls", - text: `Inspecting identity image candidates. ${"reason ".repeat(200)}`, - toolCalls: [ - { - toolName: "searchSources", - toolCallId: "call_1", - input: { - query: "冯荣洲 身份证 ID card", - purpose: `Find the matching identity card image. 
${"input ".repeat( - 300, - )}`, - targetContent: "image", + const searchSources = vi.fn().mockResolvedValue({ + results: [1, 2, 3].map((index) => + makeRetrievalResult({ + chunkType: "image", + assetUrl: `https://blob.example/images/id-${index}.jpg`, + source: { + documentId: "doc_identity", + sourceFileName: "ids.pdf", + sectionPath: `身份证 ${index}`, }, - }, - ], - toolResults: [ - { - toolName: "searchSources", - toolCallId: "call_1", - output: fullStepToolOutput, - }, - ], - usage: { - inputTokens: 11, - outputTokens: 22, - totalTokens: 33, - }, - }); - - const stepMeta = getLoggerInfoMeta("chat-agent: llm response"); - const stepLog = stepMeta as unknown as AgentLoopStepLogMeta; - expect(stepLog).toMatchObject({ - operation: "generateAgenticGroundedAnswer.step", - model: "google/gemini-3-flash", - stepNumber: 1, - finishReason: "tool-calls", - toolCallCount: 1, - toolResultCount: 1, - inputTokens: 11, - outputTokens: 22, - totalTokens: 33, - }); - expect(stepLog.responseText).toContain("Inspecting identity image candidates."); - expect(stepLog.toolCalls[0]?.input.truncated).toBe(true); - expect(stepLog.toolResults[0]?.output).toMatchObject({ - kind: "searchSources", - output: { - truncated: false, - }, - }); - const searchSourcesOutput = stepLog.toolResults[0] - ?.output as SearchSourcesToolOutputLogMeta; - expect(searchSourcesOutput.output.preview.startsWith("\n## Retrieval Result")) - .toBe(true); - expect(searchSourcesOutput.output.preview.endsWith("\n ")).toBe(true); - expect(searchSourcesOutput.output.preview).toContain("## Retrieval Result"); - expect(searchSourcesOutput.output.preview).toContain("\n\n## Evidence"); - expect(searchSourcesOutput.output.preview).toContain( - "Query: 冯荣洲 身份证 ID card", - ); - expect(searchSourcesOutput.output.preview).toContain( - "[media asset URL hidden]", - ); - expect(searchSourcesOutput.output.preview).toContain( - fullStepToolOutputMarker, - ); - expect(JSON.stringify(stepMeta)).not.toContain("https://blob.example"); - 
- settings.onFinish({ - steps: [ - { - stepNumber: 1, - finishReason: "tool-calls", - text: "Read tool result.", - toolCalls: [ - { - toolName: "searchSources", - toolCallId: "call_1", - input: { query: "冯荣洲 身份证 ID card" }, - }, - ], - toolResults: [ - { - toolName: "searchSources", - toolCallId: "call_1", - output: "## Evidence\nMatched image evidence.", - }, - ], - }, - ], - finishReason: "stop", - text: "Here is the matched identity card image.", - totalUsage: { - inputTokens: 40, - outputTokens: 20, - totalTokens: 60, - }, - }); - - const finishMeta = getLoggerInfoMeta("chat-agent: loop finished"); - expect(finishMeta).toMatchObject({ - stepCount: 1, - finishReason: "stop", - responseText: "Here is the matched identity card image.", - toolNames: ["searchSources"], - steps: [ - expect.objectContaining({ - stepNumber: 1, - toolCallCount: 1, - toolResultCount: 1, }), - ], - inputTokens: 40, - outputTokens: 20, - totalTokens: 60, - }); - }); -}); - -describe("buildGroundedPrompt", () => { - it("includes evidence text and uses evidence-based citation format", () => { - const prompt = buildGroundedPrompt({ - question: "What is PR-E?", - evidenceText: "PR-E wires chat to Knowhere retrieval.\n[Document] requirements.txt\n▸ [L1] N-005", - }); - - expect(prompt).toContain("What is PR-E?"); - expect(prompt).toContain("Retrieval query used: What is PR-E?"); - expect(prompt).toContain("PR-E wires chat to Knowhere retrieval."); - expect(prompt).toContain("requirements.txt"); - expect(prompt).toContain( - "Use the retrieved evidence as your primary context.", - ); - expect(prompt).toContain("Retrieved evidence:"); - expect(prompt).not.toContain("Source excerpts:"); - }); - - it("asks the model to answer naturally and directly", () => { - const prompt = buildGroundedPrompt({ - question: "How about the TBD?", - evidenceText: "Roadster location: TBD. 
Status: Design development.", - }); - - expect(prompt).toContain("Answer in a natural, friendly, and direct tone."); - expect(prompt).toContain("Use GitHub-flavored Markdown when it improves readability"); - expect(prompt).toContain("Start with the answer first."); - expect(prompt).toContain("Avoid meta phrases like \"Based on the sources\""); - expect(prompt).toContain("Keep answers concise by default"); - expect(prompt).toContain( - "If the sources are related but incomplete, answer what you can and briefly say what is not covered.", - ); - }); - - it("includes retrieved media asset references as internal metadata", () => { - const prompt = buildGroundedPrompt({ - question: "Show me the launch image.", - evidenceText: "A launch image was retrieved.", - mediaAssetContext: - "- spacex-s1.pdf / Assets / images / launch.jpg: https://blob.example/images/launch.jpg", + ), + evidenceText: "Identity image evidence.", + referencedChunks: [], + namespace: "notebook-workspace", + query: "身份证 图片", + routerUsed: "workflow_single_step", + answerText: null, + stopReason: "answer_done", + failureReason: null, }); - expect(prompt).toContain( - "Retrieved media asset references (internal; do not quote raw URLs):", - ); - expect(prompt).toContain( - "When retrieved image or table asset references are relevant to the user's request, cite the matching source label; the UI renders media from citation metadata.", - ); - expect(prompt).toContain( - "Do not write raw media asset URLs in the answer. 
They are internal metadata only.", - ); - expect(prompt).toContain("Never output JSON metadata blocks"); - expect(prompt).toContain("Never mention asset_id, assetUrl"); - expect(prompt).toContain("do not transcribe personal details"); - expect(prompt).toContain("https://blob.example/images/launch.jpg"); - }); -}); - -describe("buildAgenticChatSystemPrompt", () => { - it("instructs the agent how to continue or stop from retrieval responses", () => { - const prompt = buildAgenticChatSystemPrompt({ + const result = await generateAgenticOutputManifest({ + question: "只要 2 张身份证图片", messages: [], - sources: [makeSource({ title: "商务标文件.pdf" })], - excludedSourceIds: [], - }); - - expect(prompt).toContain("Always call searchSources") - expect(prompt).toContain("Make a second searchSources call") - expect(prompt).toContain("readRetrievedChunk") - expect(prompt).toContain("markdown output gives guidance") - expect(prompt).toContain("Read IDs") - expect(prompt).toContain("image or text+image search") - expect(prompt).toContain("remote index") - expect(prompt).toContain("person or section but not an image asset") - expect(prompt).toContain("Do not paste raw prior messages") - expect(prompt).toContain("身份证") - expect(prompt).toContain("For image requests, search visual content directly") - expect(prompt).toContain("Never output JSON metadata blocks") - expect(prompt).toContain("Use GitHub-flavored Markdown when it improves readability") - expect(prompt).toContain("do not transcribe personal details") - expect(prompt).not.toContain("targetContent maps") - expect(prompt).not.toContain("Read the tool output fields") - expect(prompt).not.toContain("intent=overview") - expect(prompt).toContain("商务标文件.pdf") - }); -}); - -describe("buildRetrievalQueryPrompt", () => { - it("includes source and history context for stateless retrieval", () => { - const prompt = buildRetrievalQueryPrompt({ - question: "What about energy storage in this document?", - messages: [ - { - role: "assistant", - 
content: "Tesla's update mentions Q4 revenue.", - citations: [ - { - chunkType: "text", - score: 0.9, - source: { - documentId: "doc_tesla", - sourceFileName: "TSLA-Q4-2025-Update.pdf", - sectionPath: "FINANCIAL SUMMARY", - }, - }, - ], - }, - ], sources: [ - makeSource({ - id: "source_tesla", - title: "TSLA-Q4-2025-Update.pdf", - knowhereDocumentId: "doc_tesla", - }), - makeSource({ - id: "source_excluded", - title: "Other.pdf", - knowhereDocumentId: "doc_other", - }), + makeSource({ title: "ids.pdf", knowhereDocumentId: "doc_identity" }), ], - excludedSourceIds: ["source_excluded"], + excludedSourceIds: [], + searchSources, }); - expect(prompt).toContain("Knowhere retrieval is stateless"); - expect(prompt).toContain("TSLA-Q4-2025-Update.pdf"); - expect(prompt).toContain("FINANCIAL SUMMARY"); - expect(prompt).toContain("What about energy storage in this document?"); - expect(prompt).not.toContain("Other.pdf"); + expect(generateCallCount).toBe(2); + expect(result.trace.revisionsUsed).toBe(1); + expect(result.trace.validationErrors).toEqual([]); + expect( + result.manifest.artifacts.filter((artifact) => artifact.display).length, + ).toBe(2); }); }); @@ -1699,71 +1140,28 @@ function makeSource(overrides: Partial = {}): Source { }; } -type CapturedAgentSettings = { - readonly instructions: string - readonly prepareStep: (input: { - readonly stepNumber: number - readonly messages: ModelMessage[] - }) => unknown - readonly onStepFinish: (input: unknown) => void - readonly onFinish: (input: unknown) => void -} - -type CapturedAgentTools = { - readonly searchSources: { - readonly inputSchema: { - readonly _def?: { - readonly type?: string - readonly shape?: - | Record - | (() => Record) - } - readonly safeParse: (value: unknown) => { readonly success: boolean } - } - readonly execute: (input: AgenticRetrievalQuery) => Promise - } - readonly readRetrievedChunk: { - readonly inputSchema: { - readonly safeParse: (value: unknown) => { readonly success: boolean } - } - 
readonly execute: (input: ReadRetrievedChunkInput) => Promise - } -} - -type CapturedZodSchema = { - readonly _def?: { - readonly type?: string - readonly innerType?: CapturedZodSchema - } -} - -type AgentLoopStepLogMeta = { - readonly operation: string - readonly model: string - readonly stepNumber: number - readonly finishReason: string - readonly responseText: string - readonly toolCallCount: number - readonly toolCalls: readonly { - readonly input: AgentLoopLogPreviewMeta - }[] - readonly toolResultCount: number - readonly toolResults: readonly { - readonly output: SearchSourcesToolOutputLogMeta | AgentLoopLogPreviewMeta - }[] - readonly inputTokens: number - readonly outputTokens: number - readonly totalTokens: number -} - -type SearchSourcesToolOutputLogMeta = { - readonly kind: "searchSources" - readonly output: AgentLoopLogPreviewMeta -} - -type AgentLoopLogPreviewMeta = { - readonly truncated: boolean - readonly preview: string +function makeHarnessRunResult(text: string): HarnessRunResult { + return { + manifest: { + text, + citations: [], + artifacts: [], + unresolved: [], + }, + trace: { + ledger: { + retrievalCount: 0, + chunks: [], + assets: [], + evidenceText: [], + stopReasons: [], + failureReasons: [], + decisionTraces: [], + }, + validationErrors: [], + revisionsUsed: 0, + }, + }; } type KnowhereQueryResponseLogMeta = { @@ -1782,72 +1180,6 @@ type KnowhereQueryResponseLogMeta = { }[] } -function getCapturedAgent(agent: unknown): ToolLoopAgent { - expect(agent).toBeInstanceOf(ToolLoopAgent) - return agent as ToolLoopAgent -} - -function getCapturedGenerateInput( - input: Parameters[0] | undefined, -): { readonly messages: ModelMessage[] } { - expect(input).toBeDefined() - return input as { readonly messages: ModelMessage[] } -} - -function getCapturedAgentSettings(agent: ToolLoopAgent): CapturedAgentSettings { - return (agent as unknown as { readonly settings: CapturedAgentSettings }) - .settings -} - -function getCapturedAgentTools(agent: 
ToolLoopAgent): CapturedAgentTools { - return agent.tools as unknown as CapturedAgentTools -} - -function getTestModelMessagesCharLength( - messages: readonly ModelMessage[], -): number { - return messages.reduce( - (totalLength, message): number => - totalLength + getTestUnknownTextLength(message.content), - 0, - ) -} - -function getTestUnknownTextLength(value: unknown): number { - if (typeof value === "string") return value.length - if (Array.isArray(value)) { - return value.reduce( - (totalLength, nestedValue): number => - totalLength + getTestUnknownTextLength(nestedValue), - 0, - ) - } - if (!value || typeof value !== "object") return 0 - - return Object.values(value as Record).reduce( - (totalLength, nestedValue): number => - totalLength + getTestUnknownTextLength(nestedValue), - 0, - ) -} - -function getSearchSourcesTargetContentSchema( - tool: CapturedAgentTools["searchSources"], -): CapturedZodSchema { - const shape = tool.inputSchema._def?.shape - const fields = typeof shape === "function" ? 
shape() : shape - if (!fields) { - throw new Error("searchSources input schema should expose fields.") - } - - const targetContentSchema = fields.targetContent - if (!targetContentSchema) { - throw new Error("searchSources input schema should include targetContent.") - } - - return targetContentSchema -} - function getLoggerInfoMeta(message: string): Record { const calls = loggerMock.info.mock.calls as unknown as readonly (readonly [ string, diff --git a/src/domains/chat/index.ts b/src/domains/chat/index.ts index b764648..7c83b62 100644 --- a/src/domains/chat/index.ts +++ b/src/domains/chat/index.ts @@ -3,11 +3,20 @@ import type { RetrievalQueryParams, RetrievalQueryResponse, RetrievalResult, - RetrievalSource, } from "@ontos-ai/knowhere-sdk" import { logger } from "@/lib/logger" -import type { ChatCitationView } from "@/domains/chat/types" +import type { + ChatArtifactView, + ChatCitationView, +} from "@/domains/chat/types" +import type { + EvidenceAsset, + EvidenceChunk, + HarnessRunResult, + OutputArtifact, + OutputCitation, +} from "@/agent-harness" import { toChatCitationViews, useNotebookSourceTitles, @@ -19,9 +28,6 @@ import type { AgenticRetrievalResponse, AnswerQuestionInput, AnswerQuestionResult, - ReadRetrievedChunkInput, - ReadRetrievedChunkResult, - RetrievedChunkReference, } from "./contracts" import { excludeDocuments, @@ -29,15 +35,12 @@ import { } from "./retrieval" import { enrichRetrievalResultsWithAssetUrls, - isImageAssetUrl, removeRetrievedMediaAssetUrls, } from "./media-assets" const DEFAULT_TOP_K = 8 const MAX_AGENTIC_TOP_K = 12 const MAX_CITATION_RESULTS = 20 -const DEFAULT_CHUNK_READ_LIMIT = 4_000 -const MAX_CHUNK_READ_LIMIT = 8_000 const KNOWHERE_RESPONSE_TEXT_LOG_LIMIT = 200 const KNOWHERE_CHUNK_LOG_LIMIT = 100 const NO_RESULTS_ANSWER = "I couldn't find that in your sources." 
@@ -56,18 +59,6 @@ const RETRIEVAL_TARGET_CONTENT_DATA_TYPES: Readonly< type RetrievalDataType = NonNullable -type StoredRetrievedChunk = { - id: string - chunkId: string | null - kind: RetrievedChunkReference["kind"] - resultIndex: number | null - content: string - chunkType: string - score: number | null - source: RetrievalSource - hasAssetUrl: boolean -} - type KnowhereQueryResponseLog = { readonly namespace: string readonly query: string @@ -101,15 +92,8 @@ export type { SearchSources, } from "./contracts" export { - buildAgenticChatSystemPrompt, - buildGroundedPrompt, - buildRetrievalQueryPrompt, - generateAgenticGroundedAnswer, - generateAgenticGroundedAnswerEffect, - generateContextualRetrievalQuery, - generateContextualRetrievalQueryEffect, - generateGroundedAnswer, - generateGroundedAnswerEffect, + generateAgenticOutputManifest, + generateAgenticOutputManifestEffect, } from "./prompt" export { parseChatRequestBody, @@ -123,7 +107,6 @@ export const answerQuestionWithRetrieval = ( Effect.gen(function* () { const question = input.question.trim() const retrievalResponses: RetrievalQueryResponse[] = [] - const retrievedChunkContext = createRetrievedChunkContext() logger.info("chat-agent: answer start", { questionLength: question.length, @@ -158,19 +141,11 @@ export const answerQuestionWithRetrieval = ( try { const response = await input.retrieval.query(retrievalQueryParams) retrievalResponses.push(response) - const chunkReferences = retrievedChunkContext.registerResponse({ - response, - responseIndex: retrievalResponses.length, - }) logger.info("chat-agent: searchSources ok", { query: response.query, durationMs: Date.now() - startedAt, resultCount: response.results.length, referencedChunkCount: response.referencedChunks.length, - readableChunkCount: chunkReferences.length, - truncatedChunkCount: chunkReferences.filter( - (reference): boolean => reference.contentTruncated, - ).length, stopReason: response.stopReason ?? 
null, failureReason: response.failureReason ?? null, targetContent: retrievalPlan.targetContent, @@ -179,7 +154,7 @@ export const answerQuestionWithRetrieval = ( durationMs: Date.now() - startedAt, response: formatKnowhereQueryResponseForLog(response), }) - return { ...response, chunkReferences, retrievalPlan } + return { ...response, retrievalPlan } } catch (error) { logger.error("chat-agent: searchSources failed", { query: retrievalQueryParams.query, @@ -191,23 +166,6 @@ export const answerQuestionWithRetrieval = ( } } - const readRetrievedChunk = async ( - readInput: ReadRetrievedChunkInput, - ): Promise => { - const result = retrievedChunkContext.read(readInput) - logger.info("chat-agent: readRetrievedChunk", { - id: result.id, - found: result.found, - offset: result.offset, - limit: result.limit, - contentLength: result.contentLength, - returnedLength: result.contentSlice.length, - hasMoreContent: result.hasMoreContent, - nextOffset: result.nextOffset, - }) - return result - } - const generatedAnswer = yield* Effect.tryPromise(() => input.generateAnswer({ question, @@ -215,18 +173,23 @@ export const answerQuestionWithRetrieval = ( sources: input.sources, excludedSourceIds: input.excludedSourceIds, searchSources, - readRetrievedChunk, }), ) logger.info("chat-agent: answer generated", { - answerLength: generatedAnswer.length, + answerLength: generatedAnswer.manifest.text.length, retrievalCallCount: retrievalResponses.length, - registeredChunkCount: retrievedChunkContext.size(), + citationCount: generatedAnswer.manifest.citations.length, + harnessValidationErrorCount: generatedAnswer.trace.validationErrors.length, + revisionsUsed: generatedAnswer.trace.revisionsUsed, }) - const rawResults = collectRetrievalResults(retrievalResponses, input.sources) - if (rawResults.length === 0 && generatedAnswer.trim().length === 0) { + const rawResults = selectCitationRawResults({ + generatedAnswer, + retrievalResponses, + sources: input.sources, + }) + if (rawResults.length 
=== 0 && generatedAnswer.manifest.text.trim().length === 0) { return { answer: NO_RESULTS_ANSWER, citations: [] as ChatCitationView[] } } @@ -239,190 +202,153 @@ export const answerQuestionWithRetrieval = ( }), ) const answer = sanitizeGeneratedAnswer({ - answer: generatedAnswer, - question, - results, - }) - const citationResults = selectCitationResultsForAnswer({ - question, + answer: generatedAnswer.manifest.text, results, }) + const citationResults = results + const artifacts = toChatArtifactViewsFromHarness(generatedAnswer, input.sources) logger.info("chat-agent: answer complete", { answerLength: answer.length, citationCount: citationResults.length, + artifactCount: artifacts?.length ?? 0, }) return { answer, citations: toChatCitationViews(citationResults, answer), + ...(artifacts && artifacts.length > 0 ? { artifacts } : {}), } }) -type GeneratedAnswerSanitizerInput = { - readonly answer: string - readonly question: string - readonly results: readonly RetrievalResult[] -} - -function sanitizeGeneratedAnswer({ - answer, - question, - results, -}: GeneratedAnswerSanitizerInput): string { - const sanitizedAnswer = removeRetrievedMediaAssetUrls(answer, results) - - if ( - shouldUseConciseImageRequestAnswer({ - answer: sanitizedAnswer, - question, - results, - }) - ) { - return buildConciseImageRequestAnswer(question) - } - - return sanitizedAnswer -} - -function shouldUseConciseImageRequestAnswer({ - answer, - question, - results, -}: GeneratedAnswerSanitizerInput): boolean { - return ( - isShowOrSendImageRequest(question) && - !isExplicitPersonalDetailRequest(question) && - hasImageCitationResult(results) && - shouldSimplifyImageRequestAnswer(answer) - ) -} - -function selectCitationResultsForAnswer(input: { - readonly question: string - readonly results: readonly RetrievalResult[] -}): readonly RetrievalResult[] { - if (!isShowOrSendImageRequest(input.question)) return input.results - - const imageResults = input.results.filter(isImageCitationResult) - if 
(imageResults.length === 0) return input.results - - const focusedImageResults = filterFocusedImageCitationResults( - input.question, - imageResults, +function toChatArtifactViewsFromHarness( + result: HarnessRunResult, + sources: readonly AnswerQuestionInput["sources"][number][], +): ChatArtifactView[] | undefined { + const assetsByRef = new Map( + result.trace.ledger.assets.map((asset): readonly [string, EvidenceAsset] => [ + asset.ref, + asset, + ]), ) - return focusedImageResults.length > 0 ? focusedImageResults : imageResults -} - -function hasImageCitationResult(results: readonly RetrievalResult[]): boolean { - return results.some(isImageCitationResult) -} - -function isImageCitationResult(result: RetrievalResult): boolean { - const assetUrl = result.assetUrl?.trim() - if (!assetUrl) return false - - return result.chunkType.toLowerCase() === "image" || isImageAssetUrl(assetUrl) -} - -function filterFocusedImageCitationResults( - question: string, - results: readonly RetrievalResult[], -): readonly RetrievalResult[] { - const labelPattern = getFocusedImageCitationLabelPattern(question) - if (!labelPattern) return results - - return results.filter((result): boolean => - labelPattern.test(getImageCitationLabel(result)), + const chunksByRef = new Map( + result.trace.ledger.chunks.map((chunk): readonly [string, EvidenceChunk] => [ + chunk.ref, + chunk, + ]), ) -} -function getFocusedImageCitationLabelPattern(question: string): RegExp | null { - if (/身份证|公民身份|居民身份证|\bid card\b|\bidentity card\b/iu.test(question)) { - return /身份证|居民身份证|\bid card\b|\bidentity card\b/iu - } + const displayLimit = getHarnessArtifactDisplayLimit(result) + const artifacts: ChatArtifactView[] = [] + let displayedArtifactCount = 0 - return null -} - -function getImageCitationLabel(result: RetrievalResult): string { - return [ - result.source.sourceFileName, - result.source.sectionPath, - getAssetPathFromCitationUrl(result.assetUrl), - ] - .filter((value): value is string => 
Boolean(value?.trim())) - .join(" ") -} - -function getAssetPathFromCitationUrl(assetUrl: string | undefined): string | null { - if (!assetUrl) return null + for (const artifact of result.manifest.artifacts) { + const artifactView = resolveHarnessArtifactView({ + artifact, + assetsByRef, + chunksByRef, + sources, + }) + if (!artifactView) continue + + const isDisplayed = artifactView.display !== false + if ( + isDisplayed && + typeof displayLimit === "number" && + displayedArtifactCount >= displayLimit + ) { + continue + } - try { - return decodeURIComponent(new URL(assetUrl).pathname) - } catch { - return assetUrl + artifacts.push(artifactView) + if (isDisplayed) displayedArtifactCount += 1 } -} -function isShowOrSendImageRequest(question: string): boolean { - const normalizedQuestion = question.toLowerCase() - const hasImageTerm = - /图片|照片|图像|截图|身份证|\bimage\b|\bimages\b|\bphoto\b|\bphotos\b|\bpicture\b|\bpictures\b|\bscreenshot\b|\bid card\b|\bidentity card\b/u.test( - normalizedQuestion, - ) - const hasActionTerm = - /请将|请把|发送|发给我|发来|给我看|展示|显示|看一下|\bshow\b|\bsend\b|\bdisplay\b|\battach\b|\bgive me\b/u.test( - normalizedQuestion, - ) - - return hasImageTerm && hasActionTerm + return artifacts.length > 0 ? 
artifacts : undefined } -function isExplicitPersonalDetailRequest(question: string): boolean { - return /号码|身份证号|身份号码|住址|地址|出生|有效期限|签发机关|姓名|是什么|多少|\bid number\b|\bidentity number\b|\baddress\b|\bbirth\b|\bissuer\b|\bvalid/u.test( - question.toLowerCase(), +function getHarnessArtifactDisplayLimit(result: HarnessRunResult): number | null { + const constraints = result.trace.intent?.constraints + const limits = [constraints?.desiredCount, constraints?.maxCount].filter( + (value): value is number => + typeof value === "number" && Number.isSafeInteger(value) && value > 0, ) -} - -function containsPersonalDetailField(answer: string): boolean { - return /公民身份号码|身份号码|身份证号|身份证号码|住址|地址|出生日期|出生|有效期限|签发机关|性别|民族|姓名|\bid number\b|\bidentity number\b|\baddress\b|\bdate of birth\b|\bbirth date\b|\bissuer\b|\bissuing authority\b|\bvalid until\b|\bvalid through\b/i.test( - answer, - ) -} - -function shouldSimplifyImageRequestAnswer(answer: string): boolean { - const trimmedAnswer = answer.trim() - return ( - containsPersonalDetailField(trimmedAnswer) || - containsMarkdownList(trimmedAnswer) || - containsSourceIndexReference(trimmedAnswer) || - trimmedAnswer.length > getConciseImageAnswerLengthLimit(trimmedAnswer) - ) -} + return limits.length > 0 ? Math.min(...limits) : null +} + +function resolveHarnessArtifactView(input: { + readonly artifact: OutputArtifact + readonly assetsByRef: ReadonlyMap + readonly chunksByRef: ReadonlyMap + readonly sources: readonly AnswerQuestionInput["sources"][number][] +}): ChatArtifactView | null { + const asset = input.assetsByRef.get(input.artifact.ref) + if (asset) { + return toChatArtifactView({ + artifact: input.artifact, + asset, + sources: input.sources, + }) + } -function containsMarkdownList(value: string): boolean { - return /\n\s*[-*]\s+/u.test(value) + const chunk = input.chunksByRef.get(input.artifact.ref) + const chunkAssetRef = chunk?.assetRef + const chunkAsset = chunkAssetRef ? 
input.assetsByRef.get(chunkAssetRef) : null + return chunkAsset + ? toChatArtifactView({ + artifact: input.artifact, + asset: chunkAsset, + sources: input.sources, + }) + : null } -function containsSourceIndexReference(value: string): boolean { - return /\bSource\s+\d+\b/iu.test(value) +function toChatArtifactView(input: { + readonly artifact: OutputArtifact + readonly asset: EvidenceAsset + readonly sources: readonly AnswerQuestionInput["sources"][number][] +}): ChatArtifactView { + const source = normalizeHarnessSource(input.asset.source, input.sources) + return { + type: input.artifact.type, + ref: input.artifact.ref, + display: input.artifact.display, + reason: input.artifact.reason, + assetUrl: input.asset.assetUrl, + label: input.asset.label, + citation: { + chunkType: input.asset.type, + score: null, + assetUrl: input.asset.assetUrl, + source, + }, + } } -function getConciseImageAnswerLengthLimit(answer: string): number { - return containsCjkText(answer) ? 120 : 220 -} +function normalizeHarnessSource( + source: OutputCitation["source"], + sources: readonly AnswerQuestionInput["sources"][number][], +): ChatCitationView["source"] { + const sourceTitle = source.documentId + ? sources.find((candidate) => candidate.knowhereDocumentId === source.documentId) + ?.title + : undefined -function buildConciseImageRequestAnswer(question: string): string { - if (containsCjkText(question)) { - return question.includes("身份证") - ? "已找到相关身份证图片,见下方图片。" - : "已找到相关图片,见下方图片。" + return { + documentId: source.documentId, + sourceFileName: sourceTitle ?? source.sourceFileName, + sectionPath: source.sectionPath, } +} - return "I found the relevant image. See the image below." 
+type GeneratedAnswerSanitizerInput = { + readonly answer: string + readonly results: readonly RetrievalResult[] } -function containsCjkText(value: string): boolean { - return /[\u3400-\u9fff]/u.test(value) +function sanitizeGeneratedAnswer({ + answer, + results, +}: GeneratedAnswerSanitizerInput): string { + return removeRetrievedMediaAssetUrls(answer, results) } function formatKnowhereQueryResponseForLog( @@ -540,163 +466,84 @@ function normalizeRetrievalTargetContent( return value ?? "all" } -function createRetrievedChunkContext(): { - registerResponse(input: { - readonly response: RetrievalQueryResponse - readonly responseIndex: number - }): readonly RetrievedChunkReference[] - read(input: ReadRetrievedChunkInput): ReadRetrievedChunkResult - size(): number -} { - const chunksById = new Map() - - function storeChunk(chunk: StoredRetrievedChunk): void { - chunksById.set(chunk.id, chunk) - if (chunk.chunkId && chunk.chunkId !== chunk.id) { - chunksById.set(chunk.chunkId, chunk) - } - } - - return { - registerResponse(input): readonly RetrievedChunkReference[] { - const references: RetrievedChunkReference[] = [] - input.response.results.forEach((result, index): void => { - const resultIndex = index + 1 - const chunkId = getRetrievalResultChunkId(result) - const id = chunkId ?? 
`search_${input.responseIndex}_result_${resultIndex}` - const storedChunk: StoredRetrievedChunk = { - id, - chunkId, - kind: "result", - resultIndex, - content: result.content, - chunkType: result.chunkType, - score: result.score, - source: result.source, - hasAssetUrl: Boolean(result.assetUrl), - } - storeChunk(storedChunk) - references.push(toRetrievedChunkReference(storedChunk)) - }) - - input.response.referencedChunks.forEach((chunk, index): void => { - const id = chunk.chunkId || `search_${input.responseIndex}_reference_${index + 1}` - const existingChunk = chunksById.get(id) - if (existingChunk) { - references.push(toRetrievedChunkReference(existingChunk)) - return - } - - const storedChunk: StoredRetrievedChunk = { - id, - chunkId: chunk.chunkId || null, - kind: "referencedChunk", - resultIndex: null, - content: "", - chunkType: chunk.chunkType, - score: null, - source: { - documentId: chunk.documentId, - sourceFileName: null, - sectionPath: chunk.sectionPath, - }, - hasAssetUrl: Boolean(chunk.assetUrl), - } - storeChunk(storedChunk) - references.push(toRetrievedChunkReference(storedChunk)) - }) - - return references - }, - read(input): ReadRetrievedChunkResult { - const offset = normalizeChunkReadOffset(input.offset) - const limit = normalizeChunkReadLimit(input.limit) - const chunk = chunksById.get(input.id) - if (!chunk) { - return { - id: input.id, - chunkId: null, - found: false, - chunkType: null, - score: null, - source: null, - hasAssetUrl: false, - offset, - limit, - contentLength: 0, - contentSlice: "", - hasMoreContent: false, - nextOffset: null, - } - } - - const boundedOffset = Math.min(offset, chunk.content.length) - const endOffset = Math.min(boundedOffset + limit, chunk.content.length) - return { - id: chunk.id, - chunkId: chunk.chunkId, - found: true, - chunkType: chunk.chunkType, - score: chunk.score, - source: chunk.source, - hasAssetUrl: chunk.hasAssetUrl, - offset: boundedOffset, - limit, - contentLength: chunk.content.length, - 
contentSlice: chunk.content.slice(boundedOffset, endOffset), - hasMoreContent: endOffset < chunk.content.length, - nextOffset: endOffset < chunk.content.length ? endOffset : null, - } - }, - size(): number { - return chunksById.size - }, +function normalizeTopK(value: number | undefined): number { + if (typeof value !== "number" || !Number.isSafeInteger(value)) { + return DEFAULT_TOP_K } + return Math.min(Math.max(value, 1), MAX_AGENTIC_TOP_K) } -function toRetrievedChunkReference( - chunk: StoredRetrievedChunk, -): RetrievedChunkReference { - const contentPreview = chunk.content.slice(0, DEFAULT_CHUNK_READ_LIMIT) - return { - id: chunk.id, - chunkId: chunk.chunkId, - kind: chunk.kind, - resultIndex: chunk.resultIndex, - chunkType: chunk.chunkType, - score: chunk.score, - source: chunk.source, - hasAssetUrl: chunk.hasAssetUrl, - contentLength: chunk.content.length, - contentPreview, - contentTruncated: contentPreview.length < chunk.content.length, - } -} +/** + * Display citations come from the agent-curated manifest (the refs it chose to + * cite), resolved against the evidence ledger. Only when the agent cited + * nothing do we fall back to the full set of retrieved results, so a grounded + * answer still shows its sources instead of appearing unsupported. 
+ */ +function selectCitationRawResults(input: { + readonly generatedAnswer: HarnessRunResult + readonly retrievalResponses: readonly RetrievalQueryResponse[] + readonly sources: readonly AnswerQuestionInput["sources"][number][] +}): RetrievalResult[] { + const curated = mapManifestCitationsToResults(input.generatedAnswer) + if (curated.length > 0) return curated + return collectRetrievalResults(input.retrievalResponses, input.sources) +} + +function mapManifestCitationsToResults( + result: HarnessRunResult, +): RetrievalResult[] { + const chunksByRef = new Map( + result.trace.ledger.chunks.map((chunk): readonly [string, EvidenceChunk] => [ + chunk.ref, + chunk, + ]), + ) + const assetsByRef = new Map( + result.trace.ledger.assets.map((asset): readonly [string, EvidenceAsset] => [ + asset.ref, + asset, + ]), + ) -function getRetrievalResultChunkId(result: RetrievalResult): string | null { - const resultWithChunkId = result as RetrievalResult & { - readonly chunkId?: string | null - } - return resultWithChunkId.chunkId?.trim() || null -} + const results: RetrievalResult[] = [] + const seenKeys = new Set() -function normalizeChunkReadOffset(value: number | undefined): number { - if (typeof value !== "number" || !Number.isSafeInteger(value)) return 0 - return Math.max(value, 0) -} + for (const citation of result.manifest.citations) { + const chunk = + chunksByRef.get(citation.ref) ?? + resolveChunkForAssetRef(citation.ref, assetsByRef, chunksByRef) + if (!chunk) continue + + const retrievalResult: RetrievalResult = { + content: chunk.content, + chunkType: chunk.chunkType, + score: chunk.score, + ...(chunk.assetUrl ? { assetUrl: chunk.assetUrl } : {}), + source: { + documentId: chunk.source.documentId ?? undefined, + sourceFileName: chunk.source.sourceFileName ?? undefined, + sectionPath: chunk.source.sectionPath ?? 
undefined, + }, + } + const key = getRetrievalResultKey(retrievalResult) + if (seenKeys.has(key)) continue -function normalizeChunkReadLimit(value: number | undefined): number { - if (typeof value !== "number" || !Number.isSafeInteger(value)) { - return DEFAULT_CHUNK_READ_LIMIT + seenKeys.add(key) + results.push(retrievalResult) + if (results.length >= MAX_CITATION_RESULTS) break } - return Math.min(Math.max(value, 1), MAX_CHUNK_READ_LIMIT) + + return results } -function normalizeTopK(value: number | undefined): number { - if (typeof value !== "number" || !Number.isSafeInteger(value)) { - return DEFAULT_TOP_K - } - return Math.min(Math.max(value, 1), MAX_AGENTIC_TOP_K) +function resolveChunkForAssetRef( + ref: string, + assetsByRef: ReadonlyMap, + chunksByRef: ReadonlyMap, +): EvidenceChunk | undefined { + const asset = assetsByRef.get(ref) + if (!asset) return undefined + return chunksByRef.get(asset.chunkRef) } function collectRetrievalResults( diff --git a/src/domains/chat/prompt.ts b/src/domains/chat/prompt.ts index eba12ad..b13831e 100644 --- a/src/domains/chat/prompt.ts +++ b/src/domains/chat/prompt.ts @@ -1,194 +1,39 @@ -import { - generateText, - pruneMessages, - stepCountIs, - ToolLoopAgent, - tool, - type ModelMessage, - type PrepareStepFunction, -} from "ai" import { Effect } from "effect" -import type { - RetrievalQueryResponse, - RetrievalSource, -} from "@ontos-ai/knowhere-sdk" -import { z } from "zod" import { CHAT_MODEL } from "@/lib/ai" import { logger } from "@/lib/logger" import type { Source } from "@/infrastructure/db/schema" import type { ChatCitationView } from "@/domains/chat/types" +import { + runAgentHarness, + type AgentTurn, + type AgentTurnInput, + type HarnessRetrievalRequest, + type HarnessRunResult, + type TargetModality, +} from "@/agent-harness" import type { AgenticRetrievalQuery, - AgenticRetrievalResponse, + AgenticRetrievalTargetContent, ChatHistoryMessage, - ReadRetrievedChunk, - ReadRetrievedChunkInput, - 
ReadRetrievedChunkResult, - RetrievedChunkReference, SearchSources, } from "./contracts" -import { normalizeRetrievalQuery } from "./retrieval" const RECENT_CONTEXT_MESSAGE_LIMIT = 8 const CONTEXT_CONTENT_CHAR_LIMIT = 900 -const COMPACTED_HISTORY_MESSAGE_LIMIT = 12 -const COMPACTED_HISTORY_CONTENT_CHAR_LIMIT = 500 -const STORED_HISTORY_MESSAGE_LIMIT = 20 -const STORED_HISTORY_CHAR_BUDGET = 32_000 -const AGENT_STEP_MESSAGE_LIMIT = 20 -const AGENT_STEP_RECENT_MESSAGE_LIMIT = 12 -const AGENT_STEP_CONTEXT_CHAR_BUDGET = 64_000 const SOURCE_CONTEXT_LIMIT = 12 -const AGENTIC_SEARCH_STEP_LIMIT = 5 -const TOOL_EVIDENCE_CHAR_LIMIT = 12_000 -const TOOL_RESULT_CONTENT_CHAR_LIMIT = 1_500 -const TOOL_CHUNK_READ_LIMIT_DEFAULT = 4_000 -const TOOL_CHUNK_READ_LIMIT_MAX = 8_000 -const AGENT_LOOP_TOOL_INPUT_LOG_LIMIT = 1_200 -const AGENT_LOOP_TOOL_LOG_ENTRY_LIMIT = 4 -const AGENT_REQUIRED_SEARCH_STEP_COUNT = 2 -const RAW_URL_PATTERN = /https?:\/\/[^\s)\]}>"']+/g -const REDACTED_MEDIA_URL = "[media asset URL hidden]" -type GenerateContextualRetrievalQueryInput = { - question: string - messages: readonly ChatHistoryMessage[] - sources: readonly Source[] - excludedSourceIds: readonly string[] -} - -type GenerateGroundedAnswerInput = { - question: string - retrievalQuery: string - messages: readonly ChatHistoryMessage[] - evidenceText: string - mediaAssetContext?: string -} - -type BuildGroundedPromptInput = { - question: string - retrievalQuery?: string - messages?: readonly ChatHistoryMessage[] - evidenceText: string - mediaAssetContext?: string -} - -type GenerateAgenticGroundedAnswerInput = { +type GenerateAgenticOutputManifestInput = { question: string messages: readonly ChatHistoryMessage[] sources: readonly Source[] excludedSourceIds: readonly string[] searchSources: SearchSources - readRetrievedChunk: ReadRetrievedChunk -} - -type AgenticChatTools = ReturnType - -type AgentLoopLogPreview = { - readonly charLength: number - readonly truncated: boolean - readonly preview: string -} 
- -type AgentLoopToolCallLog = { - readonly toolName: string - readonly toolCallId: string | null - readonly input: AgentLoopLogPreview -} - -type AgentLoopToolOutputLog = - | AgentLoopLogPreview - | AgentLoopSearchSourcesOutputLog - | AgentLoopReadChunkOutputLog - -type AgentLoopToolResultLog = { - readonly toolName: string - readonly toolCallId: string | null - readonly output: AgentLoopToolOutputLog -} - -type AgentLoopStepLog = { - readonly stepNumber: number - readonly finishReason: string | null - readonly responseText: string - readonly responseTextCharLength: number - readonly toolCallCount: number - readonly toolCalls: readonly AgentLoopToolCallLog[] - readonly toolCallsOmitted: number - readonly toolResultCount: number - readonly toolResults: readonly AgentLoopToolResultLog[] - readonly toolResultsOmitted: number -} - -type AgentLoopSearchSourcesOutputLog = { - readonly kind: "searchSources" - readonly output: AgentLoopLogPreview -} - -type AgentLoopReadChunkOutputLog = { - readonly kind: "readRetrievedChunk" - readonly output: AgentLoopLogPreview -} - -type LlmModelMessageLog = { - readonly role: string - readonly contentCharLength: number - readonly content: unknown -} - -type RetrievalResponseWithDecisionData = AgenticRetrievalResponse & { - readonly decision_trace?: unknown - readonly decisionTree?: unknown - readonly decision_tree?: unknown -} - -type GenerateLoggedTextInput = { - readonly operation: string - readonly prompt: string -} - -export const generateContextualRetrievalQueryEffect = ( - input: GenerateContextualRetrievalQueryInput, -): Effect.Effect => - Effect.gen(function* () { - const question = input.question.trim() - if (input.messages.length === 0) return question - - if (!process.env.AI_GATEWAY_API_KEY) { - return yield* Effect.die( - new Error( - "AI_GATEWAY_API_KEY environment variable is required. 
" + - "Set it in your .env.local file.", - ), - ) - } - - const prompt = buildRetrievalQueryPrompt({ - question, - messages: input.messages, - sources: input.sources, - excludedSourceIds: input.excludedSourceIds, - }) - const response = yield* Effect.tryPromise(() => - generateLoggedText({ - operation: "generateContextualRetrievalQuery", - prompt, - }), - ) - return normalizeRetrievalQuery(response.text, question) - }) - -/** Async wrapper for the legacy single-query retrieval flow. */ -export async function generateContextualRetrievalQuery( - input: GenerateContextualRetrievalQueryInput, -): Promise { - return Effect.runPromise(generateContextualRetrievalQueryEffect(input)) } -export const generateGroundedAnswerEffect = ( - input: GenerateGroundedAnswerInput, -): Effect.Effect => +export const generateAgenticOutputManifestEffect = ( + input: GenerateAgenticOutputManifestInput, +): Effect.Effect => Effect.gen(function* () { if (!process.env.AI_GATEWAY_API_KEY) { return yield* Effect.die( @@ -199,1074 +44,111 @@ export const generateGroundedAnswerEffect = ( ) } - const response = yield* Effect.tryPromise(() => - generateLoggedText({ - operation: "generateGroundedAnswer", - prompt: buildGroundedPrompt(input), - }), - ) - return response.text.trim() - }) - -/** Async wrapper for the legacy single-response answer flow. */ -export async function generateGroundedAnswer( - input: GenerateGroundedAnswerInput, -): Promise { - return Effect.runPromise(generateGroundedAnswerEffect(input)) -} - -export const generateAgenticGroundedAnswerEffect = ( - input: GenerateAgenticGroundedAnswerInput, -): Effect.Effect => - Effect.gen(function* () { - if (!process.env.AI_GATEWAY_API_KEY) { - return yield* Effect.die( - new Error( - "AI_GATEWAY_API_KEY environment variable is required. 
" + - "Set it in your .env.local file.", - ), - ) - } - - const agent = buildAgenticChatAgent(input) - const messages = buildAgenticChatMessages(input) - logger.info("chat-agent: llm request", { - operation: "generateAgenticGroundedAnswer.initial", + const turn = buildNotebookHarnessTurn(input) + logger.info("chat-agent: harness request", { + operation: "generateAgenticOutputManifest.initial", model: CHAT_MODEL, - promptType: "messages", - messageCount: messages.length, - messages: formatModelMessagesForLlmLog(messages), + surface: turn.surface, + recentTurnCount: turn.recentTurns.length, + messageCharLength: turn.userText.length, }) - const response = yield* Effect.tryPromise(async () => { - const generationResponse = await agent.generate({ messages }) - logger.info("chat-agent: llm response", { - operation: "generateAgenticGroundedAnswer.final", - model: CHAT_MODEL, - responseTextCharLength: generationResponse.text.length, - responseText: redactRawUrls(generationResponse.text), - }) - return generationResponse - }) - return response.text.trim() - }) -export async function generateAgenticGroundedAnswer( - input: GenerateAgenticGroundedAnswerInput, -): Promise { - return Effect.runPromise(generateAgenticGroundedAnswerEffect(input)) -} - -async function generateLoggedText( - input: GenerateLoggedTextInput, -): Promise>> { - logger.info("chat-agent: llm request", { - operation: input.operation, - model: CHAT_MODEL, - promptType: "text", - promptCharLength: input.prompt.length, - prompt: redactRawUrls(input.prompt), - }) - const response = await generateText({ - model: CHAT_MODEL, - prompt: input.prompt, - }) - logger.info("chat-agent: llm response", { - operation: input.operation, - model: CHAT_MODEL, - responseTextCharLength: response.text.length, - responseText: redactRawUrls(response.text), - }) - return response -} - -export function buildRetrievalQueryPrompt( - input: GenerateContextualRetrievalQueryInput, -): string { - const sourceContext = 
formatSourceContext(input.sources, input.excludedSourceIds) - const conversationContext = formatConversationContext(input.messages) - - return [ - "You prepare one search query for the Knowhere SDK retrieval API.", - "Knowhere retrieval is stateless: it only sees the query string you return and does not know the chat history.", - "Rewrite the user's latest question into a self-contained retrieval query by adding missing document, company, topic, date, or section context from the recent conversation.", - "If the latest question already has enough context, keep it concise and close to the user's wording.", - "Do not answer the question. Return only the retrieval query text.", - "", - "Searchable sources:", - sourceContext, - "", - "Recent conversation:", - conversationContext, - "", - `Latest user question: ${input.question}`, - "", - "Retrieval query:", - ].join("\n") -} - -export function buildGroundedPrompt(input: BuildGroundedPromptInput): string { - const retrievalQuery = input.retrievalQuery?.trim() || input.question - const conversationContext = formatConversationContext(input.messages ?? []) - const mediaAssetContext = input.mediaAssetContext?.trim() - - const promptLines = [ - "You answer user questions.", - "Use the retrieved evidence as your primary context.", - "Cite document sections (e.g. [文档名 / 章节名]) when they support a claim.", - "When retrieved image or table asset references are relevant to the user's request, cite the matching source label; the UI renders media from citation metadata.", - "Do not write raw media asset URLs in the answer. 
They are internal metadata only.", - "Never output JSON metadata blocks for citations, images, tables, or media.", - "Never mention asset_id, assetUrl, raw URLs, chunk ids, request-local ids, or retrieval internals.", - "For image requests, answer briefly and let the UI render images from citation metadata.", - "For send/show image requests, do not transcribe personal details from the image; do not list identity numbers, addresses, birth dates, or document fields unless the user explicitly asks for those details.", - "Do not invent asset URLs; use only the retrieved media asset references listed below.", - "If the sources are related but incomplete, answer what you can and briefly say what is not covered.", - "Do not invent document-specific facts that are not in the sources.", - "Use the recent conversation only to resolve references like \"this document\"; do not use it as factual evidence.", - "Answer in a natural, friendly, and direct tone.", - "Use GitHub-flavored Markdown when it improves readability, such as short lists, tables, or code blocks. Keep simple answers as plain sentences.", - "Start with the answer first. Avoid meta phrases like \"Based on the sources\" or \"Based on the source excerpts\" unless the user asks how you know.", - "Use plain language.", - "Keep answers concise by default: 1-3 short paragraphs unless the user asks for detail.", - "CITATION FORMAT: Cite evidence by document and section path, e.g. 
[文档名 / 章节名].", - "", - `Question: ${input.question}`, - `Retrieval query used: ${retrievalQuery}`, - "", - "Recent conversation:", - conversationContext, - "", - "Retrieved evidence:", - input.evidenceText, - ] - - if (mediaAssetContext) { - promptLines.push( - "", - "Retrieved media asset references (internal; do not quote raw URLs):", - mediaAssetContext, - ) - } - - return promptLines.join("\n") -} - -export function buildAgenticChatSystemPrompt( - input: Pick< - GenerateAgenticGroundedAnswerInput, - "messages" | "sources" | "excludedSourceIds" - >, -): string { - const sourceContext = formatSourceContext(input.sources, input.excludedSourceIds) - - return [ - "Role", - "You are a Notebook research agent that answers user questions from their uploaded sources.", - "Use retrieved source evidence as the factual source of truth. Do not invent document-specific facts.", - "", - "Retrieval strategy", - "You have two tools: searchSources and readRetrievedChunk.", - "Use searchSources for source discovery. Its markdown output gives guidance, evidence, result previews, and Read IDs.", - "Use readRetrievedChunk only when a relevant search result preview is too short and the markdown output shows a Read ID.", - "Treat tool output like source notes from a remote index: inspect it, reason over it, then decide whether to answer, search again, or read more.", - "", - "Tool use rules", - "1. Always call searchSources before writing a final answer.", - "2. Make a second searchSources call before answering to double-check the retrieved data. Reuse the same core query or refine it with entities, document names, section paths, file paths, content types, or failure hints from the first output.", - "3. Choose the content target from the user's request: broad questions use broad or text-only search, image requests use image or text+image search, and table requests use table or text+table search.", - "4. Do not paste raw prior messages into searchSources.query. 
The query must be concise and contain only distilled search terms such as document title, person, topic, date, section path, or asset kind.", - "5. Use one response to guide the next query: carry forward discovered people, organizations, document names, section paths, file paths, content types, and failure hints.", - "6. After the verification search, if the markdown guidance says the evidence is useful and the evidence/results directly support the answer, stop searching and answer.", - "7. If results are missing, weak, or do not cover the requested entity/topic/media/table, search again with a broader or more specific query.", - "8. Use readRetrievedChunk selectively; do not read every result when the previews already answer the question.", - "9. Stop after enough evidence or when further searches are unlikely to help; then clearly say what was not found and what retrieval context was missing.", - "", - "Media/table handling", - "For image requests, search visual content directly or combine text and image evidence. If an initial text result identifies a relevant person or section but not an image asset, query again with that person/section plus the requested image concept, e.g. identity card / 身份证 / 公民身份证明.", - "For table requests, search table content directly or combine text and table evidence.", - "When retrieved image or table assets are relevant, cite the matching source label; the UI renders media from citation metadata.", - "Do not invent asset URLs or describe hidden asset metadata.", - "", - "Final answer contract", - "Conversation context is supplied as managed model messages. Use it only to resolve references like \"this document\" or \"those images\".", - "Cite document sections in the answer, e.g. 
[文档名 / 章节名].", - "Use existing [Source N: label] labels only when they are the clearest available citation form.", - "Never output JSON metadata blocks for citations, images, tables, or media.", - "Never mention asset_id, assetUrl, raw URLs, chunk ids, Read IDs, tool parameters, or retrieval internals.", - "For image requests, answer briefly and let the UI render images from citation metadata.", - "For send/show image requests, do not transcribe personal details from the image; do not list identity numbers, addresses, birth dates, or document fields unless the user explicitly asks for those details.", - "Do not add unrelated personal details for send/show image requests unless the user asks.", - "Use GitHub-flavored Markdown when it improves readability, such as short lists, tables, or code blocks. Keep simple answers as plain sentences.", - "Start with the answer first. Keep answers concise unless the user asks for detail.", - "", - "Searchable sources", - sourceContext, - ].join("\n") -} - -function buildAgenticChatAgent( - input: GenerateAgenticGroundedAnswerInput, -): ToolLoopAgent { - const instructions = buildAgenticChatSystemPrompt(input) - return new ToolLoopAgent({ - model: CHAT_MODEL, - instructions, - tools: buildAgenticChatTools(input), - stopWhen: stepCountIs(AGENTIC_SEARCH_STEP_LIMIT), - prepareStep: buildAgenticPrepareStep(instructions), - onStepFinish: (event) => { - logger.info("chat-agent: llm response", { - operation: "generateAgenticGroundedAnswer.step", + const result = yield* Effect.tryPromise(() => + runAgentHarness({ model: CHAT_MODEL, - stepNumber: event.stepNumber, - finishReason: event.finishReason, - responseTextCharLength: event.text.length, - responseText: redactRawUrls(event.text), - toolCallCount: event.toolCalls.length, - toolCalls: formatAgentLoopToolCalls(event.toolCalls), - toolCallsOmitted: getOmittedAgentLoopEntryCount(event.toolCalls), - toolResultCount: event.toolResults.length, - toolResults: 
formatAgentLoopToolResults(event.toolResults), - toolResultsOmitted: getOmittedAgentLoopEntryCount(event.toolResults), - inputTokens: event.usage.inputTokens, - outputTokens: event.usage.outputTokens, - totalTokens: event.usage.totalTokens, - }) - }, - onFinish: (event) => { - logger.info("chat-agent: loop finished", { - stepCount: event.steps.length, - finishReason: event.finishReason, - responseTextCharLength: event.text.length, - responseText: redactRawUrls(event.text), - steps: event.steps.map(formatAgentLoopStep), - toolNames: Array.from( - new Set( - event.steps.flatMap((step) => - step.toolCalls.map((toolCall) => toolCall.toolName), - ), - ), - ), - inputTokens: event.totalUsage.inputTokens, - outputTokens: event.totalUsage.outputTokens, - totalTokens: event.totalUsage.totalTokens, - }) - }, - }) -} - -function buildAgenticChatTools( - input: Pick< - GenerateAgenticGroundedAnswerInput, - "searchSources" | "readRetrievedChunk" - >, -) { - return { - searchSources: tool({ - description: - "Search the user's Notebook sources through Knowhere retrieval. " + - "It returns markdown source notes with guidance, evidence, previews, " + - "and Read IDs for follow-up reads. Use it before answering and call it " + - "again with refined text, media, or section-path queries when evidence is missing or weak.", - inputSchema: z.object({ - query: z - .string() - .min(1) - .describe( - "A concise, self-contained retrieval query. Do not paste raw chat history or previous messages. Use only distilled terms such as document title, person, topic, date, section path, or asset kind when needed.", - ), - targetContent: z - .enum([ - "all", - "text", - "image", - "table", - "text_image", - "text_table", - ]) - .optional() - .describe( - "The content type to retrieve: all, text, image, table, text_image, or text_table. 
Omit only when all content types are useful.", - ), - purpose: z - .string() - .min(1) - .max(240) - .optional() - .describe( - "Short reason this query is needed, such as finding an entity, locating an image asset, or verifying a citation.", - ), - topK: z - .number() - .int() - .min(1) - .max(12) - .optional() - .describe("Number of chunks to return. Defaults to 8."), - signalPaths: z - .array(z.string().min(1)) - .max(8) - .optional() - .describe( - "Optional section/path keywords when a previous result points to a useful section.", - ), - filterMode: z - .enum(["keep", "delete"]) - .optional() - .describe( - "How to apply signalPaths. Use keep to focus on matching paths, delete to exclude them.", - ), - threshold: z - .number() - .min(0) - .max(1) - .optional() - .describe("Optional minimum retrieval score threshold."), - }), - execute: async (queryInput: AgenticRetrievalQuery) => { - const output = buildRetrievalToolOutput( - await input.searchSources(queryInput), - ) - logToolMarkdownOutput("searchSources", output) - return output - }, - }), - readRetrievedChunk: tool({ - description: - "Read an offset/limit content slice from a Read ID shown in searchSources markdown. " + - "Use this when a returned result preview is relevant and you want more data before answering.", - inputSchema: z.object({ - id: z - .string() - .min(1) - .describe( - "The Read ID shown in searchSources markdown for a relevant result.", - ), - offset: z - .number() - .int() - .min(0) - .optional() - .describe("Character offset to start reading from. Defaults to 0."), - limit: z - .number() - .int() - .min(1) - .max(TOOL_CHUNK_READ_LIMIT_MAX) - .optional() - .describe( - `Maximum characters to return. 
Defaults to ${TOOL_CHUNK_READ_LIMIT_DEFAULT}; max ${TOOL_CHUNK_READ_LIMIT_MAX}.`, - ), - }), - execute: async (readInput: ReadRetrievedChunkInput) => { - const output = buildRetrievedChunkToolOutput( - await input.readRetrievedChunk(readInput), - ) - logToolMarkdownOutput("readRetrievedChunk", output) - return output - }, - }), - } as const -} - -function buildAgenticPrepareStep( - instructions: string, -): PrepareStepFunction { - return ({ stepNumber, messages }) => { - const managedMessages = buildAgentStepMessages(messages) - if (stepNumber < AGENT_REQUIRED_SEARCH_STEP_COUNT) { - const stepInput = { - messages: managedMessages, - toolChoice: { - type: "tool" as const, - toolName: "searchSources" as const, + turn, + retrieval: { + query: (request) => + input.searchSources(toAgenticRetrievalQuery(request)), }, - activeTools: ["searchSources" as const], - } - logAgentStepLlmRequest({ - stepNumber, - instructions, - messages: managedMessages, - toolChoice: stepInput.toolChoice, - activeTools: stepInput.activeTools, - }) - return stepInput - } + }), + ) - logAgentStepLlmRequest({ - stepNumber, - instructions, - messages: managedMessages, - toolChoice: null, - activeTools: null, + logger.info("chat-agent: harness response", { + operation: "generateAgenticOutputManifest.final", + model: CHAT_MODEL, + answerLength: result.manifest.text.length, + citationCount: result.manifest.citations.length, + artifactCount: result.manifest.artifacts.length, + unresolvedCount: result.manifest.unresolved.length, + validationErrorCount: result.trace.validationErrors.length, + intentTask: result.trace.intent?.task ?? null, + carryHistory: result.trace.contextPolicy?.carryHistory ?? 
null, }) - return { messages: managedMessages } - } -} - -function formatAgentLoopStep(step: unknown, index: number): AgentLoopStepLog { - const record = getRecordFromUnknown(step) - const toolCalls = getRecordArray(record, "toolCalls") - const toolResults = getRecordArray(record, "toolResults") - const responseText = getRecordString(record, "text") ?? "" - return { - stepNumber: - getRecordNumber(record, "stepNumber") ?? - getRecordNumber(record, "stepIndex") ?? - index + 1, - finishReason: getRecordString(record, "finishReason"), - responseText: redactRawUrls(responseText), - responseTextCharLength: responseText.length, - toolCallCount: toolCalls.length, - toolCalls: formatAgentLoopToolCalls(toolCalls), - toolCallsOmitted: getOmittedAgentLoopEntryCount(toolCalls), - toolResultCount: toolResults.length, - toolResults: formatAgentLoopToolResults(toolResults), - toolResultsOmitted: getOmittedAgentLoopEntryCount(toolResults), - } -} - -function formatAgentLoopToolCalls( - toolCalls: readonly unknown[], -): readonly AgentLoopToolCallLog[] { - return toolCalls - .slice(0, AGENT_LOOP_TOOL_LOG_ENTRY_LIMIT) - .map(formatAgentLoopToolCall) -} - -function formatAgentLoopToolCall(toolCall: unknown): AgentLoopToolCallLog { - const record = getRecordFromUnknown(toolCall) - return { - toolName: getRecordString(record, "toolName") ?? "unknown", - toolCallId: getRecordString(record, "toolCallId"), - input: buildAgentLoopPreview( - getFirstRecordValue(record, ["input", "args", "arguments"]), - AGENT_LOOP_TOOL_INPUT_LOG_LIMIT, - ), - } -} - -function formatAgentLoopToolResults( - toolResults: readonly unknown[], -): readonly AgentLoopToolResultLog[] { - return toolResults - .slice(0, AGENT_LOOP_TOOL_LOG_ENTRY_LIMIT) - .map(formatAgentLoopToolResult) -} - -function formatAgentLoopToolResult(toolResult: unknown): AgentLoopToolResultLog { - const record = getRecordFromUnknown(toolResult) - const toolName = getRecordString(record, "toolName") ?? 
"unknown" - return { - toolName, - toolCallId: getRecordString(record, "toolCallId"), - output: formatAgentLoopToolOutput( - toolName, - getFirstRecordValue(record, ["output", "result", "content"]), - ), - } -} - -function formatAgentLoopToolOutput( - toolName: string, - output: unknown, -): AgentLoopToolOutputLog { - if (toolName === "searchSources") { - return { - kind: "searchSources", - output: buildAgentLoopFullMarkdownPreview(output), - } - } - if (toolName === "readRetrievedChunk") { - return { - kind: "readRetrievedChunk", - output: buildAgentLoopFullMarkdownPreview(output), - } - } - return buildAgentLoopFullPreview(output) -} - -function getOmittedAgentLoopEntryCount(entries: readonly unknown[]): number { - return Math.max(0, entries.length - AGENT_LOOP_TOOL_LOG_ENTRY_LIMIT) -} - -function logAgentStepLlmRequest(input: { - readonly stepNumber: number - readonly instructions: string - readonly messages: readonly ModelMessage[] - readonly toolChoice: unknown - readonly activeTools: readonly string[] | null -}): void { - logger.info("chat-agent: llm request", { - operation: "generateAgenticGroundedAnswer.step", - model: CHAT_MODEL, - promptType: "messages", - stepNumber: input.stepNumber, - instructionsCharLength: input.instructions.length, - instructions: redactRawUrls(input.instructions), - messageCount: input.messages.length, - messages: formatModelMessagesForLlmLog(input.messages), - toolChoice: input.toolChoice, - activeTools: input.activeTools, + return result }) -} -function formatModelMessagesForLlmLog( - messages: readonly ModelMessage[], -): readonly LlmModelMessageLog[] { - return messages.map(formatModelMessageForLlmLog) -} - -function formatModelMessageForLlmLog(message: ModelMessage): LlmModelMessageLog { - return { - role: message.role, - contentCharLength: getUnknownTextLength(message.content), - content: redactRawUrlsFromUnknown(message.content), - } -} - -function buildAgentLoopPreview( - value: unknown, - limit: number, -): 
AgentLoopLogPreview { - const normalized = redactRawUrls(stringifyAgentLoopLogValue(value)) - .replace(/\s+/g, " ") - .trim() - const truncated = normalized.length > limit - return { - charLength: normalized.length, - truncated, - preview: truncated ? `${normalized.slice(0, limit)}...` : normalized, - } +export async function generateAgenticOutputManifest( + input: GenerateAgenticOutputManifestInput, +): Promise { + return Effect.runPromise(generateAgenticOutputManifestEffect(input)) } -function buildAgentLoopFullPreview(value: unknown): AgentLoopLogPreview { - const normalized = redactRawUrls(stringifyAgentLoopLogValue(value)) +function buildNotebookHarnessTurn( + input: GenerateAgenticOutputManifestInput, +): AgentTurnInput { return { - charLength: normalized.length, - truncated: false, - preview: normalized, - } -} - -function buildAgentLoopFullMarkdownPreview(value: unknown): AgentLoopLogPreview { - const normalized = redactRawUrls(stringifyAgentLoopLogValue(value)) - return { - charLength: normalized.length, - truncated: false, - preview: normalized, - } -} - -function stringifyAgentLoopLogValue(value: unknown): string { - if (typeof value === "string") return value - if (value === undefined) return "undefined" - if (typeof value === "function") { - return `[Function ${value.name || "anonymous"}]` - } - if (typeof value === "symbol") return value.toString() - - const json = JSON.stringify(value, createAgentLoopLogJsonReplacer()) - return json ?? 
String(value) -} - -function createAgentLoopLogJsonReplacer(): ( - key: string, - value: unknown, -) => unknown { - const seenObjects = new WeakSet() - return (_key: string, value: unknown): unknown => { - if (typeof value === "bigint") return value.toString() - if (typeof value === "function") { - return `[Function ${value.name || "anonymous"}]` - } - if (typeof value === "symbol") return value.toString() - if (value instanceof Error) { - return { - name: value.name, - message: value.message, - } - } - if (!value || typeof value !== "object") return value - if (seenObjects.has(value)) return "[Circular]" - seenObjects.add(value) - return value + surface: "notebook_chat", + userText: input.question, + recentTurns: buildNotebookHarnessRecentTurns(input.messages), + sourceContext: formatSourceContext(input.sources, input.excludedSourceIds), + outputCapabilities: { + text: true, + image: true, + table: true, + }, } } -function getRecordFromUnknown( - value: unknown, -): Readonly> | null { - if (!value || typeof value !== "object" || Array.isArray(value)) return null - return value as Readonly> -} - -function getRecordString( - record: Readonly> | null, - key: string, -): string | null { - const value = record?.[key] - return typeof value === "string" ? value : null -} - -function getRecordNumber( - record: Readonly> | null, - key: string, -): number | null { - const value = record?.[key] - return typeof value === "number" ? value : null -} - -function getRecordArray( - record: Readonly> | null, - key: string, -): readonly unknown[] { - const value = record?.[key] - return Array.isArray(value) ? value : [] -} - -function getFirstRecordValue( - record: Readonly> | null, - keys: readonly string[], -): unknown { - const matchingKey = keys.find((key): boolean => record?.[key] !== undefined) - return matchingKey ? 
record?.[matchingKey] : undefined -} - -function buildAgenticChatMessages( - input: Pick, -): ModelMessage[] { - return [ - ...buildManagedStoredHistoryMessages(input.messages), - { role: "user", content: input.question }, - ] -} - -function buildManagedStoredHistoryMessages( +function buildNotebookHarnessRecentTurns( messages: readonly ChatHistoryMessage[], -): ModelMessage[] { - const exactMessages = messages.map(toModelMessage) - if ( - exactMessages.length <= STORED_HISTORY_MESSAGE_LIMIT && - getModelMessagesCharLength(exactMessages) <= STORED_HISTORY_CHAR_BUDGET - ) { - return exactMessages - } - - const recentMessages = messages.slice(-RECENT_CONTEXT_MESSAGE_LIMIT) - const olderMessages = messages.slice(0, -RECENT_CONTEXT_MESSAGE_LIMIT) - const compactedHistoryContext = formatCompactedHistoryContext(olderMessages) - - return [ - ...(compactedHistoryContext - ? [ - { - role: "system" as const, - content: compactedHistoryContext, - }, - ] - : []), - ...recentMessages.map(toModelMessage), - ] -} - -function buildAgentStepMessages(messages: ModelMessage[]): ModelMessage[] { - const prunedMessages = pruneMessages({ - messages: [...messages], - reasoning: "before-last-message", - toolCalls: [{ type: "before-last-4-messages", tools: ["searchSources"] }], - emptyMessages: "remove", - }) - - if ( - prunedMessages.length <= AGENT_STEP_MESSAGE_LIMIT && - getModelMessagesCharLength(prunedMessages) <= AGENT_STEP_CONTEXT_CHAR_BUDGET - ) { - return prunedMessages - } - - const systemMessages = prunedMessages.filter( - (message): boolean => message.role === "system", - ) - const nonSystemMessages = prunedMessages.filter( - (message): boolean => message.role !== "system", - ) - - return [ - ...systemMessages, - ...selectRecentMessagesWithinBudget({ - messages: nonSystemMessages, - reservedCharLength: getModelMessagesCharLength(systemMessages), - charBudget: AGENT_STEP_CONTEXT_CHAR_BUDGET, - messageLimit: AGENT_STEP_RECENT_MESSAGE_LIMIT, - }), - ] -} - -function 
selectRecentMessagesWithinBudget(input: { - readonly messages: readonly ModelMessage[] - readonly reservedCharLength: number - readonly charBudget: number - readonly messageLimit: number -}): ModelMessage[] { - const selectedMessages: ModelMessage[] = [] - const remainingCharBudget = Math.max( - input.charBudget - input.reservedCharLength, - 0, - ) - let selectedCharLength = 0 - - for (const message of [...input.messages].reverse()) { - if (selectedMessages.length >= input.messageLimit) break - - const messageCharLength = getUnknownTextLength(message.content) - const isLatestMessage = selectedMessages.length === 0 - const canFitWithinBudget = - selectedCharLength + messageCharLength <= remainingCharBudget - if (!isLatestMessage && !canFitWithinBudget) continue - - selectedMessages.push(message) - selectedCharLength += messageCharLength - } - - return selectedMessages.reverse() -} - -function toModelMessage(message: ChatHistoryMessage): ModelMessage { - return { +): AgentTurn[] { + return messages.slice(-RECENT_CONTEXT_MESSAGE_LIMIT).map((message, index) => ({ + id: `history_${Math.max(messages.length - RECENT_CONTEXT_MESSAGE_LIMIT, 0) + index + 1}`, role: message.role, + contentPreview: truncateContextText(message.content), content: message.content, - } -} - -function formatCompactedHistoryContext( - messages: readonly ChatHistoryMessage[], -): string { - if (messages.length === 0) return "" - - const selectedMessages = messages.slice(-COMPACTED_HISTORY_MESSAGE_LIMIT) - const omittedMessageCount = messages.length - selectedMessages.length - const lines = selectedMessages.map((message): string => { - const content = truncateContextTextToLimit( - message.content, - COMPACTED_HISTORY_CONTENT_CHAR_LIMIT, - ) - const citationContext = formatCitationContext(message.citations ?? []) - return citationContext - ? 
`- ${message.role}: ${content}\n citations: ${citationContext}` - : `- ${message.role}: ${content}` - }) - - return [ - "Compacted earlier conversation for context. This is not a retrieval query and must not be pasted into searchSources.query.", - omittedMessageCount > 0 - ? `${omittedMessageCount} earlier messages were omitted before this compacted context.` - : "", - ...lines, - ] - .filter((line): boolean => line.length > 0) - .join("\n") -} - -function getModelMessagesCharLength(messages: readonly ModelMessage[]): number { - return messages.reduce( - (totalLength, message): number => - totalLength + getUnknownTextLength(message.content), - 0, - ) -} - -function getUnknownTextLength(value: unknown): number { - if (typeof value === "string") return value.length - if (value === null || value === undefined) return 0 - return JSON.stringify(value).length -} - -function buildRetrievalToolOutput(response: AgenticRetrievalResponse): string { - const resultReferences = response.chunkReferences.filter( - (reference): boolean => reference.kind === "result", - ) - const relatedReferences = response.chunkReferences.filter( - (reference): boolean => reference.kind === "referencedChunk", - ) - const lines = [ - "## Retrieval Result", - "", - `Query: ${redactRawUrls(response.query)}`, - `Guidance: ${getRetrievalResponseGuidance(response)}`, - "", - "## Evidence", - formatOptionalMarkdownText(response.evidenceText, "No evidence text returned."), - "", - ...formatDecisionTraceMarkdown(response), - "## Results", - ...formatResultReferencesMarkdown(resultReferences), - "", - "## Related Sources", - ...formatRelatedReferencesMarkdown(relatedReferences), - ] - - return lines.join("\n") -} - -function formatDecisionTraceMarkdown( - response: AgenticRetrievalResponse, -): readonly string[] { - const decisionData = getDecisionTraceData(response) - if (!decisionData) return [] - - return [ - "## Decision Trace", - ...formatDecisionValueMarkdown(decisionData, 0), - "", - ] -} - -function 
getDecisionTraceData(response: AgenticRetrievalResponse): unknown | null { - const record = response as RetrievalResponseWithDecisionData - const candidates = [ - record.decisionTrace, - record.decision_trace, - record.decisionTree, - record.decision_tree, - ] - - return candidates.find(hasRenderableDecisionData) ?? null -} - -function hasRenderableDecisionData(value: unknown): boolean { - if (Array.isArray(value)) return value.length > 0 - if (typeof value === "string") return value.trim().length > 0 - return Boolean(value && typeof value === "object") -} - -function formatDecisionValueMarkdown( - value: unknown, - depth: number, -): readonly string[] { - if (Array.isArray(value)) return formatDecisionArrayMarkdown(value, depth) - if (value && typeof value === "object") { - return formatDecisionRecordMarkdown(value as Record, depth) - } - - return [`${getDecisionIndent(depth)}- ${formatDecisionScalar(value)}`] + citationLabels: getCitationLabels(message.citations ?? []), + })) } -function formatDecisionArrayMarkdown( - values: readonly unknown[], - depth: number, +function getCitationLabels( + citations: readonly ChatCitationView[], ): readonly string[] { - if (values.length === 0) return [`${getDecisionIndent(depth)}- none`] - - return values.flatMap((value, index): readonly string[] => { - const label = depth === 0 ? 
`Step ${index + 1}` : `Item ${index + 1}` - if (value && typeof value === "object") { - return [ - `${getDecisionIndent(depth)}- ${label}:`, - ...formatDecisionValueMarkdown(value, depth + 1), - ] - } - return [ - `${getDecisionIndent(depth)}- ${label}: ${formatDecisionScalar(value)}`, - ] - }) + return formatCitationContext(citations) + .split(";") + .map((label) => label.trim()) + .filter((label) => label.length > 0) } -function formatDecisionRecordMarkdown( - record: Record, - depth: number, -): readonly string[] { - const entries = Object.entries(record).filter( - ([key, value]): boolean => shouldRenderDecisionEntry(key, value), - ) - if (entries.length === 0) return [`${getDecisionIndent(depth)}- none`] - - return entries.flatMap(([key, value]): readonly string[] => { - if (Array.isArray(value) || (value && typeof value === "object")) { - return [ - `${getDecisionIndent(depth)}- ${key}:`, - ...formatDecisionValueMarkdown(value, depth + 1), - ] - } - return [ - `${getDecisionIndent(depth)}- ${key}: ${formatDecisionScalar(value)}`, - ] - }) -} - -function shouldRenderDecisionEntry(key: string, value: unknown): boolean { - if (value === null || value === undefined) return false - if (typeof value === "string" && value.trim().length === 0) return false - - return !isInternalDecisionField(key) -} - -function isInternalDecisionField(key: string): boolean { - return [ - "assetId", - "asset_id", - "assetUrl", - "asset_url", - "rawUrl", - "raw_url", - "presignedUrl", - "presigned_url", - ].includes(key) -} - -function formatDecisionScalar(value: unknown): string { - if (typeof value === "string") { - return redactRawUrls(value).replace(/\s+/g, " ").trim() - } - if (typeof value === "number" || typeof value === "boolean") { - return String(value) +function toAgenticRetrievalQuery( + request: HarnessRetrievalRequest, +): AgenticRetrievalQuery { + return { + query: request.query, + targetContent: toAgenticRetrievalTargetContent(request.modalities), + purpose: 
request.purpose, + topK: request.topK, + signalPaths: request.signalPaths, + filterMode: request.filterMode, + threshold: request.threshold, } - return redactRawUrls(String(value)).replace(/\s+/g, " ").trim() -} - -function getDecisionIndent(depth: number): string { - return " ".repeat(depth) -} - -function formatResultReferencesMarkdown( - references: readonly RetrievedChunkReference[], -): readonly string[] { - if (references.length === 0) return ["- No direct results returned."] - - return references.flatMap((reference, index): readonly string[] => [ - `### Result ${reference.resultIndex ?? index + 1}`, - `Type: ${reference.chunkType}`, - `Source: ${formatToolSourceLabel(reference.source)}`, - `Media: ${formatMediaAvailability(reference)}`, - `Read ID: ${reference.id}`, - `More content available: ${reference.contentTruncated ? "yes" : "no"}`, - "", - "Preview:", - formatMarkdownCodeBlock( - truncateSafeContextTextToLimit( - reference.contentPreview, - TOOL_RESULT_CONTENT_CHAR_LIMIT, - ) || "No preview text returned.", - ), - "", - ]) -} - -function formatRelatedReferencesMarkdown( - references: readonly RetrievedChunkReference[], -): readonly string[] { - if (references.length === 0) return ["- No related sources returned."] - - return references.flatMap((reference, index): readonly string[] => [ - `### Related Source ${index + 1}`, - `Type: ${reference.chunkType}`, - `Source: ${formatToolSourceLabel(reference.source)}`, - `Media: ${formatMediaAvailability(reference)}`, - "", - ]) } -function buildRetrievedChunkToolOutput( - result: ReadRetrievedChunkResult, -): string { - if (!result.found) { - return [ - "## Retrieved Content", - "", - "Status: not_found", - `Read ID: ${result.id}`, - "Guidance: The requested Read ID was not found. 
Search again or use a Read ID shown in the latest retrieval result.", - ].join("\n") +function toAgenticRetrievalTargetContent( + modalities: readonly TargetModality[], +): AgenticRetrievalTargetContent { + const requestedModalities = new Set(modalities) + if (requestedModalities.has("image") && requestedModalities.has("text")) { + return "text_image" } - - return [ - "## Retrieved Content", - "", - "Status: found", - `Read ID: ${result.id}`, - `Type: ${result.chunkType ?? "unknown"}`, - `Source: ${result.source ? formatToolSourceLabel(result.source) : "Unknown source"}`, - `Media: ${result.hasAssetUrl ? "available" : "none"}`, - `Returned range: ${result.offset}-${result.offset + result.contentSlice.length} of ${result.contentLength} characters`, - `More content available: ${result.hasMoreContent ? "yes" : "no"}`, - ...(result.nextOffset === null ? [] : [`Next offset: ${result.nextOffset}`]), - "", - "## Content", - formatMarkdownCodeBlock(redactRawUrls(result.contentSlice)), - ].join("\n") -} - -function formatOptionalMarkdownText( - value: string | null | undefined, - fallback: string, -): string { - const normalized = truncateSafeMarkdownTextToLimit( - value ?? "", - TOOL_EVIDENCE_CHAR_LIMIT, - ) - return formatMarkdownCodeBlock(normalized || fallback) -} - -function formatMarkdownCodeBlock(value: string): string { - return ["```text", value.replaceAll("```", "'''"), "```"].join("\n") -} - -function formatToolSourceLabel(source: RetrievalSource): string { - const label = [ - source.sourceFileName ? redactRawUrls(source.sourceFileName) : null, - source.sectionPath ? 
redactRawUrls(source.sectionPath) : null, - ] - .filter((value): value is string => Boolean(value?.trim())) - .join(" / ") - - return label || "Unknown source" -} - -function formatMediaAvailability(reference: RetrievedChunkReference): string { - if (!reference.hasAssetUrl) return "none" - - const chunkType = reference.chunkType.toLowerCase() - if (chunkType === "image") return "image available" - if (chunkType === "table") return "table available" - return "media available" -} - -function logToolMarkdownOutput(toolName: string, output: string): void { - logger.info("chat-agent: tool output", { - toolName, - output: buildAgentLoopFullMarkdownPreview(output), - }) -} - -function getRetrievalResponseGuidance( - response: RetrievalQueryResponse, -): string { - const hasEvidence = Boolean(response.evidenceText?.trim()) - const hasResults = - response.results.length > 0 || response.referencedChunks.length > 0 - - if (response.failureReason) { - return ( - "Retrieval reported a semantic failure. If the user question is still answerable, " + - "try one refined query; otherwise say the sources do not contain enough support." - ) - } - if (!hasEvidence && !hasResults) { - return ( - "No useful evidence was returned. Try a broader query, a different wording, " + - "or an image/table-focused content target if the user asked for images or tables." - ) - } - if (response.stopReason && response.stopReason !== "answer_done") { - return ( - `Retrieval stopped with stopReason=${response.stopReason}. Inspect evidence; ` + - "if it does not directly answer the user, query again with a better target." - ) + if (requestedModalities.has("table") && requestedModalities.has("text")) { + return "text_table" } - return ( - "Use this evidence if it directly answers the user. Query again only if an " + - "important requested detail, source, image, table, person, date, or section is missing." 
- ) + if (requestedModalities.has("image")) return "image" + if (requestedModalities.has("table")) return "table" + if (requestedModalities.has("text")) return "text" + return "all" } function formatSourceContext( @@ -1287,21 +169,6 @@ function formatSourceContext( return lines.length > 0 ? lines.join("\n") : "- No searchable sources." } -function formatConversationContext( - messages: readonly ChatHistoryMessage[], -): string { - const recentMessages = messages.slice(-RECENT_CONTEXT_MESSAGE_LIMIT) - const lines = recentMessages.map((message): string => { - const content = truncateContextText(message.content) - const citationContext = formatCitationContext(message.citations ?? []) - return citationContext - ? `- ${message.role}: ${content}\n citations: ${citationContext}` - : `- ${message.role}: ${content}` - }) - - return lines.length > 0 ? lines.join("\n") : "- No prior messages." -} - function formatCitationContext( citations: readonly ChatCitationView[], ): string { @@ -1322,37 +189,3 @@ function truncateContextText(value: string): string { if (normalized.length <= CONTEXT_CONTENT_CHAR_LIMIT) return normalized return `${normalized.slice(0, CONTEXT_CONTENT_CHAR_LIMIT)}...` } - -function truncateContextTextToLimit(value: string, limit: number): string { - const normalized = value.replace(/\s+/g, " ").trim() - if (normalized.length <= limit) return normalized - return `${normalized.slice(0, limit)}...` -} - -function truncateSafeContextTextToLimit(value: string, limit: number): string { - return truncateContextTextToLimit(redactRawUrls(value), limit) -} - -function truncateSafeMarkdownTextToLimit(value: string, limit: number): string { - const normalized = redactRawUrls(value).replace(/\r\n?/g, "\n") - if (normalized.trim().length === 0) return "" - if (normalized.length <= limit) return normalized - return `${normalized.slice(0, limit)}...` -} - -function redactRawUrls(value: string): string { - return value.replace(RAW_URL_PATTERN, REDACTED_MEDIA_URL) -} - 
-function redactRawUrlsFromUnknown(value: unknown): unknown { - if (typeof value === "string") return redactRawUrls(value) - if (Array.isArray(value)) return value.map(redactRawUrlsFromUnknown) - if (!value || typeof value !== "object") return value - - return Object.fromEntries( - Object.entries(value).map(([key, nestedValue]) => [ - key, - redactRawUrlsFromUnknown(nestedValue), - ]), - ) -} diff --git a/src/domains/chat/route-answer.ts b/src/domains/chat/route-answer.ts index 741749b..58763da 100644 --- a/src/domains/chat/route-answer.ts +++ b/src/domains/chat/route-answer.ts @@ -1,7 +1,7 @@ import { Cause, Effect, Either, Option } from "effect" import { - generateAgenticGroundedAnswer, + generateAgenticOutputManifest, parseChatRequestBody, } from "@/domains/chat" import { @@ -68,7 +68,7 @@ const answerChatEffect = (input: AnswerChatInput) => threadId: body.value.threadId, excludedSourceIds: body.value.excludedSourceIds, retrieval: client.retrieval, - generateAnswer: generateAgenticGroundedAnswer, + generateAnswer: generateAgenticOutputManifest, loadSourceAssetUrls: (source) => sourceService.getParseAssetUrls(workspace.id, source.id), repository: chatTurnPersistence.createRepository(), diff --git a/src/domains/chat/route-service.test.ts b/src/domains/chat/route-service.test.ts index b2b9ef4..f24e5cd 100644 --- a/src/domains/chat/route-service.test.ts +++ b/src/domains/chat/route-service.test.ts @@ -8,7 +8,7 @@ const mocks = vi.hoisted(() => ({ createChatThread: vi.fn(), ensureDefaultChatThread: vi.fn(), findChatThreadInWorkspace: vi.fn(), - generateAgenticGroundedAnswer: vi.fn(), + generateAgenticOutputManifest: vi.fn(), getAuthenticated: vi.fn(), getAuthenticatedWithClient: vi.fn(), handleChatTurn: vi.fn(), @@ -25,7 +25,7 @@ vi.mock("@/domains/chat", async (importOriginal) => { const original = await importOriginal() return { ...original, - generateAgenticGroundedAnswer: mocks.generateAgenticGroundedAnswer, + generateAgenticOutputManifest: 
mocks.generateAgenticOutputManifest, } }) @@ -123,7 +123,8 @@ describe("chat route services", () => { threadId: "thread_1", excludedSourceIds: ["source_skipped"], retrieval: client.retrieval, - generateAnswer: mocks.generateAgenticGroundedAnswer, + generateAnswer: mocks.generateAgenticOutputManifest, + loadSourceAssetUrls: expect.any(Function), repository: expect.objectContaining({ appendMessageToThread: expect.any(Function), ensureDefaultChatThread: expect.any(Function), @@ -370,6 +371,7 @@ function makeMessage(overrides: Partial = {}): ChatMessage { role: "user", content: "Message", citations: null, + artifacts: null, createdAt: new Date("2026-05-06T00:00:00Z"), ...overrides, } diff --git a/src/domains/chat/service.test.ts b/src/domains/chat/service.test.ts index e449c5d..71da889 100644 --- a/src/domains/chat/service.test.ts +++ b/src/domains/chat/service.test.ts @@ -1,6 +1,7 @@ import { describe, expect, it, vi } from "vitest"; import type { RetrievalResult } from "@ontos-ai/knowhere-sdk"; import { Either } from "effect"; +import type { HarnessRunResult } from "@/agent-harness"; import { handleChatTurn } from "./service"; import type { ChatMessage, ChatThread, Source, Workspace } from "@/infrastructure/db/schema"; @@ -21,7 +22,7 @@ describe("handleChatTurn", () => { const repository = makeRepository(); const generateAnswer = vi.fn(async ({ searchSources }) => { await searchSources({ query: "What does the document say?" 
}); - return "Grounded answer."; + return makeHarnessRunResult("Grounded answer."); }); const sources = [ makeSource({ id: "source_included", knowhereDocumentId: "doc_included" }), @@ -66,7 +67,6 @@ describe("handleChatTurn", () => { sources, excludedSourceIds: ["source_excluded"], searchSources: expect.any(Function), - readRetrievedChunk: expect.any(Function), }); expect(repository.appendMessageToThread).toHaveBeenNthCalledWith(1, "workspace_1", { threadId: "thread_1", @@ -172,7 +172,7 @@ describe("handleChatTurn", () => { await searchSources({ query: "Tesla Q4 2025 Update energy generation and storage deployments", }); - return "Grounded answer."; + return makeHarnessRunResult("Grounded answer."); }); const sources = [makeSource({ title: "TSLA-Q4-2025-Update.pdf" })]; @@ -215,7 +215,6 @@ describe("handleChatTurn", () => { sources, excludedSourceIds: [], searchSources: expect.any(Function), - readRetrievedChunk: expect.any(Function), }); expect(retrieval.query).toHaveBeenCalledWith({ namespace: "notebook-namespace", @@ -304,6 +303,7 @@ function makeMessage(overrides: Partial = {}): ChatMessage { role: overrides.role ?? "user", content: overrides.content ?? "message", citations: null, + artifacts: null, createdAt: new Date("2026-05-06T00:00:00Z"), ...overrides, }; @@ -324,3 +324,27 @@ function makeRetrievalResult( ...overrides, }; } + +function makeHarnessRunResult(text: string): HarnessRunResult { + return { + manifest: { + text, + citations: [], + artifacts: [], + unresolved: [], + }, + trace: { + ledger: { + retrievalCount: 0, + chunks: [], + assets: [], + evidenceText: [], + stopReasons: [], + failureReasons: [], + decisionTraces: [], + }, + validationErrors: [], + revisionsUsed: 0, + }, + }; +} diff --git a/src/domains/chat/service.ts b/src/domains/chat/service.ts index 0bc663a..ae256bd 100644 --- a/src/domains/chat/service.ts +++ b/src/domains/chat/service.ts @@ -9,7 +9,11 @@ import { } from "." 
import { toChatMessageView } from "./view" import type { ChatMessage, ChatThread, Source, Workspace } from "@/infrastructure/db/schema" -import type { ChatCitationView, ChatMessageView } from "@/domains/chat/types" +import type { + ChatArtifactView, + ChatCitationView, + ChatMessageView, +} from "@/domains/chat/types" export type ChatRepository = { ensureDefaultChatThread(workspaceId: string): Promise @@ -28,6 +32,7 @@ export type ChatRepository = { role: "user" | "assistant" content: string citations?: readonly ChatCitationView[] | null + artifacts?: readonly ChatArtifactView[] | null }, ): Promise } @@ -128,6 +133,7 @@ export const handleChatTurnEffect = (input: ChatTurnInput) => role: "assistant", content: answer.answer, citations: answer.citations, + artifacts: answer.artifacts, }), ) if (!assistantMessage) { @@ -138,7 +144,7 @@ export const handleChatTurnEffect = (input: ChatTurnInput) => threadId: thread.id, messages: [ toChatMessageView(userMessage), - toChatMessageView(assistantMessage, answer.citations), + toChatMessageView(assistantMessage, answer.citations, answer.artifacts), ] as [ChatMessageView, ChatMessageView], } }) diff --git a/src/domains/chat/thread-service.ts b/src/domains/chat/thread-service.ts index e22085f..f0c3401 100644 --- a/src/domains/chat/thread-service.ts +++ b/src/domains/chat/thread-service.ts @@ -6,6 +6,7 @@ import { chatRepository } from "./repository" import type { ChatMessage, ChatThread } from "@/infrastructure/db/schema" import type { DemoCatalog } from "@/integrations/knowhere-demo" import type { + ChatArtifactView, ChatCitationView, CitationView, RetrievalResultView, @@ -18,6 +19,7 @@ type AppendMessageInput = { readonly citations?: | readonly (ChatCitationView | CitationView | RetrievalResultView)[] | null + readonly artifacts?: readonly ChatArtifactView[] | null } type DemoChatThreadSeed = { diff --git a/src/domains/chat/types.ts b/src/domains/chat/types.ts index c0920e7..fdda855 100644 --- a/src/domains/chat/types.ts +++ 
b/src/domains/chat/types.ts @@ -30,11 +30,22 @@ export type ChatCitationView = CitationView & { readonly content?: string } +export type ChatArtifactView = { + readonly type: "image" | "table" + readonly ref?: string + readonly assetUrl?: string + readonly label?: string + readonly display?: boolean + readonly reason?: string + readonly citation?: ChatCitationView +} + export type ChatMessageView = { readonly id: string readonly role: "user" | "assistant" readonly content: string readonly citations?: readonly ChatCitationView[] + readonly artifacts?: readonly ChatArtifactView[] } export type ChatThreadView = { diff --git a/src/domains/chat/view.ts b/src/domains/chat/view.ts index 5908c79..03987c7 100644 --- a/src/domains/chat/view.ts +++ b/src/domains/chat/view.ts @@ -1,6 +1,7 @@ import { deriveChatThreadTitle } from "./title" import type { ChatMessage, ChatThread } from "@/infrastructure/db/schema" import type { + ChatArtifactView, ChatCitationView, ChatMessageView, ChatThreadView, @@ -18,17 +19,26 @@ export function toChatThreadView(thread: ChatThread): ChatThreadView { export function toChatMessageView( message: ChatMessage, citations: readonly ChatCitationView[] = [], + artifacts: readonly ChatArtifactView[] = [], ): ChatMessageView { const citationViews = citations.length > 0 ? [...citations] : toPersistedCitationViews(message.citations) + const artifactViews = + artifacts.length > 0 + ? [...artifacts] + : toPersistedArtifactViews(message.artifacts) + return { id: message.id, role: message.role === "assistant" ? "assistant" : "user", content: message.content, citations: citationViews, + ...(artifactViews && artifactViews.length > 0 + ? { artifacts: artifactViews } + : {}), } } @@ -56,6 +66,45 @@ function toPersistedCitationViews(value: unknown): ChatCitationView[] | undefine return citations.length > 0 ? 
citations : undefined } +function toPersistedArtifactViews(value: unknown): ChatArtifactView[] | undefined { + if (!Array.isArray(value)) return undefined + + const artifacts = value.flatMap((item): ChatArtifactView[] => { + if (!isRecord(item)) return [] + const type = getString(item.type) + if (type !== "image" && type !== "table") return [] + + const citation = + isRecord(item.citation) && isRecord(item.citation.source) + ? { + chunkType: getString(item.citation.chunkType) ?? "text", + score: getNumber(item.citation.score) ?? 0, + assetUrl: getString(item.citation.assetUrl), + description: getString(item.citation.description), + source: { + documentId: getString(item.citation.source.documentId), + sourceFileName: getString(item.citation.source.sourceFileName), + sectionPath: getString(item.citation.source.sectionPath), + }, + } + : undefined + + return [ + { + type, + ref: getString(item.ref), + assetUrl: getString(item.assetUrl), + label: getString(item.label), + display: typeof item.display === "boolean" ? item.display : undefined, + reason: getString(item.reason), + ...(citation ? { citation } : {}), + }, + ] + }) + + return artifacts.length > 0 ? artifacts : undefined +} + function getString(value: unknown): string | undefined { if (typeof value !== "string") return undefined return value.length > 0 ? 
value : undefined diff --git a/src/domains/workspace/initial-state.test.ts b/src/domains/workspace/initial-state.test.ts index 0e7ad0f..7773eb7 100644 --- a/src/domains/workspace/initial-state.test.ts +++ b/src/domains/workspace/initial-state.test.ts @@ -535,6 +535,7 @@ function makeMessage( role: "user", content: "Hello", citations: null, + artifacts: null, createdAt: new Date("2026-05-10T00:00:00.000Z"), ...overrides, } diff --git a/src/infrastructure/db/schema.ts b/src/infrastructure/db/schema.ts index 76f6e2c..585bfbb 100644 --- a/src/infrastructure/db/schema.ts +++ b/src/infrastructure/db/schema.ts @@ -233,6 +233,12 @@ export type NewChatThread = typeof chatThreads.$inferInsert; * (see `src/lib/types.ts#CitationView[]`). Stored only on assistant * rows. It intentionally excludes retrieval `content`, because that is * source chunk text and must stay upstream in Knowhere. + * + * `artifacts` is JSONB of the agent-selected display artifacts + * (see `ChatArtifactView[]`): the exact images/tables the harness chose to + * show, with their asset URLs and labels. Persisted so artifact selection + * (e.g. "only two charts") survives reload instead of falling back to every + * retrieved media citation. It carries no upstream chunk text. 
*/ export const chatMessages = pgTable( "chat_messages", @@ -244,6 +250,7 @@ export const chatMessages = pgTable( role: text("role").notNull(), content: text("content").notNull(), citations: jsonb("citations"), + artifacts: jsonb("artifacts"), createdAt: timestamp("created_at", { withTimezone: true }) .notNull() .defaultNow(), From 6a20381cd3baa66596f9d4367cc9dc3d77803e11 Mon Sep 17 00:00:00 2001 From: chengke <404835780@qq.com> Date: Fri, 12 Jun 2026 21:45:31 +0800 Subject: [PATCH 2/3] chore(chat): clear notebook release checks --- package-lock.json | 105 ++++++++++++++++++++++++++++++ src/agent-harness/runtime.test.ts | 3 +- src/integrations/knowhere-demo.ts | 11 +++- 3 files changed, 116 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index 50972cb..d5a79bb 100644 --- a/package-lock.json +++ b/package-lock.json @@ -15268,6 +15268,111 @@ "type": "github", "url": "https://github.com/sponsors/wooorm" } + }, + "node_modules/@next/swc-darwin-x64": { + "version": "16.2.4", + "resolved": "https://registry.npmjs.org/@next/swc-darwin-x64/-/swc-darwin-x64-16.2.4.tgz", + "integrity": "sha512-XhpVnUfmYWvD3YrXu55XdcAkQtOnvaI6wtQa8fuF5fGoKoxIUZ0kWPtcOfqJEWngFF/lOS9l3+O9CcownhiQxQ==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@next/swc-linux-arm64-gnu": { + "version": "16.2.4", + "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-gnu/-/swc-linux-arm64-gnu-16.2.4.tgz", + "integrity": "sha512-Mx/tjlNA3G8kg14QvuGAJ4xBwPk1tUHq56JxZ8CXnZwz1Etz714soCEzGQQzVMz4bEnGPowzkV6Xrp6wAkEWOQ==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@next/swc-linux-arm64-musl": { + "version": "16.2.4", + "resolved": "https://registry.npmjs.org/@next/swc-linux-arm64-musl/-/swc-linux-arm64-musl-16.2.4.tgz", + "integrity": 
"sha512-iVMMp14514u7Nup2umQS03nT/bN9HurK8ufylC3FZNykrwjtx7V1A7+4kvhbDSCeonTVqV3Txnv0Lu+m2oDXNg==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@next/swc-linux-x64-gnu": { + "version": "16.2.4", + "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-gnu/-/swc-linux-x64-gnu-16.2.4.tgz", + "integrity": "sha512-EZOvm1aQWgnI/N/xcWOlnS3RQBk0VtVav5Zo7n4p0A7UKyTDx047k8opDbXgBpHl4CulRqRfbw3QrX2w5UOXMQ==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@next/swc-linux-x64-musl": { + "version": "16.2.4", + "resolved": "https://registry.npmjs.org/@next/swc-linux-x64-musl/-/swc-linux-x64-musl-16.2.4.tgz", + "integrity": "sha512-h9FxsngCm9cTBf71AR4fGznDEDx1hS7+kSEiIRjq5kO1oXWm07DxVGZjCvk0SGx7TSjlUqhI8oOyz7NfwAdPoA==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "linux" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@next/swc-win32-arm64-msvc": { + "version": "16.2.4", + "resolved": "https://registry.npmjs.org/@next/swc-win32-arm64-msvc/-/swc-win32-arm64-msvc-16.2.4.tgz", + "integrity": "sha512-3NdJV5OXMSOeJYijX+bjaLge3mJBlh4ybydbT4GFoB/2hAojWHtMhl3CYlYoMrjPuodp0nzFVi4Tj2+WaMg+Ow==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } + }, + "node_modules/@next/swc-win32-x64-msvc": { + "version": "16.2.4", + "resolved": "https://registry.npmjs.org/@next/swc-win32-x64-msvc/-/swc-win32-x64-msvc-16.2.4.tgz", + "integrity": "sha512-kMVGgsqhO5YTYODD9IPGGhA6iprWidQckK3LmPeW08PIFENRmgfb4MjXHO+p//d+ts2rpjvK5gXWzXSMrPl9cw==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "win32" + ], + "engines": { + "node": ">= 10" + } } } } diff --git a/src/agent-harness/runtime.test.ts b/src/agent-harness/runtime.test.ts index 09cd630..8fd9ecd 100644 --- a/src/agent-harness/runtime.test.ts +++ b/src/agent-harness/runtime.test.ts @@ -11,6 
+11,7 @@ import type { AgentTurnInput, ContextPolicy, IntentFrame, + OutputManifest, RetrievalCapability, } from "./types" @@ -91,7 +92,7 @@ describe("agent harness runtime", () => { const state: { intent?: IntentFrame contextPolicy?: ContextPolicy - finalizedManifest?: unknown + finalizedManifest?: OutputManifest } = {} const tools = createHarnessTools({ state, diff --git a/src/integrations/knowhere-demo.ts b/src/integrations/knowhere-demo.ts index bad4c75..6953d0f 100644 --- a/src/integrations/knowhere-demo.ts +++ b/src/integrations/knowhere-demo.ts @@ -1,6 +1,6 @@ import "server-only" -import { Effect } from "effect" +import { Effect, Schema } from "effect" import { cacheLife, cacheTag } from "next/cache" export type DemoCitation = { @@ -231,7 +231,7 @@ const materializeSourcesEffect = Effect.fn("knowhereDemo.materializeSources")( readonly namespace: string readonly demoSourceIds: readonly string[] }) { - const requestBody = JSON.stringify({ + const requestBody = yield* Schema.encode(MaterializeSourcesRequestJson)({ namespace: input.namespace, demo_source_ids: input.demoSourceIds, }) @@ -254,6 +254,13 @@ const materializeSourcesEffect = Effect.fn("knowhereDemo.materializeSources")( }, ) +const MaterializeSourcesRequestJson = Schema.parseJson( + Schema.Struct({ + namespace: Schema.String, + demo_source_ids: Schema.Array(Schema.String), + }), +) + const fetchOptionalCatalogEffect = ( fetcher?: () => Effect.Effect, ) => From fb6f6c3f21d2e3d7f1281d4e730d81a1c48a4ba2 Mon Sep 17 00:00:00 2001 From: chengke <404835780@qq.com> Date: Sat, 13 Jun 2026 10:40:13 +0800 Subject: [PATCH 3/3] feat(chat): strengthen agent harness outputs --- .../knowhere_agent_harness_03093158.plan.md | 111 ----- src/agent-harness/runtime.test.ts | 64 ++- src/agent-harness/runtime.ts | 448 ++++++++++++++---- src/agent-harness/types.ts | 27 +- src/agent-harness/validator.test.ts | 119 +++++ src/agent-harness/validator.ts | 68 ++- src/components/chat-message-list.test.ts | 68 +++ 
src/components/chat-message-list.tsx | 88 +++- src/domains/chat/chat-citation-persistence.ts | 7 +- src/domains/chat/index.test.ts | 350 +++++++++++++- src/domains/chat/index.ts | 146 +++++- src/domains/chat/service.test.ts | 4 + src/domains/chat/types.ts | 6 +- src/domains/chat/view.ts | 61 ++- 14 files changed, 1339 insertions(+), 228 deletions(-) delete mode 100644 .cursor/plans/knowhere_agent_harness_03093158.plan.md diff --git a/.cursor/plans/knowhere_agent_harness_03093158.plan.md b/.cursor/plans/knowhere_agent_harness_03093158.plan.md deleted file mode 100644 index 5261f9e..0000000 --- a/.cursor/plans/knowhere_agent_harness_03093158.plan.md +++ /dev/null @@ -1,111 +0,0 @@ ---- -name: knowhere agent harness -overview: 把 notebook 问答与 typing compose 重构成一套共享的、推理驱动的 Knowhere Agent Harness:KNOWHERE 只提供证据,agent 通过结构化工作记忆(意图/上下文策略/计划/证据账本/输出清单)+ 工具 + 合约校验完成理解、规划、证据选择与交付,彻底去掉硬编码 pipeline。 -todos: - - id: harness-core - content: 新建 knowhere-agent-harness 规范源目录与同步脚本;实现核心类型(IntentFrame/ContextPolicy/RetrievalPlan/EvidenceLedger/OutputManifest)、工具集、单推理循环与合约校验器,model/retrieval 注入接口 + 单测与评测集骨架 - status: pending - - id: notebook-adapter - content: Notebook 接入 harness:重写 answerQuestionWithRetrieval/prompt/retrieval/service,暴露 RetrievalCapability,chat_messages 增 artifacts/metadata jsonb,前端按 manifest 渲染,删除 legacy query/answer 函数 - status: pending - - id: typing-adapter - content: Typing compose 迁入同一 harness:compose.ts/retrieval.ts/protocol.ts/tools.ts 改造,extractIntent/planQueries/setMode 收敛为 harness 工具,补发 meta 事件,validateComposeProtocol 升级为 typing 校验 profile - status: pending - - id: eval-cleanup - content: 跑 harness 回归评测集(无关多轮/纠错/图片数量/混合资产/NOT_FOUND/typing continue/generic rewrite);清理 legacy 代码与 notebook lockfile SDK 版本漂移 - status: pending -isProject: false ---- - -# Knowhere Agent Harness 重构方案 - -## 0. 
目标与不变量 - -- KNOWHERE-MAIN **不改动**。它已是纯 evidence provider(`POST /v1/retrieval/query` 永远 agentic、`answer_text` 恒空,返回 `evidence_text` / `results` / `referenced_chunks` / `decision_trace` / `stop_reason`)。所有理解、判断、选图、回答归外围 agent。 -- 一套核心、两个 surface:`notebook_chat` 与 `typing_compose`(及 `typing_quick_ask`)。 -- 架构哲学(2A):**单推理循环 deep agent**。harness 提供工作记忆 + 工具 + 合约校验,不写业务 if 分支。 -- 代码共享(1C):新建顶层规范源目录,脚本同步进两个仓库,后续可平滑升级为 npm 包。 - -## 1. 共享与分发(1C) - -- 新建规范源:`/Users/wuchengke/Desktop/knowhere/knowhere-agent-harness/`(纯 TS,仅依赖 `ai`、`zod`,与两边版本兼容)。 -- 同步脚本:`scripts/sync-harness.sh`(copy 到目标 + 写 `HARNESS_REV` 哈希),两仓库各放一份反向校验脚本;CI/dev 比对哈希检测漂移。 - - notebook 落点:`src/agent-harness/` - - typing 落点:`sidecar/knowhere-agent/src/harness/` -- 两边运行时都能消费:Next.js(Node)与 Bun `--compile` 都可 bundle 纯 TS。 - -## 2. 核心抽象(surface 无关) - -工作记忆全部由 agent 经工具读写,代码只做状态管理与守护: - -- `IntentFrame`: task、dependsOnPreviousTurn、retrievalNeeded、targetModalities、constraints{desiredCount,maxCount,language,outputStyle,citationRequired}、groundingPolicy。"要2张图" = agent 推理出的 `constraints.desiredCount=2`,非正则。 -- `ContextPolicy`: carryHistory("none" | "referential_only" | "full_recent" | "repair_previous")、reason、activePriorTurnIds。解决"第二个无关问题返回第一轮答案"——由 agent 判断,不靠代码塞历史。 -- `RetrievalPlan`: 可日志化步骤(retrieve / read_more / select_artifacts / compose)。 -- `EvidenceLedger`: 跨多次 retrieve 累积的 chunks / assets / decisionTrace / stopReason / failureReason。KNOWHERE 给的是候选,不等于最终输出。 -- `OutputManifest`: text、citations[]、artifacts[{type,ref,display,reason}]、unresolved[]。**最终回答不再是裸 markdown 字符串**。 - -```mermaid -flowchart LR - In["AgentTurnInput
(surface,userText,recentTurns,localContext,caps)"] --> Loop - subgraph Loop [单一 ToolLoopAgent 推理循环] - DI["declareIntent → IntentFrame"] - CP["setContextPolicy → ContextPolicy"] - RT["retrieve → KNOWHERE → EvidenceLedger"] - RD["readEvidence(本地账本)"] - SA["selectArtifacts → OutputManifest.artifacts"] - FN["finalize → OutputManifest"] - DI --> CP --> RT --> RD --> SA --> FN - end - FN --> VAL["Validator/Critic
(校验 agent 自声明合约)"] - VAL -->|"违约"| Loop - VAL -->|"通过"| Out["surface adapter 交付"] -``` - -## 3. 工具集(agent 唯一的行动方式) - -- `declareIntent(IntentFrame)`:必须最先调用(harness 门控,合约级而非话题级)。 -- `setContextPolicy(ContextPolicy)`:agent 决定是否/如何带历史。 -- `retrieve({query,modalities,topK,signalPaths,filterMode,threshold})`:封装 KNOWHERE,query 由 agent 拟定(替代 notebook `searchSources`、typing `planQueries+retrieveKnowledge`),结果进 EvidenceLedger。 -- `readEvidence({ref,offset,limit})`:读账本里某 chunk 更多内容(本地,等价 notebook `readRetrievedChunk`,无需再打 KNOWHERE)。 -- `selectArtifacts({refs[],reason})`:agent 显式声明展示哪些资产 → 这才是"只发2张图"的来源。 -- `finalize({text,citations,artifacts,unresolved})`:产出 OutputManifest,结束循环。 - -门控只在**合约层**:`declareIntent` 必须在前;`retrieve` 仅在意图声明后可用;若 `groundingPolicy=must_use_sources`,无证据时禁止 `finalize`。不再硬编码"前两步必须搜索"。 - -## 4. 合约校验器(守护而非业务硬编码) - -`finalize` 后对照 agent **自己声明的 IntentFrame** 校验: -- artifacts 数量满足 agent 声明的 desiredCount/maxCount。 -- `groundingPolicy=must_use_sources` → 必须有 citations/evidence。 -- `carryHistory=none` → 输出不得引用上一轮主题(软校验)。 -- KNOWHERE `stop_reason=no_documents_selected`/空 → 禁止编造。 -- `surface=typing_compose` → 文本必须是纯插入文本(无 markdown/meta)。 - -违约 → 回灌结构化反馈让 agent 修订一次;再不行 → 优雅返回"证据不足/需澄清"。 - -## 5. 
Surface 适配器 - -- Notebook(`notebook_chat`):thread 历史 → recentTurns;retrieval 绑 workspace namespace + excludedSourceIds;OutputManifest → assistant message{content,citations,artifacts};**前端只渲染 `message.artifacts`**,不再遍历全部 image citation。 - - 改造点:[src/domains/chat/index.ts](knowhere-notebook/src/domains/chat/index.ts)(`answerQuestionWithRetrieval` 改为调用 harness)、[src/domains/chat/prompt.ts](knowhere-notebook/src/domains/chat/prompt.ts)(prompt/agent 逻辑迁入 harness,删除 legacy `generateContextualRetrievalQuery`/`generateGroundedAnswer`)、[src/domains/chat/retrieval.ts](knowhere-notebook/src/domains/chat/retrieval.ts)(query 规范化迁入 retrieve 工具)、[src/domains/chat/service.ts](knowhere-notebook/src/domains/chat/service.ts)、[src/integrations/knowhere.ts](knowhere-notebook/src/integrations/knowhere.ts)(暴露 RetrievalCapability)、[src/components/chat-message-list.tsx](knowhere-notebook/src/components/chat-message-list.tsx)(按 artifacts 渲染)。 - - 数据模型:`chat_messages` 增 `artifacts` jsonb + `metadata` jsonb(存 intent/plan/trace 供调试),见 schema。 -- Typing(`typing_compose`):focusedSnapshot → localContext + userText;outputCapabilities={text,inlineInsertion};OutputManifest.text → 纯插入文本;保留 stdio NDJSON,并补发此前未实现的 `meta` 事件。 - - 改造点:[sidecar/knowhere-agent/src/compose.ts](knowhere-typing/sidecar/knowhere-agent/src/compose.ts)(改为 harness 驱动,extractIntent/planQueries/setMode 收敛为 harness 工具)、[sidecar/knowhere-agent/src/retrieval.ts](knowhere-typing/sidecar/knowhere-agent/src/retrieval.ts)、[sidecar/knowhere-agent/src/protocol.ts](knowhere-typing/sidecar/knowhere-agent/src/protocol.ts)、[sidecar/knowhere-agent/src/tools.ts](knowhere-typing/sidecar/knowhere-agent/src/tools.ts);`validateComposeProtocol` 升级为 harness 的 typing 校验 profile。 -- 模型注入:notebook 传 AI Gateway 模型(`CHAT_MODEL`),typing 传 OpenAI-compatible 模型;harness 自身 model-agnostic。 - -## 6. 
解决你提的三个具体问题(均由 agent 推理 + 合约保证) - -- 无关多轮污染 → `ContextPolicy` 由 agent 判定 `carryHistory=none`,并记录 reason 供调试。 -- 只发指定数量图 → agent `declareIntent.desiredCount` + `selectArtifacts`,UI 仅渲染 manifest;校验器兜底数量一致。 -- 输出不智能 → 显式 intent/plan/evidence/critic 闭环 + 可观测 trace,落到 message.metadata,出问题可还原 agent 当时搜了什么、选了什么。 -- 附带小卫生项:同步 notebook `package-lock.json` 残留 `@ontos-ai/knowhere-sdk ^0.4.0` 与 pnpm-lock 的 0.6.0。 - -## 7. 回归评测集(harness regression) - -unrelated follow-up / correction turn / image-count intent / text+image mixed / NOT_FOUND 不编造 / typing continue 必检索 / generic rewrite 不检索。作为 harness 单测与两 surface 集成测试。 - -## 8. 阶段划分 - -- Phase 1:建规范源目录 + 同步脚本 + 核心类型与工具 + 校验器 + 单测/评测集骨架。 -- Phase 2:notebook 适配器接入,artifacts 持久化 + 前端 manifest 渲染,/api/chat 响应向后兼容。 -- Phase 3:typing compose 迁入同一 harness,补 `meta` 事件,校验 profile 化。 -- Phase 4:跑回归评测,清理 legacy 代码与 lockfile。 \ No newline at end of file diff --git a/src/agent-harness/runtime.test.ts b/src/agent-harness/runtime.test.ts index 8fd9ecd..5657e5c 100644 --- a/src/agent-harness/runtime.test.ts +++ b/src/agent-harness/runtime.test.ts @@ -10,6 +10,7 @@ import { createEvidenceLedger } from "./ledger" import type { AgentTurnInput, ContextPolicy, + HarnessToolCallTrace, IntentFrame, OutputManifest, RetrievalCapability, @@ -32,6 +33,7 @@ describe("agent harness runtime", () => { const state: { intent?: IntentFrame contextPolicy?: ContextPolicy + toolCalls?: HarnessToolCallTrace[] } = {} const tools = createHarnessTools({ state, @@ -86,6 +88,13 @@ describe("agent harness runtime", () => { expect(JSON.stringify(query.mock.calls[0]?.[0])).not.toContain( "LegalAction", ) + expect(state.toolCalls?.map((call) => [call.tool, call.ok])).toEqual([ + ["retrieve", false], + ["declareIntent", true], + ["retrieve", false], + ["setContextPolicy", true], + ["retrieve", true], + ]) }) it("blocks finalize until intent and context policy are declared", async () => { @@ -93,6 +102,7 @@ describe("agent harness runtime", () => { intent?: IntentFrame 
contextPolicy?: ContextPolicy finalizedManifest?: OutputManifest + finalized?: boolean } = {} const tools = createHarnessTools({ state, @@ -138,11 +148,23 @@ describe("agent harness runtime", () => { text: "Answer.", }) expect(state.finalizedManifest).toEqual(manifest) + expect(state.finalized).toBe(true) }) - it("exposes full prior-turn content on demand through readPriorTurn", async () => { + it("exposes full prior-turn content through policy-approved readPriorTurn", async () => { + const state: { + contextPolicy?: ContextPolicy + priorTurnReads?: string[] + } = { + contextPolicy: { + carryHistory: "repair_previous", + reason: "The current request corrects the previous answer.", + activePriorTurnIds: ["turn_1"], + }, + priorTurnReads: [], + } const tools = createHarnessTools({ - state: {}, + state, ledger: createEvidenceLedger(), retrieval: { query: vi.fn() }, recentTurns: [ @@ -163,11 +185,47 @@ describe("agent harness runtime", () => { content: "The full earlier answer about the tax filing deadline.", citationLabels: ["tax.pdf / deadline"], }) + expect(state.priorTurnReads).toEqual(["turn_1"]) expect(await executeTool(tools.readPriorTurn, { id: "missing" })).toEqual({ found: false, id: "missing", - message: "No prior turn with that id is available.", + message: "readPriorTurn id must be listed in activePriorTurnIds.", + }) + }) + + it("blocks prior-turn reads when the context policy does not allow them", async () => { + const state: { contextPolicy?: ContextPolicy; priorTurnReads?: string[] } = {} + const tools = createHarnessTools({ + state, + ledger: createEvidenceLedger(), + retrieval: { query: vi.fn() }, + recentTurns: [ + { + id: "turn_1", + role: "assistant", + contentPreview: "Truncated preview...", + content: "Full content.", + }, + ], + }) + + expect(await executeTool(tools.readPriorTurn, { id: "turn_1" })).toEqual({ + found: false, + id: "turn_1", + message: "setContextPolicy must be called before readPriorTurn.", + }) + + state.contextPolicy = { + 
carryHistory: "none", + reason: "The current request is unrelated to previous turns.", + activePriorTurnIds: [], + } + expect(await executeTool(tools.readPriorTurn, { id: "turn_1" })).toEqual({ + found: false, + id: "turn_1", + message: "readPriorTurn is not allowed when carryHistory is none.", }) + expect(state.priorTurnReads).toBeUndefined() }) it("summarizes recent turns as an index instead of pasting full history as query context", () => { diff --git a/src/agent-harness/runtime.ts b/src/agent-harness/runtime.ts index e952c55..c03baac 100644 --- a/src/agent-harness/runtime.ts +++ b/src/agent-harness/runtime.ts @@ -7,6 +7,7 @@ import type { AgentTurnInput, ContextPolicy, HarnessRunResult, + HarnessToolCallTrace, HarnessTrace, IntentFrame, OutputManifest, @@ -38,6 +39,9 @@ type HarnessToolState = { intent?: IntentFrame contextPolicy?: ContextPolicy finalizedManifest?: OutputManifest + finalized?: boolean + priorTurnReads?: string[] + toolCalls?: HarnessToolCallTrace[] } const targetModalitySchema = z.enum(["text", "image", "table"]) @@ -94,13 +98,29 @@ const outputCitationSchema = z.object({ }), }) -const outputArtifactSchema = z.object({ +const selectedOutputArtifactSchema = z.object({ type: z.enum(["image", "table"]), ref: z.string().min(1), display: z.boolean(), reason: z.string().min(1), }) +const derivedTableArtifactSchema = z.object({ + type: z.literal("derived_table"), + ref: z.string().min(1), + title: z.string().min(1), + columns: z.array(z.string().min(1)).min(1).max(24), + rows: z.array(z.array(z.string()).min(1).max(24)).max(200), + sourceRefs: z.array(z.string().min(1)).min(1).max(50), + display: z.boolean(), + reason: z.string().min(1), +}) + +const outputArtifactSchema = z.union([ + selectedOutputArtifactSchema, + derivedTableArtifactSchema, +]) + const outputManifestSchema = z.object({ text: z.string(), citations: z.array(outputCitationSchema).default([]), @@ -111,7 +131,11 @@ const outputManifestSchema = z.object({ export async function 
runAgentHarness( input: RunAgentHarnessInput, ): Promise { - const state: HarnessToolState = {} + const state: HarnessToolState = { + finalized: false, + priorTurnReads: [], + toolCalls: [], + } const ledger = createEvidenceLedger() const tools = createHarnessTools({ state, @@ -141,6 +165,7 @@ export async function runAgentHarness( manifest, intent: state.intent, contextPolicy: state.contextPolicy, + finalized: state.finalized === true, ledger: ledger.snapshot(), surface: input.turn.surface, }) @@ -153,6 +178,7 @@ export async function runAgentHarness( // can repair its own contract violations instead of shipping them. revisionsUsed += 1 state.finalizedManifest = undefined + state.finalized = false messages = [ ...messages, ...(response.response.messages as ModelMessage[]), @@ -170,6 +196,9 @@ export async function runAgentHarness( intent: state.intent, contextPolicy: state.contextPolicy, ledger: ledgerSnapshot, + finalized: state.finalized === true, + priorTurnReads: [...(state.priorTurnReads ?? [])], + toolCalls: [...(state.toolCalls ?? [])], validationErrors, revisionsUsed, }, @@ -182,6 +211,8 @@ function buildRevisionFeedback(errors: readonly string[]): string { ...errors.map((error) => `- ${error}`), "", "Fix every issue and call finalize again with a corrected manifest.", + "You must call finalize; freeform assistant text is not a valid final", + "answer contract.", "Do not exceed the user's requested artifact count, only cite or display", "evidence refs that exist in the evidence ledger, and do not fabricate", "facts when evidence is missing.", @@ -199,20 +230,32 @@ export function createHarnessTools(input: { description: "Declare the user's intent before any other action. 
This is working memory, not a final answer.", inputSchema: intentFrameSchema, - execute: async (intent): Promise => { - input.state.intent = intent - return intent - }, + execute: async (intent): Promise => + traceToolCall(input.state, { + toolName: "declareIntent", + inputSummary: summarizeIntent(intent), + execute: async () => { + input.state.intent = intent + return intent + }, + summarizeOutput: summarizeIntent, + }), }), setContextPolicy: tool({ description: "Decide whether prior turns should influence this turn. Use none for unrelated follow-ups.", inputSchema: contextPolicySchema, - execute: async (policy): Promise => { - input.state.contextPolicy = policy - return policy - }, + execute: async (policy): Promise => + traceToolCall(input.state, { + toolName: "setContextPolicy", + inputSummary: summarizeContextPolicy(policy), + execute: async () => { + input.state.contextPolicy = policy + return policy + }, + summarizeOutput: summarizeContextPolicy, + }), }), retrieve: tool({ @@ -227,52 +270,58 @@ export function createHarnessTools(input: { filterMode: z.enum(["keep", "delete"]).optional(), threshold: z.number().min(0).max(1).optional(), }), - execute: async (request) => { - if (!input.state.intent) { - return { - ok: false, - message: "declareIntent must be called before retrieve.", - } - } - if (!input.state.contextPolicy) { - return { - ok: false, - message: "setContextPolicy must be called before retrieve.", - } - } - - const response = await input.retrieval.query({ - query: request.query, - modalities: request.modalities as TargetModality[], - purpose: request.purpose, - topK: request.topK, - signalPaths: request.signalPaths, - filterMode: request.filterMode, - threshold: request.threshold, - }) - const snapshot = input.ledger.addRetrievalResponse(response) - return { - ok: true, - retrievalCount: snapshot.retrievalCount, - evidenceText: response.evidenceText ?? "", - stopReason: response.stopReason ?? null, - failureReason: response.failureReason ?? 
null, - chunks: snapshot.chunks.map((chunk) => ({ - ref: chunk.ref, - kind: chunk.kind, - type: chunk.chunkType, - preview: chunk.contentPreview, - source: chunk.source, - assetRef: chunk.assetRef, - })), - assets: snapshot.assets.map((asset) => ({ - ref: asset.ref, - type: asset.type, - label: asset.label, - source: asset.source, - })), - } - }, + execute: async (request) => + traceToolCall(input.state, { + toolName: "retrieve", + inputSummary: summarizeRetrievalRequest(request), + execute: async () => { + if (!input.state.intent) { + return { + ok: false, + message: "declareIntent must be called before retrieve.", + } + } + if (!input.state.contextPolicy) { + return { + ok: false, + message: "setContextPolicy must be called before retrieve.", + } + } + + const response = await input.retrieval.query({ + query: request.query, + modalities: request.modalities as TargetModality[], + purpose: request.purpose, + topK: request.topK, + signalPaths: request.signalPaths, + filterMode: request.filterMode, + threshold: request.threshold, + }) + const snapshot = input.ledger.addRetrievalResponse(response) + return { + ok: true, + retrievalCount: snapshot.retrievalCount, + evidenceText: response.evidenceText ?? "", + stopReason: response.stopReason ?? null, + failureReason: response.failureReason ?? 
null, + chunks: snapshot.chunks.map((chunk) => ({ + ref: chunk.ref, + kind: chunk.kind, + type: chunk.chunkType, + preview: chunk.contentPreview, + source: chunk.source, + assetRef: chunk.assetRef, + })), + assets: snapshot.assets.map((asset) => ({ + ref: asset.ref, + type: asset.type, + label: asset.label, + source: asset.source, + })), + } + }, + summarizeOutput: summarizeRetrieveOutput, + }), }), readEvidence: tool({ @@ -283,8 +332,22 @@ export function createHarnessTools(input: { offset: z.number().int().min(0).optional(), limit: z.number().int().min(1).max(8_000).optional(), }), - execute: async ({ ref, offset = 0, limit = 4_000 }) => - input.ledger.read(ref, offset, limit), + execute: async (request) => + traceToolCall(input.state, { + toolName: "readEvidence", + inputSummary: { + ref: request.ref, + offset: request.offset ?? 0, + limit: request.limit ?? 4_000, + }, + execute: async () => + input.ledger.read( + request.ref, + request.offset ?? 0, + request.limit ?? 4_000, + ), + summarizeOutput: summarizeReadEvidenceOutput, + }), }), readPriorTurn: tool({ @@ -295,23 +358,55 @@ export function createHarnessTools(input: { inputSchema: z.object({ id: z.string().min(1), }), - execute: async ({ id }) => { - const priorTurn = input.recentTurns.find((turn) => turn.id === id) - if (!priorTurn) { - return { - found: false as const, - id, - message: "No prior turn with that id is available.", - } - } - return { - found: true as const, - id, - role: priorTurn.role, - content: priorTurn.content ?? priorTurn.contentPreview, - citationLabels: priorTurn.citationLabels ?? 
[], - } - }, + execute: async ({ id }) => + traceToolCall(input.state, { + toolName: "readPriorTurn", + inputSummary: { id }, + execute: async () => { + if (!input.state.contextPolicy) { + return { + found: false as const, + id, + message: "setContextPolicy must be called before readPriorTurn.", + } + } + if (input.state.contextPolicy.carryHistory === "none") { + return { + found: false as const, + id, + message: + "readPriorTurn is not allowed when carryHistory is none.", + } + } + if (!input.state.contextPolicy.activePriorTurnIds.includes(id)) { + return { + found: false as const, + id, + message: + "readPriorTurn id must be listed in activePriorTurnIds.", + } + } + const priorTurn = input.recentTurns.find((turn) => turn.id === id) + if (!priorTurn) { + return { + found: false as const, + id, + message: "No prior turn with that id is available.", + } + } + const priorTurnReads = input.state.priorTurnReads ?? [] + if (!priorTurnReads.includes(id)) priorTurnReads.push(id) + input.state.priorTurnReads = priorTurnReads + return { + found: true as const, + id, + role: priorTurn.role, + content: priorTurn.content ?? priorTurn.contentPreview, + citationLabels: priorTurn.citationLabels ?? [], + } + }, + summarizeOutput: summarizeReadPriorTurnOutput, + }), }), finalize: tool({ @@ -320,31 +415,197 @@ export function createHarnessTools(input: { "contract. 
Artifacts listed here with display=true are the exact set of " + "images/tables shown to the user; cite only refs from the evidence ledger.", inputSchema: outputManifestSchema, - execute: async (manifest) => { - if (!input.state.intent) { - return { - ok: false as const, - message: "declareIntent must be called before finalize.", - } - } - if (!input.state.contextPolicy) { - return { - ok: false as const, - message: "setContextPolicy must be called before finalize.", - } - } - input.state.finalizedManifest = manifest - return { ok: true as const, ...manifest } - }, + execute: async (manifest) => + traceToolCall(input.state, { + toolName: "finalize", + inputSummary: summarizeManifest(manifest), + execute: async () => { + if (!input.state.intent) { + return { + ok: false as const, + message: "declareIntent must be called before finalize.", + } + } + if (!input.state.contextPolicy) { + return { + ok: false as const, + message: "setContextPolicy must be called before finalize.", + } + } + input.state.finalizedManifest = manifest + input.state.finalized = true + return { ok: true as const, ...manifest } + }, + summarizeOutput: summarizeFinalizeOutput, + }), }), } as const } +async function traceToolCall(input: { + readonly toolCalls?: HarnessToolCallTrace[] +}, call: { + readonly toolName: string + readonly inputSummary: unknown + readonly execute: () => Promise + readonly summarizeOutput: (output: T) => unknown +}): Promise { + const startedAtMs = Date.now() + const startedAt = new Date(startedAtMs).toISOString() + try { + const output = await call.execute() + recordToolCall(input, { + tool: call.toolName, + ok: getToolTraceOk(output), + inputSummary: call.inputSummary, + outputSummary: call.summarizeOutput(output), + startedAt, + durationMs: Math.max(0, Date.now() - startedAtMs), + }) + return output + } catch (error) { + recordToolCall(input, { + tool: call.toolName, + ok: false, + inputSummary: call.inputSummary, + outputSummary: { + error: error instanceof Error ? 
error.message : String(error), + }, + startedAt, + durationMs: Math.max(0, Date.now() - startedAtMs), + }) + throw error + } +} + +function recordToolCall( + state: { toolCalls?: HarnessToolCallTrace[] }, + trace: HarnessToolCallTrace, +): void { + const toolCalls = state.toolCalls ?? [] + toolCalls.push(trace) + state.toolCalls = toolCalls +} + +function getToolTraceOk(output: unknown): boolean { + if (!isRecord(output)) return true + if (typeof output.ok === "boolean") return output.ok + if (typeof output.found === "boolean") return output.found + return true +} + +function summarizeIntent(intent: IntentFrame): unknown { + return { + task: intent.task, + dependsOnPreviousTurn: intent.dependsOnPreviousTurn, + retrievalNeeded: intent.retrievalNeeded, + targetModalities: intent.targetModalities, + constraints: intent.constraints, + groundingPolicy: intent.groundingPolicy, + } +} + +function summarizeContextPolicy(policy: ContextPolicy): unknown { + return { + carryHistory: policy.carryHistory, + activePriorTurnIds: policy.activePriorTurnIds, + } +} + +function summarizeRetrievalRequest(request: { + readonly query: string + readonly modalities?: readonly TargetModality[] + readonly purpose?: string + readonly topK?: number + readonly signalPaths?: readonly string[] + readonly filterMode?: string + readonly threshold?: number +}): unknown { + return { + query: request.query, + modalities: request.modalities ?? ["text"], + purpose: request.purpose, + topK: request.topK, + signalPathCount: request.signalPaths?.length ?? 0, + filterMode: request.filterMode, + threshold: request.threshold, + } +} + +function summarizeRetrieveOutput(output: unknown): unknown { + if (!isRecord(output)) return output + return { + ok: output.ok, + retrievalCount: output.retrievalCount, + stopReason: output.stopReason, + failureReason: output.failureReason, + chunkCount: Array.isArray(output.chunks) ? output.chunks.length : 0, + assetCount: Array.isArray(output.assets) ? 
output.assets.length : 0, + } +} + +function summarizeReadEvidenceOutput(output: unknown): unknown { + if (!isRecord(output)) return output + return { + found: output.found, + ref: output.ref, + contentLength: output.contentLength, + offset: output.offset, + limit: output.limit, + hasMoreContent: output.hasMoreContent, + } +} + +function summarizeReadPriorTurnOutput(output: unknown): unknown { + if (!isRecord(output)) return output + return { + found: output.found, + id: output.id, + role: output.role, + citationLabelCount: Array.isArray(output.citationLabels) + ? output.citationLabels.length + : 0, + message: output.message, + } +} + +function summarizeManifest(manifest: OutputManifest): unknown { + return { + textLength: manifest.text.length, + citationCount: manifest.citations.length, + artifactCount: manifest.artifacts.length, + displayedArtifactCount: manifest.artifacts.filter((artifact) => artifact.display) + .length, + derivedTableCount: manifest.artifacts.filter( + (artifact) => artifact.type === "derived_table", + ).length, + unresolvedCount: manifest.unresolved.length, + } +} + +function summarizeFinalizeOutput(output: unknown): unknown { + if (!isRecord(output)) return output + return { + ok: output.ok, + textLength: typeof output.text === "string" ? output.text.length : 0, + citationCount: Array.isArray(output.citations) ? output.citations.length : 0, + artifactCount: Array.isArray(output.artifacts) ? output.artifacts.length : 0, + unresolvedCount: Array.isArray(output.unresolved) + ? output.unresolved.length + : 0, + message: output.message, + } +} + +function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null +} + export function buildHarnessSystemPrompt(turn: AgentTurnInput): string { return [ "You are the outer Knowhere Agent Harness.", "KNOWHERE is only an evidence provider. 
Do not infer or control its internal navigation algorithm.", - "Your job is to understand intent, decide context use, optionally retrieve evidence, select evidence/artifacts, and finalize an output manifest.", + "Your job is to understand intent, decide context use, optionally retrieve evidence, select evidence/artifacts, create source-backed derived tables when useful, and finalize an output manifest.", "", "Required workflow:", "1. Call declareIntent first. Capture constraints like a requested image/table count in constraints.desiredCount.", @@ -362,7 +623,8 @@ export function buildHarnessSystemPrompt(turn: AgentTurnInput): string { "Output rules:", "- Final output is the OutputManifest passed to finalize, not freeform tool JSON or trailing text.", "- artifacts with display=true are the exact images/tables shown. Never display every candidate; honor constraints.desiredCount / maxCount.", - "- citations and artifacts may only reference refs returned by retrieve (in the evidence ledger).", + "- Use type=derived_table only for tables you create from evidence; every derived_table.sourceRefs entry must reference evidence in the ledger.", + "- citations and selected image/table artifact refs may only reference refs returned by retrieve (in the evidence ledger).", "- If evidence is insufficient, list it in unresolved instead of fabricating facts.", "- After a validation-feedback message, fix all listed issues and call finalize again.", `Surface: ${turn.surface}`, diff --git a/src/agent-harness/types.ts b/src/agent-harness/types.ts index 1f4cad3..1a1cce9 100644 --- a/src/agent-harness/types.ts +++ b/src/agent-harness/types.ts @@ -139,17 +139,42 @@ export type OutputArtifact = { readonly reason: string } +export type DerivedTableArtifact = { + readonly type: "derived_table" + readonly ref: string + readonly title: string + readonly columns: readonly string[] + readonly rows: readonly (readonly string[])[] + readonly sourceRefs: readonly string[] + readonly display: 
boolean + readonly reason: string +} + +export type OutputArtifactView = OutputArtifact | DerivedTableArtifact + export type OutputManifest = { readonly text: string readonly citations: readonly OutputCitation[] - readonly artifacts: readonly OutputArtifact[] + readonly artifacts: readonly OutputArtifactView[] readonly unresolved: readonly string[] } +export type HarnessToolCallTrace = { + readonly tool: string + readonly ok: boolean + readonly inputSummary: unknown + readonly outputSummary: unknown + readonly startedAt: string + readonly durationMs: number +} + export type HarnessTrace = { readonly intent?: IntentFrame readonly contextPolicy?: ContextPolicy readonly ledger: EvidenceLedgerSnapshot + readonly finalized: boolean + readonly priorTurnReads: readonly string[] + readonly toolCalls: readonly HarnessToolCallTrace[] readonly validationErrors: readonly string[] readonly revisionsUsed: number } diff --git a/src/agent-harness/validator.test.ts b/src/agent-harness/validator.test.ts index 70a7ea9..2815241 100644 --- a/src/agent-harness/validator.test.ts +++ b/src/agent-harness/validator.test.ts @@ -9,6 +9,21 @@ import type { } from "./types" describe("validateOutputManifest", () => { + it("requires finalize to be the successful output path", () => { + const validation = validateOutputManifest({ + manifest: makeManifest({ text: "Freeform answer." }), + intent: makeIntent({ groundingPolicy: "no_retrieval" }), + contextPolicy: unrelatedContextPolicy, + finalized: false, + ledger: emptyLedger, + surface: "notebook_chat", + }) + + expect(validation.errors).toContain( + "Agent must call finalize to produce the output manifest.", + ) + }) + it("requires the agent to declare intent and context policy before finalizing", () => { const validation = validateOutputManifest({ manifest: makeManifest({ text: "Answer." 
}), @@ -100,6 +115,110 @@ describe("validateOutputManifest", () => { ) }) + it("accepts source-backed derived tables and rejects missing source refs", () => { + const validation = validateOutputManifest({ + manifest: makeManifest({ + artifacts: [ + { + type: "derived_table", + ref: "derived:table:1", + title: "Revenue comparison", + columns: ["Metric", "Value"], + rows: [["Revenue", "$10M"]], + sourceRefs: ["r1:result:1"], + display: true, + reason: "Structured comparison requested by the user.", + }, + { + type: "derived_table", + ref: "derived:table:2", + title: "Invalid table", + columns: ["Metric", "Value"], + rows: [["Revenue"]], + sourceRefs: ["missing"], + display: true, + reason: "Demonstrates validation.", + }, + ], + }), + intent: makeIntent({}), + contextPolicy: unrelatedContextPolicy, + ledger: { + ...emptyLedger, + chunks: [ + { + ref: "r1:result:1", + kind: "result", + content: "Revenue was $10M.", + contentPreview: "Revenue was $10M.", + chunkType: "text", + score: 0.9, + source: { + documentId: "doc_1", + sourceFileName: "report.pdf", + sectionPath: "Revenue", + }, + }, + ], + }, + surface: "notebook_chat", + }) + + expect(validation.errors).toContain( + "Derived table source ref 'missing' was not found in the evidence ledger.", + ) + expect(validation.errors).toContain( + "Derived table row 1 has 1 cells but expected 2.", + ) + }) + + it("requires compare outputs to cite at least two evidence refs", () => { + const validation = validateOutputManifest({ + manifest: makeManifest({ + text: "A is stronger than B.", + citations: [ + { + ref: "r1:result:1", + label: "report.pdf / A", + source: { + documentId: "doc_1", + sourceFileName: "report.pdf", + sectionPath: "A", + }, + }, + ], + }), + intent: { + ...makeIntent({}), + task: "compare", + }, + contextPolicy: unrelatedContextPolicy, + ledger: { + ...emptyLedger, + chunks: [ + { + ref: "r1:result:1", + kind: "result", + content: "A is strong.", + contentPreview: "A is strong.", + chunkType: "text", + 
score: 0.9, + source: { + documentId: "doc_1", + sourceFileName: "report.pdf", + sectionPath: "A", + }, + }, + ], + }, + surface: "notebook_chat", + }) + + expect(validation.errors).toContain( + "Compare outputs that must use sources require at least two evidence refs or an explicit unresolved reason.", + ) + }) + it("keeps typing compose output insertion-ready", () => { const validation = validateOutputManifest({ manifest: makeManifest({ text: "- bullet\n- list" }), diff --git a/src/agent-harness/validator.ts b/src/agent-harness/validator.ts index 195241d..89b5e43 100644 --- a/src/agent-harness/validator.ts +++ b/src/agent-harness/validator.ts @@ -9,6 +9,7 @@ export type ManifestValidationInput = { readonly manifest: OutputManifest readonly intent?: IntentFrame readonly contextPolicy?: ContextPolicy + readonly finalized?: boolean readonly ledger: EvidenceLedgerSnapshot readonly surface: "notebook_chat" | "typing_compose" | "typing_quick_ask" } @@ -32,6 +33,7 @@ export function validateOutputManifest( validateArtifactRefs(input, errors) validateArtifactCounts(input, errors) validateGrounding(input, errors) + validateTaskEvidence(input, errors) validateTypingText(input, errors) return { @@ -44,6 +46,10 @@ function validateWorkflow( input: ManifestValidationInput, errors: string[], ): void { + if (input.finalized === false) { + errors.push("Agent must call finalize to produce the output manifest.") + } + if (!input.intent) { errors.push("Agent must declare intent before finalizing.") } @@ -63,8 +69,29 @@ function validateArtifactRefs( ]) for (const artifact of input.manifest.artifacts) { + if (artifact.type === "derived_table") { + artifact.rows.forEach((row, index) => { + if (row.length !== artifact.columns.length) { + errors.push( + `Derived table row ${index + 1} has ${row.length} cells but expected ${artifact.columns.length}.`, + ) + } + }) + + for (const ref of artifact.sourceRefs) { + if (!knownRefs.has(ref)) { + errors.push( + `Derived table source ref 
'${ref}' was not found in the evidence ledger.`, + ) + } + } + continue + } + if (!knownRefs.has(artifact.ref)) { - errors.push(`Artifact ref '${artifact.ref}' was not found in the evidence ledger.`) + errors.push( + `Artifact ref '${artifact.ref}' was not found in the evidence ledger.`, + ) } } @@ -124,6 +151,45 @@ function validateGrounding( } } +function validateTaskEvidence( + input: ManifestValidationInput, + errors: string[], +): void { + if (input.intent?.groundingPolicy !== "must_use_sources") return + if (input.manifest.unresolved.length > 0) return + + const refs = getOutputEvidenceRefs(input.manifest) + + if (input.intent.task === "compare" && refs.size < 2) { + errors.push( + "Compare outputs that must use sources require at least two evidence refs or an explicit unresolved reason.", + ) + } + + if (input.intent.task === "summarize" && refs.size < 1) { + errors.push( + "Summaries that must use sources require at least one evidence ref or an explicit unresolved reason.", + ) + } +} + +function getOutputEvidenceRefs(manifest: OutputManifest): Set { + const refs = new Set() + + for (const citation of manifest.citations) refs.add(citation.ref) + + for (const artifact of manifest.artifacts) { + if (!artifact.display) continue + if (artifact.type === "derived_table") { + artifact.sourceRefs.forEach((ref) => refs.add(ref)) + } else { + refs.add(artifact.ref) + } + } + + return refs +} + function validateTypingText( input: ManifestValidationInput, errors: string[], diff --git a/src/components/chat-message-list.test.ts b/src/components/chat-message-list.test.ts index fa1a181..12644ed 100644 --- a/src/components/chat-message-list.test.ts +++ b/src/components/chat-message-list.test.ts @@ -167,6 +167,40 @@ describe("ChatMessageList", () => { expect(screen.queryByRole("img", { name: "其他候选图片" })).toBeNull(); }); + it("does not fall back to image citations when a harness message has empty artifacts", () => { + render( + React.createElement(ChatMessageList, { + 
messages: [ + { + id: "assistant_1", + role: "assistant", + content: "I could not select a display image.", + citations: [ + { + chunkType: "image", + score: 0.9, + assetUrl: "https://blob.example/images/candidate.jpg", + source: { + documentId: "doc_1", + sourceFileName: "source.pdf", + sectionPath: "Candidate image", + }, + }, + ], + artifacts: [], + }, + ], + }), + ); + + expect(screen.queryByRole("img")).toBeNull(); + expect( + screen.getByRole("button", { + name: "Open source source.pdf · Candidate image", + }), + ).toBeTruthy(); + }); + it("renders assistant markdown with GitHub-flavored tables", () => { render( React.createElement(ChatMessageList, { @@ -190,6 +224,40 @@ describe("ChatMessageList", () => { expect(screen.getByRole("cell", { name: "Ready" })).toBeTruthy(); }); + it("renders derived table artifacts as structured tables", () => { + render( + React.createElement(ChatMessageList, { + messages: [ + { + id: "assistant_1", + role: "assistant", + content: "I organized the comparison.", + artifacts: [ + { + type: "derived_table", + ref: "derived:table:plans", + title: "Plan comparison", + columns: ["Plan", "Cost"], + rows: [ + ["Plan A", "$10M"], + ["Plan B", "$8M"], + ], + sourceRefs: ["r1:result:1", "r1:result:2"], + display: true, + reason: "Comparison requested.", + }, + ], + }, + ], + }), + ); + + expect(screen.getByText("Plan comparison")).toBeTruthy(); + expect(screen.getByRole("table")).toBeTruthy(); + expect(screen.getByRole("columnheader", { name: "Plan" })).toBeTruthy(); + expect(screen.getByRole("cell", { name: "$8M" })).toBeTruthy(); + }); + it("keeps user markdown-looking text literal", () => { render( React.createElement(ChatMessageList, { diff --git a/src/components/chat-message-list.tsx b/src/components/chat-message-list.tsx index c112314..b4030a6 100644 --- a/src/components/chat-message-list.tsx +++ b/src/components/chat-message-list.tsx @@ -32,6 +32,13 @@ type DisplayImageArtifact = { readonly label: string; }; +type 
DisplayDerivedTableArtifact = { + readonly artifactId: string; + readonly title: string; + readonly columns: readonly string[]; + readonly rows: readonly (readonly string[])[]; +}; + const assistantMarkdownComponents: Components = { p: ({ children }) => (

{children}

@@ -264,14 +271,25 @@ function MessageBubble({ sourceTitlesByDocumentId, ); const displayImageCitations = - displayImageArtifacts.length > 0 + message.artifacts !== undefined ? displayImageArtifacts : getDisplayImageCitations(message, sourceTitlesByDocumentId); + const displayDerivedTables = getDisplayDerivedTableArtifacts(message); return (
+ {displayDerivedTables.length > 0 && ( +
+ {displayDerivedTables.map((artifact) => ( + + ))} +
+ )} {displayImageCitations.length > 0 && (

@@ -331,6 +349,54 @@ function MessageBubble({ ); } +function DerivedTableArtifactView({ + artifact, +}: { + readonly artifact: DisplayDerivedTableArtifact; +}): ReactElement { + return ( +

+
+ {artifact.title} +
+
+ + + + {artifact.columns.map((column, index) => ( + + ))} + + + + {artifact.rows.map((row, rowIndex) => ( + + {artifact.columns.map((_, columnIndex) => ( + + ))} + + ))} + +
+ {column} +
+ {row[columnIndex] ?? ""} +
+
+
+ ); +} + function AssistantMessageContent({ content, }: { @@ -423,6 +489,26 @@ function getDisplayImageArtifacts( return imageArtifacts; } +function getDisplayDerivedTableArtifacts( + message: ChatMessageView, +): readonly DisplayDerivedTableArtifact[] { + const tables: DisplayDerivedTableArtifact[] = []; + + for (const [index, artifact] of (message.artifacts ?? []).entries()) { + if (artifact.display === false || artifact.type !== "derived_table") continue; + if (!artifact.title || !artifact.columns || !artifact.rows) continue; + + tables.push({ + artifactId: `${message.id}:derived-table:${index}`, + title: artifact.title, + columns: artifact.columns, + rows: artifact.rows, + }); + } + + return tables; +} + function getArtifactLabel( artifact: ChatArtifactView, sourceTitlesByDocumentId: Readonly>, diff --git a/src/domains/chat/chat-citation-persistence.ts b/src/domains/chat/chat-citation-persistence.ts index 1b160f0..bf2a1a1 100644 --- a/src/domains/chat/chat-citation-persistence.ts +++ b/src/domains/chat/chat-citation-persistence.ts @@ -34,7 +34,8 @@ function normalizeCitations( function normalizeArtifacts( artifacts: readonly ChatArtifactView[] | null | undefined, ): ChatArtifactView[] | null { - if (!artifacts || artifacts.length === 0) return null + if (!artifacts) return null + if (artifacts.length === 0) return [] return artifacts.map(toArtifactView) } @@ -42,6 +43,10 @@ function toArtifactView(artifact: ChatArtifactView): ChatArtifactView { return { type: artifact.type, ref: artifact.ref, + title: artifact.title, + columns: artifact.columns, + rows: artifact.rows, + sourceRefs: artifact.sourceRefs, assetUrl: artifact.assetUrl, label: artifact.label, display: artifact.display, diff --git a/src/domains/chat/index.test.ts b/src/domains/chat/index.test.ts index 0640de2..e5066e7 100644 --- a/src/domains/chat/index.test.ts +++ b/src/domains/chat/index.test.ts @@ -86,6 +86,7 @@ describe("answerQuestionWithRetrieval", () => { expect(answer).toEqual({ answer: "The 
answer is grounded.", citations: [result], + artifacts: [], }); }); @@ -443,7 +444,53 @@ describe("answerQuestionWithRetrieval", () => { stopReasons: [], failureReasons: [], decisionTraces: [], - chunks: [], + chunks: [ + { + ref: "r1:result:1", + kind: "result", + content: "", + contentPreview: "", + chunkType: "image", + score: 0.9, + assetUrl: frontAssetUrl, + assetRef: "asset:r1:result:1", + source: { + documentId: "doc_identity", + sourceFileName: "document-generated.pdf", + sectionPath: "身份证正面", + }, + }, + { + ref: "r1:result:2", + kind: "result", + content: "", + contentPreview: "", + chunkType: "image", + score: 0.88, + assetUrl: backAssetUrl, + assetRef: "asset:r1:result:2", + source: { + documentId: "doc_identity", + sourceFileName: "document-generated.pdf", + sectionPath: "身份证反面", + }, + }, + { + ref: "r1:result:3", + kind: "result", + content: "", + contentPreview: "", + chunkType: "image", + score: 0.7, + assetUrl: extraAssetUrl, + assetRef: "asset:r1:result:3", + source: { + documentId: "doc_identity", + sourceFileName: "document-generated.pdf", + sectionPath: "营业执照", + }, + }, + ], assets: [ { ref: "asset:r1:result:1", @@ -498,6 +545,9 @@ describe("answerQuestionWithRetrieval", () => { reason: "The current turn is self-contained.", activePriorTurnIds: [], }, + finalized: true, + priorTurnReads: [], + toolCalls: [], }, }; return harnessResult; @@ -538,6 +588,300 @@ describe("answerQuestionWithRetrieval", () => { }, ], ); + expect(answer.citations.map((citation) => citation.assetUrl)).toEqual([ + frontAssetUrl, + backAssetUrl, + ]); + }); + + it("returns a safe fallback when the harness still has validation errors", async () => { + const retrieval = { + query: vi.fn().mockResolvedValue({ + results: [makeRetrievalResult()], + evidenceText: "Grounding content", + referencedChunks: [], + namespace: "notebook-workspace", + query: "What changed?", + routerUsed: "workflow_single_step", + answerText: null, + }), + }; + const generateAnswer = vi.fn(async ({ 
searchSources }) => { + await searchSources({ query: "What changed?" }); + return { + ...makeHarnessRunResult("This invalid answer should not ship."), + trace: { + ...makeHarnessRunResult("").trace, + finalized: false, + validationErrors: [ + "Agent must call finalize to produce the output manifest.", + ], + }, + }; + }); + + const answer = await Effect.runPromise( + answerQuestionWithRetrieval({ + question: "What changed?", + namespace: "notebook-workspace", + sources: [makeSource()], + excludedSourceIds: [], + retrieval, + generateAnswer, + messages: [], + }), + ); + + expect(answer).toEqual({ + answer: + "I couldn't safely finish that response because the agent output did not pass Notebook's validation checks. Please try again.", + citations: [], + artifacts: [], + }); + }); + + it("keeps image-only harness output instead of treating it as no results", async () => { + const assetUrl = "https://blob.example/images/diagram.png"; + const retrieval = { + query: vi.fn().mockResolvedValue({ + results: [ + makeRetrievalResult({ + content: "", + chunkType: "image", + assetUrl, + source: { + documentId: "doc_diagram", + sourceFileName: "generated.pdf", + sectionPath: "Diagram", + }, + }), + ], + evidenceText: "Diagram candidate.", + referencedChunks: [], + namespace: "notebook-workspace", + query: "diagram", + routerUsed: "workflow_single_step", + answerText: null, + }), + }; + const generateAnswer = vi.fn(async ({ searchSources }) => { + await searchSources({ query: "diagram", targetContent: "image" }); + return { + manifest: { + text: "", + citations: [], + artifacts: [ + { + type: "image", + ref: "asset:r1:result:1", + display: true, + reason: "Requested diagram", + }, + ], + unresolved: [], + }, + trace: { + ...makeHarnessRunResult("").trace, + finalized: true, + priorTurnReads: [], + toolCalls: [], + ledger: { + retrievalCount: 1, + evidenceText: ["Diagram candidate."], + stopReasons: [], + failureReasons: [], + decisionTraces: [], + chunks: [ + { + ref: 
"r1:result:1", + kind: "result", + content: "", + contentPreview: "", + chunkType: "image", + score: 0.9, + assetUrl, + assetRef: "asset:r1:result:1", + source: { + documentId: "doc_diagram", + sourceFileName: "generated.pdf", + sectionPath: "Diagram", + }, + }, + ], + assets: [ + { + ref: "asset:r1:result:1", + chunkRef: "r1:result:1", + type: "image", + assetUrl, + label: "generated.pdf / Diagram / image", + source: { + documentId: "doc_diagram", + sourceFileName: "generated.pdf", + sectionPath: "Diagram", + }, + }, + ], + }, + }, + } satisfies HarnessRunResult; + }); + + const answer = await Effect.runPromise( + answerQuestionWithRetrieval({ + question: "Show me the diagram.", + namespace: "notebook-workspace", + sources: [ + makeSource({ title: "diagram.pdf", knowhereDocumentId: "doc_diagram" }), + ], + excludedSourceIds: [], + retrieval, + generateAnswer, + messages: [], + }), + ); + + expect(answer.answer).not.toBe("I couldn't find that in your sources."); + expect(answer.artifacts?.map((artifact) => artifact.assetUrl)).toEqual([ + assetUrl, + ]); + expect(answer.citations.map((citation) => citation.assetUrl)).toEqual([ + assetUrl, + ]); + }); + + it("returns source-backed derived table artifacts from the harness manifest", async () => { + const retrieval = { + query: vi.fn().mockResolvedValue({ + results: [ + makeRetrievalResult({ + content: "Plan A costs $10M and takes 6 months.", + source: { + documentId: "doc_plan_a", + sourceFileName: "plan-a.pdf", + sectionPath: "Cost", + }, + }), + makeRetrievalResult({ + content: "Plan B costs $8M and takes 9 months.", + source: { + documentId: "doc_plan_b", + sourceFileName: "plan-b.pdf", + sectionPath: "Cost", + }, + }), + ], + evidenceText: "Plan comparison evidence.", + referencedChunks: [], + namespace: "notebook-workspace", + query: "compare plan costs timelines", + routerUsed: "workflow_single_step", + answerText: null, + }), + }; + const generateAnswer = vi.fn(async ({ searchSources }) => { + await 
searchSources({ query: "compare plan costs timelines" }); + return { + manifest: { + text: "I organized the comparison into a table.", + citations: [], + artifacts: [ + { + type: "derived_table", + ref: "derived:table:plans", + title: "Plan comparison", + columns: ["Plan", "Cost", "Timeline"], + rows: [ + ["Plan A", "$10M", "6 months"], + ["Plan B", "$8M", "9 months"], + ], + sourceRefs: ["r1:result:1", "r1:result:2"], + display: true, + reason: "The user asked for a comparison table.", + }, + ], + unresolved: [], + }, + trace: { + ...makeHarnessRunResult("").trace, + finalized: true, + ledger: { + retrievalCount: 1, + evidenceText: ["Plan comparison evidence."], + stopReasons: [], + failureReasons: [], + decisionTraces: [], + chunks: [ + { + ref: "r1:result:1", + kind: "result", + content: "Plan A costs $10M and takes 6 months.", + contentPreview: "Plan A costs $10M and takes 6 months.", + chunkType: "text", + score: 0.9, + source: { + documentId: "doc_plan_a", + sourceFileName: "plan-a.pdf", + sectionPath: "Cost", + }, + }, + { + ref: "r1:result:2", + kind: "result", + content: "Plan B costs $8M and takes 9 months.", + contentPreview: "Plan B costs $8M and takes 9 months.", + chunkType: "text", + score: 0.88, + source: { + documentId: "doc_plan_b", + sourceFileName: "plan-b.pdf", + sectionPath: "Cost", + }, + }, + ], + assets: [], + }, + }, + } satisfies HarnessRunResult; + }); + + const answer = await Effect.runPromise( + answerQuestionWithRetrieval({ + question: "Compare the plans in a table.", + namespace: "notebook-workspace", + sources: [ + makeSource({ title: "Plan A.pdf", knowhereDocumentId: "doc_plan_a" }), + makeSource({ + id: "source_plan_b", + title: "Plan B.pdf", + knowhereDocumentId: "doc_plan_b", + }), + ], + excludedSourceIds: [], + retrieval, + generateAnswer, + messages: [], + }), + ); + + expect(answer.artifacts).toEqual([ + { + type: "derived_table", + ref: "derived:table:plans", + title: "Plan comparison", + columns: ["Plan", "Cost", 
"Timeline"], + rows: [ + ["Plan A", "$10M", "6 months"], + ["Plan B", "$8M", "9 months"], + ], + sourceRefs: ["r1:result:1", "r1:result:2"], + display: true, + reason: "The user asked for a comparison table.", + }, + ]); + expect(answer.citations.map((citation) => citation.source.sourceFileName)).toEqual( + ["Plan A.pdf", "Plan B.pdf"], + ); }); it("turns retrieved evidence image filenames into image citations", async () => { @@ -654,6 +998,7 @@ describe("answerQuestionWithRetrieval", () => { expect(answer).toEqual({ answer: "I couldn't find that in your sources.", citations: [], + artifacts: [], }); }); @@ -1158,6 +1503,9 @@ function makeHarnessRunResult(text: string): HarnessRunResult { failureReasons: [], decisionTraces: [], }, + finalized: true, + priorTurnReads: [], + toolCalls: [], validationErrors: [], revisionsUsed: 0, }, diff --git a/src/domains/chat/index.ts b/src/domains/chat/index.ts index 7c83b62..d9070f7 100644 --- a/src/domains/chat/index.ts +++ b/src/domains/chat/index.ts @@ -11,6 +11,7 @@ import type { ChatCitationView, } from "@/domains/chat/types" import type { + DerivedTableArtifact, EvidenceAsset, EvidenceChunk, HarnessRunResult, @@ -44,6 +45,8 @@ const MAX_CITATION_RESULTS = 20 const KNOWHERE_RESPONSE_TEXT_LOG_LIMIT = 200 const KNOWHERE_CHUNK_LOG_LIMIT = 100 const NO_RESULTS_ANSWER = "I couldn't find that in your sources." +const HARNESS_VALIDATION_FAILURE_ANSWER = + "I couldn't safely finish that response because the agent output did not pass Notebook's validation checks. Please try again." 
const RAW_URL_PATTERN = /https?:\/\/[^\s)\]}>"']+/g const REDACTED_MEDIA_URL = "[media asset URL hidden]" const RETRIEVAL_TARGET_CONTENT_DATA_TYPES: Readonly< @@ -183,14 +186,36 @@ export const answerQuestionWithRetrieval = ( harnessValidationErrorCount: generatedAnswer.trace.validationErrors.length, revisionsUsed: generatedAnswer.trace.revisionsUsed, }) + if (generatedAnswer.trace.validationErrors.length > 0) { + logger.warn("chat-agent: validation failed; returning safe fallback", { + validationErrors: generatedAnswer.trace.validationErrors, + revisionsUsed: generatedAnswer.trace.revisionsUsed, + finalized: generatedAnswer.trace.finalized, + intentTask: generatedAnswer.trace.intent?.task ?? null, + retrievalCallCount: retrievalResponses.length, + }) + return { + answer: HARNESS_VALIDATION_FAILURE_ANSWER, + citations: [] as ChatCitationView[], + artifacts: [] as ChatArtifactView[], + } + } const rawResults = selectCitationRawResults({ generatedAnswer, retrievalResponses, sources: input.sources, }) - if (rawResults.length === 0 && generatedAnswer.manifest.text.trim().length === 0) { - return { answer: NO_RESULTS_ANSWER, citations: [] as ChatCitationView[] } + if ( + rawResults.length === 0 && + generatedAnswer.manifest.text.trim().length === 0 && + !hasDisplayedManifestArtifacts(generatedAnswer) + ) { + return { + answer: NO_RESULTS_ANSWER, + citations: [] as ChatCitationView[], + artifacts: [] as ChatArtifactView[], + } } const results = yield* Effect.tryPromise(() => @@ -215,7 +240,7 @@ export const answerQuestionWithRetrieval = ( return { answer, citations: toChatCitationViews(citationResults, answer), - ...(artifacts && artifacts.length > 0 ? { artifacts } : {}), + artifacts: artifacts ?? 
[], } }) @@ -241,12 +266,15 @@ function toChatArtifactViewsFromHarness( let displayedArtifactCount = 0 for (const artifact of result.manifest.artifacts) { - const artifactView = resolveHarnessArtifactView({ - artifact, - assetsByRef, - chunksByRef, - sources, - }) + const artifactView = + artifact.type === "derived_table" + ? toDerivedTableArtifactView(artifact) + : resolveHarnessArtifactView({ + artifact, + assetsByRef, + chunksByRef, + sources, + }) if (!artifactView) continue const isDisplayed = artifactView.display !== false @@ -265,6 +293,21 @@ function toChatArtifactViewsFromHarness( return artifacts.length > 0 ? artifacts : undefined } +function toDerivedTableArtifactView( + artifact: DerivedTableArtifact, +): ChatArtifactView { + return { + type: "derived_table", + ref: artifact.ref, + title: artifact.title, + columns: artifact.columns, + rows: artifact.rows, + sourceRefs: artifact.sourceRefs, + display: artifact.display, + reason: artifact.reason, + } +} + function getHarnessArtifactDisplayLimit(result: HarnessRunResult): number | null { const constraints = result.trace.intent?.constraints const limits = [constraints?.desiredCount, constraints?.maxCount].filter( @@ -486,6 +529,10 @@ function selectCitationRawResults(input: { }): RetrievalResult[] { const curated = mapManifestCitationsToResults(input.generatedAnswer) if (curated.length > 0) return curated + const displayedArtifacts = mapDisplayedManifestArtifactsToResults( + input.generatedAnswer, + ) + if (displayedArtifacts.length > 0) return displayedArtifacts return collectRetrievalResults(input.retrievalResponses, input.sources) } @@ -546,6 +593,87 @@ function resolveChunkForAssetRef( return chunksByRef.get(asset.chunkRef) } +function mapDisplayedManifestArtifactsToResults( + result: HarnessRunResult, +): RetrievalResult[] { + const chunksByRef = new Map( + result.trace.ledger.chunks.map((chunk): readonly [string, EvidenceChunk] => [ + chunk.ref, + chunk, + ]), + ) + const assetsByRef = new Map( + 
result.trace.ledger.assets.map((asset): readonly [string, EvidenceAsset] => [ + asset.ref, + asset, + ]), + ) + + const results: RetrievalResult[] = [] + const seenKeys = new Set() + const displayLimit = getHarnessArtifactDisplayLimit(result) + + for (const artifact of result.manifest.artifacts) { + if (!artifact.display) continue + if (typeof displayLimit === "number" && results.length >= displayLimit) { + break + } + + if (artifact.type === "derived_table") { + for (const sourceRef of artifact.sourceRefs) { + const chunk = + chunksByRef.get(sourceRef) ?? + resolveChunkForAssetRef(sourceRef, assetsByRef, chunksByRef) + if (!chunk) continue + + const retrievalResult = toRetrievalResultFromEvidenceChunk(chunk) + const key = getRetrievalResultKey(retrievalResult) + if (seenKeys.has(key)) continue + + seenKeys.add(key) + results.push(retrievalResult) + if (results.length >= MAX_CITATION_RESULTS) return results + } + continue + } + + const chunk = + chunksByRef.get(artifact.ref) ?? + resolveChunkForAssetRef(artifact.ref, assetsByRef, chunksByRef) + if (!chunk) continue + + const retrievalResult = toRetrievalResultFromEvidenceChunk(chunk) + const key = getRetrievalResultKey(retrievalResult) + if (seenKeys.has(key)) continue + + seenKeys.add(key) + results.push(retrievalResult) + if (results.length >= MAX_CITATION_RESULTS) break + } + + return results +} + +function toRetrievalResultFromEvidenceChunk( + chunk: EvidenceChunk, +): RetrievalResult { + return { + content: chunk.content, + chunkType: chunk.chunkType, + score: chunk.score, + ...(chunk.assetUrl ? { assetUrl: chunk.assetUrl } : {}), + source: { + documentId: chunk.source.documentId ?? undefined, + sourceFileName: chunk.source.sourceFileName ?? undefined, + sectionPath: chunk.source.sectionPath ?? 
undefined, + }, + } +} + +function hasDisplayedManifestArtifacts(result: HarnessRunResult): boolean { + return result.manifest.artifacts.some((artifact) => artifact.display) +} + function collectRetrievalResults( responses: readonly RetrievalQueryResponse[], sources: readonly AnswerQuestionInput["sources"][number][], diff --git a/src/domains/chat/service.test.ts b/src/domains/chat/service.test.ts index 71da889..e931f04 100644 --- a/src/domains/chat/service.test.ts +++ b/src/domains/chat/service.test.ts @@ -78,6 +78,7 @@ describe("handleChatTurn", () => { role: "assistant", content: "Grounded answer.", citations: [makeRetrievalResult()], + artifacts: [], }); }); @@ -343,6 +344,9 @@ function makeHarnessRunResult(text: string): HarnessRunResult { failureReasons: [], decisionTraces: [], }, + finalized: true, + priorTurnReads: [], + toolCalls: [], validationErrors: [], revisionsUsed: 0, }, diff --git a/src/domains/chat/types.ts b/src/domains/chat/types.ts index fdda855..f3f987d 100644 --- a/src/domains/chat/types.ts +++ b/src/domains/chat/types.ts @@ -31,8 +31,12 @@ export type ChatCitationView = CitationView & { } export type ChatArtifactView = { - readonly type: "image" | "table" + readonly type: "image" | "table" | "derived_table" readonly ref?: string + readonly title?: string + readonly columns?: readonly string[] + readonly rows?: readonly (readonly string[])[] + readonly sourceRefs?: readonly string[] readonly assetUrl?: string readonly label?: string readonly display?: boolean diff --git a/src/domains/chat/view.ts b/src/domains/chat/view.ts index 03987c7..9afc4f6 100644 --- a/src/domains/chat/view.ts +++ b/src/domains/chat/view.ts @@ -19,7 +19,7 @@ export function toChatThreadView(thread: ChatThread): ChatThreadView { export function toChatMessageView( message: ChatMessage, citations: readonly ChatCitationView[] = [], - artifacts: readonly ChatArtifactView[] = [], + artifacts?: readonly ChatArtifactView[], ): ChatMessageView { const citationViews = 
citations.length > 0 @@ -27,7 +27,7 @@ export function toChatMessageView( : toPersistedCitationViews(message.citations) const artifactViews = - artifacts.length > 0 + artifacts !== undefined ? [...artifacts] : toPersistedArtifactViews(message.artifacts) @@ -36,9 +36,7 @@ export function toChatMessageView( role: message.role === "assistant" ? "assistant" : "user", content: message.content, citations: citationViews, - ...(artifactViews && artifactViews.length > 0 - ? { artifacts: artifactViews } - : {}), + ...(artifactViews !== undefined ? { artifacts: artifactViews } : {}), } } @@ -68,11 +66,14 @@ function toPersistedCitationViews(value: unknown): ChatCitationView[] | undefine function toPersistedArtifactViews(value: unknown): ChatArtifactView[] | undefined { if (!Array.isArray(value)) return undefined + if (value.length === 0) return [] const artifacts = value.flatMap((item): ChatArtifactView[] => { if (!isRecord(item)) return [] const type = getString(item.type) - if (type !== "image" && type !== "table") return [] + if (type !== "image" && type !== "table" && type !== "derived_table") { + return [] + } const citation = isRecord(item.citation) && isRecord(item.citation.source) @@ -89,6 +90,37 @@ function toPersistedArtifactViews(value: unknown): ChatArtifactView[] | undefine } : undefined + if (type === "derived_table") { + const title = getString(item.title) + const columns = getStringArray(item.columns) + const rows = getStringRows(item.rows) + const sourceRefs = getStringArray(item.sourceRefs) + + if ( + !title || + !columns || + columns.length === 0 || + !rows || + !sourceRefs || + sourceRefs.length === 0 + ) { + return [] + } + + return [ + { + type, + ref: getString(item.ref), + title, + columns, + rows, + sourceRefs, + display: typeof item.display === "boolean" ? 
item.display : undefined, + reason: getString(item.reason), + }, + ] + } + return [ { type, @@ -114,6 +146,23 @@ function getNumber(value: unknown): number | undefined { return typeof value === "number" && Number.isFinite(value) ? value : undefined } +function getStringArray(value: unknown): readonly string[] | undefined { + if (!Array.isArray(value)) return undefined + if (!value.every((item): item is string => typeof item === "string")) { + return undefined + } + return value +} + +function getStringRows( + value: unknown, +): readonly (readonly string[])[] | undefined { + if (!Array.isArray(value)) return undefined + const rows = value.map(getStringArray) + if (rows.some((row) => row === undefined)) return undefined + return rows as readonly (readonly string[])[] +} + function isRecord(value: unknown): value is Record { return typeof value === "object" && value !== null }