diff --git a/API.en.md b/API.en.md index 46601886..c5b8f9b7 100644 --- a/API.en.md +++ b/API.en.md @@ -40,9 +40,9 @@ Docs: [Overview](README.en.md) / [Architecture](docs/ARCHITECTURE.en.md) / [Depl - OpenAI / Claude / Gemini protocols are now mounted on one shared `chi` router tree assembled in `internal/server/router.go`. - Adapter responsibilities are streamlined to: **request normalization → DeepSeek invocation → protocol-shaped rendering**, reducing legacy split-logic paths. -- Tool-calling semantics are aligned between Go and Node runtime: models should output the fullwidth-separator DSML shell `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; DS2API also accepts the halfwidth DSML wrapper `<|DSML|tool_calls>`, DSML wrapper aliases such as ``, `<|tool_calls>`, `<|tool_calls>`, common DSML separator drift such as `<|DSML tool_calls>`, collapsed DSML local names such as ``, control-separator drift such as `` / raw STX `\x02`, CJK angle bracket, fullwidth-bang / ideographic-comma separator drift, PascalCase local-name drift, and trailing attribute separator drift such as `...〈/DSM|parameter〉`, `<!DSML!invoke name=“Bash”>`, `<、DSML、tool_calls>`, ``, or ``, arbitrary protocol prefixes such as ``, and legacy canonical XML `` → `` → ``. The scanner normalizes fixed local names (`tool_calls` / `invoke` / `parameter`) with non-structural separators before or after them back to XML before parsing, and also tolerates CDATA opener drift such as `<![CDATA[` / `<、[CDATA[`; only wrapped tool blocks or the narrow missing-opening-wrapper repair path enter the tool path, while bare `` does not count as supported syntax. JSON literal parameter bodies are preserved as structured values, explicit empty or whitespace-only parameters are preserved as empty strings, malformed complete wrappers are released as plain text, and loose CDATA is narrowly repaired at final parse/flush when it can preserve a complete outer tool call. 
+- Tool-calling semantics are aligned between Go and Node runtime: models should output the halfwidth-pipe DSML shell `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; DS2API also accepts DSML wrapper aliases such as `` and `<|tool_calls>`, common DSML separator drift such as `<|DSML tool_calls>`, collapsed DSML local names such as ``, control-separator drift such as `` / raw STX `\x02`, CJK angle bracket, fullwidth-bang / ideographic-comma separator drift, PascalCase local-name drift, and trailing attribute separator drift such as `...〈/DSM|parameter〉`, `<!DSML!invoke name=“Bash”>`, `<、DSML、tool_calls>`, ``, or ``, arbitrary protocol prefixes such as ``, and legacy canonical XML `` → `` → ``. The scanner normalizes fixed local names (`tool_calls` / `invoke` / `parameter`) with non-structural separators before or after them back to XML before parsing, and also tolerates CDATA opener drift such as `<![CDATA[` / `<、[CDATA[`; only wrapped tool blocks or the narrow missing-opening-wrapper repair path enter the tool path, while bare `` does not count as supported syntax. JSON literal parameter bodies are preserved as structured values, explicit empty or whitespace-only parameters are preserved as empty strings, malformed complete wrappers are released as plain text, and loose CDATA is narrowly repaired at final parse/flush when it can preserve a complete outer tool call. - `Admin API` separates static config from runtime policy: `/admin/config*` for configuration state, `/admin/settings*` for runtime behavior. -- When upstream returns a thinking-only response with no visible text, the Go main path for both streaming and non-streaming completions retries once in the same DeepSeek session: it appends the prompt suffix `"Previous reply had no visible output. Please regenerate the visible final answer or tool call now."` and sets `parent_message_id`. 
If that same-account retry would still end as `429 upstream_empty_output`, managed-account mode switches to the next available account, creates a fresh session, and retries the original payload once before returning 429. +- When upstream returns a thinking-only response with no visible text, the Go main path and the Vercel Node streaming path retry once in the same DeepSeek session: the retry appends the prompt suffix `"Previous reply had no visible output. Please regenerate the visible final answer or tool call now."` and sets `parent_message_id`. If that same-account retry would still end as `429 upstream_empty_output`, managed-account mode switches to the next available account, creates a fresh session, and retries the original payload once before returning 429. - Citation/reference marker boundary: streaming output hides upstream `[citation:N]` / `[reference:N]` placeholders by default; non-stream output converts DeepSeek search reference markers into Markdown links. --- @@ -355,7 +355,7 @@ When `tools` is present, DS2API performs anti-leak handling: Additional notes: -- The parser treats the recommended DSML shell tool blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`), halfwidth DSML shell blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`), DSML wrapper aliases (``, `<|tool_calls>`, `<|tool_calls>`), common DSML separator drift (`<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`), collapsed DSML local names (`` / `` / ``), control-separator drift (`` / raw STX `\x02`), CJK angle bracket, fullwidth-bang / ideographic-comma separator drift, PascalCase local-name drift, and trailing attribute separator drift (`...〈/DSM|parameter〉` / `<!DSML!invoke name=“Bash”>` / `<、DSML、tool_calls>` / `` / ``), arbitrary protocol prefixes (``), and legacy canonical XML tool blocks (`` / `` / ``) as executable tool calls. 
These shells normalize non-structural separators back to XML first, while internal parsing remains XML-based; CDATA opener drift such as `<![CDATA[` / `<、[CDATA[` is also normalized for parameter bodies. Legacy ``, ``, ``, ``, ``, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text; complete but malformed wrappers are also released as plain text. +- The parser treats the recommended halfwidth-pipe DSML shell tool blocks (`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`), DSML wrapper aliases (``, `<|tool_calls>`), common DSML separator drift (`<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`), collapsed DSML local names (`` / `` / ``), control-separator drift (`` / raw STX `\x02`), CJK angle bracket, fullwidth-bang / ideographic-comma separator drift, PascalCase local-name drift, and trailing attribute separator drift (`...〈/DSM|parameter〉` / `<!DSML!invoke name=“Bash”>` / `<、DSML、tool_calls>` / `` / ``), arbitrary protocol prefixes (``), and legacy canonical XML tool blocks (`` / `` / ``) as executable tool calls. These shells normalize non-structural separators back to XML first, while internal parsing remains XML-based; CDATA opener drift such as `<![CDATA[` / `<、[CDATA[` is also normalized for parameter bodies. Legacy ``, ``, ``, ``, ``, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text; complete but malformed wrappers are also released as plain text. - The parser no longer drops tool calls solely because parameter values are empty; explicit empty strings or whitespace-only parameters become empty strings in structured `tool_calls`. Prompting still tells the model not to emit blank parameters, and missing/empty argument rejection belongs in the tool executor or client schema validation. 
- If the final visible response text is empty but the reasoning stream contains an executable tool call, Chat / Responses emits a standard OpenAI `tool_calls` / `function_call` output during finalization. If thinking/reasoning was not enabled by the client, that reasoning text is used only for detection and is not exposed as visible text or `reasoning_content`. - `tool_calls` shown inside fenced markdown code blocks (for example, ```json ... ```) are treated as examples, not executable calls. diff --git a/API.md b/API.md index 9809ecac..63b4539d 100644 --- a/API.md +++ b/API.md @@ -40,9 +40,9 @@ - OpenAI / Claude / Gemini 三套协议已统一挂在同一 `chi` 路由树上,由 `internal/server/router.go` 负责装配。 - 适配器层职责收敛为:**请求归一化 → DeepSeek 调用 → 协议形态渲染**,减少历史版本中“同能力多处实现”的分叉。 -- Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致:推荐模型输出全角分隔符 DSML 外壳 `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`;兼容层也接受半角 DSML wrapper `<|DSML|tool_calls>`、DSML wrapper 别名 ``、`<|tool_calls>`、`<|tool_calls>`、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>`)、`DSML` 与工具标签名黏连的常见 typo(如 ``)、控制分隔符漂移(如 `` / 原始 STX `\x02`)、CJK 尖括号、全角感叹号、顿号、PascalCase 本地名、弯引号属性值与属性尾部分隔符漂移(如 `...〈/DSM|parameter〉` / `<!DSML!invoke name=“Bash”>` / `<、DSML、tool_calls>` / `` / ``)、任意协议前缀壳(如 ``),以及旧式 canonical XML `` → `` → ``。实现上采用结构扫描:只要固定本地标签名是 `tool_calls` / `invoke` / `parameter`,标签名前或标签名后的非结构性分隔符会在解析入口归一化;CDATA 开头也会容错 `<![CDATA[` / `<、[CDATA[` 这类分隔符漂移;只有 `tool_calls` wrapper 或可修复的缺失 opening wrapper 会进入工具路径,裸 `` 不计为已支持语法;流式场景继续执行防泄漏筛分。若参数体本身是合法 JSON 字面量(如 `123`、`true`、`null`、数组或对象),会按结构化值输出,不再一律当作字符串;显式空字符串和纯空白参数会结构化保留为空字符串,是否拒绝缺参由工具执行侧决定;完整但 malformed 的 wrapper 会作为普通文本释放,不会吞掉或伪造成工具调用;若 CDATA 偶发漏闭合,则会在最终 parse / flush 恢复阶段做窄修复,尽量保住已完整包裹的外层工具调用。 +- Tool Calling 的解析策略在 Go 与 Node Runtime 间保持一致:推荐模型输出半角管道符 DSML 外壳 `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`;兼容层也接受 DSML wrapper 别名 ``、`<|tool_calls>`、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>`)、`DSML` 与工具标签名黏连的常见 typo(如 ``)、控制分隔符漂移(如 `` / 原始 STX 
`\x02`)、CJK 尖括号、全角感叹号、顿号、PascalCase 本地名、弯引号属性值与属性尾部分隔符漂移(如 `...〈/DSM|parameter〉` / `<!DSML!invoke name=“Bash”>` / `<、DSML、tool_calls>` / `` / ``)、任意协议前缀壳(如 ``),以及旧式 canonical XML `` → `` → ``。实现上采用结构扫描:只要固定本地标签名是 `tool_calls` / `invoke` / `parameter`,标签名前或标签名后的非结构性分隔符会在解析入口归一化;CDATA 开头也会容错 `<![CDATA[` / `<、[CDATA[` 这类分隔符漂移;只有 `tool_calls` wrapper 或可修复的缺失 opening wrapper 会进入工具路径,裸 `` 不计为已支持语法;流式场景继续执行防泄漏筛分。若参数体本身是合法 JSON 字面量(如 `123`、`true`、`null`、数组或对象),会按结构化值输出,不再一律当作字符串;显式空字符串和纯空白参数会结构化保留为空字符串,是否拒绝缺参由工具执行侧决定;完整但 malformed 的 wrapper 会作为普通文本释放,不会吞掉或伪造成工具调用;若 CDATA 偶发漏闭合,则会在最终 parse / flush 恢复阶段做窄修复,尽量保住已完整包裹的外层工具调用。 - `Admin API` 将配置与运行时策略分开:`/admin/config*` 管静态配置,`/admin/settings*` 管运行时行为。 -- 当上游返回 thinking-only 响应(模型输出了推理链但无可见文本)时,Go 主路径的流式与非流式补全都会先自动重试一次:以多轮对话 follow-up 方式追加 prompt 后缀 `"Previous reply had no visible output. Please regenerate the visible final answer or tool call now."` 并设置 `parent_message_id` 在同一 DeepSeek session 内让模型重新输出;同账号重试最大 1 次。若同账号重试后仍即将返回 `429 upstream_empty_output`,托管账号模式会在返回 429 前自动切换到下一个可用账号,新建 session,用原始 payload 再 fresh retry 一次。 +- 当上游返回 thinking-only 响应(模型输出了推理链但无可见文本)时,Go 主路径与 Vercel Node 流式路径都会先自动重试一次:以多轮对话 follow-up 方式追加 prompt 后缀 `"Previous reply had no visible output. 
Please regenerate the visible final answer or tool call now."` 并设置 `parent_message_id` 在同一 DeepSeek session 内让模型重新输出;同账号重试最大 1 次。若同账号重试后仍即将返回 `429 upstream_empty_output`,托管账号模式会在返回 429 前自动切换到下一个可用账号,新建 session,用原始 payload 再 fresh retry 一次。 - 引用标记处理边界:流式输出默认隐藏 `[citation:N]` / `[reference:N]` 这类上游内部占位符;非流式输出默认把 DeepSeek 搜索引用标记转换为 Markdown 引用链接。 --- @@ -357,7 +357,7 @@ data: [DONE] 补充说明: - **非代码块上下文**下,工具负载即使与普通文本混合,也会按特征识别并产出可执行 tool call(前后普通文本仍可透传)。 -- 解析器当前把推荐 DSML 外壳(`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`)、半角 DSML 外壳(`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`)、DSML wrapper 别名(``、`<|tool_calls>`、`<|tool_calls>`)、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`)、`DSML` 与工具标签名黏连的常见 typo(如 `` / `` / ``)、控制分隔符漂移(如 `` / 原始 STX `\x02`)、CJK 尖括号、全角感叹号、顿号、PascalCase 本地名、弯引号属性值与属性尾部分隔符漂移(如 `...〈/DSM|parameter〉` / `<!DSML!invoke name=“Bash”>` / `<、DSML、tool_calls>` / `` / ``)、任意协议前缀壳(如 ``)和旧式 canonical XML 工具块(`` / `` / ``)作为可执行调用解析;这些非结构性分隔符壳会先归一化回 XML,内部仍以 XML 解析语义为准,CDATA 开头也会容错 `<![CDATA[` / `<、[CDATA[`。旧式 ``、``、``、``、``、`tool_use`、antml 风格与纯 JSON `tool_calls` 片段默认都会按普通文本处理;完整但 malformed 的 wrapper 同样会作为普通文本释放。 +- 解析器当前把推荐半角管道符 DSML 外壳(`<|DSML|tool_calls>` / `<|DSML|invoke name="...">` / `<|DSML|parameter name="...">`)、DSML wrapper 别名(``、`<|tool_calls>`)、常见 DSML 分隔符漏写形态(如 `<|DSML tool_calls>` / `<|DSML invoke>` / `<|DSML parameter>`)、`DSML` 与工具标签名黏连的常见 typo(如 `` / `` / ``)、控制分隔符漂移(如 `` / 原始 STX `\x02`)、CJK 尖括号、全角感叹号、顿号、PascalCase 本地名、弯引号属性值与属性尾部分隔符漂移(如 `...〈/DSM|parameter〉` / `<!DSML!invoke name=“Bash”>` / `<、DSML、tool_calls>` / `` / ``)、任意协议前缀壳(如 ``)和旧式 canonical XML 工具块(`` / `` / ``)作为可执行调用解析;这些非结构性分隔符壳会先归一化回 XML,内部仍以 XML 解析语义为准,CDATA 开头也会容错 `<![CDATA[` / `<、[CDATA[`。旧式 ``、``、``、``、``、`tool_use`、antml 风格与纯 JSON `tool_calls` 片段默认都会按普通文本处理;完整但 malformed 的 wrapper 同样会作为普通文本释放。 - 解析层不会因为参数值为空而丢弃工具调用;显式空字符串或纯空白参数会按空字符串进入结构化 `tool_calls`。Prompt 
会要求模型不要主动输出空参数,缺参/空命令的拒绝应由工具执行侧或客户端 schema 校验负责。 - 当最终可见正文为空但思维链里包含可执行工具调用时,Chat / Responses 会在收尾阶段补发标准 OpenAI `tool_calls` / `function_call` 输出;如果客户端未开启 thinking / reasoning,该思维链只用于检测,不会作为可见正文或 `reasoning_content` 暴露。 - Markdown fenced code block(例如 ```json ... ```)中的 `tool_calls` 仅视为示例文本,不会被执行。 diff --git a/README.MD b/README.MD index ae5eafc5..c32c09c8 100644 --- a/README.MD +++ b/README.MD @@ -196,7 +196,7 @@ OpenAI `/v1/*` 仍是推荐的规范路径;同时支持 `/models`、`/chat/com - `ANTHROPIC_BASE_URL` 推荐直接指向 DS2API 根地址(例如 `http://127.0.0.1:5001`),Claude Code 会请求 `/v1/messages?beta=true`。 - `ANTHROPIC_API_KEY` 需要与 `config.json` 中 `keys` 一致;建议同时保留常规 key 与 `sk-ant-*` 形态 key,兼容不同客户端校验习惯。 - 若系统设置了代理,建议对 DS2API 地址配置 `NO_PROXY=127.0.0.1,localhost,<你的主机IP>`,避免本地回环请求被代理拦截。 -- 如遇“工具调用输出成文本、未执行”问题,请优先检查模型输出是否为推荐的全角分隔符 DSML 工具块:`<|DSML|tool_calls><|DSML|invoke name="..."><|DSML|parameter name="...">...`。兼容层也接受半角 DSML 与旧式 canonical XML:`...`;旧式 `` / `` / `` / ``、``、`tool_use` 或纯 JSON `tool_calls` 片段不会执行,会作为普通文本处理。 +- 如遇“工具调用输出成文本、未执行”问题,请优先检查模型输出是否为推荐的半角管道符 DSML 工具块:`<|DSML|tool_calls><|DSML|invoke name="..."><|DSML|parameter name="...">...`。兼容层也接受旧式 canonical XML:`...`;旧式 `` / `` / `` / ``、``、`tool_use` 或纯 JSON `tool_calls` 片段不会执行,会作为普通文本处理。 ### Gemini 接口 @@ -373,7 +373,7 @@ Gemini 路由还可以使用 `x-goog-api-key`,或在没有认证头时使用 ` 当请求中带 `tools` 时,DS2API 会做防泄漏处理与结构化转译: 1. 只在**非代码块上下文**启用执行型 toolcall 识别(代码块示例默认不触发) -2. 解析层当前把全角分隔符 DSML 外壳视为推荐可执行调用:`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`;兼容半角 DSML、旧式 canonical XML `` → `` → ``,以及若干 DSML 前缀/分隔符漂移。DSML 只是外壳别名,内部仍以 XML 解析语义为准;旧式 `` / `` / `` / ``、``、`tool_use` / antml 变体与纯 JSON `tool_calls` 片段都会按普通文本处理,完整但 malformed 的 wrapper 也会作为普通文本释放 +2. 
解析层当前把半角管道符 DSML 外壳视为推荐可执行调用:`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`;兼容旧式 canonical XML `` → `` → ``,以及若干 DSML 前缀/分隔符漂移。DSML 只是外壳别名,内部仍以 XML 解析语义为准;旧式 `` / `` / `` / ``、``、`tool_use` / antml 变体与纯 JSON `tool_calls` 片段都会按普通文本处理,完整但 malformed 的 wrapper 也会作为普通文本释放 3. `responses` 流式严格使用官方 item 生命周期事件(`response.output_item.*`、`response.content_part.*`、`response.function_call_arguments.*`) 4. `responses` 支持并执行 `tool_choice`(`auto`/`none`/`required`/强制函数);`required` 违规时非流式返回 `422`,流式返回 `response.failed` 5. 客户端请求哪种协议,就按该协议返回工具调用(OpenAI/Claude/Gemini 各自原生结构);模型侧优先约束输出规范 XML,再由兼容层转译 diff --git a/README.en.md b/README.en.md index 81ef3137..afb4c7dd 100644 --- a/README.en.md +++ b/README.en.md @@ -185,7 +185,7 @@ Besides the primary aliases above, `/anthropic/v1/models` also returns Claude 4. - Set `ANTHROPIC_BASE_URL` to the DS2API root URL (for example `http://127.0.0.1:5001`). Claude Code sends requests to `/v1/messages?beta=true`. - `ANTHROPIC_API_KEY` must match an entry in `keys` from `config.json`. Keeping both a regular key and an `sk-ant-*` style key improves client compatibility. - If your environment has proxy variables, set `NO_PROXY=127.0.0.1,localhost,` for DS2API to avoid proxy interception of local traffic. -- If tool calls are rendered as plain text and not executed, first verify the model output uses the recommended fullwidth-separator DSML block: `<|DSML|tool_calls><|DSML|invoke name="..."><|DSML|parameter name="...">...`. DS2API also accepts halfwidth DSML and legacy canonical XML: `...`; legacy `` / `` / `` / ``, ``, `tool_use`, or standalone JSON `tool_calls` are not executed and stay plain text. +- If tool calls are rendered as plain text and not executed, first verify the model output uses the recommended halfwidth-pipe DSML block: `<|DSML|tool_calls><|DSML|invoke name="..."><|DSML|parameter name="...">...`. 
DS2API also accepts legacy canonical XML: `...`; legacy `` / `` / `` / ``, ``, `tool_use`, or standalone JSON `tool_calls` are not executed and stay plain text. ### Gemini Endpoint @@ -359,7 +359,7 @@ Queue limit = DS2API_ACCOUNT_MAX_QUEUE (default = recommended concurrency) When `tools` is present in the request, DS2API performs anti-leak handling: 1. Toolcall feature matching is enabled only in **non-code-block context** (fenced examples are ignored) -2. The parser treats the fullwidth-separator DSML shell as the recommended executable tool-calling syntax: `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; it also accepts halfwidth DSML, legacy canonical XML `` → `` → ``, plus common DSML prefix/separator drift. DSML is a shell alias and internal parsing remains XML-based; legacy `` / `` / `` / ``, ``, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text, and complete but malformed wrappers are released as plain text too +2. The parser treats the halfwidth-pipe DSML shell as the recommended executable tool-calling syntax: `<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`; it also accepts legacy canonical XML `` → `` → ``, plus common DSML prefix/separator drift. DSML is a shell alias and internal parsing remains XML-based; legacy `` / `` / `` / ``, ``, `tool_use`, antml variants, and standalone JSON `tool_calls` payloads are treated as plain text, and complete but malformed wrappers are released as plain text too 3. `responses` streaming strictly uses official item lifecycle events (`response.output_item.*`, `response.content_part.*`, `response.function_call_arguments.*`) 4. `responses` supports and enforces `tool_choice` (`auto`/`none`/`required`/forced function); `required` violations return `422` for non-stream and `response.failed` for stream 5. 
The output protocol follows the client request (OpenAI / Claude / Gemini native shapes); model-side prompting can prefer XML, and the compatibility layer handles the protocol-specific translation diff --git a/VERSION b/VERSION index a84947d6..6016e8ad 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.5.0 +4.6.0 diff --git a/docs/DEPLOY.md b/docs/DEPLOY.md index d0f23dee..d2050bd5 100644 --- a/docs/DEPLOY.md +++ b/docs/DEPLOY.md @@ -4,7 +4,7 @@ 本指南基于当前 Go 代码库,详细说明各种部署方式。 -本页导航:[文档总索引](./README.md)|[架构说明](./ARCHITECTURE.md)|[接口文档](../API.md)|[测试指南](./TESTING.md) +本页导航:[文档总索引](./README.md)|[架构说明](./ARCHITECTURE.md)|[接口文档](../API.md)|[测试指南](./TESTING.md) --- diff --git a/docs/prompt-compatibility.md b/docs/prompt-compatibility.md index fb030219..7b49865e 100644 --- a/docs/prompt-compatibility.md +++ b/docs/prompt-compatibility.md @@ -89,7 +89,7 @@ DS2API 当前的核心思路,不是把客户端传来的 `messages`、`tools` "chat_session_id": "session-id", "model_type": "default", "parent_message_id": null, - "prompt": "<|begin▁of▁sentence|>...", + "prompt": "<|begin▁of▁sentence|>...", "ref_file_ids": [ "file-history", "file-systemprompt", @@ -112,7 +112,7 @@ DS2API 当前的核心思路,不是把客户端传来的 `messages`、`tools` - Vercel Node 流式路径本轮不迁移,仍使用现有 Node bridge / stream-tool-sieve 实现;后续若变更 Node 流式语义,需要按 `assistantturn` 的 Go canonical 输出语义同步对齐。 - 客户端传入的 thinking / reasoning 开关会被归一到下游 `thinking_enabled`。Gemini `generationConfig.thinkingConfig.thinkingBudget` 会翻译成同一套 thinking 开关;关闭时即使上游返回 `response/thinking_content`,兼容层也不会把它当作可见正文输出。若最终解析出的模型名带 `-nothinking` 后缀,则会无条件强制关闭 thinking,优先级高于请求体中的 `thinking` / `reasoning` / `reasoning_effort`。未显式关闭时,各 surface 会按解析后的 DeepSeek 模型默认能力开启 thinking,并用各自协议的原生形态暴露:OpenAI Chat 为 `reasoning_content`,OpenAI Responses 为 `response.reasoning.delta` / `reasoning` content,Claude 为 `thinking` block / `thinking_delta`,Gemini 为 `thought: true` part。 - 对 OpenAI Chat / Responses 的非流式收尾,如果最终可见正文为空,兼容层会优先尝试把思维链中的独立 DSML / XML 工具块当作真实工具调用解析出来。流式链路也会在收尾阶段做同样的 fallback 
检测,但不会因为思维链内容去中途拦截或改写流式输出;真正的工具识别始终基于原始上游文本,而不是基于“已经做过可见输出清洗”的版本。最终可见层会剥离已经成功解析成工具调用的完整 leaked DSML / XML `tool_calls` wrapper;如果遇到完整 wrapper 但内部形态不符合可执行工具调用语义(例如 `` 这类 malformed XML 工具壳),流式 sieve 会把该块作为普通文本释放,而不是吞掉或伪造成工具调用。补发结果会作为本轮 assistant 的结构化 `tool_calls` / `function_call` 输出返回,而不是塞进 `content` 文本;如果客户端没有开启 thinking / reasoning,思维链只用于检测,不会作为 `reasoning_content` 或可见正文暴露。只有正文为空且思维链里也没有可执行工具调用时,才继续按空回复错误处理。 -- OpenAI Chat / Responses、Claude Messages、Gemini generateContent 的空回复错误处理之前会默认做一次内部补偿重试:第一次上游完整结束后,如果最终可见正文为空、没有解析到工具调用、也没有已经向客户端流式发出工具调用,并且终止原因不是 `content_filter`,兼容层会复用同一个 `chat_session_id`、账号、token 与工具策略,把原始 completion `prompt` 追加固定后缀 `Previous reply had no visible output. Please regenerate the visible final answer or tool call now.` 后重新提交一次。Go 主路径的非流式重试由 `completionruntime.ExecuteNonStreamWithRetry` 统一处理;流式重试由 `completionruntime.ExecuteStreamWithRetry` 统一处理,各协议 runtime 只负责消费/渲染本协议 SSE framing。重试遵循 DeepSeek 多轮对话协议:从第一次上游 SSE 流中提取 `response_message_id`,并在重试 payload 中设置 `parent_message_id` 为该值,使重试成为同一会话的后续轮次而非断裂的根消息;同时重新获取一次 PoW(若 PoW 获取失败则回退到原始 PoW)。该同账号重试不会重新标准化消息、不会新建 session,也不会向流式客户端插入重试标记;第二次 thinking / reasoning 会按正常增量直接接到第一次之后,并继续使用 overlap trim 去重。若同账号补偿重试后即将返回 429 `upstream_empty_output`,并且当前是托管账号模式,Go 主路径会在返回 429 前切换到下一个可用账号,新建 `chat_session_id`,使用原始 completion payload 再做一次 fresh retry;该切号重试不携带空回复 prompt 后缀,也不设置上一账号的 `parent_message_id`。如果没有可切换账号,或切号后的 fresh retry 仍没有可见正文或工具调用,则继续按原错误返回:无任何输出为 503 `upstream_unavailable`,有 reasoning 但没有可见正文或工具调用为 429 `upstream_empty_output`。若任一尝试触发空 `content_filter`,不做补偿重试并保持 `content_filter` 错误。JS Vercel 运行时同样设置 `parent_message_id`,但因无法直接调用 PoW API 而复用原始 PoW;切号 fresh retry 目前由 Go 主路径提供。 +- OpenAI Chat / Responses、Claude Messages、Gemini generateContent 的空回复错误处理之前会默认做一次内部补偿重试:第一次上游完整结束后,如果最终可见正文为空、没有解析到工具调用、也没有已经向客户端流式发出工具调用,并且终止原因不是 `content_filter`,兼容层会复用同一个 `chat_session_id`、账号、token 与工具策略,把原始 completion `prompt` 追加固定后缀 `Previous reply had no visible output. 
Please regenerate the visible final answer or tool call now.` 后重新提交一次。Go 主路径的非流式重试由 `completionruntime.ExecuteNonStreamWithRetry` 统一处理;流式重试由 `completionruntime.ExecuteStreamWithRetry` 统一处理,各协议 runtime 只负责消费/渲染本协议 SSE framing。重试遵循 DeepSeek 多轮对话协议:从第一次上游 SSE 流中提取 `response_message_id`,并在重试 payload 中设置 `parent_message_id` 为该值,使重试成为同一会话的后续轮次而非断裂的根消息;同时重新获取一次 PoW(若 PoW 获取失败则回退到原始 PoW)。该同账号重试不会重新标准化消息、不会新建 session,也不会向流式客户端插入重试标记;第二次 thinking / reasoning 会按正常增量直接接到第一次之后,并继续使用 overlap trim 去重。若同账号补偿重试后即将返回 429 `upstream_empty_output`,并且当前是托管账号模式,runtime 会在返回 429 前切换到下一个可用账号,新建 `chat_session_id`,使用原始 completion payload 再做一次 fresh retry;该切号重试不携带空回复 prompt 后缀,也不设置上一账号的 `parent_message_id`。如果 current input file 已触发,切号前会在新账号上重新上传同一份 `DS2API_HISTORY.txt`(以及需要时的 `DS2API_TOOLS.txt`),并用新账号可见的 file_id 替换自动生成的旧 file_id;客户端原本传入的其他文件引用保持不变。如果没有可切换账号,或切号后的 fresh retry 仍没有可见正文或工具调用,则继续按原错误返回:无任何输出为 503 `upstream_unavailable`,有 reasoning 但没有可见正文或工具调用为 429 `upstream_empty_output`。若任一尝试触发空 `content_filter`,不做补偿重试并保持 `content_filter` 错误。Vercel Node 流式路径通过 Go 内部 prepare / pow / switch 端点获取初始 payload、重试 PoW 和切号 fresh retry payload,因此同样会重新上传 current-input 自动文件并替换为新账号 file_id。 - 非流式 OpenAI Chat / Responses、Claude Messages、Gemini generateContent 在最终可见正文渲染阶段,会把 DeepSeek 搜索返回中的 `[citation:N]` / `[reference:N]` 标记替换成对应 Markdown 链接。`citation` 标记按一基序号解析;`reference` 标记只有在同一段正文中出现 `[reference:0]`(允许冒号后有空格)时才按零基序号映射,并且不会影响同段正文里的 `citation` 标记。 - 流式输出仍默认隐藏 `[citation:N]` / `[reference:N]` 这类上游内部标记,避免分片输出中泄漏尚未完成映射的引用占位符。 @@ -135,14 +135,14 @@ OpenAI Chat / Responses 在标准化后、current input file 之前,会默认 最终 prompt 使用 DeepSeek 风格角色标记: -- `<|begin▁of▁sentence|>` -- `<|System|>` -- `<|User|>` -- `<|Assistant|>` -- `<|Tool|>` -- `<|end▁of▁instructions|>` -- `<|end▁of▁sentence|>` -- `<|end▁of▁toolresults|>` +- `<|begin▁of▁sentence|>` +- `<|System|>` +- `<|User|>` +- `<|Assistant|>` +- `<|Tool|>` +- `<|end▁of▁instructions|>` +- `<|end▁of▁sentence|>` +- `<|end▁of▁toolresults|>` 实现位置: 
[internal/prompt/messages.go](../internal/prompt/messages.go) @@ -165,10 +165,10 @@ OpenAI Chat / Responses 在标准化后、current input file 之前,会默认 1. 把每个 tool 的名称、描述、参数 schema 序列化成文本。 2. 拼成 `You have access to these tools:` 大段说明。 3. 再附上统一的 DSML tool call 外壳格式约束。 -4. 把这整段内容并入 system prompt。 +4. 普通直传请求会把“工具描述 + 格式约束”一起并入 system prompt;如果 `current_input_file` 触发,则工具描述/schema 会单独上传成 `DS2API_TOOLS.txt`,live prompt 和 system tool 格式提示都会明确要求模型把 `DS2API_TOOLS.txt` 当作可调用工具和参数 schema 的权威来源。 -工具调用正例现在优先示范全角分隔符 DSML 风格:`<|DSML|tool_calls>` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`。 -兼容层仍接受旧式纯 `` wrapper,并会容错若干 DSML 标签变体,包括短横线形式 `` / `` / ``、下划线形式 `` / `` / ``,以及其他前缀分隔形态如 `` / `` / ``;标签壳扫描还会把全角 ASCII 漂移归一化,例如 `<dSML|tool_calls>` 与全角 `>` 结束符,也会容错 CJK 尖括号、全角感叹号或顿号分隔符、弯引号属性值、PascalCase 本地名和属性尾部分隔符漂移,例如 `...〈/DSM|parameter〉`、`<!DSML!invoke name=“Bash”>`、`<、DSML、tool_calls>`、``、``。更一般地,Go / Node tag 扫描以固定本地标签名 `tool_calls` / `invoke` / `parameter` 为准,标签名前或标签名后的非结构性协议分隔符都会在解析入口剥离,例如 ``、`` 这类控制符或非 ASCII 分隔符漂移也会归一化回现有 XML 标签后继续走同一套 parser;结构性字符如 `<` / `>` / `/` / `=` / 引号、空白和 ASCII 字母数字不会被当作这类分隔符。CDATA 开头也使用同一类扫描式容错,`` → `<|DSML|invoke name="...">` → `<|DSML|parameter name="...">`。 +兼容层仍接受旧式纯 `` wrapper,并会容错若干 DSML 标签变体,包括短横线形式 `` / `` / ``、下划线形式 `` / `` / ``,以及其他前缀分隔形态如 `` / `` / ``;标签壳扫描还会把全角 ASCII 漂移归一化,例如 `<dSML|tool_calls>` 与全角 `>` 结束符,也会容错 CJK 尖括号、全角感叹号或顿号分隔符、弯引号属性值、PascalCase 本地名和属性尾部分隔符漂移,例如 `...〈/DSM|parameter〉`、`<!DSML!invoke name=“Bash”>`、`<、DSML、tool_calls>`、``、``。更一般地,Go / Node tag 扫描以固定本地标签名 `tool_calls` / `invoke` / `parameter` 为准,标签名前或标签名后的非结构性协议分隔符都会在解析入口剥离,例如 ``、`` 这类控制符或非 ASCII 分隔符漂移也会归一化回现有 XML 标签后继续走同一套 parser;结构性字符如 `<` / `>` / `/` / `=` / 引号、空白和 ASCII 字母数字不会被当作这类分隔符。进入现有 DSML rewrite / XML parse 之前,Go / Node 还会先对“已经识别成工具标签壳的 candidate span”做一次窄 canonicalization:只折叠 wrapper / `invoke` / `parameter` / `name` / `CDATA` / `DSML` 及其壳层分隔符里的 confusable 字符,清理零宽 / BOM / 控制类干扰,并把引号、空白、dash / underscore 变体等统一回可解析的工具语法。这个阶段不会广义改写普通正文、参数内容、CDATA 里的示例文本或其他非工具 
XML。CDATA 开头也使用同一类扫描式容错,`...` 子节点表示;当某个参数体只包含 item 子节点时,Go / Node 解析器会把它还原成数组,避免 `questions` / `options` 这类 schema 中要求 array 的参数被误解析成 `{ "item": ... }` 对象。除此之外,解析器还会回收一些更松散的列表写法,例如 JSON array 字面量或逗号分隔的 JSON 项序列,只要它们足够明确;但 `` 仍然是首选形态。若模型把完整结构化 XML fragment 误包进 CDATA,兼容层会在保护 `content` / `command` 等原文字段的前提下,尝试把非原文字段中的 CDATA XML fragment 还原成 object / array。不过,如果 CDATA 只是单个平面的 XML/HTML 标签,例如 `urgent` 这种行内标记,兼容层会保留原始字符串,不会强行升成 object / array;只有明显表示结构的 CDATA 片段,例如多兄弟节点、嵌套子节点或 `item` 列表,才会触发结构化恢复。对 `command` / `content` 等长文本参数,CDATA 内部的 Markdown fenced DSML / XML 示例会作为原文保护;示例里的 `]]>` 或 `` 不会截断外层工具调用,解析器会继续等待围栏外真正的参数 / wrapper 结束标签。 Go 侧读取 DeepSeek SSE 时不再依赖 `bufio.Scanner` 的固定 2MiB 单行上限;当写文件类工具把很长的 `content` 放在单个 `data:` 行里返回时,非流式收集、流式解析和 auto-continue 透传都会保留完整行,再进入同一套工具解析与序列化流程。 在 assistant 最终回包阶段,如果某个 tool 参数在声明 schema 中明确是 `string`,兼容层会在把解析后的 `tool_calls` / `function_call` 重新序列化成 OpenAI / Responses / Claude 可见参数前,递归把该路径上的 number / bool / object / array 统一转成字符串;其中 object / array 会压成紧凑 JSON 字符串。这个保护只对 schema 明确声明为 string 的路径生效,不会改写本来就是 `number` / `boolean` / `object` / `array` 的参数。这样可以兼容 DeepSeek 输出了结构化片段、但上游客户端工具 schema 又严格要求字符串参数的场景(例如 `content`、`prompt`、`path`、`taskId` 等)。 @@ -215,17 +215,17 @@ assistant 的 reasoning 会变成一个显式标签块: assistant 历史 `tool_calls` 不会保留成 OpenAI 原生 JSON,而会转成 prompt 可见的 DSML 外壳: ```xml -<|DSML|tool_calls> - <|DSML|invoke name="read_file"> - <|DSML|parameter name="path"> - - +<|DSML|tool_calls> + <|DSML|invoke name="read_file"> + <|DSML|parameter name="path"> + + ``` 如果客户端历史里没有结构化 `tool_calls` 字段、却把一个可独立解析的 assistant 工具块放进了普通 `content`,兼容层会在写入后续 prompt 前先按工具调用解析它,再重渲染为规范 DSML 历史外壳。这样可以避免一次 malformed 工具块未被结构化保存后,作为普通 assistant 文本回灌,继续污染后续模型的 few-shot 工具格式。 解析层同时兼容旧式纯 XML 形态:`` / `` / ``。两者都会先归一到现有 XML 解析语义;其他旧格式都会作为普通文本保留,不会作为可执行调用语法。 -例外是 parser 会对一个非常窄的模型失误做修复:如果 assistant 输出了 `` ... ``(或 DSML 对应标签),但漏掉最前面的 opening wrapper,解析阶段会补回 wrapper 后再尝试识别。 +例外是 parser 会对一个非常窄的模型失误做修复:如果 assistant 输出了 `` ... 
``(或 DSML 对应标签),但漏掉最前面的 opening wrapper,解析阶段会在 wrapper-confidence 足够高时补回 wrapper 后再尝试识别。这里的 wrapper-confidence 指 scanner 已经识别出白名单工具壳结构,剩余失败只像壳层结构漂移,而不是语义上接近但不在白名单内的 near-miss 标签名。修复成功时,wrapper 后面的 suffix prose 会继续保留在可见文本里;修复失败时,该块仍按普通文本处理。 这件事很重要,因为它决定了: @@ -237,7 +237,7 @@ assistant 历史 `tool_calls` 不会保留成 OpenAI 原生 JSON,而会转成 ### 7.3 tool result 保留方式 -tool / function role 的结果会作为 `<|Tool|>...<|end▁of▁toolresults|>` 进入 prompt。 +tool / function role 的结果会作为 `<|Tool|>...<|end▁of▁toolresults|>` 进入 prompt。 如果 tool content 为空,当前会补成字符串 `"null"`,避免整个 tool turn 丢失。 @@ -278,7 +278,7 @@ OpenAI 的文件上传现在不再是“只传文件本体”的通用路径, 兼容层现在只保留 `current_input_file` 这一种拆分方式;旧的 `history_split` 配置字段已移除,读取旧配置时会忽略它且不会再写回。 -- `current_input_file` 默认开启;它在统一 completion runtime 入口全局生效,用于把“完整上下文”合并进 `DS2API_HISTORY.txt` 上下文文件。当最新 user turn 的纯文本长度达到 `current_input_file.min_chars`(默认 `0`)时,runtime 会上传一个文件名为 `DS2API_HISTORY.txt` 的上下文文件。文件内容会先经过各协议入口的标准化,再序列化成按轮次编号的 `DS2API_HISTORY.txt` 风格 transcript,带有 `# DS2API_HISTORY.txt` 标题和 `=== N. ROLE ===` 分段;live prompt 中则会给出一个 continuation 语气的 user 消息,引导模型从 `DS2API_HISTORY.txt` 的最新状态继续推进,并直接回答最新请求,避免把任务拉回起点。 +- `current_input_file` 默认开启;它在统一 completion runtime 入口全局生效,用于把“完整上下文”合并进 `DS2API_HISTORY.txt` 上下文文件。当最新 user turn 的纯文本长度达到 `current_input_file.min_chars`(默认 `0`)时,runtime 会上传一个文件名为 `DS2API_HISTORY.txt` 的上下文文件。文件内容会先经过各协议入口的标准化,再序列化成按轮次编号的 `DS2API_HISTORY.txt` 风格 transcript,带有 `# DS2API_HISTORY.txt` 标题和 `=== N. 
ROLE ===` 分段;如果当前请求声明了可用工具,还会把工具名称、描述和参数 schema 单独上传成 `DS2API_TOOLS.txt`,带有 `# DS2API_TOOLS.txt` 标题。live prompt 中则会给出一个 continuation 语气的 user 消息,引导模型从 `DS2API_HISTORY.txt` 的最新状态继续推进,并在有工具文件时明确可用工具 schema 位于 `DS2API_TOOLS.txt`;system prompt 也会在统一 DSML 工具格式约束前说明 `DS2API_TOOLS.txt` 是可调用工具和 schema 的权威来源,同时保留本轮工具选择策略,避免把任务拉回起点。 - 如果 `current_input_file.enabled=false`,请求会直接透传,不上传任何拆分上下文文件。 - 即使触发 `current_input_file` 后 live prompt 被缩短,对客户端回包里的上下文 token 统计,仍会沿用**拆分前的完整 prompt 语义**做计数,而不是按缩短后的占位 prompt 计算;否则会把真实上下文显著算小。 @@ -291,7 +291,7 @@ OpenAI 的文件上传现在不再是“只传文件本体”的通用路径, - 全局 completion runtime 应用点: [internal/completionruntime/nonstream.go](../internal/completionruntime/nonstream.go) -当前输入转文件启用并触发时,上传文件的真实文件名是 `DS2API_HISTORY.txt`,文件内容是完整 `messages` 上下文;它会使用 OpenAI-compatible 的消息/transcript 序列化规则和 DeepSeek 角色标记,再按轮次编号成 `DS2API_HISTORY.txt` 风格的 transcript(不再注入文件边界标签): +当前输入转文件启用并触发时,上传的历史文件真实文件名是 `DS2API_HISTORY.txt`,文件内容是完整 `messages` 上下文;它会使用 OpenAI-compatible 的消息/transcript 序列化规则和 DeepSeek 角色标记,再按轮次编号成 `DS2API_HISTORY.txt` 风格的 transcript(不再注入文件边界标签): ```text [uploaded filename]: DS2API_HISTORY.txt @@ -311,7 +311,21 @@ Prior conversation history and tool progress. ... ``` -开启后,请求的 live prompt 不再直接内联完整上下文,而是保留一个 user role 的短提示,提示模型基于已提供上下文直接回答最新请求;上传后的 `file_id` 会进入 `ref_file_ids`。 +如果当前请求带有工具,runtime 同时上传 `DS2API_TOOLS.txt`: + +```text +[uploaded filename]: DS2API_TOOLS.txt +# DS2API_TOOLS.txt +Available tool descriptions and parameter schemas for this request. + +You have access to these tools: + +Tool: ... +Description: ... +Parameters: ... +``` + +开启后,请求的 live prompt 不再直接内联完整上下文,也不再内联大段工具 schema;它保留一个 user role 的短提示,提示模型基于已提供上下文直接回答最新请求,并在有工具时引用 `DS2API_TOOLS.txt`。上传后的 `DS2API_HISTORY.txt` file_id 会排在 `ref_file_ids` 最前;如果存在 `DS2API_TOOLS.txt`,它的 file_id 紧随其后;客户端已有的其他 file_id 保持在后面。上下文 token 统计会包含上传的历史文件、工具文件和 live prompt。自动生成的 current-input 文件引用会被记录为 runtime 状态;如果托管账号模式切号 fresh retry,runtime 会重新上传这些自动文件,而不是把上一账号的 file_id 交给新账号。 ## 10. 
各协议入口的差异 @@ -321,7 +335,7 @@ Prior conversation history and tool progress. - `developer` 会映射到 `system` - Responses `instructions` 会 prepend 为 system message -- `tools` 会注入 system prompt +- 普通直传时 `tools` 会注入 system prompt;`current_input_file` 触发时工具描述/schema 会拆成 `DS2API_TOOLS.txt`,system prompt 保留格式/策略规则并明确要求模型从 `DS2API_TOOLS.txt` 获取可调用工具和 schema - `attachments` / `input_file` / inline 文件会进入 `ref_file_ids` - current input file 在统一 completion runtime 入口全局生效 @@ -331,7 +345,7 @@ Prior conversation history and tool progress. - top-level `system` 优先作为系统提示 - `tool_use` / `tool_result` 会被转换成统一的 assistant/tool 历史语义 -- `tools` 同样会被并进 system prompt +- 普通直传时 `tools` 同样会被并进 system prompt;`current_input_file` 触发时会沿用统一的 `DS2API_TOOLS.txt` 拆分上传路径 - 常规执行通过 `internal/httpapi/claude/handler_messages.go` 转到 OpenAI chat 路径,模型 alias 会先解析成 DeepSeek 原生模型 - 当前代码里没有像 OpenAI 那样完整的 `ref_file_ids` 附件链路 @@ -341,7 +355,7 @@ Prior conversation history and tool progress. - `systemInstruction`、`contents.parts`、`functionCall`、`functionResponse` 会先归一 - tools 会转成 OpenAI 风格 function schema -- prompt 构建复用 OpenAI 的 `promptcompat.BuildOpenAIPromptForAdapter` +- prompt 构建复用 OpenAI 的 `promptcompat.BuildOpenAIPromptForAdapter`,`current_input_file` 触发时也会使用统一的 `DS2API_TOOLS.txt` 拆分上传路径 - 未识别的非文本 part 会被安全序列化进 prompt,并对二进制/疑似 base64 内容做省略或截断处理 也就是说,Gemini 在“最终 prompt 语义”上,尽量和 OpenAI 保持一致。 @@ -360,9 +374,10 @@ Prior conversation history and tool progress. ```json { - "prompt": "<|begin▁of▁sentence|><|System|>原 system / developer\n\nYou have access to these tools: ...<|end▁of▁instructions|><|User|>Continue from the latest state in the attached DS2API_HISTORY.txt context. Treat it as the current working state and answer the latest user request directly.<|Assistant|>", + "prompt": "<|begin▁of▁sentence|><|System|>原 system / developer\n\nTOOL CALL FORMAT — FOLLOW EXACTLY: ...<|end▁of▁instructions|><|User|>Continue from the latest state in the attached DS2API_HISTORY.txt context. 
Treat it as the current working state and answer the latest user request directly. Available tool descriptions and parameter schemas are attached in DS2API_TOOLS.txt; use only those tools and follow the tool-call format rules in this prompt.<|Assistant|>", "ref_file_ids": [ - "file-current-input-ignore", + "file-ds2api-history", + "file-ds2api-tools", "file-systemprompt", "file-other-attachment" ], diff --git a/docs/toolcall-semantics.md b/docs/toolcall-semantics.md index 4deb80dd..7988d5a8 100644 --- a/docs/toolcall-semantics.md +++ b/docs/toolcall-semantics.md @@ -6,14 +6,14 @@ ## 1) 当前可执行格式 -当前版本推荐模型输出全角分隔符 DSML 外壳: +当前版本推荐模型输出半角管道符 DSML 外壳: ```xml -<|DSML|tool_calls> - <|DSML|invoke name="read_file"> - <|DSML|parameter name="path"> - - +<|DSML|tool_calls> + <|DSML|invoke name="read_file"> + <|DSML|parameter name="path"> + + ``` 兼容层仍接受旧式 canonical XML: @@ -30,17 +30,20 @@ 约束: -- 必须有 `<|DSML|tool_calls>...` 或 `...` wrapper -- 每个调用必须在 `<|DSML|invoke name="...">...` 或 `...` 内 +- 必须有 `<|DSML|tool_calls>...` 或 `...` wrapper +- 每个调用必须在 `<|DSML|invoke name="...">...` 或 `...` 内 - 工具名必须放在 `invoke` 的 `name` 属性 -- 参数必须使用 `<|DSML|parameter name="...">...` 或 `...` +- 参数必须使用 `<|DSML|parameter name="...">...` 或 `...` - 同一个工具块内不要混用 DSML 标签和旧 XML 工具标签;混搭会被视为非法工具块 兼容修复: - 如果模型漏掉 opening wrapper,但后面仍输出了一个或多个 invoke 并以 closing wrapper 收尾,Go 解析链路会在解析前补回缺失的 opening wrapper。 -- Go / Node 解析层不再枚举每一种 DSML typo。它以固定本地标签名 `tool_calls` / `invoke` / `parameter` 为准,把标签名前的任意协议前缀壳视为可容忍噪声,并继续兼容管道符 `|` / `|`、全角感叹号 `!`、顿号 `、`、空白、重复 leading `<`、可视控制符 `␂`、原始 STX `\x02`、非 ASCII 分隔符、CJK 尖括号 `〈` / `〉`、弯引号属性值、PascalCase 本地名等漂移。例如 ``、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、``、``、`<`、``、``、`...〈/DSM|tool_calls〉`、`<!DSML!tool_calls>...<!/DSML!tool_calls>`、`<、DSML、tool_calls>...<、/DSML、tool_calls>` 都会归一化;相似但非固定标签名(如 `tool_calls_extra` / `ToolCallsExtra`)仍按普通文本处理。 -- 如果模型在固定工具标签名后多输出一个非结构性分隔符,例如 `<|DSML|tool_calls|` / `<|DSML|invoke|` / `<|DSML|parameter|` / ``,或在带属性标签的结束符前多输出一个尾部分隔符(如 
``),兼容层会把这个尾部分隔符当作异常标签终止符并补齐或归一化;如果后面已经有 `>` / `〉`,也会消费这个多余分隔符后再归一化。结构性字符如 `<` / `>` / `/` / `=` / 引号、空白和 ASCII 字母数字不会被当作这类分隔符。 +- 在进入现有 DSML rewrite / XML parse 之前,Go / Node 都会先做一次非常窄的 candidate-span canonicalization:只处理已经被 scanner 识别为工具标签壳的 wrapper / `invoke` / `parameter` / `name` / `CDATA` / `DSML` 及其结构分隔符;这里会移除零宽 / BOM / 控制类干扰字符,并把 `<`、`>`、`/`、`|`、`=`、引号、Unicode 空白、常见 dash / underscore 变体这类工具语法外壳符号折回 ASCII 语义。 +- Go / Node 解析层不再枚举每一种 DSML typo。它以固定本地标签名 `tool_calls` / `invoke` / `parameter` 为准,把标签名前的任意协议前缀壳视为可容忍噪声,并继续兼容半角管道符、全角感叹号 `!`、顿号 `、`、空白、重复 leading `<`、可视控制符 `␂`、原始 STX `\x02`、非 ASCII 分隔符、CJK 尖括号 `〈` / `〉`、弯引号属性值、PascalCase 本地名等漂移。例如 ``、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、``、``、`<`、``、``、`...〈/DSM|tool_calls〉`、`<!DSML!tool_calls>...<!/DSML!tool_calls>`、`<、DSML、tool_calls>...<、/DSML、tool_calls>` 都会归一化;相似但非固定标签名(如 `tool_calls_extra` / `ToolCallsExtra`)仍按普通文本处理。 +- 这个 candidate-span canonicalization 不会对普通 prose、参数正文、CDATA 内容或嵌套的非工具 XML 做广义 Unicode 归一化。也就是说,参数里的示例 ``、普通聊天文本里的 confusable 单词、或其他非工具壳 XML 片段都保持原样;只有真正落在工具标签壳上的 whitelist 关键字和结构符号会被折叠。 +- 如果模型在固定工具标签名后多输出一个非结构性分隔符,例如 `<|DSML|tool_calls|` / `<|DSML|invoke|` / `<|DSML|parameter|` / ``,或在带属性标签的结束符前多输出一个尾部分隔符(如 ``),兼容层会把这个尾部分隔符当作异常标签终止符并补齐或归一化;如果后面已经有 `>` / `〉`,也会消费这个多余分隔符后再归一化。结构性字符如 `<` / `>` / `/` / `=` / 引号、空白和 ASCII 字母数字不会被当作这类分隔符。 +- “缺失 opening wrapper”的修复只会在 wrapper-confidence 足够高时触发:scanner 必须已经识别出白名单工具壳结构(wrapper / invoke / parameter / `name=` 等),且剩余失败看起来只是壳层结构问题。相似但不在白名单内的 near-miss 标签名,或缺少足够 wrapper 证据的 malformed 片段,仍会按普通文本透传。 - 这是一个针对常见模型失误的窄修复,不改变推荐输出格式;prompt 仍要求模型直接输出完整 DSML 外壳。 - 裸 `` / `` 不会被当成“已支持的工具语法”;只有 `tool_calls` wrapper 或可修复的缺失 opening wrapper 才会进入工具调用路径。 @@ -54,10 +57,11 @@ 在流式链路中(Go / Node 一致): -- DSML `<|DSML|tool_calls>` wrapper、短横线形式(如 `` / `` / ``)、基于固定本地标签名的 DSML 噪声容错形态、尾部非结构性分隔符形态(如 `<|DSML|tool_calls|` / ``)和 canonical `` wrapper 都会进入结构化捕获 +- DSML `<|DSML|tool_calls>` wrapper、短横线形式(如 `` / `` / ``)、基于固定本地标签名的 DSML 噪声容错形态、尾部非结构性分隔符形态(如 `<|DSML|tool_calls|` / 
``)和 canonical `` wrapper 都会进入结构化捕获 - 如果流里直接从 invoke 开始,但后面补上了 closing wrapper,Go 流式筛分也会按缺失 opening wrapper 的修复路径尝试恢复 - 已识别成功的工具调用不会再次回流到普通文本 - 不符合新格式的块不会执行,并继续按原样文本透传 +- 如果一个 confusable / 漂移过的工具壳在 candidate-span canonicalization + repair 后仍能形成有效工具调用,wrapper 后面的 suffix prose 会继续按普通文本输出;如果 canonicalization 后仍不满足 wrapper-confidence 或 XML 语义,整块就作为普通文本释放,不会半吞半漏。 - fenced code block(反引号 `` ``` `` 和波浪线 `~~~`)中的 XML 示例始终按普通文本处理 - 支持嵌套围栏(如 4 反引号嵌套 3 反引号)和 CDATA 内围栏保护 - 对 `command` / `content` 等长文本参数,CDATA 内部如果包含 Markdown fenced DSML / XML 示例,即使示例里出现 `]]>` / `` 这类看起来像外层结束标签的片段,也会继续按参数原文保留,直到真正位于围栏外的外层结束标签 @@ -101,9 +105,9 @@ go test -v -run 'TestParseToolCalls|TestProcessToolSieve' ./internal/toolcall ./ 重点覆盖: -- DSML `<|DSML|tool_calls>` wrapper 正常解析 +- DSML `<|DSML|tool_calls>` wrapper 正常解析 - legacy canonical `` wrapper 正常解析 -- 固定本地标签名的 DSML 噪声容错形态(如 ``、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、``、``、`<`、`...〈/DSM|tool_calls〉`、`<!DSML!tool_calls>...<!/DSML!tool_calls>`)正常解析 +- 固定本地标签名的 DSML 噪声容错形态(如 ``、`<<|DSML|tool_calls>`、`<|DSML tool_calls>`、``、``、`<`、`...〈/DSM|tool_calls〉`、`<!DSML!tool_calls>...<!/DSML!tool_calls>`)正常解析 - 混搭标签(DSML wrapper + canonical inner)归一化后正常解析 - 波浪线围栏 `~~~` 内的示例不执行 - 嵌套围栏(4 反引号嵌套 3 反引号)内的示例不执行 diff --git a/internal/completionruntime/nonstream.go b/internal/completionruntime/nonstream.go index 921d3b4d..bc589c61 100644 --- a/internal/completionruntime/nonstream.go +++ b/internal/completionruntime/nonstream.go @@ -114,7 +114,7 @@ func ExecuteNonStreamStartedWithRetry(ctx context.Context, ds DeepSeekCaller, a turn, outErr := collectAttempt(currentResp, stdReq, usagePrompt, opts) if outErr != nil { if canRetryOnAlternateAccount(ctx, a, outErr, opts.RetryEnabled, &accountSwitchAttempted) { - switched, switchErr := startStandardCompletionOnAlternateAccount(ctx, ds, a, stdReq, maxAttempts) + switched, switchErr := startStandardCompletionOnAlternateAccount(ctx, ds, a, stdReq, opts, maxAttempts) if switchErr != nil { return NonStreamResult{SessionID: 
sessionID, Payload: payload, Attempts: attempts}, switchErr } @@ -154,7 +154,7 @@ func ExecuteNonStreamStartedWithRetry(ctx context.Context, ds DeepSeekCaller, a } if !opts.RetryEnabled || !assistantturn.ShouldRetryEmptyOutput(turn, attempts, retryMax) { if canRetryOnAlternateAccount(ctx, a, turn.Error, opts.RetryEnabled, &accountSwitchAttempted) { - switched, switchErr := startStandardCompletionOnAlternateAccount(ctx, ds, a, stdReq, maxAttempts) + switched, switchErr := startStandardCompletionOnAlternateAccount(ctx, ds, a, stdReq, opts, maxAttempts) if switchErr != nil { return NonStreamResult{SessionID: sessionID, Payload: payload, Turn: turn, Attempts: attempts}, switchErr } @@ -205,7 +205,12 @@ func canRetryOnAlternateAccount(ctx context.Context, a *auth.RequestAuth, outErr return a.SwitchAccount(ctx) } -func startStandardCompletionOnAlternateAccount(ctx context.Context, ds DeepSeekCaller, a *auth.RequestAuth, stdReq promptcompat.StandardRequest, maxAttempts int) (StartResult, *assistantturn.OutputError) { +func startStandardCompletionOnAlternateAccount(ctx context.Context, ds DeepSeekCaller, a *auth.RequestAuth, stdReq promptcompat.StandardRequest, opts Options, maxAttempts int) (StartResult, *assistantturn.OutputError) { + var prepErr *assistantturn.OutputError + stdReq, prepErr = reuploadCurrentInputFileForAccount(ctx, ds, a, stdReq, opts) + if prepErr != nil { + return StartResult{Request: stdReq}, prepErr + } sessionID, err := ds.CreateSession(ctx, a, maxAttempts) if err != nil { return StartResult{}, authOutputError(a) @@ -222,6 +227,18 @@ func startStandardCompletionOnAlternateAccount(ctx context.Context, ds DeepSeekC return StartResult{SessionID: sessionID, Payload: payload, Pow: pow, Response: resp, Request: stdReq}, nil } +func reuploadCurrentInputFileForAccount(ctx context.Context, ds DeepSeekCaller, a *auth.RequestAuth, stdReq promptcompat.StandardRequest, opts Options) (promptcompat.StandardRequest, *assistantturn.OutputError) { + if 
opts.CurrentInputFile == nil || !stdReq.CurrentInputFileApplied { + return stdReq, nil + } + out, err := (history.Service{Store: opts.CurrentInputFile, DS: ds}).ReuploadAppliedCurrentInputFile(ctx, a, stdReq) + if err != nil { + status, message := history.MapError(err) + return out, &assistantturn.OutputError{Status: status, Message: message, Code: "error"} + } + return out, nil +} + func collectAttempt(resp *http.Response, stdReq promptcompat.StandardRequest, usagePrompt string, opts Options) (assistantturn.Turn, *assistantturn.OutputError) { defer func() { if err := resp.Body.Close(); err != nil { diff --git a/internal/completionruntime/nonstream_test.go b/internal/completionruntime/nonstream_test.go index 7c5959ad..12598ab3 100644 --- a/internal/completionruntime/nonstream_test.go +++ b/internal/completionruntime/nonstream_test.go @@ -38,8 +38,11 @@ func (f *fakeDeepSeekCaller) GetPow(context.Context, *auth.RequestAuth, int) (st return "pow", nil } -func (f *fakeDeepSeekCaller) UploadFile(_ context.Context, _ *auth.RequestAuth, req dsclient.UploadFileRequest, _ int) (*dsclient.UploadFileResult, error) { +func (f *fakeDeepSeekCaller) UploadFile(_ context.Context, a *auth.RequestAuth, req dsclient.UploadFileRequest, _ int) (*dsclient.UploadFileResult, error) { f.uploads = append(f.uploads, req) + if a != nil && a.AccountID != "" { + return &dsclient.UploadFileResult{ID: "file-runtime-" + a.AccountID}, nil + } return &dsclient.UploadFileResult{ID: "file-runtime-1"}, nil } @@ -162,6 +165,66 @@ func TestExecuteNonStreamWithRetrySwitchesManagedAccountBeforeFinal429(t *testin } } +func TestExecuteNonStreamWithRetryReuploadsCurrentInputFileAfterAccountSwitch(t *testing.T) { + t.Setenv("DS2API_CONFIG_JSON", `{ + "keys":["managed-key"], + "accounts":[ + {"email":"acc1@test.com","password":"pwd"}, + {"email":"acc2@test.com","password":"pwd"} + ] + }`) + store := config.LoadStore() + resolver := auth.NewResolver(store, account.NewPool(store), func(_ context.Context, acc 
config.Account) (string, error) { + return "token-" + acc.Identifier(), nil + }) + req, _ := http.NewRequest(http.MethodPost, "/", nil) + req.Header.Set("Authorization", "Bearer managed-key") + a, err := resolver.Determine(req) + if err != nil { + t.Fatalf("determine failed: %v", err) + } + defer resolver.Release(a) + + ds := &fakeDeepSeekCaller{ + sessionByAccount: true, + responses: []*http.Response{ + sseHTTPResponse(http.StatusOK, `data: {"response_message_id":11,"p":"response/thinking_content","v":"first empty"}`), + sseHTTPResponse(http.StatusOK, `data: {"response_message_id":12,"p":"response/thinking_content","v":"retry empty"}`), + sseHTTPResponse(http.StatusOK, `data: {"response_message_id":21,"p":"response/content","v":"ok from second account"}`), + }, + } + stdReq := promptcompat.StandardRequest{ + Surface: "test", + RequestedModel: "deepseek-v4-flash", + ResolvedModel: "deepseek-v4-flash", + ResponseModel: "deepseek-v4-flash", + Messages: []any{ + map[string]any{"role": "user", "content": "large current input"}, + }, + PromptTokenText: "large current input", + FinalPrompt: "large current input", + Thinking: true, + } + + result, outErr := ExecuteNonStreamWithRetry(context.Background(), ds, a, stdReq, Options{ + RetryEnabled: true, + CurrentInputFile: currentInputRuntimeConfig{}, + }) + if outErr != nil { + t.Fatalf("unexpected output error after account switch retry: %#v", outErr) + } + if result.Turn.Text != "ok from second account" { + t.Fatalf("text mismatch after switch retry: %q", result.Turn.Text) + } + if len(ds.uploads) != 2 { + t.Fatalf("expected current input file uploaded once per account, got %d", len(ds.uploads)) + } + refIDs, _ := ds.payloads[2]["ref_file_ids"].([]any) + if len(refIDs) != 1 || refIDs[0] != "file-runtime-acc2@test.com" { + t.Fatalf("expected switched account ref_file_ids to use reuploaded file, got %#v", ds.payloads[2]["ref_file_ids"]) + } +} + func TestExecuteNonStreamWithRetryUsesParentMessageForEmptyRetry(t *testing.T) { 
ds := &fakeDeepSeekCaller{responses: []*http.Response{ sseHTTPResponse(http.StatusOK, `data: {"response_message_id":77,"p":"response/thinking_content","v":"plan"}`), diff --git a/internal/completionruntime/stream_retry.go b/internal/completionruntime/stream_retry.go index 03c9dc75..6007ceab 100644 --- a/internal/completionruntime/stream_retry.go +++ b/internal/completionruntime/stream_retry.go @@ -9,7 +9,9 @@ import ( "ds2api/internal/assistantturn" "ds2api/internal/auth" "ds2api/internal/config" + "ds2api/internal/httpapi/openai/history" "ds2api/internal/httpapi/openai/shared" + "ds2api/internal/promptcompat" ) type StreamRetryOptions struct { @@ -19,6 +21,8 @@ type StreamRetryOptions struct { RetryMaxAttempts int MaxAttempts int UsagePrompt string + Request promptcompat.StandardRequest + CurrentInputFile history.CurrentInputConfigReader } type StreamRetryHooks struct { @@ -71,7 +75,7 @@ func ExecuteStreamWithRetry(ctx context.Context, ds DeepSeekCaller, a *auth.Requ if attempts >= retryMax { if canRetryOnAlternateAccount(ctx, a, &assistantturn.OutputError{Status: http.StatusTooManyRequests}, opts.RetryEnabled, &accountSwitchAttempted) { - switched, switchErr := startPayloadCompletionOnAlternateAccount(ctx, ds, a, payload, maxAttempts) + switched, switchErr := startPayloadCompletionOnAlternateAccount(ctx, ds, a, payload, opts, maxAttempts) if switchErr != nil { if hooks.OnRetryFailure != nil { hooks.OnRetryFailure(switchErr.Status, switchErr.Message, switchErr.Code) @@ -142,7 +146,7 @@ func ExecuteStreamWithRetry(ctx context.Context, ds DeepSeekCaller, a *auth.Requ } } -func startPayloadCompletionOnAlternateAccount(ctx context.Context, ds DeepSeekCaller, a *auth.RequestAuth, payload map[string]any, maxAttempts int) (StartResult, *assistantturn.OutputError) { +func startPayloadCompletionOnAlternateAccount(ctx context.Context, ds DeepSeekCaller, a *auth.RequestAuth, payload map[string]any, opts StreamRetryOptions, maxAttempts int) (StartResult, 
*assistantturn.OutputError) { sessionID, err := ds.CreateSession(ctx, a, maxAttempts) if err != nil { return StartResult{}, authOutputError(a) @@ -152,6 +156,13 @@ func startPayloadCompletionOnAlternateAccount(ctx context.Context, ds DeepSeekCa return StartResult{SessionID: sessionID}, &assistantturn.OutputError{Status: http.StatusUnauthorized, Message: "Failed to get PoW (invalid token or unknown error).", Code: "error"} } nextPayload := clonePayload(payload) + if opts.CurrentInputFile != nil && opts.Request.CurrentInputFileApplied { + stdReq, prepErr := reuploadCurrentInputFileForAccount(ctx, ds, a, opts.Request, Options{CurrentInputFile: opts.CurrentInputFile}) + if prepErr != nil { + return StartResult{SessionID: sessionID}, prepErr + } + nextPayload = stdReq.CompletionPayload(sessionID) + } nextPayload["chat_session_id"] = sessionID delete(nextPayload, "parent_message_id") resp, err := ds.CallCompletion(ctx, a, nextPayload, pow, maxAttempts) diff --git a/internal/deepseek/client/client_completion.go b/internal/deepseek/client/client_completion.go index 1b91ce2f..0563d334 100644 --- a/internal/deepseek/client/client_completion.go +++ b/internal/deepseek/client/client_completion.go @@ -5,9 +5,7 @@ import ( "context" dsprotocol "ds2api/internal/deepseek/protocol" "encoding/json" - "errors" "net/http" - "time" "ds2api/internal/auth" "ds2api/internal/config" @@ -15,39 +13,33 @@ import ( ) func (c *Client) CallCompletion(ctx context.Context, a *auth.RequestAuth, payload map[string]any, powResp string, maxAttempts int) (*http.Response, error) { - if maxAttempts <= 0 { - maxAttempts = c.maxRetries - } + _ = maxAttempts clients := c.requestClientsForAuth(ctx, a) headers := c.authHeaders(a.DeepSeekToken) headers["x-ds-pow-response"] = powResp captureSession := c.capture.Start("deepseek_completion", dsprotocol.DeepSeekCompletionURL, a.AccountID, payload) - attempts := 0 - for attempts < maxAttempts { - resp, err := c.streamPost(ctx, clients.stream, 
dsprotocol.DeepSeekCompletionURL, headers, payload) - if err != nil { - attempts++ - time.Sleep(time.Second) - continue - } - if resp.StatusCode == http.StatusOK { - if captureSession != nil { - resp.Body = captureSession.WrapBody(resp.Body, resp.StatusCode) - } - resp = c.wrapCompletionWithAutoContinue(ctx, a, payload, powResp, resp) - return resp, nil - } - if captureSession != nil { - resp.Body = captureSession.WrapBody(resp.Body, resp.StatusCode) - } - _ = resp.Body.Close() - attempts++ - time.Sleep(time.Second) + resp, err := c.streamPostOnce(ctx, clients.stream, dsprotocol.DeepSeekCompletionURL, headers, payload) + if err != nil { + return nil, err + } + if captureSession != nil { + resp.Body = captureSession.WrapBody(resp.Body, resp.StatusCode) + } + if resp.StatusCode == http.StatusOK { + resp = c.wrapCompletionWithAutoContinue(ctx, a, payload, powResp, resp) } - return nil, errors.New("completion failed") + return resp, nil } func (c *Client) streamPost(ctx context.Context, doer trans.Doer, url string, headers map[string]string, payload any) (*http.Response, error) { + return c.streamPostWithFallback(ctx, doer, url, headers, payload, true) +} + +func (c *Client) streamPostOnce(ctx context.Context, doer trans.Doer, url string, headers map[string]string, payload any) (*http.Response, error) { + return c.streamPostWithFallback(ctx, doer, url, headers, payload, false) +} + +func (c *Client) streamPostWithFallback(ctx context.Context, doer trans.Doer, url string, headers map[string]string, payload any, allowFallback bool) (*http.Response, error) { b, err := json.Marshal(payload) if err != nil { return nil, err @@ -63,15 +55,18 @@ func (c *Client) streamPost(ctx context.Context, doer trans.Doer, url string, he } resp, err := doer.Do(req) if err != nil { - config.Logger.Warn("[deepseek] fingerprint stream request failed, fallback to std transport", "url", url, "error", err) - req2, reqErr := http.NewRequestWithContext(ctx, http.MethodPost, url, 
bytes.NewReader(b)) - if reqErr != nil { - return nil, reqErr - } - for k, v := range headers { - req2.Header.Set(k, v) + if allowFallback { + config.Logger.Warn("[deepseek] fingerprint stream request failed, fallback to std transport", "url", url, "error", err) + req2, reqErr := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(b)) + if reqErr != nil { + return nil, reqErr + } + for k, v := range headers { + req2.Header.Set(k, v) + } + return clients.fallbackS.Do(req2) } - return clients.fallbackS.Do(req2) + return nil, err } return resp, nil } diff --git a/internal/deepseek/client/client_completion_test.go b/internal/deepseek/client/client_completion_test.go new file mode 100644 index 00000000..5244c800 --- /dev/null +++ b/internal/deepseek/client/client_completion_test.go @@ -0,0 +1,36 @@ +package client + +import ( + "context" + "errors" + "net/http" + "testing" + + "ds2api/internal/auth" +) + +func TestCallCompletionDoesNotFallbackForNonIdempotentCompletion(t *testing.T) { + var fallbackCalled bool + client := &Client{ + stream: doerFunc(func(*http.Request) (*http.Response, error) { + return nil, errors.New("ambiguous completion write failure") + }), + fallbackS: &http.Client{Transport: roundTripperFunc(func(*http.Request) (*http.Response, error) { + fallbackCalled = true + return &http.Response{StatusCode: http.StatusOK}, nil + })}, + } + _, err := client.CallCompletion( + context.Background(), + &auth.RequestAuth{DeepSeekToken: "token"}, + map[string]any{"prompt": "hello"}, + "pow", + 3, + ) + if err == nil { + t.Fatal("expected completion error") + } + if fallbackCalled { + t.Fatal("completion fallback should not be called for a non-idempotent request") + } +} diff --git a/internal/deepseek/client/client_upload.go b/internal/deepseek/client/client_upload.go index c3334c35..3dc778dd 100644 --- a/internal/deepseek/client/client_upload.go +++ b/internal/deepseek/client/client_upload.go @@ -95,11 +95,7 @@ func (c *Client) UploadFile(ctx 
context.Context, a *auth.RequestAuth, req Upload resp, err := c.doUpload(ctx, clients.regular, clients.fallback, dsprotocol.DeepSeekUploadFileURL, headers, body) if err != nil { config.Logger.Warn("[upload_file] request error", "error", err, "account", a.AccountID, "filename", filename) - powHeader = "" - lastFailureKind = FailureUnknown - lastFailureMessage = err.Error() - attempts++ - continue + return nil, err } if captureSession != nil { resp.Body = captureSession.WrapBody(resp.Body, resp.StatusCode) @@ -201,7 +197,7 @@ func escapeMultipartFilename(filename string) string { return filename } -func (c *Client) doUpload(ctx context.Context, doer trans.Doer, fallback trans.Doer, url string, headers map[string]string, body []byte) (*http.Response, error) { +func (c *Client) doUpload(ctx context.Context, doer trans.Doer, _ trans.Doer, url string, headers map[string]string, body []byte) (*http.Response, error) { req, err := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) if err != nil { return nil, err @@ -213,15 +209,7 @@ func (c *Client) doUpload(ctx context.Context, doer trans.Doer, fallback trans.D if err == nil { return resp, nil } - config.Logger.Warn("[deepseek] fingerprint upload request failed, fallback to std transport", "url", url, "error", err) - req2, reqErr := http.NewRequestWithContext(ctx, http.MethodPost, url, bytes.NewReader(body)) - if reqErr != nil { - return nil, reqErr - } - for k, v := range headers { - req2.Header.Set(k, v) - } - return fallback.Do(req2) + return nil, err } func extractUploadFileResult(resp map[string]any) *UploadFileResult { diff --git a/internal/deepseek/client/client_upload_test.go b/internal/deepseek/client/client_upload_test.go index e7d1cc02..ff547da3 100644 --- a/internal/deepseek/client/client_upload_test.go +++ b/internal/deepseek/client/client_upload_test.go @@ -6,6 +6,7 @@ import ( "encoding/base64" "encoding/hex" "encoding/json" + "errors" "io" "net/http" "strings" @@ -39,6 +40,31 @@ 
func TestBuildUploadMultipartBodyOmitsPurposeAndIncludesFilePart(t *testing.T) { } } +func TestDoUploadDoesNotFallbackForNonIdempotentUpload(t *testing.T) { + var fallbackCalled bool + client := &Client{} + _, err := client.doUpload( + context.Background(), + doerFunc(func(req *http.Request) (*http.Response, error) { + _, _ = io.ReadAll(req.Body) + return nil, errors.New("ambiguous upload write failure") + }), + doerFunc(func(*http.Request) (*http.Response, error) { + fallbackCalled = true + return &http.Response{StatusCode: http.StatusOK, Header: make(http.Header), Body: io.NopCloser(strings.NewReader("{}"))}, nil + }), + dsprotocol.DeepSeekUploadFileURL, + map[string]string{"Content-Type": "multipart/form-data"}, + []byte("body"), + ) + if err == nil { + t.Fatal("expected upload error") + } + if fallbackCalled { + t.Fatal("upload fallback should not be called for a non-idempotent request") + } +} + func TestExtractUploadFileResultSupportsNestedShapes(t *testing.T) { got := extractUploadFileResult(map[string]any{ "data": map[string]any{ diff --git a/internal/httpapi/claude/current_input_file_test.go b/internal/httpapi/claude/current_input_file_test.go index fa6b34b0..d49646ef 100644 --- a/internal/httpapi/claude/current_input_file_test.go +++ b/internal/httpapi/claude/current_input_file_test.go @@ -93,7 +93,11 @@ func (d *claudeCurrentInputDS) GetPow(context.Context, *auth.RequestAuth, int) ( func (d *claudeCurrentInputDS) UploadFile(_ context.Context, _ *auth.RequestAuth, req dsclient.UploadFileRequest, _ int) (*dsclient.UploadFileResult, error) { d.uploads = append(d.uploads, req) - return &dsclient.UploadFileResult{ID: "file-claude-history"}, nil + id := "file-claude-history" + if len(d.uploads) > 1 { + id = "file-claude-tools" + } + return &dsclient.UploadFileResult{ID: id}, nil } func (d *claudeCurrentInputDS) CallCompletion(_ context.Context, _ *auth.RequestAuth, payload map[string]any, _ string, _ int) (*http.Response, error) { @@ -156,3 +160,47 @@ func 
TestClaudeDirectAppliesCurrentInputFile(t *testing.T) { t.Fatalf("expected persisted message to match upstream continuation prompt, got %#v", full.Messages) } } + +func TestClaudeCurrentInputFileUploadsToolsSeparately(t *testing.T) { + ds := &claudeCurrentInputDS{} + h := &Handler{ + Store: mockClaudeConfig{aliases: map[string]string{"claude-sonnet-4-6": "deepseek-v4-flash"}}, + Auth: claudeCurrentInputAuth{}, + DS: ds, + } + reqBody := `{"model":"claude-sonnet-4-6","messages":[{"role":"user","content":"hello from claude"}],"tools":[{"name":"search","description":"Search docs","input_schema":{"type":"object"}}],"max_tokens":1024}` + req := httptest.NewRequest(http.MethodPost, "/v1/messages", strings.NewReader(reqBody)) + req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + + h.Messages(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String()) + } + if len(ds.uploads) != 2 { + t.Fatalf("expected history and tools uploads, got %d", len(ds.uploads)) + } + if ds.uploads[0].Filename != "DS2API_HISTORY.txt" || ds.uploads[1].Filename != "DS2API_TOOLS.txt" { + t.Fatalf("unexpected upload filenames: %#v", ds.uploads) + } + historyText := string(ds.uploads[0].Data) + if strings.Contains(historyText, "You have access to these tools") || strings.Contains(historyText, "Description: Search docs") { + t.Fatalf("history transcript should not embed tool descriptions, got %q", historyText) + } + toolsText := string(ds.uploads[1].Data) + if !strings.Contains(toolsText, "# DS2API_TOOLS.txt") || !strings.Contains(toolsText, "Tool: search") || !strings.Contains(toolsText, "Description: Search docs") { + t.Fatalf("expected tools transcript to include tool schema, got %q", toolsText) + } + refIDs, _ := ds.payload["ref_file_ids"].([]any) + if len(refIDs) < 2 || refIDs[0] != "file-claude-history" || refIDs[1] != "file-claude-tools" { + t.Fatalf("expected history and tools ref ids first, got %#v", 
ds.payload["ref_file_ids"]) + } + prompt, _ := ds.payload["prompt"].(string) + if !strings.Contains(prompt, "DS2API_TOOLS.txt") || !strings.Contains(prompt, "TOOL CALL FORMAT") { + t.Fatalf("expected live prompt to reference tools file and retain format instructions, got %q", prompt) + } + if strings.Contains(prompt, "Description: Search docs") { + t.Fatalf("live prompt should not inline tool descriptions, got %q", prompt) + } +} diff --git a/internal/httpapi/claude/handler_messages.go b/internal/httpapi/claude/handler_messages.go index e22a1edc..a89ed8da 100644 --- a/internal/httpapi/claude/handler_messages.go +++ b/internal/httpapi/claude/handler_messages.go @@ -145,7 +145,7 @@ func (h *Handler) handleClaudeDirectStream(w http.ResponseWriter, r *http.Reques return } streamReq := start.Request - h.handleClaudeStreamRealtimeWithRetry(w, r, a, start.Response, start.Payload, start.Pow, streamReq.ResponseModel, streamReq.Messages, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, streamReq.PromptTokenText, historySession) + h.handleClaudeStreamRealtimeWithRetry(w, r, a, start.Response, start.Payload, start.Pow, streamReq, streamReq.ResponseModel, streamReq.Messages, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, streamReq.PromptTokenText, historySession) } func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, store ConfigReader) bool { @@ -361,7 +361,7 @@ func (h *Handler) handleClaudeStreamRealtime(w http.ResponseWriter, r *http.Requ }) } -func (h *Handler) handleClaudeStreamRealtimeWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, model string, messages []any, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, promptTokenText string, historySession *responsehistory.Session) { +func (h *Handler) handleClaudeStreamRealtimeWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp 
*http.Response, payload map[string]any, pow string, stdReq promptcompat.StandardRequest, model string, messages []any, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, promptTokenText string, historySession *responsehistory.Session) { if resp.StatusCode != http.StatusOK { defer func() { _ = resp.Body.Close() }() body, _ := io.ReadAll(resp.Body) @@ -399,11 +399,13 @@ func (h *Handler) handleClaudeStreamRealtimeWithRetry(w http.ResponseWriter, r * streamRuntime.sendMessageStart() completionruntime.ExecuteStreamWithRetry(r.Context(), h.DS, a, resp, payload, pow, completionruntime.StreamRetryOptions{ - Surface: "claude.messages", - Stream: true, - RetryEnabled: true, - MaxAttempts: 3, - UsagePrompt: promptTokenText, + Surface: "claude.messages", + Stream: true, + RetryEnabled: true, + MaxAttempts: 3, + UsagePrompt: promptTokenText, + Request: stdReq, + CurrentInputFile: h.Store, }, completionruntime.StreamRetryHooks{ ConsumeAttempt: func(currentResp *http.Response, allowDeferEmpty bool) (bool, bool) { return h.consumeClaudeStreamAttempt(r, currentResp, streamRuntime, thinkingEnabled, allowDeferEmpty) diff --git a/internal/httpapi/claude/handler_util_test.go b/internal/httpapi/claude/handler_util_test.go index a624b01f..7d229fbc 100644 --- a/internal/httpapi/claude/handler_util_test.go +++ b/internal/httpapi/claude/handler_util_test.go @@ -93,10 +93,10 @@ func TestNormalizeClaudeMessagesToolUseToAssistantToolCalls(t *testing.T) { t.Fatalf("expected call id preserved, got %#v", call) } content, _ := m["content"].(string) - if !containsStr(content, "<|DSML|tool_calls>") || !containsStr(content, `<|DSML|invoke name="search_web">`) { + if !containsStr(content, "<|DSML|tool_calls>") || !containsStr(content, `<|DSML|invoke name="search_web">`) { t.Fatalf("expected assistant content to include DSML tool call history, got %q", content) } - if !containsStr(content, `<|DSML|parameter name="query">`) { + if !containsStr(content, `<|DSML|parameter 
name="query">`) { t.Fatalf("expected assistant content to include serialized parameters, got %q", content) } } @@ -133,7 +133,7 @@ func TestNormalizeClaudeMessagesPreservesThinkingOnToolUseHistory(t *testing.T) if !containsStr(prompt, "[reasoning_content]\nneed live search before answering\n[/reasoning_content]") { t.Fatalf("expected thinking in prompt history, got %q", prompt) } - if !containsStr(prompt, `<|DSML|invoke name="search_web">`) { + if !containsStr(prompt, `<|DSML|invoke name="search_web">`) { t.Fatalf("expected tool call in prompt history, got %q", prompt) } } @@ -329,7 +329,7 @@ func TestBuildClaudeToolPromptSingleTool(t *testing.T) { if !containsStr(prompt, "Search the web") { t.Fatalf("expected description in prompt") } - if !containsStr(prompt, "<|DSML|tool_calls>") { + if !containsStr(prompt, "<|DSML|tool_calls>") { t.Fatalf("expected DSML tool_calls format in prompt") } if !containsStr(prompt, "TOOL CALL FORMAT") { diff --git a/internal/httpapi/claude/standard_request.go b/internal/httpapi/claude/standard_request.go index 49d9bffc..4998eb94 100644 --- a/internal/httpapi/claude/standard_request.go +++ b/internal/httpapi/claude/standard_request.go @@ -52,7 +52,7 @@ func normalizeClaudeRequest(store ConfigReader, req map[string]any) (claudeNorma RequestedModel: strings.TrimSpace(model), ResolvedModel: dsModel, ResponseModel: strings.TrimSpace(model), - Messages: payload["messages"].([]any), + Messages: normalizedMessages, PromptTokenText: finalPrompt, ToolsRaw: toolsRequested, FinalPrompt: finalPrompt, diff --git a/internal/httpapi/gemini/convert_messages_test.go b/internal/httpapi/gemini/convert_messages_test.go index a4293254..6f0890f3 100644 --- a/internal/httpapi/gemini/convert_messages_test.go +++ b/internal/httpapi/gemini/convert_messages_test.go @@ -89,7 +89,7 @@ func TestGeminiMessagesFromRequestPreservesThoughtOnFunctionCallHistory(t *testi if !strings.Contains(prompt, "[reasoning_content]\nneed current state before 
answering\n[/reasoning_content]") { t.Fatalf("expected thought in prompt history, got %q", prompt) } - if !strings.Contains(prompt, `<|DSML|invoke name="search_web">`) { + if !strings.Contains(prompt, `<|DSML|invoke name="search_web">`) { t.Fatalf("expected tool call in prompt history, got %q", prompt) } } diff --git a/internal/httpapi/gemini/handler_generate.go b/internal/httpapi/gemini/handler_generate.go index 784ff757..b9a648d4 100644 --- a/internal/httpapi/gemini/handler_generate.go +++ b/internal/httpapi/gemini/handler_generate.go @@ -137,7 +137,7 @@ func (h *Handler) handleGeminiDirectStream(w http.ResponseWriter, r *http.Reques return } streamReq := start.Request - h.handleStreamGenerateContentWithRetry(w, r, a, start.Response, start.Payload, start.Pow, streamReq.ResponseModel, streamReq.PromptTokenText, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, historySession) + h.handleStreamGenerateContentWithRetry(w, r, a, start.Response, start.Payload, start.Pow, streamReq, streamReq.ResponseModel, streamReq.PromptTokenText, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, historySession) } func (h *Handler) proxyViaOpenAI(w http.ResponseWriter, r *http.Request, stream bool) bool { diff --git a/internal/httpapi/gemini/handler_stream_runtime.go b/internal/httpapi/gemini/handler_stream_runtime.go index a1244ad6..6a98a4e6 100644 --- a/internal/httpapi/gemini/handler_stream_runtime.go +++ b/internal/httpapi/gemini/handler_stream_runtime.go @@ -12,6 +12,7 @@ import ( "ds2api/internal/auth" "ds2api/internal/completionruntime" dsprotocol "ds2api/internal/deepseek/protocol" + "ds2api/internal/promptcompat" "ds2api/internal/responsehistory" "ds2api/internal/sse" streamengine "ds2api/internal/stream" @@ -87,7 +88,7 @@ type geminiStreamRuntime struct { history *responsehistory.Session } -func (h *Handler) handleStreamGenerateContentWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp 
*http.Response, payload map[string]any, pow, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, historySession *responsehistory.Session) { +func (h *Handler) handleStreamGenerateContentWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow string, stdReq promptcompat.StandardRequest, model, finalPrompt string, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, historySession *responsehistory.Session) { if resp.StatusCode != http.StatusOK { defer func() { _ = resp.Body.Close() }() body, _ := io.ReadAll(resp.Body) @@ -108,11 +109,13 @@ func (h *Handler) handleStreamGenerateContentWithRetry(w http.ResponseWriter, r runtime := newGeminiStreamRuntime(w, rc, canFlush, model, finalPrompt, thinkingEnabled, searchEnabled, stripReferenceMarkersEnabled(), toolNames, toolsRaw, historySession) completionruntime.ExecuteStreamWithRetry(r.Context(), h.DS, a, resp, payload, pow, completionruntime.StreamRetryOptions{ - Surface: "gemini.generate_content", - Stream: true, - RetryEnabled: true, - MaxAttempts: 3, - UsagePrompt: finalPrompt, + Surface: "gemini.generate_content", + Stream: true, + RetryEnabled: true, + MaxAttempts: 3, + UsagePrompt: finalPrompt, + Request: stdReq, + CurrentInputFile: h.Store, }, completionruntime.StreamRetryHooks{ ConsumeAttempt: func(currentResp *http.Response, allowDeferEmpty bool) (bool, bool) { return h.consumeGeminiStreamAttempt(r.Context(), currentResp, runtime, thinkingEnabled, allowDeferEmpty) diff --git a/internal/httpapi/gemini/handler_test.go b/internal/httpapi/gemini/handler_test.go index 90a1fe9a..9409b722 100644 --- a/internal/httpapi/gemini/handler_test.go +++ b/internal/httpapi/gemini/handler_test.go @@ -67,7 +67,11 @@ func (m *testGeminiDS) GetPow(_ context.Context, _ *auth.RequestAuth, _ int) (st //nolint:unused // reserved test double for native Gemini DS-call path coverage. 
func (m *testGeminiDS) UploadFile(_ context.Context, _ *auth.RequestAuth, req dsclient.UploadFileRequest, _ int) (*dsclient.UploadFileResult, error) { m.uploadCalls = append(m.uploadCalls, req) - return &dsclient.UploadFileResult{ID: "file-gemini-history"}, nil + id := "file-gemini-history" + if len(m.uploadCalls) > 1 { + id = "file-gemini-tools" + } + return &dsclient.UploadFileResult{ID: id}, nil } //nolint:unused // reserved test double for native Gemini DS-call path coverage. @@ -201,6 +205,57 @@ func TestGeminiDirectAppliesCurrentInputFile(t *testing.T) { } } +func TestGeminiCurrentInputFileUploadsToolsSeparately(t *testing.T) { + ds := &testGeminiDS{ + resp: makeGeminiUpstreamResponse(`data: {"p":"response/content","v":"ok"}`), + } + h := &Handler{ + Store: testGeminiConfig{}, + Auth: testGeminiAuth{}, + DS: ds, + } + reqBody := `{ + "contents":[{"role":"user","parts":[{"text":"run code"}]}], + "tools":[{"functionDeclarations":[{"name":"eval_javascript","description":"eval","parameters":{"type":"object","properties":{"code":{"type":"string"}}}}]}] + }` + req := httptest.NewRequest(http.MethodPost, "/v1beta/models/gemini-2.5-pro:generateContent", strings.NewReader(reqBody)) + req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + r := chi.NewRouter() + RegisterRoutes(r, h) + + r.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String()) + } + if len(ds.uploadCalls) != 2 { + t.Fatalf("expected history and tools uploads, got %d", len(ds.uploadCalls)) + } + if ds.uploadCalls[0].Filename != "DS2API_HISTORY.txt" || ds.uploadCalls[1].Filename != "DS2API_TOOLS.txt" { + t.Fatalf("unexpected upload filenames: %#v", ds.uploadCalls) + } + historyText := string(ds.uploadCalls[0].Data) + if strings.Contains(historyText, "Description: eval") { + t.Fatalf("history transcript should not embed tool descriptions, got %q", historyText) + } + toolsText := 
string(ds.uploadCalls[1].Data) + if !strings.Contains(toolsText, "# DS2API_TOOLS.txt") || !strings.Contains(toolsText, "Tool: eval_javascript") || !strings.Contains(toolsText, "Description: eval") { + t.Fatalf("expected tools transcript to include Gemini tool schema, got %q", toolsText) + } + refIDs, _ := ds.payloads[0]["ref_file_ids"].([]any) + if len(refIDs) < 2 || refIDs[0] != "file-gemini-history" || refIDs[1] != "file-gemini-tools" { + t.Fatalf("expected history and tools ref ids first, got %#v", ds.payloads[0]["ref_file_ids"]) + } + prompt, _ := ds.payloads[0]["prompt"].(string) + if !strings.Contains(prompt, "DS2API_TOOLS.txt") || !strings.Contains(prompt, "TOOL CALL FORMAT") { + t.Fatalf("expected live prompt to reference tools file and retain format instructions, got %q", prompt) + } + if strings.Contains(prompt, "Description: eval") { + t.Fatalf("live prompt should not inline tool descriptions, got %q", prompt) + } +} + func TestGeminiRoutesRegistered(t *testing.T) { h := &Handler{ Store: testGeminiConfig{}, diff --git a/internal/httpapi/openai/chat/empty_retry_runtime.go b/internal/httpapi/openai/chat/empty_retry_runtime.go index 1dc8ca94..3494b6de 100644 --- a/internal/httpapi/openai/chat/empty_retry_runtime.go +++ b/internal/httpapi/openai/chat/empty_retry_runtime.go @@ -66,7 +66,7 @@ func (h *Handler) handleNonStreamWithRetry(w http.ResponseWriter, ctx context.Co writeJSON(w, http.StatusOK, respBody) } -func (h *Handler) handleStreamWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, completionID string, sessionIDRef *string, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, historySession *chatHistorySession) { +func (h *Handler) handleStreamWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, completionID 
string, sessionIDRef *string, stdReq promptcompat.StandardRequest, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, historySession *chatHistorySession) { streamRuntime, initialType, ok := h.prepareChatStreamRuntime(w, resp, completionID, model, finalPrompt, refFileTokens, thinkingEnabled, searchEnabled, toolNames, toolsRaw, toolChoice, historySession) if !ok { return @@ -78,6 +78,8 @@ func (h *Handler) handleStreamWithRetry(w http.ResponseWriter, r *http.Request, RetryMaxAttempts: emptyOutputRetryMaxAttempts(), MaxAttempts: 3, UsagePrompt: finalPrompt, + Request: stdReq, + CurrentInputFile: h.Store, }, completionruntime.StreamRetryHooks{ ConsumeAttempt: func(currentResp *http.Response, allowDeferEmpty bool) (bool, bool) { return h.consumeChatStreamAttempt(r, currentResp, streamRuntime, initialType, thinkingEnabled, historySession, allowDeferEmpty) diff --git a/internal/httpapi/openai/chat/handler.go b/internal/httpapi/openai/chat/handler.go index da0ad4a2..d91091d2 100644 --- a/internal/httpapi/openai/chat/handler.go +++ b/internal/httpapi/openai/chat/handler.go @@ -33,6 +33,8 @@ type Handler struct { type streamLease struct { Auth *auth.RequestAuth + Standard promptcompat.StandardRequest + SessionID string ExpiresAt time.Time } diff --git a/internal/httpapi/openai/chat/handler_chat.go b/internal/httpapi/openai/chat/handler_chat.go index 9d86cf74..c46278bd 100644 --- a/internal/httpapi/openai/chat/handler_chat.go +++ b/internal/httpapi/openai/chat/handler_chat.go @@ -28,6 +28,10 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) { h.handleVercelStreamPow(w, r) return } + if isVercelStreamSwitchRequest(r) { + h.handleVercelStreamSwitch(w, r) + return + } if isVercelStreamPrepareRequest(r) { h.handleVercelStreamPrepare(w, r) return @@ -114,7 +118,7 @@ func (h *Handler) ChatCompletions(w http.ResponseWriter, r *http.Request) { } 
streamReq := start.Request refFileTokens := streamReq.RefFileTokens - h.handleStreamWithRetry(w, r, a, start.Response, start.Payload, start.Pow, sessionID, &sessionID, streamReq.ResponseModel, streamReq.PromptTokenText, refFileTokens, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, streamReq.ToolChoice, historySession) + h.handleStreamWithRetry(w, r, a, start.Response, start.Payload, start.Pow, sessionID, &sessionID, streamReq, streamReq.ResponseModel, streamReq.PromptTokenText, refFileTokens, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, streamReq.ToolChoice, historySession) } func (h *Handler) autoDeleteRemoteSession(ctx context.Context, a *auth.RequestAuth, sessionID string) { diff --git a/internal/httpapi/openai/chat/test_helpers_test.go b/internal/httpapi/openai/chat/test_helpers_test.go index d8284cd7..8a8baa9f 100644 --- a/internal/httpapi/openai/chat/test_helpers_test.go +++ b/internal/httpapi/openai/chat/test_helpers_test.go @@ -2,6 +2,7 @@ package chat import ( "context" + "fmt" "io" "net/http" "strings" @@ -148,8 +149,12 @@ func (m *inlineUploadDSStub) UploadFile(ctx context.Context, _ *auth.RequestAuth if m.uploadErr != nil { return nil, m.uploadErr } + id := "file-inline-1" + if len(m.uploadCalls) > 1 { + id = "file-inline-" + fmt.Sprint(len(m.uploadCalls)) + } return &dsclient.UploadFileResult{ - ID: "file-inline-1", + ID: id, Filename: req.Filename, Bytes: int64(len(req.Data)), Status: "uploaded", diff --git a/internal/httpapi/openai/chat/vercel_prepare_test.go b/internal/httpapi/openai/chat/vercel_prepare_test.go index 38fccc2f..b8811807 100644 --- a/internal/httpapi/openai/chat/vercel_prepare_test.go +++ b/internal/httpapi/openai/chat/vercel_prepare_test.go @@ -1,6 +1,7 @@ package chat import ( + "context" "encoding/json" "net/http" "net/http/httptest" @@ -8,8 +9,11 @@ import ( "testing" "time" + "ds2api/internal/account" "ds2api/internal/auth" + "ds2api/internal/config" dsclient 
"ds2api/internal/deepseek/client" + "ds2api/internal/promptcompat" ) func TestIsVercelStreamPrepareRequest(t *testing.T) { @@ -64,14 +68,16 @@ func TestVercelInternalSecret(t *testing.T) { func TestStreamLeaseLifecycle(t *testing.T) { h := &Handler{} - leaseID := h.holdStreamLease(&auth.RequestAuth{UseConfigToken: false}) + leaseID := h.holdStreamLease(&auth.RequestAuth{UseConfigToken: false}, promptcompat.StandardRequest{}, "test-session-id") if leaseID == "" { t.Fatalf("expected non-empty lease id") } - if ok := h.releaseStreamLease(leaseID); !ok { + if lease, ok := h.releaseStreamLease(leaseID); !ok { t.Fatalf("expected lease release success") + } else if lease.SessionID != "test-session-id" { + t.Fatalf("expected released session id, got %q", lease.SessionID) } - if ok := h.releaseStreamLease(leaseID); ok { + if _, ok := h.releaseStreamLease(leaseID); ok { t.Fatalf("expected duplicate release to fail") } } @@ -141,6 +147,243 @@ func TestHandleVercelStreamPrepareAppliesCurrentInputFile(t *testing.T) { } } +func TestHandleVercelStreamPrepareUsesHalfwidthDSMLToolPrompt(t *testing.T) { + t.Setenv("VERCEL", "1") + t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "stream-secret") + + h := &Handler{ + Store: mockOpenAIConfig{}, + Auth: streamStatusAuthStub{}, + DS: &inlineUploadDSStub{}, + } + + reqBody, _ := json.Marshal(map[string]any{ + "model": "deepseek-v4-flash", + "messages": []any{ + map[string]any{"role": "user", "content": "search docs"}, + }, + "tools": []any{ + map[string]any{ + "type": "function", + "function": map[string]any{ + "name": "search", + "description": "search docs", + "parameters": map[string]any{ + "type": "object", + "properties": map[string]any{ + "query": map[string]any{"type": "string"}, + }, + "required": []any{"query"}, + }, + }, + }, + }, + "stream": true, + }) + req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions?__stream_prepare=1", strings.NewReader(string(reqBody))) + req.Header.Set("Authorization", "Bearer direct-token") + 
req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-Ds2-Internal-Token", "stream-secret") + rec := httptest.NewRecorder() + + h.handleVercelStreamPrepare(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String()) + } + var body map[string]any + if err := json.NewDecoder(rec.Body).Decode(&body); err != nil { + t.Fatalf("decode failed: %v", err) + } + finalPrompt, _ := body["final_prompt"].(string) + payload, _ := body["payload"].(map[string]any) + payloadPrompt, _ := payload["prompt"].(string) + for label, promptText := range map[string]string{"final_prompt": finalPrompt, "payload.prompt": payloadPrompt} { + if !strings.Contains(promptText, "<|DSML|tool_calls>") || !strings.Contains(promptText, "Tag punctuation alphabet: ASCII < > / = \" plus the halfwidth pipe |.") { + t.Fatalf("expected %s to contain halfwidth DSML tool instructions, got %q", label, promptText) + } + if strings.Contains(promptText, "\uff5c") || strings.Contains(promptText, "full"+"width vertical bar") { + t.Fatalf("expected %s not to contain legacy pipe guidance, got %q", label, promptText) + } + } + toolNames, _ := body["tool_names"].([]any) + if len(toolNames) != 1 || toolNames[0] != "search" { + t.Fatalf("expected prepared tool names to align with request tools, got %#v", body["tool_names"]) + } +} + +type vercelReleaseAutoDeleteDSStub struct { + resp *http.Response + deleteCallCount int + deletedSessionID string + deletedToken string + deleteErr error + events *[]string +} + +func (m *vercelReleaseAutoDeleteDSStub) CreateSession(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) { + return "session-id", nil +} + +func (m *vercelReleaseAutoDeleteDSStub) GetPow(_ context.Context, _ *auth.RequestAuth, _ int) (string, error) { + return "pow", nil +} + +func (m *vercelReleaseAutoDeleteDSStub) UploadFile(_ context.Context, _ *auth.RequestAuth, _ dsclient.UploadFileRequest, _ int) (*dsclient.UploadFileResult, 
error) { + return &dsclient.UploadFileResult{ID: "file-id", Filename: "file.txt", Bytes: 1, Status: "uploaded"}, nil +} + +func (m *vercelReleaseAutoDeleteDSStub) CallCompletion(_ context.Context, _ *auth.RequestAuth, _ map[string]any, _ string, _ int) (*http.Response, error) { + return m.resp, nil +} + +func (m *vercelReleaseAutoDeleteDSStub) DeleteSessionForToken(_ context.Context, token string, sessionID string) (*dsclient.DeleteSessionResult, error) { + if m.events != nil { + *m.events = append(*m.events, "delete") + } + m.deleteCallCount++ + m.deletedSessionID = sessionID + m.deletedToken = token + if m.deleteErr != nil { + return nil, m.deleteErr + } + return &dsclient.DeleteSessionResult{SessionID: sessionID, Success: true}, nil +} + +func (m *vercelReleaseAutoDeleteDSStub) DeleteAllSessionsForToken(_ context.Context, _ string) error { + return nil +} + +type vercelReleaseAuthStub struct { + events *[]string +} + +func (a *vercelReleaseAuthStub) Determine(_ *http.Request) (*auth.RequestAuth, error) { + return &auth.RequestAuth{DeepSeekToken: "test-token", AccountID: "test-account"}, nil +} + +func (a *vercelReleaseAuthStub) DetermineCaller(_ *http.Request) (*auth.RequestAuth, error) { + return &auth.RequestAuth{DeepSeekToken: "test-token", AccountID: "test-account"}, nil +} + +func (a *vercelReleaseAuthStub) Release(_ *auth.RequestAuth) { + if a.events != nil { + *a.events = append(*a.events, "release") + } +} + +func TestHandleVercelStreamReleaseTriggersAutoDelete(t *testing.T) { + t.Setenv("VERCEL", "1") + t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "stream-secret") + + events := []string{} + ds := &vercelReleaseAutoDeleteDSStub{events: &events} + h := &Handler{ + Store: mockOpenAIConfig{ + autoDeleteMode: "single", + }, + Auth: &vercelReleaseAuthStub{events: &events}, + DS: ds, + } + + leaseID := h.holdStreamLease(&auth.RequestAuth{DeepSeekToken: "test-token", AccountID: "test-account"}, promptcompat.StandardRequest{}, "session-to-delete") + if leaseID == 
"" { + t.Fatalf("expected non-empty lease id") + } + + reqBody := map[string]any{"lease_id": leaseID} + reqJSON, _ := json.Marshal(reqBody) + req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions?__stream_release=1", strings.NewReader(string(reqJSON))) + req.Header.Set("X-Ds2-Internal-Token", "stream-secret") + req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + + h.handleVercelStreamRelease(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String()) + } + if ds.deleteCallCount != 1 { + t.Fatalf("expected auto delete call count=1, got %d", ds.deleteCallCount) + } + if ds.deletedSessionID != "session-to-delete" { + t.Fatalf("expected deleted session id=session-to-delete, got %q", ds.deletedSessionID) + } + if got, want := strings.Join(events, ","), "delete,release"; got != want { + t.Fatalf("expected auto-delete before auth release, got %s", got) + } +} + +func TestHandleVercelStreamPrepareUploadsToolsSeparately(t *testing.T) { + t.Setenv("VERCEL", "1") + t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "stream-secret") + + ds := &inlineUploadDSStub{} + h := &Handler{ + Store: mockOpenAIConfig{currentInputEnabled: true}, + Auth: streamStatusAuthStub{}, + DS: ds, + } + + reqBody, _ := json.Marshal(map[string]any{ + "model": "deepseek-v4-flash", + "messages": []any{ + map[string]any{"role": "user", "content": "search docs"}, + }, + "tools": []any{ + map[string]any{ + "type": "function", + "function": map[string]any{ + "name": "search", + "description": "search docs", + "parameters": map[string]any{"type": "object"}, + }, + }, + }, + "stream": true, + }) + req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions?__stream_prepare=1", strings.NewReader(string(reqBody))) + req.Header.Set("Authorization", "Bearer direct-token") + req.Header.Set("Content-Type", "application/json") + req.Header.Set("X-Ds2-Internal-Token", "stream-secret") + rec := httptest.NewRecorder() + 
+ h.handleVercelStreamPrepare(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String()) + } + if len(ds.uploadCalls) != 2 { + t.Fatalf("expected history and tools uploads, got %d", len(ds.uploadCalls)) + } + if ds.uploadCalls[0].Filename != "DS2API_HISTORY.txt" || ds.uploadCalls[1].Filename != "DS2API_TOOLS.txt" { + t.Fatalf("unexpected upload filenames: %#v", ds.uploadCalls) + } + if strings.Contains(string(ds.uploadCalls[0].Data), "Description: search docs") { + t.Fatalf("history transcript should not embed tool descriptions, got %q", string(ds.uploadCalls[0].Data)) + } + + var body map[string]any + if err := json.NewDecoder(rec.Body).Decode(&body); err != nil { + t.Fatalf("decode failed: %v", err) + } + finalPrompt, _ := body["final_prompt"].(string) + payload, _ := body["payload"].(map[string]any) + payloadPrompt, _ := payload["prompt"].(string) + for label, promptText := range map[string]string{"final_prompt": finalPrompt, "payload.prompt": payloadPrompt} { + if !strings.Contains(promptText, "DS2API_TOOLS.txt") || !strings.Contains(promptText, "TOOL CALL FORMAT") { + t.Fatalf("expected %s to reference tools file and retain tool instructions, got %q", label, promptText) + } + if strings.Contains(promptText, "Description: search docs") { + t.Fatalf("expected %s not to inline tool descriptions, got %q", label, promptText) + } + } + refIDs, _ := payload["ref_file_ids"].([]any) + if len(refIDs) < 2 || refIDs[0] != "file-inline-1" || refIDs[1] != "file-inline-2" { + t.Fatalf("expected history and tools ref ids first, got %#v", payload["ref_file_ids"]) + } +} + func TestHandleVercelStreamPrepareMapsCurrentInputFileManagedAuthFailureTo401(t *testing.T) { t.Setenv("VERCEL", "1") t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "stream-secret") @@ -176,3 +419,88 @@ func TestHandleVercelStreamPrepareMapsCurrentInputFileManagedAuthFailureTo401(t t.Fatalf("expected managed auth error message, got %s", rec.Body.String()) } 
} + +func TestHandleVercelStreamSwitchReuploadsCurrentInputFile(t *testing.T) { + t.Setenv("VERCEL", "1") + t.Setenv("DS2API_VERCEL_INTERNAL_SECRET", "stream-secret") + t.Setenv("DS2API_CONFIG_JSON", `{ + "keys":["managed-key"], + "accounts":[ + {"email":"acc1@test.com","password":"pwd"}, + {"email":"acc2@test.com","password":"pwd"} + ] + }`) + store := config.LoadStore() + resolver := auth.NewResolver(store, account.NewPool(store), func(_ context.Context, acc config.Account) (string, error) { + return "token-" + acc.Identifier(), nil + }) + authReq := httptest.NewRequest(http.MethodPost, "/", nil) + authReq.Header.Set("Authorization", "Bearer managed-key") + a, err := resolver.Determine(authReq) + if err != nil { + t.Fatalf("determine failed: %v", err) + } + defer resolver.Release(a) + + ds := &inlineUploadDSStub{} + h := &Handler{ + Store: mockOpenAIConfig{currentInputEnabled: true}, + Auth: resolver, + DS: ds, + } + stdReq := promptcompat.StandardRequest{ + RequestedModel: "deepseek-v4-flash", + ResolvedModel: "deepseek-v4-flash", + ResponseModel: "deepseek-v4-flash", + FinalPrompt: "Continue from the latest state in the attached DS2API_HISTORY.txt context. Available tool descriptions and parameter schemas are attached in DS2API_TOOLS.txt; use only those tools and follow the tool-call format rules in this prompt.", + PromptTokenText: "# DS2API_HISTORY.txt\n\n=== 1. USER ===\nhello\n\n# DS2API_TOOLS.txt\nAvailable tool descriptions and parameter schemas for this request.\n\nYou have access to these tools:\n\nTool: search\nDescription: search docs\nParameters: {\"type\":\"object\"}\n", + HistoryText: "# DS2API_HISTORY.txt\n\n=== 1. 
USER ===\nhello\n", + CurrentInputFileApplied: true, + CurrentInputFileID: "file-old", + CurrentToolsFileID: "file-old-tools", + ToolsRaw: []any{ + map[string]any{ + "type": "function", + "function": map[string]any{ + "name": "search", + "description": "search docs", + "parameters": map[string]any{"type": "object"}, + }, + }, + }, + RefFileIDs: []string{"file-old", "file-old-tools", "client-file"}, + Thinking: true, + } + leaseID := h.holdStreamLease(a, stdReq, "") + req := httptest.NewRequest(http.MethodPost, "/v1/chat/completions?__stream_switch=1", strings.NewReader(`{"lease_id":"`+leaseID+`"}`)) + req.Header.Set("X-Ds2-Internal-Token", "stream-secret") + rec := httptest.NewRecorder() + + h.handleVercelStreamSwitch(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String()) + } + if len(ds.uploadCalls) != 2 { + t.Fatalf("expected current input and tools reupload on switched account, got %d", len(ds.uploadCalls)) + } + if ds.uploadCalls[0].Filename != "DS2API_HISTORY.txt" || ds.uploadCalls[1].Filename != "DS2API_TOOLS.txt" { + t.Fatalf("unexpected reupload filenames: %#v", ds.uploadCalls) + } + var body map[string]any + if err := json.NewDecoder(rec.Body).Decode(&body); err != nil { + t.Fatalf("decode failed: %v", err) + } + if body["deepseek_token"] != "token-acc2@test.com" { + t.Fatalf("expected switched account token, got %#v", body["deepseek_token"]) + } + payload, _ := body["payload"].(map[string]any) + refIDs, _ := payload["ref_file_ids"].([]any) + if len(refIDs) != 3 || refIDs[0] != "file-inline-1" || refIDs[1] != "file-inline-2" || refIDs[2] != "client-file" { + t.Fatalf("expected reuploaded current input ref plus client ref, got %#v", payload["ref_file_ids"]) + } + promptText, _ := payload["prompt"].(string) + if !strings.Contains(promptText, "DS2API_TOOLS.txt") { + t.Fatalf("expected switched payload prompt to retain tools file reference, got %q", promptText) + } +} diff --git 
a/internal/httpapi/openai/chat/vercel_stream.go b/internal/httpapi/openai/chat/vercel_stream.go index b52cd9c6..77b216a3 100644 --- a/internal/httpapi/openai/chat/vercel_stream.go +++ b/internal/httpapi/openai/chat/vercel_stream.go @@ -11,6 +11,7 @@ import ( "ds2api/internal/auth" "ds2api/internal/config" + "ds2api/internal/httpapi/openai/history" "ds2api/internal/promptcompat" "ds2api/internal/util" @@ -96,7 +97,7 @@ func (h *Handler) handleVercelStreamPrepare(w http.ResponseWriter, r *http.Reque } payload := stdReq.CompletionPayload(sessionID) - leaseID := h.holdStreamLease(a) + leaseID := h.holdStreamLease(a, stdReq, sessionID) if leaseID == "" { writeOpenAIError(w, http.StatusInternalServerError, "failed to create stream lease") return @@ -140,10 +141,17 @@ func (h *Handler) handleVercelStreamRelease(w http.ResponseWriter, r *http.Reque writeOpenAIError(w, http.StatusBadRequest, "lease_id is required") return } - if !h.releaseStreamLease(leaseID) { + lease, ok := h.releaseStreamLease(leaseID) + if !ok { writeOpenAIError(w, http.StatusNotFound, "stream lease not found") return } + if h.Auth != nil && lease.Auth != nil { + defer h.Auth.Release(lease.Auth) + } + if lease.Auth != nil { + h.autoDeleteRemoteSession(r.Context(), lease.Auth, lease.SessionID) + } writeJSON(w, http.StatusOK, map[string]any{"success": true}) } @@ -185,6 +193,80 @@ func (h *Handler) handleVercelStreamPow(w http.ResponseWriter, r *http.Request) }) } +func (h *Handler) handleVercelStreamSwitch(w http.ResponseWriter, r *http.Request) { + if !config.IsVercel() { + http.NotFound(w, r) + return + } + h.sweepExpiredStreamLeases() + internalSecret := vercelInternalSecret() + internalToken := strings.TrimSpace(r.Header.Get("X-Ds2-Internal-Token")) + if internalSecret == "" || subtle.ConstantTimeCompare([]byte(internalToken), []byte(internalSecret)) != 1 { + writeOpenAIError(w, http.StatusUnauthorized, "unauthorized internal request") + return + } + + var req map[string]any + if err := 
json.NewDecoder(r.Body).Decode(&req); err != nil { + writeOpenAIError(w, http.StatusBadRequest, "invalid json") + return + } + leaseID, _ := req["lease_id"].(string) + leaseID = strings.TrimSpace(leaseID) + if leaseID == "" { + writeOpenAIError(w, http.StatusBadRequest, "lease_id is required") + return + } + lease, ok := h.lookupStreamLease(leaseID) + if !ok || lease.Auth == nil { + writeOpenAIError(w, http.StatusNotFound, "stream lease not found or expired") + return + } + a := lease.Auth + if !a.UseConfigToken || !a.SwitchAccount(r.Context()) { + writeOpenAIErrorWithCode(w, http.StatusTooManyRequests, "Upstream account hit a rate limit and returned reasoning without visible output.", "upstream_empty_output") + return + } + + stdReq := lease.Standard + var err error + if stdReq.CurrentInputFileApplied { + stdReq, err = (history.Service{Store: h.Store, DS: h.DS}).ReuploadAppliedCurrentInputFile(r.Context(), a, stdReq) + if err != nil { + status, message := mapCurrentInputFileError(err) + writeOpenAIError(w, status, message) + return + } + } + sessionID, err := h.DS.CreateSession(r.Context(), a, 3) + if err != nil { + writeOpenAIError(w, http.StatusUnauthorized, "Account token is invalid. Please re-login the account in admin.") + return + } + powHeader, err := h.DS.GetPow(r.Context(), a, 3) + if err != nil { + writeOpenAIError(w, http.StatusUnauthorized, "Failed to get PoW (invalid token or unknown error).") + return + } + if strings.TrimSpace(a.DeepSeekToken) == "" { + writeOpenAIError(w, http.StatusUnauthorized, "Account token is invalid. 
Please re-login the account in admin.") + return + } + h.updateStreamLeaseState(leaseID, stdReq, sessionID) + writeJSON(w, http.StatusOK, map[string]any{ + "session_id": sessionID, + "lease_id": leaseID, + "model": stdReq.ResponseModel, + "final_prompt": stdReq.FinalPrompt, + "thinking_enabled": stdReq.Thinking, + "search_enabled": stdReq.Search, + "tool_names": stdReq.ToolNames, + "deepseek_token": a.DeepSeekToken, + "pow_header": powHeader, + "payload": stdReq.CompletionPayload(sessionID), + }) +} + func isVercelStreamPrepareRequest(r *http.Request) bool { if r == nil { return false @@ -206,6 +288,13 @@ func isVercelStreamPowRequest(r *http.Request) bool { return strings.TrimSpace(r.URL.Query().Get("__stream_pow")) == "1" } +func isVercelStreamSwitchRequest(r *http.Request) bool { + if r == nil { + return false + } + return strings.TrimSpace(r.URL.Query().Get("__stream_switch")) == "1" +} + func vercelInternalSecret() string { if v := strings.TrimSpace(os.Getenv("DS2API_VERCEL_INTERNAL_SECRET")); v != "" { return v @@ -216,7 +305,7 @@ func vercelInternalSecret() string { return "admin" } -func (h *Handler) holdStreamLease(a *auth.RequestAuth) string { +func (h *Handler) holdStreamLease(a *auth.RequestAuth, stdReq promptcompat.StandardRequest, sessionID string) string { if a == nil { return "" } @@ -234,6 +323,8 @@ func (h *Handler) holdStreamLease(a *auth.RequestAuth) string { leaseID := newLeaseID() h.streamLeases[leaseID] = streamLease{ Auth: a, + Standard: stdReq, + SessionID: sessionID, ExpiresAt: now.Add(ttl), } h.leaseMu.Unlock() @@ -241,24 +332,48 @@ func (h *Handler) holdStreamLease(a *auth.RequestAuth) string { return leaseID } -func (h *Handler) lookupStreamLeaseAuth(leaseID string) *auth.RequestAuth { +func (h *Handler) lookupStreamLease(leaseID string) (streamLease, bool) { leaseID = strings.TrimSpace(leaseID) if leaseID == "" { - return nil + return streamLease{}, false } h.leaseMu.Lock() lease, ok := h.streamLeases[leaseID] h.leaseMu.Unlock() if !ok 
|| time.Now().After(lease.ExpiresAt) { + return streamLease{}, false + } + return lease, true +} + +func (h *Handler) lookupStreamLeaseAuth(leaseID string) *auth.RequestAuth { + lease, ok := h.lookupStreamLease(leaseID) + if !ok { return nil } return lease.Auth } -func (h *Handler) releaseStreamLease(leaseID string) bool { +func (h *Handler) updateStreamLeaseState(leaseID string, stdReq promptcompat.StandardRequest, sessionID string) { leaseID = strings.TrimSpace(leaseID) if leaseID == "" { - return false + return + } + h.leaseMu.Lock() + defer h.leaseMu.Unlock() + lease, ok := h.streamLeases[leaseID] + if !ok { + return + } + lease.Standard = stdReq + lease.SessionID = sessionID + h.streamLeases[leaseID] = lease +} + +func (h *Handler) releaseStreamLease(leaseID string) (streamLease, bool) { + leaseID = strings.TrimSpace(leaseID) + if leaseID == "" { + return streamLease{}, false } h.leaseMu.Lock() @@ -271,12 +386,9 @@ func (h *Handler) releaseStreamLease(leaseID string) bool { h.releaseExpiredAuths(expired) if !ok { - return false - } - if h.Auth != nil { - h.Auth.Release(lease.Auth) + return streamLease{}, false } - return true + return lease, true } func (h *Handler) popExpiredLeasesLocked(now time.Time) []*auth.RequestAuth { diff --git a/internal/httpapi/openai/deps_injection_test.go b/internal/httpapi/openai/deps_injection_test.go index 3082dab1..b3bdc1da 100644 --- a/internal/httpapi/openai/deps_injection_test.go +++ b/internal/httpapi/openai/deps_injection_test.go @@ -103,7 +103,7 @@ func TestNormalizeOpenAIResponsesRequestAlwaysAcceptsWideInput(t *testing.T) { if out.Surface != "openai_responses" { t.Fatalf("unexpected surface: %q", out.Surface) } - if !strings.Contains(out.FinalPrompt, "<|User|>hi") { + if !strings.Contains(out.FinalPrompt, "<|User|>hi") { t.Fatalf("unexpected final prompt: %q", out.FinalPrompt) } } diff --git a/internal/httpapi/openai/file_inline_upload_test.go b/internal/httpapi/openai/file_inline_upload_test.go index abaf704c..88978e28 
100644 --- a/internal/httpapi/openai/file_inline_upload_test.go +++ b/internal/httpapi/openai/file_inline_upload_test.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "errors" + "fmt" "net/http" "net/http/httptest" "strings" @@ -41,8 +42,12 @@ func (m *inlineUploadDSStub) UploadFile(ctx context.Context, _ *auth.RequestAuth if m.uploadErr != nil { return nil, m.uploadErr } + id := "file-inline-1" + if len(m.uploadCalls) > 1 { + id = "file-inline-" + fmt.Sprint(len(m.uploadCalls)) + } return &dsclient.UploadFileResult{ - ID: "file-inline-1", + ID: id, Filename: req.Filename, Bytes: int64(len(req.Data)), Status: "uploaded", diff --git a/internal/httpapi/openai/history/current_input_file.go b/internal/httpapi/openai/history/current_input_file.go index 9f5f8eeb..032927d1 100644 --- a/internal/httpapi/openai/history/current_input_file.go +++ b/internal/httpapi/openai/history/current_input_file.go @@ -15,6 +15,7 @@ import ( const ( currentInputFilename = promptcompat.CurrentInputContextFilename + currentToolsFilename = promptcompat.CurrentToolsContextFilename currentInputContentType = "text/plain; charset=utf-8" currentInputPurpose = "assistants" ) @@ -50,6 +51,7 @@ func (s Service) ApplyCurrentInputFile(ctx context.Context, a *auth.RequestAuth, if strings.TrimSpace(fileText) == "" { return stdReq, errors.New("current user input file produced empty transcript") } + toolsText, _ := promptcompat.BuildOpenAIToolsContextTranscript(stdReq.ToolsRaw, stdReq.ToolChoice) modelType := "default" if resolvedType, ok := config.GetModelType(stdReq.ResolvedModel); ok { modelType = resolvedType @@ -69,21 +71,98 @@ func (s Service) ApplyCurrentInputFile(ctx context.Context, a *auth.RequestAuth, return stdReq, errors.New("upload current user input file returned empty file id") } + toolFileID := "" + if strings.TrimSpace(toolsText) != "" { + result, err := s.DS.UploadFile(ctx, a, dsclient.UploadFileRequest{ + Filename: currentToolsFilename, + ContentType: currentInputContentType, + 
Purpose: currentInputPurpose, + ModelType: modelType, + Data: []byte(toolsText), + }, 3) + if err != nil { + return stdReq, fmt.Errorf("upload current tools file: %w", err) + } + toolFileID = strings.TrimSpace(result.ID) + if toolFileID == "" { + return stdReq, errors.New("upload current tools file returned empty file id") + } + } + messages := []any{ map[string]any{ "role": "user", - "content": currentInputFilePrompt(), + "content": currentInputFilePrompt(toolFileID != ""), }, } stdReq.Messages = messages stdReq.HistoryText = fileText stdReq.CurrentInputFileApplied = true - stdReq.RefFileIDs = prependUniqueRefFileID(stdReq.RefFileIDs, fileID) - stdReq.FinalPrompt, stdReq.ToolNames = promptcompat.BuildOpenAIPrompt(messages, stdReq.ToolsRaw, "", stdReq.ToolChoice, stdReq.Thinking) + stdReq.CurrentInputFileID = fileID + stdReq.CurrentToolsFileID = toolFileID + stdReq.RefFileIDs = prependUniqueRefFileIDs(stdReq.RefFileIDs, fileID, toolFileID) + stdReq.FinalPrompt, stdReq.ToolNames = promptcompat.BuildOpenAIPromptWithToolInstructionsOnly(messages, stdReq.ToolsRaw, "", stdReq.ToolChoice, stdReq.Thinking) // Token accounting must reflect the actual downstream context: - // the uploaded DS2API_HISTORY.txt file content + the continuation live prompt. - stdReq.PromptTokenText = fileText + "\n" + stdReq.FinalPrompt + // uploaded context files + the continuation live prompt. 
+ tokenParts := []string{fileText} + if strings.TrimSpace(toolsText) != "" { + tokenParts = append(tokenParts, toolsText) + } + tokenParts = append(tokenParts, stdReq.FinalPrompt) + stdReq.PromptTokenText = strings.Join(tokenParts, "\n") + return stdReq, nil +} + +func (s Service) ReuploadAppliedCurrentInputFile(ctx context.Context, a *auth.RequestAuth, stdReq promptcompat.StandardRequest) (promptcompat.StandardRequest, error) { + if !stdReq.CurrentInputFileApplied || s.DS == nil || a == nil { + return stdReq, nil + } + fileText := strings.TrimSpace(stdReq.HistoryText) + if fileText == "" { + return stdReq, nil + } + modelType := "default" + if resolvedType, ok := config.GetModelType(stdReq.ResolvedModel); ok { + modelType = resolvedType + } + result, err := s.DS.UploadFile(ctx, a, dsclient.UploadFileRequest{ + Filename: currentInputFilename, + ContentType: currentInputContentType, + Purpose: currentInputPurpose, + ModelType: modelType, + Data: []byte(stdReq.HistoryText), + }, 3) + if err != nil { + return stdReq, fmt.Errorf("upload current user input file: %w", err) + } + fileID := strings.TrimSpace(result.ID) + if fileID == "" { + return stdReq, errors.New("upload current user input file returned empty file id") + } + + toolsText, _ := promptcompat.BuildOpenAIToolsContextTranscript(stdReq.ToolsRaw, stdReq.ToolChoice) + toolFileID := "" + if strings.TrimSpace(toolsText) != "" { + result, err := s.DS.UploadFile(ctx, a, dsclient.UploadFileRequest{ + Filename: currentToolsFilename, + ContentType: currentInputContentType, + Purpose: currentInputPurpose, + ModelType: modelType, + Data: []byte(toolsText), + }, 3) + if err != nil { + return stdReq, fmt.Errorf("upload current tools file: %w", err) + } + toolFileID = strings.TrimSpace(result.ID) + if toolFileID == "" { + return stdReq, errors.New("upload current tools file returned empty file id") + } + } + + stdReq.RefFileIDs = replaceGeneratedCurrentInputRefs(stdReq.RefFileIDs, stdReq.CurrentInputFileID, 
stdReq.CurrentToolsFileID, fileID, toolFileID) + stdReq.CurrentInputFileID = fileID + stdReq.CurrentToolsFileID = toolFileID return stdReq, nil } @@ -106,23 +185,62 @@ func latestUserInputForFile(messages []any) (int, string) { return -1, "" } -func currentInputFilePrompt() string { - return "Continue from the latest state in the attached DS2API_HISTORY.txt context. Treat it as the current working state and answer the latest user request directly." +func currentInputFilePrompt(hasToolsFile bool) string { + prompt := "Continue from the latest state in the attached DS2API_HISTORY.txt context. Treat it as the current working state and answer the latest user request directly." + if hasToolsFile { + prompt += " Available tool descriptions and parameter schemas are attached in DS2API_TOOLS.txt; use only those tools and follow the tool-call format rules in this prompt." + } + return prompt } -func prependUniqueRefFileID(existing []string, fileID string) []string { - fileID = strings.TrimSpace(fileID) - if fileID == "" { - return existing +func prependUniqueRefFileIDs(existing []string, fileIDs ...string) []string { + out := make([]string, 0, len(existing)+len(fileIDs)) + seen := map[string]struct{}{} + for _, fileID := range fileIDs { + trimmed := strings.TrimSpace(fileID) + if trimmed == "" { + continue + } + key := strings.ToLower(trimmed) + if _, ok := seen[key]; ok { + continue + } + out = append(out, trimmed) + seen[key] = struct{}{} } - out := make([]string, 0, len(existing)+1) - out = append(out, fileID) for _, id := range existing { trimmed := strings.TrimSpace(id) - if trimmed == "" || strings.EqualFold(trimmed, fileID) { + if trimmed == "" { + continue + } + key := strings.ToLower(trimmed) + if _, ok := seen[key]; ok { continue } out = append(out, trimmed) + seen[key] = struct{}{} } return out } + +func replaceGeneratedCurrentInputRefs(existing []string, oldHistoryID, oldToolsID, newHistoryID, newToolsID string) []string { + filtered := make([]string, 0, 
len(existing)) + old := map[string]struct{}{} + for _, id := range []string{oldHistoryID, oldToolsID} { + trimmed := strings.ToLower(strings.TrimSpace(id)) + if trimmed != "" { + old[trimmed] = struct{}{} + } + } + for _, id := range existing { + trimmed := strings.TrimSpace(id) + if trimmed == "" { + continue + } + if _, ok := old[strings.ToLower(trimmed)]; ok { + continue + } + filtered = append(filtered, trimmed) + } + return prependUniqueRefFileIDs(filtered, newHistoryID, newToolsID) +} diff --git a/internal/httpapi/openai/history_split_test.go b/internal/httpapi/openai/history_split_test.go index 97100f41..14b86588 100644 --- a/internal/httpapi/openai/history_split_test.go +++ b/internal/httpapi/openai/history_split_test.go @@ -84,7 +84,7 @@ func TestBuildOpenAICurrentInputContextTranscriptUsesNumberedHistorySections(t * "latest user turn", "[reasoning_content]", "hidden reasoning", - "<|DSML|tool_calls>", + "<|DSML|tool_calls>", } { if !strings.Contains(transcript, want) { t.Fatalf("expected transcript to contain %q, got %q", want, transcript) @@ -380,6 +380,79 @@ func TestApplyCurrentInputFileUploadsFullContextFile(t *testing.T) { } } +func TestApplyCurrentInputFileUploadsToolsContextSeparately(t *testing.T) { + ds := &inlineUploadDSStub{} + h := &openAITestSurface{ + Store: mockOpenAIConfig{ + currentInputEnabled: true, + currentInputMin: 0, + }, + DS: ds, + } + req := map[string]any{ + "model": "deepseek-v4-flash", + "messages": historySplitTestMessages(), + "tools": []any{ + map[string]any{ + "type": "function", + "function": map[string]any{ + "name": "search", + "description": "search docs", + "parameters": map[string]any{ + "type": "object", + }, + }, + }, + }, + } + stdReq, err := promptcompat.NormalizeOpenAIChatRequest(h.Store, req, "") + if err != nil { + t.Fatalf("normalize failed: %v", err) + } + + out, err := h.applyCurrentInputFile(context.Background(), &auth.RequestAuth{DeepSeekToken: "token"}, stdReq) + if err != nil { + t.Fatalf("apply current 
input file failed: %v", err) + } + if len(ds.uploadCalls) != 2 { + t.Fatalf("expected history and tools uploads, got %d", len(ds.uploadCalls)) + } + if ds.uploadCalls[0].Filename != "DS2API_HISTORY.txt" { + t.Fatalf("expected first upload to be DS2API_HISTORY.txt, got %q", ds.uploadCalls[0].Filename) + } + if ds.uploadCalls[1].Filename != "DS2API_TOOLS.txt" { + t.Fatalf("expected second upload to be DS2API_TOOLS.txt, got %q", ds.uploadCalls[1].Filename) + } + historyText := string(ds.uploadCalls[0].Data) + if strings.Contains(historyText, "You have access to these tools") || strings.Contains(historyText, "Description: search docs") { + t.Fatalf("history transcript should not embed tool descriptions, got %q", historyText) + } + toolsText := string(ds.uploadCalls[1].Data) + for _, want := range []string{"# DS2API_TOOLS.txt", "Tool: search", "Description: search docs", `Parameters: {"type":"object"}`} { + if !strings.Contains(toolsText, want) { + t.Fatalf("expected tools transcript to contain %q, got %q", want, toolsText) + } + } + if strings.Contains(toolsText, "TOOL CALL FORMAT") { + t.Fatalf("tools transcript should not duplicate tool format instructions, got %q", toolsText) + } + if !strings.Contains(out.FinalPrompt, "Continue from the latest state in the attached DS2API_HISTORY.txt context.") || !strings.Contains(out.FinalPrompt, "DS2API_TOOLS.txt") { + t.Fatalf("expected live prompt to reference both context files, got %q", out.FinalPrompt) + } + if !strings.Contains(out.FinalPrompt, "TOOL CALL FORMAT") || !strings.Contains(out.FinalPrompt, "Remember: The ONLY valid way to use tools") { + t.Fatalf("expected live prompt to retain tool format instructions, got %q", out.FinalPrompt) + } + if strings.Contains(out.FinalPrompt, "You have access to these tools") || strings.Contains(out.FinalPrompt, "Description: search docs") || strings.Contains(out.FinalPrompt, "Parameters:") { + t.Fatalf("expected live prompt to omit tool descriptions after tools upload, got %q", 
out.FinalPrompt) + } + if len(out.RefFileIDs) < 2 || out.RefFileIDs[0] != "file-inline-1" || out.RefFileIDs[1] != "file-inline-2" { + t.Fatalf("expected history and tools file ids first, got %#v", out.RefFileIDs) + } + if !strings.Contains(out.PromptTokenText, "# DS2API_HISTORY.txt") || !strings.Contains(out.PromptTokenText, "# DS2API_TOOLS.txt") || !strings.Contains(out.PromptTokenText, "Description: search docs") { + t.Fatalf("expected prompt token text to include uploaded history and tools content, got %q", out.PromptTokenText) + } +} + func TestApplyCurrentInputFileCarriesHistoryText(t *testing.T) { ds := &inlineUploadDSStub{} h := &openAITestSurface{ @@ -537,6 +610,69 @@ func TestResponsesCurrentInputFileUploadsContextAndKeepsNeutralPrompt(t *testing } } +func TestResponsesCurrentInputFileUploadsToolsSeparately(t *testing.T) { + ds := &inlineUploadDSStub{} + h := &openAITestSurface{ + Store: mockOpenAIConfig{ + currentInputEnabled: true, + }, + Auth: streamStatusAuthStub{}, + DS: ds, + } + r := chi.NewRouter() + registerOpenAITestRoutes(r, h) + reqBody, _ := json.Marshal(map[string]any{ + "model": "deepseek-v4-flash", + "messages": historySplitTestMessages(), + "tools": []any{ + map[string]any{ + "type": "function", + "function": map[string]any{ + "name": "search", + "description": "search docs", + "parameters": map[string]any{"type": "object"}, + }, + }, + }, + "stream": false, + }) + req := httptest.NewRequest(http.MethodPost, "/v1/responses", strings.NewReader(string(reqBody))) + req.Header.Set("Authorization", "Bearer direct-token") + req.Header.Set("Content-Type", "application/json") + rec := httptest.NewRecorder() + + r.ServeHTTP(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d body=%s", rec.Code, rec.Body.String()) + } + if len(ds.uploadCalls) != 2 { + t.Fatalf("expected history and tools uploads, got %d", len(ds.uploadCalls)) + } + if ds.uploadCalls[0].Filename != "DS2API_HISTORY.txt" || ds.uploadCalls[1].Filename != 
"DS2API_TOOLS.txt" { + t.Fatalf("unexpected upload filenames: %#v", ds.uploadCalls) + } + historyText := string(ds.uploadCalls[0].Data) + if strings.Contains(historyText, "Description: search docs") { + t.Fatalf("history transcript should not embed tool descriptions, got %q", historyText) + } + toolsText := string(ds.uploadCalls[1].Data) + if !strings.Contains(toolsText, "# DS2API_TOOLS.txt") || !strings.Contains(toolsText, "Tool: search") || !strings.Contains(toolsText, "Description: search docs") { + t.Fatalf("expected tools transcript to include schema, got %q", toolsText) + } + promptText, _ := ds.completionReq["prompt"].(string) + if !strings.Contains(promptText, "DS2API_TOOLS.txt") || !strings.Contains(promptText, "TOOL CALL FORMAT") { + t.Fatalf("expected live prompt to reference tools file and retain format instructions, got %q", promptText) + } + if strings.Contains(promptText, "Description: search docs") { + t.Fatalf("live prompt should not inline tool descriptions, got %q", promptText) + } + refIDs, _ := ds.completionReq["ref_file_ids"].([]any) + if len(refIDs) < 2 || refIDs[0] != "file-inline-1" || refIDs[1] != "file-inline-2" { + t.Fatalf("expected history and tools ref ids first, got %#v", ds.completionReq["ref_file_ids"]) + } +} + func TestChatCompletionsCurrentInputFileMapsManagedAuthFailureTo401(t *testing.T) { ds := &inlineUploadDSStub{ uploadErr: &dsclient.RequestFailure{Op: "upload file", Kind: dsclient.FailureManagedUnauthorized, Message: "expired token"}, diff --git a/internal/httpapi/openai/leaked_output_sanitize_test.go b/internal/httpapi/openai/leaked_output_sanitize_test.go index acaf7208..939f73fb 100644 --- a/internal/httpapi/openai/leaked_output_sanitize_test.go +++ b/internal/httpapi/openai/leaked_output_sanitize_test.go @@ -19,21 +19,47 @@ func TestSanitizeLeakedOutputRemovesLeakedWireToolCallAndResult(t *testing.T) { } func TestSanitizeLeakedOutputRemovesStandaloneMetaMarkers(t *testing.T) { - raw := "A<| end_of_sentence |><| 
Assistant |>B<| end_of_thinking |>C<|end▁of▁thinking|>D<|end▁of▁sentence|>E<| end_of_toolresults |>F<|end▁of▁instructions|>G" + raw := "A<| end_of_sentence |><| Assistant |>B<| end_of_thinking |>C<|end▁of▁thinking|>D<|end▁of▁sentence|>E<| end_of_toolresults |>F<|end▁of▁instructions|>G" got := sanitizeLeakedOutput(raw) if got != "ABCDEFG" { t.Fatalf("unexpected sanitize result for meta markers: %q", got) } } +func TestSanitizeLeakedOutputRemovesFullwidthDelimitedMetaMarkers(t *testing.T) { + fw := "\uff5c" + raw := "A<" + fw + "end▁of▁sentence" + fw + ">B<" + fw + " Assistant " + fw + ">C<" + fw + "end_of_toolresults" + fw + ">D" + got := sanitizeLeakedOutput(raw) + if got != "ABCD" { + t.Fatalf("unexpected sanitize result for fullwidth-delimited meta markers: %q", got) + } +} + func TestSanitizeLeakedOutputRemovesThinkAndBosMarkers(t *testing.T) { - raw := "ABC<|begin▁of▁sentence|>D<| begin_of_sentence |>E<|begin_of_sentence|>F" + raw := "ABC<|begin▁of▁sentence|>D<| begin_of_sentence |>E<|begin_of_sentence|>F" got := sanitizeLeakedOutput(raw) if got != "ABCDEF" { t.Fatalf("unexpected sanitize result for think/BOS markers: %q", got) } } +func TestSanitizeLeakedOutputRemovesThoughtMarkers(t *testing.T) { + raw := "A<|▁of▁thought|>B<| of_thought |>C<| begin_of_thought |>D<| end_of_thought |>E" + got := sanitizeLeakedOutput(raw) + if got != "ABCDE" { + t.Fatalf("unexpected sanitize result for leaked thought markers: %q", got) + } +} + +func TestSanitizeLeakedOutputRemovesFullwidthDelimitedBosAndThoughtMarkers(t *testing.T) { + fw := "\uff5c" + raw := "A<" + fw + "begin▁of▁sentence" + fw + ">B<" + fw + "▁of▁thought" + fw + ">C<" + fw + " begin_of_thought " + fw + ">D" + got := sanitizeLeakedOutput(raw) + if got != "ABCD" { + t.Fatalf("unexpected sanitize result for fullwidth-delimited BOS/thought markers: %q", got) + } +} + func TestSanitizeLeakedOutputRemovesDanglingThinkBlock(t *testing.T) { raw := "Answer prefixinternal reasoning that never closes" got := 
sanitizeLeakedOutput(raw) @@ -43,7 +69,7 @@ func TestSanitizeLeakedOutputRemovesDanglingThinkBlock(t *testing.T) { } func TestSanitizeLeakedOutputRemovesCompleteDSMLToolCallWrapper(t *testing.T) { - raw := "前置文本\n<|DSML|tool_calls>\n<|DSML|invoke name=\"Bash\">\n<|DSML|parameter name=\"command\">\n\n\n后置文本" + raw := "前置文本\n<|DSML|tool_calls>\n<|DSML|invoke name=\"Bash\">\n<|DSML|parameter name=\"command\">\n\n\n后置文本" got := sanitizeLeakedOutput(raw) if got != "前置文本\n\n后置文本" { t.Fatalf("unexpected sanitize result for leaked dsml wrapper: %q", got) diff --git a/internal/httpapi/openai/responses/empty_retry_runtime.go b/internal/httpapi/openai/responses/empty_retry_runtime.go index 80422f5b..5166f9c7 100644 --- a/internal/httpapi/openai/responses/empty_retry_runtime.go +++ b/internal/httpapi/openai/responses/empty_retry_runtime.go @@ -15,7 +15,7 @@ import ( streamengine "ds2api/internal/stream" ) -func (h *Handler) handleResponsesStreamWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, owner, responseID, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, traceID string, historySession *responsehistory.Session) { +func (h *Handler) handleResponsesStreamWithRetry(w http.ResponseWriter, r *http.Request, a *auth.RequestAuth, resp *http.Response, payload map[string]any, pow, owner, responseID string, stdReq promptcompat.StandardRequest, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, traceID string, historySession *responsehistory.Session) { streamRuntime, initialType, ok := h.prepareResponsesStreamRuntime(w, resp, owner, responseID, model, finalPrompt, refFileTokens, thinkingEnabled, searchEnabled, toolNames, toolsRaw, toolChoice, traceID, historySession) if !ok { return @@ -27,6 +27,8 @@ 
func (h *Handler) handleResponsesStreamWithRetry(w http.ResponseWriter, r *http. RetryMaxAttempts: emptyOutputRetryMaxAttempts(), MaxAttempts: 3, UsagePrompt: finalPrompt, + Request: stdReq, + CurrentInputFile: h.Store, }, completionruntime.StreamRetryHooks{ ConsumeAttempt: func(currentResp *http.Response, allowDeferEmpty bool) (bool, bool) { return h.consumeResponsesStreamAttempt(r, currentResp, streamRuntime, initialType, thinkingEnabled, allowDeferEmpty) diff --git a/internal/httpapi/openai/responses/responses_handler.go b/internal/httpapi/openai/responses/responses_handler.go index 3a6680d6..f34daed8 100644 --- a/internal/httpapi/openai/responses/responses_handler.go +++ b/internal/httpapi/openai/responses/responses_handler.go @@ -138,7 +138,7 @@ func (h *Handler) Responses(w http.ResponseWriter, r *http.Request) { streamReq := start.Request refFileTokens := streamReq.RefFileTokens - h.handleResponsesStreamWithRetry(w, r, a, start.Response, start.Payload, start.Pow, owner, responseID, streamReq.ResponseModel, streamReq.PromptTokenText, refFileTokens, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, streamReq.ToolChoice, traceID, historySession) + h.handleResponsesStreamWithRetry(w, r, a, start.Response, start.Payload, start.Pow, owner, responseID, streamReq, streamReq.ResponseModel, streamReq.PromptTokenText, refFileTokens, streamReq.Thinking, streamReq.Search, streamReq.ToolNames, streamReq.ToolsRaw, streamReq.ToolChoice, traceID, historySession) } func (h *Handler) handleResponsesNonStream(w http.ResponseWriter, resp *http.Response, owner, responseID, model, finalPrompt string, refFileTokens int, thinkingEnabled, searchEnabled bool, toolNames []string, toolsRaw any, toolChoice promptcompat.ToolChoicePolicy, traceID string) { diff --git a/internal/httpapi/openai/shared/leaked_output_sanitize.go b/internal/httpapi/openai/shared/leaked_output_sanitize.go index 5e54637e..9293e78f 100644 --- 
a/internal/httpapi/openai/shared/leaked_output_sanitize.go +++ b/internal/httpapi/openai/shared/leaked_output_sanitize.go @@ -13,15 +13,23 @@ var leakedToolResultBlobPattern = regexp.MustCompile(`(?is)<\s*\|\s*tool\s*\|\s* var leakedThinkTagPattern = regexp.MustCompile(`(?is)`) -// leakedBOSMarkerPattern matches DeepSeek BOS markers in BOTH forms: -// - ASCII underscore: <|begin_of_sentence|> -// - U+2581 variant: <|begin▁of▁sentence|> -var leakedBOSMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*begin[_▁]of[_▁]sentence\s*[|\|]>`) +// leakedBOSMarkerPattern matches DeepSeek BOS markers with halfwidth or +// legacy U+FF5C fullwidth delimiters: +// - ASCII underscore: <|begin_of_sentence|> +// - U+2581 variant: <|begin▁of▁sentence|> +var leakedBOSMarkerPattern = regexp.MustCompile(`(?i)<[\|\x{ff5c}]\s*begin[_▁]of[_▁]sentence\s*[\|\x{ff5c}]>`) -// leakedMetaMarkerPattern matches the remaining DeepSeek special tokens in BOTH forms: -// - ASCII underscore: <|end_of_sentence|>, <|end_of_toolresults|>, <|end_of_instructions|> -// - U+2581 variant: <|end▁of▁sentence|>, <|end▁of▁toolresults|>, <|end▁of▁instructions|> -var leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[|\|]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[|\|]>`) +// leakedThoughtMarkerPattern matches leaked thought control markers in both +// explicit and compact forms: +// - ASCII underscore: <| of_thought |>, <| begin_of_thought |> +// - U+2581 variant: <|▁of▁thought|>, <|begin▁of▁thought|> +var leakedThoughtMarkerPattern = regexp.MustCompile(`(?i)<[\|\x{ff5c}]\s*(?:begin[_▁])?[_▁]*of[_▁]thought\s*[\|\x{ff5c}]>`) + +// leakedMetaMarkerPattern matches the remaining DeepSeek special tokens with +// halfwidth or legacy U+FF5C fullwidth delimiters: +// - ASCII underscore: <|end_of_sentence|>, <|end_of_toolresults|>, <|end_of_instructions|> +// - U+2581 variant: <|end▁of▁sentence|>, <|end▁of▁toolresults|>, <|end▁of▁instructions|> +var 
leakedMetaMarkerPattern = regexp.MustCompile(`(?i)<[\|\x{ff5c}]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[\|\x{ff5c}]>`) // leakedAgentXMLBlockPatterns catch agent-style XML blocks that leak through // when the sieve fails to capture them. These are applied only to complete @@ -48,6 +56,7 @@ func sanitizeLeakedOutput(text string) string { out = stripDanglingThinkSuffix(out) out = leakedThinkTagPattern.ReplaceAllString(out, "") out = leakedBOSMarkerPattern.ReplaceAllString(out, "") + out = leakedThoughtMarkerPattern.ReplaceAllString(out, "") out = leakedMetaMarkerPattern.ReplaceAllString(out, "") out = stripLeakedToolCallWrapperBlocks(out) out = sanitizeLeakedAgentXMLBlocks(out) diff --git a/internal/js/chat-stream/http_internal.js b/internal/js/chat-stream/http_internal.js index 247e38c0..1c94ced5 100644 --- a/internal/js/chat-stream/http_internal.js +++ b/internal/js/chat-stream/http_internal.js @@ -85,6 +85,33 @@ async function fetchStreamPow(req, leaseID) { }; } +async function fetchStreamSwitch(req, leaseID) { + const url = buildInternalGoURL(req); + url.searchParams.set('__stream_switch', '1'); + + const upstream = await fetch(url.toString(), { + method: 'POST', + headers: buildInternalGoHeaders(req, { withInternalToken: true, withContentType: true }), + body: Buffer.from(JSON.stringify({ lease_id: leaseID })), + }); + + const text = await upstream.text(); + let body = {}; + try { + body = JSON.parse(text || '{}'); + } catch (_err) { + body = {}; + } + + return { + ok: upstream.ok, + status: upstream.status, + contentType: upstream.headers.get('content-type') || 'application/json', + text, + body, + }; +} + function relayPreparedFailure(res, prep) { if (prep.status === 401 && looksLikeVercelAuthPage(prep.text)) { writeOpenAIError( @@ -223,6 +250,7 @@ module.exports = { readRawBody, fetchStreamPrepare, fetchStreamPow, + fetchStreamSwitch, relayPreparedFailure, 
safeReadText, buildInternalGoURL, diff --git a/internal/js/chat-stream/sse_parse_impl.js b/internal/js/chat-stream/sse_parse_impl.js index 6f5922ec..91074710 100644 --- a/internal/js/chat-stream/sse_parse_impl.js +++ b/internal/js/chat-stream/sse_parse_impl.js @@ -7,6 +7,10 @@ const { SKIP_EXACT_PATHS, } = require('../shared/deepseek-constants'); +const LEAKED_BOS_MARKER_PATTERN = /<[\|\uFF5C]\s*begin[_▁]of[_▁]sentence\s*[\|\uFF5C]>/gi; +const LEAKED_THOUGHT_MARKER_PATTERN = /<[\|\uFF5C]\s*(?:begin[_▁])?[_▁]*of[_▁]thought\s*[\|\uFF5C]>/gi; +const LEAKED_META_MARKER_PATTERN = /<[\|\uFF5C]\s*(?:assistant|tool|end[_▁]of[_▁]sentence|end[_▁]of[_▁]thinking|end[_▁]of[_▁]thought|end[_▁]of[_▁]toolresults|end[_▁]of[_▁]instructions)\s*[\|\uFF5C]>/gi; + function stripThinkTags(text) { @@ -621,7 +625,11 @@ function stripReferenceMarkersText(text) { if (!text) { return text; } - return text.replace(/\[(?:citation|reference):\s*\d+\]/gi, ''); + return text + .replace(/\[(?:citation|reference):\s*\d+\]/gi, '') + .replace(LEAKED_BOS_MARKER_PATTERN, '') + .replace(LEAKED_THOUGHT_MARKER_PATTERN, '') + .replace(LEAKED_META_MARKER_PATTERN, ''); } function asString(v) { diff --git a/internal/js/chat-stream/vercel_stream_impl.js b/internal/js/chat-stream/vercel_stream_impl.js index 9a9bb0b8..6e1d4a85 100644 --- a/internal/js/chat-stream/vercel_stream_impl.js +++ b/internal/js/chat-stream/vercel_stream_impl.js @@ -25,6 +25,7 @@ const { isAbortError, fetchStreamPrepare, fetchStreamPow, + fetchStreamSwitch, relayPreparedFailure, createLeaseReleaser, } = require('./http_internal'); @@ -46,11 +47,11 @@ async function handleVercelStream(req, res, rawBody, payload) { } const model = asString(prep.body.model) || asString(payload.model); - const sessionID = asString(prep.body.session_id) || `chatcmpl-${Date.now()}`; + const responseID = asString(prep.body.session_id) || `chatcmpl-${Date.now()}`; const leaseID = asString(prep.body.lease_id); - const deepseekToken = 
asString(prep.body.deepseek_token); + let deepseekToken = asString(prep.body.deepseek_token); const initialPowHeader = asString(prep.body.pow_header); - const completionPayload = prep.body.payload && typeof prep.body.payload === 'object' ? prep.body.payload : null; + let completionPayload = prep.body.payload && typeof prep.body.payload === 'object' ? prep.body.payload : null; const finalPrompt = asString(prep.body.final_prompt); const thinkingEnabled = toBool(prep.body.thinking_enabled); const searchEnabled = toBool(prep.body.search_enabled); @@ -133,13 +134,14 @@ async function handleVercelStream(req, res, rawBody, payload) { } }; const fetchCompletion = (bodyPayload) => fetchDeepSeekStream(DEEPSEEK_COMPLETION_URL, bodyPayload, currentPowHeader); + let activeDeepSeekSessionID = responseID; const fetchContinue = async (messageID) => { const powHeader = await refreshPowHeader('continue'); if (!powHeader) { return null; } return fetchDeepSeekStream(DEEPSEEK_CONTINUE_URL, { - chat_session_id: sessionID, + chat_session_id: activeDeepSeekSessionID, message_id: messageID, fallback_to_resume: true, }, powHeader); @@ -185,7 +187,7 @@ async function handleVercelStream(req, res, rawBody, payload) { let ended = false; const { sendFrame, sendDeltaFrame } = createChatCompletionEmitter({ res, - sessionID, + sessionID: responseID, created, model, isClosed: () => clientClosed, @@ -242,7 +244,7 @@ async function handleVercelStream(req, res, rawBody, payload) { } ended = true; sendFrame({ - id: sessionID, + id: responseID, object: 'chat.completion.chunk', created, model, @@ -261,7 +263,7 @@ async function handleVercelStream(req, res, rawBody, payload) { const processStream = async (initialResponse, allowDeferEmpty) => { let currentResponse = initialResponse; - let continueState = createContinueState(sessionID); + let continueState = createContinueState(activeDeepSeekSessionID); let continueRounds = 0; // eslint-disable-next-line no-constant-condition while (true) { @@ -412,13 
+414,39 @@ async function handleVercelStream(req, res, rawBody, payload) { }; let retryAttempts = 0; + let accountSwitchAttempted = false; // eslint-disable-next-line no-constant-condition while (true) { - const processed = await processStream(completionRes, retryAttempts < EMPTY_OUTPUT_RETRY_MAX_ATTEMPTS); + const allowDeferEmpty = retryAttempts < EMPTY_OUTPUT_RETRY_MAX_ATTEMPTS || !accountSwitchAttempted; + const processed = await processStream(completionRes, allowDeferEmpty); if (processed.terminal) { return; } - if (!processed.retryable || retryAttempts >= EMPTY_OUTPUT_RETRY_MAX_ATTEMPTS) { + if (!processed.retryable) { + await finish('stop'); + return; + } + if (retryAttempts >= EMPTY_OUTPUT_RETRY_MAX_ATTEMPTS) { + if (!accountSwitchAttempted) { + accountSwitchAttempted = true; + const switched = await fetchStreamSwitch(req, leaseID); + if (switched.ok && switched.body && switched.body.payload && typeof switched.body.payload === 'object') { + completionPayload = switched.body.payload; + deepseekToken = asString(switched.body.deepseek_token) || deepseekToken; + currentPowHeader = asString(switched.body.pow_header) || currentPowHeader; + activeDeepSeekSessionID = asString(switched.body.session_id) || activeDeepSeekSessionID; + usagePrompt = finalPrompt; + completionRes = await fetchCompletion(completionPayload); + if (completionRes === null) { + return; + } + if (!completionRes.ok || !completionRes.body) { + await finish('stop'); + return; + } + continue; + } + } await finish('stop'); return; } diff --git a/internal/js/helpers/stream-tool-sieve/parse.js b/internal/js/helpers/stream-tool-sieve/parse.js index f2ba3dcb..7a707695 100644 --- a/internal/js/helpers/stream-tool-sieve/parse.js +++ b/internal/js/helpers/stream-tool-sieve/parse.js @@ -7,6 +7,9 @@ const { parseMarkupToolCalls, stripFencedCodeBlocks, containsToolCallWrapperSyntaxOutsideIgnored, + normalizeDSMLToolCallMarkup, + hasRepairableXMLToolCallsWrapper, + indexToolCDATAOpen, sanitizeLooseCDATA, } = 
require('./parse_payload'); @@ -37,19 +40,23 @@ function parseToolCalls(text, toolNames) { function parseToolCallsDetailed(text, toolNames) { const result = emptyParseResult(); - const normalized = toStringSafe(text); - if (!normalized) { + const raw = toStringSafe(text); + if (!raw) { return result; } - result.sawToolCallSyntax = looksLikeToolCallSyntax(normalized); - if (shouldSkipToolCallParsingForCodeFenceExample(normalized)) { + if (shouldSkipToolCallParsingForCodeFenceExample(raw)) { return result; } + const normalized = normalizeDSMLToolCallMarkup(stripFencedCodeBlocks(raw).trim()); + if (!normalized.ok || !normalized.text) { + return result; + } + result.sawToolCallSyntax = looksLikeToolCallSyntax(normalized.text) || hasRepairableXMLToolCallsWrapper(normalized.text); // XML markup parsing only. - let parsed = parseMarkupToolCalls(normalized); - if (parsed.length === 0 && normalized.toLowerCase().includes('= 0) { + const recovered = sanitizeLooseCDATA(normalized.text); + if (recovered !== normalized.text) { parsed = parseMarkupToolCalls(recovered); } } @@ -70,19 +77,23 @@ function parseStandaloneToolCalls(text, toolNames) { function parseStandaloneToolCallsDetailed(text, toolNames) { const result = emptyParseResult(); - const trimmed = toStringSafe(text); - if (!trimmed) { + const raw = toStringSafe(text); + if (!raw) { + return result; + } + if (shouldSkipToolCallParsingForCodeFenceExample(raw)) { return result; } - result.sawToolCallSyntax = looksLikeToolCallSyntax(trimmed); - if (shouldSkipToolCallParsingForCodeFenceExample(trimmed)) { + const normalized = normalizeDSMLToolCallMarkup(stripFencedCodeBlocks(raw).trim()); + if (!normalized.ok || !normalized.text) { return result; } + result.sawToolCallSyntax = looksLikeToolCallSyntax(normalized.text) || hasRepairableXMLToolCallsWrapper(normalized.text); // XML markup parsing only. 
- let parsed = parseMarkupToolCalls(trimmed); - if (parsed.length === 0 && trimmed.toLowerCase().includes('= 0) { + const recovered = sanitizeLooseCDATA(normalized.text); + if (recovered !== normalized.text) { parsed = parseMarkupToolCalls(recovered); } } diff --git a/internal/js/helpers/stream-tool-sieve/parse_payload.js b/internal/js/helpers/stream-tool-sieve/parse_payload.js index a24bd624..e9fc02f2 100644 --- a/internal/js/helpers/stream-tool-sieve/parse_payload.js +++ b/internal/js/helpers/stream-tool-sieve/parse_payload.js @@ -2,6 +2,8 @@ const CDATA_PATTERN = /^(?:<|〈)(?:!|!)\[CDATA\[([\s\S]*?)]](?:>|>|〉)$/i; const XML_ATTR_PATTERN = /\b([a-z0-9_:-]+)\s*=\s*("([^"]*)"|'([^']*)')/gi; +const XML_TOOL_CALLS_CLOSE_PATTERN = /[<<][\//]tool_calls\s*[>>]/gi; +const XML_INVOKE_START_PATTERN = /[<<]invoke\b[^>>]*\bname\s*[==]\s*(?:"([^"]*)"|'([^']*)'|“([^”]*)”|‘([^’]*)’|"([^"]*)"|'([^']*)')/i; const TOOL_MARKUP_NAMES = [ { raw: 'tool_calls', canonical: 'tool_calls' }, { raw: 'tool-calls', canonical: 'tool_calls', dsmlOnly: true }, @@ -88,8 +90,7 @@ function isFenceCloseLine(trimmed, fenceChar, fenceLen) { } function cdataStartsBeforeFence(line) { - const cdataOpen = findNextCDATAOpen(line, 0); - const cdataIdx = cdataOpen.ok ? cdataOpen.start : -1; + const cdataIdx = indexToolCDATAOpen(line, 0); if (cdataIdx < 0) return false; const fenceIdx = Math.min( line.indexOf('```') >= 0 ? 
line.indexOf('```') : Infinity, @@ -99,21 +100,28 @@ function cdataStartsBeforeFence(line) { } function updateCDATAStateLine(inCDATA, line) { - const lower = line.toLowerCase(); let pos = 0; let state = inCDATA; - while (pos < lower.length) { + while (pos < line.length) { if (state) { - const cdataEnd = findCDATAEnd(lower, pos); - const end = cdataEnd.index; + let end = -1; + let closeLen = 0; + for (let i = pos; i < line.length; i += 1) { + const foundLen = toolCDATACloseLenAt(line, i); + if (foundLen > 0) { + end = i; + closeLen = foundLen; + break; + } + } if (end < 0) return true; - pos = end + cdataEnd.len; + pos = end + closeLen; state = false; continue; } - const start = findNextCDATAOpen(line, pos); - if (!start.ok) return false; - pos = start.bodyStart; + const start = indexToolCDATAOpen(line, pos); + if (start < 0) return false; + pos = start + toolCDATAOpenLenAt(line, start); state = true; } return state; @@ -124,12 +132,20 @@ function parseMarkupToolCalls(text) { if (!normalized.ok) { return []; } - const raw = normalized.text.trim(); + let raw = normalized.text.trim(); if (!raw) { return []; } + let wrappers = findXmlElementBlocks(raw, 'tool_calls'); + if (wrappers.length === 0 && hasRepairableXMLToolCallsWrapper(raw)) { + const repaired = repairMissingXMLToolCallsOpeningWrapper(raw); + if (repaired !== raw) { + raw = repaired; + wrappers = findXmlElementBlocks(raw, 'tool_calls'); + } + } const out = []; - for (const wrapper of findXmlElementBlocks(raw, 'tool_calls')) { + for (const wrapper of wrappers) { const body = toStringSafe(wrapper.body); for (const block of findXmlElementBlocks(body, 'invoke')) { const parsed = parseMarkupSingleToolCall(block); @@ -146,12 +162,13 @@ function normalizeDSMLToolCallMarkup(text) { if (!raw) { return { text: '', ok: true }; } - const styles = containsToolMarkupSyntaxOutsideIgnored(raw); - if (!styles.dsml) { - return { text: raw, ok: true }; + const canonicalized = canonicalizeToolCallCandidateSpans(raw); + const 
styles = containsToolMarkupSyntaxOutsideIgnored(canonicalized); + if (!styles.dsml && !styles.canonical) { + return { text: canonicalized, ok: true }; } return { - text: replaceDSMLToolMarkupOutsideIgnored(raw), + text: replaceDSMLToolMarkupOutsideIgnored(canonicalized), ok: true, }; } @@ -170,9 +187,8 @@ function containsToolCallWrapperSyntaxOutsideIgnored(text) { if (!raw) { return styles; } - const lower = raw.toLowerCase(); for (let i = 0; i < raw.length;) { - const skipped = skipXmlIgnoredSection(lower, i); + const skipped = skipXmlIgnoredSection(raw, i); if (skipped.blocked) { return styles; } @@ -208,7 +224,7 @@ function containsToolMarkupSyntaxOutsideIgnored(text) { return styles; } for (let i = 0; i < raw.length;) { - const skipped = skipXmlIgnoredSection(raw.toLowerCase(), i); + const skipped = skipXmlIgnoredSection(raw, i); if (skipped.blocked) { return styles; } @@ -239,10 +255,9 @@ function replaceDSMLToolMarkupOutsideIgnored(text) { if (!raw) { return ''; } - const lower = raw.toLowerCase(); let out = ''; for (let i = 0; i < raw.length;) { - const skipped = skipXmlIgnoredSection(lower, i); + const skipped = skipXmlIgnoredSection(raw, i); if (skipped.blocked) { out += raw.slice(i); break; @@ -254,15 +269,7 @@ function replaceDSMLToolMarkupOutsideIgnored(text) { } const tag = scanToolMarkupTagAt(raw, i); if (tag) { - if (tag.dsmlLike) { - const tail = normalizeToolMarkupTagTailForXML(raw.slice(tag.nameEnd, tag.end + 1)); - out += `<${tag.closing ? '/' : ''}${tag.name}${tail}`; - if (!tail.endsWith('>')) { - out += '>'; - } - } else { - out += raw.slice(tag.start, tag.end + 1); - } + out += `<${tag.closing ? 
'/' : ''}${tag.name}${raw.slice(tag.nameEnd, tag.end)}>`; i = tag.end + 1; continue; } @@ -345,7 +352,7 @@ function findXmlStartTagOutsideCDATA(text, tag, from) { const lower = text.toLowerCase(); const target = `<${tag}`; for (let i = Math.max(0, from || 0); i < text.length;) { - const skipped = skipXmlIgnoredSection(lower, i); + const skipped = skipXmlIgnoredSection(text, i); if (skipped.blocked) { return null; } @@ -375,7 +382,7 @@ function findMatchingXmlEndTagOutsideCDATA(text, tag, from) { const closeTarget = ` 0) { + const end = findToolCDATAEnd(raw, i + openLen); if (end < 0) { return { advanced: false, blocked: true, next: i }; } - return { advanced: true, blocked: false, next: end + cdataEnd.len }; + return { advanced: true, blocked: false, next: end + toolCDATACloseLenAt(raw, end) }; } - if (lower.startsWith('', i + '', i + '