From bad12f644887c6d87ebcdf2b3e4eea4cfb544413 Mon Sep 17 00:00:00 2001
From: toreleon
Date: Thu, 9 Apr 2026 01:39:18 +0700
Subject: [PATCH 1/2] Add Responses API proxy routes

---
Review note: hunk sizes and the diffstat were updated for the amended
content; the blob ids on the `index` lines were not regenerated.

 src/routes/responses/handler.ts          | 55 +++++++++++++++++++++++
 src/routes/responses/route.ts            | 16 +++++++
 src/server.ts                            |  3 ++
 src/services/copilot/create-responses.ts | 68 ++++++++++++++++++++++++++++
 4 files changed, 142 insertions(+)
 create mode 100644 src/routes/responses/handler.ts
 create mode 100644 src/routes/responses/route.ts
 create mode 100644 src/services/copilot/create-responses.ts

diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts
new file mode 100644
index 000000000..b8bfc7c99
--- /dev/null
+++ b/src/routes/responses/handler.ts
@@ -0,0 +1,55 @@
+import type { Context } from "hono"
+
+import consola from "consola"
+import { streamSSE, type SSEMessage } from "hono/streaming"
+
+import { awaitApproval } from "~/lib/approval"
+import { checkRateLimit } from "~/lib/rate-limit"
+import { state } from "~/lib/state"
+import {
+  createResponses,
+  type ResponsesApiResponse,
+  type ResponsesPayload,
+} from "~/services/copilot/create-responses"
+
+/**
+ * Handle a POST to the Responses endpoint.
+ *
+ * Applies the rate limit, optionally waits for manual approval, forwards the
+ * JSON body to `createResponses`, and replies with JSON for a non-streaming
+ * result or re-emits each upstream event over SSE otherwise.
+ */
+export async function handleResponses(c: Context) {
+  await checkRateLimit(state)
+
+  const payload = await c.req.json<ResponsesPayload>()
+  consola.debug("Responses payload:", JSON.stringify(payload).slice(-400))
+
+  if (state.manualApprove) await awaitApproval()
+
+  const response = await createResponses(payload)
+
+  if (isNonStreaming(response)) {
+    consola.debug("Non-streaming responses response:", JSON.stringify(response))
+    return c.json(response)
+  }
+
+  consola.debug("Streaming responses response")
+  return streamSSE(c, async (stream) => {
+    for await (const chunk of response) {
+      consola.debug("Responses streaming chunk:", JSON.stringify(chunk))
+      await stream.writeSSE(chunk as SSEMessage)
+    }
+  })
+}
+
+/**
+ * Tell a parsed JSON response apart from the async event stream.
+ *
+ * `Symbol.asyncIterator` is inherited from the prototype of an async
+ * generator, so this uses `in` rather than an own-property check, which
+ * would classify every stream as non-streaming.
+ */
+const isNonStreaming = (
+  response: Awaited<ReturnType<typeof createResponses>>,
+): response is ResponsesApiResponse => !(Symbol.asyncIterator in response)
diff --git a/src/routes/responses/route.ts b/src/routes/responses/route.ts
new file mode 100644
index 000000000..af2423427
--- /dev/null
+++ b/src/routes/responses/route.ts
@@ -0,0 +1,16 @@
+import { Hono } from "hono"
+
+import { forwardError } from "~/lib/error"
+
+import { handleResponses } from "./handler"
+
+/** Router for the Responses endpoint; handler errors go to `forwardError`. */
+export const responsesRoutes = new Hono()
+
+responsesRoutes.post("/", async (c) => {
+  try {
+    return await handleResponses(c)
+  } catch (error) {
+    return await forwardError(c, error)
+  }
+})
diff --git a/src/server.ts b/src/server.ts
index 462a278f3..51fda1988 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -6,6 +6,7 @@ import { completionRoutes } from "./routes/chat-completions/route"
 import { embeddingRoutes } from "./routes/embeddings/route"
 import { messageRoutes } from "./routes/messages/route"
 import { modelRoutes } from "./routes/models/route"
+import { responsesRoutes } from "./routes/responses/route"
 import { tokenRoute } from "./routes/token/route"
 import { usageRoute } from "./routes/usage/route"
 
@@ -17,6 +18,7 @@ server.use(cors())
 server.get("/", (c) => c.text("Server running"))
 
 server.route("/chat/completions", completionRoutes)
+server.route("/responses", responsesRoutes)
 server.route("/models", modelRoutes)
 server.route("/embeddings", embeddingRoutes)
 server.route("/usage", usageRoute)
@@ -24,6 +26,7 @@ server.route("/token", tokenRoute)
 
 // Compatibility with tools that expect v1/ prefix
 server.route("/v1/chat/completions", completionRoutes)
+server.route("/v1/responses", responsesRoutes)
 server.route("/v1/models", modelRoutes)
 server.route("/v1/embeddings", embeddingRoutes)
 
diff --git a/src/services/copilot/create-responses.ts b/src/services/copilot/create-responses.ts
new file mode 100644
index 000000000..d66aba089
--- /dev/null
+++ b/src/services/copilot/create-responses.ts
@@ -0,0 +1,68 @@
+import consola from "consola"
+import { events } from "fetch-event-stream"
+
+import { copilotBaseUrl, copilotHeaders } from "~/lib/api-config"
+import { HTTPError } from "~/lib/error"
+import { state } from "~/lib/state"
+
+/**
+ * Send a Responses API request to Copilot.
+ *
+ * @returns an async iterator of server-sent events when `payload.stream` is
+ * truthy, otherwise the parsed JSON body.
+ * @throws Error when no Copilot token is available.
+ * @throws HTTPError when the upstream reply is not OK.
+ */
+export const createResponses = async (payload: ResponsesPayload) => {
+  if (!state.copilotToken) throw new Error("Copilot token not found")
+
+  const headers: Record<string, string> = {
+    ...copilotHeaders(state),
+    "X-Initiator": "user",
+  }
+
+  const response = await fetch(`${copilotBaseUrl(state)}/responses`, {
+    method: "POST",
+    headers,
+    body: JSON.stringify(payload),
+  })
+
+  if (!response.ok) {
+    consola.error("Failed to create responses", response)
+    throw new HTTPError("Failed to create responses", response)
+  }
+
+  if (payload.stream) {
+    return events(response)
+  }
+
+  return (await response.json()) as ResponsesApiResponse
+}
+
+/** Request body accepted by the Responses endpoint. */
+export interface ResponsesPayload {
+  model: string
+  input: unknown
+  instructions?: string | null
+  max_output_tokens?: number | null
+  previous_response_id?: string | null
+  reasoning?: Record<string, unknown> | null
+  store?: boolean | null
+  stream?: boolean | null
+  temperature?: number | null
+  text?: Record<string, unknown> | null
+  tool_choice?: string | Record<string, unknown> | null
+  tools?: Array<Record<string, unknown>> | null
+  top_p?: number | null
+  truncation?: string | null
+  user?: string | null
+  metadata?: Record<string, unknown> | null
+}
+
+/** Minimal shape declared for a non-streaming Responses reply. */
+export interface ResponsesApiResponse {
+  id: string
+  object: string
+  model: string
+  status: string
+}

From c487b522fa9fb71299afa7acac27f702a31eec71 Mon Sep 17 00:00:00 2001
From: toreleon
Date: Thu, 9 Apr 2026 01:41:36 +0700
Subject: [PATCH 2/2] Add verbose logging for Responses API

---
Review note: hunk sizes, offsets and the diffstat were updated for the
amended content; the blob ids on the `index` lines were not regenerated.

 src/lib/tokenizer.ts            | 41 +++++++++++++++++++++++++++++++++++++++++
 src/routes/responses/handler.ts | 31 ++++++++++++++++++++++++++++++-
 2 files changed, 71 insertions(+), 1 deletion(-)

diff --git a/src/lib/tokenizer.ts b/src/lib/tokenizer.ts
index 8c3eda736..e39cadc5b 100644
--- a/src/lib/tokenizer.ts
+++ b/src/lib/tokenizer.ts
@@ -5,6 +5,7 @@ import type {
   Tool,
   ToolCall,
 } from "~/services/copilot/create-chat-completions"
+import type { ResponsesPayload } from "~/services/copilot/create-responses"
 import type { Model } from "~/services/copilot/get-models"
 
 // Encoder type mapping
@@ -346,3 +347,43 @@ export const getTokenCount = async (
     output: outputTokens,
   }
 }
+
+/**
+ * Count the tokens of an arbitrary payload field: 0 for null/undefined, the
+ * encoded length for strings, and the encoded length of the JSON form otherwise.
+ */
+const countUnknownTokens = (value: unknown, encoder: Encoder): number => {
+  if (value === null || value === undefined) {
+    return 0
+  }
+  if (typeof value === "string") {
+    return encoder.encode(value).length
+  }
+  return encoder.encode(JSON.stringify(value)).length
+}
+
+/**
+ * Estimate token count for Responses API payloads using the model tokenizer.
+ *
+ * Only `input`, `instructions`, `tools`, `text` and `reasoning` are counted;
+ * `output` is always 0.
+ */
+export const getResponsesTokenCount = async (
+  payload: ResponsesPayload,
+  model: Model,
+): Promise<{ input: number; output: number }> => {
+  const tokenizer = getTokenizerFromModel(model)
+  const encoder = await getEncodeChatFunction(tokenizer)
+
+  let inputTokens = 0
+  inputTokens += countUnknownTokens(payload.input, encoder)
+  inputTokens += countUnknownTokens(payload.instructions, encoder)
+  inputTokens += countUnknownTokens(payload.tools, encoder)
+  inputTokens += countUnknownTokens(payload.text, encoder)
+  inputTokens += countUnknownTokens(payload.reasoning, encoder)
+
+  return {
+    input: inputTokens,
+    output: 0,
+  }
+}
diff --git a/src/routes/responses/handler.ts b/src/routes/responses/handler.ts
index b8bfc7c99..b29534501 100644
--- a/src/routes/responses/handler.ts
+++ b/src/routes/responses/handler.ts
@@ -6,6 +6,7 @@ import { streamSSE, type SSEMessage } from "hono/streaming"
 import { awaitApproval } from "~/lib/approval"
 import { checkRateLimit } from "~/lib/rate-limit"
 import { state } from "~/lib/state"
+import { getResponsesTokenCount } from "~/lib/tokenizer"
 import {
   createResponses,
   type ResponsesApiResponse,
@@ -22,11 +23,39 @@ import {
 export async function handleResponses(c: Context) {
   await checkRateLimit(state)
 
-  const payload = await c.req.json<ResponsesPayload>()
+  let payload = await c.req.json<ResponsesPayload>()
   consola.debug("Responses payload:", JSON.stringify(payload).slice(-400))
 
+  const selectedModel = state.models?.data.find(
+    (model) => model.id === payload.model,
+  )
+
+  // Token counting only feeds the log; a failure here must not block the request.
+  try {
+    if (selectedModel) {
+      const tokenCount = await getResponsesTokenCount(payload, selectedModel)
+      consola.info("Current token count:", tokenCount)
+    } else {
+      consola.warn("No model selected, skipping token count calculation")
+    }
+  } catch (error) {
+    consola.warn("Failed to calculate token count:", error)
+  }
+
   if (state.manualApprove) await awaitApproval()
 
+  // Default the output cap to the selected model's limit when the client sent none.
+  if (payload.max_output_tokens == null) {
+    payload = {
+      ...payload,
+      max_output_tokens: selectedModel?.capabilities.limits.max_output_tokens,
+    }
+    consola.debug(
+      "Set max_output_tokens to:",
+      JSON.stringify(payload.max_output_tokens),
+    )
+  }
+
   const response = await createResponses(payload)
 
   if (isNonStreaming(response)) {