Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions src/lib/tokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import type {
Tool,
ToolCall,
} from "~/services/copilot/create-chat-completions"
import type { ResponsesPayload } from "~/services/copilot/create-responses"
import type { Model } from "~/services/copilot/get-models"

// Encoder type mapping
Expand Down Expand Up @@ -346,3 +347,36 @@ export const getTokenCount = async (
output: outputTokens,
}
}

/**
 * Count the tokens of an arbitrary value using the given encoder.
 *
 * - `null` / `undefined` count as 0 tokens.
 * - Strings are encoded as-is.
 * - Anything else is serialized with `JSON.stringify` and the resulting text
 *   is encoded.
 *
 * @param value - Value of unknown shape to measure.
 * @param encoder - Encoder whose `encode` result length is the token count.
 * @returns Number of tokens, or 0 when there is nothing serializable.
 */
const countUnknownTokens = (value: unknown, encoder: Encoder): number => {
  if (value === null || value === undefined) {
    return 0
  }
  if (typeof value === "string") {
    return encoder.encode(value).length
  }
  // At runtime JSON.stringify yields `undefined` (not a string) for values it
  // cannot represent, such as functions and symbols. Treat those as empty
  // instead of handing `undefined` to the encoder.
  const serialized: string | undefined = JSON.stringify(value)
  if (typeof serialized !== "string") {
    return 0
  }
  return encoder.encode(serialized).length
}

/**
 * Approximate the prompt size of a Responses API request with the tokenizer
 * resolved for `model`.
 *
 * The input total is the sum of the token counts of the payload's `input`,
 * `instructions`, `tools`, `text` and `reasoning` fields. The output count is
 * always reported as 0.
 *
 * @param payload - Responses API request body to measure.
 * @param model - Model whose tokenizer is used for encoding.
 * @returns Input and output token counts.
 */
export const getResponsesTokenCount = async (
  payload: ResponsesPayload,
  model: Model,
): Promise<{ input: number; output: number }> => {
  const encoder = await getEncodeChatFunction(getTokenizerFromModel(model))

  // Every payload field that contributes to the prompt-side estimate.
  const countedFields: Array<unknown> = [
    payload.input,
    payload.instructions,
    payload.tools,
    payload.text,
    payload.reasoning,
  ]

  const input = countedFields.reduce<number>(
    (total, field) => total + countUnknownTokens(field, encoder),
    0,
  )

  return { input, output: 0 }
}
68 changes: 68 additions & 0 deletions src/routes/responses/handler.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import type { Context } from "hono"

import consola from "consola"
import { streamSSE, type SSEMessage } from "hono/streaming"

import { awaitApproval } from "~/lib/approval"
import { checkRateLimit } from "~/lib/rate-limit"
import { state } from "~/lib/state"
import { getResponsesTokenCount } from "~/lib/tokenizer"
import {
createResponses,
type ResponsesApiResponse,
type ResponsesPayload,
} from "~/services/copilot/create-responses"

/**
 * Handle a request to the Responses endpoint.
 *
 * Steps, in order: apply the rate limit, parse the JSON body, log a token
 * count estimate when the requested model is known, wait for manual approval
 * if enabled, default `max_output_tokens` from the selected model when the
 * caller left it unset, then forward the request via `createResponses`.
 *
 * @param c - Hono request context.
 * @returns A JSON response for non-streaming results, otherwise an SSE stream.
 */
export async function handleResponses(c: Context) {
  await checkRateLimit(state)

  const incoming = await c.req.json<ResponsesPayload>()
  consola.debug("Responses payload:", JSON.stringify(incoming).slice(-400))

  const selectedModel = state.models?.data.find(
    (candidate) => candidate.id === incoming.model,
  )

  // The token count is only logged; a failure here must not block the request.
  try {
    if (!selectedModel) {
      consola.warn("No model selected, skipping token count calculation")
    } else {
      const tokenCount = await getResponsesTokenCount(incoming, selectedModel)
      consola.info("Current token count:", tokenCount)
    }
  } catch (error) {
    consola.warn("Failed to calculate token count:", error)
  }

  if (state.manualApprove) {
    await awaitApproval()
  }

  // Fall back to the model's output limit when the caller did not set one.
  let outgoing = incoming
  if (incoming.max_output_tokens == null) {
    const modelLimit = selectedModel?.capabilities.limits.max_output_tokens
    outgoing = { ...incoming, max_output_tokens: modelLimit }
    consola.debug("Set max_output_tokens to:", JSON.stringify(modelLimit))
  }

  const response = await createResponses(outgoing)

  if (!isNonStreaming(response)) {
    consola.debug("Streaming responses response")
    return streamSSE(c, async (stream) => {
      for await (const chunk of response) {
        consola.debug("Responses streaming chunk:", JSON.stringify(chunk))
        await stream.writeSSE(chunk as SSEMessage)
      }
    })
  }

  consola.debug("Non-streaming responses response:", JSON.stringify(response))
  return c.json(response)
}

/**
 * Type guard telling a plain JSON response apart from an event stream.
 *
 * The check uses the `in` operator rather than `Object.hasOwn` because async
 * generators and most async iterables inherit `Symbol.asyncIterator` from
 * their prototype. An own-property test would never see it and would
 * classify every stream as a non-streaming response.
 *
 * @param response - Result of `createResponses`.
 * @returns `true` when `response` is not async-iterable.
 */
const isNonStreaming = (
  response: Awaited<ReturnType<typeof createResponses>>,
): response is ResponsesApiResponse => !(Symbol.asyncIterator in response)
15 changes: 15 additions & 0 deletions src/routes/responses/route.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
import { Hono } from "hono"

import { forwardError } from "~/lib/error"

import { handleResponses } from "./handler"

/** Hono router exposing the Responses endpoint. */
export const responsesRoutes = new Hono()

// POST / — delegate to the handler; anything it throws is handed to
// forwardError so the route always produces a response.
responsesRoutes.post("/", async (ctx) => {
  try {
    const reply = await handleResponses(ctx)
    return reply
  } catch (err) {
    return await forwardError(ctx, err)
  }
})
3 changes: 3 additions & 0 deletions src/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import { completionRoutes } from "./routes/chat-completions/route"
import { embeddingRoutes } from "./routes/embeddings/route"
import { messageRoutes } from "./routes/messages/route"
import { modelRoutes } from "./routes/models/route"
import { responsesRoutes } from "./routes/responses/route"
import { tokenRoute } from "./routes/token/route"
import { usageRoute } from "./routes/usage/route"

Expand All @@ -17,13 +18,15 @@ server.use(cors())
// Root path replies with a plain-text status message.
server.get("/", (c) => c.text("Server running"))

// Un-prefixed API routes.
server.route("/chat/completions", completionRoutes)
server.route("/responses", responsesRoutes)
server.route("/models", modelRoutes)
server.route("/embeddings", embeddingRoutes)
server.route("/usage", usageRoute)
server.route("/token", tokenRoute)

// Compatibility with tools that expect v1/ prefix: the same routers are
// mounted a second time under /v1.
server.route("/v1/chat/completions", completionRoutes)
server.route("/v1/responses", responsesRoutes)
server.route("/v1/models", modelRoutes)
server.route("/v1/embeddings", embeddingRoutes)

Expand Down
58 changes: 58 additions & 0 deletions src/services/copilot/create-responses.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
import consola from "consola"
import { events } from "fetch-event-stream"

import { copilotBaseUrl, copilotHeaders } from "~/lib/api-config"
import { HTTPError } from "~/lib/error"
import { state } from "~/lib/state"

/**
 * Send a Responses request to the Copilot API.
 *
 * @param payload - Request body; serialized to JSON and POSTed to `/responses`.
 * @returns The parsed JSON body, or the result of `events(response)` when
 *   `payload.stream` is truthy.
 * @throws Error when no Copilot token is present in `state`.
 * @throws HTTPError when the upstream reply has a non-OK status.
 */
export const createResponses = async (payload: ResponsesPayload) => {
  if (!state.copilotToken) {
    throw new Error("Copilot token not found")
  }

  const requestHeaders: Record<string, string> = {
    ...copilotHeaders(state),
    "X-Initiator": "user",
  }
  const endpoint = `${copilotBaseUrl(state)}/responses`

  const upstream = await fetch(endpoint, {
    method: "POST",
    headers: requestHeaders,
    body: JSON.stringify(payload),
  })

  if (!upstream.ok) {
    consola.error("Failed to create responses", upstream)
    throw new HTTPError("Failed to create responses", upstream)
  }

  // Non-streaming callers get the parsed body; streaming callers get the
  // server-sent event iterator.
  if (!payload.stream) {
    return (await upstream.json()) as ResponsesApiResponse
  }

  return events(upstream)
}

/**
 * Request body for the `/responses` endpoint; `createResponses` serializes it
 * to JSON unchanged.
 *
 * Every optional field also accepts `null`.
 *
 * NOTE(review): the field names appear to mirror the OpenAI Responses API
 * request schema — confirm against the upstream documentation before relying
 * on the meaning of any individual field.
 */
export interface ResponsesPayload {
/** Identifier of the model to use. */
model: string
/** Prompt content; typed `unknown`, so its shape is not checked here. */
input: unknown
instructions?: string | null
max_output_tokens?: number | null
previous_response_id?: string | null
reasoning?: Record<string, unknown> | null
store?: boolean | null
/** When truthy, `createResponses` returns an event stream instead of parsed JSON. */
stream?: boolean | null
temperature?: number | null
text?: Record<string, unknown> | null
tool_choice?: string | Record<string, unknown> | null
tools?: Array<Record<string, unknown>> | null
top_p?: number | null
truncation?: string | null
user?: string | null
metadata?: Record<string, unknown> | null
}

/**
 * Shape that `createResponses` asserts for the parsed JSON body of a
 * non-streaming call.
 *
 * NOTE(review): only these four fields are typed; the upstream body presumably
 * carries more (e.g. the generated output) — confirm and extend as needed.
 */
export interface ResponsesApiResponse {
id: string
object: string
model: string
status: string
}