diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index fbd698c..4131ff2 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -27,10 +27,26 @@ jobs:
       - name: Lint
         run: npx oxlint .
 
+  test:
+    name: Test
+    runs-on: ubuntu-latest
+    needs: [lint]
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 24
+
+      - run: npm install
+
+      - name: Run unit tests
+        run: npm test
+
   build:
     name: Build
     runs-on: ubuntu-latest
-    needs: [lint]
+    needs: [lint, test]
     steps:
       - uses: actions/checkout@v4
 
diff --git a/README.md b/README.md
index 12a7da8..a201c6d 100644
--- a/README.md
+++ b/README.md
@@ -84,7 +84,9 @@ Set defaults in `tui.json` via plugin options:
         "endpoint": "https://api.anthropic.com/v1",
         "model": "claude-haiku-4-5",
         "apiKeyEnv": "ANTHROPIC_API_KEY",
-        "maxTokens": 2048
+        "maxTokens": 2048,
+        "reasoningEffort": "low",
+        "retries": 2
       }
     ]
   ]
@@ -93,6 +95,13 @@ Set defaults in `tui.json` via plugin options:
 
 Any OpenAI-compatible endpoint works (Ollama, vLLM, LM Studio, etc.).
 
+- `endpoint` - OpenAI-compatible base URL
+- `model` - model name sent to `/chat/completions`
+- `apiKeyEnv` - environment variable containing the API key
+- `maxTokens` - maximum completion tokens for normalization calls
+- `reasoningEffort` - optional reasoning level for models that support it
+- `retries` - number of retry attempts for transient LLM failures
+
 ### Custom prompts
 
 The LLM system prompts used for normalization can be fully replaced by pointing
@@ -194,6 +203,18 @@ npm run fmt # oxfmt --check
 npm run fmt:fix # oxfmt --write
 ```
 
+### Test local plugin in OpenCode
+
+To test unpublished changes in the OpenCode TUI, point `~/.config/opencode/tui.json`
+at the local repo path, not the npm package name:
+
+```json
+{
+  "$schema": "https://opencode.ai/tui.json",
+  "plugin": ["/Users/your-user/opencode-voice"]
+}
+```
+
 ### Release process
 
 Manual releases via opencode; see [RELEASE_PROCESS.md](RELEASE_PROCESS.md).
diff --git a/lib/llm-client.js b/lib/llm-client.js
index fd9389e..8d98bda 100644
--- a/lib/llm-client.js
+++ b/lib/llm-client.js
@@ -10,7 +10,9 @@
 // "endpoint": "https://api.anthropic.com/v1",
 // "model": "claude-haiku-4-5",
 // "apiKeyEnv": "ANTHROPIC_API_KEY",
-// "maxTokens": 2048
+// "maxTokens": 2048,
+// "reasoningEffort": "low",
+// "retries": 2
 // }]
 
 const DEFAULTS = {
@@ -18,8 +20,24 @@ const DEFAULTS = {
   model: "claude-haiku-4-5",
   apiKeyEnv: "ANTHROPIC_API_KEY",
   maxTokens: 2048,
+  reasoningEffort: null,
+  retries: 2,
 };
 
+function normalizeRetries(value) {
+  const parsed = Number(value);
+  if (!Number.isFinite(parsed) || parsed < 0) return DEFAULTS.retries;
+  return Math.floor(parsed);
+}
+
+function shouldRetry(status) {
+  return status === 408 || status === 429 || status >= 500;
+}
+
+function wait(ms) {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
+
 /**
  * Create an LLM completion function bound to a kv store for config persistence.
  *
@@ -34,6 +52,11 @@ export function createClient(kv, pluginOptions) {
       model: kv.get("llm.model") ?? pluginOptions?.model ?? DEFAULTS.model,
       apiKeyEnv: kv.get("llm.apiKeyEnv") ?? pluginOptions?.apiKeyEnv ?? DEFAULTS.apiKeyEnv,
       maxTokens: kv.get("llm.maxTokens") ?? pluginOptions?.maxTokens ?? DEFAULTS.maxTokens,
+      reasoningEffort:
+        kv.get("llm.reasoningEffort") ?? pluginOptions?.reasoningEffort ?? DEFAULTS.reasoningEffort,
+      retries: normalizeRetries(
+        kv.get("llm.retries") ?? pluginOptions?.retries ?? DEFAULTS.retries,
+      ),
     };
   }
 
@@ -57,29 +80,51 @@
     if (system) messages.push({ role: "system", content: system });
     messages.push({ role: "user", content: prompt });
 
-    try {
-      const response = await fetch(endpoint, {
-        method: "POST",
-        headers: {
-          "Content-Type": "application/json",
-          Authorization: "Bearer " + apiKey,
-        },
-        body: JSON.stringify({
-          model: cfg.model,
-          max_tokens: cfg.maxTokens,
-          messages,
-        }),
-      });
+    const body = {
+      model: cfg.model,
+      max_tokens: cfg.maxTokens,
+      messages,
+    };
+    if (cfg.reasoningEffort) body.reasoning_effort = cfg.reasoningEffort;
+
+    for (let attempt = 0; attempt <= cfg.retries; attempt++) {
+      try {
+        const response = await fetch(endpoint, {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+            Authorization: "Bearer " + apiKey,
+          },
+          body: JSON.stringify(body),
+        });
+
+        if (!response.ok) {
+          if (attempt < cfg.retries && shouldRetry(response.status)) {
+            await wait(250 * 2 ** attempt);
+            continue;
+          }
+          return { text: null, error: `LLM request failed (${response.status})` };
+        }
 
-      if (!response.ok) {
-        return { text: null, error: `LLM request failed (${response.status})` };
+        const data = await response.json();
+        const text = data?.choices?.[0]?.message?.content || null;
+        if (text) return { text };
+
+        if (attempt < cfg.retries) {
+          await wait(250 * 2 ** attempt);
+          continue;
+        }
+        return { text: null, error: "Empty LLM response" };
+      } catch (err) {
+        if (attempt < cfg.retries) {
+          await wait(250 * 2 ** attempt);
+          continue;
+        }
+        return { text: null, error: `LLM error: ${err.message}` };
       }
-      const data = await response.json();
-      const text = data?.choices?.[0]?.message?.content || null;
-      return { text, error: text ? undefined : "Empty LLM response" };
-    } catch (err) {
-      return { text: null, error: `LLM error: ${err.message}` };
     }
+
+    return { text: null, error: "LLM request failed after retries" };
   }
 
   return { complete };
diff --git a/package.json b/package.json
index 3d06927..32e07fa 100644
--- a/package.json
+++ b/package.json
@@ -36,6 +36,7 @@
     "lint": "npx oxlint .",
     "fmt": "npx oxfmt --check .",
     "fmt:fix": "npx oxfmt --write .",
+    "test": "node --test",
     "check": "npm run lint && npm run fmt",
     "prepack": "npm run check"
   },
diff --git a/test/llm-client.test.js b/test/llm-client.test.js
new file mode 100644
index 0000000..2139713
--- /dev/null
+++ b/test/llm-client.test.js
@@ -0,0 +1,137 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+
+import { createClient } from "../lib/llm-client.js";
+
+function createKv(entries = {}) {
+  return {
+    get(key) {
+      return entries[key];
+    },
+  };
+}
+
+function createJsonResponse(status, data) {
+  return {
+    ok: status >= 200 && status < 300,
+    status,
+    async json() {
+      return data;
+    },
+  };
+}
+
+test("returns an error when the configured API key is missing", async () => {
+  const previousKey = process.env.TEST_LLM_API_KEY;
+  delete process.env.TEST_LLM_API_KEY;
+
+  try {
+    const client = createClient(createKv(), { apiKeyEnv: "TEST_LLM_API_KEY" });
+    const result = await client.complete({ prompt: "Normalize this" });
+
+    assert.deepEqual(result, {
+      text: null,
+      error: "TEST_LLM_API_KEY not set",
+    });
+  } finally {
+    if (previousKey === undefined) {
+      delete process.env.TEST_LLM_API_KEY;
+    } else {
+      process.env.TEST_LLM_API_KEY = previousKey;
+    }
+  }
+});
+
+test("sends chat completions requests with reasoning_effort when configured", async () => {
+  const previousKey = process.env.TEST_LLM_API_KEY;
+  const previousFetch = globalThis.fetch;
+  const requests = [];
+  process.env.TEST_LLM_API_KEY = "secret";
+
+  globalThis.fetch = async (url, options) => {
+    requests.push({ url, options });
+    return createJsonResponse(200, {
+      choices: [{ message: { content: "normalized text" } }],
+    });
+  };
+
+  try {
+    const client = createClient(createKv(), {
+      endpoint: "https://example.test/v1/",
+      model: "gpt-test",
+      apiKeyEnv: "TEST_LLM_API_KEY",
+      maxTokens: 321,
+      reasoningEffort: "low",
+      retries: 0,
+    });
+
+    const result = await client.complete({
+      system: "System prompt",
+      prompt: "User prompt",
+    });
+
+    assert.equal(result.text, "normalized text");
+    assert.equal(requests.length, 1);
+    assert.equal(requests[0].url, "https://example.test/v1/chat/completions");
+    assert.equal(requests[0].options.method, "POST");
+    assert.deepEqual(JSON.parse(requests[0].options.body), {
+      model: "gpt-test",
+      max_tokens: 321,
+      reasoning_effort: "low",
+      messages: [
+        { role: "system", content: "System prompt" },
+        { role: "user", content: "User prompt" },
+      ],
+    });
+  } finally {
+    globalThis.fetch = previousFetch;
+    if (previousKey === undefined) {
+      delete process.env.TEST_LLM_API_KEY;
+    } else {
+      process.env.TEST_LLM_API_KEY = previousKey;
+    }
+  }
+});
+
+test("retries transient failures and eventually returns the response text", async () => {
+  const previousKey = process.env.TEST_LLM_API_KEY;
+  const previousFetch = globalThis.fetch;
+  const previousSetTimeout = globalThis.setTimeout;
+  let attempts = 0;
+  process.env.TEST_LLM_API_KEY = "secret";
+
+  globalThis.fetch = async () => {
+    attempts += 1;
+    if (attempts < 3) {
+      return createJsonResponse(429, { error: { message: "rate limited" } });
+    }
+    return createJsonResponse(200, {
+      choices: [{ message: { content: "recovered text" } }],
+    });
+  };
+
+  globalThis.setTimeout = (fn) => {
+    fn();
+    return 0;
+  };
+
+  try {
+    const client = createClient(createKv(), {
+      apiKeyEnv: "TEST_LLM_API_KEY",
+      retries: 2,
+    });
+
+    const result = await client.complete({ prompt: "Retry this" });
+
+    assert.deepEqual(result, { text: "recovered text" });
+    assert.equal(attempts, 3);
+  } finally {
+    globalThis.fetch = previousFetch;
+    globalThis.setTimeout = previousSetTimeout;
+    if (previousKey === undefined) {
+      delete process.env.TEST_LLM_API_KEY;
+    } else {
+      process.env.TEST_LLM_API_KEY = previousKey;
+    }
+  }
+});