18 changes: 17 additions & 1 deletion .github/workflows/ci.yml
@@ -27,10 +27,26 @@ jobs:
       - name: Lint
         run: npx oxlint .
 
+  test:
+    name: Test
+    runs-on: ubuntu-latest
+    needs: [lint]
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 24
+
+      - run: npm install
+
+      - name: Run unit tests
+        run: npm test
+
   build:
     name: Build
     runs-on: ubuntu-latest
-    needs: [lint]
+    needs: [lint, test]
     steps:
       - uses: actions/checkout@v4
 
23 changes: 22 additions & 1 deletion README.md
@@ -84,7 +84,9 @@ Set defaults in `tui.json` via plugin options:
         "endpoint": "https://api.anthropic.com/v1",
         "model": "claude-haiku-4-5",
         "apiKeyEnv": "ANTHROPIC_API_KEY",
-        "maxTokens": 2048
+        "maxTokens": 2048,
+        "reasoningEffort": "low",
+        "retries": 2
       }
     ]
   ]
@@ -93,6 +95,13 @@ Set defaults in `tui.json` via plugin options:
 
 Any OpenAI-compatible endpoint works (Ollama, vLLM, LM Studio, etc.).
 
+- `endpoint` - OpenAI-compatible base URL
+- `model` - model name sent to `/chat/completions`
+- `apiKeyEnv` - environment variable containing the API key
+- `maxTokens` - maximum completion tokens for normalization calls
+- `reasoningEffort` - optional reasoning level for models that support it
+- `retries` - number of retry attempts for transient LLM failures
+
 ### Custom prompts
 
 The LLM system prompts used for normalization can be fully replaced by pointing
@@ -194,6 +203,18 @@ npm run fmt # oxfmt --check
 npm run fmt:fix # oxfmt --write
 ```
 
+### Test local plugin in OpenCode
+
+To test unpublished changes in the OpenCode TUI, point `~/.config/opencode/tui.json`
+at the local repo path, not the npm package name:
+
+```json
+{
+  "$schema": "https://opencode.ai/tui.json",
+  "plugin": ["/Users/your-user/opencode-voice"]
+}
+```
+
 ### Release process
 
 Manual releases via opencode; see [RELEASE_PROCESS.md](RELEASE_PROCESS.md).
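For context on the new `retries` option documented above: as implemented in `lib/llm-client.js` below, only 408, 429, and 5xx responses (plus thrown fetch errors and empty completions) are retried, with an exponential backoff of `250 * 2 ** attempt` milliseconds between attempts. A small illustrative sketch of that schedule, not part of the diff:

```js
// Illustrative only: the waits produced by wait(250 * 2 ** attempt)
// in lib/llm-client.js with the default retries: 2 (three attempts total).
for (let attempt = 0; attempt < 2; attempt++) {
  console.log(`after attempt ${attempt}: wait ${250 * 2 ** attempt} ms`);
}
// after attempt 0: wait 250 ms
// after attempt 1: wait 500 ms
```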
87 changes: 66 additions & 21 deletions lib/llm-client.js
@@ -10,16 +10,34 @@
 //   "endpoint": "https://api.anthropic.com/v1",
 //   "model": "claude-haiku-4-5",
 //   "apiKeyEnv": "ANTHROPIC_API_KEY",
-//   "maxTokens": 2048
+//   "maxTokens": 2048,
+//   "reasoningEffort": "low",
+//   "retries": 2
 // }]
 
 const DEFAULTS = {
   endpoint: "https://api.anthropic.com/v1",
   model: "claude-haiku-4-5",
   apiKeyEnv: "ANTHROPIC_API_KEY",
   maxTokens: 2048,
+  reasoningEffort: null,
+  retries: 2,
 };
 
+function normalizeRetries(value) {
+  const parsed = Number(value);
+  if (!Number.isFinite(parsed) || parsed < 0) return DEFAULTS.retries;
+  return Math.floor(parsed);
+}
+
+function shouldRetry(status) {
+  return status === 408 || status === 429 || status >= 500;
+}
+
+function wait(ms) {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
+
 /**
  * Create an LLM completion function bound to a kv store for config persistence.
  *
@@ -34,6 +52,11 @@ export function createClient(kv, pluginOptions) {
     model: kv.get("llm.model") ?? pluginOptions?.model ?? DEFAULTS.model,
     apiKeyEnv: kv.get("llm.apiKeyEnv") ?? pluginOptions?.apiKeyEnv ?? DEFAULTS.apiKeyEnv,
     maxTokens: kv.get("llm.maxTokens") ?? pluginOptions?.maxTokens ?? DEFAULTS.maxTokens,
+    reasoningEffort:
+      kv.get("llm.reasoningEffort") ?? pluginOptions?.reasoningEffort ?? DEFAULTS.reasoningEffort,
+    retries: normalizeRetries(
+      kv.get("llm.retries") ?? pluginOptions?.retries ?? DEFAULTS.retries,
+    ),
   };
 }
 
@@ -57,29 +80,51 @@ export function createClient(kv, pluginOptions) {
     if (system) messages.push({ role: "system", content: system });
     messages.push({ role: "user", content: prompt });
 
-    try {
-      const response = await fetch(endpoint, {
-        method: "POST",
-        headers: {
-          "Content-Type": "application/json",
-          Authorization: "Bearer " + apiKey,
-        },
-        body: JSON.stringify({
-          model: cfg.model,
-          max_tokens: cfg.maxTokens,
-          messages,
-        }),
-      });
+    const body = {
+      model: cfg.model,
+      max_tokens: cfg.maxTokens,
+      messages,
+    };
+    if (cfg.reasoningEffort) body.reasoning_effort = cfg.reasoningEffort;
+
+    for (let attempt = 0; attempt <= cfg.retries; attempt++) {
+      try {
+        const response = await fetch(endpoint, {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+            Authorization: "Bearer " + apiKey,
+          },
+          body: JSON.stringify(body),
+        });
+
+        if (!response.ok) {
+          if (attempt < cfg.retries && shouldRetry(response.status)) {
+            await wait(250 * 2 ** attempt);
+            continue;
+          }
+          return { text: null, error: `LLM request failed (${response.status})` };
+        }
 
-      if (!response.ok) {
-        return { text: null, error: `LLM request failed (${response.status})` };
-      }
-      const data = await response.json();
-      const text = data?.choices?.[0]?.message?.content || null;
-      return { text, error: text ? undefined : "Empty LLM response" };
-    } catch (err) {
-      return { text: null, error: `LLM error: ${err.message}` };
-    }
+        const data = await response.json();
+        const text = data?.choices?.[0]?.message?.content || null;
+        if (text) return { text };
+
+        if (attempt < cfg.retries) {
+          await wait(250 * 2 ** attempt);
+          continue;
+        }
+        return { text: null, error: "Empty LLM response" };
+      } catch (err) {
+        if (attempt < cfg.retries) {
+          await wait(250 * 2 ** attempt);
+          continue;
+        }
+        return { text: null, error: `LLM error: ${err.message}` };
+      }
+    }
+
+    return { text: null, error: "LLM request failed after retries" };
   }
 
   return { complete };
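Taken together, a minimal usage sketch of the updated client. The kv stub and prompt strings are illustrative, not part of this PR; in practice the kv store comes from the plugin host, and `ANTHROPIC_API_KEY` (the default `apiKeyEnv`) must be set:

```js
import { createClient } from "./lib/llm-client.js";

// Illustrative kv stub: no persisted overrides, so plugin options win.
const kv = { get: () => undefined };

const client = createClient(kv, {
  reasoningEffort: "low", // sent as reasoning_effort in the request body
  retries: 2,             // up to three attempts on 408/429/5xx or network errors
});

// complete() resolves to { text } on success, { text: null, error } on failure.
const { text, error } = await client.complete({
  system: "Normalize dictated text into clean prose.", // illustrative prompt
  prompt: "uh so basically clean this up",             // illustrative prompt
});
```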
1 change: 1 addition & 0 deletions package.json
@@ -36,6 +36,7 @@
     "lint": "npx oxlint .",
     "fmt": "npx oxfmt --check .",
     "fmt:fix": "npx oxfmt --write .",
+    "test": "node --test",
     "check": "npm run lint && npm run fmt",
     "prepack": "npm run check"
   },
137 changes: 137 additions & 0 deletions test/llm-client.test.js
@@ -0,0 +1,137 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+
+import { createClient } from "../lib/llm-client.js";
+
+function createKv(entries = {}) {
+  return {
+    get(key) {
+      return entries[key];
+    },
+  };
+}
+
+function createJsonResponse(status, data) {
+  return {
+    ok: status >= 200 && status < 300,
+    status,
+    async json() {
+      return data;
+    },
+  };
+}
+
+test("returns an error when the configured API key is missing", async () => {
+  const previousKey = process.env.TEST_LLM_API_KEY;
+  delete process.env.TEST_LLM_API_KEY;
+
+  try {
+    const client = createClient(createKv(), { apiKeyEnv: "TEST_LLM_API_KEY" });
+    const result = await client.complete({ prompt: "Normalize this" });
+
+    assert.deepEqual(result, {
+      text: null,
+      error: "TEST_LLM_API_KEY not set",
+    });
+  } finally {
+    if (previousKey === undefined) {
+      delete process.env.TEST_LLM_API_KEY;
+    } else {
+      process.env.TEST_LLM_API_KEY = previousKey;
+    }
+  }
+});
+
+test("sends chat completions requests with reasoning_effort when configured", async () => {
+  const previousKey = process.env.TEST_LLM_API_KEY;
+  const previousFetch = globalThis.fetch;
+  const requests = [];
+  process.env.TEST_LLM_API_KEY = "secret";
+
+  globalThis.fetch = async (url, options) => {
+    requests.push({ url, options });
+    return createJsonResponse(200, {
+      choices: [{ message: { content: "normalized text" } }],
+    });
+  };
+
+  try {
+    const client = createClient(createKv(), {
+      endpoint: "https://example.test/v1/",
+      model: "gpt-test",
+      apiKeyEnv: "TEST_LLM_API_KEY",
+      maxTokens: 321,
+      reasoningEffort: "low",
+      retries: 0,
+    });
+
+    const result = await client.complete({
+      system: "System prompt",
+      prompt: "User prompt",
+    });
+
+    assert.equal(result.text, "normalized text");
+    assert.equal(requests.length, 1);
+    assert.equal(requests[0].url, "https://example.test/v1/chat/completions");
+    assert.equal(requests[0].options.method, "POST");
+    assert.deepEqual(JSON.parse(requests[0].options.body), {
+      model: "gpt-test",
+      max_tokens: 321,
+      reasoning_effort: "low",
+      messages: [
+        { role: "system", content: "System prompt" },
+        { role: "user", content: "User prompt" },
+      ],
+    });
+  } finally {
+    globalThis.fetch = previousFetch;
+    if (previousKey === undefined) {
+      delete process.env.TEST_LLM_API_KEY;
+    } else {
+      process.env.TEST_LLM_API_KEY = previousKey;
+    }
+  }
+});
+
+test("retries transient failures and eventually returns the response text", async () => {
+  const previousKey = process.env.TEST_LLM_API_KEY;
+  const previousFetch = globalThis.fetch;
+  const previousSetTimeout = globalThis.setTimeout;
+  let attempts = 0;
+  process.env.TEST_LLM_API_KEY = "secret";
+
+  globalThis.fetch = async () => {
+    attempts += 1;
+    if (attempts < 3) {
+      return createJsonResponse(429, { error: { message: "rate limited" } });
+    }
+    return createJsonResponse(200, {
+      choices: [{ message: { content: "recovered text" } }],
+    });
+  };
+
+  globalThis.setTimeout = (fn) => {
+    fn();
+    return 0;
+  };
+
+  try {
+    const client = createClient(createKv(), {
+      apiKeyEnv: "TEST_LLM_API_KEY",
+      retries: 2,
+    });
+
+    const result = await client.complete({ prompt: "Retry this" });
+
+    assert.deepEqual(result, { text: "recovered text" });
+    assert.equal(attempts, 3);
+  } finally {
+    globalThis.fetch = previousFetch;
+    globalThis.setTimeout = previousSetTimeout;
+    if (previousKey === undefined) {
+      delete process.env.TEST_LLM_API_KEY;
+    } else {
+      process.env.TEST_LLM_API_KEY = previousKey;
+    }
+  }
+});
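With the `test` script added in `package.json`, these tests run under Node's built-in runner (Node 24 in CI); the following invocations should work, assuming a checkout with dependencies installed:

```
npm test                              # runs "node --test"
node --test test/llm-client.test.js   # run just this file
```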