18 changes: 17 additions & 1 deletion .github/workflows/ci.yml
@@ -27,10 +27,26 @@ jobs:
       - name: Lint
         run: npx oxlint .
 
+  test:
+    name: Test
+    runs-on: ubuntu-latest
+    needs: [lint]
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: actions/setup-node@v4
+        with:
+          node-version: 24
+
+      - run: npm install
+
+      - name: Run unit tests
+        run: npm test
+
   build:
     name: Build
     runs-on: ubuntu-latest
-    needs: [lint]
+    needs: [lint, test]
     steps:
       - uses: actions/checkout@v4
 
23 changes: 22 additions & 1 deletion README.md
@@ -84,7 +84,9 @@ Set defaults in `tui.json` via plugin options:
         "endpoint": "https://api.anthropic.com/v1",
         "model": "claude-haiku-4-5",
         "apiKeyEnv": "ANTHROPIC_API_KEY",
-        "maxTokens": 2048
+        "maxTokens": 2048,
+        "reasoningEffort": "low",
+        "retries": 2
       }
     ]
   ]
@@ -93,6 +95,13 @@ Set defaults in `tui.json` via plugin options:
 
 Any OpenAI-compatible endpoint works (Ollama, vLLM, LM Studio, etc.).
 
+- `endpoint` - OpenAI-compatible base URL
+- `model` - model name sent to `/chat/completions`
+- `apiKeyEnv` - environment variable containing the API key
+- `maxTokens` - maximum completion tokens for normalization calls
+- `reasoningEffort` - optional reasoning level for models that support it
+- `retries` - number of retry attempts for transient LLM failures
+
 ### Custom prompts
 
 The LLM system prompts used for normalization can be fully replaced by pointing
@@ -194,6 +203,18 @@ npm run fmt # oxfmt --check
 npm run fmt:fix # oxfmt --write
 ```
 
+### Test local plugin in OpenCode
+
+To test unpublished changes in the OpenCode TUI, point `~/.config/opencode/tui.json`
+at the local repo path, not the npm package name:
+
+```json
+{
+  "$schema": "https://opencode.ai/tui.json",
+  "plugin": ["/Users/your-user/opencode-voice"]
+}
+```
+
 ### Release process
 
 Manual releases via opencode; see [RELEASE_PROCESS.md](RELEASE_PROCESS.md).
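For context on the new `retries` option documented above: as implemented in `lib/llm-client.js` below, only 408, 429, and 5xx responses (plus thrown fetch errors and empty completions) are retried, with an exponential backoff of `250 * 2 ** attempt` milliseconds between attempts. A small illustrative sketch of that schedule, not part of the diff:

```js
// Illustrative only: the waits produced by wait(250 * 2 ** attempt)
// in lib/llm-client.js with the default retries: 2 (three attempts total).
for (let attempt = 0; attempt < 2; attempt++) {
  console.log(`after attempt ${attempt}: wait ${250 * 2 ** attempt} ms`);
}
// after attempt 0: wait 250 ms
// after attempt 1: wait 500 ms
```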
87 changes: 66 additions & 21 deletions lib/llm-client.js
@@ -10,16 +10,34 @@
 //   "endpoint": "https://api.anthropic.com/v1",
 //   "model": "claude-haiku-4-5",
 //   "apiKeyEnv": "ANTHROPIC_API_KEY",
-//   "maxTokens": 2048
+//   "maxTokens": 2048,
+//   "reasoningEffort": "low",
+//   "retries": 2
 // }]
 
 const DEFAULTS = {
   endpoint: "https://api.anthropic.com/v1",
   model: "claude-haiku-4-5",
   apiKeyEnv: "ANTHROPIC_API_KEY",
   maxTokens: 2048,
+  reasoningEffort: null,
+  retries: 2,
 };
 
+function normalizeRetries(value) {
+  const parsed = Number(value);
+  if (!Number.isFinite(parsed) || parsed < 0) return DEFAULTS.retries;
+  return Math.floor(parsed);
+}
+
+function shouldRetry(status) {
+  return status === 408 || status === 429 || status >= 500;
+}
+
+function wait(ms) {
+  return new Promise((resolve) => setTimeout(resolve, ms));
+}
+
 /**
  * Create an LLM completion function bound to a kv store for config persistence.
  *
@@ -34,6 +52,11 @@ export function createClient(kv, pluginOptions) {
     model: kv.get("llm.model") ?? pluginOptions?.model ?? DEFAULTS.model,
     apiKeyEnv: kv.get("llm.apiKeyEnv") ?? pluginOptions?.apiKeyEnv ?? DEFAULTS.apiKeyEnv,
     maxTokens: kv.get("llm.maxTokens") ?? pluginOptions?.maxTokens ?? DEFAULTS.maxTokens,
+    reasoningEffort:
+      kv.get("llm.reasoningEffort") ?? pluginOptions?.reasoningEffort ?? DEFAULTS.reasoningEffort,
+    retries: normalizeRetries(
+      kv.get("llm.retries") ?? pluginOptions?.retries ?? DEFAULTS.retries,
+    ),
   };
 }
 
@@ -57,29 +80,51 @@ export function createClient(kv, pluginOptions) {
     if (system) messages.push({ role: "system", content: system });
     messages.push({ role: "user", content: prompt });
 
-    try {
-      const response = await fetch(endpoint, {
-        method: "POST",
-        headers: {
-          "Content-Type": "application/json",
-          Authorization: "Bearer " + apiKey,
-        },
-        body: JSON.stringify({
-          model: cfg.model,
-          max_tokens: cfg.maxTokens,
-          messages,
-        }),
-      });
+    const body = {
+      model: cfg.model,
+      max_tokens: cfg.maxTokens,
+      messages,
+    };
+    if (cfg.reasoningEffort) body.reasoning_effort = cfg.reasoningEffort;
+
+    for (let attempt = 0; attempt <= cfg.retries; attempt++) {
+      try {
+        const response = await fetch(endpoint, {
+          method: "POST",
+          headers: {
+            "Content-Type": "application/json",
+            Authorization: "Bearer " + apiKey,
+          },
+          body: JSON.stringify(body),
+        });
+
+        if (!response.ok) {
+          if (attempt < cfg.retries && shouldRetry(response.status)) {
+            await wait(250 * 2 ** attempt);
+            continue;
+          }
+          return { text: null, error: `LLM request failed (${response.status})` };
+        }
 
-      if (!response.ok) {
-        return { text: null, error: `LLM request failed (${response.status})` };
-      }
-      const data = await response.json();
-      const text = data?.choices?.[0]?.message?.content || null;
-      return { text, error: text ? undefined : "Empty LLM response" };
-    } catch (err) {
-      return { text: null, error: `LLM error: ${err.message}` };
-    }
+        const data = await response.json();
+        const text = data?.choices?.[0]?.message?.content || null;
+        if (text) return { text };
+
+        if (attempt < cfg.retries) {
+          await wait(250 * 2 ** attempt);
+          continue;
+        }
+        return { text: null, error: "Empty LLM response" };
+      } catch (err) {
+        if (attempt < cfg.retries) {
+          await wait(250 * 2 ** attempt);
+          continue;
+        }
+        return { text: null, error: `LLM error: ${err.message}` };
+      }
+    }
+
+    return { text: null, error: "LLM request failed after retries" };
   }
 
   return { complete };
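Taken together, a minimal usage sketch of the updated client. The kv stub and prompt strings are illustrative, not part of this PR; in practice the kv store comes from the plugin host, and `ANTHROPIC_API_KEY` (the default `apiKeyEnv`) must be set:

```js
import { createClient } from "./lib/llm-client.js";

// Illustrative kv stub: no persisted overrides, so plugin options win.
const kv = { get: () => undefined };

const client = createClient(kv, {
  reasoningEffort: "low", // sent as reasoning_effort in the request body
  retries: 2,             // up to three attempts on 408/429/5xx or network errors
});

// complete() resolves to { text } on success, { text: null, error } on failure.
const { text, error } = await client.complete({
  system: "Normalize dictated text into clean prose.", // illustrative prompt
  prompt: "uh so basically clean this up",             // illustrative prompt
});
```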
1 change: 1 addition & 0 deletions package.json
@@ -36,6 +36,7 @@
     "lint": "npx oxlint .",
     "fmt": "npx oxfmt --check .",
     "fmt:fix": "npx oxfmt --write .",
+    "test": "node --test",
     "check": "npm run lint && npm run fmt",
     "prepack": "npm run check"
   },
137 changes: 137 additions & 0 deletions test/llm-client.test.js
@@ -0,0 +1,137 @@
+import assert from "node:assert/strict";
+import test from "node:test";
+
+import { createClient } from "../lib/llm-client.js";
+
+function createKv(entries = {}) {
+  return {
+    get(key) {
+      return entries[key];
+    },
+  };
+}
+
+function createJsonResponse(status, data) {
+  return {
+    ok: status >= 200 && status < 300,
+    status,
+    async json() {
+      return data;
+    },
+  };
+}
+
+test("returns an error when the configured API key is missing", async () => {
+  const previousKey = process.env.TEST_LLM_API_KEY;
+  delete process.env.TEST_LLM_API_KEY;
+
+  try {
+    const client = createClient(createKv(), { apiKeyEnv: "TEST_LLM_API_KEY" });
+    const result = await client.complete({ prompt: "Normalize this" });
+
+    assert.deepEqual(result, {
+      text: null,
+      error: "TEST_LLM_API_KEY not set",
+    });
+  } finally {
+    if (previousKey === undefined) {
+      delete process.env.TEST_LLM_API_KEY;
+    } else {
+      process.env.TEST_LLM_API_KEY = previousKey;
+    }
+  }
+});
+
+test("sends chat completions requests with reasoning_effort when configured", async () => {
+  const previousKey = process.env.TEST_LLM_API_KEY;
+  const previousFetch = globalThis.fetch;
+  const requests = [];
+  process.env.TEST_LLM_API_KEY = "secret";
+
+  globalThis.fetch = async (url, options) => {
+    requests.push({ url, options });
+    return createJsonResponse(200, {
+      choices: [{ message: { content: "normalized text" } }],
+    });
+  };
+
+  try {
+    const client = createClient(createKv(), {
+      endpoint: "https://example.test/v1/",
+      model: "gpt-test",
+      apiKeyEnv: "TEST_LLM_API_KEY",
+      maxTokens: 321,
+      reasoningEffort: "low",
+      retries: 0,
+    });
+
+    const result = await client.complete({
+      system: "System prompt",
+      prompt: "User prompt",
+    });
+
+    assert.equal(result.text, "normalized text");
+    assert.equal(requests.length, 1);
+    assert.equal(requests[0].url, "https://example.test/v1/chat/completions");
+    assert.equal(requests[0].options.method, "POST");
+    assert.deepEqual(JSON.parse(requests[0].options.body), {
+      model: "gpt-test",
+      max_tokens: 321,
+      reasoning_effort: "low",
+      messages: [
+        { role: "system", content: "System prompt" },
+        { role: "user", content: "User prompt" },
+      ],
+    });
+  } finally {
+    globalThis.fetch = previousFetch;
+    if (previousKey === undefined) {
+      delete process.env.TEST_LLM_API_KEY;
+    } else {
+      process.env.TEST_LLM_API_KEY = previousKey;
+    }
+  }
+});
+
+test("retries transient failures and eventually returns the response text", async () => {
+  const previousKey = process.env.TEST_LLM_API_KEY;
+  const previousFetch = globalThis.fetch;
+  const previousSetTimeout = globalThis.setTimeout;
+  let attempts = 0;
+  process.env.TEST_LLM_API_KEY = "secret";
+
+  globalThis.fetch = async () => {
+    attempts += 1;
+    if (attempts < 3) {
+      return createJsonResponse(429, { error: { message: "rate limited" } });
+    }
+    return createJsonResponse(200, {
+      choices: [{ message: { content: "recovered text" } }],
+    });
+  };
+
+  globalThis.setTimeout = (fn) => {
+    fn();
+    return 0;
+  };
+
+  try {
+    const client = createClient(createKv(), {
+      apiKeyEnv: "TEST_LLM_API_KEY",
+      retries: 2,
+    });
+
+    const result = await client.complete({ prompt: "Retry this" });
+
+    assert.deepEqual(result, { text: "recovered text" });
+    assert.equal(attempts, 3);
+  } finally {
+    globalThis.fetch = previousFetch;
+    globalThis.setTimeout = previousSetTimeout;
+    if (previousKey === undefined) {
+      delete process.env.TEST_LLM_API_KEY;
+    } else {
+      process.env.TEST_LLM_API_KEY = previousKey;
+    }
+  }
+});
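With the `test` script added in `package.json`, these tests run under Node's built-in runner (Node 24 in CI); the following invocations should work, assuming a checkout with dependencies installed:

```
npm test                              # runs "node --test"
node --test test/llm-client.test.js   # run just this file
```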