Skip to content

Commit 28c68a7

Browse files
authored
Merge pull request #130 from SharpAI/feat/streaming-benchmark-env-vars
refactor: use OpenAI SDK for streaming LLM calls
2 parents 05344e8 + 4fdf83b commit 28c68a7

File tree

3 files changed

+125
-100
lines changed

3 files changed

+125
-100
lines changed

skills/analysis/home-security-benchmark/package-lock.json

Lines changed: 37 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
{
2+
"name": "home-security-benchmark",
3+
"version": "1.0.0",
4+
"description": "",
5+
"main": "index.js",
6+
"scripts": {
7+
"test": "echo \"Error: no test specified\" && exit 1"
8+
},
9+
"keywords": [],
10+
"author": "",
11+
"license": "ISC",
12+
"type": "commonjs",
13+
"dependencies": {
14+
"openai": "^6.27.0"
15+
}
16+
}

skills/analysis/home-security-benchmark/scripts/run-benchmark.cjs

Lines changed: 72 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,28 @@ const LLM_BASE_URL = process.env.AEGIS_LLM_BASE_URL || '';
9797
const VLM_API_TYPE = process.env.AEGIS_VLM_API_TYPE || 'openai-compatible';
9898
const VLM_MODEL = process.env.AEGIS_VLM_MODEL || '';
9999

100+
// ─── OpenAI SDK Clients ──────────────────────────────────────────────────────
101+
const OpenAI = require('openai');
102+
103+
// Resolve LLM base URL — priority: cloud provider → direct llama-server → gateway
104+
const strip = (u) => u.replace(/\/v1\/?$/, '');
105+
const llmBaseUrl = LLM_BASE_URL
106+
? `${strip(LLM_BASE_URL)}/v1`
107+
: LLM_URL
108+
? `${strip(LLM_URL)}/v1`
109+
: `${GATEWAY_URL}/v1`;
110+
111+
const llmClient = new OpenAI({
112+
apiKey: LLM_API_KEY || 'not-needed', // Local servers don't require auth
113+
baseURL: llmBaseUrl,
114+
});
115+
116+
// VLM client — always local llama-server
117+
const vlmClient = VLM_URL ? new OpenAI({
118+
apiKey: 'not-needed',
119+
baseURL: `${strip(VLM_URL)}/v1`,
120+
}) : null;
121+
100122
// ─── Skill Protocol: JSON lines on stdout, human text on stderr ──────────────
101123

102124
/**
@@ -136,110 +158,70 @@ const results = {
136158
};
137159

138160
async function llmCall(messages, opts = {}) {
139-
const body = { messages, stream: true };
140-
if (opts.model || LLM_MODEL) body.model = opts.model || LLM_MODEL;
141-
if (opts.maxTokens) body.max_tokens = opts.maxTokens;
142-
if (opts.temperature !== undefined) body.temperature = opts.temperature;
143-
if (opts.tools) body.tools = opts.tools;
144-
145-
// Resolve LLM endpoint — priority:
146-
// 1. Cloud provider base URL (e.g. https://api.openai.com/v1) when set via UI
147-
// 2. Direct llama-server URL (port 5411) for builtin local models
148-
// 3. Gateway (port 5407) as final fallback
149-
const strip = (u) => u.replace(/\/v1\/?$/, '');
150-
let url;
151-
if (opts.vlm) {
152-
const vlmBase = VLM_URL ? strip(VLM_URL) : '';
153-
url = `${vlmBase}/v1/chat/completions`;
154-
} else if (LLM_BASE_URL) {
155-
url = `${strip(LLM_BASE_URL)}/chat/completions`;
156-
} else if (LLM_URL) {
157-
url = `${strip(LLM_URL)}/v1/chat/completions`;
158-
} else {
159-
url = `${GATEWAY_URL}/v1/chat/completions`;
161+
// Select the appropriate OpenAI client (LLM or VLM)
162+
const client = opts.vlm ? vlmClient : llmClient;
163+
if (!client) {
164+
throw new Error(opts.vlm ? 'VLM client not configured' : 'LLM client not configured');
160165
}
161166

162-
// Build headers — include API key if available (for direct cloud provider access)
163-
const headers = { 'Content-Type': 'application/json' };
164-
if (LLM_API_KEY && !opts.vlm) headers['Authorization'] = `Bearer ${LLM_API_KEY}`;
167+
const model = opts.model || (opts.vlm ? VLM_MODEL : LLM_MODEL) || undefined;
168+
169+
// Build request params
170+
const params = {
171+
messages,
172+
stream: true,
173+
...(model && { model }),
174+
...(opts.temperature !== undefined && { temperature: opts.temperature }),
175+
...(opts.maxTokens && { max_completion_tokens: opts.maxTokens }),
176+
...(opts.tools && { tools: opts.tools }),
177+
};
165178

166-
// Use an AbortController with idle timeout that resets on each SSE chunk.
167-
// This way long inferences that stream tokens succeed, but requests
168-
// stuck with no output for IDLE_TIMEOUT_MS still abort.
179+
// Use an AbortController with idle timeout that resets on each streamed chunk.
169180
const controller = new AbortController();
170181
const idleMs = opts.timeout || IDLE_TIMEOUT_MS;
171182
let idleTimer = setTimeout(() => controller.abort(), idleMs);
172183
const resetIdle = () => { clearTimeout(idleTimer); idleTimer = setTimeout(() => controller.abort(), idleMs); };
173184

174185
try {
175-
const response = await fetch(url, {
176-
method: 'POST',
177-
headers,
178-
body: JSON.stringify(body),
186+
const stream = await client.chat.completions.create(params, {
179187
signal: controller.signal,
180188
});
181189

182-
if (!response.ok) {
183-
const errBody = await response.text().catch(() => '');
184-
throw new Error(`HTTP ${response.status}: ${errBody.slice(0, 200)}`);
185-
}
186-
187-
// Parse SSE stream
188190
let content = '';
189191
let reasoningContent = '';
190192
let toolCalls = null;
191193
let model = '';
192194
let usage = {};
193-
let finishReason = '';
194195
let tokenCount = 0;
195196

196-
const reader = response.body;
197-
const decoder = new TextDecoder();
198-
let buffer = '';
199-
200-
for await (const chunk of reader) {
197+
for await (const chunk of stream) {
201198
resetIdle();
202-
buffer += decoder.decode(chunk, { stream: true });
203-
204-
// Process complete SSE lines
205-
const lines = buffer.split('\n');
206-
buffer = lines.pop(); // Keep incomplete line in buffer
207-
208-
for (const line of lines) {
209-
if (!line.startsWith('data: ')) continue;
210-
const payload = line.slice(6).trim();
211-
if (payload === '[DONE]') continue;
212-
213-
try {
214-
const evt = JSON.parse(payload);
215-
if (evt.model) model = evt.model;
216-
217-
const delta = evt.choices?.[0]?.delta;
218-
if (delta?.content) content += delta.content;
219-
if (delta?.reasoning_content) reasoningContent += delta.reasoning_content;
220-
if (delta?.content || delta?.reasoning_content) {
221-
tokenCount++;
222-
// Log progress every 100 tokens so the console isn't silent
223-
if (tokenCount % 100 === 0) {
224-
log(` … ${tokenCount} tokens received`);
225-
}
226-
}
227-
if (delta?.tool_calls) {
228-
// Accumulate streamed tool calls
229-
if (!toolCalls) toolCalls = [];
230-
for (const tc of delta.tool_calls) {
231-
const idx = tc.index ?? 0;
232-
if (!toolCalls[idx]) {
233-
toolCalls[idx] = { id: tc.id, type: tc.type || 'function', function: { name: '', arguments: '' } };
234-
}
235-
if (tc.function?.name) toolCalls[idx].function.name += tc.function.name;
236-
if (tc.function?.arguments) toolCalls[idx].function.arguments += tc.function.arguments;
237-
}
199+
200+
if (chunk.model) model = chunk.model;
201+
202+
const delta = chunk.choices?.[0]?.delta;
203+
if (delta?.content) content += delta.content;
204+
if (delta?.reasoning_content) reasoningContent += delta.reasoning_content;
205+
if (delta?.content || delta?.reasoning_content) {
206+
tokenCount++;
207+
if (tokenCount % 100 === 0) {
208+
log(` … ${tokenCount} tokens received`);
209+
}
210+
}
211+
212+
if (delta?.tool_calls) {
213+
if (!toolCalls) toolCalls = [];
214+
for (const tc of delta.tool_calls) {
215+
const idx = tc.index ?? 0;
216+
if (!toolCalls[idx]) {
217+
toolCalls[idx] = { id: tc.id, type: tc.type || 'function', function: { name: '', arguments: '' } };
238218
}
239-
if (evt.choices?.[0]?.finish_reason) finishReason = evt.choices[0].finish_reason;
240-
if (evt.usage) usage = evt.usage;
241-
} catch { /* skip malformed SSE */ }
219+
if (tc.function?.name) toolCalls[idx].function.name += tc.function.name;
220+
if (tc.function?.arguments) toolCalls[idx].function.arguments += tc.function.arguments;
221+
}
242222
}
223+
224+
if (chunk.usage) usage = chunk.usage;
243225
}
244226

245227
// If the model only produced reasoning_content (thinking) with no content,
@@ -264,6 +246,7 @@ async function llmCall(messages, opts = {}) {
264246
} finally {
265247
clearTimeout(idleTimer);
266248
}
249+
267250
}
268251

269252
function stripThink(text) {
@@ -1787,29 +1770,18 @@ async function main() {
17871770
log(` Mode: ${IS_SKILL_MODE ? 'Aegis Skill' : 'Standalone'} (streaming, ${IDLE_TIMEOUT_MS / 1000}s idle timeout)`);
17881771
log(` Time: ${new Date().toLocaleString()}`);
17891772

1790-
// Healthcheck — ping the actual LLM endpoint directly
1791-
const healthUrl = LLM_BASE_URL
1792-
? `${LLM_BASE_URL.replace(/\/v1\/?$/, '')}/v1/chat/completions`
1793-
: LLM_URL
1794-
? `${LLM_URL.replace(/\/v1\/?$/, '')}/v1/chat/completions`
1795-
: `${GATEWAY_URL}/v1/chat/completions`;
1796-
const healthHeaders = { 'Content-Type': 'application/json' };
1797-
if (LLM_API_KEY) healthHeaders['Authorization'] = `Bearer ${LLM_API_KEY}`;
1798-
1773+
// Healthcheck — ping the LLM endpoint via SDK
17991774
try {
1800-
const ping = await fetch(healthUrl, {
1801-
method: 'POST',
1802-
headers: healthHeaders,
1803-
body: JSON.stringify({ messages: [{ role: 'user', content: 'ping' }], stream: false, max_tokens: 1 }),
1804-
signal: AbortSignal.timeout(15000),
1775+
const ping = await llmClient.chat.completions.create({
1776+
...(LLM_MODEL && { model: LLM_MODEL }),
1777+
messages: [{ role: 'user', content: 'ping' }],
1778+
max_completion_tokens: 1,
18051779
});
1806-
if (!ping.ok) throw new Error(`HTTP ${ping.status}`);
1807-
const data = await ping.json();
1808-
results.model.name = data.model || 'unknown';
1780+
results.model.name = ping.model || 'unknown';
18091781
log(` Model: ${results.model.name}`);
18101782
} catch (err) {
18111783
log(`\n ❌ Cannot reach LLM endpoint: ${err.message}`);
1812-
log(` Endpoint: ${healthUrl}`);
1784+
log(` Base URL: ${llmBaseUrl}`);
18131785
log(' Check that the LLM server is running.\n');
18141786
emit({ event: 'error', message: `Cannot reach LLM endpoint: ${err.message}` });
18151787
process.exit(1);

0 commit comments

Comments (0)