Skip to content

Commit b33e436

Browse files
authored
Merge pull request #126 from SentienceAPI/local_llm
local llm provider
2 parents 0553955 + 07375b9 commit b33e436

File tree

3 files changed

+282
-0
lines changed

3 files changed

+282
-0
lines changed

src/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ export { getGridBounds } from './utils/grid-utils';
2424
export {
2525
LLMProvider,
2626
LLMResponse,
27+
LocalLLMProvider,
28+
LocalVisionLLMProvider,
2729
OpenAIProvider,
2830
AnthropicProvider,
2931
GLMProvider,

src/llm-provider.ts

Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,211 @@ export abstract class LLMProvider {
6868
}
6969
}
7070

71+
/**
 * Local OpenAI-compatible Provider (Ollama / LM Studio / llama.cpp server, etc.)
 *
 * This is the TypeScript equivalent of Python's LocalLLMProvider concept, but instead of
 * embedding a full HF runtime inside Node, it calls a local HTTP server that exposes an
 * OpenAI-compatible Chat Completions API.
 *
 * Examples of compatible local servers:
 * - Ollama (OpenAI-compatible endpoint)
 * - LM Studio (OpenAI-compatible endpoint)
 * - llama.cpp server (OpenAI-compatible endpoint)
 *
 * Each setting is resolved as: constructor option, then environment variable
 * (SENTIENCE_LOCAL_LLM_MODEL, SENTIENCE_LOCAL_LLM_BASE_URL, SENTIENCE_LOCAL_LLM_API_KEY),
 * then a built-in default.
 */
export class LocalLLMProvider extends LLMProvider {
  // `protected` (not `private`) so subclasses can read the connection settings
  // without resorting to `any` casts.
  protected readonly _modelName: string;
  protected readonly _baseUrl: string;
  protected readonly _apiKey?: string;
  protected readonly _defaultHeaders: Record<string, string>;
  protected readonly _timeoutMs: number;

  /**
   * @param options.model     Model identifier sent in every request (default: 'local-model').
   * @param options.baseUrl   Base URL of the OpenAI-compatible API, e.g. `http://localhost:11434/v1`.
   *                          Trailing slashes are ignored.
   * @param options.apiKey    Optional bearer token; when set, an `Authorization` header is added.
   * @param options.timeoutMs Default per-request timeout in milliseconds (default: 60000).
   * @param options.headers   Extra headers merged over the default `Content-Type`.
   */
  constructor(
    options: {
      model?: string;
      baseUrl?: string;
      apiKey?: string;
      timeoutMs?: number;
      headers?: Record<string, string>;
    } = {}
  ) {
    super();
    this._modelName = options.model ?? process.env.SENTIENCE_LOCAL_LLM_MODEL ?? 'local-model';

    // Common defaults:
    // - Ollama OpenAI-compatible: http://localhost:11434/v1
    // - LM Studio: http://localhost:1234/v1
    const configuredBaseUrl =
      options.baseUrl ?? process.env.SENTIENCE_LOCAL_LLM_BASE_URL ?? 'http://localhost:11434/v1';
    // Strip trailing slashes so `${baseUrl}/chat/completions` never contains `//`.
    this._baseUrl = configuredBaseUrl.replace(/\/+$/, '');

    this._apiKey = options.apiKey ?? process.env.SENTIENCE_LOCAL_LLM_API_KEY;
    this._timeoutMs = options.timeoutMs ?? 60_000;

    const headers: Record<string, string> = {
      'Content-Type': 'application/json',
      ...(options.headers ?? {}),
    };
    if (this._apiKey) {
      // Set last so the configured API key wins over any Authorization in `options.headers`.
      headers.Authorization = `Bearer ${this._apiKey}`;
    }
    this._defaultHeaders = headers;
  }

  supportsJsonMode(): boolean {
    // Many local OpenAI-compatible servers don't reliably implement response_format=json_object.
    return false;
  }

  get modelName(): string {
    return this._modelName;
  }

  /**
   * Send a single-turn chat completion request to the local server.
   *
   * @param systemPrompt Optional system message; omitted from the request when empty.
   * @param userPrompt   User message content.
   * @param options      Per-call settings. `timeoutMs` overrides the instance timeout and is
   *                     not sent to the server. `temperature` defaults to 0.0. Every other key
   *                     (e.g. `max_tokens`, `top_p`, server-specific fields) is passed through
   *                     to the request body, except that `model` and `messages` cannot be
   *                     overridden this way.
   * @returns The first choice's message content (empty string if absent) plus token usage
   *          when the server reports it.
   * @throws Error if global `fetch` is missing, the server answers with a non-2xx status,
   *         the body is not valid JSON, or the request times out (error name `AbortError`).
   */
  async generate(
    systemPrompt: string,
    userPrompt: string,
    options: Record<string, any> = {}
  ): Promise<LLMResponse> {
    const fetchFn = (globalThis as any).fetch as typeof fetch | undefined;
    if (!fetchFn) {
      throw new Error(
        'Global fetch is not available. Use Node 18+ or polyfill fetch before using LocalLLMProvider.'
      );
    }

    const { timeoutMs: timeoutOverride, ...passThrough } = options;
    const timeoutMs: number = timeoutOverride ?? this._timeoutMs;

    // Pass-through fields are spread FIRST so the core fields below always win;
    // spreading them last would let `options.model` / `options.messages` (or an
    // explicit `temperature: undefined`) clobber the request.
    const payload: Record<string, unknown> = {
      ...passThrough,
      model: this._modelName,
      messages: [
        ...(systemPrompt ? [{ role: 'system', content: systemPrompt }] : []),
        { role: 'user', content: userPrompt },
      ],
      temperature: options.temperature ?? 0.0,
    };

    const controller = new AbortController();
    let timedOut = false;
    const timeoutId = setTimeout(() => {
      timedOut = true;
      controller.abort();
    }, timeoutMs);

    try {
      const res = await fetchFn(`${this._baseUrl}/chat/completions`, {
        method: 'POST',
        headers: this._defaultHeaders,
        body: JSON.stringify(payload),
        signal: controller.signal,
      });

      // Read as text first so error bodies that are not JSON can still be reported.
      const text = await res.text();
      if (!res.ok) {
        throw new Error(`Local LLM HTTP ${res.status}: ${text.slice(0, 500)}`);
      }

      let data: any;
      try {
        data = JSON.parse(text);
      } catch {
        throw new Error(`Local LLM returned a non-JSON response: ${text.slice(0, 500)}`);
      }

      const usage = data?.usage;
      return {
        content: data?.choices?.[0]?.message?.content ?? '',
        promptTokens: usage?.prompt_tokens,
        completionTokens: usage?.completion_tokens,
        totalTokens: usage?.total_tokens,
        modelName: data?.model ?? this._modelName,
      };
    } catch (err: unknown) {
      if (timedOut) {
        // Replace the generic abort error with an actionable message; the name stays
        // 'AbortError' so callers that check `err.name` keep working.
        const timeoutError = new Error(`Local LLM request timed out after ${timeoutMs}ms`);
        timeoutError.name = 'AbortError';
        throw timeoutError;
      }
      throw err;
    } finally {
      clearTimeout(timeoutId);
    }
  }
}
187+
188+
/**
189+
* Local OpenAI-compatible Vision Provider.
190+
*
191+
* This is the TypeScript analogue of Python's LocalVisionLLMProvider, but it assumes your
192+
* local server supports the OpenAI vision message format (image_url with data: URI).
193+
*
194+
* If your local stack uses a different schema (e.g., Ollama images array), you can implement
195+
* a custom provider by extending LLMProvider.
196+
*/
197+
export class LocalVisionLLMProvider extends LocalLLMProvider {
198+
supportsVision(): boolean {
199+
return true;
200+
}
201+
202+
async generateWithImage(
203+
systemPrompt: string,
204+
userPrompt: string,
205+
imageBase64: string,
206+
options: Record<string, any> = {}
207+
): Promise<LLMResponse> {
208+
const fetchFn = (globalThis as any).fetch as typeof fetch | undefined;
209+
if (!fetchFn) {
210+
throw new Error(
211+
'Global fetch is not available. Use Node 18+ or polyfill fetch before using LocalVisionLLMProvider.'
212+
);
213+
}
214+
215+
const controller = new AbortController();
216+
const timeoutId = setTimeout(
217+
() => controller.abort(),
218+
options.timeoutMs ?? (this as any)._timeoutMs ?? 60_000
219+
);
220+
221+
// Rebuild payload (we cannot reuse LocalLLMProvider.generate because message shape differs)
222+
const modelName = (this as any)._modelName ?? 'local-model';
223+
const baseUrl = (this as any)._baseUrl ?? 'http://localhost:11434/v1';
224+
const headers = (this as any)._defaultHeaders ?? { 'Content-Type': 'application/json' };
225+
226+
const payload: any = {
227+
model: modelName,
228+
messages: [
229+
...(systemPrompt ? [{ role: 'system', content: systemPrompt }] : []),
230+
{
231+
role: 'user',
232+
content: [
233+
{ type: 'text', text: userPrompt },
234+
{ type: 'image_url', image_url: { url: `data:image/png;base64,${imageBase64}` } },
235+
],
236+
},
237+
],
238+
temperature: options.temperature ?? 0.0,
239+
};
240+
241+
if (options.max_tokens !== undefined) payload.max_tokens = options.max_tokens;
242+
const { timeoutMs: _ignoredTimeout, ...rest } = options;
243+
Object.assign(payload, rest);
244+
245+
try {
246+
const res = await fetchFn(`${baseUrl}/chat/completions`, {
247+
method: 'POST',
248+
headers,
249+
body: JSON.stringify(payload),
250+
signal: controller.signal,
251+
});
252+
253+
const text = await res.text();
254+
if (!res.ok) {
255+
throw new Error(`Local Vision LLM HTTP ${res.status}: ${text.slice(0, 500)}`);
256+
}
257+
258+
const data = JSON.parse(text);
259+
const choice = data?.choices?.[0];
260+
const content = choice?.message?.content ?? '';
261+
const usage = data?.usage;
262+
263+
return {
264+
content,
265+
promptTokens: usage?.prompt_tokens,
266+
completionTokens: usage?.completion_tokens,
267+
totalTokens: usage?.total_tokens,
268+
modelName: data?.model ?? modelName,
269+
};
270+
} finally {
271+
clearTimeout(timeoutId);
272+
}
273+
}
274+
}
275+
71276
/**
72277
* OpenAI Provider (GPT-4, GPT-4o, etc.)
73278
* Requires: npm install openai

tests/local-llm-provider.test.ts

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
import { LocalLLMProvider, LocalVisionLLMProvider } from '../src/llm-provider';
2+
3+
describe('LocalLLMProvider (OpenAI-compatible)', () => {
  // Keep a handle on the real fetch so each test can swap in a mock and restore it afterwards.
  const globalScope = globalThis as any;
  const savedFetch = globalScope.fetch;

  afterEach(() => {
    globalScope.fetch = savedFetch;
  });

  it('should call /chat/completions and parse response', async () => {
    // Canned OpenAI-style chat completion body returned by the mocked server.
    const cannedBody = JSON.stringify({
      model: 'local-model',
      choices: [{ message: { content: 'hello' } }],
      usage: { prompt_tokens: 1, completion_tokens: 2, total_tokens: 3 },
    });
    const fetchMock: jest.Mock = jest.fn(async () => ({
      ok: true,
      status: 200,
      text: async () => cannedBody,
    }));
    globalScope.fetch = fetchMock;

    const provider = new LocalLLMProvider({
      baseUrl: 'http://localhost:11434/v1',
      model: 'local-model',
    });
    const result = await provider.generate('sys', 'user', { temperature: 0.0 });

    // Response fields are mapped from the OpenAI-style JSON.
    expect(result.content).toBe('hello');
    expect(result.modelName).toBe('local-model');
    expect(result.totalTokens).toBe(3);

    // Exactly one request, aimed at the chat completions endpoint under the base URL.
    expect(fetchMock).toHaveBeenCalledTimes(1);
    const requestedUrl = fetchMock.mock.calls[0][0];
    expect(requestedUrl).toBe('http://localhost:11434/v1/chat/completions');
  });
});
39+
40+
describe('LocalVisionLLMProvider (OpenAI-compatible)', () => {
  // Keep a handle on the real fetch so each test can swap in a mock and restore it afterwards.
  const globalScope = globalThis as any;
  const savedFetch = globalScope.fetch;

  afterEach(() => {
    globalScope.fetch = savedFetch;
  });

  it('should send image_url message content', async () => {
    // Canned OpenAI-style chat completion body returned by the mocked server.
    const cannedBody = JSON.stringify({
      model: 'local-vision',
      choices: [{ message: { content: 'YES' } }],
      usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 },
    });

    // The mock records the parsed request body so the message shape can be inspected.
    let sentPayload: any = null;
    globalScope.fetch = jest.fn(async (_url: string, init: any) => {
      sentPayload = JSON.parse(init.body);
      return {
        ok: true,
        status: 200,
        text: async () => cannedBody,
      };
    });

    const provider = new LocalVisionLLMProvider({
      baseUrl: 'http://localhost:1234/v1',
      model: 'local-vision',
    });
    const result = await provider.generateWithImage('sys', 'is there a button?', 'AAAA', {});

    expect(result.content).toBe('YES');

    // messages[0] is the system prompt; messages[1] is the user turn whose second
    // content part carries the image.
    const imagePart = sentPayload.messages[1].content[1];
    expect(imagePart.type).toBe('image_url');
    expect(imagePart.image_url.url).toContain('data:image/png;base64,AAAA');
  });
});

0 commit comments

Comments
 (0)