@@ -68,6 +68,211 @@ export abstract class LLMProvider {
6868 }
6969}
7070
/**
 * Local OpenAI-compatible Provider (Ollama / LM Studio / llama.cpp server, etc.)
 *
 * This is the TypeScript equivalent of Python's LocalLLMProvider concept, but instead of
 * embedding a full HF runtime inside Node, it calls a local HTTP server that exposes an
 * OpenAI-compatible Chat Completions API (`POST {baseUrl}/chat/completions`).
 *
 * Examples of compatible local servers:
 * - Ollama (OpenAI-compatible endpoint)
 * - LM Studio (OpenAI-compatible endpoint)
 * - llama.cpp server (OpenAI-compatible endpoint)
 *
 * Configuration precedence for each setting is: constructor option, then environment
 * variable (`SENTIENCE_LOCAL_LLM_MODEL`, `SENTIENCE_LOCAL_LLM_BASE_URL`,
 * `SENTIENCE_LOCAL_LLM_API_KEY`), then the built-in default.
 */
export class LocalLLMProvider extends LLMProvider {
  // Protected (not private) so subclasses such as LocalVisionLLMProvider can reuse the
  // connection settings without resorting to `as any` casts.
  protected _modelName: string;
  protected _baseUrl: string;
  protected _apiKey?: string;
  protected _defaultHeaders: Record<string, string>;
  protected _timeoutMs: number;

  /**
   * @param options.model     Model identifier sent in every request (default `'local-model'`).
   * @param options.baseUrl   Root of the OpenAI-compatible API, e.g. `http://localhost:11434/v1`
   *                          (Ollama) or `http://localhost:1234/v1` (LM Studio). Trailing
   *                          slashes are ignored.
   * @param options.apiKey    Optional key; when set it is sent as `Authorization: Bearer <key>`
   *                          and takes precedence over an `Authorization` entry in `headers`.
   * @param options.timeoutMs Default per-request timeout in milliseconds (default 60 000).
   * @param options.headers   Extra headers merged over the default `Content-Type`.
   */
  constructor(
    options: {
      model?: string;
      baseUrl?: string;
      apiKey?: string;
      timeoutMs?: number;
      headers?: Record<string, string>;
    } = {}
  ) {
    super();
    this._modelName = options.model ?? process.env.SENTIENCE_LOCAL_LLM_MODEL ?? 'local-model';

    const configuredBaseUrl =
      options.baseUrl ?? process.env.SENTIENCE_LOCAL_LLM_BASE_URL ?? 'http://localhost:11434/v1';
    // Strip trailing slashes so "<base>/" does not become "<base>//chat/completions".
    this._baseUrl = configuredBaseUrl.replace(/\/+$/, '');

    this._apiKey = options.apiKey ?? process.env.SENTIENCE_LOCAL_LLM_API_KEY;
    this._timeoutMs = options.timeoutMs ?? 60_000;
    this._defaultHeaders = {
      'Content-Type': 'application/json',
      ...(options.headers ?? {}),
      // Spread last so the API key wins over a caller-supplied Authorization header.
      ...(this._apiKey ? { Authorization: `Bearer ${this._apiKey}` } : {}),
    };
  }

  supportsJsonMode(): boolean {
    // Many local OpenAI-compatible servers don't reliably implement response_format=json_object.
    return false;
  }

  get modelName(): string {
    return this._modelName;
  }

  /**
   * Sends a text-only chat completion request.
   *
   * @param systemPrompt Optional system message; omitted from the request when empty.
   * @param userPrompt   User message content.
   * @param options      `timeoutMs` overrides the instance timeout for this call; every other
   *                     key (e.g. `temperature`, `max_tokens`, `top_p`, server-specific
   *                     fields) is forwarded in the request body. `model` and `messages` are
   *                     always set by the provider and cannot be overridden here.
   * @returns The first choice's message content (empty string if absent) plus token usage
   *          when the server reports it.
   * @throws Error when global `fetch` is unavailable or the server answers with a non-2xx
   *         status; SyntaxError when the response body is not valid JSON. A timeout surfaces
   *         as the rejection produced by `fetch` when its signal is aborted.
   */
  async generate(
    systemPrompt: string,
    userPrompt: string,
    options: Record<string, any> = {}
  ): Promise<LLMResponse> {
    return this.requestChatCompletion(this.buildMessages(systemPrompt, userPrompt), options, {
      providerName: 'LocalLLMProvider',
      errorLabel: 'Local LLM',
    });
  }

  /**
   * Builds the `messages` array: an optional system message followed by one user message.
   *
   * @param systemPrompt System prompt; skipped when empty.
   * @param userContent  Either a plain string or an array of OpenAI content parts.
   */
  protected buildMessages(
    systemPrompt: string,
    userContent: string | Array<Record<string, unknown>>
  ): Array<Record<string, unknown>> {
    const messages: Array<Record<string, unknown>> = [];
    if (systemPrompt) {
      messages.push({ role: 'system', content: systemPrompt });
    }
    messages.push({ role: 'user', content: userContent });
    return messages;
  }

  /**
   * Posts `messages` to `{baseUrl}/chat/completions` and maps the reply to an LLMResponse.
   * Shared by the text and vision entry points so both apply the same timeout, payload and
   * error-handling rules.
   *
   * @param messages Chat messages to send.
   * @param options  Caller options, see {@link LocalLLMProvider.generate}.
   * @param labels   `providerName` is used in the "fetch unavailable" message and
   *                 `errorLabel` prefixes HTTP / parse error messages.
   */
  protected async requestChatCompletion(
    messages: Array<Record<string, unknown>>,
    options: Record<string, any>,
    labels: { providerName: string; errorLabel: string }
  ): Promise<LLMResponse> {
    const fetchFn = (globalThis as any).fetch as typeof fetch | undefined;
    if (!fetchFn) {
      throw new Error(
        `Global fetch is not available. Use Node 18+ or polyfill fetch before using ${labels.providerName}.`
      );
    }

    // `timeoutMs` is a client-side setting and must not be sent to the server.
    const { timeoutMs: requestedTimeout, ...passThrough } = options;

    // Pass-through fields go first so the core fields below always win. This also keeps the
    // temperature default when a caller passes `{ temperature: undefined }`.
    const payload: Record<string, unknown> = {
      ...passThrough,
      model: this._modelName,
      messages,
      temperature: options.temperature ?? 0.0,
    };

    const controller = new AbortController();
    let timeoutId: ReturnType<typeof setTimeout> | undefined;

    try {
      // Armed inside the try block so the finally clause always clears it.
      timeoutId = setTimeout(() => controller.abort(), requestedTimeout ?? this._timeoutMs);

      const res = await fetchFn(`${this._baseUrl}/chat/completions`, {
        method: 'POST',
        headers: this._defaultHeaders,
        body: JSON.stringify(payload),
        signal: controller.signal,
      });

      // Read the body as text first so it can be quoted in error messages.
      const text = await res.text();
      if (!res.ok) {
        throw new Error(`${labels.errorLabel} HTTP ${res.status}: ${text.slice(0, 500)}`);
      }

      let data:
        | {
            model?: string;
            choices?: Array<{ message?: { content?: string | null } }>;
            usage?: { prompt_tokens?: number; completion_tokens?: number; total_tokens?: number };
          }
        | null
        | undefined;
      try {
        data = JSON.parse(text);
      } catch {
        // Keep the SyntaxError type but say which provider failed and what it returned.
        throw new SyntaxError(
          `${labels.errorLabel} returned a non-JSON response: ${text.slice(0, 500)}`
        );
      }

      const usage = data?.usage;
      return {
        content: data?.choices?.[0]?.message?.content ?? '',
        promptTokens: usage?.prompt_tokens,
        completionTokens: usage?.completion_tokens,
        totalTokens: usage?.total_tokens,
        modelName: data?.model ?? this._modelName,
      };
    } finally {
      if (timeoutId !== undefined) {
        clearTimeout(timeoutId);
      }
    }
  }
}

/**
 * Local OpenAI-compatible Vision Provider.
 *
 * This is the TypeScript analogue of Python's LocalVisionLLMProvider, but it assumes your
 * local server supports the OpenAI vision message format (image_url with data: URI).
 *
 * If your local stack uses a different schema (e.g., Ollama images array), you can implement
 * a custom provider by extending LLMProvider.
 */
export class LocalVisionLLMProvider extends LocalLLMProvider {
  supportsVision(): boolean {
    return true;
  }

  /**
   * Sends a chat completion request whose user message contains both text and one image.
   *
   * @param systemPrompt Optional system message; omitted from the request when empty.
   * @param userPrompt   Text part of the user message.
   * @param imageBase64  Base64-encoded image data, sent as a `data:image/png;base64,` URI.
   *                     A value that is already a `data:` URI is forwarded unchanged.
   * @param options      Same semantics as {@link LocalLLMProvider.generate}.
   * @returns The first choice's message content (empty string if absent) plus token usage
   *          when the server reports it.
   * @throws Error when global `fetch` is unavailable or the server answers with a non-2xx
   *         status; SyntaxError when the response body is not valid JSON.
   */
  async generateWithImage(
    systemPrompt: string,
    userPrompt: string,
    imageBase64: string,
    options: Record<string, any> = {}
  ): Promise<LLMResponse> {
    // Raw base64 never contains ':', so this prefix check cannot misfire on image data.
    const imageUrl = imageBase64.startsWith('data:')
      ? imageBase64
      : `data:image/png;base64,${imageBase64}`;

    const userContent: Array<Record<string, unknown>> = [
      { type: 'text', text: userPrompt },
      { type: 'image_url', image_url: { url: imageUrl } },
    ];

    return this.requestChatCompletion(this.buildMessages(systemPrompt, userContent), options, {
      providerName: 'LocalVisionLLMProvider',
      errorLabel: 'Local Vision LLM',
    });
  }
}
275+
71276/**
72277 * OpenAI Provider (GPT-4, GPT-4o, etc.)
73278 * Requires: npm install openai
0 commit comments