@@ -3,23 +3,19 @@ import path from "node:path";
33import { ChildProcessWithoutNullStreams , spawn } from "node:child_process" ;
44import tcpPortUsed from "tcp-port-used" ;
55import fetchRT from "fetch-retry" ;
6- import osUtils from "os-utils" ;
7- import {
8- getNitroProcessInfo ,
9- updateNvidiaInfo as _updateNvidiaInfo ,
10- } from "./nvidia" ;
116import { executableNitroFile } from "./execute" ;
127import {
13- NitroNvidiaConfig ,
148 NitroModelSetting ,
159 NitroPromptSetting ,
1610 NitroModelOperationResponse ,
1711 NitroModelInitOptions ,
18- ResourcesInfo ,
12+ NitroProcessInfo ,
1913} from "./types" ;
2014import { downloadNitro } from "./scripts" ;
21- import { checkMagicBytes } from "./utils" ;
15+ import { checkMagicBytes , getResourcesInfo } from "./utils" ;
2216import { log } from "./logger" ;
17+ import { updateNvidiaInfo } from "./nvidia" ;
18+ import { promptTemplateConverter } from "./prompt" ;
2319// Polyfill fetch with retry
2420const fetchRetry = fetchRT ( fetch ) ;
2521
@@ -38,22 +34,6 @@ const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`;
3834// The URL for the Nitro subprocess to run chat completion
3935const NITRO_HTTP_CHAT_URL = `${ NITRO_HTTP_SERVER_URL } /inferences/llamacpp/chat_completion` ;
4036
41- // The default config for using Nvidia GPU
42- const NVIDIA_DEFAULT_CONFIG : NitroNvidiaConfig = {
43- notify : true ,
44- run_mode : "cpu" ,
45- nvidia_driver : {
46- exist : false ,
47- version : "" ,
48- } ,
49- cuda : {
50- exist : false ,
51- version : "" ,
52- } ,
53- gpus : [ ] ,
54- gpu_highest_vram : "" ,
55- } ;
56-
5737// The supported model format
5838const SUPPORTED_MODEL_FORMATS = [ ".gguf" ] ;
5939
@@ -62,26 +42,32 @@ const SUPPORTED_MODEL_MAGIC_BYTES = ["GGUF"];
6242
6343// The subprocess instance for Nitro
6444let subprocess : ChildProcessWithoutNullStreams | undefined = undefined ;
/**
 * Describe the state of a Nitro subprocess handle.
 *
 * Only the presence of the handle is inspected; no check is made that the
 * underlying OS process is still alive.
 *
 * @param subprocess - The handle to inspect. `null` and `undefined` both mean
 *   that no Nitro subprocess is being tracked.
 * @returns Process info whose `isRunning` flag is `true` only when a handle is present.
 */
const getNitroProcessInfo = (subprocess: unknown): NitroProcessInfo => ({
  // Loose `!= null` is deliberate: it rejects both `null` and `undefined`.
  isRunning: subprocess != null,
});
/**
 * Report the state of the Nitro subprocess tracked by this module.
 * @returns Process info computed from the module-level `subprocess` handle
 */
function getCurrentNitroProcessInfo(): NitroProcessInfo {
  return getNitroProcessInfo(subprocess);
}
52+
6553// The current model file url
6654let currentModelFile : string = "" ;
6755// The current model settings
6856let currentSettings : NitroModelSetting | undefined = undefined ;
69- // The Nvidia info file for checking for CUDA support on the system
70- let nvidiaConfig : NitroNvidiaConfig = NVIDIA_DEFAULT_CONFIG ;
7157// The absolute path to bin directory
7258let binPath : string = path . join ( __dirname , ".." , "bin" ) ;
7359
/**
 * Get current bin path
 *
 * Returns the module-level `binPath`, which is initialised to the `bin`
 * directory one level above this module's directory.
 * @returns {string} The bin path
 */
function getBinPath(): string {
  return binPath;
}
8167/**
8268 * Set custom bin path
8369 */
84- export async function setBinPath ( customBinPath : string ) : Promise < void > {
70+ async function setBinPath ( customBinPath : string ) : Promise < void > {
8571 // Check if the path is a directory
8672 if (
8773 fs . existsSync ( customBinPath ) &&
@@ -96,31 +82,21 @@ export async function setBinPath(customBinPath: string): Promise<void> {
9682}
9783
/**
 * Initializes the library. Must be called before any other function.
 * This loads the necessary system information and sets some defaults before running a model.
 * @returns A Promise that resolves once the Nvidia info has been updated
 */
async function initialize(): Promise<void> {
  // Update nvidia info
  await updateNvidiaInfo();
  log("[NITRO]::Debug: Nitro initialized");
}
11793
/**
 * Stops a Nitro subprocess.
 *
 * Takes no arguments and simply delegates to `killSubprocess`.
 * @returns The Promise returned by `killSubprocess`; per that function's own
 * documentation it resolves once the subprocess has been terminated.
 */
function stopModel(): Promise<NitroModelOperationResponse> {
  return killSubprocess();
}
126102
@@ -130,10 +106,10 @@ export function stopModel(): Promise<NitroModelOperationResponse> {
130106 * @param promptTemplate - The template to use for generating prompts.
131107 * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
132108 */
133- export async function runModel ( {
134- modelPath,
135- promptTemplate ,
136- } : NitroModelInitOptions ) : Promise < NitroModelOperationResponse > {
109+ async function runModel (
110+ { modelPath, promptTemplate } : NitroModelInitOptions ,
111+ runMode ?: "cpu" | "gpu" ,
112+ ) : Promise < NitroModelOperationResponse > {
137113 // Download nitro binaries if it's not already downloaded
138114 await downloadNitro ( binPath ) ;
139115 const files : string [ ] = fs . readdirSync ( modelPath ) ;
@@ -178,7 +154,7 @@ export async function runModel({
178154 Math . round ( nitroResourceProbe . numCpuPhysicalCore / 2 ) ,
179155 ) ,
180156 } ;
181- return runNitroAndLoadModel ( ) ;
157+ return runNitroAndLoadModel ( runMode ) ;
182158}
183159
184160/**
@@ -187,7 +163,9 @@ export async function runModel({
187163 * 3. Validate model status
188164 * @returns
189165 */
190- export async function runNitroAndLoadModel ( ) : Promise < NitroModelOperationResponse > {
166+ async function runNitroAndLoadModel (
167+ runMode ?: "cpu" | "gpu" ,
168+ ) : Promise < NitroModelOperationResponse > {
191169 try {
192170 // Gather system information for CPU physical cores and memory
193171 await killSubprocess ( ) ;
@@ -200,7 +178,7 @@ export async function runNitroAndLoadModel(): Promise<NitroModelOperationRespons
200178 if ( process . platform === "win32" ) {
201179 return await new Promise ( ( resolve ) => setTimeout ( ( ) => resolve ( { } ) , 500 ) ) ;
202180 }
203- const spawnResult = await spawnNitroProcess ( ) ;
181+ const spawnResult = await spawnNitroProcess ( runMode ) ;
204182 if ( spawnResult . error ) {
205183 return spawnResult ;
206184 }
@@ -218,60 +196,14 @@ export async function runNitroAndLoadModel(): Promise<NitroModelOperationRespons
218196 }
219197}
220198
221- /**
222- * Parse prompt template into agrs settings
223- * @param {string } promptTemplate Template as string
224- * @returns {(NitroPromptSetting | never) } parsed prompt setting
225- * @throws {Error } if cannot split promptTemplate
226- */
227- function promptTemplateConverter (
228- promptTemplate : string ,
229- ) : NitroPromptSetting | never {
230- // Split the string using the markers
231- const systemMarker = "{system_message}" ;
232- const promptMarker = "{prompt}" ;
233-
234- if (
235- promptTemplate . includes ( systemMarker ) &&
236- promptTemplate . includes ( promptMarker )
237- ) {
238- // Find the indices of the markers
239- const systemIndex = promptTemplate . indexOf ( systemMarker ) ;
240- const promptIndex = promptTemplate . indexOf ( promptMarker ) ;
241-
242- // Extract the parts of the string
243- const system_prompt = promptTemplate . substring ( 0 , systemIndex ) ;
244- const user_prompt = promptTemplate . substring (
245- systemIndex + systemMarker . length ,
246- promptIndex ,
247- ) ;
248- const ai_prompt = promptTemplate . substring (
249- promptIndex + promptMarker . length ,
250- ) ;
251-
252- // Return the split parts
253- return { system_prompt, user_prompt, ai_prompt } ;
254- } else if ( promptTemplate . includes ( promptMarker ) ) {
255- // Extract the parts of the string for the case where only promptMarker is present
256- const promptIndex = promptTemplate . indexOf ( promptMarker ) ;
257- const user_prompt = promptTemplate . substring ( 0 , promptIndex ) ;
258- const ai_prompt = promptTemplate . substring (
259- promptIndex + promptMarker . length ,
260- ) ;
261-
262- // Return the split parts
263- return { user_prompt, ai_prompt } ;
264- }
265-
266- // Throw error if none of the conditions are met
267- throw Error ( "Cannot split prompt template" ) ;
268- }
269-
270199/**
271200 * Loads a LLM model into the Nitro subprocess by sending a HTTP POST request.
272201 * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
273202 */
274- export async function loadLLMModel ( settings : any ) : Promise < Response > {
203+ async function loadLLMModel ( settings : any ) : Promise < Response > {
204+ // The nitro subprocess must be started before loading model
205+ if ( ! subprocess ) throw Error ( "Calling loadLLMModel without running nitro" ) ;
206+
275207 log ( `[NITRO]::Debug: Loading model with params ${ JSON . stringify ( settings ) } ` ) ;
276208 try {
277209 const res = await fetchRetry ( NITRO_HTTP_LOAD_MODEL_URL , {
@@ -301,7 +233,7 @@ export async function loadLLMModel(settings: any): Promise<Response> {
301233 * @returns {Promise<Response> } A Promise that resolves when the chat completion success, or rejects with an error if the completion fails.
302234 * @description If outStream is specified, the response body is consumed and cannot be used to reconstruct the data
303235 */
304- export async function chatCompletion (
236+ async function chatCompletion (
305237 request : any ,
306238 outStream ?: WritableStream ,
307239) : Promise < Response > {
@@ -350,7 +282,7 @@ export async function chatCompletion(
350282 * If the model is loaded successfully, the object is empty.
351283 * If the model is not loaded successfully, the object contains an error message.
352284 */
353- export async function validateModelStatus ( ) : Promise < NitroModelOperationResponse > {
285+ async function validateModelStatus ( ) : Promise < NitroModelOperationResponse > {
354286 // Send a GET request to the validation URL.
355287 // Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries.
356288 const response = await fetchRetry ( NITRO_HTTP_VALIDATE_MODEL_URL , {
@@ -382,12 +314,13 @@ export async function validateModelStatus(): Promise<NitroModelOperationResponse
382314 * Terminates the Nitro subprocess.
383315 * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
384316 */
385- export async function killSubprocess ( ) : Promise < NitroModelOperationResponse > {
317+ async function killSubprocess ( ) : Promise < NitroModelOperationResponse > {
386318 const controller = new AbortController ( ) ;
387319 setTimeout ( ( ) => controller . abort ( ) , 5000 ) ;
388320 log ( `[NITRO]::Debug: Request to kill Nitro` ) ;
389321
390322 try {
323+ // FIXME: should use this response?
391324 const _response = await fetch ( NITRO_HTTP_KILL_URL , {
392325 method : "DELETE" ,
393326 signal : controller . signal ,
@@ -406,11 +339,13 @@ export async function killSubprocess(): Promise<NitroModelOperationResponse> {
406339 * Spawns a Nitro subprocess.
407340 * @returns A promise that resolves when the Nitro subprocess is started.
408341 */
409- export function spawnNitroProcess ( ) : Promise < NitroModelOperationResponse > {
342+ function spawnNitroProcess (
343+ runMode ?: "cpu" | "gpu" ,
344+ ) : Promise < NitroModelOperationResponse > {
410345 log ( `[NITRO]::Debug: Spawning Nitro subprocess...` ) ;
411346
412347 return new Promise ( async ( resolve , reject ) => {
413- const executableOptions = executableNitroFile ( nvidiaConfig , binPath ) ;
348+ const executableOptions = executableNitroFile ( binPath , runMode ) ;
414349
415350 const args : string [ ] = [ "1" , LOCAL_HOST , PORT . toString ( ) ] ;
416351 // Execute the binary
@@ -451,27 +386,23 @@ export function spawnNitroProcess(): Promise<NitroModelOperationResponse> {
451386 } ) ;
452387}
453388
454- /**
455- * Get the system resources information
456- */
457- export async function getResourcesInfo ( ) : Promise < ResourcesInfo > {
458- const cpu = osUtils . cpuCount ( ) ;
459- log ( `[NITRO]::CPU informations - ${ cpu } ` ) ;
460- const response : ResourcesInfo = {
461- numCpuPhysicalCore : cpu ,
462- memAvailable : 0 ,
463- } ;
464- return response ;
465- }
466-
467- export const updateNvidiaInfo = async ( ) =>
468- await _updateNvidiaInfo ( nvidiaConfig ) ;
469- export const getCurrentNitroProcessInfo = ( ) => getNitroProcessInfo ( subprocess ) ;
470-
/**
 * Trap for system signal so we can stop nitro process on exit
 *
 * NOTE(review): Node.js no longer exits by default on SIGTERM once a listener
 * is registered, and this handler never calls `process.exit()` — confirm that
 * the host application is expected to terminate the process itself.
 */
process.on("SIGTERM", async () => {
  log(`[NITRO]::Debug: Received SIGTERM signal`);
  // Ask the Nitro subprocess to shut down in response to the signal
  await killSubprocess();
});
396+
397+ export {
398+ getCurrentNitroProcessInfo ,
399+ getBinPath ,
400+ setBinPath ,
401+ initialize ,
402+ stopModel ,
403+ runModel ,
404+ loadLLMModel ,
405+ chatCompletion ,
406+ validateModelStatus ,
407+ killSubprocess ,
408+ } ;
0 commit comments