Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit f5bc098

Browse files
committed
fix(nitro-node): default run mode should be based on CUDA availability
1 parent 049ef1d commit f5bc098

File tree

8 files changed

+187
-168
lines changed

8 files changed

+187
-168
lines changed

nitro-node/src/execute.ts

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
import path from "node:path";
2-
import { NitroNvidiaConfig } from "./types";
2+
import { getNvidiaConfig } from "./nvidia";
33

44
export interface NitroExecutableOptions {
55
executablePath: string;
@@ -11,9 +11,11 @@ export interface NitroExecutableOptions {
1111
* @returns The name of the executable file to run.
1212
*/
1313
export const executableNitroFile = (
14-
nvidiaSettings: NitroNvidiaConfig,
1514
binaryFolder: string,
15+
// Default to GPU if CUDA is available when calling
16+
runMode: "cpu" | "gpu" = getNvidiaConfig().cuda.exist ? "gpu" : "cpu",
1617
): NitroExecutableOptions => {
18+
const nvidiaSettings = getNvidiaConfig();
1719
let cudaVisibleDevices = "";
1820
let binaryName = "nitro";
1921
/**
@@ -23,7 +25,7 @@ export const executableNitroFile = (
2325
/**
2426
* For Windows: win-cpu, win-cuda-11-7, win-cuda-12-0
2527
*/
26-
if (nvidiaSettings["run_mode"] === "cpu") {
28+
if (runMode === "cpu") {
2729
binaryFolder = path.join(binaryFolder, "win-cpu");
2830
} else {
2931
if (nvidiaSettings["cuda"].version === "12") {
@@ -44,7 +46,7 @@ export const executableNitroFile = (
4446
binaryFolder = path.join(binaryFolder, "mac-x64");
4547
}
4648
} else {
47-
if (nvidiaSettings["run_mode"] === "cpu") {
49+
if (runMode === "cpu") {
4850
binaryFolder = path.join(binaryFolder, "linux-cpu");
4951
} else {
5052
if (nvidiaSettings["cuda"].version === "12") {

nitro-node/src/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
export * from "./types";
22
export * from "./nitro";
3+
export { getNvidiaConfig, setNvidiaConfig } from "./nvidia";
34
export { setLogger } from "./logger";

nitro-node/src/nitro.ts

Lines changed: 55 additions & 124 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,19 @@ import path from "node:path";
33
import { ChildProcessWithoutNullStreams, spawn } from "node:child_process";
44
import tcpPortUsed from "tcp-port-used";
55
import fetchRT from "fetch-retry";
6-
import osUtils from "os-utils";
7-
import {
8-
getNitroProcessInfo,
9-
updateNvidiaInfo as _updateNvidiaInfo,
10-
} from "./nvidia";
116
import { executableNitroFile } from "./execute";
127
import {
13-
NitroNvidiaConfig,
148
NitroModelSetting,
159
NitroPromptSetting,
1610
NitroModelOperationResponse,
1711
NitroModelInitOptions,
18-
ResourcesInfo,
12+
NitroProcessInfo,
1913
} from "./types";
2014
import { downloadNitro } from "./scripts";
21-
import { checkMagicBytes } from "./utils";
15+
import { checkMagicBytes, getResourcesInfo } from "./utils";
2216
import { log } from "./logger";
17+
import { updateNvidiaInfo } from "./nvidia";
18+
import { promptTemplateConverter } from "./prompt";
2319
// Polyfill fetch with retry
2420
const fetchRetry = fetchRT(fetch);
2521

@@ -38,22 +34,6 @@ const NITRO_HTTP_KILL_URL = `${NITRO_HTTP_SERVER_URL}/processmanager/destroy`;
3834
// The URL for the Nitro subprocess to run chat completion
3935
const NITRO_HTTP_CHAT_URL = `${NITRO_HTTP_SERVER_URL}/inferences/llamacpp/chat_completion`;
4036

41-
// The default config for using Nvidia GPU
42-
const NVIDIA_DEFAULT_CONFIG: NitroNvidiaConfig = {
43-
notify: true,
44-
run_mode: "cpu",
45-
nvidia_driver: {
46-
exist: false,
47-
version: "",
48-
},
49-
cuda: {
50-
exist: false,
51-
version: "",
52-
},
53-
gpus: [],
54-
gpu_highest_vram: "",
55-
};
56-
5737
// The supported model format
5838
const SUPPORTED_MODEL_FORMATS = [".gguf"];
5939

@@ -62,26 +42,32 @@ const SUPPORTED_MODEL_MAGIC_BYTES = ["GGUF"];
6242

6343
// The subprocess instance for Nitro
6444
let subprocess: ChildProcessWithoutNullStreams | undefined = undefined;
45+
/**
46+
* Retrieve current nitro process
47+
*/
48+
const getNitroProcessInfo = (subprocess: any): NitroProcessInfo => ({
49+
isRunning: subprocess != null,
50+
});
51+
const getCurrentNitroProcessInfo = () => getNitroProcessInfo(subprocess);
52+
6553
// The current model file url
6654
let currentModelFile: string = "";
6755
// The current model settings
6856
let currentSettings: NitroModelSetting | undefined = undefined;
69-
// The Nvidia info file for checking for CUDA support on the system
70-
let nvidiaConfig: NitroNvidiaConfig = NVIDIA_DEFAULT_CONFIG;
7157
// The absolute path to bin directory
7258
let binPath: string = path.join(__dirname, "..", "bin");
7359

7460
/**
7561
* Get current bin path
7662
* @returns {string} The bin path
7763
*/
78-
export function getBinPath(): string {
64+
function getBinPath(): string {
7965
return binPath;
8066
}
8167
/**
8268
* Set custom bin path
8369
*/
84-
export async function setBinPath(customBinPath: string): Promise<void> {
70+
async function setBinPath(customBinPath: string): Promise<void> {
8571
// Check if the path is a directory
8672
if (
8773
fs.existsSync(customBinPath) &&
@@ -96,31 +82,21 @@ export async function setBinPath(customBinPath: string): Promise<void> {
9682
}
9783

9884
/**
99-
* Get current Nvidia config
100-
* @returns {NitroNvidiaConfig} A copy of the config object
101-
* The returned object should be used for reading only
102-
* Writing to config should be via the function {@setNvidiaConfig}
103-
*/
104-
export function getNvidiaConfig(): NitroNvidiaConfig {
105-
return Object.assign({}, nvidiaConfig);
106-
}
107-
108-
/**
109-
* Set custom Nvidia config for running inference over GPU
110-
* @param {NitroNvidiaConfig} config The new config to apply
85+
* Initializes the library. Must be called before any other function.
86+
* This loads the necessary system information and sets some defaults before running a model
11187
*/
112-
export async function setNvidiaConfig(
113-
config: NitroNvidiaConfig,
114-
): Promise<void> {
115-
nvidiaConfig = config;
88+
async function initialize(): Promise<void> {
89+
// Update nvidia info
90+
await updateNvidiaInfo();
91+
log("[NITRO]::Debug: Nitro initialized");
11692
}
11793

11894
/**
11995
* Stops a Nitro subprocess.
12096
* @param wrapper - The model wrapper.
12197
* @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
12298
*/
123-
export function stopModel(): Promise<NitroModelOperationResponse> {
99+
function stopModel(): Promise<NitroModelOperationResponse> {
124100
return killSubprocess();
125101
}
126102

@@ -130,10 +106,10 @@ export function stopModel(): Promise<NitroModelOperationResponse> {
130106
* @param promptTemplate - The template to use for generating prompts.
131107
* @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
132108
*/
133-
export async function runModel({
134-
modelPath,
135-
promptTemplate,
136-
}: NitroModelInitOptions): Promise<NitroModelOperationResponse> {
109+
async function runModel(
110+
{ modelPath, promptTemplate }: NitroModelInitOptions,
111+
runMode?: "cpu" | "gpu",
112+
): Promise<NitroModelOperationResponse> {
137113
// Download nitro binaries if it's not already downloaded
138114
await downloadNitro(binPath);
139115
const files: string[] = fs.readdirSync(modelPath);
@@ -178,7 +154,7 @@ export async function runModel({
178154
Math.round(nitroResourceProbe.numCpuPhysicalCore / 2),
179155
),
180156
};
181-
return runNitroAndLoadModel();
157+
return runNitroAndLoadModel(runMode);
182158
}
183159

184160
/**
@@ -187,7 +163,9 @@ export async function runModel({
187163
* 3. Validate model status
188164
* @returns
189165
*/
190-
export async function runNitroAndLoadModel(): Promise<NitroModelOperationResponse> {
166+
async function runNitroAndLoadModel(
167+
runMode?: "cpu" | "gpu",
168+
): Promise<NitroModelOperationResponse> {
191169
try {
192170
// Gather system information for CPU physical cores and memory
193171
await killSubprocess();
@@ -200,7 +178,7 @@ export async function runNitroAndLoadModel(): Promise<NitroModelOperationRespons
200178
if (process.platform === "win32") {
201179
return await new Promise((resolve) => setTimeout(() => resolve({}), 500));
202180
}
203-
const spawnResult = await spawnNitroProcess();
181+
const spawnResult = await spawnNitroProcess(runMode);
204182
if (spawnResult.error) {
205183
return spawnResult;
206184
}
@@ -218,60 +196,14 @@ export async function runNitroAndLoadModel(): Promise<NitroModelOperationRespons
218196
}
219197
}
220198

221-
/**
222-
* Parse prompt template into args settings
223-
* @param {string} promptTemplate Template as string
224-
* @returns {(NitroPromptSetting | never)} parsed prompt setting
225-
* @throws {Error} if cannot split promptTemplate
226-
*/
227-
function promptTemplateConverter(
228-
promptTemplate: string,
229-
): NitroPromptSetting | never {
230-
// Split the string using the markers
231-
const systemMarker = "{system_message}";
232-
const promptMarker = "{prompt}";
233-
234-
if (
235-
promptTemplate.includes(systemMarker) &&
236-
promptTemplate.includes(promptMarker)
237-
) {
238-
// Find the indices of the markers
239-
const systemIndex = promptTemplate.indexOf(systemMarker);
240-
const promptIndex = promptTemplate.indexOf(promptMarker);
241-
242-
// Extract the parts of the string
243-
const system_prompt = promptTemplate.substring(0, systemIndex);
244-
const user_prompt = promptTemplate.substring(
245-
systemIndex + systemMarker.length,
246-
promptIndex,
247-
);
248-
const ai_prompt = promptTemplate.substring(
249-
promptIndex + promptMarker.length,
250-
);
251-
252-
// Return the split parts
253-
return { system_prompt, user_prompt, ai_prompt };
254-
} else if (promptTemplate.includes(promptMarker)) {
255-
// Extract the parts of the string for the case where only promptMarker is present
256-
const promptIndex = promptTemplate.indexOf(promptMarker);
257-
const user_prompt = promptTemplate.substring(0, promptIndex);
258-
const ai_prompt = promptTemplate.substring(
259-
promptIndex + promptMarker.length,
260-
);
261-
262-
// Return the split parts
263-
return { user_prompt, ai_prompt };
264-
}
265-
266-
// Throw error if none of the conditions are met
267-
throw Error("Cannot split prompt template");
268-
}
269-
270199
/**
271200
* Loads a LLM model into the Nitro subprocess by sending a HTTP POST request.
272201
* @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
273202
*/
274-
export async function loadLLMModel(settings: any): Promise<Response> {
203+
async function loadLLMModel(settings: any): Promise<Response> {
204+
// The nitro subprocess must be started before loading model
205+
if (!subprocess) throw Error("Calling loadLLMModel without running nitro");
206+
275207
log(`[NITRO]::Debug: Loading model with params ${JSON.stringify(settings)}`);
276208
try {
277209
const res = await fetchRetry(NITRO_HTTP_LOAD_MODEL_URL, {
@@ -301,7 +233,7 @@ export async function loadLLMModel(settings: any): Promise<Response> {
301233
* @returns {Promise<Response>} A Promise that resolves when the chat completion success, or rejects with an error if the completion fails.
302234
* @description If outStream is specified, the response body is consumed and cannot be used to reconstruct the data
303235
*/
304-
export async function chatCompletion(
236+
async function chatCompletion(
305237
request: any,
306238
outStream?: WritableStream,
307239
): Promise<Response> {
@@ -350,7 +282,7 @@ export async function chatCompletion(
350282
* If the model is loaded successfully, the object is empty.
351283
* If the model is not loaded successfully, the object contains an error message.
352284
*/
353-
export async function validateModelStatus(): Promise<NitroModelOperationResponse> {
285+
async function validateModelStatus(): Promise<NitroModelOperationResponse> {
354286
// Send a GET request to the validation URL.
355287
// Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries.
356288
const response = await fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, {
@@ -382,12 +314,13 @@ export async function validateModelStatus(): Promise<NitroModelOperationResponse
382314
* Terminates the Nitro subprocess.
383315
* @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate.
384316
*/
385-
export async function killSubprocess(): Promise<NitroModelOperationResponse> {
317+
async function killSubprocess(): Promise<NitroModelOperationResponse> {
386318
const controller = new AbortController();
387319
setTimeout(() => controller.abort(), 5000);
388320
log(`[NITRO]::Debug: Request to kill Nitro`);
389321

390322
try {
323+
// FIXME: should use this response?
391324
const _response = await fetch(NITRO_HTTP_KILL_URL, {
392325
method: "DELETE",
393326
signal: controller.signal,
@@ -406,11 +339,13 @@ export async function killSubprocess(): Promise<NitroModelOperationResponse> {
406339
* Spawns a Nitro subprocess.
407340
* @returns A promise that resolves when the Nitro subprocess is started.
408341
*/
409-
export function spawnNitroProcess(): Promise<NitroModelOperationResponse> {
342+
function spawnNitroProcess(
343+
runMode?: "cpu" | "gpu",
344+
): Promise<NitroModelOperationResponse> {
410345
log(`[NITRO]::Debug: Spawning Nitro subprocess...`);
411346

412347
return new Promise(async (resolve, reject) => {
413-
const executableOptions = executableNitroFile(nvidiaConfig, binPath);
348+
const executableOptions = executableNitroFile(binPath, runMode);
414349

415350
const args: string[] = ["1", LOCAL_HOST, PORT.toString()];
416351
// Execute the binary
@@ -451,27 +386,23 @@ export function spawnNitroProcess(): Promise<NitroModelOperationResponse> {
451386
});
452387
}
453388

454-
/**
455-
* Get the system resources information
456-
*/
457-
export async function getResourcesInfo(): Promise<ResourcesInfo> {
458-
const cpu = osUtils.cpuCount();
459-
log(`[NITRO]::CPU informations - ${cpu}`);
460-
const response: ResourcesInfo = {
461-
numCpuPhysicalCore: cpu,
462-
memAvailable: 0,
463-
};
464-
return response;
465-
}
466-
467-
export const updateNvidiaInfo = async () =>
468-
await _updateNvidiaInfo(nvidiaConfig);
469-
export const getCurrentNitroProcessInfo = () => getNitroProcessInfo(subprocess);
470-
471389
/**
472390
* Trap for system signal so we can stop nitro process on exit
473391
*/
474392
process.on("SIGTERM", async () => {
475393
log(`[NITRO]::Debug: Received SIGTERM signal`);
476394
await killSubprocess();
477395
});
396+
397+
export {
398+
getCurrentNitroProcessInfo,
399+
getBinPath,
400+
setBinPath,
401+
initialize,
402+
stopModel,
403+
runModel,
404+
loadLLMModel,
405+
chatCompletion,
406+
validateModelStatus,
407+
killSubprocess,
408+
};

0 commit comments

Comments
 (0)