Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit c6a2656

Browse files
committed
fix(nitro-node): add missing settings when loading model
1 parent 21e3026 commit c6a2656

File tree

3 files changed

+33
-14
lines changed

3 files changed

+33
-14
lines changed

nitro-node/src/nitro.ts

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,15 @@ function stopModel(): Promise<NitroModelOperationResponse> {
108108
* @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
109109
*/
110110
async function runModel(
111-
{ modelPath, promptTemplate }: NitroModelInitOptions,
111+
{
112+
modelPath,
113+
promptTemplate,
114+
ctx_len = 2048,
115+
ngl = 100,
116+
cont_batching = false,
117+
embedding = true,
118+
cpu_threads,
119+
}: NitroModelInitOptions,
112120
runMode?: "cpu" | "gpu",
113121
): Promise<NitroModelOperationResponse> {
114122
// Download nitro binaries if they're not already downloaded
@@ -149,11 +157,15 @@ async function runModel(
149157
currentSettings = {
150158
...prompt,
151159
llama_model_path: currentModelFile,
160+
ctx_len,
161+
ngl,
162+
cont_batching,
163+
embedding,
152164
// This is critical and requires real system information
153-
cpu_threads: Math.max(
154-
1,
155-
Math.round(nitroResourceProbe.numCpuPhysicalCore / 2),
156-
),
165+
cpu_threads:
166+
cpu_threads && cpu_threads > 0
167+
? cpu_threads
168+
: Math.max(1, Math.round(nitroResourceProbe.numCpuPhysicalCore / 2)),
157169
};
158170
return runNitroAndLoadModel(runMode);
159171
}
@@ -184,7 +196,7 @@ async function runNitroAndLoadModel(
184196
return spawnResult;
185197
}
186198
// TODO: Use this response?
187-
const _loadModelResponse = await loadLLMModel(currentSettings);
199+
const _loadModelResponse = await loadLLMModel(currentSettings!);
188200
const validationResult = await validateModelStatus();
189201
if (validationResult.error) {
190202
return validationResult;
@@ -201,7 +213,7 @@ async function runNitroAndLoadModel(
201213
* Loads an LLM model into the Nitro subprocess by sending an HTTP POST request.
202214
* @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load.
203215
*/
204-
async function loadLLMModel(settings: any): Promise<Response> {
216+
async function loadLLMModel(settings: NitroModelSetting): Promise<Response> {
205217
// The nitro subprocess must be started before loading the model
206218
if (!subprocess) throw Error("Calling loadLLMModel without running nitro");
207219

nitro-node/src/types/index.ts

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,15 +33,24 @@ export interface NitroPromptSetting {
3333
*/
3434
export interface NitroModelSetting extends NitroPromptSetting {
3535
llama_model_path: string;
36+
ctx_len: number;
37+
ngl: number;
38+
cont_batching: boolean;
39+
embedding: boolean;
3640
cpu_threads: number;
3741
}
3842

3943
/**
40-
* The response object for model init operation.
44+
* The parameters for model init operation.
4145
*/
4246
export interface NitroModelInitOptions {
4347
modelPath: string;
4448
promptTemplate?: string;
49+
ctx_len?: number;
50+
ngl?: number;
51+
cont_batching?: boolean;
52+
embedding?: boolean;
53+
cpu_threads?: number;
4554
}
4655

4756
/**

nitro-node/test/nitro-process.test.ts

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -166,16 +166,14 @@ describe("Manage nitro process", () => {
166166
await runModel({
167167
modelPath,
168168
promptTemplate: modelCfg.settings.prompt_template,
169-
});
170-
// Wait 5s for nitro to start
171-
await sleep(5 * 1000);
172-
// Load LLM model
173-
await loadLLMModel({
174-
llama_model_path: modelPath,
175169
ctx_len: modelCfg.settings.ctx_len,
176170
ngl: modelCfg.settings.ngl,
171+
cont_batching: false,
177172
embedding: false,
173+
cpu_threads: -1, // Default to auto
178174
});
175+
// Wait 5s for nitro to start
176+
await sleep(5 * 1000);
179177
// Validate model status
180178
await validateModelStatus();
181179
// Arrays of all the chunked response

0 commit comments

Comments
 (0)