This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 5973770

add customization for batch size

1 parent: c2a0ff9

File tree: 2 files changed (+2, -1 lines)

README.md (1 addition, 0 deletions)

````diff
@@ -107,6 +107,7 @@ Table of parameters
 | `system_prompt` | String | The prompt to use for system rules. |
 | `pre_prompt` | String | The prompt to use for internal configuration. |
 | `cpu_threads` | Integer | The number of threads to use for inferencing (CPU MODE ONLY) |
+| `n_batch` | Integer | The batch size for prompt eval step |
 
 ***OPTIONAL***: You can run Nitro on a different port like 5000 instead of 3928 by running it manually in terminal
 ```zsh
````
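Taken together with the parameters already in the table, a load-model request body that sets the new `n_batch` field might look like the sketch below (values are illustrative defaults drawn from this commit, not a complete or authoritative request):

```json
{
  "ctx_len": 2048,
  "embedding": true,
  "cpu_threads": 4,
  "n_parallel": 1,
  "n_batch": 512
}
```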

controllers/llamaCPP.cc (1 addition, 1 deletion)

```diff
@@ -376,7 +376,7 @@ void llamaCPP::loadModel(
   params.n_ctx = (*jsonBody).get("ctx_len", 2048).asInt();
   params.embedding = (*jsonBody).get("embedding", true).asBool();
   // Check if n_parallel exists in jsonBody, if not, set to drogon_thread
-
+  params.n_batch = (*jsonBody).get("n_batch",512).asInt();
   params.n_parallel = (*jsonBody).get("n_parallel", drogon_thread).asInt();
   params.n_threads =
       (*jsonBody)
```

0 commit comments