
Commit e6a6b01 (parent: 9db8fd9)

remove usage of drogon thread to infer n_parallel


controllers/llamaCPP.cc

Lines changed: 1 addition & 3 deletions
```diff
@@ -434,8 +434,6 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
   gpt_params params;
 
   // By default will setting based on number of handlers
-  int drogon_thread = drogon::app().getThreadNum() - 5;
-  LOG_INFO << "Drogon thread is:" << drogon_thread;
   if (jsonBody) {
     if (!jsonBody["mmproj"].isNull()) {
       LOG_INFO << "MMPROJ FILE detected, multi-model enabled!";
@@ -447,7 +445,7 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
     params.embedding = jsonBody.get("embedding", true).asBool();
     // Check if n_parallel exists in jsonBody, if not, set to drogon_thread
     params.n_batch = jsonBody.get("n_batch", 512).asInt();
-    params.n_parallel = jsonBody.get("n_parallel", drogon_thread).asInt();
+    params.n_parallel = jsonBody.get("n_parallel", 1).asInt();
     params.n_threads =
         jsonBody.get("cpu_threads", std::thread::hardware_concurrency())
             .asInt();
```
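
After this commit, the default for `n_parallel` no longer comes from Drogon's worker-thread count (`drogon::app().getThreadNum() - 5`); when the request body omits the key, jsoncpp's `Json::Value::get` fallback yields a fixed 1. Below is a minimal sketch of the resulting default-resolution behavior, assuming jsoncpp; the `Params` struct and `resolve` helper are illustrative stand-ins, not the project's actual `gpt_params` or API.

```cpp
#include <json/json.h>

#include <iostream>
#include <thread>

// Illustrative stand-in for the gpt_params fields touched here;
// not the project's actual struct.
struct Params {
  int n_batch = 0;
  int n_parallel = 0;
  int n_threads = 0;
};

// Mirrors the post-commit default resolution in loadModelImpl.
Params resolve(const Json::Value &jsonBody) {
  Params p;
  p.n_batch = jsonBody.get("n_batch", 512).asInt();
  // Post-commit behavior: fixed default of 1, no Drogon thread heuristic.
  p.n_parallel = jsonBody.get("n_parallel", 1).asInt();
  p.n_threads =
      jsonBody.get("cpu_threads", std::thread::hardware_concurrency())
          .asInt();
  return p;
}

int main() {
  Json::Value body(Json::objectValue);            // empty request body
  std::cout << resolve(body).n_parallel << "\n";  // prints 1 (the new default)

  body["n_parallel"] = 4;                         // explicit value still wins
  std::cout << resolve(body).n_parallel << "\n";  // prints 4
}
```

Note that the context line `// Check if n_parallel exists in jsonBody, if not, set to drogon_thread` is untouched by the commit and is stale after this change, since the fallback is now 1.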
