Skip to content
This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 380420e

Browse files
authored
Merge pull request #294 from janhq/290-feat-proper-threadpool-allocations
290 feat proper threadpool allocations
2 parents aa973ed + 97df4b9 commit 380420e

File tree

2 files changed

+7
-4
lines changed

2 files changed

+7
-4
lines changed

controllers/llamaCPP.cc

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -434,8 +434,6 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
434434
gpt_params params;
435435

436436
// By default, settings are based on the number of handlers
437-
int drogon_thread = drogon::app().getThreadNum() - 5;
438-
LOG_INFO << "Drogon thread is:" << drogon_thread;
439437
if (jsonBody) {
440438
if (!jsonBody["mmproj"].isNull()) {
441439
LOG_INFO << "MMPROJ FILE detected, multi-model enabled!";
@@ -447,7 +445,7 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
447445
params.embedding = jsonBody.get("embedding", true).asBool();
448446
// Check if n_parallel exists in jsonBody; if not, default to 1
449447
params.n_batch = jsonBody.get("n_batch", 512).asInt();
450-
params.n_parallel = jsonBody.get("n_parallel", drogon_thread).asInt();
448+
params.n_parallel = jsonBody.get("n_parallel", 1).asInt();
451449
params.n_threads =
452450
jsonBody.get("cpu_threads", std::thread::hardware_concurrency())
453451
.asInt();

main.cc

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
#include <climits> // for PATH_MAX
33
#include <drogon/HttpAppFramework.h>
44
#include <drogon/drogon.h>
5+
#include <iostream>
6+
#include <algorithm>
57

68
#if defined(__APPLE__) && defined(__MACH__)
79
#include <libgen.h> // for dirname()
@@ -11,6 +13,7 @@
1113
#include <unistd.h> // for readlink()
1214
#elif defined(_WIN32)
1315
#include <windows.h>
16+
#undef max
1417
#else
1518
#error "Unsupported platform!"
1619
#endif
@@ -35,11 +38,13 @@ int main(int argc, char *argv[]) {
3538
port = std::atoi(argv[3]); // Convert string argument to int
3639
}
3740

41+
int logical_cores = std::thread::hardware_concurrency();
42+
int drogon_thread_num = std::max(thread_num, logical_cores);
3843
nitro_utils::nitro_logo();
3944
LOG_INFO << "Server started, listening at: " << host << ":" << port;
4045
LOG_INFO << "Please load your model";
4146
drogon::app().addListener(host, port);
42-
drogon::app().setThreadNum(thread_num + 5);
47+
drogon::app().setThreadNum(drogon_thread_num);
4348
LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum();
4449

4550
drogon::app().run();

0 commit comments

Comments
 (0)