Merge pull request #294 from janhq/290-feat-proper-threadpool-allocations

tikikun · web-flow · commit 380420e53075 · 2023-12-22T08:30:47.000+07:00
290 feat proper threadpool allocations
diff --git a/controllers/llamaCPP.cc b/controllers/llamaCPP.cc
@@ -434,8 +434,6 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
   gpt_params params;
 
   // By default will setting based on number of handlers
-  int drogon_thread = drogon::app().getThreadNum() - 5;
-  LOG_INFO << "Drogon thread is:" << drogon_thread;
   if (jsonBody) {
     if (!jsonBody["mmproj"].isNull()) {
       LOG_INFO << "MMPROJ FILE detected, multi-model enabled!";
@@ -447,7 +445,7 @@ bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {
     params.embedding = jsonBody.get("embedding", true).asBool();
-    // Check if n_parallel exists in jsonBody, if not, set to drogon_thread
+    // Check if n_parallel exists in jsonBody, if not, default to 1
     params.n_batch = jsonBody.get("n_batch", 512).asInt();
-    params.n_parallel = jsonBody.get("n_parallel", drogon_thread).asInt();
+    params.n_parallel = jsonBody.get("n_parallel", 1).asInt();
     params.n_threads =
         jsonBody.get("cpu_threads", std::thread::hardware_concurrency())
             .asInt();
diff --git a/main.cc b/main.cc
@@ -2,6 +2,8 @@
 #include <climits> // for PATH_MAX
 #include <drogon/HttpAppFramework.h>
 #include <drogon/drogon.h>
+#include <iostream>
+#include <algorithm>
 
 #if defined(__APPLE__) && defined(__MACH__)
 #include <libgen.h> // for dirname()
@@ -11,6 +13,7 @@
 #include <unistd.h> // for readlink()
 #elif defined(_WIN32)
 #include <windows.h>
+#undef max
 #else
 #error "Unsupported platform!"
 #endif
@@ -35,11 +38,13 @@ int main(int argc, char *argv[]) {
     port = std::atoi(argv[3]); // Convert string argument to int
   }
 
+  int logical_cores = std::thread::hardware_concurrency();
+  int drogon_thread_num = std::max(thread_num, logical_cores);
   nitro_utils::nitro_logo();
   LOG_INFO << "Server started, listening at: " << host << ":" << port;
   LOG_INFO << "Please load your model";
   drogon::app().addListener(host, port);
-  drogon::app().setThreadNum(thread_num + 5);
+  drogon::app().setThreadNum(drogon_thread_num);
   LOG_INFO << "Number of thread is:" << drogon::app().getThreadNum();
 
   drogon::app().run();