
Commit 36b1ef1

Merge pull request #236 from tikikun/main
add file system POC
2 parents: 73e4c10 + 2225292

File tree: 3 files changed (+112, -41 lines)

controllers/llamaCPP.cc

Lines changed: 47 additions & 41 deletions
@@ -359,47 +359,32 @@ void llamaCPP::modelStatus(
   return;
 }

-void llamaCPP::loadModel(
-    const HttpRequestPtr &req,
-    std::function<void(const HttpResponsePtr &)> &&callback) {
-
-  if (model_loaded) {
-    LOG_INFO << "model loaded";
-    Json::Value jsonResp;
-    jsonResp["message"] = "Model already loaded";
-    auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
-    resp->setStatusCode(drogon::k409Conflict);
-    callback(resp);
-    return;
-  }
-
-  const auto &jsonBody = req->getJsonObject();
+bool llamaCPP::loadModelImpl(const Json::Value &jsonBody) {

   gpt_params params;

   // By default will setting based on number of handlers
   int drogon_thread = drogon::app().getThreadNum();
   LOG_INFO << "Drogon thread is:" << drogon_thread;
   if (jsonBody) {
-    params.model = (*jsonBody)["llama_model_path"].asString();
-    params.n_gpu_layers = (*jsonBody).get("ngl", 100).asInt();
-    params.n_ctx = (*jsonBody).get("ctx_len", 2048).asInt();
-    params.embedding = (*jsonBody).get("embedding", true).asBool();
+    params.model = jsonBody["llama_model_path"].asString();
+    params.n_gpu_layers = jsonBody.get("ngl", 100).asInt();
+    params.n_ctx = jsonBody.get("ctx_len", 2048).asInt();
+    params.embedding = jsonBody.get("embedding", true).asBool();
     // Check if n_parallel exists in jsonBody, if not, set to drogon_thread
-    params.n_batch = (*jsonBody).get("n_batch", 512).asInt();
-    params.n_parallel = (*jsonBody).get("n_parallel", drogon_thread).asInt();
+    params.n_batch = jsonBody.get("n_batch", 512).asInt();
+    params.n_parallel = jsonBody.get("n_parallel", drogon_thread).asInt();
     params.n_threads =
-        (*jsonBody)
-            .get("cpu_threads", std::thread::hardware_concurrency())
+        jsonBody.get("cpu_threads", std::thread::hardware_concurrency())
             .asInt();
-    params.cont_batching = (*jsonBody).get("cont_batching", false).asBool();
+    params.cont_batching = jsonBody.get("cont_batching", false).asBool();

-    this->user_prompt = (*jsonBody).get("user_prompt", "USER: ").asString();
-    this->ai_prompt = (*jsonBody).get("ai_prompt", "ASSISTANT: ").asString();
+    this->user_prompt = jsonBody.get("user_prompt", "USER: ").asString();
+    this->ai_prompt = jsonBody.get("ai_prompt", "ASSISTANT: ").asString();
     this->system_prompt =
-        (*jsonBody).get("system_prompt", "ASSISTANT's RULE: ").asString();
-    this->pre_prompt = (*jsonBody).get("pre_prompt", "").asString();
-    this->repeat_last_n = (*jsonBody).get("repeat_last_n", 32).asInt();
+        jsonBody.get("system_prompt", "ASSISTANT's RULE: ").asString();
+    this->pre_prompt = jsonBody.get("pre_prompt", "").asString();
+    this->repeat_last_n = jsonBody.get("repeat_last_n", 32).asInt();
   }
 #ifdef GGML_USE_CUBLAS
   LOG_INFO << "Setting up GGML CUBLAS PARAMS";
@@ -422,25 +407,46 @@ void llamaCPP::loadModel(

   // load the model
   if (!llama.load_model(params)) {
-    LOG_ERROR << "Error loading the model will exit the program";
-    Json::Value jsonResp;
-    jsonResp["message"] = "Failed to load model";
-    auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
-    resp->setStatusCode(drogon::k500InternalServerError);
-    callback(resp);
+    LOG_ERROR << "Error loading the model";
+    return false; // Indicate failure
   }
   llama.initialize();
-
-  Json::Value jsonResp;
-  jsonResp["message"] = "Model loaded successfully";
   model_loaded = true;
-  auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
-
   LOG_INFO << "Started background task here!";
   backgroundThread = std::thread(&llamaCPP::backgroundTask, this);
   warmupModel();
+  return true;
+}

-  callback(resp);
+void llamaCPP::loadModel(
+    const HttpRequestPtr &req,
+    std::function<void(const HttpResponsePtr &)> &&callback) {
+
+  if (model_loaded) {
+    LOG_INFO << "model loaded";
+    Json::Value jsonResp;
+    jsonResp["message"] = "Model already loaded";
+    auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
+    resp->setStatusCode(drogon::k409Conflict);
+    callback(resp);
+    return;
+  }
+
+  const auto &jsonBody = req->getJsonObject();
+  if (!loadModelImpl(*jsonBody)) {
+    // Error occurred during model loading
+    Json::Value jsonResp;
+    jsonResp["message"] = "Failed to load model";
+    auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
+    resp->setStatusCode(drogon::k500InternalServerError);
+    callback(resp);
+  } else {
+    // Model loaded successfully
+    Json::Value jsonResp;
+    jsonResp["message"] = "Model loaded successfully";
+    auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
+    callback(resp);
+  }
 }

 void llamaCPP::backgroundTask() {
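
The refactor above decouples model loading from the HTTP layer: loadModelImpl takes a plain Json::Value and returns a bool, while loadModel becomes a thin Drogon wrapper that maps that bool onto a success or 500 response. A minimal sketch of driving the new entry point directly from inside the llamaCPP class (the model path is a placeholder; the field names and defaults come from the parsing code above):

Json::Value jsonBody;
jsonBody["llama_model_path"] = "/path/to/model.gguf"; // placeholder path, not part of this commit
jsonBody["ngl"] = 100;      // GPU layers; defaults to 100 when omitted
jsonBody["ctx_len"] = 2048; // context length; defaults to 2048 when omitted
if (!loadModelImpl(jsonBody)) {
  LOG_ERROR << "Failed to load model"; // the caller decides how to surface the failure
}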

controllers/llamaCPP.h

Lines changed: 23 additions & 0 deletions
@@ -1,3 +1,5 @@
+#include <iostream>
+#include <string>
 #if defined(_WIN32)
 #define NOMINMAX
 #endif
@@ -2117,6 +2119,25 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
     // log_disable(); // Disable the log to file feature, reduce bloat for
     // target
     // system ()
+    std::vector<std::string> llama_models =
+        nitro_utils::listFilesInDir(nitro_utils::models_folder);
+    std::string model_index;
+    if (llama_models.size() > 0) {
+      LOG_INFO << "Found models folder, here are the llama models you have:";
+      int index_val = 0;
+      for (auto llama_model : llama_models) {
+        LOG_INFO << "index: " << index_val++ << "| model: " << llama_model;
+        std::cout
+            << "Please type the index of the model you want to load here >> ";
+        std::cin >> model_index;
+        Json::Value jsonBody;
+        jsonBody["llama_model_path"] = nitro_utils::models_folder + "/" +
+                                       llama_models[std::stoi(model_index)];
+        loadModelImpl(jsonBody);
+      }
+    } else {
+      LOG_INFO << "Not found models folder, start server as usual";
+    }
   }

   METHOD_LIST_BEGIN
@@ -2145,6 +2166,8 @@ class llamaCPP : public drogon::HttpController<llamaCPP> {
   void modelStatus(const HttpRequestPtr &req,
                    std::function<void(const HttpResponsePtr &)> &&callback);

+  bool loadModelImpl(const Json::Value &jsonBody);
+
   void warmupModel();

   void backgroundTask();
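
The constructor change above is the file system POC itself: at startup the server lists the models folder, logs each entry with an index, reads an index from stdin, and hands the chosen path to loadModelImpl. For reference, a minimal non-interactive sketch of the same flow that loads the first model found instead of prompting; all identifiers come from this diff, and the empty-folder fallback just reuses the log message above:

std::vector<std::string> llama_models =
    nitro_utils::listFilesInDir(nitro_utils::models_folder);
if (!llama_models.empty()) {
  Json::Value jsonBody;
  // Load the first model found in the models folder without prompting.
  jsonBody["llama_model_path"] =
      nitro_utils::models_folder + "/" + llama_models[0];
  loadModelImpl(jsonBody);
} else {
  LOG_INFO << "Not found models folder, start server as usual";
}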

utils/nitro_utils.h

Lines changed: 42 additions & 0 deletions
@@ -6,9 +6,51 @@
 #include <drogon/HttpResponse.h>
 #include <iostream>
 #include <ostream>
+// Include platform-specific headers
+#ifdef _WIN32
+#include <windows.h>
+#else
+#include <dirent.h>
+#endif

 namespace nitro_utils {

+inline std::string models_folder = "./models";
+
+inline std::vector<std::string> listFilesInDir(const std::string &path) {
+  std::vector<std::string> files;
+
+#ifdef _WIN32
+  // Windows-specific code
+  WIN32_FIND_DATA findFileData;
+  HANDLE hFind = FindFirstFile((path + "\\*").c_str(), &findFileData);
+
+  if (hFind != INVALID_HANDLE_VALUE) {
+    do {
+      if (!(findFileData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) {
+        files.push_back(findFileData.cFileName);
+      }
+    } while (FindNextFile(hFind, &findFileData) != 0);
+    FindClose(hFind);
+  }
+#else
+  // POSIX-specific code (Linux, Unix, MacOS)
+  DIR *dir;
+  struct dirent *ent;
+
+  if ((dir = opendir(path.c_str())) != NULL) {
+    while ((ent = readdir(dir)) != NULL) {
+      if (ent->d_type == DT_REG) { // Check if it's a regular file
+        files.push_back(ent->d_name);
+      }
+    }
+    closedir(dir);
+  }
+#endif
+
+  return files;
+}
+
 inline std::string rtrim(const std::string &str) {
   size_t end = str.find_last_not_of("\n\t ");
   return (end == std::string::npos) ? "" : str.substr(0, end + 1);
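
A quick usage sketch of the new helper, assuming nitro_utils.h is included: listFilesInDir returns only regular files (directories are filtered out by both the FindFirstFile and readdir backends), and the names come back without the leading path.

// Print every regular file directly inside the default ./models folder.
for (const auto &name : nitro_utils::listFilesInDir(nitro_utils::models_folder)) {
  std::cout << name << std::endl;
}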
