@@ -1,3 +1,4 @@
+#include <drogon/HttpTypes.h>
 #if defined(_WIN32)
 #define NOMINMAX
 #endif
@@ -1311,51 +1312,56 @@ namespace inferences {
 class llamaCPP : public drogon::HttpController<llamaCPP> {
 public:
   llamaCPP() {
-    gpt_params params;
-    auto conf = drogon::app().getCustomConfig();
-    params.model = conf["llama_model_path"].asString();
-    params.n_gpu_layers = conf["ngl"].asInt();
-    params.n_ctx = conf["ctx_len"].asInt();
-    params.embedding = conf["embedding"].asBool();
-#ifdef GGML_USE_CUBLAS
-    LOG_INFO << "Setting up GGML CUBLAS PARAMS";
-    params.mul_mat_q = false;
-#endif // GGML_USE_CUBLAS
-    if (params.model_alias == "unknown") {
-      params.model_alias = params.model;
-    }
-
-    llama_backend_init(params.numa);
-
-    LOG_INFO_LLAMA("build info",
-                   {{"build", BUILD_NUMBER}, {"commit", BUILD_COMMIT}});
-    LOG_INFO_LLAMA("system info",
-                   {
-                       {"n_threads", params.n_threads},
-                       {"total_threads", std::thread::hardware_concurrency()},
-                       {"system_info", llama_print_system_info()},
-                   });
-
-    // load the model
-    if (!llama.loadModel(params)) {
-      LOG_ERROR << "Error loading the model will exit the program";
-      std::terminate();
-    }
-    nitro_utils::nitro_logo();
+    // gpt_params params;
+    // auto conf = drogon::app().getCustomConfig();
+    // params.model = conf["llama_model_path"].asString();
+    // params.n_gpu_layers = conf["ngl"].asInt();
+    // params.n_ctx = conf["ctx_len"].asInt();
+    // params.embedding = conf["embedding"].asBool();
+    // #ifdef GGML_USE_CUBLAS
+    //   LOG_INFO << "Setting up GGML CUBLAS PARAMS";
+    //   params.mul_mat_q = false;
+    // #endif // GGML_USE_CUBLAS
+    // if (params.model_alias == "unknown") {
+    //   params.model_alias = params.model;
+    // }
+    //
+    // llama_backend_init(params.numa);
+    //
+    // LOG_INFO_LLAMA("build info",
+    //                {{"build", BUILD_NUMBER}, {"commit", BUILD_COMMIT}});
+    // LOG_INFO_LLAMA("system info",
+    //                {
+    //                    {"n_threads", params.n_threads},
+    //                    {"total_threads",
+    //                    std::thread::hardware_concurrency()},
+    //                    {"system_info", llama_print_system_info()},
+    //                });
+    //
+    // // load the model
+    // if (!llama.loadModel(params)) {
+    //   LOG_ERROR << "Error loading the model will exit the program";
+    //   std::terminate();
+    // }
+    // deprecate this if no use case is found
   }
   METHOD_LIST_BEGIN
   // list path definitions here;
-  METHOD_ADD(llamaCPP::chatCompletion, "chat_completion");
-  METHOD_ADD(llamaCPP::embedding,"embedding");
+  METHOD_ADD(llamaCPP::chatCompletion, "chat_completion", Post);
+  METHOD_ADD(llamaCPP::embedding, "embedding", Post);
+  METHOD_ADD(llamaCPP::loadModel, "loadmodel", Post);
   // PATH_ADD("/llama/chat_completion", Post);
   METHOD_LIST_END
   void chatCompletion(const HttpRequestPtr &req,
                       std::function<void(const HttpResponsePtr &)> &&callback);
   void embedding(const HttpRequestPtr &req,
                  std::function<void(const HttpResponsePtr &)> &&callback);
+  void loadModel(const HttpRequestPtr &req,
+                 std::function<void(const HttpResponsePtr &)> &&callback);
 
 private:
   llama_server_context llama;
+  bool model_loaded = false;
   size_t sent_count = 0;
   size_t sent_token_probs_index = 0;
 };
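This hunk registers the new loadmodel route and declares loadModel, but the handler body lives elsewhere in the file. As a minimal sketch only, assuming the request body carries the same keys the old constructor read from the custom config ("llama_model_path", "ngl", "ctx_len", "embedding"), the handler could look roughly like this; the JSON reply shape and the HTTP 500 on failure are assumptions, not the commit's actual code:

// Hypothetical sketch -- not the body from this commit.
void llamaCPP::loadModel(
    const HttpRequestPtr &req,
    std::function<void(const HttpResponsePtr &)> &&callback) {
  const auto jsonBody = req->getJsonObject(); // parsed JSON body, may be null
  gpt_params params;
  if (jsonBody) {
    params.model = (*jsonBody)["llama_model_path"].asString();
    params.n_gpu_layers = (*jsonBody)["ngl"].asInt();
    params.n_ctx = (*jsonBody)["ctx_len"].asInt();
    params.embedding = (*jsonBody)["embedding"].asBool();
  }
  llama_backend_init(params.numa);
  Json::Value reply;
  if (!llama.loadModel(params)) {
    // Unlike the old constructor, a failed load no longer terminates the
    // process; the server stays up and reports the error to the caller.
    reply["message"] = "Failed to load model";
    auto resp = drogon::HttpResponse::newHttpJsonResponse(reply);
    resp->setStatusCode(drogon::k500InternalServerError);
    callback(resp);
    return;
  }
  model_loaded = true; // flag added in this commit
  reply["message"] = "Model loaded successfully";
  callback(drogon::HttpResponse::newHttpJsonResponse(reply));
}

The net effect of the change: the model is no longer loaded eagerly in the controller constructor from drogon's custom config, and the new model_loaded flag presumably lets chatCompletion and embedding reject requests until a model has been loaded over HTTP.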
|
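Since METHOD_ADD paths are mounted under the controller's namespace/class prefix, the new route resolves to /inferences/llamacpp/loadmodel. A hypothetical client-side call for illustration; host, port, and the model path are placeholders:

#include <drogon/drogon.h>

int main() {
  Json::Value body;
  body["llama_model_path"] = "/path/to/model.gguf"; // placeholder
  body["ngl"] = 32;
  body["ctx_len"] = 2048;
  body["embedding"] = true;

  auto client = drogon::HttpClient::newHttpClient("http://127.0.0.1:8080");
  auto req = drogon::HttpRequest::newHttpJsonRequest(body);
  req->setMethod(drogon::Post);
  req->setPath("/inferences/llamacpp/loadmodel");
  client->sendRequest(req, [](drogon::ReqResult result,
                              const drogon::HttpResponsePtr &resp) {
    if (result == drogon::ReqResult::Ok && resp)
      LOG_INFO << std::string(resp->getBody());
    drogon::app().quit(); // stop the event loop after the one-shot request
  });
  drogon::app().run(); // drive the client's event loop
}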