 using namespace inferences;
 using json = nlohmann::json;

+/**
+ * Holds the state of a handler's ongoing inference, so that the streaming
+ * callbacks can check and update it after the handler has returned.
+ *
+ * @param inst Pointer to the llamaCPP instance this inference task is
+ * associated with.
+ */
 struct inferenceState {
   bool is_stopped = false;
   bool is_streaming = false;
@@ -16,13 +23,19 @@ struct inferenceState {
   inferenceState(llamaCPP *inst) : instance(inst) {}
 };

+/**
+ * Creates a smart pointer to an inferenceState, so that the state persists
+ * even after the handler has moved on and only the streaming lambda still
+ * holds a reference to it.
+ */
 std::shared_ptr<inferenceState> create_inference_state(llamaCPP *instance) {
   return std::make_shared<inferenceState>(instance);
 }

-// --------------------------------------------
-
-// Function to check if the model is loaded
+/**
+ * Checks whether a model is loaded; if not, returns an error message to the
+ * user.
+ * @param callback the function used to send the response back to the user
+ */
 void llamaCPP::checkModelLoaded(
     std::function<void(const HttpResponsePtr &)> &callback) {
   if (!llama.model_loaded_external) {
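The comment on create_inference_state above is about object lifetime. The following minimal sketch is not part of this commit; the simplified State struct and start_streaming helper are illustrative only. The point is that the lambda copies the shared_ptr, so the state created by a handler stays alive after that handler has returned.

// Sketch only (not in this commit): why returning a std::shared_ptr keeps the
// state alive after the handler returns.
#include <functional>
#include <iostream>
#include <memory>

struct State {               // simplified stand-in for inferenceState
  bool is_stopped = false;
  bool is_streaming = false;
};

std::function<void()> start_streaming() {
  auto state = std::make_shared<State>();   // use_count == 1
  return [state] {                          // the copy in the capture is owner #2
    state->is_streaming = true;             // still valid here
    std::cout << "stopped=" << state->is_stopped << '\n';
  };
}                                           // local `state` dies, the object survives

int main() {
  auto step = start_streaming();            // the "handler" has already moved on
  step();                                   // the captured State is still alive
}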
@@ -513,6 +526,36 @@ void llamaCPP::modelStatus(
   callback(resp);
   return;
 }
+void llamaCPP::loadModel(
+    const HttpRequestPtr &req,
+    std::function<void(const HttpResponsePtr &)> &&callback) {
+
+  if (llama.model_loaded_external) {
+    LOG_INFO << "model loaded";
+    Json::Value jsonResp;
+    jsonResp["message"] = "Model already loaded";
+    auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
+    resp->setStatusCode(drogon::k409Conflict);
+    callback(resp);
+    return;
+  }
+
+  const auto &jsonBody = req->getJsonObject();
+  if (!loadModelImpl(jsonBody)) {
+    // Error occurred during model loading
+    Json::Value jsonResp;
+    jsonResp["message"] = "Failed to load model";
+    auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
+    resp->setStatusCode(drogon::k500InternalServerError);
+    callback(resp);
+  } else {
+    // Model loaded successfully
+    Json::Value jsonResp;
+    jsonResp["message"] = "Model loaded successfully";
+    auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
+    callback(resp);
+  }
+}

 bool llamaCPP::loadModelImpl(std::shared_ptr<Json::Value> jsonBody) {

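From a client's point of view, the loadModel handler above maps its three branches to HTTP 409 (already loaded), 500 (loadModelImpl failed), and 200 (loaded). The sketch below exercises that behavior with Drogon's HttpClient; it is not part of this commit, and the port, route, and llama_model_path field are assumptions, since the handler simply forwards whatever JSON body it receives to loadModelImpl.

// Sketch only (not in this commit). Port, route, and body fields are assumed.
#include <drogon/drogon.h>
#include <iostream>

int main() {
  auto client = drogon::HttpClient::newHttpClient("http://127.0.0.1:3928");

  Json::Value body;
  body["llama_model_path"] = "/models/some-model.gguf";  // assumed field name

  auto req = drogon::HttpRequest::newHttpJsonRequest(body);
  req->setMethod(drogon::Post);
  req->setPath("/inferences/llamacpp/loadmodel");        // assumed route

  client->sendRequest(req, [](drogon::ReqResult result,
                              const drogon::HttpResponsePtr &resp) {
    if (result == drogon::ReqResult::Ok) {
      // 200: loaded, 409: already loaded, 500: loadModelImpl failed
      std::cout << resp->getStatusCode() << " " << resp->getBody() << '\n';
    }
    drogon::app().quit();
  });

  drogon::app().run();  // the client needs a running event loop
}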
@@ -610,37 +653,6 @@ bool llamaCPP::loadModelImpl(std::shared_ptr<Json::Value> jsonBody) {
   return true;
 }

-void llamaCPP::loadModel(
-    const HttpRequestPtr &req,
-    std::function<void(const HttpResponsePtr &)> &&callback) {
-
-  if (llama.model_loaded_external) {
-    LOG_INFO << "model loaded";
-    Json::Value jsonResp;
-    jsonResp["message"] = "Model already loaded";
-    auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
-    resp->setStatusCode(drogon::k409Conflict);
-    callback(resp);
-    return;
-  }
-
-  const auto &jsonBody = req->getJsonObject();
-  if (!loadModelImpl(jsonBody)) {
-    // Error occurred during model loading
-    Json::Value jsonResp;
-    jsonResp["message"] = "Failed to load model";
-    auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
-    resp->setStatusCode(drogon::k500InternalServerError);
-    callback(resp);
-  } else {
-    // Model loaded successfully
-    Json::Value jsonResp;
-    jsonResp["message"] = "Model loaded successfully";
-    auto resp = nitro_utils::nitroHttpJsonResponse(jsonResp);
-    callback(resp);
-  }
-}
-
 void llamaCPP::backgroundTask() {
   while (llama.model_loaded_external) {
     // model_loaded =