This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit e2bc67a

hotfix: turn off current cache logic
1 parent 9a46624 commit e2bc67a

File tree

1 file changed: +7 -7 lines changed


controllers/llamaCPP.cc

Lines changed: 7 additions & 7 deletions
@@ -197,12 +197,12 @@ void llamaCPP::InferenceImpl(
   // To set default value

   // Increase number of chats received and clean the prompt
-  no_of_chats++;
-  if (no_of_chats % clean_cache_threshold == 0) {
-    LOG_INFO_REQUEST(request_id) << "Clean cache threshold reached!";
-    llama.kv_cache_clear();
-    LOG_INFO_REQUEST(request_id) << "Cache cleaned";
-  }
+  //no_of_chats++;
+  //if (no_of_chats % clean_cache_threshold == 0) {
+  //  LOG_INFO_REQUEST(request_id) << "Clean cache threshold reached!";
+  //  llama.kv_cache_clear();
+  //  LOG_INFO_REQUEST(request_id) << "Cache cleaned";
+  //}

   // Default values to enable auto caching
   //data["cache_prompt"] = caching_enabled;
@@ -656,7 +656,7 @@ bool llamaCPP::LoadModelImpl(std::shared_ptr<Json::Value> jsonBody) {
   params.cont_batching = jsonBody->get("cont_batching", false).asBool();
   this->clean_cache_threshold =
       jsonBody->get("clean_cache_threshold", 5).asInt();
-  this->caching_enabled = jsonBody->get("caching_enabled", false).asBool();
+  this->caching_enabled = jsonBody->get("caching_enabled", true).asBool();
   this->user_prompt = jsonBody->get("user_prompt", "USER: ").asString();
   this->ai_prompt = jsonBody->get("ai_prompt", "ASSISTANT: ").asString();
   this->system_prompt =
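Net effect of the second hunk: when the caller omits "caching_enabled" from the load-model request body, prompt caching is now enabled by default. The following is a minimal stand-alone sketch (not part of the commit) of how the defaults resolve with jsoncpp, which the controller uses; it reuses only field names visible in the diff and omits the rest of LoadModelImpl.

#include <json/json.h>
#include <iostream>

int main() {
  // Pretend this is the parsed load-model request body. The caller did not
  // set "caching_enabled", so the new default of `true` is picked up.
  Json::Value jsonBody;
  jsonBody["cont_batching"] = false;

  bool caching_enabled = jsonBody.get("caching_enabled", true).asBool();
  int clean_cache_threshold = jsonBody.get("clean_cache_threshold", 5).asInt();

  // Prints: caching_enabled=1 clean_cache_threshold=5
  std::cout << "caching_enabled=" << caching_enabled
            << " clean_cache_threshold=" << clean_cache_threshold << "\n";

  // A caller that wants the previous behavior must now opt out explicitly:
  jsonBody["caching_enabled"] = false;
  caching_enabled = jsonBody.get("caching_enabled", true).asBool();
  std::cout << "caching_enabled=" << caching_enabled << "\n";  // prints 0
  return 0;
}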
