1 file changed, 7 insertions(+), 7 deletions(-)

@@ -197,12 +197,12 @@ void llamaCPP::InferenceImpl(
   // To set default value

   // Increase number of chats received and clean the prompt
-  no_of_chats++;
-  if (no_of_chats % clean_cache_threshold == 0) {
-    LOG_INFO_REQUEST(request_id) << "Clean cache threshold reached!";
-    llama.kv_cache_clear();
-    LOG_INFO_REQUEST(request_id) << "Cache cleaned";
-  }
+  // no_of_chats++;
+  // if (no_of_chats % clean_cache_threshold == 0) {
+  //   LOG_INFO_REQUEST(request_id) << "Clean cache threshold reached!";
+  //   llama.kv_cache_clear();
+  //   LOG_INFO_REQUEST(request_id) << "Cache cleaned";
+  // }

   // Default values to enable auto caching
   // data["cache_prompt"] = caching_enabled;
@@ -656,7 +656,7 @@ bool llamaCPP::LoadModelImpl(std::shared_ptr<Json::Value> jsonBody) {
   params.cont_batching = jsonBody->get("cont_batching", false).asBool();
   this->clean_cache_threshold =
       jsonBody->get("clean_cache_threshold", 5).asInt();
-  this->caching_enabled = jsonBody->get("caching_enabled", false).asBool();
+  this->caching_enabled = jsonBody->get("caching_enabled", true).asBool();
   this->user_prompt = jsonBody->get("user_prompt", "USER: ").asString();
   this->ai_prompt = jsonBody->get("ai_prompt", "ASSISTANT: ").asString();
   this->system_prompt =
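Net effect of the two hunks: prompt caching now defaults to on at model load unless the request disables it explicitly, while the periodic KV-cache clear keyed on the chat counter is commented out. A minimal sketch of the assumed JsonCpp get(key, default) semantics behind the new default (a standalone illustrative program, not part of the repository; field names mirror the diff):

// Sketch: assumes JsonCpp's Json::Value::get(key, default) returns the
// default when the key is absent, so caching is enabled unless the load
// request sends "caching_enabled": false.
#include <json/json.h>
#include <iostream>

int main() {
  Json::Value jsonBody;                    // e.g. parsed from a loadmodel request
  jsonBody["clean_cache_threshold"] = 5;   // field from the diff; value illustrative
  // "caching_enabled" intentionally omitted from the request body.

  bool caching_enabled = jsonBody.get("caching_enabled", true).asBool();
  int clean_cache_threshold = jsonBody.get("clean_cache_threshold", 5).asInt();

  std::cout << "caching_enabled=" << caching_enabled            // prints 1: default applies
            << " clean_cache_threshold=" << clean_cache_threshold << "\n";
  return 0;
}

With the first hunk commented out, clean_cache_threshold is still parsed at load time but no longer triggers llama.kv_cache_clear() on a chat-count interval.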