This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit e2bc67a

hotfix: turn off current cache logic
1 parent 9a46624 commit e2bc67a

File tree

1 file changed: +7 -7 lines changed


controllers/llamaCPP.cc

Lines changed: 7 additions & 7 deletions
@@ -197,12 +197,12 @@ void llamaCPP::InferenceImpl(
   // To set default value

   // Increase number of chats received and clean the prompt
-  no_of_chats++;
-  if (no_of_chats % clean_cache_threshold == 0) {
-    LOG_INFO_REQUEST(request_id) << "Clean cache threshold reached!";
-    llama.kv_cache_clear();
-    LOG_INFO_REQUEST(request_id) << "Cache cleaned";
-  }
+  //no_of_chats++;
+  //if (no_of_chats % clean_cache_threshold == 0) {
+  //  LOG_INFO_REQUEST(request_id) << "Clean cache threshold reached!";
+  //  llama.kv_cache_clear();
+  //  LOG_INFO_REQUEST(request_id) << "Cache cleaned";
+  //}

   // Default values to enable auto caching
   //data["cache_prompt"] = caching_enabled;
@@ -656,7 +656,7 @@ bool llamaCPP::LoadModelImpl(std::shared_ptr<Json::Value> jsonBody) {
   params.cont_batching = jsonBody->get("cont_batching", false).asBool();
   this->clean_cache_threshold =
       jsonBody->get("clean_cache_threshold", 5).asInt();
-  this->caching_enabled = jsonBody->get("caching_enabled", false).asBool();
+  this->caching_enabled = jsonBody->get("caching_enabled", true).asBool();
   this->user_prompt = jsonBody->get("user_prompt", "USER: ").asString();
   this->ai_prompt = jsonBody->get("ai_prompt", "ASSISTANT: ").asString();
   this->system_prompt =
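Net effect of the second hunk: when the caller omits "caching_enabled" from the load-model request body, prompt caching is now enabled by default. The following is a minimal stand-alone sketch (not part of the commit) of how the defaults resolve with jsoncpp, which the controller uses; it reuses only field names visible in the diff and omits the rest of LoadModelImpl.

#include <json/json.h>
#include <iostream>

int main() {
  // Pretend this is the parsed load-model request body. The caller did not
  // set "caching_enabled", so the new default of `true` is picked up.
  Json::Value jsonBody;
  jsonBody["cont_batching"] = false;

  bool caching_enabled = jsonBody.get("caching_enabled", true).asBool();
  int clean_cache_threshold = jsonBody.get("clean_cache_threshold", 5).asInt();

  // Prints: caching_enabled=1 clean_cache_threshold=5
  std::cout << "caching_enabled=" << caching_enabled
            << " clean_cache_threshold=" << clean_cache_threshold << "\n";

  // A caller that wants the previous behavior must now opt out explicitly:
  jsonBody["caching_enabled"] = false;
  caching_enabled = jsonBody.get("caching_enabled", true).asBool();
  std::cout << "caching_enabled=" << caching_enabled << "\n";  // prints 0
  return 0;
}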
