This repository was archived by the owner on Jul 4, 2025. It is now read-only.

Commit 70b2a92

Merge pull request #509 from janhq/hotfix-turnoff-all-caches
hotfix: will always be cleaning cache
2 parents d2441b4 + 1276fbc commit 70b2a92

File tree

2 files changed: +10 -9 lines changed

context/llama_server_context.h

Lines changed: 1 addition & 1 deletion
@@ -471,7 +471,7 @@ struct llama_server_context {
   llama_batch batch;

   bool multimodal = false;
-  bool clean_kv_cache = false;
+  bool clean_kv_cache = true;
   bool all_slots_are_idle = false;
   bool add_bos_token = true;
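
For context, the flag itself is only a default: in the upstream llama.cpp server example this header is vendored from, the update loop clears the KV cache on an idle-slot pass when clean_kv_cache is set. Below is a minimal, standalone sketch of that behaviour; the mock struct is illustrative only and is not the real llama_server_context (which wraps llama_kv_cache_clear from llama.cpp).

// Standalone illustration only: a mock context showing what flipping the
// default to true means for the update loop.
#include <cstdio>

struct mock_server_context {
  bool clean_kv_cache = true;      // default restored by this commit
  bool all_slots_are_idle = true;

  void kv_cache_clear() {
    // the real implementation wraps llama_kv_cache_clear(ctx)
    std::puts("KV cache cleared");
  }

  void update_slots() {
    // With the old default (false) this branch was skipped until something
    // set the flag; with true, an idle pass always cleans up.
    if (all_slots_are_idle && clean_kv_cache) {
      kv_cache_clear();
    }
  }
};

int main() {
  mock_server_context ctx;
  ctx.update_slots();  // prints "KV cache cleared"
  return 0;
}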

controllers/llamaCPP.cc

Lines changed: 9 additions & 8 deletions
@@ -197,15 +197,16 @@ void llamaCPP::InferenceImpl(
   // To set default value

   // Increase number of chats received and clean the prompt
-  no_of_chats++;
-  if (no_of_chats % clean_cache_threshold == 0) {
-    LOG_INFO_REQUEST(request_id) << "Clean cache threshold reached!";
-    llama.kv_cache_clear();
-    LOG_INFO_REQUEST(request_id) << "Cache cleaned";
-  }
+  //no_of_chats++;
+  //if (no_of_chats % clean_cache_threshold == 0) {
+  //  LOG_INFO_REQUEST(request_id) << "Clean cache threshold reached!";
+  //  llama.kv_cache_clear();
+  //  LOG_INFO_REQUEST(request_id) << "Cache cleaned";
+  //}

   // Default values to enable auto caching
-  data["cache_prompt"] = caching_enabled;
+  //data["cache_prompt"] = caching_enabled;
+  data["cache_prompt"] = false;
   data["n_keep"] = -1;

   // Passing load value
@@ -655,7 +656,7 @@ bool llamaCPP::LoadModelImpl(std::shared_ptr<Json::Value> jsonBody) {
   params.cont_batching = jsonBody->get("cont_batching", false).asBool();
   this->clean_cache_threshold =
       jsonBody->get("clean_cache_threshold", 5).asInt();
-  this->caching_enabled = jsonBody->get("caching_enabled", true).asBool();
+  this->caching_enabled = jsonBody->get("caching_enabled", false).asBool();
   this->user_prompt = jsonBody->get("user_prompt", "USER: ").asString();
   this->ai_prompt = jsonBody->get("ai_prompt", "ASSISTANT: ").asString();
   this->system_prompt =
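
Since the caching knobs are still read from the load-model request body, the sketch below shows how such a body could be built with jsoncpp's Json::Value, the same API LoadModelImpl uses. Field names and defaults come from the diff above; the standalone program, and the idea of POSTing the result to a load-model route, are assumptions for illustration. Note that after this hotfix, InferenceImpl forces cache_prompt to false regardless of caching_enabled.

// Illustrative only: building a load-model body with the fields that
// LoadModelImpl reads in the diff above (jsoncpp's Json::Value API).
#include <json/json.h>
#include <iostream>
#include <string>

int main() {
  Json::Value body;
  body["cont_batching"] = false;       // same default LoadModelImpl falls back to
  body["clean_cache_threshold"] = 5;   // still parsed, but the periodic clear is commented out
  body["caching_enabled"] = false;     // new default after this commit
  body["user_prompt"] = "USER: ";
  body["ai_prompt"] = "ASSISTANT: ";

  Json::StreamWriterBuilder writer;
  std::cout << Json::writeString(writer, body) << std::endl;  // serialize, e.g. for an HTTP POST
  return 0;
}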
