We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent c37132b commit 798d4fc Copy full SHA for 798d4fc
llama_cpp/llama_chat_format.py
@@ -696,6 +696,8 @@ def chat_completion_handler(
696
return _convert_completion_to_chat_function(
697
tool_name, completion_or_chunks, stream
698
)
699
+ llama.reset()
700
+ llama._ctx.kv_cache_clear()
701
return _convert_completion_to_chat(completion_or_chunks, stream=stream)
702
703
return chat_completion_handler
0 commit comments