
Commit 798d4fc
Clear kv cache and reset tokens after chat completion
1 parent: c37132b

File tree: 1 file changed (+2, -0)

llama_cpp/llama_chat_format.py (2 additions, 0 deletions)

@@ -696,6 +696,8 @@ def chat_completion_handler(
             return _convert_completion_to_chat_function(
                 tool_name, completion_or_chunks, stream
             )
+        llama.reset()
+        llama._ctx.kv_cache_clear()
         return _convert_completion_to_chat(completion_or_chunks, stream=stream)

     return chat_completion_handler
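The two added lines reset the model's evaluated-token count and clear the context's KV cache, so each chat completion starts from a clean state instead of inheriting stale tokens from the previous request. A toy sketch of that pattern follows; `FakeLlama` and `FakeContext` are hypothetical stand-ins for illustration, not the real `llama_cpp` classes:

```python
# Toy illustration of the state-reset pattern from commit 798d4fc
# (assumed stand-ins, NOT the real llama_cpp internals): if per-request
# state (evaluated tokens, KV cache) is not cleared after a chat
# completion, the next request starts from stale state.

class FakeContext:
    """Hypothetical stand-in for the low-level llama context object."""
    def __init__(self):
        self.kv_cache = []  # entries left behind by eval()

    def kv_cache_clear(self):
        self.kv_cache.clear()


class FakeLlama:
    """Hypothetical stand-in for the model wrapper, keeping only the
    state that matters for this commit."""
    def __init__(self):
        self._ctx = FakeContext()
        self.n_tokens = 0  # number of tokens currently evaluated

    def eval(self, tokens):
        self._ctx.kv_cache.extend(tokens)
        self.n_tokens += len(tokens)

    def reset(self):
        # Mirrors the spirit of reset(): forget evaluated tokens.
        self.n_tokens = 0


def chat_completion(llama, prompt_tokens):
    llama.eval(prompt_tokens)
    result = list(prompt_tokens)  # pretend this is the completion
    # The two lines the commit adds, in spirit: clear per-request state
    # before handing the result back.
    llama.reset()
    llama._ctx.kv_cache_clear()
    return result


llama = FakeLlama()
chat_completion(llama, [1, 2, 3])
# After the handler returns, no state from the request remains.
assert llama.n_tokens == 0
assert llama._ctx.kv_cache == []
```

Without the two cleanup calls, `n_tokens` and the cache would keep growing across requests, which is the leak this commit addresses.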
