livekit · tinalenguyen · Jan 13, 2026 · Jan 13, 2026 · Jan 13, 2026 · longcw
diff --git a/livekit-agents/livekit/agents/voice/agent_activity.py b/livekit-agents/livekit/agents/voice/agent_activity.py
@@ -1944,6 +1944,7 @@ def _tool_execution_started_cb(fnc_call: llm.FunctionCall) -> None:
             # reset the `created_at` to the start time of the tool execution
             fnc_call.created_at = time.time()
             speech_handle._item_added([fnc_call])
+            self._session._update_agent_state("processing")
 
         def _tool_execution_completed_cb(out: ToolExecutionOutput) -> None:
             if out.fnc_call_out:
@@ -2214,6 +2215,7 @@ async def _realtime_generation_task(
         with tracer.start_as_current_span(
             "agent_turn", context=self._session._root_span_context
         ) as current_span:
+            self._session._update_agent_state("thinking")
             current_span.set_attribute(trace_types.ATTR_AGENT_TURN_ID, speech_handle._generation_id)
             if parent_id := speech_handle._parent_generation_id:
                 current_span.set_attribute(trace_types.ATTR_AGENT_PARENT_TURN_ID, parent_id)
@@ -2414,6 +2416,7 @@ async def _read_fnc_stream() -> None:
         )
 
         def _tool_execution_started_cb(fnc_call: llm.FunctionCall) -> None:
+            self._session._update_agent_state("processing")
             speech_handle._item_added([fnc_call])
             self._agent._chat_ctx.items.append(fnc_call)
             self._session._tool_items_added([fnc_call])
@@ -2444,7 +2447,8 @@ def _tool_execution_completed_cb(out: ToolExecutionOutput) -> None:
             await speech_handle.wait_if_not_interrupted(
                 [asyncio.ensure_future(audio_output.wait_for_playout())]
             )
-            self._session._update_agent_state("listening")
+            if exe_task.done():
+                self._session._update_agent_state("listening")
             current_span.set_attribute(
                 trace_types.ATTR_SPEECH_INTERRUPTED, speech_handle.interrupted
             )

diff --git a/livekit-agents/livekit/agents/voice/events.py b/livekit-agents/livekit/agents/voice/events.py
@@ -95,7 +95,7 @@ async def wait_for_playout(self) -> None:
 ]
 
 UserState = Literal["speaking", "listening", "away"]
-AgentState = Literal["initializing", "idle", "listening", "thinking", "speaking"]
+AgentState = Literal["initializing", "idle", "listening", "thinking", "speaking", "processing"]
 
 
 class UserStateChangedEvent(BaseModel):

diff --git a/tests/test_agent_session.py b/tests/test_agent_session.py
@@ -177,14 +177,14 @@ async def test_tool_call() -> None:
     check_timestamp(playback_finished_events[0].playback_position, 2.0, speed_factor=speed)
     check_timestamp(playback_finished_events[1].playback_position, 3.0, speed_factor=speed)
 
-    assert len(agent_state_events) == 6
+    assert len(agent_state_events) == 7
     assert agent_state_events[0].old_state == "initializing"
     assert agent_state_events[0].new_state == "listening"
     assert agent_state_events[1].new_state == "thinking"
-    assert agent_state_events[2].new_state == "speaking"
     assert (
-        agent_state_events[3].new_state == "thinking"
-    )  # from speaking to thinking when tool call is executed
+        agent_state_events[2].new_state == "processing"
+    )  # from thinking to processing once the tool call starts executing
+    assert agent_state_events[3].new_state == "thinking"
     check_timestamp(agent_state_events[3].created_at - t_origin, 5.5, speed_factor=speed)
     assert agent_state_events[4].new_state == "speaking"
     assert agent_state_events[5].new_state == "listening"