Merge pull request #1428 from 3clyp50/dirtyjson

frdel · web-flow · commit 2da44168da8b · 2026-04-03T17:05:48.000+02:00
Dispatch tool calls at first completed JSON object
diff --git a/agent.py b/agent.py
@@ -388,6 +388,7 @@ async def monologue(self):
                     self.context.streaming_agent = self  # mark self as current streamer
                     self.loop_data.iteration += 1
                     self.loop_data.params_temporary = {}  # clear temporary params
+                    last_response_stream_full = ""
 
                     # call message_loop_start extensions
                     await extension.call_extensions_async(
@@ -425,12 +426,32 @@ async def reasoning_callback(chunk: str, full: str):
                             await self.handle_reasoning_stream(stream_data["full"])
 
                         async def stream_callback(chunk: str, full: str):
+                            nonlocal last_response_stream_full
                             await self.handle_intervention()
                             # output the agent response stream
                             if chunk == full:
                                 printer.print("Response: ")  # start of response
                             # Pass chunk and full data to extensions for processing
                             stream_data = {"chunk": chunk, "full": full}
+                            stop_response: str | None = None
+
+                            snapshot = extract_tools.extract_json_root_string(full)
+                            if snapshot:
+                                parsed_snapshot = extract_tools.json_parse_dirty(snapshot)
+                                if parsed_snapshot is not None:
+                                    try:
+                                        await self.validate_tool_request(parsed_snapshot)
+                                    except Exception:
+                                        pass
+                                    else:
+                                        previous_full = last_response_stream_full
+                                        stream_data["full"] = snapshot
+                                        if snapshot.startswith(previous_full):
+                                            stream_data["chunk"] = snapshot[len(previous_full) :]
+                                        else:
+                                            stream_data["chunk"] = snapshot
+                                        stop_response = snapshot
+
                             await extension.call_extensions_async(
                                 "response_stream_chunk",
                                 self,
@@ -442,6 +463,9 @@ async def stream_callback(chunk: str, full: str):
                                 printer.stream(stream_data["chunk"])
                             # Use the potentially modified full text for downstream processing
                             await self.handle_response_stream(stream_data["full"])
+                            last_response_stream_full = stream_data["full"]
+                            if stop_response is not None:
+                                return stop_response
 
                         # call main LLM
                         agent_response, _reasoning = await self.call_chat_model(
@@ -770,7 +794,7 @@ async def stream_callback(chunk: str, total: str):
     async def call_chat_model(
         self,
         messages: list[BaseMessage],
-        response_callback: Callable[[str, str], Awaitable[None]] | None = None,
+        response_callback: Callable[[str, str], Awaitable[str | None]] | None = None,
         reasoning_callback: Callable[[str, str], Awaitable[None]] | None = None,
         background: bool = False,
         explicit_caching: bool = True,
diff --git a/helpers/dirty_json.py b/helpers/dirty_json.py
@@ -25,6 +25,14 @@ def _reset(self):
         self.current_char = None
         self.result = None
         self.stack = []
+        self.completed = False
+        self._parsing_started = False
+
+    def _pop_stack(self, root_closed: bool = False):
+        """Drop the innermost open container; flag ``completed`` only if an explicit close emptied the stack."""
+        self.stack.pop()
+        if not self.stack and self._parsing_started and root_closed:
+            self.completed = True
 
     @staticmethod
     def parse_string(json_string):
@@ -95,13 +103,17 @@ def _skip_multi_line_comment(self):
             self._advance()
 
     def _parse(self):
+        if self.completed and not self.stack:
+            return
         if self.result is None:
             self.result = self._parse_value()
         else:
             self._continue_parsing()
 
     def _continue_parsing(self):
         while self.current_char is not None:
+            if self.completed and not self.stack:
+                return
             if isinstance(self.result, dict):
                 self._parse_object_content()
             elif isinstance(self.result, list):
@@ -114,7 +126,9 @@ def _continue_parsing(self):
     def _parse_value(self):
         self._skip_whitespace()
         if self.current_char == "{":
-            if self._peek(1) == "{":  # Handle {{
+            # Only treat doubled braces as a wrapper at the root; nested objects
+            # must keep their closing braces paired correctly.
+            if not self.stack and self._peek(1) == "{":  # Handle {{
                 self._advance(2)
             return self._parse_object()
         elif self.current_char == "[":
@@ -153,21 +167,24 @@ def _parse_object(self):
         obj = {}
         self._advance()  # Skip opening brace
         self.stack.append(obj)
+        self._parsing_started = True
         self._parse_object_content()
         return obj
 
     def _parse_object_content(self):
         while self.current_char is not None:
             self._skip_whitespace()
             if self.current_char == "}":
-                if self._peek(1) == "}":  # Handle }}
+                # Root-level wrapper outputs may end in "}}"; nested objects must
+                # still close one brace at a time.
+                if len(self.stack) == 1 and self._peek(1) == "}":  # Handle }}
                     self._advance(2)
                 else:
                     self._advance()
-                self.stack.pop()
+                self._pop_stack(root_closed=True)
                 return
             if self.current_char is None:
-                self.stack.pop()
+                self._pop_stack()
                 return  # End of input reached while parsing object
 
             key = self._parse_key()
@@ -190,7 +207,7 @@ def _parse_object_content(self):
                 continue
             elif self.current_char != "}":
                 if self.current_char is None:
-                    self.stack.pop()
+                    self._pop_stack()
                     return  # End of input reached after value
                 continue
 
@@ -216,6 +233,7 @@ def _parse_array(self):
         arr = []
         self._advance()  # Skip opening bracket
         self.stack.append(arr)
+        self._parsing_started = True
         self._parse_array_content()
         return arr
 
@@ -224,7 +242,7 @@ def _parse_array_content(self):
             self._skip_whitespace()
             if self.current_char == "]":
                 self._advance()
-                self.stack.pop()
+                self._pop_stack(root_closed=True)
                 return
             value = self._parse_value()
             self.stack[-1].append(value)
@@ -236,10 +254,14 @@ def _parse_array_content(self):
                 if self.current_char is None or self.current_char == "]":
                     if self.current_char == "]":
                         self._advance()
-                    self.stack.pop()
+                        self._pop_stack(root_closed=True)
+                    else:
+                        # EOF right after a trailing comma: the array was cut
+                        # off, not closed, so it must not mark the root complete.
+                        self._pop_stack()
                     return
             elif self.current_char != "]":
-                self.stack.pop()
+                self._pop_stack()
                 return
 
     def _parse_string(self):
diff --git a/helpers/extract_tools.py b/helpers/extract_tools.py
@@ -19,6 +19,28 @@ def json_parse_dirty(json: str) -> dict[str, Any] | None:
             return None
     return None
 
+def extract_json_root_string(content: str) -> str | None:
+    """Return the text of the root object starting at the first "{" once DirtyJson reports it completed, else None."""
+    if not content or not isinstance(content, str):
+        return None
+
+    brace_pos = content.find("{")
+    if brace_pos == -1:
+        return None
+    # Bail out when a "[" precedes the first "{" (presumably a root array - confirm).
+    bracket_pos = content.find("[")
+    if -1 < bracket_pos < brace_pos:
+        return None
+
+    parser = DirtyJson()
+    try:
+        parser.parse(content[brace_pos:])
+    except Exception:
+        # Best effort: any parser failure means there is no usable snapshot.
+        return None
+
+    return content[brace_pos : brace_pos + parser.index] if parser.completed else None
+
 
 def extract_json_object_string(content):
     start = content.find("{")
diff --git a/models.py b/models.py
@@ -475,7 +475,7 @@ async def unified_call(
         system_message="",
         user_message="",
         messages: List[BaseMessage] | None = None,
-        response_callback: Callable[[str, str], Awaitable[None]] | None = None,
+        response_callback: Callable[[str, str], Awaitable[str | None]] | None = None,
         reasoning_callback: Callable[[str, str], Awaitable[None]] | None = None,
         tokens_callback: Callable[[str, int], Awaitable[None]] | None = None,
         rate_limiter_callback: (
@@ -526,36 +526,46 @@ async def unified_call(
 
                 if stream:
                     # iterate over chunks
-                    async for chunk in _completion:  # type: ignore
-                        got_any_chunk = True
-                        # parse chunk
-                        parsed = _parse_chunk(chunk)
-                        output = result.add_chunk(parsed)
-
-                        # collect reasoning delta and call callbacks
-                        if output["reasoning_delta"]:
-                            if reasoning_callback:
-                                await reasoning_callback(output["reasoning_delta"], result.reasoning)
-                            if tokens_callback:
-                                await tokens_callback(
-                                    output["reasoning_delta"],
-                                    approximate_tokens(output["reasoning_delta"]),
-                                )
-                            # Add output tokens to rate limiter if configured
-                            if limiter:
-                                limiter.add(output=approximate_tokens(output["reasoning_delta"]))
-                        # collect response delta and call callbacks
-                        if output["response_delta"]:
-                            if response_callback:
-                                await response_callback(output["response_delta"], result.response)
-                            if tokens_callback:
-                                await tokens_callback(
-                                    output["response_delta"],
-                                    approximate_tokens(output["response_delta"]),
-                                )
-                            # Add output tokens to rate limiter if configured
-                            if limiter:
-                                limiter.add(output=approximate_tokens(output["response_delta"]))
+                    stop_response: str | None = None
+                    try:
+                        async for chunk in _completion:  # type: ignore
+                            got_any_chunk = True
+                            # parse chunk
+                            parsed = _parse_chunk(chunk)
+                            output = result.add_chunk(parsed)
+
+                            # collect reasoning delta and call callbacks
+                            if output["reasoning_delta"]:
+                                if reasoning_callback:
+                                    await reasoning_callback(output["reasoning_delta"], result.reasoning)
+                                if tokens_callback:
+                                    await tokens_callback(
+                                        output["reasoning_delta"],
+                                        approximate_tokens(output["reasoning_delta"]),
+                                    )
+                                # Add output tokens to rate limiter if configured
+                                if limiter:
+                                    limiter.add(output=approximate_tokens(output["reasoning_delta"]))
+                            # collect response delta and call callbacks
+                            if output["response_delta"]:
+                                if response_callback:
+                                    stop_response = await response_callback(
+                                        output["response_delta"], result.response
+                                    )
+                                if tokens_callback:
+                                    await tokens_callback(
+                                        output["response_delta"],
+                                        approximate_tokens(output["response_delta"]),
+                                    )
+                                # Add output tokens to rate limiter if configured
+                                if limiter:
+                                    limiter.add(output=approximate_tokens(output["response_delta"]))
+                            if stop_response is not None:
+                                result.response = stop_response
+                                break
+                    finally:
+                        if stop_response is not None and hasattr(_completion, "aclose"):
+                            await _completion.aclose()  # type: ignore[attr-defined]
 
                 # non-stream response
                 else:
diff --git a/tests/test_dirty_json.py b/tests/test_dirty_json.py
@@ -0,0 +1,56 @@
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+import pytest
+
+PROJECT_ROOT = Path(__file__).resolve().parents[1]
+if str(PROJECT_ROOT) not in sys.path:
+    sys.path.insert(0, str(PROJECT_ROOT))
+
+from helpers.dirty_json import DirtyJson
+
+
+@pytest.mark.parametrize(
+    ("payload", "expected"),
+    [
+        (
+            '{"tool_name":"x","tool_args":{}}',
+            {"tool_name": "x", "tool_args": {}},
+        ),
+        ("[1, 2, 3]", [1, 2, 3]),
+    ],
+)
+def test_completed_true_when_root_is_explicitly_closed(payload, expected) -> None:
+    """A root object or array ending with its closing token parses fully and sets ``completed``."""
+    parser = DirtyJson()
+    assert parser.parse(payload) == expected
+    assert parser.completed is True
+
+
+def test_completed_false_when_root_hits_eof_before_closing() -> None:
+    """Input that ends before the root "}" still yields the parsed dict but leaves ``completed`` False."""
+    parser = DirtyJson()
+    assert parser.parse('{"tool_name":"x","tool_args":{}') == {
+        "tool_name": "x",
+        "tool_args": {},
+    }
+    assert parser.completed is False
+
+
+def test_completed_remains_true_after_trailing_content() -> None:
+    """Feeding extra text after the root object closed changes neither the result nor ``completed``."""
+    parser = DirtyJson()
+    assert parser.feed('{"tool_name":"x","tool_args":{}}') == {
+        "tool_name": "x",
+        "tool_args": {},
+    }
+    assert parser.completed is True
+
+    assert parser.feed(" trailing noise") == {
+        "tool_name": "x",
+        "tool_args": {},
+    }
+
+    assert parser.completed is True
diff --git a/tests/test_stream_tool_early_stop.py b/tests/test_stream_tool_early_stop.py