Skip to content

Commit 2351cd0

Browse files
authored
Merge pull request #210 from VectorlyApp/agent-tool-decorator
Add `LLMExclude` field annotation for agent tool results
2 parents 130e9fb + d1e802d commit 2351cd0

7 files changed

Lines changed: 899 additions & 113 deletions

File tree

bluebox/agents/abstract_agent.py

Lines changed: 83 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -61,24 +61,47 @@
6161
get_workaround_for_error,
6262
)
6363
from bluebox.utils.data_utils import format_bytes
64+
from bluebox.utils.llm_serialization import serialize_tool_result, strip_llm_excluded
6465
from bluebox.utils.llm_utils import token_optimized as token_optimized_decorator
6566
from bluebox.utils.logger import get_logger
6667

6768
logger = get_logger(name=__name__)
6869

6970

71+
# Keep persisted tool previews small so iterative runs don't bloat context
72+
PERSISTED_TOOL_PREVIEW_MAX_CHARS = 800
73+
74+
7075
class ToolResultPersistMode(StrEnum):
76+
"""
77+
Policy controlling when a tool result is persisted to the workspace.
78+
79+
Persistence saves the full result as a raw artifact and returns a
80+
compact preview to the LLM, keeping context usage in check for
81+
large payloads.
82+
83+
Attributes:
84+
NEVER: Never persist; the full result is returned inline.
85+
ALWAYS: Always persist, regardless of size.
86+
OVERFLOW: Persist only when the serialized result exceeds the
87+
tool's ``max_characters`` threshold.
88+
"""
7189
NEVER = "never"
7290
ALWAYS = "always"
7391
OVERFLOW = "overflow"
7492

7593

76-
# Keep persisted tool previews small so iterative runs don't blow context.
77-
PERSISTED_TOOL_PREVIEW_MAX_CHARS = 800
78-
79-
8094
class AgentExecutionMode(StrEnum):
81-
"""Execution mode for agent loops."""
95+
"""
96+
Execution mode for agent loops.
97+
98+
Attributes:
99+
CONVERSATIONAL: Interactive mode where the agent responds to user
100+
messages one at a time via :meth:`process_new_message`.
101+
AUTONOMOUS: Self-directed mode where the agent runs a tool-driven
102+
loop until it calls a finalize tool or hits the iteration cap.
103+
See :meth:`AbstractAgent.run_autonomous`.
104+
"""
82105
CONVERSATIONAL = "conversational"
83106
AUTONOMOUS = "autonomous"
84107

@@ -108,36 +131,75 @@ class variable. Orchestrator agents use these cards to discover subagent
108131

109132
@dataclass(frozen=True)
class _ToolMeta:
    """
    Metadata that :func:`agent_tool` attaches to a handler method.

    Each decorated method carries an instance as ``method._tool_meta``;
    :meth:`AbstractAgent._collect_tools` gathers them when the class body
    is executed.

    Attributes:
        name: Tool name registered with the LLM client (the method name
            with leading underscores stripped).
        description: Human-readable description shown to the LLM.
        parameters: JSON Schema ``object`` describing accepted parameters.
        availability: Static boolean or a callable ``(self) -> bool``
            evaluated before each LLM call to gate tool registration.
        persist: Result-persistence policy. See :class:`ToolResultPersistMode`.
        max_characters: Character threshold used by
            :attr:`ToolResultPersistMode.OVERFLOW` to decide when to
            persist a result to the workspace.
        token_optimized: If ``True``, the tool result is encoded with
            the ``token_optimized`` decorator for reduced token usage.
    """
    name: str
    description: str
    parameters: dict[str, Any]
    availability: bool | Callable[..., bool]
    persist: ToolResultPersistMode = ToolResultPersistMode.NEVER
    max_characters: int = 10_000
    token_optimized: bool = False
119162

120163

121-
def _serialize_tool_result(tool_result: Any) -> tuple[str, str]:
122-
try:
123-
return json.dumps(tool_result, ensure_ascii=False, default=str, indent=2), "json"
124-
except (TypeError, ValueError):
125-
return str(tool_result), "text"
164+
def _normalize_file_scope(scope: str) -> str:
165+
"""
166+
Normalize and validate a file-tool scope string. Strips whitespace, lowercases,
167+
and ensures the value is one of the accepted scope literals.
126168
169+
Args:
170+
scope: Raw scope value from a tool call (e.g. ``"Workspace"``).
127171
128-
def _normalize_file_scope(scope: str) -> str:
129-
"""Normalize and validate file tool scope."""
172+
Returns:
173+
The normalized scope (``"workspace"`` or ``"docs"``).
174+
175+
Raises:
176+
ValueError: If *scope* is not a recognized value.
177+
"""
130178
normalized_scope = scope.strip().lower()
131179
if normalized_scope not in {"workspace", "docs"}:
132180
raise ValueError("scope must be 'workspace' or 'docs'")
133181
return normalized_scope
134182

135183

136184
def _parse_search_terms(query: str) -> list[str]:
137-
"""Split query text into distinct terms for terms-mode search."""
185+
"""
186+
Split a query string into unique, order-preserving search terms.
187+
188+
Tokens are split on commas and whitespace. Empty tokens and
189+
duplicates are discarded while preserving first-occurrence order.
190+
191+
Args:
192+
query: Free-text search query (e.g. ``"foo, bar baz"``).
193+
194+
Returns:
195+
Deduplicated list of non-empty terms in original order.
196+
"""
138197
seen: set[str] = set()
139198
terms: list[str] = []
140-
for token in re.split(r"[,\s]+", query):
199+
for token in re.split(
200+
pattern=r"[,\s]+",
201+
string=query
202+
):
141203
term = token.strip()
142204
if term and term not in seen:
143205
seen.add(term)
@@ -551,7 +613,7 @@ def _maybe_persist_tool_result(
551613
if persist_mode == ToolResultPersistMode.NEVER:
552614
return tool_result
553615

554-
serialized, content_type = _serialize_tool_result(tool_result)
616+
serialized, content_type = serialize_tool_result(tool_result)
555617
char_count = len(serialized)
556618

557619
if persist_mode == ToolResultPersistMode.OVERFLOW and char_count <= tool_meta.max_characters:
@@ -1080,6 +1142,7 @@ def _execute_tool(self, tool_name: str, tool_arguments: dict[str, Any]) -> dict[
10801142
logger.debug("Executing tool %s with arguments: %s", tool_name, tool_arguments)
10811143
# handler is unbound (from cls, not self) so pass self explicitly
10821144
raw_result = handler(self, **validated_arguments)
1145+
raw_result = strip_llm_excluded(raw_result) # strip LLMExclude-annotated fields from any Pydantic models
10831146
result_for_llm = self._maybe_persist_tool_result(
10841147
tool_name=tool_name,
10851148
tool_meta=tool_meta,

bluebox/agents/specialists/interaction_specialist.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ class InteractionSpecialist(AbstractAgent):
5656
"structural context (forms, inputs, buttons, links)."
5757
),
5858
)
59+
5960
SYSTEM_PROMPT: str = dedent("""\
6061
You are a UI interaction analyst specializing in understanding what users
6162
did on web pages from recorded browser interaction events.

bluebox/agents/specialists/network_specialist.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,7 @@ class NetworkSpecialist(AbstractAgent):
5353
"inspecting request/response data, and semantic search across captured traffic."
5454
),
5555
)
56+
5657
SYSTEM_PROMPT: str = dedent(f"""
5758
You are a network traffic analyst specializing in captured browser network data.
5859

bluebox/utils/llm_serialization.py

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
"""
2+
bluebox/utils/llm_serialization.py
3+
4+
Utilities for controlling what data gets sent to LLMs from tool results.
5+
6+
The LLMExclude marker lets you annotate Pydantic model fields that should be
7+
stripped before a tool result is serialized for the LLM — e.g. large blobs,
8+
internal IDs, or raw data the model doesn't need.
9+
10+
Usage on models::
11+
12+
from typing import Annotated
13+
from pydantic import BaseModel
14+
from bluebox.utils.llm_serialization import LLMExclude
15+
16+
class NetworkTransaction(BaseModel):
17+
url: str
18+
method: str
19+
response_body: Annotated[str, LLMExclude()] # stripped before LLM sees it
20+
21+
Tool handlers can return these models (or dicts containing them) directly —
22+
the agent infrastructure calls strip_llm_excluded() automatically.
23+
"""
24+
25+
from __future__ import annotations
26+
27+
import functools
28+
import json
29+
from enum import StrEnum
30+
from typing import Any, NamedTuple
31+
32+
from pydantic import BaseModel
33+
34+
35+
class SerializedContentType(StrEnum):
36+
"""
37+
Content type of a serialized tool result.
38+
39+
Attributes:
40+
JSON: Successfully serialized as JSON.
41+
TEXT: Fell back to ``str()`` representation.
42+
"""
43+
JSON = "json"
44+
TEXT = "text"
45+
46+
47+
class SerializedToolResult(NamedTuple):
    """
    Outcome of serializing a tool return value for the LLM.

    Attributes:
        serialized: The serialized payload (JSON text, or plain ``str()``).
        content_type: Which serialization path produced ``serialized``.
    """
    serialized: str
    content_type: SerializedContentType
57+
58+
59+
class LLMExclude:
    """
    Annotation marker: drop this field from LLM-visible tool results.

    Attach via ``Annotated``::

        name: str                                  # included
        raw_blob: Annotated[bytes, LLMExclude()]   # excluded
    """
69+
70+
71+
def serialize_tool_result(tool_result: Any) -> SerializedToolResult:
    """
    Serialize a tool result into a string for the LLM.

    JSON encoding is attempted first (non-serializable values are coerced
    via ``default=str``); if encoding raises, the ``str()`` representation
    is used as a fallback.

    Args:
        tool_result: The value returned by a tool handler (typically a dict).

    Returns:
        A :class:`SerializedToolResult` (also unpacks as a two-tuple).
    """
    try:
        payload = json.dumps(tool_result, ensure_ascii=False, default=str, indent=2)
    except (TypeError, ValueError):
        # e.g. circular references raise ValueError even with default=str
        return SerializedToolResult(str(tool_result), SerializedContentType.TEXT)
    return SerializedToolResult(payload, SerializedContentType.JSON)
99+
100+
101+
@functools.lru_cache(maxsize=256)
def _excluded_fields(model_cls: type[BaseModel]) -> frozenset[str]:
    """
    Collect the names of a model's fields annotated with :class:`LLMExclude`.

    Inspects ``model_cls.model_fields`` and looks for an ``LLMExclude``
    instance in each field's ``metadata`` list (attached via
    ``Annotated[Type, LLMExclude()]``).

    Cached per class via ``lru_cache``; safe because Pydantic fixes field
    definitions at class creation time.

    Args:
        model_cls: A Pydantic BaseModel subclass to inspect.

    Returns:
        Frozen set of field names to exclude from LLM serialization; empty
        if the model carries no LLMExclude annotations.
    """
    excluded: set[str] = set()
    for field_name, field_info in model_cls.model_fields.items():
        if any(isinstance(marker, LLMExclude) for marker in field_info.metadata):
            excluded.add(field_name)
    return frozenset(excluded)
124+
125+
126+
def strip_llm_excluded(obj: Any) -> Any:
    """
    Recursively remove LLMExclude-annotated fields from Pydantic models.

    Walks the object tree: every ``BaseModel`` instance becomes a plain dict
    with its LLMExclude-annotated fields dropped and the surviving values
    processed recursively. Containers are recursed into with their type
    preserved; anything else passes through unchanged.

    Supported containers:
        - ``BaseModel``: fields filtered, remaining values recursed
        - ``dict``: values recursed, keys preserved
        - ``list`` / ``tuple``: elements recursed, container type preserved

    Args:
        obj: Any object — typically a tool handler's return value. Can be a
            BaseModel, dict, list, tuple, or primitive.

    Returns:
        A plain-dict / list / tuple / primitive copy with all LLMExclude
        fields removed from any BaseModel found at any nesting depth.
    """
    if isinstance(obj, BaseModel):
        model_cls = type(obj)
        excluded = _excluded_fields(model_cls)
        stripped: dict[str, Any] = {}
        for field_name in model_cls.model_fields:
            if field_name not in excluded:
                stripped[field_name] = strip_llm_excluded(getattr(obj, field_name))
        # @computed_field properties live outside model_fields; include them too.
        for field_name in model_cls.model_computed_fields:
            if field_name not in excluded:
                stripped[field_name] = strip_llm_excluded(getattr(obj, field_name))
        return stripped
    if isinstance(obj, dict):
        return {key: strip_llm_excluded(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [strip_llm_excluded(item) for item in obj]
    if isinstance(obj, tuple):
        return tuple(strip_llm_excluded(item) for item in obj)
    return obj

0 commit comments

Comments
 (0)