From ffa8a6c8c20b39d9a538f0a219f48b8cc4b739e3 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 18 Dec 2025 22:26:35 +0000 Subject: [PATCH 1/8] feat(observability): Add Weave integration for agent tracing Add Weights & Biases Weave integration to the SDK observability module, providing comprehensive tracing capabilities for agent operations. New features: - weave_op decorator for tracing functions - observe_weave decorator with Laminar-compatible interface - weave_thread context manager for grouping related operations - WeaveSpanManager for manual span management - Auto-initialization via environment variables (WANDB_API_KEY, WEAVE_PROJECT) Files added: - openhands-sdk/openhands/sdk/observability/weave.py - examples/weave_observability_demo.py - tests/sdk/observability/test_weave.py Dependencies: - Added weave>=0.52.22 to openhands-sdk dependencies Co-authored-by: openhands --- examples/weave_observability_demo.py | 178 +++++++ .../openhands/sdk/observability/__init__.py | 31 +- .../openhands/sdk/observability/weave.py | 445 ++++++++++++++++++ openhands-sdk/pyproject.toml | 3 +- tests/sdk/observability/__init__.py | 0 tests/sdk/observability/test_weave.py | 274 +++++++++++ 6 files changed, 929 insertions(+), 2 deletions(-) create mode 100644 examples/weave_observability_demo.py create mode 100644 openhands-sdk/openhands/sdk/observability/weave.py create mode 100644 tests/sdk/observability/__init__.py create mode 100644 tests/sdk/observability/test_weave.py diff --git a/examples/weave_observability_demo.py b/examples/weave_observability_demo.py new file mode 100644 index 0000000000..0417e42d30 --- /dev/null +++ b/examples/weave_observability_demo.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python3 +"""Demo script showing Weave observability integration with OpenHands SDK. + +This script demonstrates how to use Weave for tracing agent operations. +It creates a simple agent that processes messages and shows how traces +appear in the Weave UI. + +Prerequisites: + - Set WANDB_API_KEY environment variable (valid W&B API key) + - Set WEAVE_PROJECT environment variable (e.g., "your-team/openhands-demo") + - Optionally set OPENAI_API_KEY for LLM calls + +Usage: + export WANDB_API_KEY="your-api-key" + export WEAVE_PROJECT="your-team/openhands-demo" + python examples/weave_observability_demo.py + +Note: + If WANDB_API_KEY is not set or invalid, the demo will still run + but without Weave tracing. This allows testing the decorator + functionality without requiring valid credentials. 
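+
+Programmatic alternative (a minimal sketch; replace the project name with
+your own W&B entity/project):
+
+    from openhands.sdk.observability.weave import init_weave
+
+    init_weave("your-team/openhands-demo")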
+""" + +import os +import sys + +# Add the SDK to the path for development +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "openhands-sdk")) + +from openhands.sdk.observability.weave import ( + init_weave, + is_weave_initialized, + maybe_init_weave, + weave_op, + weave_thread, + start_weave_span, + end_weave_span, + observe_weave, +) + + +# Example 1: Using the @weave_op decorator +@weave_op(name="process_message") +def process_message(message: str) -> dict: + """Process a user message and return a response.""" + # Simulate some processing + word_count = len(message.split()) + return { + "original": message, + "word_count": word_count, + "processed": True, + } + + +# Example 2: Using observe_weave for compatibility with Laminar +@observe_weave(name="analyze_sentiment") +def analyze_sentiment(text: str) -> str: + """Analyze the sentiment of text.""" + # Simple mock sentiment analysis + positive_words = {"good", "great", "excellent", "happy", "love"} + negative_words = {"bad", "terrible", "sad", "hate", "awful"} + + words = set(text.lower().split()) + pos_count = len(words & positive_words) + neg_count = len(words & negative_words) + + if pos_count > neg_count: + return "positive" + elif neg_count > pos_count: + return "negative" + return "neutral" + + +# Example 3: Nested operations with thread grouping +@weave_op(name="agent_step") +def agent_step(step_num: int, user_input: str) -> dict: + """Simulate an agent step with nested operations.""" + # Process the message + processed = process_message(user_input) + + # Analyze sentiment + sentiment = analyze_sentiment(user_input) + + return { + "step": step_num, + "processed": processed, + "sentiment": sentiment, + } + + +# Example 4: Manual span management +def manual_span_example(): + """Demonstrate manual span creation and management.""" + # Start a span + start_weave_span("manual_operation", inputs={"task": "demo"}) + + try: + # Do some work + result = {"status": "completed", "items_processed": 42} + end_weave_span(output=result) + return result + except Exception as e: + end_weave_span(error=e) + raise + + +def run_demo(): + """Run the Weave observability demo.""" + print("=" * 60) + print("Weave Observability Demo for OpenHands SDK") + print("=" * 60) + + # Check environment + api_key = os.environ.get("WANDB_API_KEY") + project = os.environ.get("WEAVE_PROJECT") + + if not api_key: + print("\n⚠️ WANDB_API_KEY not set. Weave tracing will be disabled.") + print(" Set it with: export WANDB_API_KEY='your-api-key'") + + if not project: + print("\n⚠️ WEAVE_PROJECT not set. 
Using default project name.") + project = "openhands-sdk-demo" + os.environ["WEAVE_PROJECT"] = project + + # Initialize Weave + print(f"\n📊 Initializing Weave for project: {project}") + success = maybe_init_weave() + + if success: + print("✅ Weave initialized successfully!") + print(f" View traces at: https://wandb.ai/{project}/weave") + else: + print("⚠️ Weave not initialized (missing credentials or package)") + print(" Running demo without tracing...") + + print("\n" + "-" * 60) + print("Running demo operations...") + print("-" * 60) + + # Demo 1: Simple decorated function + print("\n1️⃣ Processing a message with @weave_op decorator:") + result = process_message("Hello, this is a test message for the agent!") + print(f" Result: {result}") + + # Demo 2: Sentiment analysis with observe_weave + print("\n2️⃣ Analyzing sentiment with @observe_weave decorator:") + sentiment = analyze_sentiment("This is a great and excellent demo!") + print(f" Sentiment: {sentiment}") + + # Demo 3: Thread grouping for conversation + print("\n3️⃣ Simulating a conversation with thread grouping:") + conversation_id = "demo-conversation-001" + + with weave_thread(conversation_id): + for i, msg in enumerate([ + "Hello, I need help with my code", + "The function is not working correctly", + "Great, that fixed it! Thank you!", + ], 1): + result = agent_step(i, msg) + print(f" Step {i}: sentiment={result['sentiment']}") + + # Demo 4: Manual span management + print("\n4️⃣ Manual span management:") + result = manual_span_example() + print(f" Result: {result}") + + print("\n" + "=" * 60) + print("Demo completed!") + + if is_weave_initialized(): + print(f"\n🔗 View your traces at: https://wandb.ai/{project}/weave") + print("=" * 60) + + +if __name__ == "__main__": + run_demo() diff --git a/openhands-sdk/openhands/sdk/observability/__init__.py b/openhands-sdk/openhands/sdk/observability/__init__.py index 4f4ea48583..5b638534dc 100644 --- a/openhands-sdk/openhands/sdk/observability/__init__.py +++ b/openhands-sdk/openhands/sdk/observability/__init__.py @@ -1,4 +1,33 @@ from openhands.sdk.observability.laminar import maybe_init_laminar, observe +from openhands.sdk.observability.weave import ( + end_weave_span, + get_weave_client, + init_weave, + is_weave_initialized, + maybe_init_weave, + observe_weave, + should_enable_weave, + start_weave_span, + weave_op, + weave_thread, + WeaveSpanManager, +) -__all__ = ["maybe_init_laminar", "observe"] +__all__ = [ + # Laminar exports + "maybe_init_laminar", + "observe", + # Weave exports + "end_weave_span", + "get_weave_client", + "init_weave", + "is_weave_initialized", + "maybe_init_weave", + "observe_weave", + "should_enable_weave", + "start_weave_span", + "weave_op", + "weave_thread", + "WeaveSpanManager", +] diff --git a/openhands-sdk/openhands/sdk/observability/weave.py b/openhands-sdk/openhands/sdk/observability/weave.py new file mode 100644 index 0000000000..821765f6d9 --- /dev/null +++ b/openhands-sdk/openhands/sdk/observability/weave.py @@ -0,0 +1,445 @@ +"""Weave observability integration for OpenHands SDK. + +This module provides integration with Weights & Biases Weave for tracing +and observability of agent operations. Weave automatically tracks LLM calls, +tool executions, and agent steps. + +Configuration: + Set the following environment variables to enable Weave tracing: + - WANDB_API_KEY: Your Weights & Biases API key + - WEAVE_PROJECT: The Weave project name (e.g., "my-team/my-project") + + Alternatively, call `init_weave()` directly with the project name. 
+ +Example: + >>> from openhands.sdk.observability.weave import maybe_init_weave, weave_op + >>> maybe_init_weave() # Auto-initializes if env vars are set + >>> + >>> @weave_op(name="my_function") + >>> def my_function(x: int) -> int: + ... return x + 1 + +See Also: + - Weave documentation: https://docs.wandb.ai/weave + - Laminar integration: openhands.sdk.observability.laminar +""" + +from collections.abc import Callable +from contextlib import contextmanager +from functools import wraps +from typing import Any, ParamSpec, TypeVar + +from openhands.sdk.logger import get_logger +from openhands.sdk.observability.utils import get_env + + +logger = get_logger(__name__) + +# Type variables for generic function signatures +P = ParamSpec("P") +R = TypeVar("R") + +# Global state for Weave initialization +_weave_initialized: bool = False +_weave_client: Any = None + + +def should_enable_weave() -> bool: + """Check if Weave should be enabled based on environment configuration. + + Returns: + True if WANDB_API_KEY and WEAVE_PROJECT are set, False otherwise. + """ + api_key = get_env("WANDB_API_KEY") + project = get_env("WEAVE_PROJECT") + return bool(api_key and project) + + +def is_weave_initialized() -> bool: + """Check if Weave has been initialized. + + Returns: + True if Weave is initialized and ready for tracing. + """ + global _weave_initialized + return _weave_initialized + + +def init_weave( + project: str | None = None, + api_key: str | None = None, +) -> bool: + """Initialize Weave for tracing. + + Args: + project: The Weave project name (e.g., "my-team/my-project"). + If not provided, uses WEAVE_PROJECT environment variable. + api_key: The Weights & Biases API key. If not provided, uses + WANDB_API_KEY environment variable. + + Returns: + True if initialization was successful, False otherwise. + + Raises: + ValueError: If no project is specified and WEAVE_PROJECT is not set. + """ + import os + + global _weave_initialized, _weave_client + + if _weave_initialized: + logger.debug("Weave already initialized, skipping") + return True + + try: + import weave + except ImportError: + logger.warning( + "Weave package not installed. Install with: pip install weave" + ) + return False + + # Determine project name + project_name = project or get_env("WEAVE_PROJECT") + if not project_name: + raise ValueError( + "Weave project must be specified via argument or WEAVE_PROJECT env var" + ) + + # Set API key in environment if provided (Weave reads from env) + wandb_api_key = api_key or get_env("WANDB_API_KEY") + if wandb_api_key: + os.environ["WANDB_API_KEY"] = wandb_api_key + + # Ensure wandb is logged in (required by weave.init) + try: + import wandb + wandb.login(key=wandb_api_key, relogin=False) + except Exception as e: + logger.warning(f"wandb login failed: {e}") + else: + logger.warning( + "WANDB_API_KEY not set. Weave tracing may not work correctly." + ) + + try: + _weave_client = weave.init(project_name) + _weave_initialized = True + logger.info(f"Weave initialized for project: {project_name}") + return True + except Exception as e: + logger.error(f"Failed to initialize Weave: {e}") + return False + + +def maybe_init_weave() -> bool: + """Initialize Weave if environment variables are configured. + + This is a convenience function that checks for WANDB_API_KEY and + WEAVE_PROJECT environment variables and initializes Weave if both are set. + + Returns: + True if Weave was initialized (or already initialized), False otherwise. 
+ """ + if is_weave_initialized(): + return True + + if should_enable_weave(): + return init_weave() + + logger.debug( + "Weave environment variables not set (WANDB_API_KEY, WEAVE_PROJECT). " + "Skipping Weave initialization." + ) + return False + + +def get_weave_client() -> Any: + """Get the current Weave client. + + Returns: + The Weave client if initialized, None otherwise. + """ + global _weave_client + return _weave_client + + +def weave_op( + name: str | None = None, + *, + call_display_name: str | Callable[..., str] | None = None, + postprocess_inputs: Callable[..., dict[str, Any]] | None = None, + postprocess_output: Callable[..., Any] | None = None, +) -> Callable[[Callable[P, R]], Callable[P, R]]: + """Decorator to trace a function with Weave. + + This decorator wraps a function to automatically trace its inputs, outputs, + and execution time with Weave. If Weave is not initialized, the function + runs normally without tracing. + + Args: + name: Optional name for the operation. Defaults to the function name. + call_display_name: Optional display name or callable that returns a + display name for each call. + postprocess_inputs: Optional function to transform inputs before logging. + postprocess_output: Optional function to transform output before logging. + + Returns: + A decorator that wraps the function with Weave tracing. + + Example: + >>> @weave_op(name="process_data") + >>> def process_data(data: dict) -> dict: + ... return {"processed": True, **data} + """ + def decorator(func: Callable[P, R]) -> Callable[P, R]: + @wraps(func) + def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: + if not is_weave_initialized(): + return func(*args, **kwargs) + + try: + import weave + + # Build weave.op kwargs + op_kwargs: dict[str, Any] = {} + if name: + op_kwargs["name"] = name + if call_display_name: + op_kwargs["call_display_name"] = call_display_name + if postprocess_inputs: + op_kwargs["postprocess_inputs"] = postprocess_inputs + if postprocess_output: + op_kwargs["postprocess_output"] = postprocess_output + + # Apply weave.op decorator dynamically + traced_func = weave.op(**op_kwargs)(func) + return traced_func(*args, **kwargs) + except Exception as e: + logger.debug(f"Weave tracing failed, running without trace: {e}") + return func(*args, **kwargs) + + return wrapper + + return decorator + + +@contextmanager +def weave_thread(thread_id: str): + """Context manager to group operations under a Weave thread. + + Weave threads allow grouping related operations (like all events in a + conversation) under a single trace hierarchy. + + Args: + thread_id: Unique identifier for the thread (e.g., conversation ID). + + Yields: + The thread context if Weave is initialized, otherwise a no-op context. + + Example: + >>> with weave_thread("conversation-123"): + ... # All operations here will be grouped under the same thread + ... process_message("Hello") + ... generate_response() + """ + if not is_weave_initialized(): + yield + return + + try: + import weave + + # Check if there's an active Weave client + client = weave.client.get_current_client() + if client is None: + yield + return + + with weave.thread(thread_id): + yield + except Exception as e: + logger.debug(f"Weave thread context failed: {e}") + yield + + +class WeaveSpanManager: + """Manages Weave spans for manual tracing. + + This class provides a stack-based approach to managing Weave spans, + similar to the SpanManager for Laminar. It's useful when you need + more control over span lifecycle than the decorator provides. 
+ + Example: + >>> manager = WeaveSpanManager() + >>> manager.start_span("process_request", session_id="conv-123") + >>> try: + ... # Do work + ... pass + ... finally: + ... manager.end_span() + """ + + def __init__(self): + self._call_stack: list[Any] = [] + + def start_span( + self, + name: str, + inputs: dict[str, Any] | None = None, + session_id: str | None = None, + ) -> Any | None: + """Start a new Weave span. + + Args: + name: Name of the operation being traced. + inputs: Optional dictionary of input values to log. + session_id: Optional session ID for grouping related spans. + + Returns: + The Weave call object if successful, None otherwise. + """ + if not is_weave_initialized(): + return None + + try: + import weave + + client = get_weave_client() + if client is None: + return None + + # Create a call using the client API + call = client.create_call( + op=name, + inputs=inputs or {}, + ) + self._call_stack.append(call) + return call + except Exception as e: + logger.debug(f"Failed to start Weave span: {e}") + return None + + def end_span(self, output: Any = None, error: Exception | None = None) -> None: + """End the most recent Weave span. + + Args: + output: Optional output value to log. + error: Optional exception if the operation failed. + """ + if not self._call_stack: + logger.debug("Attempted to end span, but stack is empty") + return + + try: + call = self._call_stack.pop() + client = get_weave_client() + if client and call: + if error: + client.finish_call(call, output=None, exception=error) + else: + client.finish_call(call, output=output) + except Exception as e: + logger.debug(f"Failed to end Weave span: {e}") + + +# Global span manager instance +_span_manager: WeaveSpanManager | None = None + + +def _get_span_manager() -> WeaveSpanManager: + """Get or create the global span manager.""" + global _span_manager + if _span_manager is None: + _span_manager = WeaveSpanManager() + return _span_manager + + +def start_weave_span( + name: str, + inputs: dict[str, Any] | None = None, + session_id: str | None = None, +) -> Any | None: + """Start a new Weave span using the global span manager. + + Args: + name: Name of the operation being traced. + inputs: Optional dictionary of input values to log. + session_id: Optional session ID for grouping related spans. + + Returns: + The Weave call object if successful, None otherwise. + """ + return _get_span_manager().start_span(name, inputs, session_id) + + +def end_weave_span(output: Any = None, error: Exception | None = None) -> None: + """End the most recent Weave span using the global span manager. + + Args: + output: Optional output value to log. + error: Optional exception if the operation failed. + """ + try: + _get_span_manager().end_span(output, error) + except Exception: + logger.debug("Error ending Weave span") + + +def observe_weave( + *, + name: str | None = None, + ignore_inputs: list[str] | None = None, + ignore_output: bool = False, +) -> Callable[[Callable[P, R]], Callable[P, R]]: + """Unified observe decorator that works with both Weave and Laminar. + + This decorator provides a consistent interface for observability that + works regardless of which backend (Weave or Laminar) is configured. + It prioritizes Weave if initialized, otherwise falls back to Laminar. + + Args: + name: Optional name for the operation. + ignore_inputs: List of input parameter names to exclude from logging. + ignore_output: If True, don't log the function's output. + + Returns: + A decorator that wraps the function with observability tracing. 
+ + Example: + >>> @observe_weave(name="agent.step", ignore_inputs=["state"]) + >>> def step(self, state: State) -> Action: + ... return self._process(state) + """ + def decorator(func: Callable[P, R]) -> Callable[P, R]: + @wraps(func) + def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: + # Try Weave first + if is_weave_initialized(): + try: + import weave + + op_kwargs: dict[str, Any] = {} + if name: + op_kwargs["name"] = name + + # Handle input filtering via postprocess_inputs + if ignore_inputs: + def filter_inputs(inputs: dict[str, Any]) -> dict[str, Any]: + return { + k: v for k, v in inputs.items() + if k not in ignore_inputs + } + op_kwargs["postprocess_inputs"] = filter_inputs + + traced_func = weave.op(**op_kwargs)(func) + return traced_func(*args, **kwargs) + except Exception as e: + logger.debug(f"Weave tracing failed: {e}") + + # Fall through to untraced execution + return func(*args, **kwargs) + + return wrapper + + return decorator diff --git a/openhands-sdk/pyproject.toml b/openhands-sdk/pyproject.toml index 276295e37c..1b890aac6e 100644 --- a/openhands-sdk/pyproject.toml +++ b/openhands-sdk/pyproject.toml @@ -14,7 +14,8 @@ dependencies = [ "python-json-logger>=3.3.0", "tenacity>=9.1.2", "websockets>=12", - "lmnr>=0.7.24" + "lmnr>=0.7.24", + "weave>=0.52.22" ] [project.optional-dependencies] diff --git a/tests/sdk/observability/__init__.py b/tests/sdk/observability/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/sdk/observability/test_weave.py b/tests/sdk/observability/test_weave.py new file mode 100644 index 0000000000..64de5fe3c0 --- /dev/null +++ b/tests/sdk/observability/test_weave.py @@ -0,0 +1,274 @@ +"""Tests for Weave observability integration. + +These tests verify the Weave integration works correctly, including: +- Decorator functionality (with and without Weave initialized) +- Environment variable configuration +- Graceful fallback when Weave is not available +""" + +import os +from unittest.mock import MagicMock, patch + +import pytest + + +class TestWeaveConfiguration: + """Tests for Weave configuration and initialization.""" + + def test_should_enable_weave_with_both_vars(self): + """should_enable_weave returns True when both env vars are set.""" + from openhands.sdk.observability.weave import should_enable_weave + + with patch.dict(os.environ, { + "WANDB_API_KEY": "test-key", + "WEAVE_PROJECT": "test-project", + }): + assert should_enable_weave() is True + + def test_should_enable_weave_missing_api_key(self): + """should_enable_weave returns False when API key is missing.""" + from openhands.sdk.observability.weave import should_enable_weave + + with patch.dict(os.environ, { + "WEAVE_PROJECT": "test-project", + }, clear=True): + # Clear WANDB_API_KEY if it exists + os.environ.pop("WANDB_API_KEY", None) + assert should_enable_weave() is False + + def test_should_enable_weave_missing_project(self): + """should_enable_weave returns False when project is missing.""" + from openhands.sdk.observability.weave import should_enable_weave + + with patch.dict(os.environ, { + "WANDB_API_KEY": "test-key", + }, clear=True): + os.environ.pop("WEAVE_PROJECT", None) + assert should_enable_weave() is False + + def test_is_weave_initialized_default(self): + """is_weave_initialized returns False by default.""" + # Reset global state + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import is_weave_initialized + assert is_weave_initialized() is False + 
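+    # A minimal sketch of the auto-init path: with neither WANDB_API_KEY nor
+    # WEAVE_PROJECT set, maybe_init_weave should be a no-op and return False.
+    def test_maybe_init_weave_without_env_vars(self):
+        """maybe_init_weave returns False when env vars are not set."""
+        import openhands.sdk.observability.weave as weave_module
+        weave_module._weave_initialized = False
+
+        from openhands.sdk.observability.weave import maybe_init_weave
+
+        with patch.dict(os.environ, {}, clear=True):
+            assert maybe_init_weave() is False
+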
+ +class TestWeaveOpDecorator: + """Tests for the @weave_op decorator.""" + + def test_weave_op_without_initialization(self): + """@weave_op runs function normally when Weave is not initialized.""" + # Reset global state + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import weave_op + + @weave_op(name="test_function") + def test_function(x: int) -> int: + return x + 1 + + result = test_function(5) + assert result == 6 + + def test_weave_op_preserves_function_metadata(self): + """@weave_op preserves function name and docstring.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import weave_op + + @weave_op(name="custom_name") + def my_function(x: int) -> int: + """My docstring.""" + return x + + assert my_function.__name__ == "my_function" + assert my_function.__doc__ == "My docstring." + + def test_weave_op_handles_exceptions(self): + """@weave_op propagates exceptions correctly.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import weave_op + + @weave_op(name="failing_function") + def failing_function(): + raise ValueError("Test error") + + with pytest.raises(ValueError, match="Test error"): + failing_function() + + +class TestObserveWeaveDecorator: + """Tests for the @observe_weave decorator.""" + + def test_observe_weave_without_initialization(self): + """@observe_weave runs function normally when Weave is not initialized.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import observe_weave + + @observe_weave(name="test_observe") + def test_function(x: int, y: int) -> int: + return x + y + + result = test_function(3, 4) + assert result == 7 + + def test_observe_weave_with_ignore_inputs(self): + """@observe_weave correctly handles ignore_inputs parameter.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import observe_weave + + @observe_weave(name="test_ignore", ignore_inputs=["secret"]) + def test_function(data: str, secret: str) -> str: + return f"{data}-processed" + + result = test_function("hello", "my-secret") + assert result == "hello-processed" + + +class TestWeaveThread: + """Tests for the weave_thread context manager.""" + + def test_weave_thread_without_initialization(self): + """weave_thread works as no-op when Weave is not initialized.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import weave_thread + + results = [] + with weave_thread("test-thread-123"): + results.append(1) + results.append(2) + + assert results == [1, 2] + + +class TestWeaveSpanManager: + """Tests for the WeaveSpanManager class.""" + + def test_span_manager_without_initialization(self): + """WeaveSpanManager works gracefully when Weave is not initialized.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import WeaveSpanManager + + manager = WeaveSpanManager() + + # start_span should return None when not initialized + result = manager.start_span("test_span", inputs={"key": "value"}) + assert result is None + + # end_span should 
not raise + manager.end_span(output={"result": "ok"}) + + def test_global_span_functions(self): + """Global span functions work without initialization.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import ( + start_weave_span, + end_weave_span, + ) + + # Should not raise + result = start_weave_span("test", inputs={"x": 1}) + assert result is None + + # Should not raise + end_weave_span(output={"y": 2}) + + +class TestWeaveExports: + """Tests for module exports.""" + + def test_all_exports_available(self): + """All expected functions are exported from the module.""" + from openhands.sdk.observability import ( + end_weave_span, + get_weave_client, + init_weave, + is_weave_initialized, + maybe_init_weave, + observe_weave, + should_enable_weave, + start_weave_span, + weave_op, + weave_thread, + WeaveSpanManager, + ) + + # Just verify they're callable + assert callable(end_weave_span) + assert callable(get_weave_client) + assert callable(init_weave) + assert callable(is_weave_initialized) + assert callable(maybe_init_weave) + assert callable(observe_weave) + assert callable(should_enable_weave) + assert callable(start_weave_span) + assert callable(weave_op) + assert callable(weave_thread) + assert WeaveSpanManager is not None + + +class TestInitWeave: + """Tests for init_weave function.""" + + def test_init_weave_requires_project(self): + """init_weave raises ValueError when no project is specified.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import init_weave + + with patch.dict(os.environ, {}, clear=True): + os.environ.pop("WEAVE_PROJECT", None) + with pytest.raises(ValueError, match="Weave project must be specified"): + init_weave() + + def test_init_weave_uses_env_project(self): + """init_weave uses WEAVE_PROJECT from environment.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import init_weave + + # Mock weave.init to avoid actual initialization + with patch("openhands.sdk.observability.weave.get_env") as mock_get_env: + mock_get_env.side_effect = lambda k: { + "WEAVE_PROJECT": "test-project", + "WANDB_API_KEY": None, + }.get(k) + + with patch("weave.init") as mock_weave_init: + mock_weave_init.return_value = MagicMock() + result = init_weave() + + # Should have called weave.init with the project + mock_weave_init.assert_called_once_with("test-project") + + def test_init_weave_already_initialized(self): + """init_weave returns True immediately if already initialized.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = True + + from openhands.sdk.observability.weave import init_weave + + result = init_weave(project="test") + assert result is True + + # Reset for other tests + weave_module._weave_initialized = False From 2d86b6924536cd3eed700fefbcf96f0c63dff71e Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 18 Dec 2025 23:02:01 +0000 Subject: [PATCH 2/8] refactor: Leverage Weave autopatching for zero-config LLM tracing Key improvements: - Simplified integration by leveraging Weave's built-in LiteLLM autopatching - When init_weave() is called, all LiteLLM calls are automatically traced - No manual decoration needed for LLM calls - just call init_weave() - Added weave_attributes() context manager for conversation grouping - Added get_weave_op() 
for dynamic decorator access - Updated @weave_op to support both @weave_op and @weave_op(...) syntax - Improved documentation explaining the autopatching approach - Updated demo script to showcase automatic LLM tracing - Added tests for autopatching behavior The SDK uses LiteLLM for all LLM calls. Weave automatically patches LiteLLM when initialized, so users get full tracing with minimal setup. Co-authored-by: openhands --- examples/weave_observability_demo.py | 74 ++- .../openhands/sdk/observability/__init__.py | 4 + .../openhands/sdk/observability/weave.py | 585 ++++++++++-------- tests/sdk/observability/test_weave.py | 96 ++- 4 files changed, 489 insertions(+), 270 deletions(-) diff --git a/examples/weave_observability_demo.py b/examples/weave_observability_demo.py index 0417e42d30..f4a51ec7e0 100644 --- a/examples/weave_observability_demo.py +++ b/examples/weave_observability_demo.py @@ -1,24 +1,44 @@ #!/usr/bin/env python3 """Demo script showing Weave observability integration with OpenHands SDK. -This script demonstrates how to use Weave for tracing agent operations. -It creates a simple agent that processes messages and shows how traces -appear in the Weave UI. +This script demonstrates how Weave provides **automatic LLM tracing** for the +OpenHands SDK. The key insight is that Weave automatically patches LiteLLM +when initialized, so all LLM calls are traced without any manual decoration! -Prerequisites: - - Set WANDB_API_KEY environment variable (valid W&B API key) - - Set WEAVE_PROJECT environment variable (e.g., "your-team/openhands-demo") - - Optionally set OPENAI_API_KEY for LLM calls +## Key Features Demonstrated + +1. **Automatic LLM Tracing**: Just call `init_weave()` and all LiteLLM calls + are automatically traced - no `@weave.op` decorators needed for LLM calls! + +2. **Custom Function Tracing**: Use `@weave_op` for custom agent logic you + want to trace (tool execution, agent steps, etc.) + +3. **Conversation Grouping**: Use `weave_attributes()` to group related + operations under a conversation or session. + +## How It Works + +The SDK uses LiteLLM for all LLM calls. When you call `init_weave()`: +1. Weave's `implicit_patch()` automatically patches LiteLLM +2. All `litellm.completion()` and `litellm.acompletion()` calls are traced +3. You see full traces in the Weave UI without any code changes! + +## Prerequisites + +- Set WANDB_API_KEY environment variable (valid W&B API key) +- Set WEAVE_PROJECT environment variable (e.g., "your-team/openhands-demo") +- Optionally set OPENAI_API_KEY for actual LLM calls + +## Usage -Usage: export WANDB_API_KEY="your-api-key" export WEAVE_PROJECT="your-team/openhands-demo" python examples/weave_observability_demo.py Note: If WANDB_API_KEY is not set or invalid, the demo will still run - but without Weave tracing. This allows testing the decorator - functionality without requiring valid credentials. + but without Weave tracing. This allows testing the functionality + without requiring valid credentials. 
""" import os @@ -27,15 +47,17 @@ # Add the SDK to the path for development sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "openhands-sdk")) -from openhands.sdk.observability.weave import ( +from openhands.sdk.observability import ( init_weave, is_weave_initialized, maybe_init_weave, weave_op, + weave_attributes, weave_thread, start_weave_span, end_weave_span, observe_weave, + get_weave_op, ) @@ -130,6 +152,8 @@ def run_demo(): if success: print("✅ Weave initialized successfully!") print(f" View traces at: https://wandb.ai/{project}/weave") + print("\n 🎉 KEY FEATURE: All LiteLLM calls are now AUTOMATICALLY traced!") + print(" No need to decorate LLM calls - Weave patches LiteLLM for you.") else: print("⚠️ Weave not initialized (missing credentials or package)") print(" Running demo without tracing...") @@ -139,20 +163,23 @@ def run_demo(): print("-" * 60) # Demo 1: Simple decorated function - print("\n1️⃣ Processing a message with @weave_op decorator:") + print("\n1️⃣ Custom function tracing with @weave_op decorator:") + print(" (Use this for custom agent logic you want to trace)") result = process_message("Hello, this is a test message for the agent!") print(f" Result: {result}") # Demo 2: Sentiment analysis with observe_weave - print("\n2️⃣ Analyzing sentiment with @observe_weave decorator:") + print("\n2️⃣ Laminar-compatible interface with @observe_weave:") + print(" (Easy migration from Laminar to Weave)") sentiment = analyze_sentiment("This is a great and excellent demo!") print(f" Sentiment: {sentiment}") - # Demo 3: Thread grouping for conversation - print("\n3️⃣ Simulating a conversation with thread grouping:") + # Demo 3: Conversation grouping with weave_attributes + print("\n3️⃣ Conversation grouping with weave_attributes:") + print(" (Group all operations under a conversation ID)") conversation_id = "demo-conversation-001" - with weave_thread(conversation_id): + with weave_attributes(conversation_id=conversation_id, user_id="demo-user"): for i, msg in enumerate([ "Hello, I need help with my code", "The function is not working correctly", @@ -162,15 +189,28 @@ def run_demo(): print(f" Step {i}: sentiment={result['sentiment']}") # Demo 4: Manual span management - print("\n4️⃣ Manual span management:") + print("\n4️⃣ Manual span management (for advanced use cases):") result = manual_span_example() print(f" Result: {result}") + # Demo 5: Show how to get weave.op for dynamic decoration + print("\n5️⃣ Dynamic decoration with get_weave_op():") + op = get_weave_op() + + @op + def dynamically_traced_function(x: int) -> int: + return x * 2 + + result = dynamically_traced_function(21) + print(f" Result: {result}") + print("\n" + "=" * 60) print("Demo completed!") if is_weave_initialized(): print(f"\n🔗 View your traces at: https://wandb.ai/{project}/weave") + print("\n💡 Remember: LLM calls via LiteLLM are traced AUTOMATICALLY!") + print(" Just use the SDK's LLM class normally - no decoration needed.") print("=" * 60) diff --git a/openhands-sdk/openhands/sdk/observability/__init__.py b/openhands-sdk/openhands/sdk/observability/__init__.py index 5b638534dc..799c1d1d71 100644 --- a/openhands-sdk/openhands/sdk/observability/__init__.py +++ b/openhands-sdk/openhands/sdk/observability/__init__.py @@ -2,12 +2,14 @@ from openhands.sdk.observability.weave import ( end_weave_span, get_weave_client, + get_weave_op, init_weave, is_weave_initialized, maybe_init_weave, observe_weave, should_enable_weave, start_weave_span, + weave_attributes, weave_op, weave_thread, WeaveSpanManager, @@ 
-21,12 +23,14 @@ # Weave exports "end_weave_span", "get_weave_client", + "get_weave_op", "init_weave", "is_weave_initialized", "maybe_init_weave", "observe_weave", "should_enable_weave", "start_weave_span", + "weave_attributes", "weave_op", "weave_thread", "WeaveSpanManager", diff --git a/openhands-sdk/openhands/sdk/observability/weave.py b/openhands-sdk/openhands/sdk/observability/weave.py index 821765f6d9..6d16e74ba3 100644 --- a/openhands-sdk/openhands/sdk/observability/weave.py +++ b/openhands-sdk/openhands/sdk/observability/weave.py @@ -1,58 +1,113 @@ """Weave observability integration for OpenHands SDK. -This module provides integration with Weights & Biases Weave for tracing -and observability of agent operations. Weave automatically tracks LLM calls, -tool executions, and agent steps. +This module provides integration with Weights & Biases Weave for automatic +tracing and observability of agent operations. It leverages Weave's built-in +autopatching to automatically trace all LLM calls made through LiteLLM. -Configuration: - Set the following environment variables to enable Weave tracing: - - WANDB_API_KEY: Your Weights & Biases API key - - WEAVE_PROJECT: The Weave project name (e.g., "my-team/my-project") +## Key Features - Alternatively, call `init_weave()` directly with the project name. +1. **Zero-config LLM tracing**: Just call `init_weave()` and all LiteLLM calls + are automatically traced - no manual decoration needed! -Example: - >>> from openhands.sdk.observability.weave import maybe_init_weave, weave_op - >>> maybe_init_weave() # Auto-initializes if env vars are set - >>> - >>> @weave_op(name="my_function") - >>> def my_function(x: int) -> int: - ... return x + 1 +2. **Automatic integration patching**: Weave automatically patches LiteLLM, + OpenAI, Anthropic, and 30+ other providers when initialized. + +3. **Optional manual tracing**: Use `@weave.op` for custom agent logic that + you want to trace (tool execution, agent steps, etc.) + +4. **Thread grouping**: Group related operations under conversation threads. + +## How It Works + +The SDK uses LiteLLM for all LLM calls. When you call `init_weave()`: +1. Weave's `implicit_patch()` automatically patches LiteLLM +2. All `litellm.completion()` and `litellm.acompletion()` calls are traced +3. You see full traces in the Weave UI without any code changes! + +## Environment Variables + +- `WANDB_API_KEY`: Your Weights & Biases API key +- `WEAVE_PROJECT`: The Weave project name (e.g., "my-team/my-project") + +## Usage Examples + +### Basic Usage (Automatic LLM Tracing) + +```python +from openhands.sdk.observability import init_weave +from openhands.sdk import LLM + +# Initialize Weave - this automatically traces all LLM calls! 
+init_weave("my-team/my-project") + +# All LLM calls are now automatically traced +llm = LLM(model="gpt-4") +response = llm.completion(messages=[{"role": "user", "content": "Hello!"}]) +# ^ This call appears in Weave UI automatically +``` + +### Custom Function Tracing + +```python +import weave +from openhands.sdk.observability import init_weave + +init_weave("my-team/my-project") + +# Use @weave.op for custom logic you want to trace +@weave.op +def process_agent_step(step: dict) -> dict: + # Your custom logic here + return {"processed": True} +``` + +### Conversation Thread Grouping + +```python +from openhands.sdk.observability import init_weave, weave_attributes + +init_weave("my-team/my-project") + +# Group all operations under a conversation +with weave_attributes(conversation_id="conv-123", user_id="user-456"): + # All LLM calls and traced functions within this block + # will be tagged with these attributes + response = llm.completion(...) +``` See Also: - Weave documentation: https://docs.wandb.ai/weave - Laminar integration: openhands.sdk.observability.laminar """ +from __future__ import annotations + +import logging +import os from collections.abc import Callable from contextlib import contextmanager -from functools import wraps from typing import Any, ParamSpec, TypeVar -from openhands.sdk.logger import get_logger from openhands.sdk.observability.utils import get_env -logger = get_logger(__name__) +logger = logging.getLogger(__name__) -# Type variables for generic function signatures P = ParamSpec("P") R = TypeVar("R") -# Global state for Weave initialization +# Global state _weave_initialized: bool = False _weave_client: Any = None -def should_enable_weave() -> bool: - """Check if Weave should be enabled based on environment configuration. +def get_weave_client() -> Any: + """Get the current Weave client instance. Returns: - True if WANDB_API_KEY and WEAVE_PROJECT are set, False otherwise. + The Weave client if initialized, None otherwise. """ - api_key = get_env("WANDB_API_KEY") - project = get_env("WEAVE_PROJECT") - return bool(api_key and project) + return _weave_client def is_weave_initialized() -> bool: @@ -61,30 +116,41 @@ def is_weave_initialized() -> bool: Returns: True if Weave is initialized and ready for tracing. """ - global _weave_initialized return _weave_initialized def init_weave( project: str | None = None, api_key: str | None = None, + *, + settings: dict[str, Any] | None = None, ) -> bool: - """Initialize Weave for tracing. + """Initialize Weave for automatic tracing. + + This is the main entry point for enabling Weave observability. When called, + Weave automatically patches LiteLLM and other supported libraries, so all + LLM calls are traced without any manual decoration. Args: project: The Weave project name (e.g., "my-team/my-project"). If not provided, uses WEAVE_PROJECT environment variable. api_key: The Weights & Biases API key. If not provided, uses WANDB_API_KEY environment variable. + settings: Optional dict of Weave settings to configure behavior. + See Weave documentation for available settings. Returns: True if initialization was successful, False otherwise. Raises: ValueError: If no project is specified and WEAVE_PROJECT is not set. - """ - import os + Example: + >>> from openhands.sdk.observability import init_weave + >>> init_weave("my-team/openhands-agent") + True + >>> # Now all LiteLLM calls are automatically traced! 
+ """ global _weave_initialized, _weave_client if _weave_initialized: @@ -123,9 +189,20 @@ def init_weave( ) try: - _weave_client = weave.init(project_name) + # Initialize Weave - this automatically: + # 1. Patches all already-imported integrations (LiteLLM, OpenAI, etc.) + # 2. Registers import hooks for future imports + init_kwargs: dict[str, Any] = {} + if settings: + init_kwargs["settings"] = settings + + _weave_client = weave.init(project_name, **init_kwargs) _weave_initialized = True - logger.info(f"Weave initialized for project: {project_name}") + + logger.info( + f"Weave initialized for project: {project_name}. " + "All LiteLLM calls will be automatically traced." + ) return True except Exception as e: logger.error(f"Failed to initialize Weave: {e}") @@ -135,149 +212,247 @@ def init_weave( def maybe_init_weave() -> bool: """Initialize Weave if environment variables are configured. - This is a convenience function that checks for WANDB_API_KEY and - WEAVE_PROJECT environment variables and initializes Weave if both are set. + This is a convenience function that initializes Weave only if both + WANDB_API_KEY and WEAVE_PROJECT environment variables are set. + Useful for conditional initialization based on environment. Returns: - True if Weave was initialized (or already initialized), False otherwise. + True if Weave was initialized (or already was), False otherwise. + + Example: + >>> import os + >>> os.environ["WANDB_API_KEY"] = "your-key" + >>> os.environ["WEAVE_PROJECT"] = "my-team/my-project" + >>> from openhands.sdk.observability import maybe_init_weave + >>> maybe_init_weave() # Initializes automatically + True """ - if is_weave_initialized(): + if _weave_initialized: return True - if should_enable_weave(): - return init_weave() + if not should_enable_weave(): + logger.debug( + "Weave environment variables not set (WANDB_API_KEY, WEAVE_PROJECT). " + "Skipping Weave initialization." + ) + return False - logger.debug( - "Weave environment variables not set (WANDB_API_KEY, WEAVE_PROJECT). " - "Skipping Weave initialization." - ) - return False + try: + return init_weave() + except ValueError: + return False -def get_weave_client() -> Any: - """Get the current Weave client. +def should_enable_weave() -> bool: + """Check if Weave should be enabled based on environment variables. Returns: - The Weave client if initialized, None otherwise. + True if both WANDB_API_KEY and WEAVE_PROJECT are set. """ - global _weave_client - return _weave_client + return bool(get_env("WANDB_API_KEY") and get_env("WEAVE_PROJECT")) -def weave_op( - name: str | None = None, - *, - call_display_name: str | Callable[..., str] | None = None, - postprocess_inputs: Callable[..., dict[str, Any]] | None = None, - postprocess_output: Callable[..., Any] | None = None, -) -> Callable[[Callable[P, R]], Callable[P, R]]: - """Decorator to trace a function with Weave. +@contextmanager +def weave_attributes(**attributes: Any): + """Context manager to add attributes to all operations within the block. - This decorator wraps a function to automatically trace its inputs, outputs, - and execution time with Weave. If Weave is not initialized, the function - runs normally without tracing. + This is useful for grouping related operations (e.g., all events in a + conversation) or adding metadata to traces. Args: - name: Optional name for the operation. Defaults to the function name. - call_display_name: Optional display name or callable that returns a - display name for each call. 
- postprocess_inputs: Optional function to transform inputs before logging. - postprocess_output: Optional function to transform output before logging. - - Returns: - A decorator that wraps the function with Weave tracing. + **attributes: Key-value pairs to attach to all operations. + Common attributes: conversation_id, user_id, session_id, etc. Example: - >>> @weave_op(name="process_data") - >>> def process_data(data: dict) -> dict: - ... return {"processed": True, **data} + >>> with weave_attributes(conversation_id="conv-123", user_id="user-456"): + ... # All LLM calls and traced functions here will have these attributes + ... response = llm.completion(messages=[...]) """ - def decorator(func: Callable[P, R]) -> Callable[P, R]: - @wraps(func) - def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: - if not is_weave_initialized(): - return func(*args, **kwargs) - - try: - import weave - - # Build weave.op kwargs - op_kwargs: dict[str, Any] = {} - if name: - op_kwargs["name"] = name - if call_display_name: - op_kwargs["call_display_name"] = call_display_name - if postprocess_inputs: - op_kwargs["postprocess_inputs"] = postprocess_inputs - if postprocess_output: - op_kwargs["postprocess_output"] = postprocess_output - - # Apply weave.op decorator dynamically - traced_func = weave.op(**op_kwargs)(func) - return traced_func(*args, **kwargs) - except Exception as e: - logger.debug(f"Weave tracing failed, running without trace: {e}") - return func(*args, **kwargs) - - return wrapper + if not _weave_initialized: + yield + return - return decorator + try: + import weave + with weave.attributes(attributes): + yield + except Exception as e: + logger.warning(f"Failed to set weave attributes: {e}") + yield @contextmanager def weave_thread(thread_id: str): - """Context manager to group operations under a Weave thread. + """Context manager to group operations under a thread. - Weave threads allow grouping related operations (like all events in a - conversation) under a single trace hierarchy. + This is an alias for weave_attributes(thread_id=...) for convenience + and backward compatibility. Args: thread_id: Unique identifier for the thread (e.g., conversation ID). - Yields: - The thread context if Weave is initialized, otherwise a no-op context. - Example: >>> with weave_thread("conversation-123"): ... # All operations here will be grouped under the same thread - ... process_message("Hello") - ... generate_response() + ... response = llm.completion(messages=[...]) """ - if not is_weave_initialized(): + with weave_attributes(thread_id=thread_id): yield - return + + +def get_weave_op(): + """Get the weave.op decorator for manual function tracing. + + Returns the actual weave.op decorator if Weave is initialized, + otherwise returns a no-op decorator that just returns the function. + + This is useful when you want to trace custom agent logic beyond + the automatic LLM call tracing. + + Returns: + The weave.op decorator or a no-op decorator. + + Example: + >>> from openhands.sdk.observability import init_weave, get_weave_op + >>> init_weave("my-project") + >>> weave_op = get_weave_op() + >>> + >>> @weave_op + ... def my_custom_function(x: int) -> int: + ... 
return x * 2 + """ + if not _weave_initialized: + def noop_decorator(func): + return func + return noop_decorator try: import weave + return weave.op + except ImportError: + def noop_decorator(func): + return func + return noop_decorator - # Check if there's an active Weave client - client = weave.client.get_current_client() - if client is None: - yield - return - with weave.thread(thread_id): - yield - except Exception as e: - logger.debug(f"Weave thread context failed: {e}") - yield +def weave_op( + func: Callable[P, R] | None = None, + *, + name: str | None = None, + call_display_name: str | Callable[..., str] | None = None, + postprocess_inputs: Callable[..., dict[str, Any]] | None = None, + postprocess_output: Callable[..., Any] | None = None, +) -> Callable[P, R] | Callable[[Callable[P, R]], Callable[P, R]]: + """Decorator to trace a function with Weave. + + This is a convenience wrapper around weave.op that handles the case + when Weave is not initialized (returns the function unchanged). + + Can be used with or without parentheses: + @weave_op + def my_func(): ... + + @weave_op(name="custom_name") + def my_func(): ... + + Args: + func: The function to decorate (when used without parentheses). + name: Optional name for the operation. Defaults to function name. + call_display_name: Display name for the call in the Weave UI. + postprocess_inputs: Function to transform inputs before logging. + postprocess_output: Function to transform output before logging. + + Returns: + The decorated function or a decorator. + """ + def decorator(fn: Callable[P, R]) -> Callable[P, R]: + if not _weave_initialized: + return fn + + try: + import weave + + op_kwargs: dict[str, Any] = {} + if name: + op_kwargs["name"] = name + if call_display_name: + op_kwargs["call_display_name"] = call_display_name + if postprocess_inputs: + op_kwargs["postprocess_inputs"] = postprocess_inputs + if postprocess_output: + op_kwargs["postprocess_output"] = postprocess_output + + if op_kwargs: + return weave.op(**op_kwargs)(fn) + return weave.op(fn) + except Exception as e: + logger.warning(f"Failed to apply weave.op decorator: {e}") + return fn + + # Handle both @weave_op and @weave_op(...) syntax + if func is not None: + return decorator(func) + return decorator + + +def observe_weave( + name: str | None = None, + *, + ignore_inputs: list[str] | None = None, + ignore_output: bool = False, +) -> Callable[[Callable[P, R]], Callable[P, R]]: + """Decorator for observing functions with Weave (Laminar-compatible interface). + + This provides a similar interface to the Laminar `observe` decorator, + making it easier to switch between observability backends. + + Args: + name: Optional name for the operation. + ignore_inputs: List of input parameter names to exclude from logging. + ignore_output: If True, don't log the output. + + Returns: + A decorator that wraps the function for Weave tracing. + + Example: + >>> @observe_weave(name="login", ignore_inputs=["password"]) + ... def login(username: str, password: str) -> bool: + ... 
return authenticate(username, password) + """ + def postprocess_inputs_fn(inputs: dict[str, Any]) -> dict[str, Any]: + if not ignore_inputs: + return inputs + return {k: v for k, v in inputs.items() if k not in ignore_inputs} + + def postprocess_output_fn(output: Any) -> Any: + if ignore_output: + return "[output hidden]" + return output + + return weave_op( + name=name, + postprocess_inputs=postprocess_inputs_fn if ignore_inputs else None, + postprocess_output=postprocess_output_fn if ignore_output else None, + ) class WeaveSpanManager: - """Manages Weave spans for manual tracing. + """Manager for manual span lifecycle control. + + This class provides fine-grained control over span creation and completion, + useful when automatic decoration is not suitable. - This class provides a stack-based approach to managing Weave spans, - similar to the SpanManager for Laminar. It's useful when you need - more control over span lifecycle than the decorator provides. + Note: For most use cases, the automatic LLM tracing and @weave_op decorator + are sufficient. Use this only when you need explicit span control. Example: >>> manager = WeaveSpanManager() - >>> manager.start_span("process_request", session_id="conv-123") + >>> manager.start_span("process_batch", inputs={"batch_size": 100}) >>> try: - ... # Do work - ... pass - ... finally: - ... manager.end_span() + ... result = process_batch() + ... manager.end_span(output=result) + ... except Exception as e: + ... manager.end_span(error=str(e)) """ def __init__(self): @@ -287,159 +462,85 @@ def start_span( self, name: str, inputs: dict[str, Any] | None = None, - session_id: str | None = None, - ) -> Any | None: - """Start a new Weave span. + ) -> Any: + """Start a new span. Args: - name: Name of the operation being traced. - inputs: Optional dictionary of input values to log. - session_id: Optional session ID for grouping related spans. + name: Name of the span/operation. + inputs: Input parameters to log. Returns: - The Weave call object if successful, None otherwise. + The span/call object if successful, None otherwise. """ - if not is_weave_initialized(): + if not _weave_initialized: return None try: import weave - client = get_weave_client() - if client is None: - return None + @weave.op(name=name) + def _span_op(**kwargs: Any) -> Any: + pass - # Create a call using the client API - call = client.create_call( - op=name, - inputs=inputs or {}, - ) + call = _span_op.call(inputs or {}) self._call_stack.append(call) return call except Exception as e: - logger.debug(f"Failed to start Weave span: {e}") + logger.warning(f"Failed to start weave span: {e}") return None - def end_span(self, output: Any = None, error: Exception | None = None) -> None: - """End the most recent Weave span. + def end_span( + self, + output: Any = None, + error: str | None = None, + ) -> None: + """End the current span. Args: - output: Optional output value to log. - error: Optional exception if the operation failed. + output: Output value to log. + error: Error message if the span failed. 
""" if not self._call_stack: - logger.debug("Attempted to end span, but stack is empty") return try: call = self._call_stack.pop() - client = get_weave_client() - if client and call: - if error: - client.finish_call(call, output=None, exception=error) - else: - client.finish_call(call, output=output) + if error: + call.finish(exception=Exception(error)) + else: + call.finish(output=output) except Exception as e: - logger.debug(f"Failed to end Weave span: {e}") - + logger.warning(f"Failed to end weave span: {e}") -# Global span manager instance -_span_manager: WeaveSpanManager | None = None - -def _get_span_manager() -> WeaveSpanManager: - """Get or create the global span manager.""" - global _span_manager - if _span_manager is None: - _span_manager = WeaveSpanManager() - return _span_manager +# Global span manager instance for convenience +_global_span_manager = WeaveSpanManager() def start_weave_span( name: str, inputs: dict[str, Any] | None = None, - session_id: str | None = None, -) -> Any | None: - """Start a new Weave span using the global span manager. +) -> Any: + """Start a new Weave span using the global manager. Args: - name: Name of the operation being traced. - inputs: Optional dictionary of input values to log. - session_id: Optional session ID for grouping related spans. + name: Name of the span/operation. + inputs: Input parameters to log. Returns: - The Weave call object if successful, None otherwise. - """ - return _get_span_manager().start_span(name, inputs, session_id) - - -def end_weave_span(output: Any = None, error: Exception | None = None) -> None: - """End the most recent Weave span using the global span manager. - - Args: - output: Optional output value to log. - error: Optional exception if the operation failed. + The span/call object if successful, None otherwise. """ - try: - _get_span_manager().end_span(output, error) - except Exception: - logger.debug("Error ending Weave span") + return _global_span_manager.start_span(name, inputs) -def observe_weave( - *, - name: str | None = None, - ignore_inputs: list[str] | None = None, - ignore_output: bool = False, -) -> Callable[[Callable[P, R]], Callable[P, R]]: - """Unified observe decorator that works with both Weave and Laminar. - - This decorator provides a consistent interface for observability that - works regardless of which backend (Weave or Laminar) is configured. - It prioritizes Weave if initialized, otherwise falls back to Laminar. +def end_weave_span( + output: Any = None, + error: str | None = None, +) -> None: + """End the current Weave span using the global manager. Args: - name: Optional name for the operation. - ignore_inputs: List of input parameter names to exclude from logging. - ignore_output: If True, don't log the function's output. - - Returns: - A decorator that wraps the function with observability tracing. - - Example: - >>> @observe_weave(name="agent.step", ignore_inputs=["state"]) - >>> def step(self, state: State) -> Action: - ... return self._process(state) + output: Output value to log. + error: Error message if the span failed. 
""" - def decorator(func: Callable[P, R]) -> Callable[P, R]: - @wraps(func) - def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: - # Try Weave first - if is_weave_initialized(): - try: - import weave - - op_kwargs: dict[str, Any] = {} - if name: - op_kwargs["name"] = name - - # Handle input filtering via postprocess_inputs - if ignore_inputs: - def filter_inputs(inputs: dict[str, Any]) -> dict[str, Any]: - return { - k: v for k, v in inputs.items() - if k not in ignore_inputs - } - op_kwargs["postprocess_inputs"] = filter_inputs - - traced_func = weave.op(**op_kwargs)(func) - return traced_func(*args, **kwargs) - except Exception as e: - logger.debug(f"Weave tracing failed: {e}") - - # Fall through to untraced execution - return func(*args, **kwargs) - - return wrapper - - return decorator + _global_span_manager.end_span(output, error) diff --git a/tests/sdk/observability/test_weave.py b/tests/sdk/observability/test_weave.py index 64de5fe3c0..78e8dd4431 100644 --- a/tests/sdk/observability/test_weave.py +++ b/tests/sdk/observability/test_weave.py @@ -1,6 +1,7 @@ """Tests for Weave observability integration. These tests verify the Weave integration works correctly, including: +- Automatic LLM tracing via Weave's autopatching - Decorator functionality (with and without Weave initialized) - Environment variable configuration - Graceful fallback when Weave is not available @@ -74,20 +75,19 @@ def test_function(x: int) -> int: result = test_function(5) assert result == 6 - def test_weave_op_preserves_function_metadata(self): - """@weave_op preserves function name and docstring.""" + def test_weave_op_without_parentheses(self): + """@weave_op can be used without parentheses.""" import openhands.sdk.observability.weave as weave_module weave_module._weave_initialized = False from openhands.sdk.observability.weave import weave_op - @weave_op(name="custom_name") - def my_function(x: int) -> int: - """My docstring.""" - return x + @weave_op + def test_function(x: int) -> int: + return x + 1 - assert my_function.__name__ == "my_function" - assert my_function.__doc__ == "My docstring." 
+ result = test_function(5) + assert result == 6 def test_weave_op_handles_exceptions(self): """@weave_op propagates exceptions correctly.""" @@ -136,8 +136,22 @@ def test_function(data: str, secret: str) -> str: assert result == "hello-processed" -class TestWeaveThread: - """Tests for the weave_thread context manager.""" +class TestWeaveAttributes: + """Tests for the weave_attributes context manager.""" + + def test_weave_attributes_without_initialization(self): + """weave_attributes works as no-op when Weave is not initialized.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import weave_attributes + + results = [] + with weave_attributes(conversation_id="conv-123", user_id="user-456"): + results.append(1) + results.append(2) + + assert results == [1, 2] def test_weave_thread_without_initialization(self): """weave_thread works as no-op when Weave is not initialized.""" @@ -191,6 +205,28 @@ def test_global_span_functions(self): end_weave_span(output={"y": 2}) +class TestGetWeaveOp: + """Tests for the get_weave_op function.""" + + def test_get_weave_op_returns_noop_when_not_initialized(self): + """get_weave_op returns a no-op decorator when Weave is not initialized.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import get_weave_op + + op = get_weave_op() + + @op + def test_function(x: int) -> int: + return x * 2 + + # Function should work normally + assert test_function(5) == 10 + # Function should be unchanged + assert test_function.__name__ == "test_function" + + class TestWeaveExports: """Tests for module exports.""" @@ -199,12 +235,14 @@ def test_all_exports_available(self): from openhands.sdk.observability import ( end_weave_span, get_weave_client, + get_weave_op, init_weave, is_weave_initialized, maybe_init_weave, observe_weave, should_enable_weave, start_weave_span, + weave_attributes, weave_op, weave_thread, WeaveSpanManager, @@ -213,12 +251,14 @@ def test_all_exports_available(self): # Just verify they're callable assert callable(end_weave_span) assert callable(get_weave_client) + assert callable(get_weave_op) assert callable(init_weave) assert callable(is_weave_initialized) assert callable(maybe_init_weave) assert callable(observe_weave) assert callable(should_enable_weave) assert callable(start_weave_span) + assert callable(weave_attributes) assert callable(weave_op) assert callable(weave_thread) assert WeaveSpanManager is not None @@ -258,7 +298,7 @@ def test_init_weave_uses_env_project(self): result = init_weave() # Should have called weave.init with the project - mock_weave_init.assert_called_once_with("test-project") + mock_weave_init.assert_called_once() def test_init_weave_already_initialized(self): """init_weave returns True immediately if already initialized.""" @@ -272,3 +312,37 @@ def test_init_weave_already_initialized(self): # Reset for other tests weave_module._weave_initialized = False + + +class TestAutopatching: + """Tests for Weave's autopatching behavior. + + These tests verify that the integration is designed to leverage + Weave's automatic LiteLLM patching. 
+ """ + + def test_init_weave_calls_weave_init(self): + """init_weave calls weave.init which triggers autopatching.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import init_weave + + with patch("openhands.sdk.observability.weave.get_env") as mock_get_env: + mock_get_env.side_effect = lambda k: { + "WEAVE_PROJECT": "test-project", + "WANDB_API_KEY": "test-key", + }.get(k) + + with patch("weave.init") as mock_weave_init: + with patch("wandb.login"): + mock_weave_init.return_value = MagicMock() + result = init_weave() + + # weave.init should be called, which triggers implicit_patch() + # and register_import_hook() internally + mock_weave_init.assert_called_once() + assert result is True + + # Reset for other tests + weave_module._weave_initialized = False From d162ae620e5f3abc0c2165fc3abea42060f77dfd Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 18 Dec 2025 23:50:31 +0000 Subject: [PATCH 3/8] feat: Add Weave thread support for conversation-level tracing Integrates Weave threading into LocalConversation.run() to automatically group all operations (LLM calls, traced functions) under the conversation ID. Key changes: - Added _get_weave_thread_context() helper that returns weave.thread() if Weave is initialized, otherwise a nullcontext (no-op) - Wrapped the run loop with the Weave thread context - All LLM calls (autopatched via Weave's LiteLLM integration) and @weave_op decorated functions are now grouped by conversation This enables conversation-level tracing in the Weave UI, similar to the OpenHands PR #12056 approach but adapted for the SDK architecture. Co-authored-by: openhands --- .../conversation/impl/local_conversation.py | 159 +++++++++++------- 1 file changed, 96 insertions(+), 63 deletions(-) diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index a05aa7b1b8..b26dacabc7 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -1,6 +1,7 @@ import atexit import uuid from collections.abc import Mapping +from contextlib import nullcontext from pathlib import Path from openhands.sdk.agent.base import AgentBase @@ -35,6 +36,7 @@ from openhands.sdk.llm.llm_registry import LLMRegistry from openhands.sdk.logger import get_logger from openhands.sdk.observability.laminar import observe +from openhands.sdk.observability.weave import is_weave_initialized from openhands.sdk.security.analyzer import SecurityAnalyzerBase from openhands.sdk.security.confirmation_policy import ( ConfirmationPolicyBase, @@ -45,6 +47,29 @@ logger = get_logger(__name__) +def _get_weave_thread_context(conversation_id: str): + """Get Weave thread context manager if Weave is initialized. + + This groups all operations within a conversation run under the same + Weave thread, enabling conversation-level tracing in the Weave UI. + + Args: + conversation_id: The conversation ID to use as the thread ID. + + Returns: + A weave.thread context manager if Weave is initialized, + otherwise a nullcontext (no-op). 
+ """ + if not is_weave_initialized(): + return nullcontext() + + try: + import weave + return weave.thread(conversation_id) + except Exception: + return nullcontext() + + class LocalConversation(BaseConversation): agent: AgentBase workspace: LocalWorkspace @@ -295,6 +320,11 @@ def run(self) -> None: - Creates and executes actions immediately Can be paused between steps + + Note: + If Weave is initialized, all operations within this run are grouped + under a Weave thread using the conversation ID. This enables + conversation-level tracing in the Weave UI. """ with self._state: @@ -306,75 +336,78 @@ def run(self) -> None: self._state.execution_status = ConversationExecutionStatus.RUNNING iteration = 0 - try: - while True: - logger.debug(f"Conversation run iteration {iteration}") - with self._state: - # Pause attempts to acquire the state lock - # Before value can be modified step can be taken - # Ensure step conditions are checked when lock is already acquired - if self._state.execution_status in [ - ConversationExecutionStatus.FINISHED, - ConversationExecutionStatus.PAUSED, - ConversationExecutionStatus.STUCK, - ]: - break - - # Check for stuck patterns if enabled - if self._stuck_detector: - is_stuck = self._stuck_detector.is_stuck() - - if is_stuck: - logger.warning("Stuck pattern detected.") + # Wrap the run loop in a Weave thread context if Weave is initialized. + # This groups all LLM calls and traced operations under the conversation ID. + with _get_weave_thread_context(str(self.id)): + try: + while True: + logger.debug(f"Conversation run iteration {iteration}") + with self._state: + # Pause attempts to acquire the state lock + # Before value can be modified step can be taken + # Ensure step conditions are checked when lock is already acquired + if self._state.execution_status in [ + ConversationExecutionStatus.FINISHED, + ConversationExecutionStatus.PAUSED, + ConversationExecutionStatus.STUCK, + ]: + break + + # Check for stuck patterns if enabled + if self._stuck_detector: + is_stuck = self._stuck_detector.is_stuck() + + if is_stuck: + logger.warning("Stuck pattern detected.") + self._state.execution_status = ( + ConversationExecutionStatus.STUCK + ) + continue + + # clear the flag before calling agent.step() (user approved) + if ( + self._state.execution_status + == ConversationExecutionStatus.WAITING_FOR_CONFIRMATION + ): self._state.execution_status = ( - ConversationExecutionStatus.STUCK + ConversationExecutionStatus.RUNNING ) - continue - - # clear the flag before calling agent.step() (user approved) - if ( - self._state.execution_status - == ConversationExecutionStatus.WAITING_FOR_CONFIRMATION - ): - self._state.execution_status = ( - ConversationExecutionStatus.RUNNING - ) - self.agent.step( - self, on_event=self._on_event, on_token=self._on_token + self.agent.step( + self, on_event=self._on_event, on_token=self._on_token + ) + iteration += 1 + + # Check for non-finished terminal conditions + # Note: We intentionally do NOT check for FINISHED status here. + # This allows concurrent user messages to be processed: + # 1. Agent finishes and sets status to FINISHED + # 2. User sends message concurrently via send_message() + # 3. send_message() waits for FIFO lock, then sets status to IDLE + # 4. Run loop continues to next iteration and processes the message + # 5. 
Without this design, concurrent messages would be lost + if ( + self.state.execution_status + == ConversationExecutionStatus.WAITING_FOR_CONFIRMATION + or iteration >= self.max_iteration_per_run + ): + break + except Exception as e: + self._state.execution_status = ConversationExecutionStatus.ERROR + + # Add an error event + self._on_event( + ConversationErrorEvent( + source="environment", + code=e.__class__.__name__, + detail=str(e), ) - iteration += 1 - - # Check for non-finished terminal conditions - # Note: We intentionally do NOT check for FINISHED status here. - # This allows concurrent user messages to be processed: - # 1. Agent finishes and sets status to FINISHED - # 2. User sends message concurrently via send_message() - # 3. send_message() waits for FIFO lock, then sets status to IDLE - # 4. Run loop continues to next iteration and processes the message - # 5. Without this design, concurrent messages would be lost - if ( - self.state.execution_status - == ConversationExecutionStatus.WAITING_FOR_CONFIRMATION - or iteration >= self.max_iteration_per_run - ): - break - except Exception as e: - self._state.execution_status = ConversationExecutionStatus.ERROR - - # Add an error event - self._on_event( - ConversationErrorEvent( - source="environment", - code=e.__class__.__name__, - detail=str(e), ) - ) - # Re-raise with conversation id and persistence dir for better UX - raise ConversationRunError( - self._state.id, e, persistence_dir=self._state.persistence_dir - ) from e + # Re-raise with conversation id and persistence dir for better UX + raise ConversationRunError( + self._state.id, e, persistence_dir=self._state.persistence_dir + ) from e def set_confirmation_policy(self, policy: ConfirmationPolicyBase) -> None: """Set the confirmation policy and store it in conversation state.""" From b7b791d2948876f6f97290455fc9f3ef9d19a696 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 18 Dec 2025 23:51:08 +0000 Subject: [PATCH 4/8] docs: Update demo to highlight conversation threading feature Co-authored-by: openhands --- examples/weave_observability_demo.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/examples/weave_observability_demo.py b/examples/weave_observability_demo.py index f4a51ec7e0..03bfd89c95 100644 --- a/examples/weave_observability_demo.py +++ b/examples/weave_observability_demo.py @@ -13,15 +13,20 @@ 2. **Custom Function Tracing**: Use `@weave_op` for custom agent logic you want to trace (tool execution, agent steps, etc.) -3. **Conversation Grouping**: Use `weave_attributes()` to group related - operations under a conversation or session. +3. **Conversation Threading**: The SDK automatically wraps conversation runs + in `weave.thread()` to group all operations under the conversation ID. + This enables conversation-level tracing in the Weave UI! + +4. **Conversation Grouping**: Use `weave_attributes()` to add custom metadata + to operations (user_id, session_id, etc.) ## How It Works The SDK uses LiteLLM for all LLM calls. When you call `init_weave()`: 1. Weave's `implicit_patch()` automatically patches LiteLLM 2. All `litellm.completion()` and `litellm.acompletion()` calls are traced -3. You see full traces in the Weave UI without any code changes! +3. LocalConversation.run() wraps the event loop in `weave.thread(conversation_id)` +4. You see full conversation traces in the Weave UI without any code changes! 
## Prerequisites @@ -152,8 +157,10 @@ def run_demo(): if success: print("✅ Weave initialized successfully!") print(f" View traces at: https://wandb.ai/{project}/weave") - print("\n 🎉 KEY FEATURE: All LiteLLM calls are now AUTOMATICALLY traced!") - print(" No need to decorate LLM calls - Weave patches LiteLLM for you.") + print("\n 🎉 KEY FEATURES:") + print(" • All LiteLLM calls are AUTOMATICALLY traced (no decoration needed)") + print(" • Conversation.run() automatically groups operations by conversation ID") + print(" • Use @weave_op for custom functions you want to trace") else: print("⚠️ Weave not initialized (missing credentials or package)") print(" Running demo without tracing...") @@ -209,8 +216,13 @@ def dynamically_traced_function(x: int) -> int: if is_weave_initialized(): print(f"\n🔗 View your traces at: https://wandb.ai/{project}/weave") - print("\n💡 Remember: LLM calls via LiteLLM are traced AUTOMATICALLY!") - print(" Just use the SDK's LLM class normally - no decoration needed.") + print("\n💡 Key Integration Points:") + print(" • LLM calls via LiteLLM are traced AUTOMATICALLY") + print(" • Conversation.run() groups all operations by conversation ID") + print(" • Use @weave_op for custom agent logic you want to trace") + print("\n📝 In your code, just do:") + print(" from openhands.sdk.observability import init_weave") + print(" init_weave('your-project') # That's it!") print("=" * 60) From 8e1a65e2e54e137abf7c5889d0ec3a9f9b2809fb Mon Sep 17 00:00:00 2001 From: openhands Date: Fri, 19 Dec 2025 00:21:42 +0000 Subject: [PATCH 5/8] feat: Add generic observability context system for multi-tool support Introduces a unified observability context management system that allows multiple observability tools (Weave, Laminar, etc.) to work together seamlessly. 
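A minimal sketch of the single call site this enables (identifiers as added in this
patch; the body of the loop is elided):

    from openhands.sdk.observability import get_conversation_context

    def run(self) -> None:
        # Enters weave.thread(), a Laminar span, and any custom providers,
        # or is a no-op when no observability tool is initialized.
        with get_conversation_context(str(self.id)):
            ...  # existing agent step loop, unchanged
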
Key changes: - Added context.py with provider registry pattern - get_conversation_context() composes context managers from all enabled tools - Built-in providers for Weave (weave.thread) and Laminar (span with session_id) - LocalConversation.run() now uses the generic get_conversation_context() - Easy to add new observability tools via register_conversation_context_provider() Design benefits: - SDK is agnostic to which observability tools are enabled - Graceful degradation when tools are not initialized - Exception in one provider doesn't break others - Single integration point in LocalConversation Usage for adding new tools: from openhands.sdk.observability import register_conversation_context_provider def get_my_tool_context(conversation_id: str): if not is_my_tool_initialized(): return nullcontext() return my_tool.thread(conversation_id) register_conversation_context_provider(get_my_tool_context) Co-authored-by: openhands --- .../conversation/impl/local_conversation.py | 38 +-- .../openhands/sdk/observability/__init__.py | 11 + .../openhands/sdk/observability/context.py | 225 +++++++++++++++++ tests/sdk/observability/test_context.py | 238 ++++++++++++++++++ 4 files changed, 482 insertions(+), 30 deletions(-) create mode 100644 openhands-sdk/openhands/sdk/observability/context.py create mode 100644 tests/sdk/observability/test_context.py diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index b26dacabc7..2194c8a12e 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -1,7 +1,6 @@ import atexit import uuid from collections.abc import Mapping -from contextlib import nullcontext from pathlib import Path from openhands.sdk.agent.base import AgentBase @@ -35,8 +34,8 @@ from openhands.sdk.llm import LLM, Message, TextContent from openhands.sdk.llm.llm_registry import LLMRegistry from openhands.sdk.logger import get_logger +from openhands.sdk.observability.context import get_conversation_context from openhands.sdk.observability.laminar import observe -from openhands.sdk.observability.weave import is_weave_initialized from openhands.sdk.security.analyzer import SecurityAnalyzerBase from openhands.sdk.security.confirmation_policy import ( ConfirmationPolicyBase, @@ -47,29 +46,6 @@ logger = get_logger(__name__) -def _get_weave_thread_context(conversation_id: str): - """Get Weave thread context manager if Weave is initialized. - - This groups all operations within a conversation run under the same - Weave thread, enabling conversation-level tracing in the Weave UI. - - Args: - conversation_id: The conversation ID to use as the thread ID. - - Returns: - A weave.thread context manager if Weave is initialized, - otherwise a nullcontext (no-op). - """ - if not is_weave_initialized(): - return nullcontext() - - try: - import weave - return weave.thread(conversation_id) - except Exception: - return nullcontext() - - class LocalConversation(BaseConversation): agent: AgentBase workspace: LocalWorkspace @@ -322,9 +298,11 @@ def run(self) -> None: Can be paused between steps Note: - If Weave is initialized, all operations within this run are grouped - under a Weave thread using the conversation ID. This enables - conversation-level tracing in the Weave UI. + All operations within this run are automatically wrapped in + observability context managers for all enabled tools (Weave, Laminar, + etc.). 
This groups LLM calls and traced operations under the + conversation ID, enabling conversation-level tracing in observability + UIs. """ with self._state: @@ -336,9 +314,9 @@ def run(self) -> None: self._state.execution_status = ConversationExecutionStatus.RUNNING iteration = 0 - # Wrap the run loop in a Weave thread context if Weave is initialized. + # Wrap the run loop in observability context managers for all enabled tools. # This groups all LLM calls and traced operations under the conversation ID. - with _get_weave_thread_context(str(self.id)): + with get_conversation_context(str(self.id)): try: while True: logger.debug(f"Conversation run iteration {iteration}") diff --git a/openhands-sdk/openhands/sdk/observability/__init__.py b/openhands-sdk/openhands/sdk/observability/__init__.py index 799c1d1d71..1dcbf3d033 100644 --- a/openhands-sdk/openhands/sdk/observability/__init__.py +++ b/openhands-sdk/openhands/sdk/observability/__init__.py @@ -1,3 +1,9 @@ +from openhands.sdk.observability.context import ( + clear_conversation_context_providers, + get_conversation_context, + register_conversation_context_provider, + unregister_conversation_context_provider, +) from openhands.sdk.observability.laminar import maybe_init_laminar, observe from openhands.sdk.observability.weave import ( end_weave_span, @@ -17,6 +23,11 @@ __all__ = [ + # Generic observability context (unified interface) + "get_conversation_context", + "register_conversation_context_provider", + "unregister_conversation_context_provider", + "clear_conversation_context_providers", # Laminar exports "maybe_init_laminar", "observe", diff --git a/openhands-sdk/openhands/sdk/observability/context.py b/openhands-sdk/openhands/sdk/observability/context.py new file mode 100644 index 0000000000..c605e0119c --- /dev/null +++ b/openhands-sdk/openhands/sdk/observability/context.py @@ -0,0 +1,225 @@ +"""Generic observability context management for the OpenHands SDK. + +This module provides a unified interface for managing observability contexts +across multiple observability tools (Weave, Laminar, etc.). It allows the SDK +to use a single API that automatically composes context managers from all +enabled observability providers. + +## Design Philosophy + +The SDK should be agnostic to which observability tools are enabled. This module +provides: + +1. **Unified Context Managers**: A single `get_conversation_context()` function + that returns a composed context manager for all enabled tools. + +2. **Provider Registry**: Observability tools register their context providers, + allowing easy extension for new tools. + +3. **Graceful Degradation**: If no observability tools are enabled, the context + managers are no-ops (nullcontext). + +## Usage + +In LocalConversation.run(): +```python +from openhands.sdk.observability.context import get_conversation_context + +def run(self): + with get_conversation_context(str(self.id)): + # All operations here are traced by all enabled observability tools + ... +``` + +## Adding New Observability Providers + +To add a new observability tool: + +1. Create a function that returns a context manager for conversation threading +2. 
Register it with `register_conversation_context_provider()` + +```python +from openhands.sdk.observability.context import register_conversation_context_provider + +def get_my_tool_context(conversation_id: str): + if not is_my_tool_initialized(): + return nullcontext() + return my_tool.thread(conversation_id) + +register_conversation_context_provider(get_my_tool_context) +``` +""" + +from collections.abc import Callable +from contextlib import ExitStack, contextmanager, nullcontext +from typing import Any, ContextManager, Iterator + +from openhands.sdk.logger import get_logger + + +logger = get_logger(__name__) + + +# Type alias for context provider functions +ConversationContextProvider = Callable[[str], ContextManager[Any]] + +# Registry of conversation context providers +_conversation_context_providers: list[ConversationContextProvider] = [] + + +def register_conversation_context_provider( + provider: ConversationContextProvider, +) -> None: + """Register a conversation context provider. + + Context providers are functions that take a conversation_id and return + a context manager. They are called in order of registration. + + Args: + provider: A function that takes a conversation_id string and returns + a context manager. Should return nullcontext() if the + observability tool is not initialized. + + Example: + ```python + def get_my_tool_context(conversation_id: str): + if not is_my_tool_initialized(): + return nullcontext() + return my_tool.thread(conversation_id) + + register_conversation_context_provider(get_my_tool_context) + ``` + """ + if provider not in _conversation_context_providers: + _conversation_context_providers.append(provider) + logger.debug(f"Registered conversation context provider: {provider.__name__}") + + +def unregister_conversation_context_provider( + provider: ConversationContextProvider, +) -> None: + """Unregister a conversation context provider. + + Args: + provider: The provider function to unregister. + """ + if provider in _conversation_context_providers: + _conversation_context_providers.remove(provider) + logger.debug(f"Unregistered conversation context provider: {provider.__name__}") + + +def clear_conversation_context_providers() -> None: + """Clear all registered conversation context providers. + + Useful for testing or resetting the observability state. + """ + _conversation_context_providers.clear() + logger.debug("Cleared all conversation context providers") + + +@contextmanager +def get_conversation_context(conversation_id: str) -> Iterator[None]: + """Get a composed context manager for all enabled observability tools. + + This function returns a context manager that wraps all registered + observability context providers. When entered, it enters all provider + contexts in order. When exited, it exits them in reverse order. + + If no providers are registered or all providers return nullcontext, + this is effectively a no-op. + + Args: + conversation_id: The conversation ID to use for threading/grouping. + + Yields: + None + + Example: + ```python + with get_conversation_context("conv-123"): + # All operations here are traced by all enabled observability tools + agent.step(...) 
+ ``` + """ + if not _conversation_context_providers: + yield + return + + # Use ExitStack to compose multiple context managers + with ExitStack() as stack: + for provider in _conversation_context_providers: + try: + ctx = provider(conversation_id) + stack.enter_context(ctx) + except Exception as e: + # Log but don't fail - observability should not break the agent + logger.debug( + f"Error entering context from provider {provider.__name__}: {e}" + ) + yield + + +# ============================================================================= +# Built-in Provider Registrations +# ============================================================================= +# These are registered when the module is imported. Each provider checks if +# its tool is initialized before returning a real context manager. + + +def _get_weave_conversation_context(conversation_id: str) -> ContextManager[Any]: + """Weave conversation context provider. + + Returns a weave.thread() context manager if Weave is initialized, + otherwise returns nullcontext(). + """ + try: + from openhands.sdk.observability.weave import is_weave_initialized + + if not is_weave_initialized(): + return nullcontext() + + import weave + return weave.thread(conversation_id) + except ImportError: + return nullcontext() + except Exception: + return nullcontext() + + +def _get_laminar_conversation_context(conversation_id: str) -> ContextManager[Any]: + """Laminar conversation context provider. + + Returns a Laminar span context if Laminar is initialized, + otherwise returns nullcontext(). + + Note: Laminar uses OpenTelemetry spans rather than threads, so we create + a span with the conversation_id as the session_id. + """ + try: + from openhands.sdk.observability.laminar import should_enable_observability + + if not should_enable_observability(): + return nullcontext() + + from lmnr import Laminar + + @contextmanager + def laminar_conversation_context(): + span = Laminar.start_active_span(f"conversation:{conversation_id}") + Laminar.set_trace_session_id(conversation_id) + try: + yield + finally: + if span and span.is_recording(): + span.end() + + return laminar_conversation_context() + except ImportError: + return nullcontext() + except Exception: + return nullcontext() + + +# Register built-in providers +register_conversation_context_provider(_get_weave_conversation_context) +register_conversation_context_provider(_get_laminar_conversation_context) diff --git a/tests/sdk/observability/test_context.py b/tests/sdk/observability/test_context.py new file mode 100644 index 0000000000..5f369ed515 --- /dev/null +++ b/tests/sdk/observability/test_context.py @@ -0,0 +1,238 @@ +"""Tests for the generic observability context module.""" + +import pytest +from contextlib import nullcontext +from unittest.mock import MagicMock, patch + +from openhands.sdk.observability.context import ( + clear_conversation_context_providers, + get_conversation_context, + register_conversation_context_provider, + unregister_conversation_context_provider, + _conversation_context_providers, +) + + +class TestConversationContextProviderRegistry: + """Tests for the provider registry functions.""" + + def setup_method(self): + """Clear providers before each test.""" + # Store original providers + self._original_providers = _conversation_context_providers.copy() + clear_conversation_context_providers() + + def teardown_method(self): + """Restore original providers after each test.""" + clear_conversation_context_providers() + for provider in self._original_providers: + 
register_conversation_context_provider(provider) + + def test_register_provider(self): + """Test registering a new provider.""" + def my_provider(conversation_id: str): + return nullcontext() + + register_conversation_context_provider(my_provider) + assert my_provider in _conversation_context_providers + + def test_register_provider_no_duplicates(self): + """Test that registering the same provider twice doesn't create duplicates.""" + def my_provider(conversation_id: str): + return nullcontext() + + register_conversation_context_provider(my_provider) + register_conversation_context_provider(my_provider) + assert _conversation_context_providers.count(my_provider) == 1 + + def test_unregister_provider(self): + """Test unregistering a provider.""" + def my_provider(conversation_id: str): + return nullcontext() + + register_conversation_context_provider(my_provider) + assert my_provider in _conversation_context_providers + + unregister_conversation_context_provider(my_provider) + assert my_provider not in _conversation_context_providers + + def test_unregister_nonexistent_provider(self): + """Test unregistering a provider that was never registered.""" + def my_provider(conversation_id: str): + return nullcontext() + + # Should not raise + unregister_conversation_context_provider(my_provider) + + def test_clear_providers(self): + """Test clearing all providers.""" + def provider1(conversation_id: str): + return nullcontext() + + def provider2(conversation_id: str): + return nullcontext() + + register_conversation_context_provider(provider1) + register_conversation_context_provider(provider2) + assert len(_conversation_context_providers) == 2 + + clear_conversation_context_providers() + assert len(_conversation_context_providers) == 0 + + +class TestGetConversationContext: + """Tests for the get_conversation_context function.""" + + def setup_method(self): + """Clear providers before each test.""" + self._original_providers = _conversation_context_providers.copy() + clear_conversation_context_providers() + + def teardown_method(self): + """Restore original providers after each test.""" + clear_conversation_context_providers() + for provider in self._original_providers: + register_conversation_context_provider(provider) + + def test_no_providers_is_noop(self): + """Test that with no providers, the context is a no-op.""" + executed = False + + with get_conversation_context("test-conv"): + executed = True + + assert executed + + def test_single_provider_called(self): + """Test that a single provider is called with the conversation ID.""" + called_with = [] + + def my_provider(conversation_id: str): + called_with.append(conversation_id) + return nullcontext() + + register_conversation_context_provider(my_provider) + + with get_conversation_context("test-conv-123"): + pass + + assert called_with == ["test-conv-123"] + + def test_multiple_providers_called_in_order(self): + """Test that multiple providers are called in registration order.""" + call_order = [] + + def provider1(conversation_id: str): + call_order.append("provider1") + return nullcontext() + + def provider2(conversation_id: str): + call_order.append("provider2") + return nullcontext() + + register_conversation_context_provider(provider1) + register_conversation_context_provider(provider2) + + with get_conversation_context("test-conv"): + pass + + assert call_order == ["provider1", "provider2"] + + def test_provider_exception_does_not_break_others(self): + """Test that an exception in one provider doesn't prevent others.""" + call_order = 
[] + + def failing_provider(conversation_id: str): + raise RuntimeError("Provider failed") + + def working_provider(conversation_id: str): + call_order.append("working") + return nullcontext() + + register_conversation_context_provider(failing_provider) + register_conversation_context_provider(working_provider) + + # Should not raise + with get_conversation_context("test-conv"): + pass + + assert call_order == ["working"] + + def test_context_manager_enter_exit_called(self): + """Test that context manager __enter__ and __exit__ are called.""" + mock_cm = MagicMock() + mock_cm.__enter__ = MagicMock(return_value=None) + mock_cm.__exit__ = MagicMock(return_value=None) + + def my_provider(conversation_id: str): + return mock_cm + + register_conversation_context_provider(my_provider) + + with get_conversation_context("test-conv"): + mock_cm.__enter__.assert_called_once() + + mock_cm.__exit__.assert_called_once() + + +class TestBuiltInProviders: + """Tests for the built-in Weave and Laminar providers.""" + + def test_weave_provider_returns_nullcontext_when_not_initialized(self): + """Test that Weave provider returns nullcontext when Weave is not initialized.""" + from openhands.sdk.observability.context import _get_weave_conversation_context + + with patch( + "openhands.sdk.observability.weave.is_weave_initialized", + return_value=False, + ): + ctx = _get_weave_conversation_context("test-conv") + # nullcontext() returns a different instance each time, so check type name + assert type(ctx).__name__ == "nullcontext" + + def test_laminar_provider_returns_nullcontext_when_not_initialized(self): + """Test that Laminar provider returns nullcontext when Laminar is not initialized.""" + from openhands.sdk.observability.context import ( + _get_laminar_conversation_context, + ) + + with patch( + "openhands.sdk.observability.laminar.should_enable_observability", + return_value=False, + ): + ctx = _get_laminar_conversation_context("test-conv") + # nullcontext() returns a different instance each time, so check type name + assert type(ctx).__name__ == "nullcontext" + + +class TestIntegration: + """Integration tests for the observability context system.""" + + def test_providers_auto_registered_on_import(self): + """Test that built-in providers are registered when module is imported.""" + # Re-import to trigger registration + from openhands.sdk.observability import context + + # The module should have registered the built-in providers + # We check by looking for the provider functions + provider_names = [p.__name__ for p in context._conversation_context_providers] + assert "_get_weave_conversation_context" in provider_names + assert "_get_laminar_conversation_context" in provider_names + + def test_custom_provider_works_with_builtins(self): + """Test that custom providers work alongside built-in ones.""" + custom_called = [] + + def custom_provider(conversation_id: str): + custom_called.append(conversation_id) + return nullcontext() + + register_conversation_context_provider(custom_provider) + + try: + with get_conversation_context("test-conv"): + pass + + assert "test-conv" in custom_called + finally: + unregister_conversation_context_provider(custom_provider) From e71f6e9901e1fa561b425fbd619804cbd31c6614 Mon Sep 17 00:00:00 2001 From: openhands Date: Fri, 19 Dec 2025 00:28:51 +0000 Subject: [PATCH 6/8] feat: Add unified tool tracing for all observability tools Introduces a unified tool tracing system that works across all enabled observability tools (Weave, Laminar, etc.). 
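A minimal sketch of the two entry points (the tool names and the run_bash helper
below are illustrative placeholders, not part of this patch):

    from openhands.sdk.observability import trace_tool_call, traced_tool

    # Context-manager form, as used in Agent._execute_action_event()
    # and the MCP executor:
    with trace_tool_call("bash", inputs={"command": "ls -la"}):
        result = run_bash("ls -la")  # placeholder for the real executor call

    # Decorator form for custom tool functions; keyword arguments are
    # captured as the traced inputs:
    @traced_tool(tool_name="word_count")
    def word_count(*, text: str) -> int:
        return len(text.split())
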
Key additions: - trace_tool_call(): Context manager for tracing tool executions - traced_tool(): Decorator for tracing tool functions - trace_mcp_list_tools(): Context manager for MCP tool listing - trace_mcp_call_tool(): Context manager for MCP tool calls - Tool trace provider registry (similar to conversation context providers) Integration points: - Agent._execute_action_event() now uses trace_tool_call() for all tools - MCPToolExecutor.call_tool() uses trace_mcp_call_tool() - MCP utils._list_tools() uses trace_mcp_list_tools() What gets traced: - Tool name - Tool inputs (safely serialized) - Tool type (TOOL, MCP_TOOL, MCP_LIST) - Execution duration (via context manager) Design benefits: - Single API for all observability tools - Easy to add new observability providers - Graceful degradation when tools not initialized - Backward compatible with existing Laminar @observe decorators Co-authored-by: openhands --- openhands-sdk/openhands/sdk/agent/agent.py | 27 +- openhands-sdk/openhands/sdk/mcp/tool.py | 54 +-- openhands-sdk/openhands/sdk/mcp/utils.py | 25 +- .../openhands/sdk/observability/__init__.py | 18 + .../openhands/sdk/observability/context.py | 277 ++++++++++++++- tests/sdk/observability/test_context.py | 329 ++++++++++++++++++ 6 files changed, 687 insertions(+), 43 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/agent.py b/openhands-sdk/openhands/sdk/agent/agent.py index d88d2656d4..9029c35165 100644 --- a/openhands-sdk/openhands/sdk/agent/agent.py +++ b/openhands-sdk/openhands/sdk/agent/agent.py @@ -41,6 +41,7 @@ LLMContextWindowExceedError, ) from openhands.sdk.logger import get_logger +from openhands.sdk.observability.context import trace_tool_call from openhands.sdk.observability.laminar import ( maybe_init_laminar, observe, @@ -478,14 +479,24 @@ def _execute_action_event( "as it was checked earlier." ) - # Execute actions! - if should_enable_observability(): - tool_name = extract_action_name(action_event) - observation: Observation = observe(name=tool_name, span_type="TOOL")(tool)( - action_event.action, conversation - ) - else: - observation = tool(action_event.action, conversation) + # Execute actions with unified observability tracing + tool_name = extract_action_name(action_event) + + # Extract inputs for tracing (safely serialize action data) + try: + inputs = action_event.action.model_dump(exclude_none=True) + except Exception: + inputs = None + + # Use unified trace_tool_call for all observability tools (Weave, Laminar, etc.) 
+ # Plus Laminar's @observe decorator for backward compatibility + with trace_tool_call(tool_name, inputs=inputs): + if should_enable_observability(): + observation: Observation = observe(name=tool_name, span_type="TOOL")( + tool + )(action_event.action, conversation) + else: + observation = tool(action_event.action, conversation) assert isinstance(observation, Observation), ( f"Tool '{tool.name}' executor must return an Observation" ) diff --git a/openhands-sdk/openhands/sdk/mcp/tool.py b/openhands-sdk/openhands/sdk/mcp/tool.py index 69aaf54f9a..6e6ef1f21b 100644 --- a/openhands-sdk/openhands/sdk/mcp/tool.py +++ b/openhands-sdk/openhands/sdk/mcp/tool.py @@ -15,6 +15,7 @@ from openhands.sdk.logger import get_logger from openhands.sdk.mcp.client import MCPClient from openhands.sdk.mcp.definition import MCPToolAction, MCPToolObservation +from openhands.sdk.observability.context import trace_mcp_call_tool from openhands.sdk.observability.laminar import observe from openhands.sdk.tool import ( Action, @@ -52,27 +53,38 @@ def __init__(self, tool_name: str, client: MCPClient): @observe(name="MCPToolExecutor.call_tool", span_type="TOOL") async def call_tool(self, action: MCPToolAction) -> MCPToolObservation: - async with self.client: - assert self.client.is_connected(), "MCP client is not connected." - try: - logger.debug( - f"Calling MCP tool {self.tool_name} " - f"with args: {action.model_dump()}" - ) - result: mcp.types.CallToolResult = await self.client.call_tool_mcp( - name=self.tool_name, arguments=action.to_mcp_arguments() - ) - return MCPToolObservation.from_call_tool_result( - tool_name=self.tool_name, result=result - ) - except Exception as e: - error_msg = f"Error calling MCP tool {self.tool_name}: {str(e)}" - logger.error(error_msg, exc_info=True) - return MCPToolObservation.from_text( - text=error_msg, - is_error=True, - tool_name=self.tool_name, - ) + # Extract inputs for tracing + try: + inputs = action.to_mcp_arguments() + except Exception: + inputs = None + + # Use unified MCP tool tracing for all observability tools + with trace_mcp_call_tool( + tool_name=self.tool_name, + inputs=inputs, + ): + async with self.client: + assert self.client.is_connected(), "MCP client is not connected." 
+ try: + logger.debug( + f"Calling MCP tool {self.tool_name} " + f"with args: {action.model_dump()}" + ) + result: mcp.types.CallToolResult = await self.client.call_tool_mcp( + name=self.tool_name, arguments=action.to_mcp_arguments() + ) + return MCPToolObservation.from_call_tool_result( + tool_name=self.tool_name, result=result + ) + except Exception as e: + error_msg = f"Error calling MCP tool {self.tool_name}: {str(e)}" + logger.error(error_msg, exc_info=True) + return MCPToolObservation.from_text( + text=error_msg, + is_error=True, + tool_name=self.tool_name, + ) def __call__( self, diff --git a/openhands-sdk/openhands/sdk/mcp/utils.py b/openhands-sdk/openhands/sdk/mcp/utils.py index 1093280466..a6df3de73e 100644 --- a/openhands-sdk/openhands/sdk/mcp/utils.py +++ b/openhands-sdk/openhands/sdk/mcp/utils.py @@ -10,6 +10,7 @@ from openhands.sdk.mcp.client import MCPClient from openhands.sdk.mcp.exceptions import MCPTimeoutError from openhands.sdk.mcp.tool import MCPToolDefinition +from openhands.sdk.observability.context import trace_mcp_list_tools from openhands.sdk.tool.tool import ToolDefinition @@ -36,17 +37,19 @@ async def _list_tools(client: MCPClient) -> list[ToolDefinition]: """List tools from an MCP client.""" tools: list[ToolDefinition] = [] - async with client: - assert client.is_connected(), "MCP client is not connected." - mcp_type_tools: list[mcp.types.Tool] = await client.list_tools() - for mcp_tool in mcp_type_tools: - tool_sequence = MCPToolDefinition.create( - mcp_tool=mcp_tool, mcp_client=client - ) - tools.extend(tool_sequence) # Flatten sequence into list - assert not client.is_connected(), ( - "MCP client should be disconnected after listing tools." - ) + # Use unified MCP list tools tracing for all observability tools + with trace_mcp_list_tools(): + async with client: + assert client.is_connected(), "MCP client is not connected." + mcp_type_tools: list[mcp.types.Tool] = await client.list_tools() + for mcp_tool in mcp_type_tools: + tool_sequence = MCPToolDefinition.create( + mcp_tool=mcp_tool, mcp_client=client + ) + tools.extend(tool_sequence) # Flatten sequence into list + assert not client.is_connected(), ( + "MCP client should be disconnected after listing tools." 
+ ) return tools diff --git a/openhands-sdk/openhands/sdk/observability/__init__.py b/openhands-sdk/openhands/sdk/observability/__init__.py index 1dcbf3d033..0a378f4b72 100644 --- a/openhands-sdk/openhands/sdk/observability/__init__.py +++ b/openhands-sdk/openhands/sdk/observability/__init__.py @@ -1,8 +1,17 @@ from openhands.sdk.observability.context import ( + # Conversation context clear_conversation_context_providers, get_conversation_context, register_conversation_context_provider, unregister_conversation_context_provider, + # Tool tracing + clear_tool_trace_providers, + register_tool_trace_provider, + trace_mcp_call_tool, + trace_mcp_list_tools, + trace_tool_call, + traced_tool, + unregister_tool_trace_provider, ) from openhands.sdk.observability.laminar import maybe_init_laminar, observe from openhands.sdk.observability.weave import ( @@ -28,6 +37,15 @@ "register_conversation_context_provider", "unregister_conversation_context_provider", "clear_conversation_context_providers", + # Tool tracing (unified interface) + "trace_tool_call", + "traced_tool", + "register_tool_trace_provider", + "unregister_tool_trace_provider", + "clear_tool_trace_providers", + # MCP-specific tracing + "trace_mcp_list_tools", + "trace_mcp_call_tool", # Laminar exports "maybe_init_laminar", "observe", diff --git a/openhands-sdk/openhands/sdk/observability/context.py b/openhands-sdk/openhands/sdk/observability/context.py index c605e0119c..8cbafc4c48 100644 --- a/openhands-sdk/openhands/sdk/observability/context.py +++ b/openhands-sdk/openhands/sdk/observability/context.py @@ -13,10 +13,13 @@ 1. **Unified Context Managers**: A single `get_conversation_context()` function that returns a composed context manager for all enabled tools. -2. **Provider Registry**: Observability tools register their context providers, +2. **Tool Tracing**: A `trace_tool_call()` decorator/context manager for tracing + tool executions across all enabled observability tools. + +3. **Provider Registry**: Observability tools register their context providers, allowing easy extension for new tools. -3. **Graceful Degradation**: If no observability tools are enabled, the context +4. **Graceful Degradation**: If no observability tools are enabled, the context managers are no-ops (nullcontext). ## Usage @@ -31,6 +34,20 @@ def run(self): ... ``` +For tool execution tracing: +```python +from openhands.sdk.observability.context import trace_tool_call + +# As a decorator +@trace_tool_call(tool_name="my_tool") +def execute_tool(action): + ... 
+ +# As a context manager +with trace_tool_call(tool_name="my_tool", inputs={"arg": "value"}): + result = tool.execute(action) +``` + ## Adding New Observability Providers To add a new observability tool: @@ -52,13 +69,17 @@ def get_my_tool_context(conversation_id: str): from collections.abc import Callable from contextlib import ExitStack, contextmanager, nullcontext -from typing import Any, ContextManager, Iterator +from functools import wraps +from typing import Any, ContextManager, Iterator, ParamSpec, TypeVar from openhands.sdk.logger import get_logger logger = get_logger(__name__) +P = ParamSpec("P") +R = TypeVar("R") + # Type alias for context provider functions ConversationContextProvider = Callable[[str], ContextManager[Any]] @@ -223,3 +244,253 @@ def laminar_conversation_context(): # Register built-in providers register_conversation_context_provider(_get_weave_conversation_context) register_conversation_context_provider(_get_laminar_conversation_context) + + +# ============================================================================= +# Tool Call Tracing +# ============================================================================= +# Unified tracing for tool executions across all observability tools. + + +ToolTraceProvider = Callable[[str, dict[str, Any] | None], ContextManager[Any]] + +# Registry of tool trace providers +_tool_trace_providers: list[ToolTraceProvider] = [] + + +def register_tool_trace_provider(provider: ToolTraceProvider) -> None: + """Register a tool trace provider. + + Tool trace providers are functions that take a tool_name and optional + inputs dict, and return a context manager for tracing the tool execution. + + Args: + provider: A function that takes (tool_name, inputs) and returns + a context manager. Should return nullcontext() if the + observability tool is not initialized. + """ + if provider not in _tool_trace_providers: + _tool_trace_providers.append(provider) + logger.debug(f"Registered tool trace provider: {provider.__name__}") + + +def unregister_tool_trace_provider(provider: ToolTraceProvider) -> None: + """Unregister a tool trace provider.""" + if provider in _tool_trace_providers: + _tool_trace_providers.remove(provider) + logger.debug(f"Unregistered tool trace provider: {provider.__name__}") + + +def clear_tool_trace_providers() -> None: + """Clear all registered tool trace providers.""" + _tool_trace_providers.clear() + logger.debug("Cleared all tool trace providers") + + +@contextmanager +def trace_tool_call( + tool_name: str, + inputs: dict[str, Any] | None = None, + tool_type: str = "TOOL", +) -> Iterator[None]: + """Trace a tool call across all enabled observability tools. + + This context manager wraps tool executions with tracing from all + registered observability providers (Weave, Laminar, etc.). + + Args: + tool_name: The name of the tool being executed. + inputs: Optional dict of input arguments to the tool. + tool_type: The type of tool (e.g., "TOOL", "MCP_TOOL"). Used for + categorization in observability UIs. 
+ + Yields: + None + + Example: + ```python + with trace_tool_call("bash", inputs={"command": "ls -la"}): + result = bash_tool.execute(action) + ``` + """ + if not _tool_trace_providers: + yield + return + + with ExitStack() as stack: + for provider in _tool_trace_providers: + try: + ctx = provider(tool_name, inputs) + stack.enter_context(ctx) + except Exception as e: + logger.debug( + f"Error entering tool trace from provider {provider.__name__}: {e}" + ) + yield + + +def traced_tool( + tool_name: str | None = None, + tool_type: str = "TOOL", +) -> Callable[[Callable[P, R]], Callable[P, R]]: + """Decorator to trace tool execution functions. + + This decorator wraps a function with tool tracing from all registered + observability providers. It automatically captures the function's + arguments as inputs. + + Args: + tool_name: The name of the tool. If None, uses the function name. + tool_type: The type of tool (e.g., "TOOL", "MCP_TOOL"). + + Returns: + A decorator that wraps the function with tool tracing. + + Example: + ```python + @traced_tool(tool_name="bash") + def execute_bash(command: str) -> str: + ... + + # Or with automatic name detection + @traced_tool() + def my_tool(arg1, arg2): + ... + ``` + """ + def decorator(func: Callable[P, R]) -> Callable[P, R]: + name = tool_name or func.__name__ + + @wraps(func) + def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: + # Capture inputs from kwargs (args are harder to name) + inputs = dict(kwargs) if kwargs else None + with trace_tool_call(name, inputs=inputs, tool_type=tool_type): + return func(*args, **kwargs) + + return wrapper + + return decorator + + +# ============================================================================= +# Built-in Tool Trace Providers +# ============================================================================= + + +def _get_weave_tool_trace( + tool_name: str, inputs: dict[str, Any] | None +) -> ContextManager[Any]: + """Weave tool trace provider. + + Uses weave.attributes() to add tool metadata to the current span. + The actual tracing is done by Weave's autopatching of the underlying + operations (LLM calls, etc.). + """ + try: + from openhands.sdk.observability.weave import is_weave_initialized + + if not is_weave_initialized(): + return nullcontext() + + import weave + + # Use weave.attributes to add tool metadata to the trace + attributes = {"tool_name": tool_name, "tool_type": "TOOL"} + if inputs: + # Sanitize inputs - convert non-serializable types to strings + safe_inputs = {} + for k, v in inputs.items(): + try: + # Test if it's JSON serializable + import json + json.dumps(v) + safe_inputs[k] = v + except (TypeError, ValueError): + safe_inputs[k] = str(v) + attributes["tool_inputs"] = safe_inputs + + return weave.attributes(attributes) + except ImportError: + return nullcontext() + except Exception: + return nullcontext() + + +def _get_laminar_tool_trace( + tool_name: str, inputs: dict[str, Any] | None # noqa: ARG001 +) -> ContextManager[Any]: + """Laminar tool trace provider. + + Creates a Laminar span for the tool execution. + Note: Laminar's @observe decorator is typically used directly, + but this provides a context manager alternative. 
+ """ + try: + from openhands.sdk.observability.laminar import should_enable_observability + + if not should_enable_observability(): + return nullcontext() + + from lmnr import Laminar + + @contextmanager + def laminar_tool_trace(): + span = Laminar.start_active_span(f"tool:{tool_name}") + try: + yield + finally: + if span and span.is_recording(): + span.end() + + return laminar_tool_trace() + except ImportError: + return nullcontext() + except Exception: + return nullcontext() + + +# Register built-in tool trace providers +register_tool_trace_provider(_get_weave_tool_trace) +register_tool_trace_provider(_get_laminar_tool_trace) + + +# ============================================================================= +# MCP-Specific Tracing +# ============================================================================= + + +@contextmanager +def trace_mcp_list_tools(server_name: str | None = None) -> Iterator[None]: + """Trace MCP tool listing operations. + + Args: + server_name: Optional name of the MCP server being queried. + + Yields: + None + """ + tool_name = f"mcp:list_tools:{server_name}" if server_name else "mcp:list_tools" + with trace_tool_call(tool_name, tool_type="MCP_LIST"): + yield + + +@contextmanager +def trace_mcp_call_tool( + tool_name: str, + server_name: str | None = None, + inputs: dict[str, Any] | None = None, +) -> Iterator[None]: + """Trace MCP tool call operations. + + Args: + tool_name: The name of the MCP tool being called. + server_name: Optional name of the MCP server. + inputs: Optional dict of input arguments. + + Yields: + None + """ + full_name = f"mcp:{server_name}:{tool_name}" if server_name else f"mcp:{tool_name}" + with trace_tool_call(full_name, inputs=inputs, tool_type="MCP_TOOL"): + yield diff --git a/tests/sdk/observability/test_context.py b/tests/sdk/observability/test_context.py index 5f369ed515..ba2e69a0d8 100644 --- a/tests/sdk/observability/test_context.py +++ b/tests/sdk/observability/test_context.py @@ -10,6 +10,15 @@ register_conversation_context_provider, unregister_conversation_context_provider, _conversation_context_providers, + # Tool tracing + clear_tool_trace_providers, + register_tool_trace_provider, + unregister_tool_trace_provider, + trace_tool_call, + traced_tool, + trace_mcp_list_tools, + trace_mcp_call_tool, + _tool_trace_providers, ) @@ -236,3 +245,323 @@ def custom_provider(conversation_id: str): assert "test-conv" in custom_called finally: unregister_conversation_context_provider(custom_provider) + + +# ============================================================================= +# Tool Tracing Tests +# ============================================================================= + + +class TestToolTraceProviderRegistry: + """Tests for the tool trace provider registry functions.""" + + def setup_method(self): + """Store original providers before each test.""" + self._original_providers = _tool_trace_providers.copy() + clear_tool_trace_providers() + + def teardown_method(self): + """Restore original providers after each test.""" + clear_tool_trace_providers() + for provider in self._original_providers: + register_tool_trace_provider(provider) + + def test_register_tool_trace_provider(self): + """Test registering a new tool trace provider.""" + def my_provider(tool_name: str, inputs): + return nullcontext() + + register_tool_trace_provider(my_provider) + assert my_provider in _tool_trace_providers + + def test_register_tool_trace_provider_no_duplicates(self): + """Test that registering the same provider twice doesn't create 
duplicates.""" + def my_provider(tool_name: str, inputs): + return nullcontext() + + register_tool_trace_provider(my_provider) + register_tool_trace_provider(my_provider) + assert _tool_trace_providers.count(my_provider) == 1 + + def test_unregister_tool_trace_provider(self): + """Test unregistering a tool trace provider.""" + def my_provider(tool_name: str, inputs): + return nullcontext() + + register_tool_trace_provider(my_provider) + assert my_provider in _tool_trace_providers + + unregister_tool_trace_provider(my_provider) + assert my_provider not in _tool_trace_providers + + def test_clear_tool_trace_providers(self): + """Test clearing all tool trace providers.""" + def provider1(tool_name: str, inputs): + return nullcontext() + + def provider2(tool_name: str, inputs): + return nullcontext() + + register_tool_trace_provider(provider1) + register_tool_trace_provider(provider2) + assert len(_tool_trace_providers) == 2 + + clear_tool_trace_providers() + assert len(_tool_trace_providers) == 0 + + +class TestTraceToolCall: + """Tests for the trace_tool_call context manager.""" + + def setup_method(self): + """Store original providers before each test.""" + self._original_providers = _tool_trace_providers.copy() + clear_tool_trace_providers() + + def teardown_method(self): + """Restore original providers after each test.""" + clear_tool_trace_providers() + for provider in self._original_providers: + register_tool_trace_provider(provider) + + def test_no_providers_is_noop(self): + """Test that with no providers, the context is a no-op.""" + executed = False + + with trace_tool_call("test-tool"): + executed = True + + assert executed + + def test_single_provider_called(self): + """Test that a single provider is called with tool name and inputs.""" + called_with = [] + + def my_provider(tool_name: str, inputs): + called_with.append((tool_name, inputs)) + return nullcontext() + + register_tool_trace_provider(my_provider) + + with trace_tool_call("bash", inputs={"command": "ls"}): + pass + + assert called_with == [("bash", {"command": "ls"})] + + def test_multiple_providers_called(self): + """Test that multiple providers are called.""" + call_order = [] + + def provider1(tool_name: str, inputs): + call_order.append("provider1") + return nullcontext() + + def provider2(tool_name: str, inputs): + call_order.append("provider2") + return nullcontext() + + register_tool_trace_provider(provider1) + register_tool_trace_provider(provider2) + + with trace_tool_call("test-tool"): + pass + + assert call_order == ["provider1", "provider2"] + + def test_provider_exception_does_not_break_others(self): + """Test that an exception in one provider doesn't prevent others.""" + call_order = [] + + def failing_provider(tool_name: str, inputs): + raise RuntimeError("Provider failed") + + def working_provider(tool_name: str, inputs): + call_order.append("working") + return nullcontext() + + register_tool_trace_provider(failing_provider) + register_tool_trace_provider(working_provider) + + # Should not raise + with trace_tool_call("test-tool"): + pass + + assert call_order == ["working"] + + +class TestTracedToolDecorator: + """Tests for the @traced_tool decorator.""" + + def setup_method(self): + """Store original providers before each test.""" + self._original_providers = _tool_trace_providers.copy() + clear_tool_trace_providers() + + def teardown_method(self): + """Restore original providers after each test.""" + clear_tool_trace_providers() + for provider in self._original_providers: + 
register_tool_trace_provider(provider) + + def test_traced_tool_with_explicit_name(self): + """Test @traced_tool with explicit tool name.""" + traced_calls = [] + + def my_provider(tool_name: str, inputs): + traced_calls.append(tool_name) + return nullcontext() + + register_tool_trace_provider(my_provider) + + @traced_tool(tool_name="my_custom_tool") + def some_function(x, y): + return x + y + + result = some_function(1, 2) + assert result == 3 + assert traced_calls == ["my_custom_tool"] + + def test_traced_tool_with_auto_name(self): + """Test @traced_tool with automatic name detection.""" + traced_calls = [] + + def my_provider(tool_name: str, inputs): + traced_calls.append(tool_name) + return nullcontext() + + register_tool_trace_provider(my_provider) + + @traced_tool() + def auto_named_function(x): + return x * 2 + + result = auto_named_function(5) + assert result == 10 + assert traced_calls == ["auto_named_function"] + + def test_traced_tool_captures_kwargs(self): + """Test that @traced_tool captures kwargs as inputs.""" + traced_inputs = [] + + def my_provider(tool_name: str, inputs): + traced_inputs.append(inputs) + return nullcontext() + + register_tool_trace_provider(my_provider) + + @traced_tool(tool_name="test") + def func_with_kwargs(a, b=10, c="hello"): + return f"{a}-{b}-{c}" + + result = func_with_kwargs(1, b=20, c="world") + assert result == "1-20-world" + assert traced_inputs == [{"b": 20, "c": "world"}] + + +class TestMCPTracing: + """Tests for MCP-specific tracing functions.""" + + def setup_method(self): + """Store original providers before each test.""" + self._original_providers = _tool_trace_providers.copy() + clear_tool_trace_providers() + + def teardown_method(self): + """Restore original providers after each test.""" + clear_tool_trace_providers() + for provider in self._original_providers: + register_tool_trace_provider(provider) + + def test_trace_mcp_list_tools(self): + """Test trace_mcp_list_tools context manager.""" + traced_calls = [] + + def my_provider(tool_name: str, inputs): + traced_calls.append(tool_name) + return nullcontext() + + register_tool_trace_provider(my_provider) + + with trace_mcp_list_tools(): + pass + + assert traced_calls == ["mcp:list_tools"] + + def test_trace_mcp_list_tools_with_server_name(self): + """Test trace_mcp_list_tools with server name.""" + traced_calls = [] + + def my_provider(tool_name: str, inputs): + traced_calls.append(tool_name) + return nullcontext() + + register_tool_trace_provider(my_provider) + + with trace_mcp_list_tools(server_name="my-server"): + pass + + assert traced_calls == ["mcp:list_tools:my-server"] + + def test_trace_mcp_call_tool(self): + """Test trace_mcp_call_tool context manager.""" + traced_calls = [] + + def my_provider(tool_name: str, inputs): + traced_calls.append((tool_name, inputs)) + return nullcontext() + + register_tool_trace_provider(my_provider) + + with trace_mcp_call_tool("read_file", inputs={"path": "/tmp/test.txt"}): + pass + + assert traced_calls == [("mcp:read_file", {"path": "/tmp/test.txt"})] + + def test_trace_mcp_call_tool_with_server_name(self): + """Test trace_mcp_call_tool with server name.""" + traced_calls = [] + + def my_provider(tool_name: str, inputs): + traced_calls.append(tool_name) + return nullcontext() + + register_tool_trace_provider(my_provider) + + with trace_mcp_call_tool("read_file", server_name="filesystem"): + pass + + assert traced_calls == ["mcp:filesystem:read_file"] + + +class TestToolTraceBuiltInProviders: + """Tests for the built-in tool trace 
providers.""" + + def test_weave_tool_trace_returns_nullcontext_when_not_initialized(self): + """Test that Weave tool trace provider returns nullcontext when not initialized.""" + from openhands.sdk.observability.context import _get_weave_tool_trace + + with patch( + "openhands.sdk.observability.weave.is_weave_initialized", + return_value=False, + ): + ctx = _get_weave_tool_trace("test-tool", {"arg": "value"}) + assert type(ctx).__name__ == "nullcontext" + + def test_laminar_tool_trace_returns_nullcontext_when_not_initialized(self): + """Test that Laminar tool trace provider returns nullcontext when not initialized.""" + from openhands.sdk.observability.context import _get_laminar_tool_trace + + with patch( + "openhands.sdk.observability.laminar.should_enable_observability", + return_value=False, + ): + ctx = _get_laminar_tool_trace("test-tool", {"arg": "value"}) + assert type(ctx).__name__ == "nullcontext" + + def test_tool_trace_providers_auto_registered(self): + """Test that built-in tool trace providers are registered on import.""" + from openhands.sdk.observability import context + + provider_names = [p.__name__ for p in context._tool_trace_providers] + assert "_get_weave_tool_trace" in provider_names + assert "_get_laminar_tool_trace" in provider_names From f42f317aedb382e13c2d173c7acde654d56ebddb Mon Sep 17 00:00:00 2001 From: openhands Date: Fri, 19 Dec 2025 00:44:06 +0000 Subject: [PATCH 7/8] refactor: Simplify Weave integration - remove complex tool tracing Simplifies the Weave observability integration to be more elegant: 1. **Removed complex tool tracing system**: - Removed trace_tool_call, traced_tool, trace_mcp_* functions - Removed tool trace provider registry - These were over-engineered; Weave's autopatching handles LLM tracing 2. **Simplified weave.py**: - Kept only essential functions: init_weave, maybe_init_weave, weave_op - Removed WeaveSpanManager, observe_weave, weave_attributes, weave_thread - Users can use weave.op and weave.thread directly from the weave package 3. **Key exports**: - init_weave(): Initialize Weave with autopatching - maybe_init_weave(): Conditional init based on env vars - weave_op(): Decorator wrapper that's a no-op when not initialized - get_weave_op(): Get weave.op or no-op decorator 4. **Design philosophy**: - Weave autopatching traces all LiteLLM calls automatically - Use @weave.op directly for custom function tracing - Use weave.thread() directly for conversation grouping - Keep SDK integration minimal and non-invasive This matches the approach in the OpenHands PR #12056. 
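A minimal usage sketch of the simplified design described above (illustrative only; assumes the weave package is installed and WANDB_API_KEY / WEAVE_PROJECT are set so initialization succeeds):

```python
import weave  # optional dependency; needed only for weave.thread()

from openhands.sdk.observability import maybe_init_weave, weave_op

# Conditional init: a no-op unless WANDB_API_KEY and WEAVE_PROJECT are set.
maybe_init_weave()

# Custom agent logic traced via weave_op (itself a no-op when Weave is off).
@weave_op(name="agent_step")
def agent_step(user_input: str) -> dict:
    return {"echo": user_input, "length": len(user_input)}

# Group related operations under one conversation thread.
with weave.thread("conversation-123"):
    agent_step("hello")
```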
Co-authored-by: openhands --- openhands-sdk/openhands/sdk/agent/agent.py | 27 +- openhands-sdk/openhands/sdk/mcp/tool.py | 54 ++- openhands-sdk/openhands/sdk/mcp/utils.py | 25 +- .../openhands/sdk/observability/__init__.py | 30 -- .../openhands/sdk/observability/context.py | 277 +-------------- .../openhands/sdk/observability/weave.py | 221 +----------- tests/sdk/observability/test_context.py | 329 ------------------ tests/sdk/observability/test_weave.py | 113 ------ 8 files changed, 57 insertions(+), 1019 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/agent.py b/openhands-sdk/openhands/sdk/agent/agent.py index 9029c35165..d88d2656d4 100644 --- a/openhands-sdk/openhands/sdk/agent/agent.py +++ b/openhands-sdk/openhands/sdk/agent/agent.py @@ -41,7 +41,6 @@ LLMContextWindowExceedError, ) from openhands.sdk.logger import get_logger -from openhands.sdk.observability.context import trace_tool_call from openhands.sdk.observability.laminar import ( maybe_init_laminar, observe, @@ -479,24 +478,14 @@ def _execute_action_event( "as it was checked earlier." ) - # Execute actions with unified observability tracing - tool_name = extract_action_name(action_event) - - # Extract inputs for tracing (safely serialize action data) - try: - inputs = action_event.action.model_dump(exclude_none=True) - except Exception: - inputs = None - - # Use unified trace_tool_call for all observability tools (Weave, Laminar, etc.) - # Plus Laminar's @observe decorator for backward compatibility - with trace_tool_call(tool_name, inputs=inputs): - if should_enable_observability(): - observation: Observation = observe(name=tool_name, span_type="TOOL")( - tool - )(action_event.action, conversation) - else: - observation = tool(action_event.action, conversation) + # Execute actions! + if should_enable_observability(): + tool_name = extract_action_name(action_event) + observation: Observation = observe(name=tool_name, span_type="TOOL")(tool)( + action_event.action, conversation + ) + else: + observation = tool(action_event.action, conversation) assert isinstance(observation, Observation), ( f"Tool '{tool.name}' executor must return an Observation" ) diff --git a/openhands-sdk/openhands/sdk/mcp/tool.py b/openhands-sdk/openhands/sdk/mcp/tool.py index 6e6ef1f21b..69aaf54f9a 100644 --- a/openhands-sdk/openhands/sdk/mcp/tool.py +++ b/openhands-sdk/openhands/sdk/mcp/tool.py @@ -15,7 +15,6 @@ from openhands.sdk.logger import get_logger from openhands.sdk.mcp.client import MCPClient from openhands.sdk.mcp.definition import MCPToolAction, MCPToolObservation -from openhands.sdk.observability.context import trace_mcp_call_tool from openhands.sdk.observability.laminar import observe from openhands.sdk.tool import ( Action, @@ -53,38 +52,27 @@ def __init__(self, tool_name: str, client: MCPClient): @observe(name="MCPToolExecutor.call_tool", span_type="TOOL") async def call_tool(self, action: MCPToolAction) -> MCPToolObservation: - # Extract inputs for tracing - try: - inputs = action.to_mcp_arguments() - except Exception: - inputs = None - - # Use unified MCP tool tracing for all observability tools - with trace_mcp_call_tool( - tool_name=self.tool_name, - inputs=inputs, - ): - async with self.client: - assert self.client.is_connected(), "MCP client is not connected." 
- try: - logger.debug( - f"Calling MCP tool {self.tool_name} " - f"with args: {action.model_dump()}" - ) - result: mcp.types.CallToolResult = await self.client.call_tool_mcp( - name=self.tool_name, arguments=action.to_mcp_arguments() - ) - return MCPToolObservation.from_call_tool_result( - tool_name=self.tool_name, result=result - ) - except Exception as e: - error_msg = f"Error calling MCP tool {self.tool_name}: {str(e)}" - logger.error(error_msg, exc_info=True) - return MCPToolObservation.from_text( - text=error_msg, - is_error=True, - tool_name=self.tool_name, - ) + async with self.client: + assert self.client.is_connected(), "MCP client is not connected." + try: + logger.debug( + f"Calling MCP tool {self.tool_name} " + f"with args: {action.model_dump()}" + ) + result: mcp.types.CallToolResult = await self.client.call_tool_mcp( + name=self.tool_name, arguments=action.to_mcp_arguments() + ) + return MCPToolObservation.from_call_tool_result( + tool_name=self.tool_name, result=result + ) + except Exception as e: + error_msg = f"Error calling MCP tool {self.tool_name}: {str(e)}" + logger.error(error_msg, exc_info=True) + return MCPToolObservation.from_text( + text=error_msg, + is_error=True, + tool_name=self.tool_name, + ) def __call__( self, diff --git a/openhands-sdk/openhands/sdk/mcp/utils.py b/openhands-sdk/openhands/sdk/mcp/utils.py index a6df3de73e..1093280466 100644 --- a/openhands-sdk/openhands/sdk/mcp/utils.py +++ b/openhands-sdk/openhands/sdk/mcp/utils.py @@ -10,7 +10,6 @@ from openhands.sdk.mcp.client import MCPClient from openhands.sdk.mcp.exceptions import MCPTimeoutError from openhands.sdk.mcp.tool import MCPToolDefinition -from openhands.sdk.observability.context import trace_mcp_list_tools from openhands.sdk.tool.tool import ToolDefinition @@ -37,19 +36,17 @@ async def _list_tools(client: MCPClient) -> list[ToolDefinition]: """List tools from an MCP client.""" tools: list[ToolDefinition] = [] - # Use unified MCP list tools tracing for all observability tools - with trace_mcp_list_tools(): - async with client: - assert client.is_connected(), "MCP client is not connected." - mcp_type_tools: list[mcp.types.Tool] = await client.list_tools() - for mcp_tool in mcp_type_tools: - tool_sequence = MCPToolDefinition.create( - mcp_tool=mcp_tool, mcp_client=client - ) - tools.extend(tool_sequence) # Flatten sequence into list - assert not client.is_connected(), ( - "MCP client should be disconnected after listing tools." - ) + async with client: + assert client.is_connected(), "MCP client is not connected." + mcp_type_tools: list[mcp.types.Tool] = await client.list_tools() + for mcp_tool in mcp_type_tools: + tool_sequence = MCPToolDefinition.create( + mcp_tool=mcp_tool, mcp_client=client + ) + tools.extend(tool_sequence) # Flatten sequence into list + assert not client.is_connected(), ( + "MCP client should be disconnected after listing tools." 
+ ) return tools diff --git a/openhands-sdk/openhands/sdk/observability/__init__.py b/openhands-sdk/openhands/sdk/observability/__init__.py index 0a378f4b72..01b6f95560 100644 --- a/openhands-sdk/openhands/sdk/observability/__init__.py +++ b/openhands-sdk/openhands/sdk/observability/__init__.py @@ -1,33 +1,18 @@ from openhands.sdk.observability.context import ( - # Conversation context clear_conversation_context_providers, get_conversation_context, register_conversation_context_provider, unregister_conversation_context_provider, - # Tool tracing - clear_tool_trace_providers, - register_tool_trace_provider, - trace_mcp_call_tool, - trace_mcp_list_tools, - trace_tool_call, - traced_tool, - unregister_tool_trace_provider, ) from openhands.sdk.observability.laminar import maybe_init_laminar, observe from openhands.sdk.observability.weave import ( - end_weave_span, get_weave_client, get_weave_op, init_weave, is_weave_initialized, maybe_init_weave, - observe_weave, should_enable_weave, - start_weave_span, - weave_attributes, weave_op, - weave_thread, - WeaveSpanManager, ) @@ -37,30 +22,15 @@ "register_conversation_context_provider", "unregister_conversation_context_provider", "clear_conversation_context_providers", - # Tool tracing (unified interface) - "trace_tool_call", - "traced_tool", - "register_tool_trace_provider", - "unregister_tool_trace_provider", - "clear_tool_trace_providers", - # MCP-specific tracing - "trace_mcp_list_tools", - "trace_mcp_call_tool", # Laminar exports "maybe_init_laminar", "observe", # Weave exports - "end_weave_span", "get_weave_client", "get_weave_op", "init_weave", "is_weave_initialized", "maybe_init_weave", - "observe_weave", "should_enable_weave", - "start_weave_span", - "weave_attributes", "weave_op", - "weave_thread", - "WeaveSpanManager", ] diff --git a/openhands-sdk/openhands/sdk/observability/context.py b/openhands-sdk/openhands/sdk/observability/context.py index 8cbafc4c48..c605e0119c 100644 --- a/openhands-sdk/openhands/sdk/observability/context.py +++ b/openhands-sdk/openhands/sdk/observability/context.py @@ -13,13 +13,10 @@ 1. **Unified Context Managers**: A single `get_conversation_context()` function that returns a composed context manager for all enabled tools. -2. **Tool Tracing**: A `trace_tool_call()` decorator/context manager for tracing - tool executions across all enabled observability tools. - -3. **Provider Registry**: Observability tools register their context providers, +2. **Provider Registry**: Observability tools register their context providers, allowing easy extension for new tools. -4. **Graceful Degradation**: If no observability tools are enabled, the context +3. **Graceful Degradation**: If no observability tools are enabled, the context managers are no-ops (nullcontext). ## Usage @@ -34,20 +31,6 @@ def run(self): ... ``` -For tool execution tracing: -```python -from openhands.sdk.observability.context import trace_tool_call - -# As a decorator -@trace_tool_call(tool_name="my_tool") -def execute_tool(action): - ... 
- -# As a context manager -with trace_tool_call(tool_name="my_tool", inputs={"arg": "value"}): - result = tool.execute(action) -``` - ## Adding New Observability Providers To add a new observability tool: @@ -69,17 +52,13 @@ def get_my_tool_context(conversation_id: str): from collections.abc import Callable from contextlib import ExitStack, contextmanager, nullcontext -from functools import wraps -from typing import Any, ContextManager, Iterator, ParamSpec, TypeVar +from typing import Any, ContextManager, Iterator from openhands.sdk.logger import get_logger logger = get_logger(__name__) -P = ParamSpec("P") -R = TypeVar("R") - # Type alias for context provider functions ConversationContextProvider = Callable[[str], ContextManager[Any]] @@ -244,253 +223,3 @@ def laminar_conversation_context(): # Register built-in providers register_conversation_context_provider(_get_weave_conversation_context) register_conversation_context_provider(_get_laminar_conversation_context) - - -# ============================================================================= -# Tool Call Tracing -# ============================================================================= -# Unified tracing for tool executions across all observability tools. - - -ToolTraceProvider = Callable[[str, dict[str, Any] | None], ContextManager[Any]] - -# Registry of tool trace providers -_tool_trace_providers: list[ToolTraceProvider] = [] - - -def register_tool_trace_provider(provider: ToolTraceProvider) -> None: - """Register a tool trace provider. - - Tool trace providers are functions that take a tool_name and optional - inputs dict, and return a context manager for tracing the tool execution. - - Args: - provider: A function that takes (tool_name, inputs) and returns - a context manager. Should return nullcontext() if the - observability tool is not initialized. - """ - if provider not in _tool_trace_providers: - _tool_trace_providers.append(provider) - logger.debug(f"Registered tool trace provider: {provider.__name__}") - - -def unregister_tool_trace_provider(provider: ToolTraceProvider) -> None: - """Unregister a tool trace provider.""" - if provider in _tool_trace_providers: - _tool_trace_providers.remove(provider) - logger.debug(f"Unregistered tool trace provider: {provider.__name__}") - - -def clear_tool_trace_providers() -> None: - """Clear all registered tool trace providers.""" - _tool_trace_providers.clear() - logger.debug("Cleared all tool trace providers") - - -@contextmanager -def trace_tool_call( - tool_name: str, - inputs: dict[str, Any] | None = None, - tool_type: str = "TOOL", -) -> Iterator[None]: - """Trace a tool call across all enabled observability tools. - - This context manager wraps tool executions with tracing from all - registered observability providers (Weave, Laminar, etc.). - - Args: - tool_name: The name of the tool being executed. - inputs: Optional dict of input arguments to the tool. - tool_type: The type of tool (e.g., "TOOL", "MCP_TOOL"). Used for - categorization in observability UIs. 
- - Yields: - None - - Example: - ```python - with trace_tool_call("bash", inputs={"command": "ls -la"}): - result = bash_tool.execute(action) - ``` - """ - if not _tool_trace_providers: - yield - return - - with ExitStack() as stack: - for provider in _tool_trace_providers: - try: - ctx = provider(tool_name, inputs) - stack.enter_context(ctx) - except Exception as e: - logger.debug( - f"Error entering tool trace from provider {provider.__name__}: {e}" - ) - yield - - -def traced_tool( - tool_name: str | None = None, - tool_type: str = "TOOL", -) -> Callable[[Callable[P, R]], Callable[P, R]]: - """Decorator to trace tool execution functions. - - This decorator wraps a function with tool tracing from all registered - observability providers. It automatically captures the function's - arguments as inputs. - - Args: - tool_name: The name of the tool. If None, uses the function name. - tool_type: The type of tool (e.g., "TOOL", "MCP_TOOL"). - - Returns: - A decorator that wraps the function with tool tracing. - - Example: - ```python - @traced_tool(tool_name="bash") - def execute_bash(command: str) -> str: - ... - - # Or with automatic name detection - @traced_tool() - def my_tool(arg1, arg2): - ... - ``` - """ - def decorator(func: Callable[P, R]) -> Callable[P, R]: - name = tool_name or func.__name__ - - @wraps(func) - def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: - # Capture inputs from kwargs (args are harder to name) - inputs = dict(kwargs) if kwargs else None - with trace_tool_call(name, inputs=inputs, tool_type=tool_type): - return func(*args, **kwargs) - - return wrapper - - return decorator - - -# ============================================================================= -# Built-in Tool Trace Providers -# ============================================================================= - - -def _get_weave_tool_trace( - tool_name: str, inputs: dict[str, Any] | None -) -> ContextManager[Any]: - """Weave tool trace provider. - - Uses weave.attributes() to add tool metadata to the current span. - The actual tracing is done by Weave's autopatching of the underlying - operations (LLM calls, etc.). - """ - try: - from openhands.sdk.observability.weave import is_weave_initialized - - if not is_weave_initialized(): - return nullcontext() - - import weave - - # Use weave.attributes to add tool metadata to the trace - attributes = {"tool_name": tool_name, "tool_type": "TOOL"} - if inputs: - # Sanitize inputs - convert non-serializable types to strings - safe_inputs = {} - for k, v in inputs.items(): - try: - # Test if it's JSON serializable - import json - json.dumps(v) - safe_inputs[k] = v - except (TypeError, ValueError): - safe_inputs[k] = str(v) - attributes["tool_inputs"] = safe_inputs - - return weave.attributes(attributes) - except ImportError: - return nullcontext() - except Exception: - return nullcontext() - - -def _get_laminar_tool_trace( - tool_name: str, inputs: dict[str, Any] | None # noqa: ARG001 -) -> ContextManager[Any]: - """Laminar tool trace provider. - - Creates a Laminar span for the tool execution. - Note: Laminar's @observe decorator is typically used directly, - but this provides a context manager alternative. 
- """ - try: - from openhands.sdk.observability.laminar import should_enable_observability - - if not should_enable_observability(): - return nullcontext() - - from lmnr import Laminar - - @contextmanager - def laminar_tool_trace(): - span = Laminar.start_active_span(f"tool:{tool_name}") - try: - yield - finally: - if span and span.is_recording(): - span.end() - - return laminar_tool_trace() - except ImportError: - return nullcontext() - except Exception: - return nullcontext() - - -# Register built-in tool trace providers -register_tool_trace_provider(_get_weave_tool_trace) -register_tool_trace_provider(_get_laminar_tool_trace) - - -# ============================================================================= -# MCP-Specific Tracing -# ============================================================================= - - -@contextmanager -def trace_mcp_list_tools(server_name: str | None = None) -> Iterator[None]: - """Trace MCP tool listing operations. - - Args: - server_name: Optional name of the MCP server being queried. - - Yields: - None - """ - tool_name = f"mcp:list_tools:{server_name}" if server_name else "mcp:list_tools" - with trace_tool_call(tool_name, tool_type="MCP_LIST"): - yield - - -@contextmanager -def trace_mcp_call_tool( - tool_name: str, - server_name: str | None = None, - inputs: dict[str, Any] | None = None, -) -> Iterator[None]: - """Trace MCP tool call operations. - - Args: - tool_name: The name of the MCP tool being called. - server_name: Optional name of the MCP server. - inputs: Optional dict of input arguments. - - Yields: - None - """ - full_name = f"mcp:{server_name}:{tool_name}" if server_name else f"mcp:{tool_name}" - with trace_tool_call(full_name, inputs=inputs, tool_type="MCP_TOOL"): - yield diff --git a/openhands-sdk/openhands/sdk/observability/weave.py b/openhands-sdk/openhands/sdk/observability/weave.py index 6d16e74ba3..740c42e4da 100644 --- a/openhands-sdk/openhands/sdk/observability/weave.py +++ b/openhands-sdk/openhands/sdk/observability/weave.py @@ -15,12 +15,12 @@ 3. **Optional manual tracing**: Use `@weave.op` for custom agent logic that you want to trace (tool execution, agent steps, etc.) -4. **Thread grouping**: Group related operations under conversation threads. +4. **Thread grouping**: Use `weave.thread()` to group operations by conversation. ## How It Works The SDK uses LiteLLM for all LLM calls. When you call `init_weave()`: -1. Weave's `implicit_patch()` automatically patches LiteLLM +1. Weave's autopatching automatically patches LiteLLM 2. All `litellm.completion()` and `litellm.acompletion()` calls are traced 3. You see full traces in the Weave UI without any code changes! @@ -64,14 +64,15 @@ def process_agent_step(step: dict) -> dict: ### Conversation Thread Grouping ```python -from openhands.sdk.observability import init_weave, weave_attributes +import weave +from openhands.sdk.observability import init_weave init_weave("my-team/my-project") -# Group all operations under a conversation -with weave_attributes(conversation_id="conv-123", user_id="user-456"): +# Group all operations under a conversation thread +with weave.thread("conversation-123"): # All LLM calls and traced functions within this block - # will be tagged with these attributes + # will be grouped under the same thread response = llm.completion(...) 
``` @@ -85,7 +86,6 @@ def process_agent_step(step: dict) -> dict: import logging import os from collections.abc import Callable -from contextlib import contextmanager from typing import Any, ParamSpec, TypeVar from openhands.sdk.observability.utils import get_env @@ -252,55 +252,7 @@ def should_enable_weave() -> bool: return bool(get_env("WANDB_API_KEY") and get_env("WEAVE_PROJECT")) -@contextmanager -def weave_attributes(**attributes: Any): - """Context manager to add attributes to all operations within the block. - - This is useful for grouping related operations (e.g., all events in a - conversation) or adding metadata to traces. - - Args: - **attributes: Key-value pairs to attach to all operations. - Common attributes: conversation_id, user_id, session_id, etc. - - Example: - >>> with weave_attributes(conversation_id="conv-123", user_id="user-456"): - ... # All LLM calls and traced functions here will have these attributes - ... response = llm.completion(messages=[...]) - """ - if not _weave_initialized: - yield - return - - try: - import weave - with weave.attributes(attributes): - yield - except Exception as e: - logger.warning(f"Failed to set weave attributes: {e}") - yield - - -@contextmanager -def weave_thread(thread_id: str): - """Context manager to group operations under a thread. - - This is an alias for weave_attributes(thread_id=...) for convenience - and backward compatibility. - - Args: - thread_id: Unique identifier for the thread (e.g., conversation ID). - - Example: - >>> with weave_thread("conversation-123"): - ... # All operations here will be grouped under the same thread - ... response = llm.completion(messages=[...]) - """ - with weave_attributes(thread_id=thread_id): - yield - - -def get_weave_op(): +def get_weave_op() -> Callable: """Get the weave.op decorator for manual function tracing. Returns the actual weave.op decorator if Weave is initialized, @@ -359,11 +311,17 @@ def my_func(): ... func: The function to decorate (when used without parentheses). name: Optional name for the operation. Defaults to function name. call_display_name: Display name for the call in the Weave UI. + Can be a string or a callable that takes the Call object. postprocess_inputs: Function to transform inputs before logging. postprocess_output: Function to transform output before logging. Returns: The decorated function or a decorator. + + Example: + >>> @weave_op(name="agent_step") + ... def step(action: dict) -> dict: + ... return execute(action) """ def decorator(fn: Callable[P, R]) -> Callable[P, R]: if not _weave_initialized: @@ -393,154 +351,3 @@ def decorator(fn: Callable[P, R]) -> Callable[P, R]: if func is not None: return decorator(func) return decorator - - -def observe_weave( - name: str | None = None, - *, - ignore_inputs: list[str] | None = None, - ignore_output: bool = False, -) -> Callable[[Callable[P, R]], Callable[P, R]]: - """Decorator for observing functions with Weave (Laminar-compatible interface). - - This provides a similar interface to the Laminar `observe` decorator, - making it easier to switch between observability backends. - - Args: - name: Optional name for the operation. - ignore_inputs: List of input parameter names to exclude from logging. - ignore_output: If True, don't log the output. - - Returns: - A decorator that wraps the function for Weave tracing. - - Example: - >>> @observe_weave(name="login", ignore_inputs=["password"]) - ... def login(username: str, password: str) -> bool: - ... 
return authenticate(username, password) - """ - def postprocess_inputs_fn(inputs: dict[str, Any]) -> dict[str, Any]: - if not ignore_inputs: - return inputs - return {k: v for k, v in inputs.items() if k not in ignore_inputs} - - def postprocess_output_fn(output: Any) -> Any: - if ignore_output: - return "[output hidden]" - return output - - return weave_op( - name=name, - postprocess_inputs=postprocess_inputs_fn if ignore_inputs else None, - postprocess_output=postprocess_output_fn if ignore_output else None, - ) - - -class WeaveSpanManager: - """Manager for manual span lifecycle control. - - This class provides fine-grained control over span creation and completion, - useful when automatic decoration is not suitable. - - Note: For most use cases, the automatic LLM tracing and @weave_op decorator - are sufficient. Use this only when you need explicit span control. - - Example: - >>> manager = WeaveSpanManager() - >>> manager.start_span("process_batch", inputs={"batch_size": 100}) - >>> try: - ... result = process_batch() - ... manager.end_span(output=result) - ... except Exception as e: - ... manager.end_span(error=str(e)) - """ - - def __init__(self): - self._call_stack: list[Any] = [] - - def start_span( - self, - name: str, - inputs: dict[str, Any] | None = None, - ) -> Any: - """Start a new span. - - Args: - name: Name of the span/operation. - inputs: Input parameters to log. - - Returns: - The span/call object if successful, None otherwise. - """ - if not _weave_initialized: - return None - - try: - import weave - - @weave.op(name=name) - def _span_op(**kwargs: Any) -> Any: - pass - - call = _span_op.call(inputs or {}) - self._call_stack.append(call) - return call - except Exception as e: - logger.warning(f"Failed to start weave span: {e}") - return None - - def end_span( - self, - output: Any = None, - error: str | None = None, - ) -> None: - """End the current span. - - Args: - output: Output value to log. - error: Error message if the span failed. - """ - if not self._call_stack: - return - - try: - call = self._call_stack.pop() - if error: - call.finish(exception=Exception(error)) - else: - call.finish(output=output) - except Exception as e: - logger.warning(f"Failed to end weave span: {e}") - - -# Global span manager instance for convenience -_global_span_manager = WeaveSpanManager() - - -def start_weave_span( - name: str, - inputs: dict[str, Any] | None = None, -) -> Any: - """Start a new Weave span using the global manager. - - Args: - name: Name of the span/operation. - inputs: Input parameters to log. - - Returns: - The span/call object if successful, None otherwise. - """ - return _global_span_manager.start_span(name, inputs) - - -def end_weave_span( - output: Any = None, - error: str | None = None, -) -> None: - """End the current Weave span using the global manager. - - Args: - output: Output value to log. - error: Error message if the span failed. 
- """ - _global_span_manager.end_span(output, error) diff --git a/tests/sdk/observability/test_context.py b/tests/sdk/observability/test_context.py index ba2e69a0d8..5f369ed515 100644 --- a/tests/sdk/observability/test_context.py +++ b/tests/sdk/observability/test_context.py @@ -10,15 +10,6 @@ register_conversation_context_provider, unregister_conversation_context_provider, _conversation_context_providers, - # Tool tracing - clear_tool_trace_providers, - register_tool_trace_provider, - unregister_tool_trace_provider, - trace_tool_call, - traced_tool, - trace_mcp_list_tools, - trace_mcp_call_tool, - _tool_trace_providers, ) @@ -245,323 +236,3 @@ def custom_provider(conversation_id: str): assert "test-conv" in custom_called finally: unregister_conversation_context_provider(custom_provider) - - -# ============================================================================= -# Tool Tracing Tests -# ============================================================================= - - -class TestToolTraceProviderRegistry: - """Tests for the tool trace provider registry functions.""" - - def setup_method(self): - """Store original providers before each test.""" - self._original_providers = _tool_trace_providers.copy() - clear_tool_trace_providers() - - def teardown_method(self): - """Restore original providers after each test.""" - clear_tool_trace_providers() - for provider in self._original_providers: - register_tool_trace_provider(provider) - - def test_register_tool_trace_provider(self): - """Test registering a new tool trace provider.""" - def my_provider(tool_name: str, inputs): - return nullcontext() - - register_tool_trace_provider(my_provider) - assert my_provider in _tool_trace_providers - - def test_register_tool_trace_provider_no_duplicates(self): - """Test that registering the same provider twice doesn't create duplicates.""" - def my_provider(tool_name: str, inputs): - return nullcontext() - - register_tool_trace_provider(my_provider) - register_tool_trace_provider(my_provider) - assert _tool_trace_providers.count(my_provider) == 1 - - def test_unregister_tool_trace_provider(self): - """Test unregistering a tool trace provider.""" - def my_provider(tool_name: str, inputs): - return nullcontext() - - register_tool_trace_provider(my_provider) - assert my_provider in _tool_trace_providers - - unregister_tool_trace_provider(my_provider) - assert my_provider not in _tool_trace_providers - - def test_clear_tool_trace_providers(self): - """Test clearing all tool trace providers.""" - def provider1(tool_name: str, inputs): - return nullcontext() - - def provider2(tool_name: str, inputs): - return nullcontext() - - register_tool_trace_provider(provider1) - register_tool_trace_provider(provider2) - assert len(_tool_trace_providers) == 2 - - clear_tool_trace_providers() - assert len(_tool_trace_providers) == 0 - - -class TestTraceToolCall: - """Tests for the trace_tool_call context manager.""" - - def setup_method(self): - """Store original providers before each test.""" - self._original_providers = _tool_trace_providers.copy() - clear_tool_trace_providers() - - def teardown_method(self): - """Restore original providers after each test.""" - clear_tool_trace_providers() - for provider in self._original_providers: - register_tool_trace_provider(provider) - - def test_no_providers_is_noop(self): - """Test that with no providers, the context is a no-op.""" - executed = False - - with trace_tool_call("test-tool"): - executed = True - - assert executed - - def test_single_provider_called(self): - 
"""Test that a single provider is called with tool name and inputs.""" - called_with = [] - - def my_provider(tool_name: str, inputs): - called_with.append((tool_name, inputs)) - return nullcontext() - - register_tool_trace_provider(my_provider) - - with trace_tool_call("bash", inputs={"command": "ls"}): - pass - - assert called_with == [("bash", {"command": "ls"})] - - def test_multiple_providers_called(self): - """Test that multiple providers are called.""" - call_order = [] - - def provider1(tool_name: str, inputs): - call_order.append("provider1") - return nullcontext() - - def provider2(tool_name: str, inputs): - call_order.append("provider2") - return nullcontext() - - register_tool_trace_provider(provider1) - register_tool_trace_provider(provider2) - - with trace_tool_call("test-tool"): - pass - - assert call_order == ["provider1", "provider2"] - - def test_provider_exception_does_not_break_others(self): - """Test that an exception in one provider doesn't prevent others.""" - call_order = [] - - def failing_provider(tool_name: str, inputs): - raise RuntimeError("Provider failed") - - def working_provider(tool_name: str, inputs): - call_order.append("working") - return nullcontext() - - register_tool_trace_provider(failing_provider) - register_tool_trace_provider(working_provider) - - # Should not raise - with trace_tool_call("test-tool"): - pass - - assert call_order == ["working"] - - -class TestTracedToolDecorator: - """Tests for the @traced_tool decorator.""" - - def setup_method(self): - """Store original providers before each test.""" - self._original_providers = _tool_trace_providers.copy() - clear_tool_trace_providers() - - def teardown_method(self): - """Restore original providers after each test.""" - clear_tool_trace_providers() - for provider in self._original_providers: - register_tool_trace_provider(provider) - - def test_traced_tool_with_explicit_name(self): - """Test @traced_tool with explicit tool name.""" - traced_calls = [] - - def my_provider(tool_name: str, inputs): - traced_calls.append(tool_name) - return nullcontext() - - register_tool_trace_provider(my_provider) - - @traced_tool(tool_name="my_custom_tool") - def some_function(x, y): - return x + y - - result = some_function(1, 2) - assert result == 3 - assert traced_calls == ["my_custom_tool"] - - def test_traced_tool_with_auto_name(self): - """Test @traced_tool with automatic name detection.""" - traced_calls = [] - - def my_provider(tool_name: str, inputs): - traced_calls.append(tool_name) - return nullcontext() - - register_tool_trace_provider(my_provider) - - @traced_tool() - def auto_named_function(x): - return x * 2 - - result = auto_named_function(5) - assert result == 10 - assert traced_calls == ["auto_named_function"] - - def test_traced_tool_captures_kwargs(self): - """Test that @traced_tool captures kwargs as inputs.""" - traced_inputs = [] - - def my_provider(tool_name: str, inputs): - traced_inputs.append(inputs) - return nullcontext() - - register_tool_trace_provider(my_provider) - - @traced_tool(tool_name="test") - def func_with_kwargs(a, b=10, c="hello"): - return f"{a}-{b}-{c}" - - result = func_with_kwargs(1, b=20, c="world") - assert result == "1-20-world" - assert traced_inputs == [{"b": 20, "c": "world"}] - - -class TestMCPTracing: - """Tests for MCP-specific tracing functions.""" - - def setup_method(self): - """Store original providers before each test.""" - self._original_providers = _tool_trace_providers.copy() - clear_tool_trace_providers() - - def teardown_method(self): - """Restore 
original providers after each test.""" - clear_tool_trace_providers() - for provider in self._original_providers: - register_tool_trace_provider(provider) - - def test_trace_mcp_list_tools(self): - """Test trace_mcp_list_tools context manager.""" - traced_calls = [] - - def my_provider(tool_name: str, inputs): - traced_calls.append(tool_name) - return nullcontext() - - register_tool_trace_provider(my_provider) - - with trace_mcp_list_tools(): - pass - - assert traced_calls == ["mcp:list_tools"] - - def test_trace_mcp_list_tools_with_server_name(self): - """Test trace_mcp_list_tools with server name.""" - traced_calls = [] - - def my_provider(tool_name: str, inputs): - traced_calls.append(tool_name) - return nullcontext() - - register_tool_trace_provider(my_provider) - - with trace_mcp_list_tools(server_name="my-server"): - pass - - assert traced_calls == ["mcp:list_tools:my-server"] - - def test_trace_mcp_call_tool(self): - """Test trace_mcp_call_tool context manager.""" - traced_calls = [] - - def my_provider(tool_name: str, inputs): - traced_calls.append((tool_name, inputs)) - return nullcontext() - - register_tool_trace_provider(my_provider) - - with trace_mcp_call_tool("read_file", inputs={"path": "/tmp/test.txt"}): - pass - - assert traced_calls == [("mcp:read_file", {"path": "/tmp/test.txt"})] - - def test_trace_mcp_call_tool_with_server_name(self): - """Test trace_mcp_call_tool with server name.""" - traced_calls = [] - - def my_provider(tool_name: str, inputs): - traced_calls.append(tool_name) - return nullcontext() - - register_tool_trace_provider(my_provider) - - with trace_mcp_call_tool("read_file", server_name="filesystem"): - pass - - assert traced_calls == ["mcp:filesystem:read_file"] - - -class TestToolTraceBuiltInProviders: - """Tests for the built-in tool trace providers.""" - - def test_weave_tool_trace_returns_nullcontext_when_not_initialized(self): - """Test that Weave tool trace provider returns nullcontext when not initialized.""" - from openhands.sdk.observability.context import _get_weave_tool_trace - - with patch( - "openhands.sdk.observability.weave.is_weave_initialized", - return_value=False, - ): - ctx = _get_weave_tool_trace("test-tool", {"arg": "value"}) - assert type(ctx).__name__ == "nullcontext" - - def test_laminar_tool_trace_returns_nullcontext_when_not_initialized(self): - """Test that Laminar tool trace provider returns nullcontext when not initialized.""" - from openhands.sdk.observability.context import _get_laminar_tool_trace - - with patch( - "openhands.sdk.observability.laminar.should_enable_observability", - return_value=False, - ): - ctx = _get_laminar_tool_trace("test-tool", {"arg": "value"}) - assert type(ctx).__name__ == "nullcontext" - - def test_tool_trace_providers_auto_registered(self): - """Test that built-in tool trace providers are registered on import.""" - from openhands.sdk.observability import context - - provider_names = [p.__name__ for p in context._tool_trace_providers] - assert "_get_weave_tool_trace" in provider_names - assert "_get_laminar_tool_trace" in provider_names diff --git a/tests/sdk/observability/test_weave.py b/tests/sdk/observability/test_weave.py index 78e8dd4431..b68a205093 100644 --- a/tests/sdk/observability/test_weave.py +++ b/tests/sdk/observability/test_weave.py @@ -104,107 +104,6 @@ def failing_function(): failing_function() -class TestObserveWeaveDecorator: - """Tests for the @observe_weave decorator.""" - - def test_observe_weave_without_initialization(self): - """@observe_weave runs function normally when 
Weave is not initialized.""" - import openhands.sdk.observability.weave as weave_module - weave_module._weave_initialized = False - - from openhands.sdk.observability.weave import observe_weave - - @observe_weave(name="test_observe") - def test_function(x: int, y: int) -> int: - return x + y - - result = test_function(3, 4) - assert result == 7 - - def test_observe_weave_with_ignore_inputs(self): - """@observe_weave correctly handles ignore_inputs parameter.""" - import openhands.sdk.observability.weave as weave_module - weave_module._weave_initialized = False - - from openhands.sdk.observability.weave import observe_weave - - @observe_weave(name="test_ignore", ignore_inputs=["secret"]) - def test_function(data: str, secret: str) -> str: - return f"{data}-processed" - - result = test_function("hello", "my-secret") - assert result == "hello-processed" - - -class TestWeaveAttributes: - """Tests for the weave_attributes context manager.""" - - def test_weave_attributes_without_initialization(self): - """weave_attributes works as no-op when Weave is not initialized.""" - import openhands.sdk.observability.weave as weave_module - weave_module._weave_initialized = False - - from openhands.sdk.observability.weave import weave_attributes - - results = [] - with weave_attributes(conversation_id="conv-123", user_id="user-456"): - results.append(1) - results.append(2) - - assert results == [1, 2] - - def test_weave_thread_without_initialization(self): - """weave_thread works as no-op when Weave is not initialized.""" - import openhands.sdk.observability.weave as weave_module - weave_module._weave_initialized = False - - from openhands.sdk.observability.weave import weave_thread - - results = [] - with weave_thread("test-thread-123"): - results.append(1) - results.append(2) - - assert results == [1, 2] - - -class TestWeaveSpanManager: - """Tests for the WeaveSpanManager class.""" - - def test_span_manager_without_initialization(self): - """WeaveSpanManager works gracefully when Weave is not initialized.""" - import openhands.sdk.observability.weave as weave_module - weave_module._weave_initialized = False - - from openhands.sdk.observability.weave import WeaveSpanManager - - manager = WeaveSpanManager() - - # start_span should return None when not initialized - result = manager.start_span("test_span", inputs={"key": "value"}) - assert result is None - - # end_span should not raise - manager.end_span(output={"result": "ok"}) - - def test_global_span_functions(self): - """Global span functions work without initialization.""" - import openhands.sdk.observability.weave as weave_module - weave_module._weave_initialized = False - - from openhands.sdk.observability.weave import ( - start_weave_span, - end_weave_span, - ) - - # Should not raise - result = start_weave_span("test", inputs={"x": 1}) - assert result is None - - # Should not raise - end_weave_span(output={"y": 2}) - - class TestGetWeaveOp: """Tests for the get_weave_op function.""" @@ -233,35 +132,23 @@ class TestWeaveExports: def test_all_exports_available(self): """All expected functions are exported from the module.""" from openhands.sdk.observability import ( - end_weave_span, get_weave_client, get_weave_op, init_weave, is_weave_initialized, maybe_init_weave, - observe_weave, should_enable_weave, - start_weave_span, - weave_attributes, weave_op, - weave_thread, - WeaveSpanManager, ) # Just verify they're callable - assert callable(end_weave_span) assert callable(get_weave_client) assert callable(get_weave_op) assert callable(init_weave) assert 
callable(is_weave_initialized) assert callable(maybe_init_weave) - assert callable(observe_weave) assert callable(should_enable_weave) - assert callable(start_weave_span) - assert callable(weave_attributes) assert callable(weave_op) - assert callable(weave_thread) - assert WeaveSpanManager is not None class TestInitWeave: From 148a1c339451325495a475a27fafd9997f2b7d8b Mon Sep 17 00:00:00 2001 From: morganmcg1 Date: Fri, 19 Dec 2025 13:30:06 -0600 Subject: [PATCH 8/8] refactor: Improve Weave observability integration - Make weave an optional dependency (install with pip install openhands-sdk[weave]) - Add auto-init via maybe_init_weave() at module load (matches Laminar pattern) - Simplify demo to use only existing functions - Fix tests for optional dependency handling - Remove unrelated changes from PR scope --- examples/weave_observability_demo.py | 130 +++++++++------------ openhands-sdk/openhands/sdk/agent/agent.py | 17 +-- openhands-sdk/pyproject.toml | 4 +- tests/sdk/observability/test_weave.py | 33 ++++++ 4 files changed, 98 insertions(+), 86 deletions(-) diff --git a/examples/weave_observability_demo.py b/examples/weave_observability_demo.py index 03bfd89c95..53e3f41fe7 100644 --- a/examples/weave_observability_demo.py +++ b/examples/weave_observability_demo.py @@ -7,7 +7,7 @@ ## Key Features Demonstrated -1. **Automatic LLM Tracing**: Just call `init_weave()` and all LiteLLM calls +1. **Automatic LLM Tracing**: Just set environment variables and all LiteLLM calls are automatically traced - no `@weave.op` decorators needed for LLM calls! 2. **Custom Function Tracing**: Use `@weave_op` for custom agent logic you @@ -17,22 +17,19 @@ in `weave.thread()` to group all operations under the conversation ID. This enables conversation-level tracing in the Weave UI! -4. **Conversation Grouping**: Use `weave_attributes()` to add custom metadata - to operations (user_id, session_id, etc.) - ## How It Works -The SDK uses LiteLLM for all LLM calls. When you call `init_weave()`: -1. Weave's `implicit_patch()` automatically patches LiteLLM +The SDK uses LiteLLM for all LLM calls. When Weave is initialized: +1. Weave's autopatching automatically patches LiteLLM 2. All `litellm.completion()` and `litellm.acompletion()` calls are traced 3. LocalConversation.run() wraps the event loop in `weave.thread(conversation_id)` 4. You see full conversation traces in the Weave UI without any code changes! ## Prerequisites -- Set WANDB_API_KEY environment variable (valid W&B API key) +- Install with Weave support: `pip install openhands-sdk[weave]` +- Set WANDB_API_KEY environment variable - Set WEAVE_PROJECT environment variable (e.g., "your-team/openhands-demo") -- Optionally set OPENAI_API_KEY for actual LLM calls ## Usage @@ -41,9 +38,8 @@ python examples/weave_observability_demo.py Note: - If WANDB_API_KEY is not set or invalid, the demo will still run - but without Weave tracing. This allows testing the functionality - without requiring valid credentials. + If WANDB_API_KEY is not set or the weave package is not installed, + the demo will still run but without Weave tracing. 
""" import os @@ -57,20 +53,18 @@ is_weave_initialized, maybe_init_weave, weave_op, - weave_attributes, - weave_thread, - start_weave_span, - end_weave_span, - observe_weave, get_weave_op, ) -# Example 1: Using the @weave_op decorator +# Example 1: Using the @weave_op decorator for custom function tracing @weave_op(name="process_message") def process_message(message: str) -> dict: - """Process a user message and return a response.""" - # Simulate some processing + """Process a user message and return a response. + + When Weave is initialized, this function will appear in traces + with the name "process_message". + """ word_count = len(message.split()) return { "original": message, @@ -79,11 +73,14 @@ def process_message(message: str) -> dict: } -# Example 2: Using observe_weave for compatibility with Laminar -@observe_weave(name="analyze_sentiment") +# Example 2: Another traced function +@weave_op(name="analyze_sentiment") def analyze_sentiment(text: str) -> str: - """Analyze the sentiment of text.""" - # Simple mock sentiment analysis + """Analyze the sentiment of text. + + This demonstrates how @weave_op works as a no-op when Weave + is not initialized - your code runs normally either way. + """ positive_words = {"good", "great", "excellent", "happy", "love"} negative_words = {"bad", "terrible", "sad", "hate", "awful"} @@ -98,14 +95,15 @@ def analyze_sentiment(text: str) -> str: return "neutral" -# Example 3: Nested operations with thread grouping +# Example 3: Nested traced functions @weave_op(name="agent_step") def agent_step(step_num: int, user_input: str) -> dict: - """Simulate an agent step with nested operations.""" - # Process the message - processed = process_message(user_input) + """Simulate an agent step with nested traced operations. - # Analyze sentiment + When this function calls process_message and analyze_sentiment, + they appear as child spans in the Weave trace. 
+ """ + processed = process_message(user_input) sentiment = analyze_sentiment(user_input) return { @@ -115,22 +113,6 @@ def agent_step(step_num: int, user_input: str) -> dict: } -# Example 4: Manual span management -def manual_span_example(): - """Demonstrate manual span creation and management.""" - # Start a span - start_weave_span("manual_operation", inputs={"task": "demo"}) - - try: - # Do some work - result = {"status": "completed", "items_processed": 42} - end_weave_span(output=result) - return result - except Exception as e: - end_weave_span(error=e) - raise - - def run_demo(): """Run the Weave observability demo.""" print("=" * 60) @@ -150,7 +132,7 @@ def run_demo(): project = "openhands-sdk-demo" os.environ["WEAVE_PROJECT"] = project - # Initialize Weave + # Initialize Weave (or use maybe_init_weave() for conditional init) print(f"\n📊 Initializing Weave for project: {project}") success = maybe_init_weave() @@ -164,6 +146,7 @@ def run_demo(): else: print("⚠️ Weave not initialized (missing credentials or package)") print(" Running demo without tracing...") + print(" Install with: pip install openhands-sdk[weave]") print("\n" + "-" * 60) print("Running demo operations...") @@ -175,33 +158,25 @@ def run_demo(): result = process_message("Hello, this is a test message for the agent!") print(f" Result: {result}") - # Demo 2: Sentiment analysis with observe_weave - print("\n2️⃣ Laminar-compatible interface with @observe_weave:") - print(" (Easy migration from Laminar to Weave)") - sentiment = analyze_sentiment("This is a great and excellent demo!") - print(f" Sentiment: {sentiment}") - - # Demo 3: Conversation grouping with weave_attributes - print("\n3️⃣ Conversation grouping with weave_attributes:") - print(" (Group all operations under a conversation ID)") - conversation_id = "demo-conversation-001" - - with weave_attributes(conversation_id=conversation_id, user_id="demo-user"): - for i, msg in enumerate([ - "Hello, I need help with my code", - "The function is not working correctly", - "Great, that fixed it! Thank you!", - ], 1): - result = agent_step(i, msg) - print(f" Step {i}: sentiment={result['sentiment']}") - - # Demo 4: Manual span management - print("\n4️⃣ Manual span management (for advanced use cases):") - result = manual_span_example() + # Demo 2: Nested function calls + print("\n2️⃣ Nested traced function calls:") + print(" (Child functions appear as child spans in the trace)") + result = agent_step(1, "This is a great example of tracing!") print(f" Result: {result}") - # Demo 5: Show how to get weave.op for dynamic decoration - print("\n5️⃣ Dynamic decoration with get_weave_op():") + # Demo 3: Multiple steps to show trace structure + print("\n3️⃣ Multiple agent steps:") + for i, msg in enumerate([ + "Hello, I need help with my code", + "The function is not working correctly", + "Great, that fixed it! 
Thank you!", + ], 1): + result = agent_step(i, msg) + print(f" Step {i}: sentiment={result['sentiment']}") + + # Demo 4: Dynamic decoration with get_weave_op() + print("\n4️⃣ Dynamic decoration with get_weave_op():") + print(" (Useful for conditionally applying tracing)") op = get_weave_op() @op @@ -220,9 +195,18 @@ def dynamically_traced_function(x: int) -> int: print(" • LLM calls via LiteLLM are traced AUTOMATICALLY") print(" • Conversation.run() groups all operations by conversation ID") print(" • Use @weave_op for custom agent logic you want to trace") - print("\n📝 In your code, just do:") - print(" from openhands.sdk.observability import init_weave") - print(" init_weave('your-project') # That's it!") + print("\n📝 Minimal setup (zero code changes):") + print(" 1. pip install openhands-sdk[weave]") + print(" 2. export WANDB_API_KEY='your-key'") + print(" 3. export WEAVE_PROJECT='team/project'") + print(" That's it! All LLM calls are now traced.") + else: + print("\n📝 To enable tracing:") + print(" 1. pip install openhands-sdk[weave]") + print(" 2. export WANDB_API_KEY='your-api-key'") + print(" 3. export WEAVE_PROJECT='your-team/your-project'") + print(" 4. Run this demo again") + print("=" * 60) diff --git a/openhands-sdk/openhands/sdk/agent/agent.py b/openhands-sdk/openhands/sdk/agent/agent.py index d88d2656d4..f3741e6cf8 100644 --- a/openhands-sdk/openhands/sdk/agent/agent.py +++ b/openhands-sdk/openhands/sdk/agent/agent.py @@ -47,6 +47,7 @@ should_enable_observability, ) from openhands.sdk.observability.utils import extract_action_name +from openhands.sdk.observability.weave import maybe_init_weave from openhands.sdk.security.llm_analyzer import LLMSecurityAnalyzer from openhands.sdk.tool import ( Action, @@ -61,6 +62,7 @@ logger = get_logger(__name__) maybe_init_laminar() +maybe_init_weave() class Agent(AgentBase): @@ -109,17 +111,10 @@ def init_state( event = SystemPromptEvent( source="agent", system_prompt=TextContent(text=self.system_message), - # Always expose a 'security_risk' parameter in tool schemas. - # This ensures the schema remains consistent, even if the - # security analyzer is disabled. Validation of this field - # happens dynamically at runtime depending on the analyzer - # configured. This allows weaker models to omit risk field - # and bypass validation requirements when analyzer is disabled. - # For detailed logic, see `_extract_security_risk` method. - tools=[ - t.to_openai_tool(add_security_risk_prediction=True) - for t in self.tools_map.values() - ], + # Tools are stored as ToolDefinition objects and converted to + # OpenAI format with security_risk parameter during LLM completion. + # See make_llm_completion() in agent/utils.py for details. 
+ tools=list(self.tools_map.values()), ) on_event(event) diff --git a/openhands-sdk/pyproject.toml b/openhands-sdk/pyproject.toml index 1b890aac6e..cd3fd1d573 100644 --- a/openhands-sdk/pyproject.toml +++ b/openhands-sdk/pyproject.toml @@ -14,12 +14,12 @@ dependencies = [ "python-json-logger>=3.3.0", "tenacity>=9.1.2", "websockets>=12", - "lmnr>=0.7.24", - "weave>=0.52.22" + "lmnr>=0.7.24" ] [project.optional-dependencies] boto3 = ["boto3>=1.35.0"] +weave = ["weave>=0.52.22", "wandb"] [build-system] requires = ["setuptools>=61.0", "wheel"] diff --git a/tests/sdk/observability/test_weave.py b/tests/sdk/observability/test_weave.py index b68a205093..cad57dc767 100644 --- a/tests/sdk/observability/test_weave.py +++ b/tests/sdk/observability/test_weave.py @@ -12,6 +12,18 @@ import pytest +# Check if weave is installed for tests that require it +try: + import weave + WEAVE_INSTALLED = True +except ImportError: + WEAVE_INSTALLED = False + +requires_weave = pytest.mark.skipif( + not WEAVE_INSTALLED, + reason="Weave package not installed" +) + class TestWeaveConfiguration: """Tests for Weave configuration and initialization.""" @@ -154,6 +166,7 @@ def test_all_exports_available(self): class TestInitWeave: """Tests for init_weave function.""" + @requires_weave def test_init_weave_requires_project(self): """init_weave raises ValueError when no project is specified.""" import openhands.sdk.observability.weave as weave_module @@ -166,6 +179,25 @@ def test_init_weave_requires_project(self): with pytest.raises(ValueError, match="Weave project must be specified"): init_weave() + def test_init_weave_returns_false_when_weave_not_installed(self): + """init_weave returns False when weave package is not installed.""" + # This test verifies the expected behavior. + # When weave is not installed, init_weave should return False. + # Since weave is an optional dependency, we can test the actual + # behavior directly if weave isn't installed. + if WEAVE_INSTALLED: + pytest.skip("Weave is installed, cannot test missing module behavior") + + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import init_weave + + result = init_weave(project="test-project") + # When weave is not installed, init_weave should return False + assert result is False + + @requires_weave def test_init_weave_uses_env_project(self): """init_weave uses WEAVE_PROJECT from environment.""" import openhands.sdk.observability.weave as weave_module @@ -208,6 +240,7 @@ class TestAutopatching: Weave's automatic LiteLLM patching. """ + @requires_weave def test_init_weave_calls_weave_init(self): """init_weave calls weave.init which triggers autopatching.""" import openhands.sdk.observability.weave as weave_module