From ffa8a6c8c20b39d9a538f0a219f48b8cc4b739e3 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 18 Dec 2025 22:26:35 +0000 Subject: [PATCH 1/8] feat(observability): Add Weave integration for agent tracing Add Weights & Biases Weave integration to the SDK observability module, providing comprehensive tracing capabilities for agent operations. New features: - weave_op decorator for tracing functions - observe_weave decorator with Laminar-compatible interface - weave_thread context manager for grouping related operations - WeaveSpanManager for manual span management - Auto-initialization via environment variables (WANDB_API_KEY, WEAVE_PROJECT) Files added: - openhands-sdk/openhands/sdk/observability/weave.py - examples/weave_observability_demo.py - tests/sdk/observability/test_weave.py Dependencies: - Added weave>=0.52.22 to openhands-sdk dependencies Co-authored-by: openhands --- examples/weave_observability_demo.py | 178 +++++++ .../openhands/sdk/observability/__init__.py | 31 +- .../openhands/sdk/observability/weave.py | 445 ++++++++++++++++++ openhands-sdk/pyproject.toml | 3 +- tests/sdk/observability/__init__.py | 0 tests/sdk/observability/test_weave.py | 274 +++++++++++ 6 files changed, 929 insertions(+), 2 deletions(-) create mode 100644 examples/weave_observability_demo.py create mode 100644 openhands-sdk/openhands/sdk/observability/weave.py create mode 100644 tests/sdk/observability/__init__.py create mode 100644 tests/sdk/observability/test_weave.py diff --git a/examples/weave_observability_demo.py b/examples/weave_observability_demo.py new file mode 100644 index 0000000000..0417e42d30 --- /dev/null +++ b/examples/weave_observability_demo.py @@ -0,0 +1,178 @@ +#!/usr/bin/env python3 +"""Demo script showing Weave observability integration with OpenHands SDK. + +This script demonstrates how to use Weave for tracing agent operations. +It creates a simple agent that processes messages and shows how traces +appear in the Weave UI. + +Prerequisites: + - Set WANDB_API_KEY environment variable (valid W&B API key) + - Set WEAVE_PROJECT environment variable (e.g., "your-team/openhands-demo") + - Optionally set OPENAI_API_KEY for LLM calls + +Usage: + export WANDB_API_KEY="your-api-key" + export WEAVE_PROJECT="your-team/openhands-demo" + python examples/weave_observability_demo.py + +Note: + If WANDB_API_KEY is not set or invalid, the demo will still run + but without Weave tracing. This allows testing the decorator + functionality without requiring valid credentials. 
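+
+Programmatic alternative (a minimal sketch; replace the project name with
+your own W&B entity/project):
+
+    from openhands.sdk.observability.weave import init_weave
+
+    init_weave("your-team/openhands-demo")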
+""" + +import os +import sys + +# Add the SDK to the path for development +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "openhands-sdk")) + +from openhands.sdk.observability.weave import ( + init_weave, + is_weave_initialized, + maybe_init_weave, + weave_op, + weave_thread, + start_weave_span, + end_weave_span, + observe_weave, +) + + +# Example 1: Using the @weave_op decorator +@weave_op(name="process_message") +def process_message(message: str) -> dict: + """Process a user message and return a response.""" + # Simulate some processing + word_count = len(message.split()) + return { + "original": message, + "word_count": word_count, + "processed": True, + } + + +# Example 2: Using observe_weave for compatibility with Laminar +@observe_weave(name="analyze_sentiment") +def analyze_sentiment(text: str) -> str: + """Analyze the sentiment of text.""" + # Simple mock sentiment analysis + positive_words = {"good", "great", "excellent", "happy", "love"} + negative_words = {"bad", "terrible", "sad", "hate", "awful"} + + words = set(text.lower().split()) + pos_count = len(words & positive_words) + neg_count = len(words & negative_words) + + if pos_count > neg_count: + return "positive" + elif neg_count > pos_count: + return "negative" + return "neutral" + + +# Example 3: Nested operations with thread grouping +@weave_op(name="agent_step") +def agent_step(step_num: int, user_input: str) -> dict: + """Simulate an agent step with nested operations.""" + # Process the message + processed = process_message(user_input) + + # Analyze sentiment + sentiment = analyze_sentiment(user_input) + + return { + "step": step_num, + "processed": processed, + "sentiment": sentiment, + } + + +# Example 4: Manual span management +def manual_span_example(): + """Demonstrate manual span creation and management.""" + # Start a span + start_weave_span("manual_operation", inputs={"task": "demo"}) + + try: + # Do some work + result = {"status": "completed", "items_processed": 42} + end_weave_span(output=result) + return result + except Exception as e: + end_weave_span(error=e) + raise + + +def run_demo(): + """Run the Weave observability demo.""" + print("=" * 60) + print("Weave Observability Demo for OpenHands SDK") + print("=" * 60) + + # Check environment + api_key = os.environ.get("WANDB_API_KEY") + project = os.environ.get("WEAVE_PROJECT") + + if not api_key: + print("\n⚠️ WANDB_API_KEY not set. Weave tracing will be disabled.") + print(" Set it with: export WANDB_API_KEY='your-api-key'") + + if not project: + print("\n⚠️ WEAVE_PROJECT not set. 
Using default project name.") + project = "openhands-sdk-demo" + os.environ["WEAVE_PROJECT"] = project + + # Initialize Weave + print(f"\n📊 Initializing Weave for project: {project}") + success = maybe_init_weave() + + if success: + print("✅ Weave initialized successfully!") + print(f" View traces at: https://wandb.ai/{project}/weave") + else: + print("⚠️ Weave not initialized (missing credentials or package)") + print(" Running demo without tracing...") + + print("\n" + "-" * 60) + print("Running demo operations...") + print("-" * 60) + + # Demo 1: Simple decorated function + print("\n1️⃣ Processing a message with @weave_op decorator:") + result = process_message("Hello, this is a test message for the agent!") + print(f" Result: {result}") + + # Demo 2: Sentiment analysis with observe_weave + print("\n2️⃣ Analyzing sentiment with @observe_weave decorator:") + sentiment = analyze_sentiment("This is a great and excellent demo!") + print(f" Sentiment: {sentiment}") + + # Demo 3: Thread grouping for conversation + print("\n3️⃣ Simulating a conversation with thread grouping:") + conversation_id = "demo-conversation-001" + + with weave_thread(conversation_id): + for i, msg in enumerate([ + "Hello, I need help with my code", + "The function is not working correctly", + "Great, that fixed it! Thank you!", + ], 1): + result = agent_step(i, msg) + print(f" Step {i}: sentiment={result['sentiment']}") + + # Demo 4: Manual span management + print("\n4️⃣ Manual span management:") + result = manual_span_example() + print(f" Result: {result}") + + print("\n" + "=" * 60) + print("Demo completed!") + + if is_weave_initialized(): + print(f"\n🔗 View your traces at: https://wandb.ai/{project}/weave") + print("=" * 60) + + +if __name__ == "__main__": + run_demo() diff --git a/openhands-sdk/openhands/sdk/observability/__init__.py b/openhands-sdk/openhands/sdk/observability/__init__.py index 4f4ea48583..5b638534dc 100644 --- a/openhands-sdk/openhands/sdk/observability/__init__.py +++ b/openhands-sdk/openhands/sdk/observability/__init__.py @@ -1,4 +1,33 @@ from openhands.sdk.observability.laminar import maybe_init_laminar, observe +from openhands.sdk.observability.weave import ( + end_weave_span, + get_weave_client, + init_weave, + is_weave_initialized, + maybe_init_weave, + observe_weave, + should_enable_weave, + start_weave_span, + weave_op, + weave_thread, + WeaveSpanManager, +) -__all__ = ["maybe_init_laminar", "observe"] +__all__ = [ + # Laminar exports + "maybe_init_laminar", + "observe", + # Weave exports + "end_weave_span", + "get_weave_client", + "init_weave", + "is_weave_initialized", + "maybe_init_weave", + "observe_weave", + "should_enable_weave", + "start_weave_span", + "weave_op", + "weave_thread", + "WeaveSpanManager", +] diff --git a/openhands-sdk/openhands/sdk/observability/weave.py b/openhands-sdk/openhands/sdk/observability/weave.py new file mode 100644 index 0000000000..821765f6d9 --- /dev/null +++ b/openhands-sdk/openhands/sdk/observability/weave.py @@ -0,0 +1,445 @@ +"""Weave observability integration for OpenHands SDK. + +This module provides integration with Weights & Biases Weave for tracing +and observability of agent operations. Weave automatically tracks LLM calls, +tool executions, and agent steps. + +Configuration: + Set the following environment variables to enable Weave tracing: + - WANDB_API_KEY: Your Weights & Biases API key + - WEAVE_PROJECT: The Weave project name (e.g., "my-team/my-project") + + Alternatively, call `init_weave()` directly with the project name. 
+ +Example: + >>> from openhands.sdk.observability.weave import maybe_init_weave, weave_op + >>> maybe_init_weave() # Auto-initializes if env vars are set + >>> + >>> @weave_op(name="my_function") + >>> def my_function(x: int) -> int: + ... return x + 1 + +See Also: + - Weave documentation: https://docs.wandb.ai/weave + - Laminar integration: openhands.sdk.observability.laminar +""" + +from collections.abc import Callable +from contextlib import contextmanager +from functools import wraps +from typing import Any, ParamSpec, TypeVar + +from openhands.sdk.logger import get_logger +from openhands.sdk.observability.utils import get_env + + +logger = get_logger(__name__) + +# Type variables for generic function signatures +P = ParamSpec("P") +R = TypeVar("R") + +# Global state for Weave initialization +_weave_initialized: bool = False +_weave_client: Any = None + + +def should_enable_weave() -> bool: + """Check if Weave should be enabled based on environment configuration. + + Returns: + True if WANDB_API_KEY and WEAVE_PROJECT are set, False otherwise. + """ + api_key = get_env("WANDB_API_KEY") + project = get_env("WEAVE_PROJECT") + return bool(api_key and project) + + +def is_weave_initialized() -> bool: + """Check if Weave has been initialized. + + Returns: + True if Weave is initialized and ready for tracing. + """ + global _weave_initialized + return _weave_initialized + + +def init_weave( + project: str | None = None, + api_key: str | None = None, +) -> bool: + """Initialize Weave for tracing. + + Args: + project: The Weave project name (e.g., "my-team/my-project"). + If not provided, uses WEAVE_PROJECT environment variable. + api_key: The Weights & Biases API key. If not provided, uses + WANDB_API_KEY environment variable. + + Returns: + True if initialization was successful, False otherwise. + + Raises: + ValueError: If no project is specified and WEAVE_PROJECT is not set. + """ + import os + + global _weave_initialized, _weave_client + + if _weave_initialized: + logger.debug("Weave already initialized, skipping") + return True + + try: + import weave + except ImportError: + logger.warning( + "Weave package not installed. Install with: pip install weave" + ) + return False + + # Determine project name + project_name = project or get_env("WEAVE_PROJECT") + if not project_name: + raise ValueError( + "Weave project must be specified via argument or WEAVE_PROJECT env var" + ) + + # Set API key in environment if provided (Weave reads from env) + wandb_api_key = api_key or get_env("WANDB_API_KEY") + if wandb_api_key: + os.environ["WANDB_API_KEY"] = wandb_api_key + + # Ensure wandb is logged in (required by weave.init) + try: + import wandb + wandb.login(key=wandb_api_key, relogin=False) + except Exception as e: + logger.warning(f"wandb login failed: {e}") + else: + logger.warning( + "WANDB_API_KEY not set. Weave tracing may not work correctly." + ) + + try: + _weave_client = weave.init(project_name) + _weave_initialized = True + logger.info(f"Weave initialized for project: {project_name}") + return True + except Exception as e: + logger.error(f"Failed to initialize Weave: {e}") + return False + + +def maybe_init_weave() -> bool: + """Initialize Weave if environment variables are configured. + + This is a convenience function that checks for WANDB_API_KEY and + WEAVE_PROJECT environment variables and initializes Weave if both are set. + + Returns: + True if Weave was initialized (or already initialized), False otherwise. 
+ """ + if is_weave_initialized(): + return True + + if should_enable_weave(): + return init_weave() + + logger.debug( + "Weave environment variables not set (WANDB_API_KEY, WEAVE_PROJECT). " + "Skipping Weave initialization." + ) + return False + + +def get_weave_client() -> Any: + """Get the current Weave client. + + Returns: + The Weave client if initialized, None otherwise. + """ + global _weave_client + return _weave_client + + +def weave_op( + name: str | None = None, + *, + call_display_name: str | Callable[..., str] | None = None, + postprocess_inputs: Callable[..., dict[str, Any]] | None = None, + postprocess_output: Callable[..., Any] | None = None, +) -> Callable[[Callable[P, R]], Callable[P, R]]: + """Decorator to trace a function with Weave. + + This decorator wraps a function to automatically trace its inputs, outputs, + and execution time with Weave. If Weave is not initialized, the function + runs normally without tracing. + + Args: + name: Optional name for the operation. Defaults to the function name. + call_display_name: Optional display name or callable that returns a + display name for each call. + postprocess_inputs: Optional function to transform inputs before logging. + postprocess_output: Optional function to transform output before logging. + + Returns: + A decorator that wraps the function with Weave tracing. + + Example: + >>> @weave_op(name="process_data") + >>> def process_data(data: dict) -> dict: + ... return {"processed": True, **data} + """ + def decorator(func: Callable[P, R]) -> Callable[P, R]: + @wraps(func) + def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: + if not is_weave_initialized(): + return func(*args, **kwargs) + + try: + import weave + + # Build weave.op kwargs + op_kwargs: dict[str, Any] = {} + if name: + op_kwargs["name"] = name + if call_display_name: + op_kwargs["call_display_name"] = call_display_name + if postprocess_inputs: + op_kwargs["postprocess_inputs"] = postprocess_inputs + if postprocess_output: + op_kwargs["postprocess_output"] = postprocess_output + + # Apply weave.op decorator dynamically + traced_func = weave.op(**op_kwargs)(func) + return traced_func(*args, **kwargs) + except Exception as e: + logger.debug(f"Weave tracing failed, running without trace: {e}") + return func(*args, **kwargs) + + return wrapper + + return decorator + + +@contextmanager +def weave_thread(thread_id: str): + """Context manager to group operations under a Weave thread. + + Weave threads allow grouping related operations (like all events in a + conversation) under a single trace hierarchy. + + Args: + thread_id: Unique identifier for the thread (e.g., conversation ID). + + Yields: + The thread context if Weave is initialized, otherwise a no-op context. + + Example: + >>> with weave_thread("conversation-123"): + ... # All operations here will be grouped under the same thread + ... process_message("Hello") + ... generate_response() + """ + if not is_weave_initialized(): + yield + return + + try: + import weave + + # Check if there's an active Weave client + client = weave.client.get_current_client() + if client is None: + yield + return + + with weave.thread(thread_id): + yield + except Exception as e: + logger.debug(f"Weave thread context failed: {e}") + yield + + +class WeaveSpanManager: + """Manages Weave spans for manual tracing. + + This class provides a stack-based approach to managing Weave spans, + similar to the SpanManager for Laminar. It's useful when you need + more control over span lifecycle than the decorator provides. 
+ + Example: + >>> manager = WeaveSpanManager() + >>> manager.start_span("process_request", session_id="conv-123") + >>> try: + ... # Do work + ... pass + ... finally: + ... manager.end_span() + """ + + def __init__(self): + self._call_stack: list[Any] = [] + + def start_span( + self, + name: str, + inputs: dict[str, Any] | None = None, + session_id: str | None = None, + ) -> Any | None: + """Start a new Weave span. + + Args: + name: Name of the operation being traced. + inputs: Optional dictionary of input values to log. + session_id: Optional session ID for grouping related spans. + + Returns: + The Weave call object if successful, None otherwise. + """ + if not is_weave_initialized(): + return None + + try: + import weave + + client = get_weave_client() + if client is None: + return None + + # Create a call using the client API + call = client.create_call( + op=name, + inputs=inputs or {}, + ) + self._call_stack.append(call) + return call + except Exception as e: + logger.debug(f"Failed to start Weave span: {e}") + return None + + def end_span(self, output: Any = None, error: Exception | None = None) -> None: + """End the most recent Weave span. + + Args: + output: Optional output value to log. + error: Optional exception if the operation failed. + """ + if not self._call_stack: + logger.debug("Attempted to end span, but stack is empty") + return + + try: + call = self._call_stack.pop() + client = get_weave_client() + if client and call: + if error: + client.finish_call(call, output=None, exception=error) + else: + client.finish_call(call, output=output) + except Exception as e: + logger.debug(f"Failed to end Weave span: {e}") + + +# Global span manager instance +_span_manager: WeaveSpanManager | None = None + + +def _get_span_manager() -> WeaveSpanManager: + """Get or create the global span manager.""" + global _span_manager + if _span_manager is None: + _span_manager = WeaveSpanManager() + return _span_manager + + +def start_weave_span( + name: str, + inputs: dict[str, Any] | None = None, + session_id: str | None = None, +) -> Any | None: + """Start a new Weave span using the global span manager. + + Args: + name: Name of the operation being traced. + inputs: Optional dictionary of input values to log. + session_id: Optional session ID for grouping related spans. + + Returns: + The Weave call object if successful, None otherwise. + """ + return _get_span_manager().start_span(name, inputs, session_id) + + +def end_weave_span(output: Any = None, error: Exception | None = None) -> None: + """End the most recent Weave span using the global span manager. + + Args: + output: Optional output value to log. + error: Optional exception if the operation failed. + """ + try: + _get_span_manager().end_span(output, error) + except Exception: + logger.debug("Error ending Weave span") + + +def observe_weave( + *, + name: str | None = None, + ignore_inputs: list[str] | None = None, + ignore_output: bool = False, +) -> Callable[[Callable[P, R]], Callable[P, R]]: + """Unified observe decorator that works with both Weave and Laminar. + + This decorator provides a consistent interface for observability that + works regardless of which backend (Weave or Laminar) is configured. + It prioritizes Weave if initialized, otherwise falls back to Laminar. + + Args: + name: Optional name for the operation. + ignore_inputs: List of input parameter names to exclude from logging. + ignore_output: If True, don't log the function's output. + + Returns: + A decorator that wraps the function with observability tracing. 
+ + Example: + >>> @observe_weave(name="agent.step", ignore_inputs=["state"]) + >>> def step(self, state: State) -> Action: + ... return self._process(state) + """ + def decorator(func: Callable[P, R]) -> Callable[P, R]: + @wraps(func) + def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: + # Try Weave first + if is_weave_initialized(): + try: + import weave + + op_kwargs: dict[str, Any] = {} + if name: + op_kwargs["name"] = name + + # Handle input filtering via postprocess_inputs + if ignore_inputs: + def filter_inputs(inputs: dict[str, Any]) -> dict[str, Any]: + return { + k: v for k, v in inputs.items() + if k not in ignore_inputs + } + op_kwargs["postprocess_inputs"] = filter_inputs + + traced_func = weave.op(**op_kwargs)(func) + return traced_func(*args, **kwargs) + except Exception as e: + logger.debug(f"Weave tracing failed: {e}") + + # Fall through to untraced execution + return func(*args, **kwargs) + + return wrapper + + return decorator diff --git a/openhands-sdk/pyproject.toml b/openhands-sdk/pyproject.toml index 276295e37c..1b890aac6e 100644 --- a/openhands-sdk/pyproject.toml +++ b/openhands-sdk/pyproject.toml @@ -14,7 +14,8 @@ dependencies = [ "python-json-logger>=3.3.0", "tenacity>=9.1.2", "websockets>=12", - "lmnr>=0.7.24" + "lmnr>=0.7.24", + "weave>=0.52.22" ] [project.optional-dependencies] diff --git a/tests/sdk/observability/__init__.py b/tests/sdk/observability/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/sdk/observability/test_weave.py b/tests/sdk/observability/test_weave.py new file mode 100644 index 0000000000..64de5fe3c0 --- /dev/null +++ b/tests/sdk/observability/test_weave.py @@ -0,0 +1,274 @@ +"""Tests for Weave observability integration. + +These tests verify the Weave integration works correctly, including: +- Decorator functionality (with and without Weave initialized) +- Environment variable configuration +- Graceful fallback when Weave is not available +""" + +import os +from unittest.mock import MagicMock, patch + +import pytest + + +class TestWeaveConfiguration: + """Tests for Weave configuration and initialization.""" + + def test_should_enable_weave_with_both_vars(self): + """should_enable_weave returns True when both env vars are set.""" + from openhands.sdk.observability.weave import should_enable_weave + + with patch.dict(os.environ, { + "WANDB_API_KEY": "test-key", + "WEAVE_PROJECT": "test-project", + }): + assert should_enable_weave() is True + + def test_should_enable_weave_missing_api_key(self): + """should_enable_weave returns False when API key is missing.""" + from openhands.sdk.observability.weave import should_enable_weave + + with patch.dict(os.environ, { + "WEAVE_PROJECT": "test-project", + }, clear=True): + # Clear WANDB_API_KEY if it exists + os.environ.pop("WANDB_API_KEY", None) + assert should_enable_weave() is False + + def test_should_enable_weave_missing_project(self): + """should_enable_weave returns False when project is missing.""" + from openhands.sdk.observability.weave import should_enable_weave + + with patch.dict(os.environ, { + "WANDB_API_KEY": "test-key", + }, clear=True): + os.environ.pop("WEAVE_PROJECT", None) + assert should_enable_weave() is False + + def test_is_weave_initialized_default(self): + """is_weave_initialized returns False by default.""" + # Reset global state + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import is_weave_initialized + assert is_weave_initialized() is False + 
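+    # A minimal sketch of the auto-init path: with neither WANDB_API_KEY nor
+    # WEAVE_PROJECT set, maybe_init_weave should be a no-op and return False.
+    def test_maybe_init_weave_without_env_vars(self):
+        """maybe_init_weave returns False when env vars are not set."""
+        import openhands.sdk.observability.weave as weave_module
+        weave_module._weave_initialized = False
+
+        from openhands.sdk.observability.weave import maybe_init_weave
+
+        with patch.dict(os.environ, {}, clear=True):
+            assert maybe_init_weave() is False
+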
+ +class TestWeaveOpDecorator: + """Tests for the @weave_op decorator.""" + + def test_weave_op_without_initialization(self): + """@weave_op runs function normally when Weave is not initialized.""" + # Reset global state + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import weave_op + + @weave_op(name="test_function") + def test_function(x: int) -> int: + return x + 1 + + result = test_function(5) + assert result == 6 + + def test_weave_op_preserves_function_metadata(self): + """@weave_op preserves function name and docstring.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import weave_op + + @weave_op(name="custom_name") + def my_function(x: int) -> int: + """My docstring.""" + return x + + assert my_function.__name__ == "my_function" + assert my_function.__doc__ == "My docstring." + + def test_weave_op_handles_exceptions(self): + """@weave_op propagates exceptions correctly.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import weave_op + + @weave_op(name="failing_function") + def failing_function(): + raise ValueError("Test error") + + with pytest.raises(ValueError, match="Test error"): + failing_function() + + +class TestObserveWeaveDecorator: + """Tests for the @observe_weave decorator.""" + + def test_observe_weave_without_initialization(self): + """@observe_weave runs function normally when Weave is not initialized.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import observe_weave + + @observe_weave(name="test_observe") + def test_function(x: int, y: int) -> int: + return x + y + + result = test_function(3, 4) + assert result == 7 + + def test_observe_weave_with_ignore_inputs(self): + """@observe_weave correctly handles ignore_inputs parameter.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import observe_weave + + @observe_weave(name="test_ignore", ignore_inputs=["secret"]) + def test_function(data: str, secret: str) -> str: + return f"{data}-processed" + + result = test_function("hello", "my-secret") + assert result == "hello-processed" + + +class TestWeaveThread: + """Tests for the weave_thread context manager.""" + + def test_weave_thread_without_initialization(self): + """weave_thread works as no-op when Weave is not initialized.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import weave_thread + + results = [] + with weave_thread("test-thread-123"): + results.append(1) + results.append(2) + + assert results == [1, 2] + + +class TestWeaveSpanManager: + """Tests for the WeaveSpanManager class.""" + + def test_span_manager_without_initialization(self): + """WeaveSpanManager works gracefully when Weave is not initialized.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import WeaveSpanManager + + manager = WeaveSpanManager() + + # start_span should return None when not initialized + result = manager.start_span("test_span", inputs={"key": "value"}) + assert result is None + + # end_span should 
not raise + manager.end_span(output={"result": "ok"}) + + def test_global_span_functions(self): + """Global span functions work without initialization.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import ( + start_weave_span, + end_weave_span, + ) + + # Should not raise + result = start_weave_span("test", inputs={"x": 1}) + assert result is None + + # Should not raise + end_weave_span(output={"y": 2}) + + +class TestWeaveExports: + """Tests for module exports.""" + + def test_all_exports_available(self): + """All expected functions are exported from the module.""" + from openhands.sdk.observability import ( + end_weave_span, + get_weave_client, + init_weave, + is_weave_initialized, + maybe_init_weave, + observe_weave, + should_enable_weave, + start_weave_span, + weave_op, + weave_thread, + WeaveSpanManager, + ) + + # Just verify they're callable + assert callable(end_weave_span) + assert callable(get_weave_client) + assert callable(init_weave) + assert callable(is_weave_initialized) + assert callable(maybe_init_weave) + assert callable(observe_weave) + assert callable(should_enable_weave) + assert callable(start_weave_span) + assert callable(weave_op) + assert callable(weave_thread) + assert WeaveSpanManager is not None + + +class TestInitWeave: + """Tests for init_weave function.""" + + def test_init_weave_requires_project(self): + """init_weave raises ValueError when no project is specified.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import init_weave + + with patch.dict(os.environ, {}, clear=True): + os.environ.pop("WEAVE_PROJECT", None) + with pytest.raises(ValueError, match="Weave project must be specified"): + init_weave() + + def test_init_weave_uses_env_project(self): + """init_weave uses WEAVE_PROJECT from environment.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import init_weave + + # Mock weave.init to avoid actual initialization + with patch("openhands.sdk.observability.weave.get_env") as mock_get_env: + mock_get_env.side_effect = lambda k: { + "WEAVE_PROJECT": "test-project", + "WANDB_API_KEY": None, + }.get(k) + + with patch("weave.init") as mock_weave_init: + mock_weave_init.return_value = MagicMock() + result = init_weave() + + # Should have called weave.init with the project + mock_weave_init.assert_called_once_with("test-project") + + def test_init_weave_already_initialized(self): + """init_weave returns True immediately if already initialized.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = True + + from openhands.sdk.observability.weave import init_weave + + result = init_weave(project="test") + assert result is True + + # Reset for other tests + weave_module._weave_initialized = False From 2d86b6924536cd3eed700fefbcf96f0c63dff71e Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 18 Dec 2025 23:02:01 +0000 Subject: [PATCH 2/8] refactor: Leverage Weave autopatching for zero-config LLM tracing Key improvements: - Simplified integration by leveraging Weave's built-in LiteLLM autopatching - When init_weave() is called, all LiteLLM calls are automatically traced - No manual decoration needed for LLM calls - just call init_weave() - Added weave_attributes() context manager for conversation grouping - Added get_weave_op() 
for dynamic decorator access - Updated @weave_op to support both @weave_op and @weave_op(...) syntax - Improved documentation explaining the autopatching approach - Updated demo script to showcase automatic LLM tracing - Added tests for autopatching behavior The SDK uses LiteLLM for all LLM calls. Weave automatically patches LiteLLM when initialized, so users get full tracing with minimal setup. Co-authored-by: openhands --- examples/weave_observability_demo.py | 74 ++- .../openhands/sdk/observability/__init__.py | 4 + .../openhands/sdk/observability/weave.py | 585 ++++++++++-------- tests/sdk/observability/test_weave.py | 96 ++- 4 files changed, 489 insertions(+), 270 deletions(-) diff --git a/examples/weave_observability_demo.py b/examples/weave_observability_demo.py index 0417e42d30..f4a51ec7e0 100644 --- a/examples/weave_observability_demo.py +++ b/examples/weave_observability_demo.py @@ -1,24 +1,44 @@ #!/usr/bin/env python3 """Demo script showing Weave observability integration with OpenHands SDK. -This script demonstrates how to use Weave for tracing agent operations. -It creates a simple agent that processes messages and shows how traces -appear in the Weave UI. +This script demonstrates how Weave provides **automatic LLM tracing** for the +OpenHands SDK. The key insight is that Weave automatically patches LiteLLM +when initialized, so all LLM calls are traced without any manual decoration! -Prerequisites: - - Set WANDB_API_KEY environment variable (valid W&B API key) - - Set WEAVE_PROJECT environment variable (e.g., "your-team/openhands-demo") - - Optionally set OPENAI_API_KEY for LLM calls +## Key Features Demonstrated + +1. **Automatic LLM Tracing**: Just call `init_weave()` and all LiteLLM calls + are automatically traced - no `@weave.op` decorators needed for LLM calls! + +2. **Custom Function Tracing**: Use `@weave_op` for custom agent logic you + want to trace (tool execution, agent steps, etc.) + +3. **Conversation Grouping**: Use `weave_attributes()` to group related + operations under a conversation or session. + +## How It Works + +The SDK uses LiteLLM for all LLM calls. When you call `init_weave()`: +1. Weave's `implicit_patch()` automatically patches LiteLLM +2. All `litellm.completion()` and `litellm.acompletion()` calls are traced +3. You see full traces in the Weave UI without any code changes! + +## Prerequisites + +- Set WANDB_API_KEY environment variable (valid W&B API key) +- Set WEAVE_PROJECT environment variable (e.g., "your-team/openhands-demo") +- Optionally set OPENAI_API_KEY for actual LLM calls + +## Usage -Usage: export WANDB_API_KEY="your-api-key" export WEAVE_PROJECT="your-team/openhands-demo" python examples/weave_observability_demo.py Note: If WANDB_API_KEY is not set or invalid, the demo will still run - but without Weave tracing. This allows testing the decorator - functionality without requiring valid credentials. + but without Weave tracing. This allows testing the functionality + without requiring valid credentials. 
""" import os @@ -27,15 +47,17 @@ # Add the SDK to the path for development sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "openhands-sdk")) -from openhands.sdk.observability.weave import ( +from openhands.sdk.observability import ( init_weave, is_weave_initialized, maybe_init_weave, weave_op, + weave_attributes, weave_thread, start_weave_span, end_weave_span, observe_weave, + get_weave_op, ) @@ -130,6 +152,8 @@ def run_demo(): if success: print("✅ Weave initialized successfully!") print(f" View traces at: https://wandb.ai/{project}/weave") + print("\n 🎉 KEY FEATURE: All LiteLLM calls are now AUTOMATICALLY traced!") + print(" No need to decorate LLM calls - Weave patches LiteLLM for you.") else: print("⚠️ Weave not initialized (missing credentials or package)") print(" Running demo without tracing...") @@ -139,20 +163,23 @@ def run_demo(): print("-" * 60) # Demo 1: Simple decorated function - print("\n1️⃣ Processing a message with @weave_op decorator:") + print("\n1️⃣ Custom function tracing with @weave_op decorator:") + print(" (Use this for custom agent logic you want to trace)") result = process_message("Hello, this is a test message for the agent!") print(f" Result: {result}") # Demo 2: Sentiment analysis with observe_weave - print("\n2️⃣ Analyzing sentiment with @observe_weave decorator:") + print("\n2️⃣ Laminar-compatible interface with @observe_weave:") + print(" (Easy migration from Laminar to Weave)") sentiment = analyze_sentiment("This is a great and excellent demo!") print(f" Sentiment: {sentiment}") - # Demo 3: Thread grouping for conversation - print("\n3️⃣ Simulating a conversation with thread grouping:") + # Demo 3: Conversation grouping with weave_attributes + print("\n3️⃣ Conversation grouping with weave_attributes:") + print(" (Group all operations under a conversation ID)") conversation_id = "demo-conversation-001" - with weave_thread(conversation_id): + with weave_attributes(conversation_id=conversation_id, user_id="demo-user"): for i, msg in enumerate([ "Hello, I need help with my code", "The function is not working correctly", @@ -162,15 +189,28 @@ def run_demo(): print(f" Step {i}: sentiment={result['sentiment']}") # Demo 4: Manual span management - print("\n4️⃣ Manual span management:") + print("\n4️⃣ Manual span management (for advanced use cases):") result = manual_span_example() print(f" Result: {result}") + # Demo 5: Show how to get weave.op for dynamic decoration + print("\n5️⃣ Dynamic decoration with get_weave_op():") + op = get_weave_op() + + @op + def dynamically_traced_function(x: int) -> int: + return x * 2 + + result = dynamically_traced_function(21) + print(f" Result: {result}") + print("\n" + "=" * 60) print("Demo completed!") if is_weave_initialized(): print(f"\n🔗 View your traces at: https://wandb.ai/{project}/weave") + print("\n💡 Remember: LLM calls via LiteLLM are traced AUTOMATICALLY!") + print(" Just use the SDK's LLM class normally - no decoration needed.") print("=" * 60) diff --git a/openhands-sdk/openhands/sdk/observability/__init__.py b/openhands-sdk/openhands/sdk/observability/__init__.py index 5b638534dc..799c1d1d71 100644 --- a/openhands-sdk/openhands/sdk/observability/__init__.py +++ b/openhands-sdk/openhands/sdk/observability/__init__.py @@ -2,12 +2,14 @@ from openhands.sdk.observability.weave import ( end_weave_span, get_weave_client, + get_weave_op, init_weave, is_weave_initialized, maybe_init_weave, observe_weave, should_enable_weave, start_weave_span, + weave_attributes, weave_op, weave_thread, WeaveSpanManager, @@ 
-21,12 +23,14 @@ # Weave exports "end_weave_span", "get_weave_client", + "get_weave_op", "init_weave", "is_weave_initialized", "maybe_init_weave", "observe_weave", "should_enable_weave", "start_weave_span", + "weave_attributes", "weave_op", "weave_thread", "WeaveSpanManager", diff --git a/openhands-sdk/openhands/sdk/observability/weave.py b/openhands-sdk/openhands/sdk/observability/weave.py index 821765f6d9..6d16e74ba3 100644 --- a/openhands-sdk/openhands/sdk/observability/weave.py +++ b/openhands-sdk/openhands/sdk/observability/weave.py @@ -1,58 +1,113 @@ """Weave observability integration for OpenHands SDK. -This module provides integration with Weights & Biases Weave for tracing -and observability of agent operations. Weave automatically tracks LLM calls, -tool executions, and agent steps. +This module provides integration with Weights & Biases Weave for automatic +tracing and observability of agent operations. It leverages Weave's built-in +autopatching to automatically trace all LLM calls made through LiteLLM. -Configuration: - Set the following environment variables to enable Weave tracing: - - WANDB_API_KEY: Your Weights & Biases API key - - WEAVE_PROJECT: The Weave project name (e.g., "my-team/my-project") +## Key Features - Alternatively, call `init_weave()` directly with the project name. +1. **Zero-config LLM tracing**: Just call `init_weave()` and all LiteLLM calls + are automatically traced - no manual decoration needed! -Example: - >>> from openhands.sdk.observability.weave import maybe_init_weave, weave_op - >>> maybe_init_weave() # Auto-initializes if env vars are set - >>> - >>> @weave_op(name="my_function") - >>> def my_function(x: int) -> int: - ... return x + 1 +2. **Automatic integration patching**: Weave automatically patches LiteLLM, + OpenAI, Anthropic, and 30+ other providers when initialized. + +3. **Optional manual tracing**: Use `@weave.op` for custom agent logic that + you want to trace (tool execution, agent steps, etc.) + +4. **Thread grouping**: Group related operations under conversation threads. + +## How It Works + +The SDK uses LiteLLM for all LLM calls. When you call `init_weave()`: +1. Weave's `implicit_patch()` automatically patches LiteLLM +2. All `litellm.completion()` and `litellm.acompletion()` calls are traced +3. You see full traces in the Weave UI without any code changes! + +## Environment Variables + +- `WANDB_API_KEY`: Your Weights & Biases API key +- `WEAVE_PROJECT`: The Weave project name (e.g., "my-team/my-project") + +## Usage Examples + +### Basic Usage (Automatic LLM Tracing) + +```python +from openhands.sdk.observability import init_weave +from openhands.sdk import LLM + +# Initialize Weave - this automatically traces all LLM calls! 
+init_weave("my-team/my-project") + +# All LLM calls are now automatically traced +llm = LLM(model="gpt-4") +response = llm.completion(messages=[{"role": "user", "content": "Hello!"}]) +# ^ This call appears in Weave UI automatically +``` + +### Custom Function Tracing + +```python +import weave +from openhands.sdk.observability import init_weave + +init_weave("my-team/my-project") + +# Use @weave.op for custom logic you want to trace +@weave.op +def process_agent_step(step: dict) -> dict: + # Your custom logic here + return {"processed": True} +``` + +### Conversation Thread Grouping + +```python +from openhands.sdk.observability import init_weave, weave_attributes + +init_weave("my-team/my-project") + +# Group all operations under a conversation +with weave_attributes(conversation_id="conv-123", user_id="user-456"): + # All LLM calls and traced functions within this block + # will be tagged with these attributes + response = llm.completion(...) +``` See Also: - Weave documentation: https://docs.wandb.ai/weave - Laminar integration: openhands.sdk.observability.laminar """ +from __future__ import annotations + +import logging +import os from collections.abc import Callable from contextlib import contextmanager -from functools import wraps from typing import Any, ParamSpec, TypeVar -from openhands.sdk.logger import get_logger from openhands.sdk.observability.utils import get_env -logger = get_logger(__name__) +logger = logging.getLogger(__name__) -# Type variables for generic function signatures P = ParamSpec("P") R = TypeVar("R") -# Global state for Weave initialization +# Global state _weave_initialized: bool = False _weave_client: Any = None -def should_enable_weave() -> bool: - """Check if Weave should be enabled based on environment configuration. +def get_weave_client() -> Any: + """Get the current Weave client instance. Returns: - True if WANDB_API_KEY and WEAVE_PROJECT are set, False otherwise. + The Weave client if initialized, None otherwise. """ - api_key = get_env("WANDB_API_KEY") - project = get_env("WEAVE_PROJECT") - return bool(api_key and project) + return _weave_client def is_weave_initialized() -> bool: @@ -61,30 +116,41 @@ def is_weave_initialized() -> bool: Returns: True if Weave is initialized and ready for tracing. """ - global _weave_initialized return _weave_initialized def init_weave( project: str | None = None, api_key: str | None = None, + *, + settings: dict[str, Any] | None = None, ) -> bool: - """Initialize Weave for tracing. + """Initialize Weave for automatic tracing. + + This is the main entry point for enabling Weave observability. When called, + Weave automatically patches LiteLLM and other supported libraries, so all + LLM calls are traced without any manual decoration. Args: project: The Weave project name (e.g., "my-team/my-project"). If not provided, uses WEAVE_PROJECT environment variable. api_key: The Weights & Biases API key. If not provided, uses WANDB_API_KEY environment variable. + settings: Optional dict of Weave settings to configure behavior. + See Weave documentation for available settings. Returns: True if initialization was successful, False otherwise. Raises: ValueError: If no project is specified and WEAVE_PROJECT is not set. - """ - import os + Example: + >>> from openhands.sdk.observability import init_weave + >>> init_weave("my-team/openhands-agent") + True + >>> # Now all LiteLLM calls are automatically traced! 
+ """ global _weave_initialized, _weave_client if _weave_initialized: @@ -123,9 +189,20 @@ def init_weave( ) try: - _weave_client = weave.init(project_name) + # Initialize Weave - this automatically: + # 1. Patches all already-imported integrations (LiteLLM, OpenAI, etc.) + # 2. Registers import hooks for future imports + init_kwargs: dict[str, Any] = {} + if settings: + init_kwargs["settings"] = settings + + _weave_client = weave.init(project_name, **init_kwargs) _weave_initialized = True - logger.info(f"Weave initialized for project: {project_name}") + + logger.info( + f"Weave initialized for project: {project_name}. " + "All LiteLLM calls will be automatically traced." + ) return True except Exception as e: logger.error(f"Failed to initialize Weave: {e}") @@ -135,149 +212,247 @@ def init_weave( def maybe_init_weave() -> bool: """Initialize Weave if environment variables are configured. - This is a convenience function that checks for WANDB_API_KEY and - WEAVE_PROJECT environment variables and initializes Weave if both are set. + This is a convenience function that initializes Weave only if both + WANDB_API_KEY and WEAVE_PROJECT environment variables are set. + Useful for conditional initialization based on environment. Returns: - True if Weave was initialized (or already initialized), False otherwise. + True if Weave was initialized (or already was), False otherwise. + + Example: + >>> import os + >>> os.environ["WANDB_API_KEY"] = "your-key" + >>> os.environ["WEAVE_PROJECT"] = "my-team/my-project" + >>> from openhands.sdk.observability import maybe_init_weave + >>> maybe_init_weave() # Initializes automatically + True """ - if is_weave_initialized(): + if _weave_initialized: return True - if should_enable_weave(): - return init_weave() + if not should_enable_weave(): + logger.debug( + "Weave environment variables not set (WANDB_API_KEY, WEAVE_PROJECT). " + "Skipping Weave initialization." + ) + return False - logger.debug( - "Weave environment variables not set (WANDB_API_KEY, WEAVE_PROJECT). " - "Skipping Weave initialization." - ) - return False + try: + return init_weave() + except ValueError: + return False -def get_weave_client() -> Any: - """Get the current Weave client. +def should_enable_weave() -> bool: + """Check if Weave should be enabled based on environment variables. Returns: - The Weave client if initialized, None otherwise. + True if both WANDB_API_KEY and WEAVE_PROJECT are set. """ - global _weave_client - return _weave_client + return bool(get_env("WANDB_API_KEY") and get_env("WEAVE_PROJECT")) -def weave_op( - name: str | None = None, - *, - call_display_name: str | Callable[..., str] | None = None, - postprocess_inputs: Callable[..., dict[str, Any]] | None = None, - postprocess_output: Callable[..., Any] | None = None, -) -> Callable[[Callable[P, R]], Callable[P, R]]: - """Decorator to trace a function with Weave. +@contextmanager +def weave_attributes(**attributes: Any): + """Context manager to add attributes to all operations within the block. - This decorator wraps a function to automatically trace its inputs, outputs, - and execution time with Weave. If Weave is not initialized, the function - runs normally without tracing. + This is useful for grouping related operations (e.g., all events in a + conversation) or adding metadata to traces. Args: - name: Optional name for the operation. Defaults to the function name. - call_display_name: Optional display name or callable that returns a - display name for each call. 
- postprocess_inputs: Optional function to transform inputs before logging. - postprocess_output: Optional function to transform output before logging. - - Returns: - A decorator that wraps the function with Weave tracing. + **attributes: Key-value pairs to attach to all operations. + Common attributes: conversation_id, user_id, session_id, etc. Example: - >>> @weave_op(name="process_data") - >>> def process_data(data: dict) -> dict: - ... return {"processed": True, **data} + >>> with weave_attributes(conversation_id="conv-123", user_id="user-456"): + ... # All LLM calls and traced functions here will have these attributes + ... response = llm.completion(messages=[...]) """ - def decorator(func: Callable[P, R]) -> Callable[P, R]: - @wraps(func) - def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: - if not is_weave_initialized(): - return func(*args, **kwargs) - - try: - import weave - - # Build weave.op kwargs - op_kwargs: dict[str, Any] = {} - if name: - op_kwargs["name"] = name - if call_display_name: - op_kwargs["call_display_name"] = call_display_name - if postprocess_inputs: - op_kwargs["postprocess_inputs"] = postprocess_inputs - if postprocess_output: - op_kwargs["postprocess_output"] = postprocess_output - - # Apply weave.op decorator dynamically - traced_func = weave.op(**op_kwargs)(func) - return traced_func(*args, **kwargs) - except Exception as e: - logger.debug(f"Weave tracing failed, running without trace: {e}") - return func(*args, **kwargs) - - return wrapper + if not _weave_initialized: + yield + return - return decorator + try: + import weave + with weave.attributes(attributes): + yield + except Exception as e: + logger.warning(f"Failed to set weave attributes: {e}") + yield @contextmanager def weave_thread(thread_id: str): - """Context manager to group operations under a Weave thread. + """Context manager to group operations under a thread. - Weave threads allow grouping related operations (like all events in a - conversation) under a single trace hierarchy. + This is an alias for weave_attributes(thread_id=...) for convenience + and backward compatibility. Args: thread_id: Unique identifier for the thread (e.g., conversation ID). - Yields: - The thread context if Weave is initialized, otherwise a no-op context. - Example: >>> with weave_thread("conversation-123"): ... # All operations here will be grouped under the same thread - ... process_message("Hello") - ... generate_response() + ... response = llm.completion(messages=[...]) """ - if not is_weave_initialized(): + with weave_attributes(thread_id=thread_id): yield - return + + +def get_weave_op(): + """Get the weave.op decorator for manual function tracing. + + Returns the actual weave.op decorator if Weave is initialized, + otherwise returns a no-op decorator that just returns the function. + + This is useful when you want to trace custom agent logic beyond + the automatic LLM call tracing. + + Returns: + The weave.op decorator or a no-op decorator. + + Example: + >>> from openhands.sdk.observability import init_weave, get_weave_op + >>> init_weave("my-project") + >>> weave_op = get_weave_op() + >>> + >>> @weave_op + ... def my_custom_function(x: int) -> int: + ... 
return x * 2 + """ + if not _weave_initialized: + def noop_decorator(func): + return func + return noop_decorator try: import weave + return weave.op + except ImportError: + def noop_decorator(func): + return func + return noop_decorator - # Check if there's an active Weave client - client = weave.client.get_current_client() - if client is None: - yield - return - with weave.thread(thread_id): - yield - except Exception as e: - logger.debug(f"Weave thread context failed: {e}") - yield +def weave_op( + func: Callable[P, R] | None = None, + *, + name: str | None = None, + call_display_name: str | Callable[..., str] | None = None, + postprocess_inputs: Callable[..., dict[str, Any]] | None = None, + postprocess_output: Callable[..., Any] | None = None, +) -> Callable[P, R] | Callable[[Callable[P, R]], Callable[P, R]]: + """Decorator to trace a function with Weave. + + This is a convenience wrapper around weave.op that handles the case + when Weave is not initialized (returns the function unchanged). + + Can be used with or without parentheses: + @weave_op + def my_func(): ... + + @weave_op(name="custom_name") + def my_func(): ... + + Args: + func: The function to decorate (when used without parentheses). + name: Optional name for the operation. Defaults to function name. + call_display_name: Display name for the call in the Weave UI. + postprocess_inputs: Function to transform inputs before logging. + postprocess_output: Function to transform output before logging. + + Returns: + The decorated function or a decorator. + """ + def decorator(fn: Callable[P, R]) -> Callable[P, R]: + if not _weave_initialized: + return fn + + try: + import weave + + op_kwargs: dict[str, Any] = {} + if name: + op_kwargs["name"] = name + if call_display_name: + op_kwargs["call_display_name"] = call_display_name + if postprocess_inputs: + op_kwargs["postprocess_inputs"] = postprocess_inputs + if postprocess_output: + op_kwargs["postprocess_output"] = postprocess_output + + if op_kwargs: + return weave.op(**op_kwargs)(fn) + return weave.op(fn) + except Exception as e: + logger.warning(f"Failed to apply weave.op decorator: {e}") + return fn + + # Handle both @weave_op and @weave_op(...) syntax + if func is not None: + return decorator(func) + return decorator + + +def observe_weave( + name: str | None = None, + *, + ignore_inputs: list[str] | None = None, + ignore_output: bool = False, +) -> Callable[[Callable[P, R]], Callable[P, R]]: + """Decorator for observing functions with Weave (Laminar-compatible interface). + + This provides a similar interface to the Laminar `observe` decorator, + making it easier to switch between observability backends. + + Args: + name: Optional name for the operation. + ignore_inputs: List of input parameter names to exclude from logging. + ignore_output: If True, don't log the output. + + Returns: + A decorator that wraps the function for Weave tracing. + + Example: + >>> @observe_weave(name="login", ignore_inputs=["password"]) + ... def login(username: str, password: str) -> bool: + ... 
return authenticate(username, password) + """ + def postprocess_inputs_fn(inputs: dict[str, Any]) -> dict[str, Any]: + if not ignore_inputs: + return inputs + return {k: v for k, v in inputs.items() if k not in ignore_inputs} + + def postprocess_output_fn(output: Any) -> Any: + if ignore_output: + return "[output hidden]" + return output + + return weave_op( + name=name, + postprocess_inputs=postprocess_inputs_fn if ignore_inputs else None, + postprocess_output=postprocess_output_fn if ignore_output else None, + ) class WeaveSpanManager: - """Manages Weave spans for manual tracing. + """Manager for manual span lifecycle control. + + This class provides fine-grained control over span creation and completion, + useful when automatic decoration is not suitable. - This class provides a stack-based approach to managing Weave spans, - similar to the SpanManager for Laminar. It's useful when you need - more control over span lifecycle than the decorator provides. + Note: For most use cases, the automatic LLM tracing and @weave_op decorator + are sufficient. Use this only when you need explicit span control. Example: >>> manager = WeaveSpanManager() - >>> manager.start_span("process_request", session_id="conv-123") + >>> manager.start_span("process_batch", inputs={"batch_size": 100}) >>> try: - ... # Do work - ... pass - ... finally: - ... manager.end_span() + ... result = process_batch() + ... manager.end_span(output=result) + ... except Exception as e: + ... manager.end_span(error=str(e)) """ def __init__(self): @@ -287,159 +462,85 @@ def start_span( self, name: str, inputs: dict[str, Any] | None = None, - session_id: str | None = None, - ) -> Any | None: - """Start a new Weave span. + ) -> Any: + """Start a new span. Args: - name: Name of the operation being traced. - inputs: Optional dictionary of input values to log. - session_id: Optional session ID for grouping related spans. + name: Name of the span/operation. + inputs: Input parameters to log. Returns: - The Weave call object if successful, None otherwise. + The span/call object if successful, None otherwise. """ - if not is_weave_initialized(): + if not _weave_initialized: return None try: import weave - client = get_weave_client() - if client is None: - return None + @weave.op(name=name) + def _span_op(**kwargs: Any) -> Any: + pass - # Create a call using the client API - call = client.create_call( - op=name, - inputs=inputs or {}, - ) + call = _span_op.call(inputs or {}) self._call_stack.append(call) return call except Exception as e: - logger.debug(f"Failed to start Weave span: {e}") + logger.warning(f"Failed to start weave span: {e}") return None - def end_span(self, output: Any = None, error: Exception | None = None) -> None: - """End the most recent Weave span. + def end_span( + self, + output: Any = None, + error: str | None = None, + ) -> None: + """End the current span. Args: - output: Optional output value to log. - error: Optional exception if the operation failed. + output: Output value to log. + error: Error message if the span failed. 
""" if not self._call_stack: - logger.debug("Attempted to end span, but stack is empty") return try: call = self._call_stack.pop() - client = get_weave_client() - if client and call: - if error: - client.finish_call(call, output=None, exception=error) - else: - client.finish_call(call, output=output) + if error: + call.finish(exception=Exception(error)) + else: + call.finish(output=output) except Exception as e: - logger.debug(f"Failed to end Weave span: {e}") - + logger.warning(f"Failed to end weave span: {e}") -# Global span manager instance -_span_manager: WeaveSpanManager | None = None - -def _get_span_manager() -> WeaveSpanManager: - """Get or create the global span manager.""" - global _span_manager - if _span_manager is None: - _span_manager = WeaveSpanManager() - return _span_manager +# Global span manager instance for convenience +_global_span_manager = WeaveSpanManager() def start_weave_span( name: str, inputs: dict[str, Any] | None = None, - session_id: str | None = None, -) -> Any | None: - """Start a new Weave span using the global span manager. +) -> Any: + """Start a new Weave span using the global manager. Args: - name: Name of the operation being traced. - inputs: Optional dictionary of input values to log. - session_id: Optional session ID for grouping related spans. + name: Name of the span/operation. + inputs: Input parameters to log. Returns: - The Weave call object if successful, None otherwise. - """ - return _get_span_manager().start_span(name, inputs, session_id) - - -def end_weave_span(output: Any = None, error: Exception | None = None) -> None: - """End the most recent Weave span using the global span manager. - - Args: - output: Optional output value to log. - error: Optional exception if the operation failed. + The span/call object if successful, None otherwise. """ - try: - _get_span_manager().end_span(output, error) - except Exception: - logger.debug("Error ending Weave span") + return _global_span_manager.start_span(name, inputs) -def observe_weave( - *, - name: str | None = None, - ignore_inputs: list[str] | None = None, - ignore_output: bool = False, -) -> Callable[[Callable[P, R]], Callable[P, R]]: - """Unified observe decorator that works with both Weave and Laminar. - - This decorator provides a consistent interface for observability that - works regardless of which backend (Weave or Laminar) is configured. - It prioritizes Weave if initialized, otherwise falls back to Laminar. +def end_weave_span( + output: Any = None, + error: str | None = None, +) -> None: + """End the current Weave span using the global manager. Args: - name: Optional name for the operation. - ignore_inputs: List of input parameter names to exclude from logging. - ignore_output: If True, don't log the function's output. - - Returns: - A decorator that wraps the function with observability tracing. - - Example: - >>> @observe_weave(name="agent.step", ignore_inputs=["state"]) - >>> def step(self, state: State) -> Action: - ... return self._process(state) + output: Output value to log. + error: Error message if the span failed. 
""" - def decorator(func: Callable[P, R]) -> Callable[P, R]: - @wraps(func) - def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: - # Try Weave first - if is_weave_initialized(): - try: - import weave - - op_kwargs: dict[str, Any] = {} - if name: - op_kwargs["name"] = name - - # Handle input filtering via postprocess_inputs - if ignore_inputs: - def filter_inputs(inputs: dict[str, Any]) -> dict[str, Any]: - return { - k: v for k, v in inputs.items() - if k not in ignore_inputs - } - op_kwargs["postprocess_inputs"] = filter_inputs - - traced_func = weave.op(**op_kwargs)(func) - return traced_func(*args, **kwargs) - except Exception as e: - logger.debug(f"Weave tracing failed: {e}") - - # Fall through to untraced execution - return func(*args, **kwargs) - - return wrapper - - return decorator + _global_span_manager.end_span(output, error) diff --git a/tests/sdk/observability/test_weave.py b/tests/sdk/observability/test_weave.py index 64de5fe3c0..78e8dd4431 100644 --- a/tests/sdk/observability/test_weave.py +++ b/tests/sdk/observability/test_weave.py @@ -1,6 +1,7 @@ """Tests for Weave observability integration. These tests verify the Weave integration works correctly, including: +- Automatic LLM tracing via Weave's autopatching - Decorator functionality (with and without Weave initialized) - Environment variable configuration - Graceful fallback when Weave is not available @@ -74,20 +75,19 @@ def test_function(x: int) -> int: result = test_function(5) assert result == 6 - def test_weave_op_preserves_function_metadata(self): - """@weave_op preserves function name and docstring.""" + def test_weave_op_without_parentheses(self): + """@weave_op can be used without parentheses.""" import openhands.sdk.observability.weave as weave_module weave_module._weave_initialized = False from openhands.sdk.observability.weave import weave_op - @weave_op(name="custom_name") - def my_function(x: int) -> int: - """My docstring.""" - return x + @weave_op + def test_function(x: int) -> int: + return x + 1 - assert my_function.__name__ == "my_function" - assert my_function.__doc__ == "My docstring." 
+ result = test_function(5) + assert result == 6 def test_weave_op_handles_exceptions(self): """@weave_op propagates exceptions correctly.""" @@ -136,8 +136,22 @@ def test_function(data: str, secret: str) -> str: assert result == "hello-processed" -class TestWeaveThread: - """Tests for the weave_thread context manager.""" +class TestWeaveAttributes: + """Tests for the weave_attributes context manager.""" + + def test_weave_attributes_without_initialization(self): + """weave_attributes works as no-op when Weave is not initialized.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import weave_attributes + + results = [] + with weave_attributes(conversation_id="conv-123", user_id="user-456"): + results.append(1) + results.append(2) + + assert results == [1, 2] def test_weave_thread_without_initialization(self): """weave_thread works as no-op when Weave is not initialized.""" @@ -191,6 +205,28 @@ def test_global_span_functions(self): end_weave_span(output={"y": 2}) +class TestGetWeaveOp: + """Tests for the get_weave_op function.""" + + def test_get_weave_op_returns_noop_when_not_initialized(self): + """get_weave_op returns a no-op decorator when Weave is not initialized.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import get_weave_op + + op = get_weave_op() + + @op + def test_function(x: int) -> int: + return x * 2 + + # Function should work normally + assert test_function(5) == 10 + # Function should be unchanged + assert test_function.__name__ == "test_function" + + class TestWeaveExports: """Tests for module exports.""" @@ -199,12 +235,14 @@ def test_all_exports_available(self): from openhands.sdk.observability import ( end_weave_span, get_weave_client, + get_weave_op, init_weave, is_weave_initialized, maybe_init_weave, observe_weave, should_enable_weave, start_weave_span, + weave_attributes, weave_op, weave_thread, WeaveSpanManager, @@ -213,12 +251,14 @@ def test_all_exports_available(self): # Just verify they're callable assert callable(end_weave_span) assert callable(get_weave_client) + assert callable(get_weave_op) assert callable(init_weave) assert callable(is_weave_initialized) assert callable(maybe_init_weave) assert callable(observe_weave) assert callable(should_enable_weave) assert callable(start_weave_span) + assert callable(weave_attributes) assert callable(weave_op) assert callable(weave_thread) assert WeaveSpanManager is not None @@ -258,7 +298,7 @@ def test_init_weave_uses_env_project(self): result = init_weave() # Should have called weave.init with the project - mock_weave_init.assert_called_once_with("test-project") + mock_weave_init.assert_called_once() def test_init_weave_already_initialized(self): """init_weave returns True immediately if already initialized.""" @@ -272,3 +312,37 @@ def test_init_weave_already_initialized(self): # Reset for other tests weave_module._weave_initialized = False + + +class TestAutopatching: + """Tests for Weave's autopatching behavior. + + These tests verify that the integration is designed to leverage + Weave's automatic LiteLLM patching. 
+ """ + + def test_init_weave_calls_weave_init(self): + """init_weave calls weave.init which triggers autopatching.""" + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import init_weave + + with patch("openhands.sdk.observability.weave.get_env") as mock_get_env: + mock_get_env.side_effect = lambda k: { + "WEAVE_PROJECT": "test-project", + "WANDB_API_KEY": "test-key", + }.get(k) + + with patch("weave.init") as mock_weave_init: + with patch("wandb.login"): + mock_weave_init.return_value = MagicMock() + result = init_weave() + + # weave.init should be called, which triggers implicit_patch() + # and register_import_hook() internally + mock_weave_init.assert_called_once() + assert result is True + + # Reset for other tests + weave_module._weave_initialized = False From d162ae620e5f3abc0c2165fc3abea42060f77dfd Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 18 Dec 2025 23:50:31 +0000 Subject: [PATCH 3/8] feat: Add Weave thread support for conversation-level tracing Integrates Weave threading into LocalConversation.run() to automatically group all operations (LLM calls, traced functions) under the conversation ID. Key changes: - Added _get_weave_thread_context() helper that returns weave.thread() if Weave is initialized, otherwise a nullcontext (no-op) - Wrapped the run loop with the Weave thread context - All LLM calls (autopatched via Weave's LiteLLM integration) and @weave_op decorated functions are now grouped by conversation This enables conversation-level tracing in the Weave UI, similar to the OpenHands PR #12056 approach but adapted for the SDK architecture. Co-authored-by: openhands --- .../conversation/impl/local_conversation.py | 159 +++++++++++------- 1 file changed, 96 insertions(+), 63 deletions(-) diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index a05aa7b1b8..b26dacabc7 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -1,6 +1,7 @@ import atexit import uuid from collections.abc import Mapping +from contextlib import nullcontext from pathlib import Path from openhands.sdk.agent.base import AgentBase @@ -35,6 +36,7 @@ from openhands.sdk.llm.llm_registry import LLMRegistry from openhands.sdk.logger import get_logger from openhands.sdk.observability.laminar import observe +from openhands.sdk.observability.weave import is_weave_initialized from openhands.sdk.security.analyzer import SecurityAnalyzerBase from openhands.sdk.security.confirmation_policy import ( ConfirmationPolicyBase, @@ -45,6 +47,29 @@ logger = get_logger(__name__) +def _get_weave_thread_context(conversation_id: str): + """Get Weave thread context manager if Weave is initialized. + + This groups all operations within a conversation run under the same + Weave thread, enabling conversation-level tracing in the Weave UI. + + Args: + conversation_id: The conversation ID to use as the thread ID. + + Returns: + A weave.thread context manager if Weave is initialized, + otherwise a nullcontext (no-op). 
+ """ + if not is_weave_initialized(): + return nullcontext() + + try: + import weave + return weave.thread(conversation_id) + except Exception: + return nullcontext() + + class LocalConversation(BaseConversation): agent: AgentBase workspace: LocalWorkspace @@ -295,6 +320,11 @@ def run(self) -> None: - Creates and executes actions immediately Can be paused between steps + + Note: + If Weave is initialized, all operations within this run are grouped + under a Weave thread using the conversation ID. This enables + conversation-level tracing in the Weave UI. """ with self._state: @@ -306,75 +336,78 @@ def run(self) -> None: self._state.execution_status = ConversationExecutionStatus.RUNNING iteration = 0 - try: - while True: - logger.debug(f"Conversation run iteration {iteration}") - with self._state: - # Pause attempts to acquire the state lock - # Before value can be modified step can be taken - # Ensure step conditions are checked when lock is already acquired - if self._state.execution_status in [ - ConversationExecutionStatus.FINISHED, - ConversationExecutionStatus.PAUSED, - ConversationExecutionStatus.STUCK, - ]: - break - - # Check for stuck patterns if enabled - if self._stuck_detector: - is_stuck = self._stuck_detector.is_stuck() - - if is_stuck: - logger.warning("Stuck pattern detected.") + # Wrap the run loop in a Weave thread context if Weave is initialized. + # This groups all LLM calls and traced operations under the conversation ID. + with _get_weave_thread_context(str(self.id)): + try: + while True: + logger.debug(f"Conversation run iteration {iteration}") + with self._state: + # Pause attempts to acquire the state lock + # Before value can be modified step can be taken + # Ensure step conditions are checked when lock is already acquired + if self._state.execution_status in [ + ConversationExecutionStatus.FINISHED, + ConversationExecutionStatus.PAUSED, + ConversationExecutionStatus.STUCK, + ]: + break + + # Check for stuck patterns if enabled + if self._stuck_detector: + is_stuck = self._stuck_detector.is_stuck() + + if is_stuck: + logger.warning("Stuck pattern detected.") + self._state.execution_status = ( + ConversationExecutionStatus.STUCK + ) + continue + + # clear the flag before calling agent.step() (user approved) + if ( + self._state.execution_status + == ConversationExecutionStatus.WAITING_FOR_CONFIRMATION + ): self._state.execution_status = ( - ConversationExecutionStatus.STUCK + ConversationExecutionStatus.RUNNING ) - continue - - # clear the flag before calling agent.step() (user approved) - if ( - self._state.execution_status - == ConversationExecutionStatus.WAITING_FOR_CONFIRMATION - ): - self._state.execution_status = ( - ConversationExecutionStatus.RUNNING - ) - self.agent.step( - self, on_event=self._on_event, on_token=self._on_token + self.agent.step( + self, on_event=self._on_event, on_token=self._on_token + ) + iteration += 1 + + # Check for non-finished terminal conditions + # Note: We intentionally do NOT check for FINISHED status here. + # This allows concurrent user messages to be processed: + # 1. Agent finishes and sets status to FINISHED + # 2. User sends message concurrently via send_message() + # 3. send_message() waits for FIFO lock, then sets status to IDLE + # 4. Run loop continues to next iteration and processes the message + # 5. 
Without this design, concurrent messages would be lost + if ( + self.state.execution_status + == ConversationExecutionStatus.WAITING_FOR_CONFIRMATION + or iteration >= self.max_iteration_per_run + ): + break + except Exception as e: + self._state.execution_status = ConversationExecutionStatus.ERROR + + # Add an error event + self._on_event( + ConversationErrorEvent( + source="environment", + code=e.__class__.__name__, + detail=str(e), ) - iteration += 1 - - # Check for non-finished terminal conditions - # Note: We intentionally do NOT check for FINISHED status here. - # This allows concurrent user messages to be processed: - # 1. Agent finishes and sets status to FINISHED - # 2. User sends message concurrently via send_message() - # 3. send_message() waits for FIFO lock, then sets status to IDLE - # 4. Run loop continues to next iteration and processes the message - # 5. Without this design, concurrent messages would be lost - if ( - self.state.execution_status - == ConversationExecutionStatus.WAITING_FOR_CONFIRMATION - or iteration >= self.max_iteration_per_run - ): - break - except Exception as e: - self._state.execution_status = ConversationExecutionStatus.ERROR - - # Add an error event - self._on_event( - ConversationErrorEvent( - source="environment", - code=e.__class__.__name__, - detail=str(e), ) - ) - # Re-raise with conversation id and persistence dir for better UX - raise ConversationRunError( - self._state.id, e, persistence_dir=self._state.persistence_dir - ) from e + # Re-raise with conversation id and persistence dir for better UX + raise ConversationRunError( + self._state.id, e, persistence_dir=self._state.persistence_dir + ) from e def set_confirmation_policy(self, policy: ConfirmationPolicyBase) -> None: """Set the confirmation policy and store it in conversation state.""" From b7b791d2948876f6f97290455fc9f3ef9d19a696 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 18 Dec 2025 23:51:08 +0000 Subject: [PATCH 4/8] docs: Update demo to highlight conversation threading feature Co-authored-by: openhands --- examples/weave_observability_demo.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/examples/weave_observability_demo.py b/examples/weave_observability_demo.py index f4a51ec7e0..03bfd89c95 100644 --- a/examples/weave_observability_demo.py +++ b/examples/weave_observability_demo.py @@ -13,15 +13,20 @@ 2. **Custom Function Tracing**: Use `@weave_op` for custom agent logic you want to trace (tool execution, agent steps, etc.) -3. **Conversation Grouping**: Use `weave_attributes()` to group related - operations under a conversation or session. +3. **Conversation Threading**: The SDK automatically wraps conversation runs + in `weave.thread()` to group all operations under the conversation ID. + This enables conversation-level tracing in the Weave UI! + +4. **Conversation Grouping**: Use `weave_attributes()` to add custom metadata + to operations (user_id, session_id, etc.) ## How It Works The SDK uses LiteLLM for all LLM calls. When you call `init_weave()`: 1. Weave's `implicit_patch()` automatically patches LiteLLM 2. All `litellm.completion()` and `litellm.acompletion()` calls are traced -3. You see full traces in the Weave UI without any code changes! +3. LocalConversation.run() wraps the event loop in `weave.thread(conversation_id)` +4. You see full conversation traces in the Weave UI without any code changes! 
## Prerequisites @@ -152,8 +157,10 @@ def run_demo(): if success: print("✅ Weave initialized successfully!") print(f" View traces at: https://wandb.ai/{project}/weave") - print("\n 🎉 KEY FEATURE: All LiteLLM calls are now AUTOMATICALLY traced!") - print(" No need to decorate LLM calls - Weave patches LiteLLM for you.") + print("\n 🎉 KEY FEATURES:") + print(" • All LiteLLM calls are AUTOMATICALLY traced (no decoration needed)") + print(" • Conversation.run() automatically groups operations by conversation ID") + print(" • Use @weave_op for custom functions you want to trace") else: print("⚠️ Weave not initialized (missing credentials or package)") print(" Running demo without tracing...") @@ -209,8 +216,13 @@ def dynamically_traced_function(x: int) -> int: if is_weave_initialized(): print(f"\n🔗 View your traces at: https://wandb.ai/{project}/weave") - print("\n💡 Remember: LLM calls via LiteLLM are traced AUTOMATICALLY!") - print(" Just use the SDK's LLM class normally - no decoration needed.") + print("\n💡 Key Integration Points:") + print(" • LLM calls via LiteLLM are traced AUTOMATICALLY") + print(" • Conversation.run() groups all operations by conversation ID") + print(" • Use @weave_op for custom agent logic you want to trace") + print("\n📝 In your code, just do:") + print(" from openhands.sdk.observability import init_weave") + print(" init_weave('your-project') # That's it!") print("=" * 60) From 8e1a65e2e54e137abf7c5889d0ec3a9f9b2809fb Mon Sep 17 00:00:00 2001 From: openhands Date: Fri, 19 Dec 2025 00:21:42 +0000 Subject: [PATCH 5/8] feat: Add generic observability context system for multi-tool support Introduces a unified observability context management system that allows multiple observability tools (Weave, Laminar, etc.) to work together seamlessly. 
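A minimal sketch of the single call site this enables (identifiers as added in this
patch; the body of the loop is elided):

    from openhands.sdk.observability import get_conversation_context

    def run(self) -> None:
        # Enters weave.thread(), a Laminar span, and any custom providers,
        # or is a no-op when no observability tool is initialized.
        with get_conversation_context(str(self.id)):
            ...  # existing agent step loop, unchanged
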
Key changes: - Added context.py with provider registry pattern - get_conversation_context() composes context managers from all enabled tools - Built-in providers for Weave (weave.thread) and Laminar (span with session_id) - LocalConversation.run() now uses the generic get_conversation_context() - Easy to add new observability tools via register_conversation_context_provider() Design benefits: - SDK is agnostic to which observability tools are enabled - Graceful degradation when tools are not initialized - Exception in one provider doesn't break others - Single integration point in LocalConversation Usage for adding new tools: from openhands.sdk.observability import register_conversation_context_provider def get_my_tool_context(conversation_id: str): if not is_my_tool_initialized(): return nullcontext() return my_tool.thread(conversation_id) register_conversation_context_provider(get_my_tool_context) Co-authored-by: openhands --- .../conversation/impl/local_conversation.py | 38 +-- .../openhands/sdk/observability/__init__.py | 11 + .../openhands/sdk/observability/context.py | 225 +++++++++++++++++ tests/sdk/observability/test_context.py | 238 ++++++++++++++++++ 4 files changed, 482 insertions(+), 30 deletions(-) create mode 100644 openhands-sdk/openhands/sdk/observability/context.py create mode 100644 tests/sdk/observability/test_context.py diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py index b26dacabc7..2194c8a12e 100644 --- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py +++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py @@ -1,7 +1,6 @@ import atexit import uuid from collections.abc import Mapping -from contextlib import nullcontext from pathlib import Path from openhands.sdk.agent.base import AgentBase @@ -35,8 +34,8 @@ from openhands.sdk.llm import LLM, Message, TextContent from openhands.sdk.llm.llm_registry import LLMRegistry from openhands.sdk.logger import get_logger +from openhands.sdk.observability.context import get_conversation_context from openhands.sdk.observability.laminar import observe -from openhands.sdk.observability.weave import is_weave_initialized from openhands.sdk.security.analyzer import SecurityAnalyzerBase from openhands.sdk.security.confirmation_policy import ( ConfirmationPolicyBase, @@ -47,29 +46,6 @@ logger = get_logger(__name__) -def _get_weave_thread_context(conversation_id: str): - """Get Weave thread context manager if Weave is initialized. - - This groups all operations within a conversation run under the same - Weave thread, enabling conversation-level tracing in the Weave UI. - - Args: - conversation_id: The conversation ID to use as the thread ID. - - Returns: - A weave.thread context manager if Weave is initialized, - otherwise a nullcontext (no-op). - """ - if not is_weave_initialized(): - return nullcontext() - - try: - import weave - return weave.thread(conversation_id) - except Exception: - return nullcontext() - - class LocalConversation(BaseConversation): agent: AgentBase workspace: LocalWorkspace @@ -322,9 +298,11 @@ def run(self) -> None: Can be paused between steps Note: - If Weave is initialized, all operations within this run are grouped - under a Weave thread using the conversation ID. This enables - conversation-level tracing in the Weave UI. + All operations within this run are automatically wrapped in + observability context managers for all enabled tools (Weave, Laminar, + etc.). 
This groups LLM calls and traced operations under the + conversation ID, enabling conversation-level tracing in observability + UIs. """ with self._state: @@ -336,9 +314,9 @@ def run(self) -> None: self._state.execution_status = ConversationExecutionStatus.RUNNING iteration = 0 - # Wrap the run loop in a Weave thread context if Weave is initialized. + # Wrap the run loop in observability context managers for all enabled tools. # This groups all LLM calls and traced operations under the conversation ID. - with _get_weave_thread_context(str(self.id)): + with get_conversation_context(str(self.id)): try: while True: logger.debug(f"Conversation run iteration {iteration}") diff --git a/openhands-sdk/openhands/sdk/observability/__init__.py b/openhands-sdk/openhands/sdk/observability/__init__.py index 799c1d1d71..1dcbf3d033 100644 --- a/openhands-sdk/openhands/sdk/observability/__init__.py +++ b/openhands-sdk/openhands/sdk/observability/__init__.py @@ -1,3 +1,9 @@ +from openhands.sdk.observability.context import ( + clear_conversation_context_providers, + get_conversation_context, + register_conversation_context_provider, + unregister_conversation_context_provider, +) from openhands.sdk.observability.laminar import maybe_init_laminar, observe from openhands.sdk.observability.weave import ( end_weave_span, @@ -17,6 +23,11 @@ __all__ = [ + # Generic observability context (unified interface) + "get_conversation_context", + "register_conversation_context_provider", + "unregister_conversation_context_provider", + "clear_conversation_context_providers", # Laminar exports "maybe_init_laminar", "observe", diff --git a/openhands-sdk/openhands/sdk/observability/context.py b/openhands-sdk/openhands/sdk/observability/context.py new file mode 100644 index 0000000000..c605e0119c --- /dev/null +++ b/openhands-sdk/openhands/sdk/observability/context.py @@ -0,0 +1,225 @@ +"""Generic observability context management for the OpenHands SDK. + +This module provides a unified interface for managing observability contexts +across multiple observability tools (Weave, Laminar, etc.). It allows the SDK +to use a single API that automatically composes context managers from all +enabled observability providers. + +## Design Philosophy + +The SDK should be agnostic to which observability tools are enabled. This module +provides: + +1. **Unified Context Managers**: A single `get_conversation_context()` function + that returns a composed context manager for all enabled tools. + +2. **Provider Registry**: Observability tools register their context providers, + allowing easy extension for new tools. + +3. **Graceful Degradation**: If no observability tools are enabled, the context + managers are no-ops (nullcontext). + +## Usage + +In LocalConversation.run(): +```python +from openhands.sdk.observability.context import get_conversation_context + +def run(self): + with get_conversation_context(str(self.id)): + # All operations here are traced by all enabled observability tools + ... +``` + +## Adding New Observability Providers + +To add a new observability tool: + +1. Create a function that returns a context manager for conversation threading +2. 
Register it with `register_conversation_context_provider()` + +```python +from openhands.sdk.observability.context import register_conversation_context_provider + +def get_my_tool_context(conversation_id: str): + if not is_my_tool_initialized(): + return nullcontext() + return my_tool.thread(conversation_id) + +register_conversation_context_provider(get_my_tool_context) +``` +""" + +from collections.abc import Callable +from contextlib import ExitStack, contextmanager, nullcontext +from typing import Any, ContextManager, Iterator + +from openhands.sdk.logger import get_logger + + +logger = get_logger(__name__) + + +# Type alias for context provider functions +ConversationContextProvider = Callable[[str], ContextManager[Any]] + +# Registry of conversation context providers +_conversation_context_providers: list[ConversationContextProvider] = [] + + +def register_conversation_context_provider( + provider: ConversationContextProvider, +) -> None: + """Register a conversation context provider. + + Context providers are functions that take a conversation_id and return + a context manager. They are called in order of registration. + + Args: + provider: A function that takes a conversation_id string and returns + a context manager. Should return nullcontext() if the + observability tool is not initialized. + + Example: + ```python + def get_my_tool_context(conversation_id: str): + if not is_my_tool_initialized(): + return nullcontext() + return my_tool.thread(conversation_id) + + register_conversation_context_provider(get_my_tool_context) + ``` + """ + if provider not in _conversation_context_providers: + _conversation_context_providers.append(provider) + logger.debug(f"Registered conversation context provider: {provider.__name__}") + + +def unregister_conversation_context_provider( + provider: ConversationContextProvider, +) -> None: + """Unregister a conversation context provider. + + Args: + provider: The provider function to unregister. + """ + if provider in _conversation_context_providers: + _conversation_context_providers.remove(provider) + logger.debug(f"Unregistered conversation context provider: {provider.__name__}") + + +def clear_conversation_context_providers() -> None: + """Clear all registered conversation context providers. + + Useful for testing or resetting the observability state. + """ + _conversation_context_providers.clear() + logger.debug("Cleared all conversation context providers") + + +@contextmanager +def get_conversation_context(conversation_id: str) -> Iterator[None]: + """Get a composed context manager for all enabled observability tools. + + This function returns a context manager that wraps all registered + observability context providers. When entered, it enters all provider + contexts in order. When exited, it exits them in reverse order. + + If no providers are registered or all providers return nullcontext, + this is effectively a no-op. + + Args: + conversation_id: The conversation ID to use for threading/grouping. + + Yields: + None + + Example: + ```python + with get_conversation_context("conv-123"): + # All operations here are traced by all enabled observability tools + agent.step(...) 
+ ``` + """ + if not _conversation_context_providers: + yield + return + + # Use ExitStack to compose multiple context managers + with ExitStack() as stack: + for provider in _conversation_context_providers: + try: + ctx = provider(conversation_id) + stack.enter_context(ctx) + except Exception as e: + # Log but don't fail - observability should not break the agent + logger.debug( + f"Error entering context from provider {provider.__name__}: {e}" + ) + yield + + +# ============================================================================= +# Built-in Provider Registrations +# ============================================================================= +# These are registered when the module is imported. Each provider checks if +# its tool is initialized before returning a real context manager. + + +def _get_weave_conversation_context(conversation_id: str) -> ContextManager[Any]: + """Weave conversation context provider. + + Returns a weave.thread() context manager if Weave is initialized, + otherwise returns nullcontext(). + """ + try: + from openhands.sdk.observability.weave import is_weave_initialized + + if not is_weave_initialized(): + return nullcontext() + + import weave + return weave.thread(conversation_id) + except ImportError: + return nullcontext() + except Exception: + return nullcontext() + + +def _get_laminar_conversation_context(conversation_id: str) -> ContextManager[Any]: + """Laminar conversation context provider. + + Returns a Laminar span context if Laminar is initialized, + otherwise returns nullcontext(). + + Note: Laminar uses OpenTelemetry spans rather than threads, so we create + a span with the conversation_id as the session_id. + """ + try: + from openhands.sdk.observability.laminar import should_enable_observability + + if not should_enable_observability(): + return nullcontext() + + from lmnr import Laminar + + @contextmanager + def laminar_conversation_context(): + span = Laminar.start_active_span(f"conversation:{conversation_id}") + Laminar.set_trace_session_id(conversation_id) + try: + yield + finally: + if span and span.is_recording(): + span.end() + + return laminar_conversation_context() + except ImportError: + return nullcontext() + except Exception: + return nullcontext() + + +# Register built-in providers +register_conversation_context_provider(_get_weave_conversation_context) +register_conversation_context_provider(_get_laminar_conversation_context) diff --git a/tests/sdk/observability/test_context.py b/tests/sdk/observability/test_context.py new file mode 100644 index 0000000000..5f369ed515 --- /dev/null +++ b/tests/sdk/observability/test_context.py @@ -0,0 +1,238 @@ +"""Tests for the generic observability context module.""" + +import pytest +from contextlib import nullcontext +from unittest.mock import MagicMock, patch + +from openhands.sdk.observability.context import ( + clear_conversation_context_providers, + get_conversation_context, + register_conversation_context_provider, + unregister_conversation_context_provider, + _conversation_context_providers, +) + + +class TestConversationContextProviderRegistry: + """Tests for the provider registry functions.""" + + def setup_method(self): + """Clear providers before each test.""" + # Store original providers + self._original_providers = _conversation_context_providers.copy() + clear_conversation_context_providers() + + def teardown_method(self): + """Restore original providers after each test.""" + clear_conversation_context_providers() + for provider in self._original_providers: + 
register_conversation_context_provider(provider) + + def test_register_provider(self): + """Test registering a new provider.""" + def my_provider(conversation_id: str): + return nullcontext() + + register_conversation_context_provider(my_provider) + assert my_provider in _conversation_context_providers + + def test_register_provider_no_duplicates(self): + """Test that registering the same provider twice doesn't create duplicates.""" + def my_provider(conversation_id: str): + return nullcontext() + + register_conversation_context_provider(my_provider) + register_conversation_context_provider(my_provider) + assert _conversation_context_providers.count(my_provider) == 1 + + def test_unregister_provider(self): + """Test unregistering a provider.""" + def my_provider(conversation_id: str): + return nullcontext() + + register_conversation_context_provider(my_provider) + assert my_provider in _conversation_context_providers + + unregister_conversation_context_provider(my_provider) + assert my_provider not in _conversation_context_providers + + def test_unregister_nonexistent_provider(self): + """Test unregistering a provider that was never registered.""" + def my_provider(conversation_id: str): + return nullcontext() + + # Should not raise + unregister_conversation_context_provider(my_provider) + + def test_clear_providers(self): + """Test clearing all providers.""" + def provider1(conversation_id: str): + return nullcontext() + + def provider2(conversation_id: str): + return nullcontext() + + register_conversation_context_provider(provider1) + register_conversation_context_provider(provider2) + assert len(_conversation_context_providers) == 2 + + clear_conversation_context_providers() + assert len(_conversation_context_providers) == 0 + + +class TestGetConversationContext: + """Tests for the get_conversation_context function.""" + + def setup_method(self): + """Clear providers before each test.""" + self._original_providers = _conversation_context_providers.copy() + clear_conversation_context_providers() + + def teardown_method(self): + """Restore original providers after each test.""" + clear_conversation_context_providers() + for provider in self._original_providers: + register_conversation_context_provider(provider) + + def test_no_providers_is_noop(self): + """Test that with no providers, the context is a no-op.""" + executed = False + + with get_conversation_context("test-conv"): + executed = True + + assert executed + + def test_single_provider_called(self): + """Test that a single provider is called with the conversation ID.""" + called_with = [] + + def my_provider(conversation_id: str): + called_with.append(conversation_id) + return nullcontext() + + register_conversation_context_provider(my_provider) + + with get_conversation_context("test-conv-123"): + pass + + assert called_with == ["test-conv-123"] + + def test_multiple_providers_called_in_order(self): + """Test that multiple providers are called in registration order.""" + call_order = [] + + def provider1(conversation_id: str): + call_order.append("provider1") + return nullcontext() + + def provider2(conversation_id: str): + call_order.append("provider2") + return nullcontext() + + register_conversation_context_provider(provider1) + register_conversation_context_provider(provider2) + + with get_conversation_context("test-conv"): + pass + + assert call_order == ["provider1", "provider2"] + + def test_provider_exception_does_not_break_others(self): + """Test that an exception in one provider doesn't prevent others.""" + call_order = 
[] + + def failing_provider(conversation_id: str): + raise RuntimeError("Provider failed") + + def working_provider(conversation_id: str): + call_order.append("working") + return nullcontext() + + register_conversation_context_provider(failing_provider) + register_conversation_context_provider(working_provider) + + # Should not raise + with get_conversation_context("test-conv"): + pass + + assert call_order == ["working"] + + def test_context_manager_enter_exit_called(self): + """Test that context manager __enter__ and __exit__ are called.""" + mock_cm = MagicMock() + mock_cm.__enter__ = MagicMock(return_value=None) + mock_cm.__exit__ = MagicMock(return_value=None) + + def my_provider(conversation_id: str): + return mock_cm + + register_conversation_context_provider(my_provider) + + with get_conversation_context("test-conv"): + mock_cm.__enter__.assert_called_once() + + mock_cm.__exit__.assert_called_once() + + +class TestBuiltInProviders: + """Tests for the built-in Weave and Laminar providers.""" + + def test_weave_provider_returns_nullcontext_when_not_initialized(self): + """Test that Weave provider returns nullcontext when Weave is not initialized.""" + from openhands.sdk.observability.context import _get_weave_conversation_context + + with patch( + "openhands.sdk.observability.weave.is_weave_initialized", + return_value=False, + ): + ctx = _get_weave_conversation_context("test-conv") + # nullcontext() returns a different instance each time, so check type name + assert type(ctx).__name__ == "nullcontext" + + def test_laminar_provider_returns_nullcontext_when_not_initialized(self): + """Test that Laminar provider returns nullcontext when Laminar is not initialized.""" + from openhands.sdk.observability.context import ( + _get_laminar_conversation_context, + ) + + with patch( + "openhands.sdk.observability.laminar.should_enable_observability", + return_value=False, + ): + ctx = _get_laminar_conversation_context("test-conv") + # nullcontext() returns a different instance each time, so check type name + assert type(ctx).__name__ == "nullcontext" + + +class TestIntegration: + """Integration tests for the observability context system.""" + + def test_providers_auto_registered_on_import(self): + """Test that built-in providers are registered when module is imported.""" + # Re-import to trigger registration + from openhands.sdk.observability import context + + # The module should have registered the built-in providers + # We check by looking for the provider functions + provider_names = [p.__name__ for p in context._conversation_context_providers] + assert "_get_weave_conversation_context" in provider_names + assert "_get_laminar_conversation_context" in provider_names + + def test_custom_provider_works_with_builtins(self): + """Test that custom providers work alongside built-in ones.""" + custom_called = [] + + def custom_provider(conversation_id: str): + custom_called.append(conversation_id) + return nullcontext() + + register_conversation_context_provider(custom_provider) + + try: + with get_conversation_context("test-conv"): + pass + + assert "test-conv" in custom_called + finally: + unregister_conversation_context_provider(custom_provider) From e71f6e9901e1fa561b425fbd619804cbd31c6614 Mon Sep 17 00:00:00 2001 From: openhands Date: Fri, 19 Dec 2025 00:28:51 +0000 Subject: [PATCH 6/8] feat: Add unified tool tracing for all observability tools Introduces a unified tool tracing system that works across all enabled observability tools (Weave, Laminar, etc.). 
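A minimal sketch of the two entry points (the tool names and the run_bash helper
below are illustrative placeholders, not part of this patch):

    from openhands.sdk.observability import trace_tool_call, traced_tool

    # Context-manager form, as used in Agent._execute_action_event()
    # and the MCP executor:
    with trace_tool_call("bash", inputs={"command": "ls -la"}):
        result = run_bash("ls -la")  # placeholder for the real executor call

    # Decorator form for custom tool functions; keyword arguments are
    # captured as the traced inputs:
    @traced_tool(tool_name="word_count")
    def word_count(*, text: str) -> int:
        return len(text.split())
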
Key additions: - trace_tool_call(): Context manager for tracing tool executions - traced_tool(): Decorator for tracing tool functions - trace_mcp_list_tools(): Context manager for MCP tool listing - trace_mcp_call_tool(): Context manager for MCP tool calls - Tool trace provider registry (similar to conversation context providers) Integration points: - Agent._execute_action_event() now uses trace_tool_call() for all tools - MCPToolExecutor.call_tool() uses trace_mcp_call_tool() - MCP utils._list_tools() uses trace_mcp_list_tools() What gets traced: - Tool name - Tool inputs (safely serialized) - Tool type (TOOL, MCP_TOOL, MCP_LIST) - Execution duration (via context manager) Design benefits: - Single API for all observability tools - Easy to add new observability providers - Graceful degradation when tools not initialized - Backward compatible with existing Laminar @observe decorators Co-authored-by: openhands --- openhands-sdk/openhands/sdk/agent/agent.py | 27 +- openhands-sdk/openhands/sdk/mcp/tool.py | 54 +-- openhands-sdk/openhands/sdk/mcp/utils.py | 25 +- .../openhands/sdk/observability/__init__.py | 18 + .../openhands/sdk/observability/context.py | 277 ++++++++++++++- tests/sdk/observability/test_context.py | 329 ++++++++++++++++++ 6 files changed, 687 insertions(+), 43 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/agent.py b/openhands-sdk/openhands/sdk/agent/agent.py index d88d2656d4..9029c35165 100644 --- a/openhands-sdk/openhands/sdk/agent/agent.py +++ b/openhands-sdk/openhands/sdk/agent/agent.py @@ -41,6 +41,7 @@ LLMContextWindowExceedError, ) from openhands.sdk.logger import get_logger +from openhands.sdk.observability.context import trace_tool_call from openhands.sdk.observability.laminar import ( maybe_init_laminar, observe, @@ -478,14 +479,24 @@ def _execute_action_event( "as it was checked earlier." ) - # Execute actions! - if should_enable_observability(): - tool_name = extract_action_name(action_event) - observation: Observation = observe(name=tool_name, span_type="TOOL")(tool)( - action_event.action, conversation - ) - else: - observation = tool(action_event.action, conversation) + # Execute actions with unified observability tracing + tool_name = extract_action_name(action_event) + + # Extract inputs for tracing (safely serialize action data) + try: + inputs = action_event.action.model_dump(exclude_none=True) + except Exception: + inputs = None + + # Use unified trace_tool_call for all observability tools (Weave, Laminar, etc.) 
+ # Plus Laminar's @observe decorator for backward compatibility + with trace_tool_call(tool_name, inputs=inputs): + if should_enable_observability(): + observation: Observation = observe(name=tool_name, span_type="TOOL")( + tool + )(action_event.action, conversation) + else: + observation = tool(action_event.action, conversation) assert isinstance(observation, Observation), ( f"Tool '{tool.name}' executor must return an Observation" ) diff --git a/openhands-sdk/openhands/sdk/mcp/tool.py b/openhands-sdk/openhands/sdk/mcp/tool.py index 69aaf54f9a..6e6ef1f21b 100644 --- a/openhands-sdk/openhands/sdk/mcp/tool.py +++ b/openhands-sdk/openhands/sdk/mcp/tool.py @@ -15,6 +15,7 @@ from openhands.sdk.logger import get_logger from openhands.sdk.mcp.client import MCPClient from openhands.sdk.mcp.definition import MCPToolAction, MCPToolObservation +from openhands.sdk.observability.context import trace_mcp_call_tool from openhands.sdk.observability.laminar import observe from openhands.sdk.tool import ( Action, @@ -52,27 +53,38 @@ def __init__(self, tool_name: str, client: MCPClient): @observe(name="MCPToolExecutor.call_tool", span_type="TOOL") async def call_tool(self, action: MCPToolAction) -> MCPToolObservation: - async with self.client: - assert self.client.is_connected(), "MCP client is not connected." - try: - logger.debug( - f"Calling MCP tool {self.tool_name} " - f"with args: {action.model_dump()}" - ) - result: mcp.types.CallToolResult = await self.client.call_tool_mcp( - name=self.tool_name, arguments=action.to_mcp_arguments() - ) - return MCPToolObservation.from_call_tool_result( - tool_name=self.tool_name, result=result - ) - except Exception as e: - error_msg = f"Error calling MCP tool {self.tool_name}: {str(e)}" - logger.error(error_msg, exc_info=True) - return MCPToolObservation.from_text( - text=error_msg, - is_error=True, - tool_name=self.tool_name, - ) + # Extract inputs for tracing + try: + inputs = action.to_mcp_arguments() + except Exception: + inputs = None + + # Use unified MCP tool tracing for all observability tools + with trace_mcp_call_tool( + tool_name=self.tool_name, + inputs=inputs, + ): + async with self.client: + assert self.client.is_connected(), "MCP client is not connected." 
+ try: + logger.debug( + f"Calling MCP tool {self.tool_name} " + f"with args: {action.model_dump()}" + ) + result: mcp.types.CallToolResult = await self.client.call_tool_mcp( + name=self.tool_name, arguments=action.to_mcp_arguments() + ) + return MCPToolObservation.from_call_tool_result( + tool_name=self.tool_name, result=result + ) + except Exception as e: + error_msg = f"Error calling MCP tool {self.tool_name}: {str(e)}" + logger.error(error_msg, exc_info=True) + return MCPToolObservation.from_text( + text=error_msg, + is_error=True, + tool_name=self.tool_name, + ) def __call__( self, diff --git a/openhands-sdk/openhands/sdk/mcp/utils.py b/openhands-sdk/openhands/sdk/mcp/utils.py index 1093280466..a6df3de73e 100644 --- a/openhands-sdk/openhands/sdk/mcp/utils.py +++ b/openhands-sdk/openhands/sdk/mcp/utils.py @@ -10,6 +10,7 @@ from openhands.sdk.mcp.client import MCPClient from openhands.sdk.mcp.exceptions import MCPTimeoutError from openhands.sdk.mcp.tool import MCPToolDefinition +from openhands.sdk.observability.context import trace_mcp_list_tools from openhands.sdk.tool.tool import ToolDefinition @@ -36,17 +37,19 @@ async def _list_tools(client: MCPClient) -> list[ToolDefinition]: """List tools from an MCP client.""" tools: list[ToolDefinition] = [] - async with client: - assert client.is_connected(), "MCP client is not connected." - mcp_type_tools: list[mcp.types.Tool] = await client.list_tools() - for mcp_tool in mcp_type_tools: - tool_sequence = MCPToolDefinition.create( - mcp_tool=mcp_tool, mcp_client=client - ) - tools.extend(tool_sequence) # Flatten sequence into list - assert not client.is_connected(), ( - "MCP client should be disconnected after listing tools." - ) + # Use unified MCP list tools tracing for all observability tools + with trace_mcp_list_tools(): + async with client: + assert client.is_connected(), "MCP client is not connected." + mcp_type_tools: list[mcp.types.Tool] = await client.list_tools() + for mcp_tool in mcp_type_tools: + tool_sequence = MCPToolDefinition.create( + mcp_tool=mcp_tool, mcp_client=client + ) + tools.extend(tool_sequence) # Flatten sequence into list + assert not client.is_connected(), ( + "MCP client should be disconnected after listing tools." 
+ ) return tools diff --git a/openhands-sdk/openhands/sdk/observability/__init__.py b/openhands-sdk/openhands/sdk/observability/__init__.py index 1dcbf3d033..0a378f4b72 100644 --- a/openhands-sdk/openhands/sdk/observability/__init__.py +++ b/openhands-sdk/openhands/sdk/observability/__init__.py @@ -1,8 +1,17 @@ from openhands.sdk.observability.context import ( + # Conversation context clear_conversation_context_providers, get_conversation_context, register_conversation_context_provider, unregister_conversation_context_provider, + # Tool tracing + clear_tool_trace_providers, + register_tool_trace_provider, + trace_mcp_call_tool, + trace_mcp_list_tools, + trace_tool_call, + traced_tool, + unregister_tool_trace_provider, ) from openhands.sdk.observability.laminar import maybe_init_laminar, observe from openhands.sdk.observability.weave import ( @@ -28,6 +37,15 @@ "register_conversation_context_provider", "unregister_conversation_context_provider", "clear_conversation_context_providers", + # Tool tracing (unified interface) + "trace_tool_call", + "traced_tool", + "register_tool_trace_provider", + "unregister_tool_trace_provider", + "clear_tool_trace_providers", + # MCP-specific tracing + "trace_mcp_list_tools", + "trace_mcp_call_tool", # Laminar exports "maybe_init_laminar", "observe", diff --git a/openhands-sdk/openhands/sdk/observability/context.py b/openhands-sdk/openhands/sdk/observability/context.py index c605e0119c..8cbafc4c48 100644 --- a/openhands-sdk/openhands/sdk/observability/context.py +++ b/openhands-sdk/openhands/sdk/observability/context.py @@ -13,10 +13,13 @@ 1. **Unified Context Managers**: A single `get_conversation_context()` function that returns a composed context manager for all enabled tools. -2. **Provider Registry**: Observability tools register their context providers, +2. **Tool Tracing**: A `trace_tool_call()` decorator/context manager for tracing + tool executions across all enabled observability tools. + +3. **Provider Registry**: Observability tools register their context providers, allowing easy extension for new tools. -3. **Graceful Degradation**: If no observability tools are enabled, the context +4. **Graceful Degradation**: If no observability tools are enabled, the context managers are no-ops (nullcontext). ## Usage @@ -31,6 +34,20 @@ def run(self): ... ``` +For tool execution tracing: +```python +from openhands.sdk.observability.context import trace_tool_call + +# As a decorator +@trace_tool_call(tool_name="my_tool") +def execute_tool(action): + ... 
+ +# As a context manager +with trace_tool_call(tool_name="my_tool", inputs={"arg": "value"}): + result = tool.execute(action) +``` + ## Adding New Observability Providers To add a new observability tool: @@ -52,13 +69,17 @@ def get_my_tool_context(conversation_id: str): from collections.abc import Callable from contextlib import ExitStack, contextmanager, nullcontext -from typing import Any, ContextManager, Iterator +from functools import wraps +from typing import Any, ContextManager, Iterator, ParamSpec, TypeVar from openhands.sdk.logger import get_logger logger = get_logger(__name__) +P = ParamSpec("P") +R = TypeVar("R") + # Type alias for context provider functions ConversationContextProvider = Callable[[str], ContextManager[Any]] @@ -223,3 +244,253 @@ def laminar_conversation_context(): # Register built-in providers register_conversation_context_provider(_get_weave_conversation_context) register_conversation_context_provider(_get_laminar_conversation_context) + + +# ============================================================================= +# Tool Call Tracing +# ============================================================================= +# Unified tracing for tool executions across all observability tools. + + +ToolTraceProvider = Callable[[str, dict[str, Any] | None], ContextManager[Any]] + +# Registry of tool trace providers +_tool_trace_providers: list[ToolTraceProvider] = [] + + +def register_tool_trace_provider(provider: ToolTraceProvider) -> None: + """Register a tool trace provider. + + Tool trace providers are functions that take a tool_name and optional + inputs dict, and return a context manager for tracing the tool execution. + + Args: + provider: A function that takes (tool_name, inputs) and returns + a context manager. Should return nullcontext() if the + observability tool is not initialized. + """ + if provider not in _tool_trace_providers: + _tool_trace_providers.append(provider) + logger.debug(f"Registered tool trace provider: {provider.__name__}") + + +def unregister_tool_trace_provider(provider: ToolTraceProvider) -> None: + """Unregister a tool trace provider.""" + if provider in _tool_trace_providers: + _tool_trace_providers.remove(provider) + logger.debug(f"Unregistered tool trace provider: {provider.__name__}") + + +def clear_tool_trace_providers() -> None: + """Clear all registered tool trace providers.""" + _tool_trace_providers.clear() + logger.debug("Cleared all tool trace providers") + + +@contextmanager +def trace_tool_call( + tool_name: str, + inputs: dict[str, Any] | None = None, + tool_type: str = "TOOL", +) -> Iterator[None]: + """Trace a tool call across all enabled observability tools. + + This context manager wraps tool executions with tracing from all + registered observability providers (Weave, Laminar, etc.). + + Args: + tool_name: The name of the tool being executed. + inputs: Optional dict of input arguments to the tool. + tool_type: The type of tool (e.g., "TOOL", "MCP_TOOL"). Used for + categorization in observability UIs. 
+ + Yields: + None + + Example: + ```python + with trace_tool_call("bash", inputs={"command": "ls -la"}): + result = bash_tool.execute(action) + ``` + """ + if not _tool_trace_providers: + yield + return + + with ExitStack() as stack: + for provider in _tool_trace_providers: + try: + ctx = provider(tool_name, inputs) + stack.enter_context(ctx) + except Exception as e: + logger.debug( + f"Error entering tool trace from provider {provider.__name__}: {e}" + ) + yield + + +def traced_tool( + tool_name: str | None = None, + tool_type: str = "TOOL", +) -> Callable[[Callable[P, R]], Callable[P, R]]: + """Decorator to trace tool execution functions. + + This decorator wraps a function with tool tracing from all registered + observability providers. It automatically captures the function's + arguments as inputs. + + Args: + tool_name: The name of the tool. If None, uses the function name. + tool_type: The type of tool (e.g., "TOOL", "MCP_TOOL"). + + Returns: + A decorator that wraps the function with tool tracing. + + Example: + ```python + @traced_tool(tool_name="bash") + def execute_bash(command: str) -> str: + ... + + # Or with automatic name detection + @traced_tool() + def my_tool(arg1, arg2): + ... + ``` + """ + def decorator(func: Callable[P, R]) -> Callable[P, R]: + name = tool_name or func.__name__ + + @wraps(func) + def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: + # Capture inputs from kwargs (args are harder to name) + inputs = dict(kwargs) if kwargs else None + with trace_tool_call(name, inputs=inputs, tool_type=tool_type): + return func(*args, **kwargs) + + return wrapper + + return decorator + + +# ============================================================================= +# Built-in Tool Trace Providers +# ============================================================================= + + +def _get_weave_tool_trace( + tool_name: str, inputs: dict[str, Any] | None +) -> ContextManager[Any]: + """Weave tool trace provider. + + Uses weave.attributes() to add tool metadata to the current span. + The actual tracing is done by Weave's autopatching of the underlying + operations (LLM calls, etc.). + """ + try: + from openhands.sdk.observability.weave import is_weave_initialized + + if not is_weave_initialized(): + return nullcontext() + + import weave + + # Use weave.attributes to add tool metadata to the trace + attributes = {"tool_name": tool_name, "tool_type": "TOOL"} + if inputs: + # Sanitize inputs - convert non-serializable types to strings + safe_inputs = {} + for k, v in inputs.items(): + try: + # Test if it's JSON serializable + import json + json.dumps(v) + safe_inputs[k] = v + except (TypeError, ValueError): + safe_inputs[k] = str(v) + attributes["tool_inputs"] = safe_inputs + + return weave.attributes(attributes) + except ImportError: + return nullcontext() + except Exception: + return nullcontext() + + +def _get_laminar_tool_trace( + tool_name: str, inputs: dict[str, Any] | None # noqa: ARG001 +) -> ContextManager[Any]: + """Laminar tool trace provider. + + Creates a Laminar span for the tool execution. + Note: Laminar's @observe decorator is typically used directly, + but this provides a context manager alternative. 
+ """ + try: + from openhands.sdk.observability.laminar import should_enable_observability + + if not should_enable_observability(): + return nullcontext() + + from lmnr import Laminar + + @contextmanager + def laminar_tool_trace(): + span = Laminar.start_active_span(f"tool:{tool_name}") + try: + yield + finally: + if span and span.is_recording(): + span.end() + + return laminar_tool_trace() + except ImportError: + return nullcontext() + except Exception: + return nullcontext() + + +# Register built-in tool trace providers +register_tool_trace_provider(_get_weave_tool_trace) +register_tool_trace_provider(_get_laminar_tool_trace) + + +# ============================================================================= +# MCP-Specific Tracing +# ============================================================================= + + +@contextmanager +def trace_mcp_list_tools(server_name: str | None = None) -> Iterator[None]: + """Trace MCP tool listing operations. + + Args: + server_name: Optional name of the MCP server being queried. + + Yields: + None + """ + tool_name = f"mcp:list_tools:{server_name}" if server_name else "mcp:list_tools" + with trace_tool_call(tool_name, tool_type="MCP_LIST"): + yield + + +@contextmanager +def trace_mcp_call_tool( + tool_name: str, + server_name: str | None = None, + inputs: dict[str, Any] | None = None, +) -> Iterator[None]: + """Trace MCP tool call operations. + + Args: + tool_name: The name of the MCP tool being called. + server_name: Optional name of the MCP server. + inputs: Optional dict of input arguments. + + Yields: + None + """ + full_name = f"mcp:{server_name}:{tool_name}" if server_name else f"mcp:{tool_name}" + with trace_tool_call(full_name, inputs=inputs, tool_type="MCP_TOOL"): + yield diff --git a/tests/sdk/observability/test_context.py b/tests/sdk/observability/test_context.py index 5f369ed515..ba2e69a0d8 100644 --- a/tests/sdk/observability/test_context.py +++ b/tests/sdk/observability/test_context.py @@ -10,6 +10,15 @@ register_conversation_context_provider, unregister_conversation_context_provider, _conversation_context_providers, + # Tool tracing + clear_tool_trace_providers, + register_tool_trace_provider, + unregister_tool_trace_provider, + trace_tool_call, + traced_tool, + trace_mcp_list_tools, + trace_mcp_call_tool, + _tool_trace_providers, ) @@ -236,3 +245,323 @@ def custom_provider(conversation_id: str): assert "test-conv" in custom_called finally: unregister_conversation_context_provider(custom_provider) + + +# ============================================================================= +# Tool Tracing Tests +# ============================================================================= + + +class TestToolTraceProviderRegistry: + """Tests for the tool trace provider registry functions.""" + + def setup_method(self): + """Store original providers before each test.""" + self._original_providers = _tool_trace_providers.copy() + clear_tool_trace_providers() + + def teardown_method(self): + """Restore original providers after each test.""" + clear_tool_trace_providers() + for provider in self._original_providers: + register_tool_trace_provider(provider) + + def test_register_tool_trace_provider(self): + """Test registering a new tool trace provider.""" + def my_provider(tool_name: str, inputs): + return nullcontext() + + register_tool_trace_provider(my_provider) + assert my_provider in _tool_trace_providers + + def test_register_tool_trace_provider_no_duplicates(self): + """Test that registering the same provider twice doesn't create 
duplicates.""" + def my_provider(tool_name: str, inputs): + return nullcontext() + + register_tool_trace_provider(my_provider) + register_tool_trace_provider(my_provider) + assert _tool_trace_providers.count(my_provider) == 1 + + def test_unregister_tool_trace_provider(self): + """Test unregistering a tool trace provider.""" + def my_provider(tool_name: str, inputs): + return nullcontext() + + register_tool_trace_provider(my_provider) + assert my_provider in _tool_trace_providers + + unregister_tool_trace_provider(my_provider) + assert my_provider not in _tool_trace_providers + + def test_clear_tool_trace_providers(self): + """Test clearing all tool trace providers.""" + def provider1(tool_name: str, inputs): + return nullcontext() + + def provider2(tool_name: str, inputs): + return nullcontext() + + register_tool_trace_provider(provider1) + register_tool_trace_provider(provider2) + assert len(_tool_trace_providers) == 2 + + clear_tool_trace_providers() + assert len(_tool_trace_providers) == 0 + + +class TestTraceToolCall: + """Tests for the trace_tool_call context manager.""" + + def setup_method(self): + """Store original providers before each test.""" + self._original_providers = _tool_trace_providers.copy() + clear_tool_trace_providers() + + def teardown_method(self): + """Restore original providers after each test.""" + clear_tool_trace_providers() + for provider in self._original_providers: + register_tool_trace_provider(provider) + + def test_no_providers_is_noop(self): + """Test that with no providers, the context is a no-op.""" + executed = False + + with trace_tool_call("test-tool"): + executed = True + + assert executed + + def test_single_provider_called(self): + """Test that a single provider is called with tool name and inputs.""" + called_with = [] + + def my_provider(tool_name: str, inputs): + called_with.append((tool_name, inputs)) + return nullcontext() + + register_tool_trace_provider(my_provider) + + with trace_tool_call("bash", inputs={"command": "ls"}): + pass + + assert called_with == [("bash", {"command": "ls"})] + + def test_multiple_providers_called(self): + """Test that multiple providers are called.""" + call_order = [] + + def provider1(tool_name: str, inputs): + call_order.append("provider1") + return nullcontext() + + def provider2(tool_name: str, inputs): + call_order.append("provider2") + return nullcontext() + + register_tool_trace_provider(provider1) + register_tool_trace_provider(provider2) + + with trace_tool_call("test-tool"): + pass + + assert call_order == ["provider1", "provider2"] + + def test_provider_exception_does_not_break_others(self): + """Test that an exception in one provider doesn't prevent others.""" + call_order = [] + + def failing_provider(tool_name: str, inputs): + raise RuntimeError("Provider failed") + + def working_provider(tool_name: str, inputs): + call_order.append("working") + return nullcontext() + + register_tool_trace_provider(failing_provider) + register_tool_trace_provider(working_provider) + + # Should not raise + with trace_tool_call("test-tool"): + pass + + assert call_order == ["working"] + + +class TestTracedToolDecorator: + """Tests for the @traced_tool decorator.""" + + def setup_method(self): + """Store original providers before each test.""" + self._original_providers = _tool_trace_providers.copy() + clear_tool_trace_providers() + + def teardown_method(self): + """Restore original providers after each test.""" + clear_tool_trace_providers() + for provider in self._original_providers: + 
register_tool_trace_provider(provider) + + def test_traced_tool_with_explicit_name(self): + """Test @traced_tool with explicit tool name.""" + traced_calls = [] + + def my_provider(tool_name: str, inputs): + traced_calls.append(tool_name) + return nullcontext() + + register_tool_trace_provider(my_provider) + + @traced_tool(tool_name="my_custom_tool") + def some_function(x, y): + return x + y + + result = some_function(1, 2) + assert result == 3 + assert traced_calls == ["my_custom_tool"] + + def test_traced_tool_with_auto_name(self): + """Test @traced_tool with automatic name detection.""" + traced_calls = [] + + def my_provider(tool_name: str, inputs): + traced_calls.append(tool_name) + return nullcontext() + + register_tool_trace_provider(my_provider) + + @traced_tool() + def auto_named_function(x): + return x * 2 + + result = auto_named_function(5) + assert result == 10 + assert traced_calls == ["auto_named_function"] + + def test_traced_tool_captures_kwargs(self): + """Test that @traced_tool captures kwargs as inputs.""" + traced_inputs = [] + + def my_provider(tool_name: str, inputs): + traced_inputs.append(inputs) + return nullcontext() + + register_tool_trace_provider(my_provider) + + @traced_tool(tool_name="test") + def func_with_kwargs(a, b=10, c="hello"): + return f"{a}-{b}-{c}" + + result = func_with_kwargs(1, b=20, c="world") + assert result == "1-20-world" + assert traced_inputs == [{"b": 20, "c": "world"}] + + +class TestMCPTracing: + """Tests for MCP-specific tracing functions.""" + + def setup_method(self): + """Store original providers before each test.""" + self._original_providers = _tool_trace_providers.copy() + clear_tool_trace_providers() + + def teardown_method(self): + """Restore original providers after each test.""" + clear_tool_trace_providers() + for provider in self._original_providers: + register_tool_trace_provider(provider) + + def test_trace_mcp_list_tools(self): + """Test trace_mcp_list_tools context manager.""" + traced_calls = [] + + def my_provider(tool_name: str, inputs): + traced_calls.append(tool_name) + return nullcontext() + + register_tool_trace_provider(my_provider) + + with trace_mcp_list_tools(): + pass + + assert traced_calls == ["mcp:list_tools"] + + def test_trace_mcp_list_tools_with_server_name(self): + """Test trace_mcp_list_tools with server name.""" + traced_calls = [] + + def my_provider(tool_name: str, inputs): + traced_calls.append(tool_name) + return nullcontext() + + register_tool_trace_provider(my_provider) + + with trace_mcp_list_tools(server_name="my-server"): + pass + + assert traced_calls == ["mcp:list_tools:my-server"] + + def test_trace_mcp_call_tool(self): + """Test trace_mcp_call_tool context manager.""" + traced_calls = [] + + def my_provider(tool_name: str, inputs): + traced_calls.append((tool_name, inputs)) + return nullcontext() + + register_tool_trace_provider(my_provider) + + with trace_mcp_call_tool("read_file", inputs={"path": "/tmp/test.txt"}): + pass + + assert traced_calls == [("mcp:read_file", {"path": "/tmp/test.txt"})] + + def test_trace_mcp_call_tool_with_server_name(self): + """Test trace_mcp_call_tool with server name.""" + traced_calls = [] + + def my_provider(tool_name: str, inputs): + traced_calls.append(tool_name) + return nullcontext() + + register_tool_trace_provider(my_provider) + + with trace_mcp_call_tool("read_file", server_name="filesystem"): + pass + + assert traced_calls == ["mcp:filesystem:read_file"] + + +class TestToolTraceBuiltInProviders: + """Tests for the built-in tool trace 
providers.""" + + def test_weave_tool_trace_returns_nullcontext_when_not_initialized(self): + """Test that Weave tool trace provider returns nullcontext when not initialized.""" + from openhands.sdk.observability.context import _get_weave_tool_trace + + with patch( + "openhands.sdk.observability.weave.is_weave_initialized", + return_value=False, + ): + ctx = _get_weave_tool_trace("test-tool", {"arg": "value"}) + assert type(ctx).__name__ == "nullcontext" + + def test_laminar_tool_trace_returns_nullcontext_when_not_initialized(self): + """Test that Laminar tool trace provider returns nullcontext when not initialized.""" + from openhands.sdk.observability.context import _get_laminar_tool_trace + + with patch( + "openhands.sdk.observability.laminar.should_enable_observability", + return_value=False, + ): + ctx = _get_laminar_tool_trace("test-tool", {"arg": "value"}) + assert type(ctx).__name__ == "nullcontext" + + def test_tool_trace_providers_auto_registered(self): + """Test that built-in tool trace providers are registered on import.""" + from openhands.sdk.observability import context + + provider_names = [p.__name__ for p in context._tool_trace_providers] + assert "_get_weave_tool_trace" in provider_names + assert "_get_laminar_tool_trace" in provider_names From f42f317aedb382e13c2d173c7acde654d56ebddb Mon Sep 17 00:00:00 2001 From: openhands Date: Fri, 19 Dec 2025 00:44:06 +0000 Subject: [PATCH 7/8] refactor: Simplify Weave integration - remove complex tool tracing Simplifies the Weave observability integration to be more elegant: 1. **Removed complex tool tracing system**: - Removed trace_tool_call, traced_tool, trace_mcp_* functions - Removed tool trace provider registry - These were over-engineered; Weave's autopatching handles LLM tracing 2. **Simplified weave.py**: - Kept only essential functions: init_weave, maybe_init_weave, weave_op - Removed WeaveSpanManager, observe_weave, weave_attributes, weave_thread - Users can use weave.op and weave.thread directly from the weave package 3. **Key exports**: - init_weave(): Initialize Weave with autopatching - maybe_init_weave(): Conditional init based on env vars - weave_op(): Decorator wrapper that's a no-op when not initialized - get_weave_op(): Get weave.op or no-op decorator 4. **Design philosophy**: - Weave autopatching traces all LiteLLM calls automatically - Use @weave.op directly for custom function tracing - Use weave.thread() directly for conversation grouping - Keep SDK integration minimal and non-invasive This matches the approach in the OpenHands PR #12056. 
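A minimal usage sketch of the simplified design described above (illustrative only; assumes the weave package is installed and WANDB_API_KEY / WEAVE_PROJECT are set so initialization succeeds):

```python
import weave  # optional dependency; needed only for weave.thread()

from openhands.sdk.observability import maybe_init_weave, weave_op

# Conditional init: a no-op unless WANDB_API_KEY and WEAVE_PROJECT are set.
maybe_init_weave()

# Custom agent logic traced via weave_op (itself a no-op when Weave is off).
@weave_op(name="agent_step")
def agent_step(user_input: str) -> dict:
    return {"echo": user_input, "length": len(user_input)}

# Group related operations under one conversation thread.
with weave.thread("conversation-123"):
    agent_step("hello")
```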
Co-authored-by: openhands --- openhands-sdk/openhands/sdk/agent/agent.py | 27 +- openhands-sdk/openhands/sdk/mcp/tool.py | 54 ++- openhands-sdk/openhands/sdk/mcp/utils.py | 25 +- .../openhands/sdk/observability/__init__.py | 30 -- .../openhands/sdk/observability/context.py | 277 +-------------- .../openhands/sdk/observability/weave.py | 221 +----------- tests/sdk/observability/test_context.py | 329 ------------------ tests/sdk/observability/test_weave.py | 113 ------ 8 files changed, 57 insertions(+), 1019 deletions(-) diff --git a/openhands-sdk/openhands/sdk/agent/agent.py b/openhands-sdk/openhands/sdk/agent/agent.py index 9029c35165..d88d2656d4 100644 --- a/openhands-sdk/openhands/sdk/agent/agent.py +++ b/openhands-sdk/openhands/sdk/agent/agent.py @@ -41,7 +41,6 @@ LLMContextWindowExceedError, ) from openhands.sdk.logger import get_logger -from openhands.sdk.observability.context import trace_tool_call from openhands.sdk.observability.laminar import ( maybe_init_laminar, observe, @@ -479,24 +478,14 @@ def _execute_action_event( "as it was checked earlier." ) - # Execute actions with unified observability tracing - tool_name = extract_action_name(action_event) - - # Extract inputs for tracing (safely serialize action data) - try: - inputs = action_event.action.model_dump(exclude_none=True) - except Exception: - inputs = None - - # Use unified trace_tool_call for all observability tools (Weave, Laminar, etc.) - # Plus Laminar's @observe decorator for backward compatibility - with trace_tool_call(tool_name, inputs=inputs): - if should_enable_observability(): - observation: Observation = observe(name=tool_name, span_type="TOOL")( - tool - )(action_event.action, conversation) - else: - observation = tool(action_event.action, conversation) + # Execute actions! + if should_enable_observability(): + tool_name = extract_action_name(action_event) + observation: Observation = observe(name=tool_name, span_type="TOOL")(tool)( + action_event.action, conversation + ) + else: + observation = tool(action_event.action, conversation) assert isinstance(observation, Observation), ( f"Tool '{tool.name}' executor must return an Observation" ) diff --git a/openhands-sdk/openhands/sdk/mcp/tool.py b/openhands-sdk/openhands/sdk/mcp/tool.py index 6e6ef1f21b..69aaf54f9a 100644 --- a/openhands-sdk/openhands/sdk/mcp/tool.py +++ b/openhands-sdk/openhands/sdk/mcp/tool.py @@ -15,7 +15,6 @@ from openhands.sdk.logger import get_logger from openhands.sdk.mcp.client import MCPClient from openhands.sdk.mcp.definition import MCPToolAction, MCPToolObservation -from openhands.sdk.observability.context import trace_mcp_call_tool from openhands.sdk.observability.laminar import observe from openhands.sdk.tool import ( Action, @@ -53,38 +52,27 @@ def __init__(self, tool_name: str, client: MCPClient): @observe(name="MCPToolExecutor.call_tool", span_type="TOOL") async def call_tool(self, action: MCPToolAction) -> MCPToolObservation: - # Extract inputs for tracing - try: - inputs = action.to_mcp_arguments() - except Exception: - inputs = None - - # Use unified MCP tool tracing for all observability tools - with trace_mcp_call_tool( - tool_name=self.tool_name, - inputs=inputs, - ): - async with self.client: - assert self.client.is_connected(), "MCP client is not connected." 
- try: - logger.debug( - f"Calling MCP tool {self.tool_name} " - f"with args: {action.model_dump()}" - ) - result: mcp.types.CallToolResult = await self.client.call_tool_mcp( - name=self.tool_name, arguments=action.to_mcp_arguments() - ) - return MCPToolObservation.from_call_tool_result( - tool_name=self.tool_name, result=result - ) - except Exception as e: - error_msg = f"Error calling MCP tool {self.tool_name}: {str(e)}" - logger.error(error_msg, exc_info=True) - return MCPToolObservation.from_text( - text=error_msg, - is_error=True, - tool_name=self.tool_name, - ) + async with self.client: + assert self.client.is_connected(), "MCP client is not connected." + try: + logger.debug( + f"Calling MCP tool {self.tool_name} " + f"with args: {action.model_dump()}" + ) + result: mcp.types.CallToolResult = await self.client.call_tool_mcp( + name=self.tool_name, arguments=action.to_mcp_arguments() + ) + return MCPToolObservation.from_call_tool_result( + tool_name=self.tool_name, result=result + ) + except Exception as e: + error_msg = f"Error calling MCP tool {self.tool_name}: {str(e)}" + logger.error(error_msg, exc_info=True) + return MCPToolObservation.from_text( + text=error_msg, + is_error=True, + tool_name=self.tool_name, + ) def __call__( self, diff --git a/openhands-sdk/openhands/sdk/mcp/utils.py b/openhands-sdk/openhands/sdk/mcp/utils.py index a6df3de73e..1093280466 100644 --- a/openhands-sdk/openhands/sdk/mcp/utils.py +++ b/openhands-sdk/openhands/sdk/mcp/utils.py @@ -10,7 +10,6 @@ from openhands.sdk.mcp.client import MCPClient from openhands.sdk.mcp.exceptions import MCPTimeoutError from openhands.sdk.mcp.tool import MCPToolDefinition -from openhands.sdk.observability.context import trace_mcp_list_tools from openhands.sdk.tool.tool import ToolDefinition @@ -37,19 +36,17 @@ async def _list_tools(client: MCPClient) -> list[ToolDefinition]: """List tools from an MCP client.""" tools: list[ToolDefinition] = [] - # Use unified MCP list tools tracing for all observability tools - with trace_mcp_list_tools(): - async with client: - assert client.is_connected(), "MCP client is not connected." - mcp_type_tools: list[mcp.types.Tool] = await client.list_tools() - for mcp_tool in mcp_type_tools: - tool_sequence = MCPToolDefinition.create( - mcp_tool=mcp_tool, mcp_client=client - ) - tools.extend(tool_sequence) # Flatten sequence into list - assert not client.is_connected(), ( - "MCP client should be disconnected after listing tools." - ) + async with client: + assert client.is_connected(), "MCP client is not connected." + mcp_type_tools: list[mcp.types.Tool] = await client.list_tools() + for mcp_tool in mcp_type_tools: + tool_sequence = MCPToolDefinition.create( + mcp_tool=mcp_tool, mcp_client=client + ) + tools.extend(tool_sequence) # Flatten sequence into list + assert not client.is_connected(), ( + "MCP client should be disconnected after listing tools." 
+ ) return tools diff --git a/openhands-sdk/openhands/sdk/observability/__init__.py b/openhands-sdk/openhands/sdk/observability/__init__.py index 0a378f4b72..01b6f95560 100644 --- a/openhands-sdk/openhands/sdk/observability/__init__.py +++ b/openhands-sdk/openhands/sdk/observability/__init__.py @@ -1,33 +1,18 @@ from openhands.sdk.observability.context import ( - # Conversation context clear_conversation_context_providers, get_conversation_context, register_conversation_context_provider, unregister_conversation_context_provider, - # Tool tracing - clear_tool_trace_providers, - register_tool_trace_provider, - trace_mcp_call_tool, - trace_mcp_list_tools, - trace_tool_call, - traced_tool, - unregister_tool_trace_provider, ) from openhands.sdk.observability.laminar import maybe_init_laminar, observe from openhands.sdk.observability.weave import ( - end_weave_span, get_weave_client, get_weave_op, init_weave, is_weave_initialized, maybe_init_weave, - observe_weave, should_enable_weave, - start_weave_span, - weave_attributes, weave_op, - weave_thread, - WeaveSpanManager, ) @@ -37,30 +22,15 @@ "register_conversation_context_provider", "unregister_conversation_context_provider", "clear_conversation_context_providers", - # Tool tracing (unified interface) - "trace_tool_call", - "traced_tool", - "register_tool_trace_provider", - "unregister_tool_trace_provider", - "clear_tool_trace_providers", - # MCP-specific tracing - "trace_mcp_list_tools", - "trace_mcp_call_tool", # Laminar exports "maybe_init_laminar", "observe", # Weave exports - "end_weave_span", "get_weave_client", "get_weave_op", "init_weave", "is_weave_initialized", "maybe_init_weave", - "observe_weave", "should_enable_weave", - "start_weave_span", - "weave_attributes", "weave_op", - "weave_thread", - "WeaveSpanManager", ] diff --git a/openhands-sdk/openhands/sdk/observability/context.py b/openhands-sdk/openhands/sdk/observability/context.py index 8cbafc4c48..c605e0119c 100644 --- a/openhands-sdk/openhands/sdk/observability/context.py +++ b/openhands-sdk/openhands/sdk/observability/context.py @@ -13,13 +13,10 @@ 1. **Unified Context Managers**: A single `get_conversation_context()` function that returns a composed context manager for all enabled tools. -2. **Tool Tracing**: A `trace_tool_call()` decorator/context manager for tracing - tool executions across all enabled observability tools. - -3. **Provider Registry**: Observability tools register their context providers, +2. **Provider Registry**: Observability tools register their context providers, allowing easy extension for new tools. -4. **Graceful Degradation**: If no observability tools are enabled, the context +3. **Graceful Degradation**: If no observability tools are enabled, the context managers are no-ops (nullcontext). ## Usage @@ -34,20 +31,6 @@ def run(self): ... ``` -For tool execution tracing: -```python -from openhands.sdk.observability.context import trace_tool_call - -# As a decorator -@trace_tool_call(tool_name="my_tool") -def execute_tool(action): - ... 
- -# As a context manager -with trace_tool_call(tool_name="my_tool", inputs={"arg": "value"}): - result = tool.execute(action) -``` - ## Adding New Observability Providers To add a new observability tool: @@ -69,17 +52,13 @@ def get_my_tool_context(conversation_id: str): from collections.abc import Callable from contextlib import ExitStack, contextmanager, nullcontext -from functools import wraps -from typing import Any, ContextManager, Iterator, ParamSpec, TypeVar +from typing import Any, ContextManager, Iterator from openhands.sdk.logger import get_logger logger = get_logger(__name__) -P = ParamSpec("P") -R = TypeVar("R") - # Type alias for context provider functions ConversationContextProvider = Callable[[str], ContextManager[Any]] @@ -244,253 +223,3 @@ def laminar_conversation_context(): # Register built-in providers register_conversation_context_provider(_get_weave_conversation_context) register_conversation_context_provider(_get_laminar_conversation_context) - - -# ============================================================================= -# Tool Call Tracing -# ============================================================================= -# Unified tracing for tool executions across all observability tools. - - -ToolTraceProvider = Callable[[str, dict[str, Any] | None], ContextManager[Any]] - -# Registry of tool trace providers -_tool_trace_providers: list[ToolTraceProvider] = [] - - -def register_tool_trace_provider(provider: ToolTraceProvider) -> None: - """Register a tool trace provider. - - Tool trace providers are functions that take a tool_name and optional - inputs dict, and return a context manager for tracing the tool execution. - - Args: - provider: A function that takes (tool_name, inputs) and returns - a context manager. Should return nullcontext() if the - observability tool is not initialized. - """ - if provider not in _tool_trace_providers: - _tool_trace_providers.append(provider) - logger.debug(f"Registered tool trace provider: {provider.__name__}") - - -def unregister_tool_trace_provider(provider: ToolTraceProvider) -> None: - """Unregister a tool trace provider.""" - if provider in _tool_trace_providers: - _tool_trace_providers.remove(provider) - logger.debug(f"Unregistered tool trace provider: {provider.__name__}") - - -def clear_tool_trace_providers() -> None: - """Clear all registered tool trace providers.""" - _tool_trace_providers.clear() - logger.debug("Cleared all tool trace providers") - - -@contextmanager -def trace_tool_call( - tool_name: str, - inputs: dict[str, Any] | None = None, - tool_type: str = "TOOL", -) -> Iterator[None]: - """Trace a tool call across all enabled observability tools. - - This context manager wraps tool executions with tracing from all - registered observability providers (Weave, Laminar, etc.). - - Args: - tool_name: The name of the tool being executed. - inputs: Optional dict of input arguments to the tool. - tool_type: The type of tool (e.g., "TOOL", "MCP_TOOL"). Used for - categorization in observability UIs. 
- - Yields: - None - - Example: - ```python - with trace_tool_call("bash", inputs={"command": "ls -la"}): - result = bash_tool.execute(action) - ``` - """ - if not _tool_trace_providers: - yield - return - - with ExitStack() as stack: - for provider in _tool_trace_providers: - try: - ctx = provider(tool_name, inputs) - stack.enter_context(ctx) - except Exception as e: - logger.debug( - f"Error entering tool trace from provider {provider.__name__}: {e}" - ) - yield - - -def traced_tool( - tool_name: str | None = None, - tool_type: str = "TOOL", -) -> Callable[[Callable[P, R]], Callable[P, R]]: - """Decorator to trace tool execution functions. - - This decorator wraps a function with tool tracing from all registered - observability providers. It automatically captures the function's - arguments as inputs. - - Args: - tool_name: The name of the tool. If None, uses the function name. - tool_type: The type of tool (e.g., "TOOL", "MCP_TOOL"). - - Returns: - A decorator that wraps the function with tool tracing. - - Example: - ```python - @traced_tool(tool_name="bash") - def execute_bash(command: str) -> str: - ... - - # Or with automatic name detection - @traced_tool() - def my_tool(arg1, arg2): - ... - ``` - """ - def decorator(func: Callable[P, R]) -> Callable[P, R]: - name = tool_name or func.__name__ - - @wraps(func) - def wrapper(*args: P.args, **kwargs: P.kwargs) -> R: - # Capture inputs from kwargs (args are harder to name) - inputs = dict(kwargs) if kwargs else None - with trace_tool_call(name, inputs=inputs, tool_type=tool_type): - return func(*args, **kwargs) - - return wrapper - - return decorator - - -# ============================================================================= -# Built-in Tool Trace Providers -# ============================================================================= - - -def _get_weave_tool_trace( - tool_name: str, inputs: dict[str, Any] | None -) -> ContextManager[Any]: - """Weave tool trace provider. - - Uses weave.attributes() to add tool metadata to the current span. - The actual tracing is done by Weave's autopatching of the underlying - operations (LLM calls, etc.). - """ - try: - from openhands.sdk.observability.weave import is_weave_initialized - - if not is_weave_initialized(): - return nullcontext() - - import weave - - # Use weave.attributes to add tool metadata to the trace - attributes = {"tool_name": tool_name, "tool_type": "TOOL"} - if inputs: - # Sanitize inputs - convert non-serializable types to strings - safe_inputs = {} - for k, v in inputs.items(): - try: - # Test if it's JSON serializable - import json - json.dumps(v) - safe_inputs[k] = v - except (TypeError, ValueError): - safe_inputs[k] = str(v) - attributes["tool_inputs"] = safe_inputs - - return weave.attributes(attributes) - except ImportError: - return nullcontext() - except Exception: - return nullcontext() - - -def _get_laminar_tool_trace( - tool_name: str, inputs: dict[str, Any] | None # noqa: ARG001 -) -> ContextManager[Any]: - """Laminar tool trace provider. - - Creates a Laminar span for the tool execution. - Note: Laminar's @observe decorator is typically used directly, - but this provides a context manager alternative. 
- """ - try: - from openhands.sdk.observability.laminar import should_enable_observability - - if not should_enable_observability(): - return nullcontext() - - from lmnr import Laminar - - @contextmanager - def laminar_tool_trace(): - span = Laminar.start_active_span(f"tool:{tool_name}") - try: - yield - finally: - if span and span.is_recording(): - span.end() - - return laminar_tool_trace() - except ImportError: - return nullcontext() - except Exception: - return nullcontext() - - -# Register built-in tool trace providers -register_tool_trace_provider(_get_weave_tool_trace) -register_tool_trace_provider(_get_laminar_tool_trace) - - -# ============================================================================= -# MCP-Specific Tracing -# ============================================================================= - - -@contextmanager -def trace_mcp_list_tools(server_name: str | None = None) -> Iterator[None]: - """Trace MCP tool listing operations. - - Args: - server_name: Optional name of the MCP server being queried. - - Yields: - None - """ - tool_name = f"mcp:list_tools:{server_name}" if server_name else "mcp:list_tools" - with trace_tool_call(tool_name, tool_type="MCP_LIST"): - yield - - -@contextmanager -def trace_mcp_call_tool( - tool_name: str, - server_name: str | None = None, - inputs: dict[str, Any] | None = None, -) -> Iterator[None]: - """Trace MCP tool call operations. - - Args: - tool_name: The name of the MCP tool being called. - server_name: Optional name of the MCP server. - inputs: Optional dict of input arguments. - - Yields: - None - """ - full_name = f"mcp:{server_name}:{tool_name}" if server_name else f"mcp:{tool_name}" - with trace_tool_call(full_name, inputs=inputs, tool_type="MCP_TOOL"): - yield diff --git a/openhands-sdk/openhands/sdk/observability/weave.py b/openhands-sdk/openhands/sdk/observability/weave.py index 6d16e74ba3..740c42e4da 100644 --- a/openhands-sdk/openhands/sdk/observability/weave.py +++ b/openhands-sdk/openhands/sdk/observability/weave.py @@ -15,12 +15,12 @@ 3. **Optional manual tracing**: Use `@weave.op` for custom agent logic that you want to trace (tool execution, agent steps, etc.) -4. **Thread grouping**: Group related operations under conversation threads. +4. **Thread grouping**: Use `weave.thread()` to group operations by conversation. ## How It Works The SDK uses LiteLLM for all LLM calls. When you call `init_weave()`: -1. Weave's `implicit_patch()` automatically patches LiteLLM +1. Weave's autopatching automatically patches LiteLLM 2. All `litellm.completion()` and `litellm.acompletion()` calls are traced 3. You see full traces in the Weave UI without any code changes! @@ -64,14 +64,15 @@ def process_agent_step(step: dict) -> dict: ### Conversation Thread Grouping ```python -from openhands.sdk.observability import init_weave, weave_attributes +import weave +from openhands.sdk.observability import init_weave init_weave("my-team/my-project") -# Group all operations under a conversation -with weave_attributes(conversation_id="conv-123", user_id="user-456"): +# Group all operations under a conversation thread +with weave.thread("conversation-123"): # All LLM calls and traced functions within this block - # will be tagged with these attributes + # will be grouped under the same thread response = llm.completion(...) 
``` @@ -85,7 +86,6 @@ def process_agent_step(step: dict) -> dict: import logging import os from collections.abc import Callable -from contextlib import contextmanager from typing import Any, ParamSpec, TypeVar from openhands.sdk.observability.utils import get_env @@ -252,55 +252,7 @@ def should_enable_weave() -> bool: return bool(get_env("WANDB_API_KEY") and get_env("WEAVE_PROJECT")) -@contextmanager -def weave_attributes(**attributes: Any): - """Context manager to add attributes to all operations within the block. - - This is useful for grouping related operations (e.g., all events in a - conversation) or adding metadata to traces. - - Args: - **attributes: Key-value pairs to attach to all operations. - Common attributes: conversation_id, user_id, session_id, etc. - - Example: - >>> with weave_attributes(conversation_id="conv-123", user_id="user-456"): - ... # All LLM calls and traced functions here will have these attributes - ... response = llm.completion(messages=[...]) - """ - if not _weave_initialized: - yield - return - - try: - import weave - with weave.attributes(attributes): - yield - except Exception as e: - logger.warning(f"Failed to set weave attributes: {e}") - yield - - -@contextmanager -def weave_thread(thread_id: str): - """Context manager to group operations under a thread. - - This is an alias for weave_attributes(thread_id=...) for convenience - and backward compatibility. - - Args: - thread_id: Unique identifier for the thread (e.g., conversation ID). - - Example: - >>> with weave_thread("conversation-123"): - ... # All operations here will be grouped under the same thread - ... response = llm.completion(messages=[...]) - """ - with weave_attributes(thread_id=thread_id): - yield - - -def get_weave_op(): +def get_weave_op() -> Callable: """Get the weave.op decorator for manual function tracing. Returns the actual weave.op decorator if Weave is initialized, @@ -359,11 +311,17 @@ def my_func(): ... func: The function to decorate (when used without parentheses). name: Optional name for the operation. Defaults to function name. call_display_name: Display name for the call in the Weave UI. + Can be a string or a callable that takes the Call object. postprocess_inputs: Function to transform inputs before logging. postprocess_output: Function to transform output before logging. Returns: The decorated function or a decorator. + + Example: + >>> @weave_op(name="agent_step") + ... def step(action: dict) -> dict: + ... return execute(action) """ def decorator(fn: Callable[P, R]) -> Callable[P, R]: if not _weave_initialized: @@ -393,154 +351,3 @@ def decorator(fn: Callable[P, R]) -> Callable[P, R]: if func is not None: return decorator(func) return decorator - - -def observe_weave( - name: str | None = None, - *, - ignore_inputs: list[str] | None = None, - ignore_output: bool = False, -) -> Callable[[Callable[P, R]], Callable[P, R]]: - """Decorator for observing functions with Weave (Laminar-compatible interface). - - This provides a similar interface to the Laminar `observe` decorator, - making it easier to switch between observability backends. - - Args: - name: Optional name for the operation. - ignore_inputs: List of input parameter names to exclude from logging. - ignore_output: If True, don't log the output. - - Returns: - A decorator that wraps the function for Weave tracing. - - Example: - >>> @observe_weave(name="login", ignore_inputs=["password"]) - ... def login(username: str, password: str) -> bool: - ... 
return authenticate(username, password) - """ - def postprocess_inputs_fn(inputs: dict[str, Any]) -> dict[str, Any]: - if not ignore_inputs: - return inputs - return {k: v for k, v in inputs.items() if k not in ignore_inputs} - - def postprocess_output_fn(output: Any) -> Any: - if ignore_output: - return "[output hidden]" - return output - - return weave_op( - name=name, - postprocess_inputs=postprocess_inputs_fn if ignore_inputs else None, - postprocess_output=postprocess_output_fn if ignore_output else None, - ) - - -class WeaveSpanManager: - """Manager for manual span lifecycle control. - - This class provides fine-grained control over span creation and completion, - useful when automatic decoration is not suitable. - - Note: For most use cases, the automatic LLM tracing and @weave_op decorator - are sufficient. Use this only when you need explicit span control. - - Example: - >>> manager = WeaveSpanManager() - >>> manager.start_span("process_batch", inputs={"batch_size": 100}) - >>> try: - ... result = process_batch() - ... manager.end_span(output=result) - ... except Exception as e: - ... manager.end_span(error=str(e)) - """ - - def __init__(self): - self._call_stack: list[Any] = [] - - def start_span( - self, - name: str, - inputs: dict[str, Any] | None = None, - ) -> Any: - """Start a new span. - - Args: - name: Name of the span/operation. - inputs: Input parameters to log. - - Returns: - The span/call object if successful, None otherwise. - """ - if not _weave_initialized: - return None - - try: - import weave - - @weave.op(name=name) - def _span_op(**kwargs: Any) -> Any: - pass - - call = _span_op.call(inputs or {}) - self._call_stack.append(call) - return call - except Exception as e: - logger.warning(f"Failed to start weave span: {e}") - return None - - def end_span( - self, - output: Any = None, - error: str | None = None, - ) -> None: - """End the current span. - - Args: - output: Output value to log. - error: Error message if the span failed. - """ - if not self._call_stack: - return - - try: - call = self._call_stack.pop() - if error: - call.finish(exception=Exception(error)) - else: - call.finish(output=output) - except Exception as e: - logger.warning(f"Failed to end weave span: {e}") - - -# Global span manager instance for convenience -_global_span_manager = WeaveSpanManager() - - -def start_weave_span( - name: str, - inputs: dict[str, Any] | None = None, -) -> Any: - """Start a new Weave span using the global manager. - - Args: - name: Name of the span/operation. - inputs: Input parameters to log. - - Returns: - The span/call object if successful, None otherwise. - """ - return _global_span_manager.start_span(name, inputs) - - -def end_weave_span( - output: Any = None, - error: str | None = None, -) -> None: - """End the current Weave span using the global manager. - - Args: - output: Output value to log. - error: Error message if the span failed. 
- """ - _global_span_manager.end_span(output, error) diff --git a/tests/sdk/observability/test_context.py b/tests/sdk/observability/test_context.py index ba2e69a0d8..5f369ed515 100644 --- a/tests/sdk/observability/test_context.py +++ b/tests/sdk/observability/test_context.py @@ -10,15 +10,6 @@ register_conversation_context_provider, unregister_conversation_context_provider, _conversation_context_providers, - # Tool tracing - clear_tool_trace_providers, - register_tool_trace_provider, - unregister_tool_trace_provider, - trace_tool_call, - traced_tool, - trace_mcp_list_tools, - trace_mcp_call_tool, - _tool_trace_providers, ) @@ -245,323 +236,3 @@ def custom_provider(conversation_id: str): assert "test-conv" in custom_called finally: unregister_conversation_context_provider(custom_provider) - - -# ============================================================================= -# Tool Tracing Tests -# ============================================================================= - - -class TestToolTraceProviderRegistry: - """Tests for the tool trace provider registry functions.""" - - def setup_method(self): - """Store original providers before each test.""" - self._original_providers = _tool_trace_providers.copy() - clear_tool_trace_providers() - - def teardown_method(self): - """Restore original providers after each test.""" - clear_tool_trace_providers() - for provider in self._original_providers: - register_tool_trace_provider(provider) - - def test_register_tool_trace_provider(self): - """Test registering a new tool trace provider.""" - def my_provider(tool_name: str, inputs): - return nullcontext() - - register_tool_trace_provider(my_provider) - assert my_provider in _tool_trace_providers - - def test_register_tool_trace_provider_no_duplicates(self): - """Test that registering the same provider twice doesn't create duplicates.""" - def my_provider(tool_name: str, inputs): - return nullcontext() - - register_tool_trace_provider(my_provider) - register_tool_trace_provider(my_provider) - assert _tool_trace_providers.count(my_provider) == 1 - - def test_unregister_tool_trace_provider(self): - """Test unregistering a tool trace provider.""" - def my_provider(tool_name: str, inputs): - return nullcontext() - - register_tool_trace_provider(my_provider) - assert my_provider in _tool_trace_providers - - unregister_tool_trace_provider(my_provider) - assert my_provider not in _tool_trace_providers - - def test_clear_tool_trace_providers(self): - """Test clearing all tool trace providers.""" - def provider1(tool_name: str, inputs): - return nullcontext() - - def provider2(tool_name: str, inputs): - return nullcontext() - - register_tool_trace_provider(provider1) - register_tool_trace_provider(provider2) - assert len(_tool_trace_providers) == 2 - - clear_tool_trace_providers() - assert len(_tool_trace_providers) == 0 - - -class TestTraceToolCall: - """Tests for the trace_tool_call context manager.""" - - def setup_method(self): - """Store original providers before each test.""" - self._original_providers = _tool_trace_providers.copy() - clear_tool_trace_providers() - - def teardown_method(self): - """Restore original providers after each test.""" - clear_tool_trace_providers() - for provider in self._original_providers: - register_tool_trace_provider(provider) - - def test_no_providers_is_noop(self): - """Test that with no providers, the context is a no-op.""" - executed = False - - with trace_tool_call("test-tool"): - executed = True - - assert executed - - def test_single_provider_called(self): - 
"""Test that a single provider is called with tool name and inputs.""" - called_with = [] - - def my_provider(tool_name: str, inputs): - called_with.append((tool_name, inputs)) - return nullcontext() - - register_tool_trace_provider(my_provider) - - with trace_tool_call("bash", inputs={"command": "ls"}): - pass - - assert called_with == [("bash", {"command": "ls"})] - - def test_multiple_providers_called(self): - """Test that multiple providers are called.""" - call_order = [] - - def provider1(tool_name: str, inputs): - call_order.append("provider1") - return nullcontext() - - def provider2(tool_name: str, inputs): - call_order.append("provider2") - return nullcontext() - - register_tool_trace_provider(provider1) - register_tool_trace_provider(provider2) - - with trace_tool_call("test-tool"): - pass - - assert call_order == ["provider1", "provider2"] - - def test_provider_exception_does_not_break_others(self): - """Test that an exception in one provider doesn't prevent others.""" - call_order = [] - - def failing_provider(tool_name: str, inputs): - raise RuntimeError("Provider failed") - - def working_provider(tool_name: str, inputs): - call_order.append("working") - return nullcontext() - - register_tool_trace_provider(failing_provider) - register_tool_trace_provider(working_provider) - - # Should not raise - with trace_tool_call("test-tool"): - pass - - assert call_order == ["working"] - - -class TestTracedToolDecorator: - """Tests for the @traced_tool decorator.""" - - def setup_method(self): - """Store original providers before each test.""" - self._original_providers = _tool_trace_providers.copy() - clear_tool_trace_providers() - - def teardown_method(self): - """Restore original providers after each test.""" - clear_tool_trace_providers() - for provider in self._original_providers: - register_tool_trace_provider(provider) - - def test_traced_tool_with_explicit_name(self): - """Test @traced_tool with explicit tool name.""" - traced_calls = [] - - def my_provider(tool_name: str, inputs): - traced_calls.append(tool_name) - return nullcontext() - - register_tool_trace_provider(my_provider) - - @traced_tool(tool_name="my_custom_tool") - def some_function(x, y): - return x + y - - result = some_function(1, 2) - assert result == 3 - assert traced_calls == ["my_custom_tool"] - - def test_traced_tool_with_auto_name(self): - """Test @traced_tool with automatic name detection.""" - traced_calls = [] - - def my_provider(tool_name: str, inputs): - traced_calls.append(tool_name) - return nullcontext() - - register_tool_trace_provider(my_provider) - - @traced_tool() - def auto_named_function(x): - return x * 2 - - result = auto_named_function(5) - assert result == 10 - assert traced_calls == ["auto_named_function"] - - def test_traced_tool_captures_kwargs(self): - """Test that @traced_tool captures kwargs as inputs.""" - traced_inputs = [] - - def my_provider(tool_name: str, inputs): - traced_inputs.append(inputs) - return nullcontext() - - register_tool_trace_provider(my_provider) - - @traced_tool(tool_name="test") - def func_with_kwargs(a, b=10, c="hello"): - return f"{a}-{b}-{c}" - - result = func_with_kwargs(1, b=20, c="world") - assert result == "1-20-world" - assert traced_inputs == [{"b": 20, "c": "world"}] - - -class TestMCPTracing: - """Tests for MCP-specific tracing functions.""" - - def setup_method(self): - """Store original providers before each test.""" - self._original_providers = _tool_trace_providers.copy() - clear_tool_trace_providers() - - def teardown_method(self): - """Restore 
original providers after each test.""" - clear_tool_trace_providers() - for provider in self._original_providers: - register_tool_trace_provider(provider) - - def test_trace_mcp_list_tools(self): - """Test trace_mcp_list_tools context manager.""" - traced_calls = [] - - def my_provider(tool_name: str, inputs): - traced_calls.append(tool_name) - return nullcontext() - - register_tool_trace_provider(my_provider) - - with trace_mcp_list_tools(): - pass - - assert traced_calls == ["mcp:list_tools"] - - def test_trace_mcp_list_tools_with_server_name(self): - """Test trace_mcp_list_tools with server name.""" - traced_calls = [] - - def my_provider(tool_name: str, inputs): - traced_calls.append(tool_name) - return nullcontext() - - register_tool_trace_provider(my_provider) - - with trace_mcp_list_tools(server_name="my-server"): - pass - - assert traced_calls == ["mcp:list_tools:my-server"] - - def test_trace_mcp_call_tool(self): - """Test trace_mcp_call_tool context manager.""" - traced_calls = [] - - def my_provider(tool_name: str, inputs): - traced_calls.append((tool_name, inputs)) - return nullcontext() - - register_tool_trace_provider(my_provider) - - with trace_mcp_call_tool("read_file", inputs={"path": "/tmp/test.txt"}): - pass - - assert traced_calls == [("mcp:read_file", {"path": "/tmp/test.txt"})] - - def test_trace_mcp_call_tool_with_server_name(self): - """Test trace_mcp_call_tool with server name.""" - traced_calls = [] - - def my_provider(tool_name: str, inputs): - traced_calls.append(tool_name) - return nullcontext() - - register_tool_trace_provider(my_provider) - - with trace_mcp_call_tool("read_file", server_name="filesystem"): - pass - - assert traced_calls == ["mcp:filesystem:read_file"] - - -class TestToolTraceBuiltInProviders: - """Tests for the built-in tool trace providers.""" - - def test_weave_tool_trace_returns_nullcontext_when_not_initialized(self): - """Test that Weave tool trace provider returns nullcontext when not initialized.""" - from openhands.sdk.observability.context import _get_weave_tool_trace - - with patch( - "openhands.sdk.observability.weave.is_weave_initialized", - return_value=False, - ): - ctx = _get_weave_tool_trace("test-tool", {"arg": "value"}) - assert type(ctx).__name__ == "nullcontext" - - def test_laminar_tool_trace_returns_nullcontext_when_not_initialized(self): - """Test that Laminar tool trace provider returns nullcontext when not initialized.""" - from openhands.sdk.observability.context import _get_laminar_tool_trace - - with patch( - "openhands.sdk.observability.laminar.should_enable_observability", - return_value=False, - ): - ctx = _get_laminar_tool_trace("test-tool", {"arg": "value"}) - assert type(ctx).__name__ == "nullcontext" - - def test_tool_trace_providers_auto_registered(self): - """Test that built-in tool trace providers are registered on import.""" - from openhands.sdk.observability import context - - provider_names = [p.__name__ for p in context._tool_trace_providers] - assert "_get_weave_tool_trace" in provider_names - assert "_get_laminar_tool_trace" in provider_names diff --git a/tests/sdk/observability/test_weave.py b/tests/sdk/observability/test_weave.py index 78e8dd4431..b68a205093 100644 --- a/tests/sdk/observability/test_weave.py +++ b/tests/sdk/observability/test_weave.py @@ -104,107 +104,6 @@ def failing_function(): failing_function() -class TestObserveWeaveDecorator: - """Tests for the @observe_weave decorator.""" - - def test_observe_weave_without_initialization(self): - """@observe_weave runs function normally when 
Weave is not initialized.""" - import openhands.sdk.observability.weave as weave_module - weave_module._weave_initialized = False - - from openhands.sdk.observability.weave import observe_weave - - @observe_weave(name="test_observe") - def test_function(x: int, y: int) -> int: - return x + y - - result = test_function(3, 4) - assert result == 7 - - def test_observe_weave_with_ignore_inputs(self): - """@observe_weave correctly handles ignore_inputs parameter.""" - import openhands.sdk.observability.weave as weave_module - weave_module._weave_initialized = False - - from openhands.sdk.observability.weave import observe_weave - - @observe_weave(name="test_ignore", ignore_inputs=["secret"]) - def test_function(data: str, secret: str) -> str: - return f"{data}-processed" - - result = test_function("hello", "my-secret") - assert result == "hello-processed" - - -class TestWeaveAttributes: - """Tests for the weave_attributes context manager.""" - - def test_weave_attributes_without_initialization(self): - """weave_attributes works as no-op when Weave is not initialized.""" - import openhands.sdk.observability.weave as weave_module - weave_module._weave_initialized = False - - from openhands.sdk.observability.weave import weave_attributes - - results = [] - with weave_attributes(conversation_id="conv-123", user_id="user-456"): - results.append(1) - results.append(2) - - assert results == [1, 2] - - def test_weave_thread_without_initialization(self): - """weave_thread works as no-op when Weave is not initialized.""" - import openhands.sdk.observability.weave as weave_module - weave_module._weave_initialized = False - - from openhands.sdk.observability.weave import weave_thread - - results = [] - with weave_thread("test-thread-123"): - results.append(1) - results.append(2) - - assert results == [1, 2] - - -class TestWeaveSpanManager: - """Tests for the WeaveSpanManager class.""" - - def test_span_manager_without_initialization(self): - """WeaveSpanManager works gracefully when Weave is not initialized.""" - import openhands.sdk.observability.weave as weave_module - weave_module._weave_initialized = False - - from openhands.sdk.observability.weave import WeaveSpanManager - - manager = WeaveSpanManager() - - # start_span should return None when not initialized - result = manager.start_span("test_span", inputs={"key": "value"}) - assert result is None - - # end_span should not raise - manager.end_span(output={"result": "ok"}) - - def test_global_span_functions(self): - """Global span functions work without initialization.""" - import openhands.sdk.observability.weave as weave_module - weave_module._weave_initialized = False - - from openhands.sdk.observability.weave import ( - start_weave_span, - end_weave_span, - ) - - # Should not raise - result = start_weave_span("test", inputs={"x": 1}) - assert result is None - - # Should not raise - end_weave_span(output={"y": 2}) - - class TestGetWeaveOp: """Tests for the get_weave_op function.""" @@ -233,35 +132,23 @@ class TestWeaveExports: def test_all_exports_available(self): """All expected functions are exported from the module.""" from openhands.sdk.observability import ( - end_weave_span, get_weave_client, get_weave_op, init_weave, is_weave_initialized, maybe_init_weave, - observe_weave, should_enable_weave, - start_weave_span, - weave_attributes, weave_op, - weave_thread, - WeaveSpanManager, ) # Just verify they're callable - assert callable(end_weave_span) assert callable(get_weave_client) assert callable(get_weave_op) assert callable(init_weave) assert 
callable(is_weave_initialized) assert callable(maybe_init_weave) - assert callable(observe_weave) assert callable(should_enable_weave) - assert callable(start_weave_span) - assert callable(weave_attributes) assert callable(weave_op) - assert callable(weave_thread) - assert WeaveSpanManager is not None class TestInitWeave: From 148a1c339451325495a475a27fafd9997f2b7d8b Mon Sep 17 00:00:00 2001 From: morganmcg1 Date: Fri, 19 Dec 2025 13:30:06 -0600 Subject: [PATCH 8/8] refactor: Improve Weave observability integration - Make weave an optional dependency (install with pip install openhands-sdk[weave]) - Add auto-init via maybe_init_weave() at module load (matches Laminar pattern) - Simplify demo to use only existing functions - Fix tests for optional dependency handling - Remove unrelated changes from PR scope --- examples/weave_observability_demo.py | 130 +++++++++------------ openhands-sdk/openhands/sdk/agent/agent.py | 17 +-- openhands-sdk/pyproject.toml | 4 +- tests/sdk/observability/test_weave.py | 33 ++++++ 4 files changed, 98 insertions(+), 86 deletions(-) diff --git a/examples/weave_observability_demo.py b/examples/weave_observability_demo.py index 03bfd89c95..53e3f41fe7 100644 --- a/examples/weave_observability_demo.py +++ b/examples/weave_observability_demo.py @@ -7,7 +7,7 @@ ## Key Features Demonstrated -1. **Automatic LLM Tracing**: Just call `init_weave()` and all LiteLLM calls +1. **Automatic LLM Tracing**: Just set environment variables and all LiteLLM calls are automatically traced - no `@weave.op` decorators needed for LLM calls! 2. **Custom Function Tracing**: Use `@weave_op` for custom agent logic you @@ -17,22 +17,19 @@ in `weave.thread()` to group all operations under the conversation ID. This enables conversation-level tracing in the Weave UI! -4. **Conversation Grouping**: Use `weave_attributes()` to add custom metadata - to operations (user_id, session_id, etc.) - ## How It Works -The SDK uses LiteLLM for all LLM calls. When you call `init_weave()`: -1. Weave's `implicit_patch()` automatically patches LiteLLM +The SDK uses LiteLLM for all LLM calls. When Weave is initialized: +1. Weave's autopatching automatically patches LiteLLM 2. All `litellm.completion()` and `litellm.acompletion()` calls are traced 3. LocalConversation.run() wraps the event loop in `weave.thread(conversation_id)` 4. You see full conversation traces in the Weave UI without any code changes! ## Prerequisites -- Set WANDB_API_KEY environment variable (valid W&B API key) +- Install with Weave support: `pip install openhands-sdk[weave]` +- Set WANDB_API_KEY environment variable - Set WEAVE_PROJECT environment variable (e.g., "your-team/openhands-demo") -- Optionally set OPENAI_API_KEY for actual LLM calls ## Usage @@ -41,9 +38,8 @@ python examples/weave_observability_demo.py Note: - If WANDB_API_KEY is not set or invalid, the demo will still run - but without Weave tracing. This allows testing the functionality - without requiring valid credentials. + If WANDB_API_KEY is not set or the weave package is not installed, + the demo will still run but without Weave tracing. 
""" import os @@ -57,20 +53,18 @@ is_weave_initialized, maybe_init_weave, weave_op, - weave_attributes, - weave_thread, - start_weave_span, - end_weave_span, - observe_weave, get_weave_op, ) -# Example 1: Using the @weave_op decorator +# Example 1: Using the @weave_op decorator for custom function tracing @weave_op(name="process_message") def process_message(message: str) -> dict: - """Process a user message and return a response.""" - # Simulate some processing + """Process a user message and return a response. + + When Weave is initialized, this function will appear in traces + with the name "process_message". + """ word_count = len(message.split()) return { "original": message, @@ -79,11 +73,14 @@ def process_message(message: str) -> dict: } -# Example 2: Using observe_weave for compatibility with Laminar -@observe_weave(name="analyze_sentiment") +# Example 2: Another traced function +@weave_op(name="analyze_sentiment") def analyze_sentiment(text: str) -> str: - """Analyze the sentiment of text.""" - # Simple mock sentiment analysis + """Analyze the sentiment of text. + + This demonstrates how @weave_op works as a no-op when Weave + is not initialized - your code runs normally either way. + """ positive_words = {"good", "great", "excellent", "happy", "love"} negative_words = {"bad", "terrible", "sad", "hate", "awful"} @@ -98,14 +95,15 @@ def analyze_sentiment(text: str) -> str: return "neutral" -# Example 3: Nested operations with thread grouping +# Example 3: Nested traced functions @weave_op(name="agent_step") def agent_step(step_num: int, user_input: str) -> dict: - """Simulate an agent step with nested operations.""" - # Process the message - processed = process_message(user_input) + """Simulate an agent step with nested traced operations. - # Analyze sentiment + When this function calls process_message and analyze_sentiment, + they appear as child spans in the Weave trace. 
+ """ + processed = process_message(user_input) sentiment = analyze_sentiment(user_input) return { @@ -115,22 +113,6 @@ def agent_step(step_num: int, user_input: str) -> dict: } -# Example 4: Manual span management -def manual_span_example(): - """Demonstrate manual span creation and management.""" - # Start a span - start_weave_span("manual_operation", inputs={"task": "demo"}) - - try: - # Do some work - result = {"status": "completed", "items_processed": 42} - end_weave_span(output=result) - return result - except Exception as e: - end_weave_span(error=e) - raise - - def run_demo(): """Run the Weave observability demo.""" print("=" * 60) @@ -150,7 +132,7 @@ def run_demo(): project = "openhands-sdk-demo" os.environ["WEAVE_PROJECT"] = project - # Initialize Weave + # Initialize Weave (or use maybe_init_weave() for conditional init) print(f"\n📊 Initializing Weave for project: {project}") success = maybe_init_weave() @@ -164,6 +146,7 @@ def run_demo(): else: print("⚠️ Weave not initialized (missing credentials or package)") print(" Running demo without tracing...") + print(" Install with: pip install openhands-sdk[weave]") print("\n" + "-" * 60) print("Running demo operations...") @@ -175,33 +158,25 @@ def run_demo(): result = process_message("Hello, this is a test message for the agent!") print(f" Result: {result}") - # Demo 2: Sentiment analysis with observe_weave - print("\n2️⃣ Laminar-compatible interface with @observe_weave:") - print(" (Easy migration from Laminar to Weave)") - sentiment = analyze_sentiment("This is a great and excellent demo!") - print(f" Sentiment: {sentiment}") - - # Demo 3: Conversation grouping with weave_attributes - print("\n3️⃣ Conversation grouping with weave_attributes:") - print(" (Group all operations under a conversation ID)") - conversation_id = "demo-conversation-001" - - with weave_attributes(conversation_id=conversation_id, user_id="demo-user"): - for i, msg in enumerate([ - "Hello, I need help with my code", - "The function is not working correctly", - "Great, that fixed it! Thank you!", - ], 1): - result = agent_step(i, msg) - print(f" Step {i}: sentiment={result['sentiment']}") - - # Demo 4: Manual span management - print("\n4️⃣ Manual span management (for advanced use cases):") - result = manual_span_example() + # Demo 2: Nested function calls + print("\n2️⃣ Nested traced function calls:") + print(" (Child functions appear as child spans in the trace)") + result = agent_step(1, "This is a great example of tracing!") print(f" Result: {result}") - # Demo 5: Show how to get weave.op for dynamic decoration - print("\n5️⃣ Dynamic decoration with get_weave_op():") + # Demo 3: Multiple steps to show trace structure + print("\n3️⃣ Multiple agent steps:") + for i, msg in enumerate([ + "Hello, I need help with my code", + "The function is not working correctly", + "Great, that fixed it! 
Thank you!", + ], 1): + result = agent_step(i, msg) + print(f" Step {i}: sentiment={result['sentiment']}") + + # Demo 4: Dynamic decoration with get_weave_op() + print("\n4️⃣ Dynamic decoration with get_weave_op():") + print(" (Useful for conditionally applying tracing)") op = get_weave_op() @op @@ -220,9 +195,18 @@ def dynamically_traced_function(x: int) -> int: print(" • LLM calls via LiteLLM are traced AUTOMATICALLY") print(" • Conversation.run() groups all operations by conversation ID") print(" • Use @weave_op for custom agent logic you want to trace") - print("\n📝 In your code, just do:") - print(" from openhands.sdk.observability import init_weave") - print(" init_weave('your-project') # That's it!") + print("\n📝 Minimal setup (zero code changes):") + print(" 1. pip install openhands-sdk[weave]") + print(" 2. export WANDB_API_KEY='your-key'") + print(" 3. export WEAVE_PROJECT='team/project'") + print(" That's it! All LLM calls are now traced.") + else: + print("\n📝 To enable tracing:") + print(" 1. pip install openhands-sdk[weave]") + print(" 2. export WANDB_API_KEY='your-api-key'") + print(" 3. export WEAVE_PROJECT='your-team/your-project'") + print(" 4. Run this demo again") + print("=" * 60) diff --git a/openhands-sdk/openhands/sdk/agent/agent.py b/openhands-sdk/openhands/sdk/agent/agent.py index d88d2656d4..f3741e6cf8 100644 --- a/openhands-sdk/openhands/sdk/agent/agent.py +++ b/openhands-sdk/openhands/sdk/agent/agent.py @@ -47,6 +47,7 @@ should_enable_observability, ) from openhands.sdk.observability.utils import extract_action_name +from openhands.sdk.observability.weave import maybe_init_weave from openhands.sdk.security.llm_analyzer import LLMSecurityAnalyzer from openhands.sdk.tool import ( Action, @@ -61,6 +62,7 @@ logger = get_logger(__name__) maybe_init_laminar() +maybe_init_weave() class Agent(AgentBase): @@ -109,17 +111,10 @@ def init_state( event = SystemPromptEvent( source="agent", system_prompt=TextContent(text=self.system_message), - # Always expose a 'security_risk' parameter in tool schemas. - # This ensures the schema remains consistent, even if the - # security analyzer is disabled. Validation of this field - # happens dynamically at runtime depending on the analyzer - # configured. This allows weaker models to omit risk field - # and bypass validation requirements when analyzer is disabled. - # For detailed logic, see `_extract_security_risk` method. - tools=[ - t.to_openai_tool(add_security_risk_prediction=True) - for t in self.tools_map.values() - ], + # Tools are stored as ToolDefinition objects and converted to + # OpenAI format with security_risk parameter during LLM completion. + # See make_llm_completion() in agent/utils.py for details. 
+ tools=list(self.tools_map.values()), ) on_event(event) diff --git a/openhands-sdk/pyproject.toml b/openhands-sdk/pyproject.toml index 1b890aac6e..cd3fd1d573 100644 --- a/openhands-sdk/pyproject.toml +++ b/openhands-sdk/pyproject.toml @@ -14,12 +14,12 @@ dependencies = [ "python-json-logger>=3.3.0", "tenacity>=9.1.2", "websockets>=12", - "lmnr>=0.7.24", - "weave>=0.52.22" + "lmnr>=0.7.24" ] [project.optional-dependencies] boto3 = ["boto3>=1.35.0"] +weave = ["weave>=0.52.22", "wandb"] [build-system] requires = ["setuptools>=61.0", "wheel"] diff --git a/tests/sdk/observability/test_weave.py b/tests/sdk/observability/test_weave.py index b68a205093..cad57dc767 100644 --- a/tests/sdk/observability/test_weave.py +++ b/tests/sdk/observability/test_weave.py @@ -12,6 +12,18 @@ import pytest +# Check if weave is installed for tests that require it +try: + import weave + WEAVE_INSTALLED = True +except ImportError: + WEAVE_INSTALLED = False + +requires_weave = pytest.mark.skipif( + not WEAVE_INSTALLED, + reason="Weave package not installed" +) + class TestWeaveConfiguration: """Tests for Weave configuration and initialization.""" @@ -154,6 +166,7 @@ def test_all_exports_available(self): class TestInitWeave: """Tests for init_weave function.""" + @requires_weave def test_init_weave_requires_project(self): """init_weave raises ValueError when no project is specified.""" import openhands.sdk.observability.weave as weave_module @@ -166,6 +179,25 @@ def test_init_weave_requires_project(self): with pytest.raises(ValueError, match="Weave project must be specified"): init_weave() + def test_init_weave_returns_false_when_weave_not_installed(self): + """init_weave returns False when weave package is not installed.""" + # This test verifies the expected behavior. + # When weave is not installed, init_weave should return False. + # Since weave is an optional dependency, we can test the actual + # behavior directly if weave isn't installed. + if WEAVE_INSTALLED: + pytest.skip("Weave is installed, cannot test missing module behavior") + + import openhands.sdk.observability.weave as weave_module + weave_module._weave_initialized = False + + from openhands.sdk.observability.weave import init_weave + + result = init_weave(project="test-project") + # When weave is not installed, init_weave should return False + assert result is False + + @requires_weave def test_init_weave_uses_env_project(self): """init_weave uses WEAVE_PROJECT from environment.""" import openhands.sdk.observability.weave as weave_module @@ -208,6 +240,7 @@ class TestAutopatching: Weave's automatic LiteLLM patching. """ + @requires_weave def test_init_weave_calls_weave_init(self): """init_weave calls weave.init which triggers autopatching.""" import openhands.sdk.observability.weave as weave_module