|
1 | | -"""Deprecated adapter wrappers for Braintrust. |
| 1 | +"""Braintrust adapter for Eval Protocol. |
2 | 2 |
|
3 | | -This module forwards imports to :mod:`eval_protocol.integrations.braintrust`. |
| 3 | +This adapter allows pulling data from Braintrust deployments and converting it |
| 4 | +to EvaluationRow format for use in evaluation pipelines. |
4 | 5 | """ |
5 | 6 |
|
| 7 | +import logging |
| 8 | +import os |
| 9 | +import random |
| 10 | +import time |
| 11 | +from datetime import datetime, timedelta |
| 12 | +from typing import Any, Dict, List, Optional, Protocol |
| 13 | + |
| 14 | +import requests |
| 15 | + |
| 16 | +from eval_protocol.models import EvaluationRow, InputMetadata, Message |
| 17 | +from .utils import extract_messages_from_data |
| 18 | + |
| 19 | +# Keep backward compatibility |
6 | 20 | from ..integrations.braintrust import reward_fn_to_scorer, scorer_to_reward_fn |
7 | 21 |
|
8 | | -__all__ = ["scorer_to_reward_fn", "reward_fn_to_scorer"] |
| 22 | + |
| 23 | +logger = logging.getLogger(__name__) |
| 24 | + |
| 25 | + |
class TraceConverter(Protocol):
    """Structural type for pluggable trace-to-EvaluationRow converters.

    Any callable that accepts a Braintrust trace dict plus the
    ``include_tool_calls`` flag and returns either an ``EvaluationRow`` or
    ``None`` (meaning "skip this trace") satisfies this protocol.
    """

    def __call__(
        self,
        trace: Dict[str, Any],
        include_tool_calls: bool,
    ) -> Optional[EvaluationRow]:
        """Turn one Braintrust trace into an EvaluationRow.

        Args:
            trace: The Braintrust trace object to convert.
            include_tool_calls: Whether tool calling information should be
                carried over into the resulting row.

        Returns:
            The converted EvaluationRow, or None when the trace should be
            skipped.
        """
        ...
| 48 | + |
| 49 | + |
def convert_trace_to_evaluation_row(trace: Dict[str, Any], include_tool_calls: bool = True) -> Optional[EvaluationRow]:
    """Convert a Braintrust trace to EvaluationRow format.

    Messages are extracted with ``extract_messages_from_trace``. When
    ``include_tool_calls`` is set, tool definitions are looked up first in
    ``trace["metadata"]["tools"]`` and then in
    ``trace["metadata"]["hidden_params"]["optional_params"]["tools"]``.

    Args:
        trace: Braintrust trace object
        include_tool_calls: Whether to include tool calling information

    Returns:
        EvaluationRow, or None if the trace yields no messages or the
        conversion fails with AttributeError, ValueError or KeyError (the
        failure is logged at error level).
    """
    try:
        messages = extract_messages_from_trace(trace, include_tool_calls)

        # A trace without messages is skipped, so bail out before doing any
        # further work on its metadata.
        if not messages:
            return None

        tools = None
        if include_tool_calls:
            # ``or {}`` (rather than a .get default) also covers keys that are
            # present but explicitly null, which would otherwise raise
            # AttributeError below and drop an otherwise valid trace.
            metadata = trace.get("metadata") or {}
            tools = metadata.get("tools")
            if not tools:
                hidden_params = metadata.get("hidden_params") or {}
                optional_params = hidden_params.get("optional_params") or {}
                tools = optional_params.get("tools")

        return EvaluationRow(
            messages=messages,
            tools=tools,
            input_metadata=InputMetadata(
                session_data={
                    "braintrust_trace_id": trace.get("id"),
                }
            ),
        )

    except (AttributeError, ValueError, KeyError) as e:
        logger.error("Error converting trace %s: %s", trace.get("id", "unknown"), e)
        return None
| 90 | + |
| 91 | + |
def extract_messages_from_trace(trace: Dict[str, Any], include_tool_calls: bool = True) -> List[Message]:
    """Extract messages from Braintrust trace input and output.

    The conversation is built from ``trace["input"]`` followed by the
    ``"message"`` entry of the first element of ``trace["output"]``. Traces
    missing either side are treated as not containing a conversation.

    Args:
        trace: Braintrust trace object
        include_tool_calls: Whether to include tool calling information

    Returns:
        List of Message objects; empty when the trace has no usable input or
        output. If message extraction raises AttributeError, ValueError or
        KeyError, a warning is logged and whatever was collected so far is
        returned.
    """
    messages: List[Message] = []

    input_data = trace.get("input")

    # Only a non-empty list/tuple whose first element is a dict can carry the
    # output message. Checking the type up front keeps scalar or dict outputs
    # from raising (TypeError from len(), KeyError from [0]) and instead
    # treats them as "no output".
    output_data = None
    output_list = trace.get("output")
    if isinstance(output_list, (list, tuple)) and output_list:
        first_output = output_list[0]
        if isinstance(first_output, dict):
            output_data = first_output.get("message")

    # Skip spans without meaningful conversation data
    if not input_data or not output_data:
        return messages

    try:
        messages.extend(extract_messages_from_data(input_data, include_tool_calls))
        messages.extend(extract_messages_from_data(output_data, include_tool_calls))
    except (AttributeError, ValueError, KeyError) as e:
        logger.warning("Error processing trace %s: %s", trace.get("id", "unknown"), e)

    return messages
| 129 | + |
| 130 | + |
class BraintrustAdapter:
    """Adapter to pull data from Braintrust and convert to EvaluationRow format.

    This adapter can pull both chat conversations and tool calling traces from
    Braintrust deployments and convert them into the EvaluationRow format expected
    by the evaluation protocol.

    Examples:
        Basic usage:
        >>> adapter = BraintrustAdapter(
        ...     api_key="your_api_key",
        ...     project_id="your_project_id"
        ... )
        >>> btql_query = "select: * from: project_logs('your_project_id') traces limit: 10"
        >>> rows = adapter.get_evaluation_rows(btql_query)

        Using BTQL for custom queries:
        >>> btql_query = '''
        ... select: *
        ... from: project_logs('your_project_id') traces
        ... filter: metadata.agent_name = 'agent_instance'
        ... limit: 50
        ... '''
        >>> rows = adapter.get_evaluation_rows(btql_query)
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        api_url: Optional[str] = None,
        project_id: Optional[str] = None,
        timeout: Optional[float] = 60.0,
    ):
        """Initialize the Braintrust adapter.

        Args:
            api_key: Braintrust API key (defaults to BRAINTRUST_API_KEY env var)
            api_url: Braintrust API URL (defaults to BRAINTRUST_API_URL env var,
                then to https://api.braintrust.dev). Trailing slashes are removed.
            project_id: Project ID to fetch logs from (defaults to BRAINTRUST_PROJECT_ID env var)
            timeout: Timeout in seconds passed to the HTTP request; None disables it.

        Raises:
            ValueError: If no API key or no project ID is available.
        """
        self.api_key = api_key or os.getenv("BRAINTRUST_API_KEY")
        # Strip trailing slashes so the request URL never contains "//btql".
        self.api_url = (api_url or os.getenv("BRAINTRUST_API_URL", "https://api.braintrust.dev")).rstrip("/")
        self.project_id = project_id or os.getenv("BRAINTRUST_PROJECT_ID")
        self.timeout = timeout

        if not self.api_key:
            raise ValueError("BRAINTRUST_API_KEY environment variable or api_key parameter required")
        if not self.project_id:
            raise ValueError("BRAINTRUST_PROJECT_ID environment variable or project_id parameter required")

    def get_evaluation_rows(
        self,
        btql_query: str,
        include_tool_calls: bool = True,
        converter: Optional[TraceConverter] = None,
    ) -> List[EvaluationRow]:
        """Get evaluation rows using a custom BTQL query.

        The query is POSTed to ``{api_url}/btql`` and every entry of the
        response's ``data`` list is run through ``converter`` (or
        ``convert_trace_to_evaluation_row`` when none is given).

        Args:
            btql_query: The BTQL query string to execute
            include_tool_calls: Whether to include tool calling information
            converter: Optional custom converter implementing TraceConverter protocol

        Returns:
            List[EvaluationRow]: Converted evaluation rows. Traces for which the
            converter returns a falsy value, or raises AttributeError,
            ValueError or KeyError, are skipped.

        Raises:
            requests.HTTPError: If the API answers with an error status.
            requests.Timeout: If the request exceeds the configured timeout.
        """
        eval_rows: List[EvaluationRow] = []

        headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}

        # Without an explicit timeout requests waits indefinitely on a stalled
        # connection.
        response = requests.post(
            f"{self.api_url}/btql",
            headers=headers,
            json={"query": btql_query, "fmt": "json"},
            timeout=self.timeout,
        )
        response.raise_for_status()
        query_response = response.json()

        if not query_response or not query_response.get("data"):
            logger.debug("No data returned from BTQL query")
            return eval_rows

        all_traces = query_response["data"]
        logger.debug("BTQL query returned %d traces", len(all_traces))

        # Resolve the converter once instead of branching on every trace.
        convert = converter if converter else convert_trace_to_evaluation_row

        # Process each selected trace
        for trace in all_traces:
            try:
                eval_row = convert(trace, include_tool_calls)
                if eval_row:
                    eval_rows.append(eval_row)
            except (AttributeError, ValueError, KeyError) as e:
                logger.warning("Failed to convert trace %s: %s", trace.get("id", "unknown"), e)
                continue

        logger.info("Successfully processed %d BTQL results into %d evaluation rows", len(all_traces), len(eval_rows))
        return eval_rows
| 225 | + |
| 226 | + |
def create_braintrust_adapter(
    api_key: Optional[str] = None,
    api_url: Optional[str] = None,
    project_id: Optional[str] = None,
) -> BraintrustAdapter:
    """Build a BraintrustAdapter from the given settings.

    Args:
        api_key: Forwarded to ``BraintrustAdapter`` as ``api_key``.
        api_url: Forwarded to ``BraintrustAdapter`` as ``api_url``.
        project_id: Forwarded to ``BraintrustAdapter`` as ``project_id``.

    Returns:
        The newly constructed adapter.
    """
    adapter_options = {
        "api_key": api_key,
        "api_url": api_url,
        "project_id": project_id,
    }
    return BraintrustAdapter(**adapter_options)
| 238 | + |
| 239 | + |
| 240 | +__all__ = ["scorer_to_reward_fn", "reward_fn_to_scorer", "BraintrustAdapter", "create_braintrust_adapter"] |
0 commit comments