Skip to content

Commit 5edd0d2

Browse files
author
Dylan Huang
committed
Merge branch 'main' into responses-api-part-2
# Conflicts: # eval_protocol/quickstart/llm_judge.py
2 parents f585ce0 + df0fc3a commit 5edd0d2

File tree

10 files changed

+1165
-180
lines changed

10 files changed

+1165
-180
lines changed
Lines changed: 235 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,240 @@
1-
"""Deprecated adapter wrappers for Braintrust.
1+
"""Braintrust adapter for Eval Protocol.
22
3-
This module forwards imports to :mod:`eval_protocol.integrations.braintrust`.
3+
This adapter allows pulling data from Braintrust deployments and converting it
4+
to EvaluationRow format for use in evaluation pipelines.
45
"""
56

7+
import logging
8+
import os
9+
import random
10+
import time
11+
from datetime import datetime, timedelta
12+
from typing import Any, Dict, List, Optional, Protocol
13+
14+
import requests
15+
16+
from eval_protocol.models import EvaluationRow, InputMetadata, Message
17+
from .utils import extract_messages_from_data
18+
19+
# Keep backward compatibility
620
from ..integrations.braintrust import reward_fn_to_scorer, scorer_to_reward_fn
721

8-
__all__ = ["scorer_to_reward_fn", "reward_fn_to_scorer"]
22+
23+
logger = logging.getLogger(__name__)
24+
25+
26+
class TraceConverter(Protocol):
    """Structural type for pluggable trace-to-EvaluationRow converters.

    Any callable that accepts a Braintrust trace together with the
    ``include_tool_calls`` processing option, and that hands back either an
    EvaluationRow or ``None`` (meaning "skip this trace"), satisfies this
    protocol.
    """

    def __call__(self, trace: Dict[str, Any], include_tool_calls: bool) -> Optional[EvaluationRow]:
        """Turn a single Braintrust trace into an EvaluationRow.

        Args:
            trace: The Braintrust trace object to convert.
            include_tool_calls: Whether tool calling information should be included.

        Returns:
            The converted EvaluationRow, or None when the trace should be skipped.
        """
        ...
48+
49+
50+
def convert_trace_to_evaluation_row(trace: Dict[str, Any], include_tool_calls: bool = True) -> Optional[EvaluationRow]:
    """Convert a Braintrust trace to EvaluationRow format.

    Messages are pulled from the trace via ``extract_messages_from_trace``.
    When ``include_tool_calls`` is set, tool definitions are read from
    ``metadata["tools"]`` and, failing that, from
    ``metadata["hidden_params"]["optional_params"]["tools"]``.

    Args:
        trace: Braintrust trace object
        include_tool_calls: Whether to include tool calling information

    Returns:
        EvaluationRow, or None if the trace has no messages or conversion fails
    """
    try:
        # Extract messages from the trace; nothing else matters without them.
        messages = extract_messages_from_trace(trace, include_tool_calls)
        if not messages:
            return None

        # Extract tools if available
        tools = None
        if include_tool_calls:
            # ``or {}`` (rather than a ``.get`` default) also covers keys that
            # are present but explicitly null; otherwise ``None.get`` would
            # raise and an otherwise valid trace would be discarded.
            metadata = trace.get("metadata") or {}
            tools = metadata.get("tools")
            if not tools:
                hidden_params = metadata.get("hidden_params") or {}
                optional_params = hidden_params.get("optional_params") or {}
                tools = optional_params.get("tools")

        return EvaluationRow(
            messages=messages,
            tools=tools,
            input_metadata=InputMetadata(
                session_data={
                    "braintrust_trace_id": trace.get("id"),
                }
            ),
        )

    except (AttributeError, ValueError, KeyError) as e:
        logger.error("Error converting trace %s: %s", trace.get("id", "unknown"), e)
        return None
90+
91+
92+
def extract_messages_from_trace(trace: Dict[str, Any], include_tool_calls: bool = True) -> List[Message]:
    """Extract messages from Braintrust trace input and output.

    The conversation is assembled from ``trace["input"]`` followed by the
    ``"message"`` entry of the first element of ``trace["output"]``. Traces
    missing either half yield an empty list. Extraction is best-effort:
    errors are logged as warnings and whatever was collected so far is
    returned.

    Args:
        trace: Braintrust trace object
        include_tool_calls: Whether to include tool calling information

    Returns:
        List of Message objects (empty when the trace holds no complete conversation)
    """
    messages: List[Message] = []

    try:
        # Look for complete conversations (input + output arrays)
        input_data = trace.get("input")
        output_list = trace.get("output")

        # Only a non-empty sequence whose first element is a dict can carry a
        # message. Checking the type up front keeps scalar outputs (numbers,
        # booleans) from raising TypeError on len()/indexing.
        output_data = None
        if isinstance(output_list, (list, tuple)) and output_list:
            first_output = output_list[0]
            if isinstance(first_output, dict):
                output_data = first_output.get("message")

        # Skip spans without meaningful conversation data
        if not input_data or not output_data:
            return messages

        # Both halves are known to be truthy here.
        messages.extend(extract_messages_from_data(input_data, include_tool_calls))
        messages.extend(extract_messages_from_data(output_data, include_tool_calls))

    except (AttributeError, ValueError, KeyError, TypeError) as e:
        logger.warning("Error processing trace %s: %s", trace.get("id", "unknown"), e)

    return messages
129+
130+
131+
class BraintrustAdapter:
    """Adapter to pull data from Braintrust and convert to EvaluationRow format.

    This adapter can pull both chat conversations and tool calling traces from
    Braintrust deployments and convert them into the EvaluationRow format expected
    by the evaluation protocol.

    Examples:
        Basic usage:
        >>> adapter = BraintrustAdapter(
        ...     api_key="your_api_key",
        ...     project_id="your_project_id"
        ... )
        >>> btql_query = "select: * from: project_logs('your_project_id') traces limit: 10"
        >>> rows = adapter.get_evaluation_rows(btql_query)

        Using BTQL for custom queries:
        >>> btql_query = '''
        ... select: *
        ... from: project_logs('your_project_id') traces
        ... filter: metadata.agent_name = 'agent_instance'
        ... limit: 50
        ... '''
        >>> rows = adapter.get_evaluation_rows(btql_query)
    """

    def __init__(
        self,
        api_key: Optional[str] = None,
        api_url: Optional[str] = None,
        project_id: Optional[str] = None,
        timeout: float = 60.0,
    ):
        """Initialize the Braintrust adapter.

        Args:
            api_key: Braintrust API key (defaults to BRAINTRUST_API_KEY env var)
            api_url: Braintrust API URL (defaults to BRAINTRUST_API_URL env var,
                then to https://api.braintrust.dev)
            project_id: Project ID to fetch logs from (defaults to BRAINTRUST_PROJECT_ID env var)
            timeout: Seconds to wait for the BTQL HTTP request before giving up

        Raises:
            ValueError: If no API key or no project ID could be resolved
        """
        self.api_key = api_key or os.getenv("BRAINTRUST_API_KEY")
        resolved_url = api_url or os.getenv("BRAINTRUST_API_URL", "https://api.braintrust.dev")
        # Drop trailing slashes so the endpoint is not built as ".../​/btql".
        self.api_url = resolved_url.rstrip("/")
        self.project_id = project_id or os.getenv("BRAINTRUST_PROJECT_ID")
        self.timeout = timeout

        if not self.api_key:
            raise ValueError("BRAINTRUST_API_KEY environment variable or api_key parameter required")
        if not self.project_id:
            raise ValueError("BRAINTRUST_PROJECT_ID environment variable or project_id parameter required")

    def get_evaluation_rows(
        self,
        btql_query: str,
        include_tool_calls: bool = True,
        converter: Optional[TraceConverter] = None,
    ) -> List[EvaluationRow]:
        """Get evaluation rows using a custom BTQL query.

        Args:
            btql_query: The BTQL query string to execute
            include_tool_calls: Whether to include tool calling information
            converter: Optional custom converter implementing TraceConverter protocol;
                ``convert_trace_to_evaluation_row`` is used when omitted

        Returns:
            List[EvaluationRow]: Converted evaluation rows (traces that convert to
            nothing or fail conversion are left out)

        Raises:
            requests.HTTPError: If the BTQL endpoint answers with an error status
            requests.Timeout: If the request exceeds ``self.timeout`` seconds
        """
        eval_rows: List[EvaluationRow] = []

        headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}

        # requests applies no timeout unless one is given, so an unresponsive
        # server would otherwise block the caller indefinitely.
        response = requests.post(
            f"{self.api_url}/btql",
            headers=headers,
            json={"query": btql_query, "fmt": "json"},
            timeout=self.timeout,
        )
        response.raise_for_status()
        query_response = response.json()

        if not query_response or not query_response.get("data"):
            logger.debug("No data returned from BTQL query")
            return eval_rows

        all_traces = query_response["data"]
        logger.debug("BTQL query returned %d traces", len(all_traces))

        convert = converter or convert_trace_to_evaluation_row

        # Process each selected trace
        for trace in all_traces:
            try:
                eval_row = convert(trace, include_tool_calls)
                if eval_row:
                    eval_rows.append(eval_row)
            except (AttributeError, ValueError, KeyError) as e:
                # A non-dict trace has no ``.get``; don't let the log call itself raise.
                trace_id = trace.get("id", "unknown") if isinstance(trace, dict) else "unknown"
                logger.warning("Failed to convert trace %s: %s", trace_id, e)
                continue

        logger.info("Successfully processed %d BTQL results into %d evaluation rows", len(all_traces), len(eval_rows))
        return eval_rows
225+
226+
227+
def create_braintrust_adapter(
    api_key: Optional[str] = None,
    api_url: Optional[str] = None,
    project_id: Optional[str] = None,
) -> BraintrustAdapter:
    """Build a BraintrustAdapter from optional connection settings.

    The three arguments are forwarded unchanged, by keyword, to the
    ``BraintrustAdapter`` constructor.

    Args:
        api_key: Braintrust API key, or None
        api_url: Braintrust API URL, or None
        project_id: Braintrust project ID, or None

    Returns:
        The newly constructed BraintrustAdapter
    """
    adapter_kwargs = {
        "api_key": api_key,
        "api_url": api_url,
        "project_id": project_id,
    }
    return BraintrustAdapter(**adapter_kwargs)
238+
239+
240+
# Names exported by ``from <module> import *``: the two scorer/reward-function
# helpers re-exported from ..integrations.braintrust for backward compatibility,
# plus the adapter class and its factory function.
__all__ = ["scorer_to_reward_fn", "reward_fn_to_scorer", "BraintrustAdapter", "create_braintrust_adapter"]

0 commit comments

Comments
 (0)