Skip to content

Commit 67ac619

Browse files
committed
braintrust example
1 parent d563336 commit 67ac619

File tree

6 files changed

+671
-12
lines changed

6 files changed

+671
-12
lines changed
Lines changed: 206 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,211 @@
1-
"""Deprecated adapter wrappers for Braintrust.
1+
"""Braintrust adapter for Eval Protocol.
22
3-
This module forwards imports to :mod:`eval_protocol.integrations.braintrust`.
3+
This adapter pulls traces from Braintrust projects and converts them
4+
to EvaluationRow format for evaluation pipelines.
45
"""
56

7+
import os
8+
from datetime import datetime
9+
from typing import Any, Dict, Iterator, List, Optional
10+
11+
import requests
12+
13+
from eval_protocol.models import EvaluationRow, InputMetadata, Message
14+
15+
# Keep backward compatibility
616
from ..integrations.braintrust import reward_fn_to_scorer, scorer_to_reward_fn
717

8-
__all__ = ["scorer_to_reward_fn", "reward_fn_to_scorer"]
18+
19+
class BraintrustAdapter:
    """Minimal adapter that pulls traces from Braintrust project logs.

    Fetched log events are converted to ``EvaluationRow`` objects, and scores
    can be posted back to an individual log entry as feedback.
    """

    # Number of events requested from the fetch endpoint when the caller
    # does not supply ``limit``.
    DEFAULT_FETCH_LIMIT = 1000
    # Seconds to wait on the Braintrust API. ``requests`` has no default
    # timeout, so without one a stalled connection blocks forever.
    REQUEST_TIMEOUT_SECONDS = 30
    # Only events whose ``metadata.agent_name`` equals this value are converted.
    _AGENT_NAME = "agent_instance"

    def __init__(
        self,
        api_key: Optional[str] = None,
        api_url: Optional[str] = None,
        project_id: Optional[str] = None,
    ):
        """Initialize the Braintrust adapter.

        Args:
            api_key: Braintrust API key (defaults to BRAINTRUST_API_KEY env var)
            api_url: Braintrust API URL (defaults to BRAINTRUST_API_URL env var,
                then to ``https://api.braintrust.dev``)
            project_id: Project ID to fetch logs from

        Raises:
            ValueError: If no API key is given and BRAINTRUST_API_KEY is unset.
        """
        self.api_key = api_key or os.getenv("BRAINTRUST_API_KEY")
        # ``or`` (rather than a getenv default) also covers an env var that is
        # set but empty; the trailing slash is dropped so the f-string URLs
        # below never contain ``//v1``.
        base_url = api_url or os.getenv("BRAINTRUST_API_URL") or "https://api.braintrust.dev"
        self.api_url = base_url.rstrip("/")
        self.project_id = project_id

        if not self.api_key:
            raise ValueError("BRAINTRUST_API_KEY environment variable or api_key parameter required")

    @staticmethod
    def _metadata_of(log: Dict[str, Any]) -> Dict[str, Any]:
        """Return the log's ``metadata`` dict, or ``{}`` if absent or not a dict."""
        metadata = log.get("metadata")
        return metadata if isinstance(metadata, dict) else {}

    @staticmethod
    def _content_to_text(content: Any) -> str:
        """Stringify message content, mapping ``None`` to ``""`` instead of ``"None"``."""
        return "" if content is None else str(content)

    def get_evaluation_rows(
        self,
        project_id: Optional[str] = None,
        limit: Optional[int] = None,
        from_timestamp: Optional[datetime] = None,
        to_timestamp: Optional[datetime] = None,
    ) -> Iterator[EvaluationRow]:
        """Fetch traces from Braintrust and convert to EvaluationRow format.

        Args:
            project_id: Project ID (overrides instance default)
            limit: Maximum number of events to request from the API
                (defaults to ``DEFAULT_FETCH_LIMIT``). Events are filtered
                after fetching, so fewer rows may be yielded.
            from_timestamp: If given, sent as an integer epoch-seconds
                ``from_timestamp`` query parameter.
            to_timestamp: If given, sent as an integer epoch-seconds
                ``to_timestamp`` query parameter.

        Yields:
            One ``EvaluationRow`` per matching event that converts cleanly.

        Raises:
            ValueError: If no project ID is available. Because this is a
                generator, the error surfaces on first iteration.
            requests.RequestException: If the HTTP request fails, times out,
                or returns an error status.
        """
        project_id = project_id or self.project_id
        if not project_id:
            raise ValueError("project_id required")

        # Prepare query parameters for GET request
        params: Dict[str, Any] = {"limit": self.DEFAULT_FETCH_LIMIT if limit is None else limit}
        # NOTE(review): confirm the fetch endpoint honours these timestamp
        # parameters; this could not be verified from this file alone.
        if from_timestamp is not None:
            params["from_timestamp"] = int(from_timestamp.timestamp())
        if to_timestamp is not None:
            params["to_timestamp"] = int(to_timestamp.timestamp())

        # Fetch logs from Braintrust using GET endpoint
        headers = {"Authorization": f"Bearer {self.api_key}"}
        url = f"{self.api_url}/v1/project_logs/{project_id}/fetch"

        response = requests.get(url, headers=headers, params=params, timeout=self.REQUEST_TIMEOUT_SECONDS)
        response.raise_for_status()

        events = response.json().get("events") or []

        # Convert each log to EvaluationRow
        for log in events:
            if self._metadata_of(log).get("agent_name") != self._AGENT_NAME:
                continue
            try:
                eval_row = self._convert_log_to_evaluation_row(log, project_id=project_id)
            except Exception as e:
                # Best effort: one malformed event must not abort the batch.
                print(f"Warning: Failed to convert log {log.get('id', 'unknown')}: {e}")
                continue
            # Yield outside the try so the handler above only ever sees
            # conversion errors.
            if eval_row is not None:
                yield eval_row

    def _convert_log_to_evaluation_row(
        self,
        log: Dict[str, Any],
        project_id: Optional[str] = None,
    ) -> Optional[EvaluationRow]:
        """Convert a Braintrust log to EvaluationRow format.

        Args:
            log: A single event dict from the fetch response.
            project_id: Project the log was fetched from; falls back to the
                instance default when omitted.

        Returns:
            The converted row, or ``None`` when the log has no extractable
            messages.
        """
        # Extract messages from the log
        messages = self._extract_messages(log)
        if not messages:
            return None

        metadata = self._metadata_of(log)
        log_id = log.get("id")

        # The log's whole metadata dict is passed through as completion_params.
        input_metadata = InputMetadata(
            row_id=log_id,
            completion_params=metadata,
            dataset_info={
                "braintrust_log_id": log_id,
                # Record the project actually queried, not just the default.
                "braintrust_project_id": project_id or self.project_id,
                "span_id": log.get("span_id"),
                "trace_id": log.get("root_span_id"),
            },
        )

        # Extract ground truth from metadata. Compare against None so falsy
        # but meaningful values such as 0 or False are preserved.
        ground_truth = metadata.get("ground_truth")

        return EvaluationRow(
            messages=messages,
            input_metadata=input_metadata,
            ground_truth=None if ground_truth is None else str(ground_truth),
        )

    def _extract_messages(self, log: Dict[str, Any]) -> List[Message]:
        """Extract conversation messages from a Braintrust log.

        Returns an empty list when either ``input`` or ``output`` is missing
        or empty. Input entries need both ``role`` and ``content``; output
        entries need only ``role``.
        """
        # Look for complete conversations (input + output arrays)
        input_data = log.get("input")
        output_data = log.get("output")

        # Skip spans without meaningful conversation data
        if not input_data or not output_data:
            return []

        messages: List[Message] = []

        # Extract input messages (usually just user message)
        if isinstance(input_data, list):
            for msg in input_data:
                if isinstance(msg, dict) and "role" in msg and "content" in msg:
                    messages.append(Message(role=msg["role"], content=self._content_to_text(msg["content"])))

        # Extract output messages (assistant + tool responses)
        if isinstance(output_data, list):
            for msg in output_data:
                if not isinstance(msg, dict) or "role" not in msg:
                    continue
                role = msg["role"]
                is_tool = role == "tool"
                messages.append(
                    Message(
                        role=role,
                        content=self._content_to_text(msg.get("content")),
                        # Tool calls are only carried on assistant messages;
                        # the call id and name only on tool messages.
                        tool_calls=msg.get("tool_calls") if role == "assistant" else None,
                        tool_call_id=msg.get("tool_call_id") if is_tool else None,
                        name=msg.get("name") if is_tool else None,
                    )
                )

        return messages

    def create_score(
        self,
        log_id: str,
        name: str,
        value: float,
        comment: Optional[str] = None,
        project_id: Optional[str] = None,
    ) -> bool:
        """Create a score/feedback for a Braintrust log entry.

        Args:
            log_id: The ID of the log entry to score
            name: The score name/type
            value: The score value
            comment: Optional comment explaining the score (omitted from the
                payload when empty)
            project_id: Project ID (overrides instance default)

        Returns:
            True if successful, False otherwise

        Raises:
            ValueError: If no project ID is available.
        """
        project_id = project_id or self.project_id
        if not project_id:
            raise ValueError("project_id required")

        # Prepare feedback data - API expects "feedback" array
        # NOTE(review): confirm the per-item field names ("name"/"value")
        # against the Braintrust feedback schema; not verifiable from here.
        feedback_item: Dict[str, Any] = {
            "id": log_id,
            "name": name,
            "value": value,
        }
        if comment:
            feedback_item["comment"] = comment

        feedback_data = {"feedback": [feedback_item]}

        # Post feedback to Braintrust
        headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
        url = f"{self.api_url}/v1/project_logs/{project_id}/feedback"

        try:
            response = requests.post(
                url,
                headers=headers,
                json=feedback_data,
                timeout=self.REQUEST_TIMEOUT_SECONDS,
            )
            response.raise_for_status()
        except requests.RequestException as e:
            # Scoring is best effort: report the failure and signal it via
            # the return value instead of raising.
            print(f"Error creating Braintrust score: {e}")
            return False
        return True
196+
197+
198+
def create_braintrust_adapter(
    api_key: Optional[str] = None,
    api_url: Optional[str] = None,
    project_id: Optional[str] = None,
) -> BraintrustAdapter:
    """Build and return a ``BraintrustAdapter`` from the given settings.

    All three arguments are forwarded unchanged, by keyword, to the
    ``BraintrustAdapter`` constructor.
    """
    adapter_kwargs = {
        "api_key": api_key,
        "api_url": api_url,
        "project_id": project_id,
    }
    return BraintrustAdapter(**adapter_kwargs)
209+
210+
211+
# Public API: the scorer/reward converters re-exported above for backward
# compatibility, plus the adapter class and its factory function.
__all__ = ["scorer_to_reward_fn", "reward_fn_to_scorer", "BraintrustAdapter", "create_braintrust_adapter"]

pyproject.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ chinook = [
133133
langchain = [
134134
"langchain-core>=0.3.0",
135135
]
136+
braintrust = [
137+
"braintrust[otel]",
138+
]
136139

137140
[tool.pytest.ini_options]
138141
addopts = "-q"

0 commit comments

Comments
 (0)