|
1 | | -"""Deprecated adapter wrappers for Braintrust. |
| 1 | +"""Braintrust adapter for Eval Protocol. |
2 | 2 |
|
3 | | -This module forwards imports to :mod:`eval_protocol.integrations.braintrust`. |
| 3 | +This adapter pulls traces from Braintrust projects and converts them |
| 4 | +to EvaluationRow format for evaluation pipelines. |
4 | 5 | """ |
5 | 6 |
|
| 7 | +import os |
| 8 | +from datetime import datetime |
| 9 | +from typing import Any, Dict, Iterator, List, Optional |
| 10 | + |
| 11 | +import requests |
| 12 | + |
| 13 | +from eval_protocol.models import EvaluationRow, InputMetadata, Message |
| 14 | + |
| 15 | +# Keep backward compatibility |
6 | 16 | from ..integrations.braintrust import reward_fn_to_scorer, scorer_to_reward_fn |
7 | 17 |
|
8 | | -__all__ = ["scorer_to_reward_fn", "reward_fn_to_scorer"] |
| 18 | + |
class BraintrustAdapter:
    """Minimal adapter that pulls logged traces from a Braintrust project.

    Log events are fetched over the Braintrust REST API and converted to
    ``EvaluationRow`` objects. Scores can be written back to individual log
    entries with :meth:`create_score`.
    """

    # Number of log events requested from Braintrust in the single fetch call.
    DEFAULT_FETCH_LIMIT = 1000

    # Only events whose ``metadata["agent_name"]`` equals this value are converted.
    # NOTE(review): hard-coded filter inherited from the initial implementation —
    # confirm it matches how the traced agents are tagged.
    AGENT_NAME_FILTER = "agent_instance"

    def __init__(
        self,
        api_key: Optional[str] = None,
        api_url: Optional[str] = None,
        project_id: Optional[str] = None,
        timeout: float = 30.0,
    ):
        """Initialize the Braintrust adapter.

        Args:
            api_key: Braintrust API key (defaults to BRAINTRUST_API_KEY env var)
            api_url: Braintrust API URL (defaults to BRAINTRUST_API_URL env var,
                then to ``https://api.braintrust.dev``)
            project_id: Default project ID to fetch logs from / post scores to
            timeout: Seconds to wait on each HTTP request before giving up

        Raises:
            ValueError: If no API key is given and BRAINTRUST_API_KEY is unset.
        """
        self.api_key = api_key or os.getenv("BRAINTRUST_API_KEY")
        # ``or`` chaining also covers an env var that is set but empty.
        base_url = api_url or os.getenv("BRAINTRUST_API_URL") or "https://api.braintrust.dev"
        # Strip trailing slashes so the f-string URLs below never contain "//v1".
        self.api_url = base_url.rstrip("/")
        self.project_id = project_id
        self.timeout = timeout

        if not self.api_key:
            raise ValueError("BRAINTRUST_API_KEY environment variable or api_key parameter required")

    def get_evaluation_rows(
        self,
        project_id: Optional[str] = None,
        limit: Optional[int] = None,
        from_timestamp: Optional[datetime] = None,
        to_timestamp: Optional[datetime] = None,
    ) -> "Iterator[EvaluationRow]":
        """Fetch traces from Braintrust and convert to EvaluationRow format.

        This is a generator: validation and the HTTP request happen when
        iteration starts, not when the method is called. A single fetch of up
        to ``DEFAULT_FETCH_LIMIT`` events is performed; events that do not
        carry the expected ``agent_name`` metadata, or that cannot be
        converted, are skipped.

        Args:
            project_id: Project ID (overrides instance default)
            limit: Maximum number of rows to yield; ``None`` means no cap
            from_timestamp: If given, sent as integer epoch seconds
            to_timestamp: If given, sent as integer epoch seconds

        Yields:
            One ``EvaluationRow`` per convertible log event.

        Raises:
            ValueError: If no project ID is available.
            requests.HTTPError: If Braintrust answers with an error status.
        """
        project_id = project_id or self.project_id
        if not project_id:
            raise ValueError("project_id required")

        # Nothing to yield; avoid a pointless network round trip.
        if limit is not None and limit <= 0:
            return

        # Prepare query parameters for GET request
        params: Dict[str, Any] = {"limit": self.DEFAULT_FETCH_LIMIT}
        if from_timestamp:
            params["from_timestamp"] = int(from_timestamp.timestamp())
        if to_timestamp:
            params["to_timestamp"] = int(to_timestamp.timestamp())

        headers = {"Authorization": f"Bearer {self.api_key}"}
        url = f"{self.api_url}/v1/project_logs/{project_id}/fetch"

        # A timeout keeps a stalled connection from blocking the caller forever.
        response = requests.get(url, headers=headers, params=params, timeout=self.timeout)
        response.raise_for_status()

        payload = response.json()
        # ``or []`` also covers an explicit ``"events": null`` in the response.
        events = payload.get("events") or []

        yielded = 0
        for log in events:
            if not isinstance(log, dict):
                continue
            metadata = log.get("metadata")
            # ``metadata`` may be missing or null; such events cannot match the filter.
            if not isinstance(metadata, dict):
                continue
            if metadata.get("agent_name") != self.AGENT_NAME_FILTER:
                continue

            # Best effort: one malformed log must not abort the whole iteration.
            try:
                eval_row = self._convert_log_to_evaluation_row(log, project_id=project_id)
            except Exception as e:
                print(f"Warning: Failed to convert log {log.get('id', 'unknown')}: {e}")
                continue

            if eval_row is None:
                continue

            # Yield outside the ``try`` so errors raised by the consumer are not swallowed.
            yield eval_row
            yielded += 1
            if limit is not None and yielded >= limit:
                return

    def _convert_log_to_evaluation_row(
        self,
        log: Dict[str, Any],
        project_id: Optional[str] = None,
    ) -> "Optional[EvaluationRow]":
        """Convert a Braintrust log to EvaluationRow format.

        Args:
            log: One event dict from the fetch response
            project_id: Project the log was fetched from; falls back to the
                instance default when omitted

        Returns:
            The converted row, or ``None`` when the log holds no messages.
        """
        messages = self._extract_messages(log)
        if not messages:
            return None

        # Tolerate a missing or null ``metadata`` field.
        metadata = log.get("metadata") or {}

        input_metadata = InputMetadata(
            row_id=log.get("id"),
            completion_params=metadata,
            dataset_info={
                "braintrust_log_id": log.get("id"),
                # Record the project actually queried, not just the instance default.
                "braintrust_project_id": project_id or self.project_id,
                "span_id": log.get("span_id"),
                "trace_id": log.get("root_span_id"),
            },
        )

        return EvaluationRow(
            messages=messages,
            input_metadata=input_metadata,
            ground_truth=self._ground_truth_to_text(metadata.get("ground_truth")),
        )

    @staticmethod
    def _content_to_text(content: Any) -> str:
        """Return message content as text, mapping ``None`` to an empty string."""
        # ``str(None)`` would otherwise leak the literal text "None" into the message.
        return "" if content is None else str(content)

    @staticmethod
    def _ground_truth_to_text(value: Any) -> Optional[str]:
        """Return the ground truth as text, or ``None`` when it is absent or empty."""
        # Explicit checks keep legitimate falsy labels such as 0 or False.
        if value is None or value == "":
            return None
        return str(value)

    def _extract_messages(self, log: Dict[str, Any]) -> "List[Message]":
        """Extract conversation messages from a Braintrust log.

        Only logs with both a non-empty ``input`` and a non-empty ``output``
        are considered. List entries that are not dicts or lack a ``role`` are
        ignored; input entries must also have a ``content`` key.

        Returns:
            Input messages followed by output messages; empty when the log
            carries no usable conversation data.
        """
        input_data = log.get("input")
        output_data = log.get("output")

        # Skip spans without meaningful conversation data
        if not input_data or not output_data:
            return []

        messages = []

        # Extract input messages (usually just user message)
        if isinstance(input_data, list):
            for msg in input_data:
                if isinstance(msg, dict) and "role" in msg and "content" in msg:
                    messages.append(
                        Message(role=msg["role"], content=self._content_to_text(msg["content"]))
                    )

        # Extract output messages (assistant + tool responses)
        if isinstance(output_data, list):
            for msg in output_data:
                if not (isinstance(msg, dict) and "role" in msg):
                    continue
                role = msg["role"]
                is_tool = role == "tool"
                messages.append(
                    Message(
                        role=role,
                        content=self._content_to_text(msg.get("content")),
                        # Tool calls are only carried over from assistant messages.
                        tool_calls=msg.get("tool_calls") if role == "assistant" else None,
                        # Call ID and name are only carried over from tool messages.
                        tool_call_id=msg.get("tool_call_id") if is_tool else None,
                        name=msg.get("name") if is_tool else None,
                    )
                )

        return messages

    def create_score(
        self,
        log_id: str,
        name: str,
        value: float,
        comment: Optional[str] = None,
        project_id: Optional[str] = None,
    ) -> bool:
        """Create a score/feedback for a Braintrust log entry.

        Args:
            log_id: The ID of the log entry to score
            name: The score name/type
            value: The score value
            comment: Optional comment explaining the score
            project_id: Project ID (overrides instance default)

        Returns:
            True if successful, False otherwise

        Raises:
            ValueError: If no project ID is available.
        """
        project_id = project_id or self.project_id
        if not project_id:
            raise ValueError("project_id required")

        # Prepare feedback data - API expects "feedback" array
        feedback_item: Dict[str, Any] = {
            "id": log_id,
            "name": name,
            "value": value,
        }
        if comment:
            feedback_item["comment"] = comment

        feedback_data = {"feedback": [feedback_item]}

        headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}
        url = f"{self.api_url}/v1/project_logs/{project_id}/feedback"

        # Deliberately broad: the contract is "report failure as False", never raise.
        try:
            response = requests.post(url, headers=headers, json=feedback_data, timeout=self.timeout)
            response.raise_for_status()
        except Exception as e:
            print(f"Error creating Braintrust score: {e}")
            return False
        return True
| 196 | + |
| 197 | + |
def create_braintrust_adapter(
    api_key: Optional[str] = None,
    api_url: Optional[str] = None,
    project_id: Optional[str] = None,
) -> BraintrustAdapter:
    """Build a :class:`BraintrustAdapter` from the given settings.

    All three arguments are forwarded unchanged, by keyword, to the
    ``BraintrustAdapter`` constructor.

    Args:
        api_key: Braintrust API key, or ``None`` to let the adapter resolve it
        api_url: Braintrust API URL, or ``None`` to let the adapter resolve it
        project_id: Default project ID for the adapter

    Returns:
        The newly constructed adapter.
    """
    adapter = BraintrustAdapter(api_key=api_key, api_url=api_url, project_id=project_id)
    return adapter
| 209 | + |
| 210 | + |
# Public API: the re-exported scorer converters plus the trace adapter and its factory.
__all__ = [
    "scorer_to_reward_fn",
    "reward_fn_to_scorer",
    "BraintrustAdapter",
    "create_braintrust_adapter",
]
0 commit comments