1414import requests
1515
1616from eval_protocol .models import EvaluationRow , InputMetadata , Message
17+ from .base import BaseAdapter
1718from .utils import extract_messages_from_data
1819
19- # Keep backward compatibility
20- from ..integrations .braintrust import reward_fn_to_scorer , scorer_to_reward_fn
21-
2220
2321logger = logging .getLogger (__name__ )
2422
@@ -128,7 +126,7 @@ def extract_messages_from_trace(trace: Dict[str, Any], include_tool_calls: bool
128126 return messages
129127
130128
131- class BraintrustAdapter :
129+ class BraintrustAdapter ( BaseAdapter ) :
132130 """Adapter to pull data from Braintrust and convert to EvaluationRow format.
133131
134132 This adapter can pull both chat conversations and tool calling traces from
@@ -223,6 +221,49 @@ def get_evaluation_rows(
223221 logger .info ("Successfully processed %d BTQL results into %d evaluation rows" , len (all_traces ), len (eval_rows ))
224222 return eval_rows
225223
def upload_scores(self, rows: List[EvaluationRow], model_name: str, mean_score: float) -> None:
    """Upload evaluation scores back to Braintrust traces for tracking and analysis.

    Builds one feedback entry per unique ``braintrust_trace_id`` found in the
    rows' ``input_metadata.session_data`` and sends them all in a single POST
    to the project's ``/feedback`` endpoint. Every entry carries the same
    ``mean_score`` under the ``model_name`` key.

    Args:
        rows: Evaluation rows to report on. A row is skipped when it has no
            ``evaluation_result``, no ``input_metadata``, no ``session_data``,
            or no (or an empty) ``braintrust_trace_id`` in its session data.
        model_name: Name of the model (used as the score name in Braintrust).
        mean_score: The calculated mean score to push to Braintrust.

    Note:
        This is best-effort: any failure (malformed row, network error,
        non-2xx response) is logged as a warning and never raised. No request
        is made when no row yields a trace ID.
    """
    try:
        # A set de-duplicates trace IDs, so a trace shared by several rows
        # gets a single feedback entry. ``.get`` (rather than indexing) keeps
        # one row without a trace ID from raising KeyError and thereby
        # cancelling the upload for every other row.
        trace_ids = {
            row.input_metadata.session_data.get("braintrust_trace_id")
            for row in rows
            if row.evaluation_result and row.input_metadata and row.input_metadata.session_data
        }

        feedback_items = [
            {"id": trace_id, "scores": {model_name: mean_score}}
            for trace_id in trace_ids
            if trace_id  # drops None / empty IDs
        ]
        if not feedback_items:
            return

        response = requests.post(
            f"{self.api_url}/v1/project_logs/{self.project_id}/feedback",
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            },
            json={"feedback": feedback_items},
            # Without a timeout, requests waits indefinitely on a stalled
            # connection; score upload must not hang the evaluation run.
            timeout=30,
        )
        response.raise_for_status()

    except Exception as e:
        logger.warning("Failed to push scores to Braintrust: %s", e)

226267
227268def create_braintrust_adapter (
228269 api_key : Optional [str ] = None ,
@@ -237,4 +278,4 @@ def create_braintrust_adapter(
237278 )
238279
239280
240- __all__ = ["scorer_to_reward_fn" , "reward_fn_to_scorer" , " BraintrustAdapter" , "create_braintrust_adapter" ]
281+ __all__ = ["BraintrustAdapter" , "create_braintrust_adapter" ]
0 commit comments