88import logging
99import requests
1010from datetime import datetime
11- from typing import Any , Dict , List , Optional , Protocol
11+ import ast
12+ import json
1213import os
14+ from typing import Any , Dict , List , Optional , Protocol
1315
1416from eval_protocol .models import EvaluationRow , InputMetadata , ExecutionMetadata , Message
1517from .base import BaseAdapter
@@ -44,6 +46,43 @@ def __call__(
4446 ...
4547
4648
def _parse_stringified_attribute(key: str, value: str) -> Any:
    """Decode a stringified list/dict attribute, returning the raw string on failure.

    JSON is attempted first so that JSON-specific escapes (for example the
    surrogate pair ``\\ud83d\\ude00`` or ``\\/``) are decoded correctly; a Python
    literal parser would silently accept such text but produce different
    characters. Python ``repr`` output (single quotes, ``True``/``None``) is not
    valid JSON and therefore falls through to ``ast.literal_eval``.

    Args:
        key: Attribute name, used only for debug logging.
        value: The stringified attribute value.

    Returns:
        The parsed object, or ``value`` unchanged if neither parser accepts it.
    """
    try:
        return json.loads(value)
    except (ValueError, RecursionError) as json_err:
        logger.debug("Failed to parse %s with json.loads: %s", key, json_err)

    try:
        return ast.literal_eval(value)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as literal_err:
        logger.debug("Failed to parse %s with ast.literal_eval: %s", key, literal_err)

    # Best effort: keep the original string rather than dropping the attribute.
    return value


def extract_openai_response(observations: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """Attempt to extract and parse attributes from raw_gen_ai_request observation. This only works when stored in OTEL format.

    The first observation named ``raw_gen_ai_request`` with type ``SPAN`` that
    carries a non-empty ``metadata["attributes"]`` mapping wins. String values
    that start with ``[`` or ``{`` are decoded (JSON first, then Python literal
    syntax); every other value is copied through unchanged.

    Args:
        observations: List of observation dictionaries from the trace

    Returns:
        Dict with all attributes parsed. Or None if not found.
    """
    for obs in observations:
        if obs.get("name") != "raw_gen_ai_request" or obs.get("type") != "SPAN":
            continue

        metadata = obs.get("metadata") or {}
        if not isinstance(metadata, dict):
            # Unexpected metadata payload; nothing to extract from this span.
            continue

        attributes = metadata.get("attributes") or {}
        if not isinstance(attributes, dict):
            continue

        result: Dict[str, Any] = {}
        for key, value in attributes.items():
            if isinstance(value, str) and value.startswith(("[", "{")):
                result[key] = _parse_stringified_attribute(key, value)
            else:
                result[key] = value

        # An empty attribute set is treated as "not found"; keep scanning.
        if result:
            return result

    return None
84+
85+
4786def convert_trace_dict_to_evaluation_row (
4887 trace : Dict [str , Any ], include_tool_calls : bool = True , span_name : Optional [str ] = None
4988) -> Optional [EvaluationRow ]:
@@ -96,6 +135,14 @@ def convert_trace_dict_to_evaluation_row(
96135 ):
97136 break # Break early if we've found all the metadata we need
98137
138+ observations = trace .get ("observations" ) or []
139+ # We can only extract when stored in OTEL format.
140+ openai_response = extract_openai_response (observations )
141+ if openai_response :
142+ choices = openai_response .get ("llm.openai.choices" )
143+ if choices and len (choices ) > 0 :
144+ execution_metadata .finish_reason = choices [0 ].get ("finish_reason" )
145+
99146 return EvaluationRow (
100147 messages = messages ,
101148 tools = tools ,
@@ -160,7 +207,7 @@ def extract_messages_from_trace_dict(
160207 # Fallback: use the last GENERATION observation which typically contains full chat history
161208 if not messages :
162209 try :
163- all_observations = trace .get ("observations" , [])
210+ all_observations = trace .get ("observations" ) or []
164211 gens = [obs for obs in all_observations if obs .get ("type" ) == "GENERATION" ]
165212 if gens :
166213 gens .sort (key = lambda x : x .get ("start_time" , "" ))
@@ -186,7 +233,7 @@ def get_final_generation_in_span_dict(trace: Dict[str, Any], span_name: str) ->
186233 The final generation dictionary, or None if not found
187234 """
188235 # Get all observations from the trace
189- all_observations = trace .get ("observations" , [])
236+ all_observations = trace .get ("observations" ) or []
190237
191238 # Find a span with the given name that has generation children
192239 parent_span = None
0 commit comments