88import logging
99import requests
1010from datetime import datetime
11- import ast
12- import json
13- import os
1411from typing import Any , Dict , List , Optional , Protocol
12+ import os
1513
1614from eval_protocol .models import EvaluationRow , InputMetadata , ExecutionMetadata , Message
1715from .base import BaseAdapter
@@ -46,43 +44,6 @@ def __call__(
4644 ...
4745
4846
def _parse_stringified_attribute(key: str, value: Any) -> Any:
    """Best-effort decode of one attribute value that may be a stringified object.

    Args:
        key: Attribute name; used only in debug log messages.
        value: Raw attribute value.

    Returns:
        The parsed object when ``value`` is a string starting with ``[`` or ``{``
        that ``ast.literal_eval`` (tried first) or ``json.loads`` can parse;
        otherwise ``value`` unchanged.
    """
    if not (isinstance(value, str) and value.startswith(("[", "{"))):
        return value

    # Python-repr form first (single quotes, True/False/None) ...
    try:
        return ast.literal_eval(value)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError) as e:
        logger.debug("Failed to parse %s with ast.literal_eval: %s", key, e)

    # ... then strict JSON (true/false/null).
    try:
        return json.loads(value)
    except (ValueError, RecursionError) as e:  # JSONDecodeError subclasses ValueError
        logger.debug("Failed to parse %s with json.loads: %s", key, e)

    # Neither parser accepted it: keep the raw string rather than dropping the key.
    return value


def extract_otel_attributes(observations: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """Attempt to extract and parse attributes from the raw_gen_ai_request observation.

    This only works when the trace is stored in OTEL format. Observations whose
    ``metadata`` or ``metadata["attributes"]`` is missing, empty, or not a dict
    are skipped instead of raising, so a malformed observation cannot abort the
    extraction.

    Args:
        observations: List of observation dictionaries from the trace.

    Returns:
        Dict with all attributes of the first ``raw_gen_ai_request`` SPAN that
        has a non-empty attribute mapping, with stringified list/dict values
        parsed where possible. ``None`` if no such observation is found.
    """
    for obs in observations or []:
        if not isinstance(obs, dict):
            continue
        if obs.get("name") != "raw_gen_ai_request" or obs.get("type") != "SPAN":
            continue

        metadata = obs.get("metadata")
        attributes = metadata.get("attributes") if isinstance(metadata, dict) else None
        # An empty mapping yields nothing useful; keep looking at later observations.
        if not isinstance(attributes, dict) or not attributes:
            continue

        return {key: _parse_stringified_attribute(key, value) for key, value in attributes.items()}

    return None
84-
85-
8647def convert_trace_dict_to_evaluation_row (
8748 trace : Dict [str , Any ], include_tool_calls : bool = True , span_name : Optional [str ] = None
8849) -> Optional [EvaluationRow ]:
@@ -135,19 +96,6 @@ def convert_trace_dict_to_evaluation_row(
13596 ):
13697 break # Break early if we've found all the metadata we need
13798
138- observations = trace .get ("observations" ) or []
139- # We can only extract when stored in OTEL format.
140- otel_attributes = extract_otel_attributes (observations )
141- if otel_attributes :
142- # Find choices from any provider (llm.*.choices pattern)
143- choices = None
144- for key , value in otel_attributes .items ():
145- if key .endswith (".choices" ) and isinstance (value , list ):
146- choices = value
147- break
148- if choices and len (choices ) > 0 :
149- execution_metadata .finish_reason = choices [0 ].get ("finish_reason" )
150-
15199 return EvaluationRow (
152100 messages = messages ,
153101 tools = tools ,
@@ -212,7 +160,7 @@ def extract_messages_from_trace_dict(
212160 # Fallback: use the last GENERATION observation which typically contains full chat history
213161 if not messages :
214162 try :
215- all_observations = trace .get ("observations" ) or []
163+ all_observations = trace .get ("observations" , [])
216164 gens = [obs for obs in all_observations if obs .get ("type" ) == "GENERATION" ]
217165 if gens :
218166 gens .sort (key = lambda x : x .get ("start_time" , "" ))
@@ -238,7 +186,7 @@ def get_final_generation_in_span_dict(trace: Dict[str, Any], span_name: str) ->
238186 The final generation dictionary, or None if not found
239187 """
240188 # Get all observations from the trace
241- all_observations = trace .get ("observations" ) or []
189+ all_observations = trace .get ("observations" , [])
242190
243191 # Find a span with the given name that has generation children
244192 parent_span = None
0 commit comments