1010from __future__ import annotations
1111
1212import logging
13- from typing import Any , Dict , List , Optional
13+ from typing import Any , Dict , List , Optional , Iterable
1414
1515from eval_protocol .models import EvaluationRow , InputMetadata , Message
1616
@@ -36,7 +36,7 @@ class LangSmithAdapter:
3636
3737 def __init__ (self , client : Optional [Client ] = None ) -> None :
3838 if not LANGSMITH_AVAILABLE :
39- raise ImportError ("LangSmith not installed. Install with: pip install langsmith" )
39+ raise ImportError ("LangSmith not installed. Install with: pip install 'eval-protocol[ langsmith]' " )
4040 self .client = client or Client ()
4141
4242 def get_evaluation_rows (
@@ -45,6 +45,31 @@ def get_evaluation_rows(
4545 project_name : str ,
4646 limit : int = 50 ,
4747 include_tool_calls : bool = True ,
48+ # Pass-through filters to list_runs to match LangSmith Client API
49+ run_id : Optional [str ] = None ,
50+ ids : Optional [List [str ]] = None ,
51+ run_type : Optional [str ] = None ,
52+ execution_order : Optional [int ] = None ,
53+ parent_run_id : Optional [str ] = None ,
54+ trace_id : Optional [str ] = None ,
55+ trace_ids : Optional [List [str ]] = None ,
56+ reference_example_id : Optional [str ] = None ,
57+ session_name : Optional [str ] = None ,
58+ error : Optional [bool ] = None ,
59+ start_time : Optional [str ] = None ,
60+ end_time : Optional [str ] = None ,
61+ filter_expr : Optional [str ] = None , # server-side filter DSL
62+ tags : Optional [List [str ]] = None ,
63+ metadata : Optional [Dict [str , Any ]] = None ,
64+ feedback_keys : Optional [List [str ]] = None ,
65+ feedback_source : Optional [str ] = None ,
66+ tree_id : Optional [str ] = None ,
67+ # ordering/pagination
68+ offset : Optional [int ] = None ,
69+ order_by : Optional [str ] = None ,
70+ # selection
71+ select : Optional [List [str ]] = None ,
72+ ** list_runs_kwargs : Any ,
4873 ) -> List [EvaluationRow ]:
4974 """Pull runs from LangSmith and convert to EvaluationRow format.
5075
@@ -55,17 +80,57 @@ def get_evaluation_rows(
5580 """
5681 rows : List [EvaluationRow ] = []
5782
58- # Prefer root runs; they usually contain messages in inputs/outputs when tracing app-level flows
59- runs = list (
60- self .client .list_runs (
61- project_name = project_name ,
62- is_root = True ,
63- limit = limit ,
64- select = ["id" , "inputs" , "outputs" ],
65- )
66- )
67-
83+ # Fetch runs with pass-through filters. Prefer root runs by default.
84+ params : Dict [str , Any ] = {"project_name" : project_name , "limit" : limit }
85+ # Only include non-None params
86+ if run_type is None :
87+ params ["is_root" ] = True
88+ for key , value in [
89+ ("id" , run_id ),
90+ ("ids" , ids ),
91+ ("run_type" , run_type ),
92+ ("execution_order" , execution_order ),
93+ ("parent_run_id" , parent_run_id ),
94+ ("trace_id" , trace_id ),
95+ ("trace_ids" , trace_ids ),
96+ ("reference_example_id" , reference_example_id ),
97+ ("session_name" , session_name ),
98+ ("error" , error ),
99+ ("start_time" , start_time ),
100+ ("end_time" , end_time ),
101+ ("filter" , filter_expr ),
102+ ("tags" , tags ),
103+ ("metadata" , metadata ),
104+ ("feedback_keys" , feedback_keys ),
105+ ("feedback_source" , feedback_source ),
106+ ("tree_id" , tree_id ),
107+ ("offset" , offset ),
108+ ("order_by" , order_by ),
109+ ]:
110+ if value is not None :
111+ params [key ] = value
112+ params ["select" ] = select or ["id" , "inputs" , "outputs" , "trace_id" ]
113+
114+ # Merge any additional kwargs last to allow explicit overrides
115+ if list_runs_kwargs :
116+ for k , v in list_runs_kwargs .items ():
117+ if v is not None :
118+ params [k ] = v
119+
120+ runs_iter : Iterable [Any ] = self .client .list_runs (** params )
121+
122+ runs = list (runs_iter )
123+ if not runs :
124+ logger .warning ("No LangSmith runs found for project '%s' with current filters" , project_name )
125+ return []
126+
127+ # Group by trace_id and pick the last run in each trace (assume iterator yields chronological)
128+ trace_to_last_run : Dict [str , Any ] = {}
68129 for r in runs :
130+ t_id = str (getattr (r , "trace_id" , "" )) or str (getattr (r , "id" , "" ))
131+ trace_to_last_run [t_id ] = r
132+
133+ for r in trace_to_last_run .values ():
69134 try :
70135 inp = getattr (r , "inputs" , None )
71136 out = getattr (r , "outputs" , None )
@@ -86,10 +151,9 @@ def get_evaluation_rows(
86151
87152 # Deduplicate consecutive identical user messages (common echo pattern)
88153 def _canon (text : Any ) -> str :
89- try :
90- return " " .join (str (text or "" ).strip ().lower ().split ())
91- except Exception :
92- return str (text or "" )
154+ # Best-effort canonicalization; avoid broad exception handling warnings by handling types
155+ text_str = str (text ) if text is not None else ""
156+ return " " .join (text_str .strip ().lower ().split ())
93157
94158 deduped : List [Message ] = []
95159 for m in ep_messages :
@@ -102,23 +166,115 @@ def _canon(text: Any) -> str:
102166 if not ep_messages :
103167 continue
104168
169+ tools = None
170+ if include_tool_calls and isinstance (inp , dict ):
171+ # Try to extract tool schema if present in inputs
172+ if "tools" in inp :
173+ tools = inp ["tools" ]
174+
105175 rows .append (
106176 EvaluationRow (
107177 messages = ep_messages ,
178+ tools = tools ,
108179 input_metadata = InputMetadata (
109180 session_data = {
110181 "langsmith_run_id" : str (getattr (r , "id" , "" )),
182+ "langsmith_trace_id" : str (getattr (r , "trace_id" , "" )),
111183 "langsmith_project" : project_name ,
112184 }
113185 ),
114186 )
115187 )
116- except Exception as e :
188+ except ( AttributeError , ValueError , KeyError , TypeError ) as e :
117189 logger .warning ("Failed to convert run %s: %s" , getattr (r , "id" , "" ), e )
118190 continue
119191
120192 return rows
121193
194+ def get_evaluation_rows_by_ids (
195+ self ,
196+ * ,
197+ run_ids : Optional [List [str ]] = None ,
198+ trace_ids : Optional [List [str ]] = None ,
199+ include_tool_calls : bool = True ,
200+ project_name : Optional [str ] = None ,
201+ ) -> List [EvaluationRow ]:
202+ """Fetch specific runs or traces and convert to EvaluationRow.
203+
204+ If both run_ids and trace_ids are provided, both sets are fetched.
205+ """
206+ results : List [EvaluationRow ] = []
207+
208+ fetched_runs : List [Any ] = []
209+ try :
210+ if run_ids :
211+ fetched_runs .extend (list (self .client .list_runs (ids = run_ids , select = ["id" , "inputs" , "outputs" , "trace_id" ])) )
212+ if trace_ids :
213+ fetched_runs .extend (list (self .client .list_runs (trace_ids = trace_ids , select = ["id" , "inputs" , "outputs" , "trace_id" ])) )
214+ except (AttributeError , ValueError , KeyError , TypeError ) as e :
215+ logger .warning ("Failed to fetch runs by ids: %s" , e )
216+ return []
217+
218+ if not fetched_runs :
219+ logger .warning ("No LangSmith runs found for provided ids" )
220+ return []
221+
222+ # Prefer the last run per trace id
223+ trace_to_last_run : Dict [str , Any ] = {}
224+ for r in fetched_runs :
225+ t_id = str (getattr (r , "trace_id" , "" )) or str (getattr (r , "id" , "" ))
226+ trace_to_last_run [t_id ] = r
227+
228+ for r in trace_to_last_run .values ():
229+ try :
230+ inp = getattr (r , "inputs" , None )
231+ out = getattr (r , "outputs" , None )
232+
233+ ep_messages : List [Message ] = []
234+ if isinstance (out , dict ) and isinstance (out .get ("messages" ), list ):
235+ ep_messages .extend (self ._extract_messages_from_payload ({"messages" : out ["messages" ]}, include_tool_calls , is_output = True ))
236+ else :
237+ ep_messages .extend (self ._extract_messages_from_payload (inp , include_tool_calls ))
238+ ep_messages .extend (self ._extract_messages_from_payload (out , include_tool_calls , is_output = True ))
239+
240+ def _canon (text : Any ) -> str :
241+ text_str = str (text ) if text is not None else ""
242+ return " " .join (text_str .strip ().lower ().split ())
243+
244+ deduped : List [Message ] = []
245+ for m in ep_messages :
246+ if deduped and m .role == "user" and deduped [- 1 ].role == "user" :
247+ if _canon (m .content ) == _canon (deduped [- 1 ].content ):
248+ continue
249+ deduped .append (m )
250+ ep_messages = deduped
251+
252+ if not ep_messages :
253+ continue
254+
255+ tools = None
256+ if include_tool_calls and isinstance (inp , dict ) and "tools" in inp :
257+ tools = inp ["tools" ]
258+
259+ results .append (
260+ EvaluationRow (
261+ messages = ep_messages ,
262+ tools = tools ,
263+ input_metadata = InputMetadata (
264+ session_data = {
265+ "langsmith_run_id" : str (getattr (r , "id" , "" )),
266+ "langsmith_trace_id" : str (getattr (r , "trace_id" , "" )),
267+ "langsmith_project" : project_name or "" ,
268+ }
269+ ),
270+ )
271+ )
272+ except (AttributeError , ValueError , KeyError , TypeError ) as e :
273+ logger .warning ("Failed to convert run %s: %s" , getattr (r , "id" , "" ), e )
274+ continue
275+
276+ return results
277+
122278 def _extract_messages_from_payload (
123279 self , payload : Any , include_tool_calls : bool , * , is_output : bool = False
124280 ) -> List [Message ]:
@@ -161,13 +317,11 @@ def _dict_to_message(msg_dict: Dict[str, Any]) -> Message:
161317 # Extract id/type/function fields from dicts or provider-native objects
162318 if isinstance (tc , dict ):
163319 tc_id = tc .get ("id" , None )
164- tc_type = tc .get ("type" , "function" ) or "function"
165320 fn = tc .get ("function" , {}) or {}
166321 fn_name = fn .get ("name" , None )
167322 fn_args = fn .get ("arguments" , None )
168323 else :
169324 tc_id = getattr (tc , "id" , None )
170- tc_type = getattr (tc , "type" , None ) or "function"
171325 f = getattr (tc , "function" , None )
172326 fn_name = getattr (f , "name" , None ) if f is not None else None
173327 fn_args = getattr (f , "arguments" , None ) if f is not None else None
@@ -185,7 +339,7 @@ def _dict_to_message(msg_dict: Dict[str, Any]) -> Message:
185339 )
186340 )
187341 tool_calls = typed_calls
188- except Exception :
342+ except ( ImportError , AttributeError , TypeError , ValueError ) :
189343 # If OpenAI types unavailable, leave None to satisfy type checker
190344 tool_calls = None
191345 if "tool_call_id" in msg_dict :
0 commit comments