@@ -35,7 +35,7 @@ async def process_row(row: EvaluationRow) -> EvaluationRow:
3535 request_params = {"messages" : messages_payload , ** config .completion_params }
3636 # Ensure caching is disabled only for this request (review feedback)
3737 request_params ["cache" ] = {"no-cache" : True }
38- request_params ["timeout " ] = 1200 # 20 minutes timeout
38+ request_params ["stream " ] = True # Enable streaming
3939 # Single-level reasoning effort: expect `reasoning_effort` only
4040 effort_val = None
4141
@@ -68,10 +68,23 @@ async def process_row(row: EvaluationRow) -> EvaluationRow:
6868
6969 _litellm = importlib .import_module ("litellm" )
7070 acompletion = getattr (_litellm , "acompletion" )
71- response = await acompletion (** request_params )
7271
73- assistant_content = response .choices [0 ].message .content or ""
74- tool_calls = response .choices [0 ].message .tool_calls if response .choices [0 ].message .tool_calls else None
72+ # Handle streaming response
73+ assistant_content = ""
74+ tool_calls = None
75+ usage_info = None
76+
77+ async for chunk in await acompletion (** request_params ):
78+ if chunk .choices and len (chunk .choices ) > 0 :
79+ delta = chunk .choices [0 ].delta
80+ if hasattr (delta , "content" ) and delta .content :
81+ assistant_content += delta .content
82+ if hasattr (delta , "tool_calls" ) and delta .tool_calls :
83+ tool_calls = delta .tool_calls
84+
85+ # Capture usage info from the final chunk
86+ if hasattr (chunk , "usage" ) and chunk .usage :
87+ usage_info = chunk .usage
7588
7689 converted_tool_calls = None
7790 if tool_calls :
@@ -112,11 +125,19 @@ async def process_row(row: EvaluationRow) -> EvaluationRow:
112125 )
113126 ]
114127
115- row .execution_metadata .usage = CompletionUsage (
116- prompt_tokens = response .usage .prompt_tokens ,
117- completion_tokens = response .usage .completion_tokens ,
118- total_tokens = response .usage .total_tokens ,
119- )
128+ if usage_info :
129+ row .execution_metadata .usage = CompletionUsage (
130+ prompt_tokens = usage_info .prompt_tokens ,
131+ completion_tokens = usage_info .completion_tokens ,
132+ total_tokens = usage_info .total_tokens ,
133+ )
134+ else :
135+ # Fallback if usage info not available from streaming
136+ row .execution_metadata .usage = CompletionUsage (
137+ prompt_tokens = 0 ,
138+ completion_tokens = 0 ,
139+ total_tokens = 0 ,
140+ )
120141
121142 row .messages = messages
122143
0 commit comments