@@ -6,7 +6,6 @@
 
 import litellm
 from litellm import acompletion
-from typing import Dict
 
 from eval_protocol.dataset_logger import default_logger
 from eval_protocol.models import EvaluationRow, Message
@@ -36,7 +35,6 @@ async def process_row(row: EvaluationRow) -> EvaluationRow:
     request_params = {"messages": messages_payload, **config.completion_params}
     # Ensure caching is disabled only for this request (review feedback)
     request_params["cache"] = {"no-cache": True}
-    request_params["stream"] = True  # Enable streaming
     # Single-level reasoning effort: expect `reasoning_effort` only
     effort_val = None
 
@@ -64,16 +62,14 @@ async def process_row(row: EvaluationRow) -> EvaluationRow:
     if row.tools is not None:
         request_params["tools"] = row.tools
 
-    chunks = []
-    print("time: ", time.time())
-
-    stream = await acompletion(**request_params)
-    async for chunk in stream:
-        # print("chunk added at time: ", time.time())
-        # print("chunk: ", chunk)
-        chunks.append(chunk)
-
-    response = litellm.stream_chunk_builder(chunks, messages_payload)
+    if request_params.get("stream") is True:
+        chunks = []
+        stream = await acompletion(**request_params)
+        async for chunk in stream:  # pyright: ignore[reportGeneralTypeIssues]
+            chunks.append(chunk)
+        response = litellm.stream_chunk_builder(chunks, messages_payload)
+    else:
+        response = await acompletion(**request_params)
 
     if response is None:
         raise ValueError("Response is None")
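For context, a minimal self-contained sketch of the pattern this diff introduces: honor the caller's `stream` flag, drain the async stream when it is set, and stitch the chunks back into a full response with `litellm.stream_chunk_builder`. The `complete` helper, model name, and prompt below are illustrative placeholders, not values from this PR.

# Sketch of the conditional-streaming pattern, assuming litellm is installed
# and provider credentials are configured via environment variables.
import asyncio

import litellm
from litellm import acompletion


async def complete(request_params: dict):
    if request_params.get("stream") is True:
        # Streaming path: collect every chunk, then rebuild a regular
        # completion object from them.
        chunks = []
        stream = await acompletion(**request_params)
        async for chunk in stream:
            chunks.append(chunk)
        return litellm.stream_chunk_builder(chunks, request_params["messages"])
    # Non-streaming path: a single awaited call returns the full response.
    return await acompletion(**request_params)


if __name__ == "__main__":
    params = {
        "model": "gpt-4o-mini",  # placeholder model name
        "messages": [{"role": "user", "content": "Say hello."}],
        "stream": True,
    }
    print(asyncio.run(complete(params)))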