Skip to content

Commit 1221842

Browse files
committed
enable other stream
1 parent b4d149c commit 1221842

File tree

2 files changed

+37
-12
lines changed


eval_protocol/mcp/execution/policy.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,14 @@
55
Rewritten to use LiteLLM for unified retry logic, caching, and provider support.
66
"""
77

8-
import asyncio
9-
import json
108
import logging
119
import os
12-
from abc import ABC, abstractmethod
13-
from typing import Any, Dict, List, Literal, Optional, Tuple, Union
10+
from typing import Any, Dict, List, Literal, Optional
1411

1512
import litellm
16-
from litellm import acompletion, completion
13+
from litellm import acompletion
14+
from litellm.types.utils import ModelResponse
15+
from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
1716
from litellm.caching.caching import Cache
1817
from litellm.caching.dual_cache import DualCache
1918
from litellm.caching.in_memory_cache import InMemoryCache
@@ -194,7 +193,20 @@ async def _make_llm_call(self, messages: List[Dict[str, Any]], tools: List[Dict[
194193
request_params["tools"] = tools
195194

196195
try:
197-
response = await acompletion(model=self.model_id, **request_params)
196+
if request_params.get("stream") is True:
197+
chunks = []
198+
stream = await acompletion(model=self.model_id, **request_params)
199+
200+
assert isinstance(stream, CustomStreamWrapper), "Stream should be a CustomStreamWrapper"
201+
202+
async for chunk in stream: # pyright: ignore[reportGeneralTypeIssues]
203+
chunks.append(chunk)
204+
response = litellm.stream_chunk_builder(chunks, messages)
205+
else:
206+
response = await acompletion(model=self.model_id, **request_params)
207+
208+
assert response is not None, "Response is None"
209+
assert isinstance(response, ModelResponse), "Response should be ModelResponse"
198210

199211
# Log cache hit/miss for monitoring
200212
hidden = getattr(response, "_hidden_params", {})

eval_protocol/pytest/default_mcp_gym_rollout_processor.py

Lines changed: 19 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -223,13 +223,26 @@ def __call__(self, rows: List[EvaluationRow], config: RolloutProcessorConfig) ->
223223
try:
224224
self.server.start()
225225

226+
model_id = str(
227+
(config.completion_params.get("model") if config.completion_params else None) or "gpt-4o-mini"
228+
)
229+
temperature = config.completion_params.get("temperature", 0.0)
230+
max_tokens = config.completion_params.get("max_tokens", 4096)
231+
232+
# Pass all other completion_params (e.g. stream=True) via kwargs
233+
other_params = {
234+
k: v
235+
for k, v in (config.completion_params or {}).items()
236+
if k not in ["model", "temperature", "max_tokens", "extra_body"]
237+
}
238+
extra_body = config.completion_params.get("extra_body", {}) or {}
239+
226240
self.policy = ep.LiteLLMPolicy(
227-
model_id=str(
228-
(config.completion_params.get("model") if config.completion_params else None) or "gpt-4o-mini"
229-
),
230-
temperature=config.completion_params.get("temperature", 0.0),
231-
max_tokens=config.completion_params.get("max_tokens", 4096),
232-
**(config.completion_params.get("extra_body", {}) or {}),
241+
model_id=model_id,
242+
temperature=temperature,
243+
max_tokens=max_tokens,
244+
**extra_body,
245+
**other_params,
233246
)
234247

235248
except Exception as e:

0 commit comments

Comments (0)