1313from eval_protocol .mcp .execution .manager import ExecutionManager
1414from eval_protocol .models import EvaluationRow
1515from eval_protocol .pytest .rollout_processor import RolloutProcessor
16- from eval_protocol .pytest .types import RolloutProcessorConfig
16+ from eval_protocol .pytest .types import RolloutProcessorConfig , ServerMode
1717
1818
1919class MCPServerManager :
@@ -207,37 +207,78 @@ class MCPGymRolloutProcessor(RolloutProcessor):
207207 using the eval_protocol framework with proper cleanup handling.
208208 """
209209
210+ # Shared server state for "shared" mode
211+ _shared_server_lock = threading .Lock ()
212+ _shared_server : Optional [MCPServerManager ] = None
213+ _shared_server_started : bool = False
214+
def __init__(self):
    """Create a processor with no server, no policy, and "per_run" mode.

    Attributes set here:
        server: Handle to the MCP server this instance is currently bound
            to. ``__call__`` assigns it: a freshly started server in
            "per_run" mode, or the class-level shared server in "shared"
            mode.
        policy: Policy object used for rollouts; unset (``None``) until
            ``__call__`` creates one.
        server_mode: Mode used by the most recent ``__call__``. ``cleanup``
            reads it to decide whether this instance may stop the server.
    """
    # Instance-level server handle (used in "per_run" mode)
    self.server: Optional[MCPServerManager] = None
    self.policy = None
    # Track which mode this instance last used ("per_run" or "shared")
    self.server_mode: ServerMode = "per_run"
213221
214222 def __call__ (self , rows : List [EvaluationRow ], config : RolloutProcessorConfig ) -> List [asyncio .Task [EvaluationRow ]]:
215223 """Process evaluation rows with MCP gym environments."""
216- start_server = config .kwargs .get ("start_server" , True ) if config .kwargs else True
224+ server_kwargs = dict (config .kwargs or {})
225+ start_server = bool (server_kwargs .pop ("start_server" , True ))
226+ server_mode : ServerMode = server_kwargs .pop ("server_mode" , "per_run" )
227+ port = int (server_kwargs .pop ("port" , 9700 ))
217228
218- if start_server :
219- # Create fresh MCP server and environments for this run
220- if config .server_script_path is None :
221- raise ValueError ("server_script_path is required for MCPGymRolloutProcessor" )
229+ self .server_mode = server_mode
222230
223- self .server = MCPServerManager (config .server_script_path , port = 9700 , ** (config .kwargs or {}))
231+ if server_mode == "shared" :
232+ # Shared, class-level server used across calls
233+ if start_server and not MCPGymRolloutProcessor ._shared_server_started :
234+ with MCPGymRolloutProcessor ._shared_server_lock :
235+ if not MCPGymRolloutProcessor ._shared_server_started :
236+ if config .server_script_path is None :
237+ raise ValueError ("server_script_path is required for MCPGymRolloutProcessor" )
224238
225- try :
226- self .server .start ()
239+ shared_server = MCPServerManager (config .server_script_path , port = port , ** server_kwargs )
227240
228- except Exception as e :
229- if self .server :
230- self .server .stop ()
231- self .server = None
232- self .policy = None
233- raise e
241+ try :
242+ shared_server .start ()
243+ except Exception as e :
244+ shared_server .stop ()
245+ raise e
234246
235- else :
236- # Reuse existing MCP environments for retry
237- if not self .server :
247+ MCPGymRolloutProcessor ._shared_server = shared_server
248+ MCPGymRolloutProcessor ._shared_server_started = True
249+
250+ if MCPGymRolloutProcessor ._shared_server is None :
238251 raise RuntimeError (
239- "Cannot retry without existing server/environments . Call with start_server=True first."
252+ "Shared MCP server not started . Call with server_mode='shared' and start_server=True first."
240253 )
254+ # Bind this instance to the shared server for this call
255+ self .server = MCPGymRolloutProcessor ._shared_server
256+
257+ else :
258+ # Default "per_run" behavior: fresh server per call, reused only for retries
259+ if start_server :
260+ # Create fresh MCP server and environments for this run
261+ if config .server_script_path is None :
262+ raise ValueError ("server_script_path is required for MCPGymRolloutProcessor" )
263+
264+ self .server = MCPServerManager (config .server_script_path , port = port , ** server_kwargs )
265+
266+ try :
267+ self .server .start ()
268+
269+ except Exception as e :
270+ if self .server :
271+ self .server .stop ()
272+ self .server = None
273+ self .policy = None
274+ raise e
275+
276+ else :
277+ # Reuse existing MCP environments for retry (per_run mode)
278+ if not self .server :
279+ raise RuntimeError (
280+ "Cannot retry without existing server/environments. Call with start_server=True first."
281+ )
241282
242283 model_id = str ((config .completion_params .get ("model" ) if config .completion_params else None ) or "gpt-4o-mini" )
243284 temperature = config .completion_params .get ("temperature" , 0.0 )
@@ -260,7 +301,7 @@ def __call__(self, rows: List[EvaluationRow], config: RolloutProcessorConfig) ->
260301 )
261302 # Create MCP environments directly from evaluation_rows
262303 envs = ep .make (
263- "http://localhost:9700 /mcp/" ,
304+ f "http://localhost:{ port } /mcp/" ,
264305 evaluation_rows = rows ,
265306 model_id = self .policy .model_id ,
266307 )
@@ -278,6 +319,13 @@ def __call__(self, rows: List[EvaluationRow], config: RolloutProcessorConfig) ->
278319
279320 def cleanup (self ) -> None :
280321 """Cleanup MCP server and environments."""
322+ # For shared mode, don't stop the shared server here; rely on global cleanup
323+ # (atexit or an explicit class-level shutdown) so multiple users can share it.
324+ if self .server_mode == "shared" :
325+ self .policy = None
326+ return
327+
328+ # Per-run mode: stop this instance's server
281329 if self .server :
282330 self .server .stop ()
283331 self .server = None
0 commit comments