From c37b81619afacfcbba3716c7ac331c6111bac887 Mon Sep 17 00:00:00 2001 From: 1stprinciple Date: Sat, 25 Oct 2025 00:02:41 +0200 Subject: [PATCH 1/8] init --- .../default_mcp_gym_rollout_processor.py | 6 +- examples/gym_2048/gym_2048_adapter.py | 137 ++++++++++++++++++ 2 files changed, 140 insertions(+), 3 deletions(-) create mode 100644 examples/gym_2048/gym_2048_adapter.py diff --git a/eval_protocol/pytest/default_mcp_gym_rollout_processor.py b/eval_protocol/pytest/default_mcp_gym_rollout_processor.py index 1d35212a..4bf66103 100644 --- a/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +++ b/eval_protocol/pytest/default_mcp_gym_rollout_processor.py @@ -9,10 +9,10 @@ from typing import List, Optional import eval_protocol as ep +from eval_protocol.mcp.execution.manager import ExecutionManager from eval_protocol.models import EvaluationRow from eval_protocol.pytest.rollout_processor import RolloutProcessor from eval_protocol.pytest.types import RolloutProcessorConfig -from eval_protocol.mcp.execution.manager import ExecutionManager class MCPServerManager: @@ -181,8 +181,8 @@ def _signal_handler(cls, signum, frame): def _register_cleanup_handlers(cls): """Register cleanup handlers - called only once""" atexit.register(cls._cleanup_all_servers) - signal.signal(signal.SIGINT, cls._signal_handler) # Ctrl+C - signal.signal(signal.SIGTERM, cls._signal_handler) # Termination signal + # signal.signal(signal.SIGINT, cls._signal_handler) # Ctrl+C + # signal.signal(signal.SIGTERM, cls._signal_handler) # Termination signal def __enter__(self): """Context manager entry""" diff --git a/examples/gym_2048/gym_2048_adapter.py b/examples/gym_2048/gym_2048_adapter.py new file mode 100644 index 00000000..652c7c8d --- /dev/null +++ b/examples/gym_2048/gym_2048_adapter.py @@ -0,0 +1,137 @@ +""" +2048 Environment Adapter + +This adapter implements the EnvironmentAdapter interface for 2048 environments, +enabling integration with the MCP-Gym framework. +""" + +from typing import Any, Dict, Optional, Tuple + +import gym +import gym_2048 +from gym_2048.envs.game2048_env import Game2048Env + +from eval_protocol.mcp.adapter import EnvironmentAdapter + + +class Gym2048Adapter(EnvironmentAdapter): + """2048 adapter for MCP-Gym framework.""" + + ACTION_NAMES = ["UP", "RIGHT", "DOWN", "LEFT"] + + def create_environment(self, config: Optional[Dict[str, Any]] = None) -> Game2048Env: + """ + Create 2048 environment. + + Args: + config: config is not used in this implementation + + Returns: + 2048 environment instance + """ + print(f"🔍 Gym2048Adapter.create_environment: config: {config}") + env = gym.make('2048-v0') + return env + + def create_environment_with_seed( + self, config: Optional[Dict[str, Any]] = None, seed: Optional[int] = None + ) -> Tuple[Game2048Env, int, Dict[str, Any]]: + """ + Create 2048 environment with seed and return initial state. + + Args: + config: config is not used in this implementation + seed: Seed for reproducible environments + + Returns: + Tuple of (environment, initial_observation, initial_info) + """ + print(f"🔍 Gym2048Adapter.create_environment_with_seed: seed: {seed}") + config = config or {} + + # Add seed to config for environment creation + env_config = {**config, "seed": seed} + print(f"🔍 Gym2048Adapter.create_environment_with_seed: env_config: {env_config}") + + env = self.create_environment(env_config) + print(f"🔍 Gym2048Adapter.create_environment_with_seed: created env, calling reset with seed: {seed}") + obs, info = env.reset(seed=seed) + print(f"🔍 Gym2048Adapter.create_environment_with_seed: reset returned obs: {obs}, info: {info}") + + return env, obs, info + + def reset_environment(self, env: Game2048Env, seed: Optional[int] = None) -> Tuple[int, Dict[str, Any]]: + """ + Reset environment. + + Args: + env: Environment instance + seed: Optional seed for reset + + Returns: + Tuple of (observation, info) + """ + return env.reset(seed=seed) + + def step_environment(self, env: Game2048Env, action: int) -> Tuple[int, float, bool, bool, Dict[str, Any]]: + """ + Execute environment step. + + Args: + env: Environment instance + action: Action index + + Returns: + Tuple of (observation, reward, terminated, truncated, info) + """ + return env.step(action) + + def close_environment(self, env: Game2048Env) -> None: + """ + Close environment. + + Args: + env: Environment instance + """ + env.close() + + def parse_action(self, action_str: str) -> int: + """ + Parse action string to integer. + + Args: + action_str: Action string (UP, RIGHT, DOWN, LEFT) + + Returns: + Action index + + Raises: + ValueError: If action is invalid + """ + action_str = action_str.strip().upper() + if action_str not in self.ACTION_NAMES: + raise ValueError(f"Invalid action '{action_str}'. Valid actions: {self.ACTION_NAMES}") + return self.ACTION_NAMES.index(action_str) + + def format_observation(self, observation: int) -> int: + """ + Format observation for JSON serialization. + + Args: + observation: Raw observation from environment + + Returns: + Formatted observation + """ + return int(observation) + + def get_default_config(self) -> Dict[str, Any]: + """ + Get default configuration. + + Returns: + Default configuration dictionary + """ + return { + "is_slippery": False, + } From 2d0fe2d47982dc839d8bad9c964c929937d11170 Mon Sep 17 00:00:00 2001 From: 1stprinciple Date: Sat, 25 Oct 2025 01:34:33 +0200 Subject: [PATCH 2/8] -import signal --- eval_protocol/pytest/default_mcp_gym_rollout_processor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/eval_protocol/pytest/default_mcp_gym_rollout_processor.py b/eval_protocol/pytest/default_mcp_gym_rollout_processor.py index 4bf66103..306ab568 100644 --- a/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +++ b/eval_protocol/pytest/default_mcp_gym_rollout_processor.py @@ -1,7 +1,6 @@ import asyncio import atexit import os -import signal import socket import subprocess import time From c1b06d4ee36a77d4e58d32e29836a532ba89b8c4 Mon Sep 17 00:00:00 2001 From: 1stprinciple Date: Sat, 25 Oct 2025 01:45:00 +0200 Subject: [PATCH 3/8] commend out signal --- development/utils/subprocess_manager.py | 3 ++- eval_protocol/cli_commands/deploy.py | 17 ++++++++--------- eval_protocol/rewards/apps_testing_util.py | 2 +- eval_protocol/rewards/code_execution.py | 3 ++- 4 files changed, 13 insertions(+), 12 deletions(-) diff --git a/development/utils/subprocess_manager.py b/development/utils/subprocess_manager.py index 1d568c87..e2f0cd57 100644 --- a/development/utils/subprocess_manager.py +++ b/development/utils/subprocess_manager.py @@ -2,7 +2,8 @@ import os import re # Added for Serveo URL parsing import shutil # Added for checking ssh availability -import signal + +# import signal import subprocess import time from typing import IO, Any, Dict, List, Optional # Added IO, Any, List, Dict, Optional diff --git a/eval_protocol/cli_commands/deploy.py b/eval_protocol/cli_commands/deploy.py index 1ae0313b..8a358f54 100644 --- a/eval_protocol/cli_commands/deploy.py +++ b/eval_protocol/cli_commands/deploy.py @@ -19,13 +19,15 @@ # Import functions with explicit names to match expected signatures from development.utils.subprocess_manager import ( start_ngrok_and_get_url as _start_ngrok_and_get_url, - start_process as _start_process, + ) + from development.utils.subprocess_manager import start_process as _start_process + from development.utils.subprocess_manager import ( start_serveo_and_get_url as _start_serveo_and_get_url, - stop_process as _stop_process, ) + from development.utils.subprocess_manager import stop_process as _stop_process except ImportError: # Fallback implementations when development module is not available - import signal + # import signal import socket import subprocess @@ -79,12 +81,9 @@ def start_ngrok_and_get_url(local_port, log_path): from eval_protocol.auth import get_fireworks_account_id -from eval_protocol.config import ( - GCPCloudRunConfig, - RewardKitConfig, - _config_file_path as global_loaded_config_path, - get_config, -) +from eval_protocol.config import GCPCloudRunConfig, RewardKitConfig +from eval_protocol.config import _config_file_path as global_loaded_config_path +from eval_protocol.config import get_config from eval_protocol.evaluation import create_evaluation from eval_protocol.gcp_tools import ( build_and_push_docker_image, diff --git a/eval_protocol/rewards/apps_testing_util.py b/eval_protocol/rewards/apps_testing_util.py index e4b931f9..fd8e8d78 100644 --- a/eval_protocol/rewards/apps_testing_util.py +++ b/eval_protocol/rewards/apps_testing_util.py @@ -23,7 +23,7 @@ import re # Added for re.search # to run the solution files we're using a timing based approach -import signal +# import signal import sys import textwrap # Added for dedenting model output import traceback diff --git a/eval_protocol/rewards/code_execution.py b/eval_protocol/rewards/code_execution.py index 38c7189a..c2314edc 100644 --- a/eval_protocol/rewards/code_execution.py +++ b/eval_protocol/rewards/code_execution.py @@ -21,7 +21,8 @@ import re import resource import shlex # Added for robust splitting of arguments -import signal + +# import signal import subprocess import sys import tempfile From 62824b13692e16b85115bbf918d99465c81a86e3 Mon Sep 17 00:00:00 2001 From: 1stprinciple Date: Sat, 25 Oct 2025 10:07:00 +0200 Subject: [PATCH 4/8] print("model_id from eval_protocol: ", model_id) --- eval_protocol/pytest/default_mcp_gym_rollout_processor.py | 1 + 1 file changed, 1 insertion(+) diff --git a/eval_protocol/pytest/default_mcp_gym_rollout_processor.py b/eval_protocol/pytest/default_mcp_gym_rollout_processor.py index 306ab568..ecc6c835 100644 --- a/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +++ b/eval_protocol/pytest/default_mcp_gym_rollout_processor.py @@ -225,6 +225,7 @@ def __call__(self, rows: List[EvaluationRow], config: RolloutProcessorConfig) -> model_id = str( (config.completion_params.get("model") if config.completion_params else None) or "gpt-4o-mini" ) + print("model_id from eval_protocol: ", model_id) temperature = config.completion_params.get("temperature", 0.0) max_tokens = config.completion_params.get("max_tokens", 4096) From 4759b68590d65801caaf6ab1f41faf08d0b85ef7 Mon Sep 17 00:00:00 2001 From: 1stprinciple Date: Sat, 25 Oct 2025 11:10:18 +0200 Subject: [PATCH 5/8] new LiteLLMPolicy --- .../default_mcp_gym_rollout_processor.py | 49 +++++++++---------- 1 file changed, 24 insertions(+), 25 deletions(-) diff --git a/eval_protocol/pytest/default_mcp_gym_rollout_processor.py b/eval_protocol/pytest/default_mcp_gym_rollout_processor.py index ecc6c835..21e52f6c 100644 --- a/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +++ b/eval_protocol/pytest/default_mcp_gym_rollout_processor.py @@ -222,29 +222,6 @@ def __call__(self, rows: List[EvaluationRow], config: RolloutProcessorConfig) -> try: self.server.start() - model_id = str( - (config.completion_params.get("model") if config.completion_params else None) or "gpt-4o-mini" - ) - print("model_id from eval_protocol: ", model_id) - temperature = config.completion_params.get("temperature", 0.0) - max_tokens = config.completion_params.get("max_tokens", 4096) - - # Pass all other completion_params (e.g. stream=True) via kwargs - other_params = { - k: v - for k, v in (config.completion_params or {}).items() - if k not in ["model", "temperature", "max_tokens", "extra_body"] - } - extra_body = config.completion_params.get("extra_body", {}) or {} - - self.policy = ep.LiteLLMPolicy( - model_id=model_id, - temperature=temperature, - max_tokens=max_tokens, - **extra_body, - **other_params, - ) - except Exception as e: if self.server: self.server.stop() @@ -254,13 +231,35 @@ def __call__(self, rows: List[EvaluationRow], config: RolloutProcessorConfig) -> else: # Reuse existing MCP environments for retry - if not self.server or not self.policy: + if not self.server: raise RuntimeError( "Cannot retry without existing server/environments. Call with start_server=True first." ) + + model_id = str( + (config.completion_params.get("model") if config.completion_params else None) or "gpt-4o-mini" + ) + print("model_id from eval_protocol: ", model_id) + temperature = config.completion_params.get("temperature", 0.0) + max_tokens = config.completion_params.get("max_tokens", 4096) + + # Pass all other completion_params (e.g. stream=True) via kwargs + other_params = { + k: v + for k, v in (config.completion_params or {}).items() + if k not in ["model", "temperature", "max_tokens", "extra_body"] + } + extra_body = config.completion_params.get("extra_body", {}) or {} + + self.policy = ep.LiteLLMPolicy( + model_id=model_id, + temperature=temperature, + max_tokens=max_tokens, + **extra_body, + **other_params, + ) # Create MCP environments directly from evaluation_rows - assert self.policy is not None, "Policy must be initialized before rollout" envs = ep.make( "http://localhost:9700/mcp/", evaluation_rows=rows, From 7dafec87f4924e3a685b5aa53c0edfc0efb5d4e2 Mon Sep 17 00:00:00 2001 From: 1stprinciple Date: Sat, 25 Oct 2025 11:27:44 +0200 Subject: [PATCH 6/8] - print("model_id from eval_protocol: ", model_id) --- eval_protocol/pytest/default_mcp_gym_rollout_processor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/eval_protocol/pytest/default_mcp_gym_rollout_processor.py b/eval_protocol/pytest/default_mcp_gym_rollout_processor.py index 21e52f6c..6fa495c9 100644 --- a/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +++ b/eval_protocol/pytest/default_mcp_gym_rollout_processor.py @@ -240,7 +240,6 @@ def __call__(self, rows: List[EvaluationRow], config: RolloutProcessorConfig) -> model_id = str( (config.completion_params.get("model") if config.completion_params else None) or "gpt-4o-mini" ) - print("model_id from eval_protocol: ", model_id) temperature = config.completion_params.get("temperature", 0.0) max_tokens = config.completion_params.get("max_tokens", 4096) From c5ec0653457d5f3210eda63951676ccf74e220e9 Mon Sep 17 00:00:00 2001 From: 1stprinciple Date: Mon, 27 Oct 2025 19:08:03 +0100 Subject: [PATCH 7/8] reverts --- development/utils/subprocess_manager.py | 3 +- eval_protocol/cli_commands/deploy.py | 17 ++- .../default_mcp_gym_rollout_processor.py | 7 +- eval_protocol/rewards/apps_testing_util.py | 2 +- eval_protocol/rewards/code_execution.py | 3 +- examples/gym_2048/gym_2048_adapter.py | 137 ------------------ 6 files changed, 17 insertions(+), 152 deletions(-) delete mode 100644 examples/gym_2048/gym_2048_adapter.py diff --git a/development/utils/subprocess_manager.py b/development/utils/subprocess_manager.py index e2f0cd57..1d568c87 100644 --- a/development/utils/subprocess_manager.py +++ b/development/utils/subprocess_manager.py @@ -2,8 +2,7 @@ import os import re # Added for Serveo URL parsing import shutil # Added for checking ssh availability - -# import signal +import signal import subprocess import time from typing import IO, Any, Dict, List, Optional # Added IO, Any, List, Dict, Optional diff --git a/eval_protocol/cli_commands/deploy.py b/eval_protocol/cli_commands/deploy.py index 8a358f54..1ae0313b 100644 --- a/eval_protocol/cli_commands/deploy.py +++ b/eval_protocol/cli_commands/deploy.py @@ -19,15 +19,13 @@ # Import functions with explicit names to match expected signatures from development.utils.subprocess_manager import ( start_ngrok_and_get_url as _start_ngrok_and_get_url, - ) - from development.utils.subprocess_manager import start_process as _start_process - from development.utils.subprocess_manager import ( + start_process as _start_process, start_serveo_and_get_url as _start_serveo_and_get_url, + stop_process as _stop_process, ) - from development.utils.subprocess_manager import stop_process as _stop_process except ImportError: # Fallback implementations when development module is not available - # import signal + import signal import socket import subprocess @@ -81,9 +79,12 @@ def start_ngrok_and_get_url(local_port, log_path): from eval_protocol.auth import get_fireworks_account_id -from eval_protocol.config import GCPCloudRunConfig, RewardKitConfig -from eval_protocol.config import _config_file_path as global_loaded_config_path -from eval_protocol.config import get_config +from eval_protocol.config import ( + GCPCloudRunConfig, + RewardKitConfig, + _config_file_path as global_loaded_config_path, + get_config, +) from eval_protocol.evaluation import create_evaluation from eval_protocol.gcp_tools import ( build_and_push_docker_image, diff --git a/eval_protocol/pytest/default_mcp_gym_rollout_processor.py b/eval_protocol/pytest/default_mcp_gym_rollout_processor.py index 6fa495c9..4aa8ff68 100644 --- a/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +++ b/eval_protocol/pytest/default_mcp_gym_rollout_processor.py @@ -1,8 +1,10 @@ import asyncio import atexit import os +import signal import socket import subprocess +import threading import time from pathlib import Path from typing import List, Optional @@ -180,8 +182,9 @@ def _signal_handler(cls, signum, frame): def _register_cleanup_handlers(cls): """Register cleanup handlers - called only once""" atexit.register(cls._cleanup_all_servers) - # signal.signal(signal.SIGINT, cls._signal_handler) # Ctrl+C - # signal.signal(signal.SIGTERM, cls._signal_handler) # Termination signal + if threading.current_thread() is threading.main_thread(): + signal.signal(signal.SIGINT, cls._signal_handler) # Ctrl+C + signal.signal(signal.SIGTERM, cls._signal_handler) # Termination signal def __enter__(self): """Context manager entry""" diff --git a/eval_protocol/rewards/apps_testing_util.py b/eval_protocol/rewards/apps_testing_util.py index fd8e8d78..e4b931f9 100644 --- a/eval_protocol/rewards/apps_testing_util.py +++ b/eval_protocol/rewards/apps_testing_util.py @@ -23,7 +23,7 @@ import re # Added for re.search # to run the solution files we're using a timing based approach -# import signal +import signal import sys import textwrap # Added for dedenting model output import traceback diff --git a/eval_protocol/rewards/code_execution.py b/eval_protocol/rewards/code_execution.py index c2314edc..38c7189a 100644 --- a/eval_protocol/rewards/code_execution.py +++ b/eval_protocol/rewards/code_execution.py @@ -21,8 +21,7 @@ import re import resource import shlex # Added for robust splitting of arguments - -# import signal +import signal import subprocess import sys import tempfile diff --git a/examples/gym_2048/gym_2048_adapter.py b/examples/gym_2048/gym_2048_adapter.py deleted file mode 100644 index 652c7c8d..00000000 --- a/examples/gym_2048/gym_2048_adapter.py +++ /dev/null @@ -1,137 +0,0 @@ -""" -2048 Environment Adapter - -This adapter implements the EnvironmentAdapter interface for 2048 environments, -enabling integration with the MCP-Gym framework. -""" - -from typing import Any, Dict, Optional, Tuple - -import gym -import gym_2048 -from gym_2048.envs.game2048_env import Game2048Env - -from eval_protocol.mcp.adapter import EnvironmentAdapter - - -class Gym2048Adapter(EnvironmentAdapter): - """2048 adapter for MCP-Gym framework.""" - - ACTION_NAMES = ["UP", "RIGHT", "DOWN", "LEFT"] - - def create_environment(self, config: Optional[Dict[str, Any]] = None) -> Game2048Env: - """ - Create 2048 environment. - - Args: - config: config is not used in this implementation - - Returns: - 2048 environment instance - """ - print(f"🔍 Gym2048Adapter.create_environment: config: {config}") - env = gym.make('2048-v0') - return env - - def create_environment_with_seed( - self, config: Optional[Dict[str, Any]] = None, seed: Optional[int] = None - ) -> Tuple[Game2048Env, int, Dict[str, Any]]: - """ - Create 2048 environment with seed and return initial state. - - Args: - config: config is not used in this implementation - seed: Seed for reproducible environments - - Returns: - Tuple of (environment, initial_observation, initial_info) - """ - print(f"🔍 Gym2048Adapter.create_environment_with_seed: seed: {seed}") - config = config or {} - - # Add seed to config for environment creation - env_config = {**config, "seed": seed} - print(f"🔍 Gym2048Adapter.create_environment_with_seed: env_config: {env_config}") - - env = self.create_environment(env_config) - print(f"🔍 Gym2048Adapter.create_environment_with_seed: created env, calling reset with seed: {seed}") - obs, info = env.reset(seed=seed) - print(f"🔍 Gym2048Adapter.create_environment_with_seed: reset returned obs: {obs}, info: {info}") - - return env, obs, info - - def reset_environment(self, env: Game2048Env, seed: Optional[int] = None) -> Tuple[int, Dict[str, Any]]: - """ - Reset environment. - - Args: - env: Environment instance - seed: Optional seed for reset - - Returns: - Tuple of (observation, info) - """ - return env.reset(seed=seed) - - def step_environment(self, env: Game2048Env, action: int) -> Tuple[int, float, bool, bool, Dict[str, Any]]: - """ - Execute environment step. - - Args: - env: Environment instance - action: Action index - - Returns: - Tuple of (observation, reward, terminated, truncated, info) - """ - return env.step(action) - - def close_environment(self, env: Game2048Env) -> None: - """ - Close environment. - - Args: - env: Environment instance - """ - env.close() - - def parse_action(self, action_str: str) -> int: - """ - Parse action string to integer. - - Args: - action_str: Action string (UP, RIGHT, DOWN, LEFT) - - Returns: - Action index - - Raises: - ValueError: If action is invalid - """ - action_str = action_str.strip().upper() - if action_str not in self.ACTION_NAMES: - raise ValueError(f"Invalid action '{action_str}'. Valid actions: {self.ACTION_NAMES}") - return self.ACTION_NAMES.index(action_str) - - def format_observation(self, observation: int) -> int: - """ - Format observation for JSON serialization. - - Args: - observation: Raw observation from environment - - Returns: - Formatted observation - """ - return int(observation) - - def get_default_config(self) -> Dict[str, Any]: - """ - Get default configuration. - - Returns: - Default configuration dictionary - """ - return { - "is_slippery": False, - } From 339e8ced20173d7790746bd8fc33eba08cf5fd20 Mon Sep 17 00:00:00 2001 From: Derek Xu Date: Mon, 27 Oct 2025 11:13:17 -0700 Subject: [PATCH 8/8] reformat --- eval_protocol/pytest/default_mcp_gym_rollout_processor.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/eval_protocol/pytest/default_mcp_gym_rollout_processor.py b/eval_protocol/pytest/default_mcp_gym_rollout_processor.py index 4aa8ff68..cd869bd7 100644 --- a/eval_protocol/pytest/default_mcp_gym_rollout_processor.py +++ b/eval_protocol/pytest/default_mcp_gym_rollout_processor.py @@ -239,10 +239,7 @@ def __call__(self, rows: List[EvaluationRow], config: RolloutProcessorConfig) -> "Cannot retry without existing server/environments. Call with start_server=True first." ) - - model_id = str( - (config.completion_params.get("model") if config.completion_params else None) or "gpt-4o-mini" - ) + model_id = str((config.completion_params.get("model") if config.completion_params else None) or "gpt-4o-mini") temperature = config.completion_params.get("temperature", 0.0) max_tokens = config.completion_params.get("max_tokens", 4096)