From d430ac1e9914c417088982977f714f73e671a92f Mon Sep 17 00:00:00 2001 From: swyxio Date: Sun, 30 Mar 2025 09:40:56 -0700 Subject: [PATCH 1/3] add retries and readme --- pokemon-example/instructions.md | 1 + pokemon-example/pokemon_agent_trajectory.py | 47 ++++++++++++++++++--- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/pokemon-example/instructions.md b/pokemon-example/instructions.md index d6e87ed..6555d9f 100644 --- a/pokemon-example/instructions.md +++ b/pokemon-example/instructions.md @@ -6,6 +6,7 @@ uv pip install dotenv uv pip install morphcloud uv pip install pillow uv pip install mcp +uv pip install fastapi # if you do not have a snapshot uv run setup_romwatch_server.py diff --git a/pokemon-example/pokemon_agent_trajectory.py b/pokemon-example/pokemon_agent_trajectory.py index 8d9a42c..8dcf091 100644 --- a/pokemon-example/pokemon_agent_trajectory.py +++ b/pokemon-example/pokemon_agent_trajectory.py @@ -267,14 +267,14 @@ def __init__(self, server_url: str): log(LogLevel.INFO, f"Created MCP handler", extra={"server_url": server_url}) async def connect(self): - """Connect to the MCP server.""" + """Connect to the MCP server with retries and exponential backoff.""" from mcp import ClientSession from mcp.client.sse import sse_client import asyncio + from contextlib import AsyncExitStack MAX_RETRIES = 10 # With exponential backoff, this gives us ~5 minutes total BASE_DELAY = 1 # Start with 1 second delay MAX_DELAY = 300 # Maximum delay of 5 minutes - from contextlib import AsyncExitStack self.exit_stack = AsyncExitStack() for attempt in range(MAX_RETRIES): @@ -317,6 +317,44 @@ async def connect(self): return False + for attempt in range(MAX_RETRIES): + try: + log(LogLevel.INFO, f"Connecting to MCP server (attempt {attempt + 1}/{MAX_RETRIES})", + extra={"url": self.server_url}) + + # Connect to the SSE endpoint + self.streams = await self.exit_stack.enter_async_context( + sse_client(self.server_url) + ) + self.session = await self.exit_stack.enter_async_context( + ClientSession(self.streams[0], self.streams[1]) + ) + + await self.session.initialize() + + # List available tools and store them + response = await self.session.list_tools() + self.tools = response.tools + tool_names = [tool.name for tool in self.tools] + log(LogLevel.INFO, f"Connected to server", + extra={"tool_count": len(tool_names), "tools": tool_names}) + return True + + except Exception as e: + import traceback + log(LogLevel.ERROR, f"Connection attempt {attempt + 1} failed", + extra={"error": str(e), "traceback": traceback.format_exc()}) + + if attempt < MAX_RETRIES - 1: + # Calculate delay with exponential backoff + delay = min(BASE_DELAY * (2 ** attempt), MAX_DELAY) + log(LogLevel.INFO, f"Retrying in {delay} seconds...") + await asyncio.sleep(delay) + else: + log(LogLevel.ERROR, "Max retries reached, giving up") + return False + + return False def get_claude_tools(self): """Convert MCP tools to Claude-compatible format.""" @@ -349,7 +387,7 @@ async def call_tool_with_extras(self, tool_name, tool_input, include_state=True, raise ValueError("Not connected to MCP server") primary_result = await self.session.call_tool(tool_name, tool_input) - + # Parse the primary result manually to check if it already contains what we need has_state = False has_screenshot = False @@ -562,7 +600,7 @@ async def _update_state(self, state: Dict[str, Any]) -> Dict[str, Any]: "valid_moves": game_state.get("valid_moves", []), "last_action": state.get("last_action", "") } - + # Make sure morph_instance reference is preserved if "_morph_instance" in state: @@ -1464,4 +1502,3 @@ async def run_pokemon_without_api(snapshot_id: str, steps: int = 100): api = PokemonAPI(host="127.0.0.1", port=args.port) print(f"Starting Pokemon Agent API on port {args.port}...") api.start() - From 23238b3fe3868363bdea6e79170cb169588ebbba Mon Sep 17 00:00:00 2001 From: swyxio Date: Sun, 30 Mar 2025 12:55:48 -0700 Subject: [PATCH 2/3] store my prompts --- pokemon-example/pokemon_agent_trajectory.py | 6 +++++- pokemon-example/tasks.py | 4 ++-- pokemon-example/trajectory_driver.py | 4 ++-- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/pokemon-example/pokemon_agent_trajectory.py b/pokemon-example/pokemon_agent_trajectory.py index 8dcf091..00c5faf 100644 --- a/pokemon-example/pokemon_agent_trajectory.py +++ b/pokemon-example/pokemon_agent_trajectory.py @@ -544,7 +544,11 @@ def __init__(self, mcp_handler: PokemonMCPHandler, model_name="claude-3-7-sonnet self.pause_event = asyncio.Event() self.pause_event.set() # Start unpaused - self.system_prompt = """You are playing Pokemon Red. You can see the game screen and control the game by executing emulator commands: check your tools! for example, 'navigate_to' can help you move in the overworld. Before each action, explain your reasoning briefly, then use the available actions to control the game""" + self.system_prompt = """You are playing Pokemon Red. You can see the game screen and control the game by executing emulator commands: check your tools! for example, 'navigate_to' can help you move in the overworld. + + Before each action, explain your reasoning briefly, plan your immediate next few steps needed (low level actions, e.g. 'to reach the Cave, i need to go 1. right, 2. right, 3. right, 4. up', not high level goals) to get there, then use the available actions to execute the next step in the game. + + The game commands always register perfectly, so if you see no reaction to them, you have made an invalid command and misunderstood the game state. In battles, when you see an attack that isn't effective, you should examine your assumptions and update your beliefs. In general, search the solution space (try different things) before getting stuck in ruts.""" log(LogLevel.INFO, f"Initialized PokemonAgent", extra={"model": model_name, "max_tokens": max_tokens}) diff --git a/pokemon-example/tasks.py b/pokemon-example/tasks.py index 59a19c1..3b0c340 100644 --- a/pokemon-example/tasks.py +++ b/pokemon-example/tasks.py @@ -174,9 +174,9 @@ def get_verification_function(self) -> Callable[[Dict[str, Any]], bool]: REGISTERED_TASKS = [ TaskDefinition( id="escape-mt-moon", - instruction="Navigate through Mount Moon and exit to Route 4", + instruction="You are about to enter Mount Moon. The entrance is on the right of the Pokemon center. Go into the cave and get through Mount Moon", verification_fn_name="verify_escape_mount_moon", - verification_message="Reach Route 4 after leaving Mt. Moon.", + verification_message="Enter and complete Mount Moon (you are not done).", metadata={"game": "Pokemon Red", "objective": "mount_moon"}, ), TaskDefinition( diff --git a/pokemon-example/trajectory_driver.py b/pokemon-example/trajectory_driver.py index f2a4d2f..05098f4 100644 --- a/pokemon-example/trajectory_driver.py +++ b/pokemon-example/trajectory_driver.py @@ -328,12 +328,12 @@
- +
- +
From e67f79b29c13e45da7110654ccb942407e6e354e Mon Sep 17 00:00:00 2001 From: swyxio Date: Sun, 30 Mar 2025 14:33:02 -0700 Subject: [PATCH 3/3] continuebutton --- pokemon-example/.gitignore | 2 + pokemon-example/eva.py | 205 +++++++++---------- pokemon-example/pokemon_agent_trajectory.py | 209 ++++++++------------ pokemon-example/pokemon_eva_agent.py | 28 ++- pokemon-example/simple_driver.py | 100 ++++++++++ pokemon-example/trajectory_driver.py | 62 ++++-- 6 files changed, 365 insertions(+), 241 deletions(-) create mode 100644 pokemon-example/.gitignore diff --git a/pokemon-example/.gitignore b/pokemon-example/.gitignore new file mode 100644 index 0000000..1bbb861 --- /dev/null +++ b/pokemon-example/.gitignore @@ -0,0 +1,2 @@ +__pycache__ +logs \ No newline at end of file diff --git a/pokemon-example/eva.py b/pokemon-example/eva.py index 19bfdec..4fcca10 100644 --- a/pokemon-example/eva.py +++ b/pokemon-example/eva.py @@ -375,106 +375,120 @@ async def initialize_state(self, morph_instance: 'MorphInstance') -> Instance[S, pass -async def run(task: VerifiedTask[S, A, R, T], agent: Agent[S, A, R, T], max_steps: int = 100, - verify_every_step: bool = False, ttl_seconds: Optional[int] = None) -> Tuple[VerificationResult[R], Trajectory[S, A, R, T]]: +async def initialize_run(task: VerifiedTask[S, A, R, T], agent: Agent[S, A, R, T], ttl_seconds: Optional[int] = None) -> Tuple[Instance[S, T], Trajectory[S, A, R, T], 'MorphInstance']: """ - Run an agent on a task until the task is complete or max_steps is reached. + Initialize a run by creating the Morph instance and initial state. + + Returns: + Tuple of (initial_state, trajectory, morph_instance) """ - log(LogLevel.INFO, f"Running agent for task", - extra={"task": task.instruction, "max_steps": max_steps, "verify_every_step": verify_every_step}) - - agent.set_objective(task.instruction) - - # Start a Morph instance from the task's snapshot - log(LogLevel.INFO, f"Starting Morph instance", - extra={"snapshot_id": task.snapshot_id}) + # Create a new Morph instance from the snapshot + log(LogLevel.INFO, "Creating MorphInstance", extra={"snapshot_id": task.snapshot_id}) morph_instance = MorphInstance(task.snapshot_id, task.metadata, ttl_seconds) + log(LogLevel.INFO, "Created MorphInstance") + + # Initialize the state + log(LogLevel.INFO, "Initializing state with MorphInstance") + initial_state = await agent.initialize_state(morph_instance) + log(LogLevel.INFO, "Initialized state") - try: - # Initialize the agent's state and trajectory - initial_state = await agent.initialize_state(morph_instance) - - # Set morph_instance reference - if hasattr(initial_state.state, '_morph_instance'): - object.__setattr__(initial_state.state, '_morph_instance', morph_instance) - - if hasattr(agent, 'trajectory') and agent.trajectory is not None: - trajectory = agent.trajectory - else: - trajectory = Trajectory[S, A, R, T]() - agent.trajectory = trajectory + # Set up trajectory + if hasattr(agent, 'trajectory') and agent.trajectory is not None: + trajectory = agent.trajectory + else: + trajectory = Trajectory[S, A, R, T]() + agent.trajectory = trajectory + + # Bind the agent to the instance + if hasattr(agent, 'bind_instance'): + agent.bind_instance(morph_instance) + + # Initialize with the initial state + trajectory.add_step(initial_state) + + return initial_state, trajectory, morph_instance - - # Bind the agent to the instance - if hasattr(agent, 'bind_instance'): - agent.bind_instance(morph_instance) - - # Initialize with the initial state - trajectory.add_step(initial_state) - - current_state = trajectory.current_state - if current_state is None: - error_msg = "No initial state available" - log(LogLevel.ERROR, error_msg) - raise ValueError(error_msg) - - for step_num in range(max_steps): - log(LogLevel.INFO, f"Starting step execution", - extra={"step_num": step_num+1, "max_steps": max_steps}) - - # Execute a step - now with await - log(LogLevel.INFO, "Determining next action...") - action = await agent.run_step(current_state) - log(LogLevel.INFO, f"Selected action", extra={"action": str(action)}) - - # Apply the action to get a new state - now with await - log(LogLevel.INFO, f"Applying action", extra={"action": str(action)}) - new_state_value = await agent.apply_action(current_state.state, action) - new_state = current_state.__class__(new_state_value) - - # Ensure morph_instance reference is preserved - if hasattr(new_state.state, '_morph_instance'): - object.__setattr__(new_state.state, '_morph_instance', morph_instance) - - # Record the step - trajectory.add_step(new_state, action) - - # Update current state - current_state = new_state +async def continue_run(task: VerifiedTask[S, A, R, T], agent: Agent[S, A, R, T], + trajectory: Trajectory[S, A, R, T], morph_instance: 'MorphInstance', + max_steps: int = 100, verify_every_step: bool = False) -> Tuple[VerificationResult[R], Trajectory[S, A, R, T]]: + """ + Continue execution for max_steps more steps from the current state. + """ + current_state = trajectory.current_state + if current_state is None: + error_msg = "No current state available" + log(LogLevel.ERROR, error_msg) + raise ValueError(error_msg) + + start_step = len(trajectory.steps) + for step_num in range(max_steps): + log(LogLevel.INFO, f"Starting step execution", + extra={"step_num": start_step + step_num + 1}) + + # Execute a step + log(LogLevel.INFO, "Determining next action...") + action = await agent.run_step(current_state) + log(LogLevel.INFO, f"Selected action", extra={"action": str(action)}) + + # Apply the action to get a new state + log(LogLevel.INFO, f"Applying action", extra={"action": str(action)}) + new_state_value = await agent.apply_action(current_state.state, action) + new_state = current_state.__class__(new_state_value) + + # Ensure morph_instance reference is preserved + if hasattr(new_state.state, '_morph_instance'): + object.__setattr__(new_state.state, '_morph_instance', morph_instance) + + # Record the step + trajectory.add_step(new_state, action) + + # Update current state + current_state = new_state + + # Check if we should verify + if verify_every_step or step_num == max_steps - 1: + log(LogLevel.INFO, "Verifying current state...") + result = task.verify(current_state, trajectory.actions) + trajectory.steps[-1].result = result - # Check if we should verify - if verify_every_step or step_num == max_steps - 1: - log(LogLevel.INFO, "Verifying current state...") - result = task.verify(current_state, trajectory.actions) - trajectory.steps[-1].result = result - - if result.success: - log(LogLevel.SUCCESS, f"Task completed successfully", - extra={"steps_taken": step_num+1}) - trajectory.summarize() - return result, trajectory - - # If we reached max steps without success: - log(LogLevel.WARNING, f"Reached maximum steps without success", - extra={"max_steps": max_steps}) - - if trajectory.final_result is not None: - trajectory.summarize() - return trajectory.final_result, trajectory - - result = VerificationResult( - value=None, - success=False, - message=f"Failed to complete task within {max_steps} steps", - details={"last_state": str(current_state.state)} - ) - result.log() + if result.success: + log(LogLevel.SUCCESS, f"Task completed successfully", + extra={"steps_taken": start_step + step_num + 1}) + trajectory.summarize() + return result, trajectory + + # If we reached max steps without success: + log(LogLevel.WARNING, f"Reached maximum steps without success", + extra={"max_steps": max_steps}) + + if trajectory.final_result is not None: trajectory.summarize() - return result, trajectory + return trajectory.final_result, trajectory + + result = VerificationResult( + value=None, + success=False, + message=f"Failed to complete task within {max_steps} steps", + details={"last_state": str(current_state.state)} + ) + result.log() + trajectory.summarize() + return result, trajectory + +async def run(task: VerifiedTask[S, A, R, T], agent: Agent[S, A, R, T], max_steps: int = 100, + verify_every_step: bool = False, ttl_seconds: Optional[int] = None) -> Tuple[VerificationResult[R], Trajectory[S, A, R, T]]: + """ + Run an agent on a task until the task is complete or max_steps is reached. + """ + try: + initial_state, trajectory, morph_instance = await initialize_run(task, agent, ttl_seconds) + return await continue_run(task, agent, trajectory, morph_instance, max_steps, verify_every_step) + except Exception as e: + log(LogLevel.ERROR, f"Error in run: {str(e)}") + raise finally: - # Always clean up the Morph instance - morph_instance.stop() + log(LogLevel.INFO, "Run completed") async def run_step(task: VerifiedTask[S, A, R, T], agent: Agent[S, A, R, T], trajectory: Trajectory[S, A, R, T], verify: bool = False) -> Tuple[Instance[S, T], Optional[VerificationResult[R]]]: @@ -621,11 +635,4 @@ def stop(self) -> None: def __del__(self) -> None: """Ensure the instance is stopped when this object is garbage collected.""" - try: - if hasattr(self, 'instance') and self.instance: - self.stop() - except (AttributeError, Exception): - # Ignore errors during garbage collection - pass - - + pass diff --git a/pokemon-example/pokemon_agent_trajectory.py b/pokemon-example/pokemon_agent_trajectory.py index 00c5faf..424516d 100644 --- a/pokemon-example/pokemon_agent_trajectory.py +++ b/pokemon-example/pokemon_agent_trajectory.py @@ -94,11 +94,8 @@ def add_step(self, state: Instance[Dict[str, Any], str], location = line.replace("Location:", "").strip() break - # Get the current step index - current_step_index = len(self.steps) - - # Create snapshot (gets snapshot ID) with the current step index - snapshot_id = state.snapshot(step_index=current_step_index) + # Create snapshot (gets snapshot ID) + snapshot_id = state.snapshot() # Create specialized step step = PokemonTrajectoryStep( @@ -205,12 +202,8 @@ def __init__(self, state: Dict[str, Any], morph_instance: Optional[MorphInstance super().__init__(state) self._morph_instance = morph_instance - def snapshot(self, step_index: int = 0) -> str: - """Create a snapshot and return the ID for visualization and rollback. - - Args: - step_index: The current step index in the trajectory - """ + def snapshot(self) -> str: + """Create a snapshot and return the ID for visualization and rollback.""" morph_instance = getattr(self.state, '_morph_instance', None) or self._morph_instance if morph_instance: @@ -222,7 +215,6 @@ def snapshot(self, step_index: int = 0) -> str: metadata = { "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"), "action": self.state.get("last_action", ""), - "step_index": str(step_index), } # Add game state summary to metadata @@ -239,8 +231,7 @@ def snapshot(self, step_index: int = 0) -> str: log(LogLevel.INFO, "Created snapshot", extra={ "event_type": EVENT_SNAPSHOT_CREATED, - "snapshot_id": snapshot_id, - "step_index": metadata.get("step_index", "0") + "snapshot_id": snapshot_id }) return snapshot_id @@ -314,8 +305,6 @@ async def connect(self): log(LogLevel.ERROR, "Max retries reached, giving up") return False - return False - for attempt in range(MAX_RETRIES): try: @@ -520,8 +509,9 @@ async def cleanup(self): class PokemonAgent(Agent[Dict[str, Any], str, bool, str]): """An agent that plays Pokemon using the Claude API and tracks its trajectory.""" - def __init__(self, mcp_handler: PokemonMCPHandler, model_name="claude-3-7-sonnet-latest", max_tokens=1000, max_history=30): + def __init__(self, mcp_handler: PokemonMCPHandler, morph_instance: MorphInstance, model_name="claude-3-7-sonnet-latest", max_tokens=1000, max_history=30): """Initialize the Pokemon agent.""" + log(LogLevel.INFO, "PokemonAgent.__init__ start", extra={"morph_instance_exists": morph_instance is not None}) super().__init__() self.mcp_handler = mcp_handler self.anthropic = Anthropic() @@ -530,6 +520,9 @@ def __init__(self, mcp_handler: PokemonMCPHandler, model_name="claude-3-7-sonnet self.temperature = 0.7 self.message_history = [] self.max_history = max_history + self.morph_instance = morph_instance # Store morph_instance + self.objective = "Explore Pokemon Red" # Default objective + log(LogLevel.INFO, "PokemonAgent stored morph_instance", extra={"morph_instance_exists": self.morph_instance is not None}) # Specialized trajectory self.trajectory = PokemonTrajectory() @@ -546,17 +539,25 @@ def __init__(self, mcp_handler: PokemonMCPHandler, model_name="claude-3-7-sonnet self.system_prompt = """You are playing Pokemon Red. You can see the game screen and control the game by executing emulator commands: check your tools! for example, 'navigate_to' can help you move in the overworld. - Before each action, explain your reasoning briefly, plan your immediate next few steps needed (low level actions, e.g. 'to reach the Cave, i need to go 1. right, 2. right, 3. right, 4. up', not high level goals) to get there, then use the available actions to execute the next step in the game. - - The game commands always register perfectly, so if you see no reaction to them, you have made an invalid command and misunderstood the game state. In battles, when you see an attack that isn't effective, you should examine your assumptions and update your beliefs. In general, search the solution space (try different things) before getting stuck in ruts.""" +Before each action, explain your reasoning briefly, plan your immediate next few steps needed (low level tool calls and actions, e.g. 'to reach the Cave from here, I need to go 1. right, 2. right, 3. right, 4. up', not high level goals like '1. explore the Cave 2. ??? 3. win!') to get there, then use the available actions to execute the next step in the game. + +The game commands always register perfectly, so if you see no reaction to them, you have made an invalid command and misunderstood the game state. In battles, when you see an attack that isn't effective, you should examine your assumptions and update your beliefs. In general, search the solution space (try different things) before getting stuck in ruts. + +Mistakes you have made before: +- do not talk to NPCs +- do not plan with high level goals +- do not insist on your prior knowledge about what attacks are strong against what types of Pokemon works when the evidence is the opposite +- you miss the cave, which is a black hole to the side of the pokemon center. + """ log(LogLevel.INFO, f"Initialized PokemonAgent", extra={"model": model_name, "max_tokens": max_tokens}) def set_objective(self, objective: str): """Set the agent's current objective.""" + log(LogLevel.INFO, "PokemonAgent.set_objective start", extra={"objective": objective, "morph_instance_exists": hasattr(self, 'morph_instance') and self.morph_instance is not None}) self.objective = objective - log(LogLevel.INFO, f"Setting agent objective", extra={"objective": objective}) + log(LogLevel.INFO, "PokemonAgent.set_objective end") async def initialize_state(self, morph_instance: 'MorphInstance') -> PokemonInstance: """Initialize the state from a MorphCloud instance.""" @@ -580,7 +581,8 @@ async def initialize_state(self, morph_instance: 'MorphInstance') -> PokemonInst instance = PokemonInstance(initial_state, morph_instance) # Add a starting message to the history - initial_message = f"Your current objective is: {self.objective}\n\nYou may now begin playing Pokemon." + objective = getattr(self, 'objective', "Explore Pokemon Red") # Use default if not set + initial_message = f"Your current objective is: {objective}\n\nYou may now begin playing Pokemon." self.message_history = [{"role": "user", "content": initial_message}] # Initialize the trajectory with the first step @@ -599,7 +601,7 @@ async def _update_state(self, state: Dict[str, Any]) -> Dict[str, Any]: # Update the state new_state = { - "game_state": game_state.get("game_state", {}), + "game_state": game_state.get("game_state", ""), "screenshot": screenshot_result.get("screenshot", ""), "valid_moves": game_state.get("valid_moves", []), "last_action": state.get("last_action", "") @@ -735,6 +737,14 @@ async def _retry_with_nudge(self, max_retries=3): assistant_content.append({"type": "text", "text": block.text}) elif block.type == "tool_use": assistant_content.append({"type": "tool_use", **dict(block)}) + # # Add a tool result immediately after each tool use + # tool_result = { + # "type": "tool_result", + # "tool_call_id": block.id, + # "name": block.name, + # "result": "Tool call received" + # } + # assistant_content.append(tool_result) log(LogLevel.DEBUG, f"Found tool call", extra={"tool": block.name, "input": json.dumps(block.input)}) @@ -786,95 +796,29 @@ async def apply_action(self, state: Dict[str, Any], action: str) -> Dict[str, An "screenshot": state.get("screenshot", ""), "valid_moves": state.get("valid_moves", []) } - - - # Make sure morph_instance reference is preserved - if "_morph_instance" in state: - new_state["_morph_instance"] = state["_morph_instance"] - # Update the state with fresh game information - new_state = await self._update_state(new_state) - - # Log tool result with standardized event - log(LogLevel.INFO, f"Tool result", - extra={ - "event_type": EVENT_TOOL_RESULT, - "action": action, - "game_state": new_state["game_state"] - }) - - # Create tool results from the action for Claude history - tool_results = [] - - # Extract most recent assistant message to get tool ID - if self.message_history and self.message_history[-1]["role"] == "assistant": - assistant_content = self.message_history[-1]["content"] - tool_use_items = [item for item in assistant_content if isinstance(item, dict) and item.get("type") == "tool_use"] - - if tool_use_items: - tool_use_id = tool_use_items[0].get("id") - - if tool_use_id: - # Create result content - result_content = [] - - # Add text result - result_text = f"Action '{action}' executed." - if "result" in action_result: - result_text += f"\nResult: {action_result['result']}" - - result_content.append({"type": "text", "text": result_text}) - - # Add screenshot if available - if new_state["screenshot"]: - result_content.append({ - "type": "image", - "source": { - "type": "base64", - "media_type": "image/png", - "data": new_state["screenshot"] - } - }) - - # Create a proper tool result - tool_results.append({ - "type": "tool_result", - "tool_use_id": tool_use_id, - "content": result_content - }) - - # Add the tool results to message history - self.message_history.append({"role": "user", "content": tool_results}) - - # Create PokemonInstance from the new state - instance = PokemonInstance(new_state) - - if isinstance(self.trajectory, PokemonTrajectory): - # Use the enhanced method with Pokemon-specific fields + # Update trajectory with the current step's data + if hasattr(self, 'trajectory'): self.trajectory.add_step( - state=instance, + state=Instance(new_state), action=action, tool_name=self.current_tool_name, tool_input=self.current_tool_input, claude_text=self.current_claude_text ) - else: - # Fall back to the base Trajectory method - log(LogLevel.WARNING, "Using base Trajectory.add_step instead of PokemonTrajectory", - extra={"trajectory_class": self.trajectory.__class__.__name__}) - self.trajectory.add_step(state=instance, action=action) - + # Reset step data + self.current_tool_name = None + self.current_tool_input = None + self.current_claude_text = None - # Reset current step data - self.current_tool_name = None - self.current_tool_input = None - self.current_claude_text = None + # Update state with latest game information + updated_state = await self._update_state(new_state) - # Log action completion - log(LogLevel.INFO, f"Action completed", - extra={"event_type": EVENT_ACTION_COMPLETED, "action": action}) + # Make sure morph_instance reference is preserved + if "_morph_instance" in state: + updated_state["_morph_instance"] = state["_morph_instance"] - return new_state + return updated_state async def summarize_history(self): """Summarize the conversation history to save context space.""" @@ -1190,17 +1134,28 @@ async def run_agent_task(self, snapshot_id: str, steps: int, task_id: Optional[s return # 5. Create the agent - self.agent = PokemonAgent(mcp_handler=mcp_handler) + log(LogLevel.INFO, "About to create PokemonAgent", extra={"morph_instance_exists": self.morph_instance is not None}) + self.agent = PokemonAgent(mcp_handler=mcp_handler, morph_instance=self.morph_instance) + log(LogLevel.INFO, "Created PokemonAgent") + + # Initialize agent state + log(LogLevel.INFO, "Initializing agent state") + await self.agent.initialize_state(self.morph_instance) + log(LogLevel.INFO, "Initialized agent state") # Decide which VerifiedTask to run if task_id: # Use a real task from tasks.py + log(LogLevel.INFO, "Creating task from task_id", extra={"task_id": task_id}) self.running_task = create_pokemon_verified_task(task_id, snapshot_id) + log(LogLevel.INFO, "Created task", extra={"instruction": self.running_task.instruction}) # The agent's "objective" might be the instruction from the actual task: + log(LogLevel.INFO, "Setting agent objective from task") self.agent.set_objective(self.running_task.instruction) + log(LogLevel.INFO, "Set agent objective") else: # Fallback: create a trivial verification or use the 'objective' string - # logger.warning("No task_id provided. Using trivial verification function.") + log(LogLevel.INFO, "Creating fallback task with objective", extra={"objective": objective}) def dummy_verify_func(gs: Dict[str, Any]) -> bool: return False # Always returns false => never completes self.running_task = PokemonVerifiedTask.create( @@ -1210,10 +1165,13 @@ def dummy_verify_func(gs: Dict[str, Any]) -> bool: verification_message="No real verification in place", metadata={"game": "Pokemon Red", "objective": objective} ) + log(LogLevel.INFO, "Created fallback task") + log(LogLevel.INFO, "Setting agent objective from fallback") self.agent.set_objective(objective) + log(LogLevel.INFO, "Set agent objective") # 6. Run the agent with E.V.A. - # logger.info(f"Starting E.V.A. run() for task: {self.running_task.instruction}") + log(LogLevel.INFO, "Starting E.V.A. run()", extra={"task_instruction": self.running_task.instruction}) from eva import run result, trajectory = await run( task=self.running_task, @@ -1234,25 +1192,23 @@ def dummy_verify_func(gs: Dict[str, Any]) -> bool: async def rollback_to_step(self, step_index: int): """Roll back to a specific step.""" + if not self.agent or not self.agent.trajectory: + log(LogLevel.ERROR, "No trajectory to roll back") + return + + if step_index < 0 or step_index >= len(self.agent.trajectory.steps): + log(LogLevel.ERROR, f"Invalid step index {step_index}") + return + try: - log(LogLevel.INFO, f"Rolling back to step {step_index}") - - if not self.agent or not self.task: - log(LogLevel.ERROR, "Cannot rollback - agent or task not initialized") - return - - # Get snapshot ID from the step - target_step = self.agent.trajectory.steps[step_index] - snapshot_id = target_step.snapshot + # Get the snapshot ID from the step + step = self.agent.trajectory.steps[step_index] + snapshot_id = step.snapshot if not snapshot_id: - log(LogLevel.ERROR, f"No snapshot ID available for step {step_index}") + log(LogLevel.ERROR, "No snapshot ID for step") return - # Clean up existing MorphInstance if any - if self.morph_instance: - self.morph_instance.stop() - # Start new MorphVM instance from the snapshot log(LogLevel.INFO, f"Starting MorphVM instance from rollback snapshot", extra={"snapshot_id": snapshot_id}) @@ -1290,7 +1246,7 @@ async def rollback_to_step(self, step_index: int): return # Create new agent with the same objective - self.agent = PokemonAgent(mcp_handler=mcp_handler) + self.agent = PokemonAgent(mcp_handler=mcp_handler, morph_instance=self.morph_instance) self.agent.set_objective(getattr(self.task, 'instruction', "Continue playing Pokemon")) @@ -1299,17 +1255,14 @@ async def rollback_to_step(self, step_index: int): for i in range(step_index + 1): new_trajectory.steps.append(self.agent.trajectory.steps[i]) + # Update agent's trajectory self.agent.trajectory = new_trajectory - # Resume the agent - self.agent.resume() - - log(LogLevel.SUCCESS, f"Rollback to step {step_index} completed") + log(LogLevel.SUCCESS, f"Successfully rolled back to step {step_index}") except Exception as e: - log(LogLevel.ERROR, f"Error during rollback", extra={"error": str(e)}) - import traceback - log(LogLevel.ERROR, f"Rollback error traceback", extra={"traceback": traceback.format_exc()}) + log(LogLevel.ERROR, f"Error during rollback: {e}") + raise def start(self): """Start the API server.""" @@ -1445,7 +1398,7 @@ async def run_pokemon_without_api(snapshot_id: str, steps: int = 100): return # Create the agent - agent = PokemonAgent(mcp_handler=mcp_handler) + agent = PokemonAgent(mcp_handler=mcp_handler, morph_instance=morph_instance) # agent.trajectory = PokemonTrajectory() diff --git a/pokemon-example/pokemon_eva_agent.py b/pokemon-example/pokemon_eva_agent.py index e947809..5fa660d 100644 --- a/pokemon-example/pokemon_eva_agent.py +++ b/pokemon-example/pokemon_eva_agent.py @@ -403,6 +403,8 @@ def __init__(self, mcp_handler: PokemonMCPHandler, model_name="claude-3-7-sonnet mcp_handler: Handler for MCP communication model_name: Claude model to use max_tokens: Maximum tokens to generate + max_history: Maximum number of messages to keep in history + morph_instance: Optional MorphInstance for snapshotting """ super().__init__() self.mcp_handler = mcp_handler @@ -1057,9 +1059,7 @@ def verify_beat_first_gym(game_state: Dict[str, Any]) -> bool: ) # Create a Pokemon agent - agent = PokemonAgent(mcp_handler) - - agent.morph_instance = morph_instance + agent = PokemonAgent(mcp_handler=mcp_handler, morph_instance=morph_instance) novnc_url = morph_instance.instance.expose_http_service( name="novnc", @@ -1104,7 +1104,29 @@ def verify_beat_first_gym(game_state: Dict[str, Any]) -> bool: parser = argparse.ArgumentParser(description='Run Pokemon agent with custom snapshot ID') parser.add_argument('--snapshot-id', type=str, help='Snapshot ID to use for the MorphVM instance') parser.add_argument('--steps', type=int, default=200, help='Number of steps to run the agent for') + parser.add_argument('--continue', action='store_true', help='Continue from the last state') args = parser.parse_args() + # If continuing, use the last snapshot ID + if getattr(args, 'continue', False): + # Get the latest snapshot ID from the log file + log_file = get_latest_eva_log_file() + if log_file: + with open(log_file, 'r') as f: + lines = f.readlines() + # Read backwards to find the last snapshot + for line in reversed(lines): + try: + data = json.loads(line) + if 'snapshot_id' in data.get('extra', {}): + args.snapshot_id = data['extra']['snapshot_id'] + break + except json.JSONDecodeError: + continue + + if not args.snapshot_id: + print("Error: No previous snapshot found to continue from") + sys.exit(1) + asyncio.run(run_pokemon_example(args.snapshot_id, args.steps)) diff --git a/pokemon-example/simple_driver.py b/pokemon-example/simple_driver.py index 947bfa6..4cc7840 100644 --- a/pokemon-example/simple_driver.py +++ b/pokemon-example/simple_driver.py @@ -282,6 +282,7 @@
+
Status: Ready
@@ -304,6 +305,7 @@ const gameIframe = document.getElementById('game-iframe'); const playBtn = document.getElementById('play-btn'); const stopBtn = document.getElementById('stop-btn'); + const continueBtn = document.getElementById('continue-btn'); const refreshLogsBtn = document.getElementById('refresh-logs-btn'); const snapshotsList = document.getElementById('snapshots-list'); const statusDisplay = document.getElementById('status-display'); @@ -465,6 +467,7 @@ function resetPlayState() { playBtn.disabled = false; stopBtn.disabled = true; + continueBtn.disabled = true; statusDisplay.textContent = 'Status: Ready'; statusDisplay.className = 'status'; } @@ -483,6 +486,7 @@ // Update UI playBtn.disabled = true; stopBtn.disabled = false; + continueBtn.disabled = false; statusDisplay.textContent = 'Status: Starting...'; statusDisplay.className = 'status running'; @@ -542,6 +546,44 @@ } }); + // Continue button click handler + continueBtn.addEventListener('click', async () => { + try { + // Update UI + playBtn.disabled = true; + stopBtn.disabled = false; + continueBtn.disabled = true; + statusDisplay.textContent = 'Status: Continuing...'; + statusDisplay.className = 'status running'; + + // Continue the script + const response = await fetch('/api/continue', { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ + steps: document.getElementById('steps').value + }) + }); + + const data = await response.json(); + + if (data.success) { + // Start fetching logs + logUpdateInterval = setInterval(fetchLogs, 2000); + statusDisplay.textContent = 'Status: Running'; + } else { + alert(`Failed to continue: ${data.error}`); + resetPlayState(); + } + } catch (error) { + console.error('Error continuing script:', error); + alert(`Error: ${error.message}`); + resetPlayState(); + } + }); + // Refresh logs button click handler refreshLogsBtn.addEventListener('click', () => { fetchLogs(); @@ -850,6 +892,18 @@ def do_POST(self): self.send_header('Content-Type', 'application/json') self.end_headers() self.wfile.write(json.dumps(result).encode()) + + elif self.path == '/api/continue': + # Parse the request data + params = json.loads(post_data) + + # Continue the Pokemon agent + result = continue_pokemon_agent(steps=params.get('steps', 100)) + + self.send_response(200) + self.send_header('Content-Type', 'application/json') + self.end_headers() + self.wfile.write(json.dumps(result).encode()) else: self.send_response(404) @@ -930,6 +984,52 @@ def start_pokemon_agent(snapshot_id, steps=10): logger.error(f"Traceback: {traceback.format_exc()}") return {'success': False, 'error': str(e)} +# Continue the Pokemon agent +def continue_pokemon_agent(steps=100): + """Continue the Pokemon agent for additional steps.""" + global active_process + + with process_lock: + if active_process is None or active_process.poll() is not None: + return {'success': False, 'error': 'No agent is currently running'} + + try: + # Build command to continue execution + cmd = [ + sys.executable, # Use the current Python interpreter + 'pokemon_eva_agent.py', # The EVA agent script + '--continue', # Flag to indicate continuing execution + '--steps', str(steps) + ] + + # Log the command + logger.info(f"Continuing Pokemon agent: {' '.join(cmd)}") + + # Start the process + active_process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1 # Line buffered + ) + + # Log the process ID + logger.info(f"Continue process started with PID: {active_process.pid}") + + # Check if process is still running after a brief delay + time.sleep(1) + if active_process.poll() is not None: + logger.error(f"Continue process exited immediately with code: {active_process.poll()}") + return {'success': False, 'error': f'Continue process exited immediately with code: {active_process.poll()}'} + + return {'success': True} + except Exception as e: + logger.error(f"Error continuing Pokemon agent: {e}") + import traceback + logger.error(f"Traceback: {traceback.format_exc()}") + return {'success': False, 'error': str(e)} + # Stop the Pokemon agent def stop_pokemon_agent(): """Stop the currently running Pokemon agent.""" diff --git a/pokemon-example/trajectory_driver.py b/pokemon-example/trajectory_driver.py index 05098f4..ba7b105 100644 --- a/pokemon-example/trajectory_driver.py +++ b/pokemon-example/trajectory_driver.py @@ -333,7 +333,7 @@
- +
@@ -347,6 +347,7 @@ +
@@ -373,6 +374,7 @@ const playBtn = document.getElementById('play-btn'); const pauseBtn = document.getElementById('pause-btn'); const resumeBtn = document.getElementById('resume-btn'); + const continueBtn = document.getElementById('continue-btn'); const rollbackBtn = document.getElementById('rollback-btn'); const stopBtn = document.getElementById('stop-btn'); const refreshBtn = document.getElementById('refresh-btn'); @@ -529,25 +531,36 @@ // Update UI based on status function updateUIForStatus(status) { - statusDisplay.textContent = `Status: ${status}`; - statusDisplay.className = 'status ' + status; - - if (status === 'running') { + clearInterval(pollingInterval); + pollingInterval = null; + + if (status === 'stopped') { + statusDisplay.textContent = 'Status: Stopped'; + statusDisplay.className = 'status stopped'; + playBtn.disabled = false; + pauseBtn.disabled = true; + resumeBtn.disabled = true; + continueBtn.disabled = true; + stopBtn.disabled = true; + rollbackBtn.disabled = false; + } else if (status === 'running') { + statusDisplay.textContent = 'Status: Running'; + statusDisplay.className = 'status running'; playBtn.disabled = true; pauseBtn.disabled = false; resumeBtn.disabled = true; + continueBtn.disabled = false; stopBtn.disabled = false; + rollbackBtn.disabled = true; } else if (status === 'paused') { + statusDisplay.textContent = 'Status: Paused'; + statusDisplay.className = 'status paused'; playBtn.disabled = true; pauseBtn.disabled = true; resumeBtn.disabled = false; + continueBtn.disabled = false; stopBtn.disabled = false; - } else if (status === 'stopped' || status === 'not_initialized') { - playBtn.disabled = false; - pauseBtn.disabled = true; - resumeBtn.disabled = true; - stopBtn.disabled = true; - rollbackBtn.disabled = true; + rollbackBtn.disabled = false; } } @@ -733,6 +746,33 @@ } }); + continueBtn.addEventListener('click', async () => { + try { + statusDisplay.textContent = 'Status: Continuing...'; + const r = await fetch(`${window.API_BASE_URL}/continue`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ steps: 100 }) + }); + const data = await r.json(); + if (data.success) { + pollingInterval = setInterval(fetchData, window.POLL_INTERVAL * 1000); + statusDisplay.textContent = 'Status: Running'; + statusDisplay.className = 'status running'; + pauseBtn.disabled = false; + stopBtn.disabled = false; + continueBtn.disabled = false; + resumeBtn.disabled = true; + } else { + alert(`Failed to continue: ${data.error}`); + updateUIForStatus('stopped'); + } + } catch (err) { + console.error("Continue error:", err); + alert("Continue error: " + err.message); + } + }); + rollbackBtn.addEventListener('click', async () => { if (selectedStepIndex === null) { alert("Please select a step to roll back to!");