From d430ac1e9914c417088982977f714f73e671a92f Mon Sep 17 00:00:00 2001
From: swyxio <shawnthe1@gmail.com>
Date: Sun, 30 Mar 2025 09:40:56 -0700
Subject: [PATCH 1/3] add retries and readme

---
 pokemon-example/instructions.md             |  1 +
 pokemon-example/pokemon_agent_trajectory.py | 47 ++++++++++++++++++---
 2 files changed, 43 insertions(+), 5 deletions(-)

diff --git a/pokemon-example/instructions.md b/pokemon-example/instructions.md
index d6e87ed..6555d9f 100644
--- a/pokemon-example/instructions.md
+++ b/pokemon-example/instructions.md
@@ -6,6 +6,7 @@ uv pip install dotenv
 uv pip install morphcloud
 uv pip install pillow
 uv pip install mcp
+uv pip install fastapi
 
 # if you do not have a snapshot
 uv run setup_romwatch_server.py 
diff --git a/pokemon-example/pokemon_agent_trajectory.py b/pokemon-example/pokemon_agent_trajectory.py
index 8d9a42c..8dcf091 100644
--- a/pokemon-example/pokemon_agent_trajectory.py
+++ b/pokemon-example/pokemon_agent_trajectory.py
@@ -267,14 +267,14 @@ def __init__(self, server_url: str):
         log(LogLevel.INFO, f"Created MCP handler", extra={"server_url": server_url})
     
     async def connect(self):
-        """Connect to the MCP server."""
+        """Connect to the MCP server with retries and exponential backoff."""
         from mcp import ClientSession
         from mcp.client.sse import sse_client
         import asyncio
+        from contextlib import AsyncExitStack
         MAX_RETRIES = 10  # With exponential backoff, this gives us ~5 minutes total
         BASE_DELAY = 1  # Start with 1 second delay
         MAX_DELAY = 300  # Maximum delay of 5 minutes
-        from contextlib import AsyncExitStack
         
         self.exit_stack = AsyncExitStack()
         for attempt in range(MAX_RETRIES):
@@ -317,6 +317,44 @@ async def connect(self):
         return False
 
         
+        for attempt in range(MAX_RETRIES):
+            try:
+                log(LogLevel.INFO, f"Connecting to MCP server (attempt {attempt + 1}/{MAX_RETRIES})", 
+                    extra={"url": self.server_url})
+                
+                # Connect to the SSE endpoint
+                self.streams = await self.exit_stack.enter_async_context(
+                    sse_client(self.server_url)
+                )
+                self.session = await self.exit_stack.enter_async_context(
+                    ClientSession(self.streams[0], self.streams[1])
+                )
+                
+                await self.session.initialize()
+                
+                # List available tools and store them
+                response = await self.session.list_tools()
+                self.tools = response.tools
+                tool_names = [tool.name for tool in self.tools]
+                log(LogLevel.INFO, f"Connected to server", 
+                    extra={"tool_count": len(tool_names), "tools": tool_names})
+                return True
+                
+            except Exception as e:
+                import traceback
+                log(LogLevel.ERROR, f"Connection attempt {attempt + 1} failed", 
+                    extra={"error": str(e), "traceback": traceback.format_exc()})
+                
+                if attempt < MAX_RETRIES - 1:
+                    # Calculate delay with exponential backoff
+                    delay = min(BASE_DELAY * (2 ** attempt), MAX_DELAY)
+                    log(LogLevel.INFO, f"Retrying in {delay} seconds...")
+                    await asyncio.sleep(delay)
+                else:
+                    log(LogLevel.ERROR, "Max retries reached, giving up")
+                    return False
+        
+        return False
     
     def get_claude_tools(self):
         """Convert MCP tools to Claude-compatible format."""
@@ -349,7 +387,7 @@ async def call_tool_with_extras(self, tool_name, tool_input, include_state=True,
             raise ValueError("Not connected to MCP server")
         
         primary_result = await self.session.call_tool(tool_name, tool_input)
-        
+            
         # Parse the primary result manually to check if it already contains what we need
         has_state = False
         has_screenshot = False
@@ -562,7 +600,7 @@ async def _update_state(self, state: Dict[str, Any]) -> Dict[str, Any]:
             "valid_moves": game_state.get("valid_moves", []),
             "last_action": state.get("last_action", "")
         }
-        
+
         
         # Make sure morph_instance reference is preserved
         if "_morph_instance" in state:
@@ -1464,4 +1502,3 @@ async def run_pokemon_without_api(snapshot_id: str, steps: int = 100):
         api = PokemonAPI(host="127.0.0.1", port=args.port)
         print(f"Starting Pokemon Agent API on port {args.port}...")
         api.start()
-

From 23238b3fe3868363bdea6e79170cb169588ebbba Mon Sep 17 00:00:00 2001
From: swyxio <shawnthe1@gmail.com>
Date: Sun, 30 Mar 2025 12:55:48 -0700
Subject: [PATCH 2/3] store my prompts

---
 pokemon-example/pokemon_agent_trajectory.py | 6 +++++-
 pokemon-example/tasks.py                    | 4 ++--
 pokemon-example/trajectory_driver.py        | 4 ++--
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/pokemon-example/pokemon_agent_trajectory.py b/pokemon-example/pokemon_agent_trajectory.py
index 8dcf091..00c5faf 100644
--- a/pokemon-example/pokemon_agent_trajectory.py
+++ b/pokemon-example/pokemon_agent_trajectory.py
@@ -544,7 +544,11 @@ def __init__(self, mcp_handler: PokemonMCPHandler, model_name="claude-3-7-sonnet
         self.pause_event = asyncio.Event()
         self.pause_event.set()  # Start unpaused
 
-        self.system_prompt = """You are playing Pokemon Red. You can see the game screen and control the game by executing emulator commands: check your tools! for example, 'navigate_to' can help you move in the overworld. Before each action, explain your reasoning briefly, then use the available actions to control the game"""
+        self.system_prompt = """You are playing Pokemon Red. You can see the game screen and control the game by executing emulator commands: check your tools! for example, 'navigate_to' can help you move in the overworld. 
+        
+        Before each action, explain your reasoning briefly, plan your immediate next few steps needed (low level actions, e.g. 'to reach the Cave, i need to go 1. right, 2. right, 3. right, 4. up', not high level goals) to get there, then use the available actions to execute the next step in the game. 
+        
+        The game commands always register perfectly, so if you see no reaction to them, you have made an invalid command and misunderstood the game state. In battles, when you see an attack that isn't effective, you should examine your assumptions and update your beliefs. In general, search the solution space (try different things) before getting stuck in ruts."""
         
         log(LogLevel.INFO, f"Initialized PokemonAgent", 
             extra={"model": model_name, "max_tokens": max_tokens})
diff --git a/pokemon-example/tasks.py b/pokemon-example/tasks.py
index 59a19c1..3b0c340 100644
--- a/pokemon-example/tasks.py
+++ b/pokemon-example/tasks.py
@@ -174,9 +174,9 @@ def get_verification_function(self) -> Callable[[Dict[str, Any]], bool]:
 REGISTERED_TASKS = [
     TaskDefinition(
         id="escape-mt-moon",
-        instruction="Navigate through Mount Moon and exit to Route 4",
+        instruction="You are about to enter Mount Moon. The entrance is on the right of the Pokemon center. Go into the cave and get through Mount Moon",
         verification_fn_name="verify_escape_mount_moon",
-        verification_message="Reach Route 4 after leaving Mt. Moon.",
+        verification_message="Enter and complete Mount Moon (you are not done).",
         metadata={"game": "Pokemon Red", "objective": "mount_moon"},
     ),
     TaskDefinition(
diff --git a/pokemon-example/trajectory_driver.py b/pokemon-example/trajectory_driver.py
index f2a4d2f..05098f4 100644
--- a/pokemon-example/trajectory_driver.py
+++ b/pokemon-example/trajectory_driver.py
@@ -328,12 +328,12 @@
                 <div class="controls-container">
                     <div class="form-group">
                         <label for="snapshot-id">SNAPSHOT ID:</label>
-                        <input type="text" id="snapshot-id" placeholder="e.g., snapshot_yigk8b5d">
+                        <input type="text" id="snapshot-id" value="snapshot_6p53l6c2" placeholder="e.g., snapshot_yigk8b5d">
                     </div>
                     
                     <div class="form-group">
                         <label for="steps">NUMBER OF STEPS:</label>
-                        <input type="number" id="steps" value="100" min="1">
+                        <input type="number" id="steps" value="50" min="1">
                     </div>
                     
                     <div class="form-group">

From e67f79b29c13e45da7110654ccb942407e6e354e Mon Sep 17 00:00:00 2001
From: swyxio <shawnthe1@gmail.com>
Date: Sun, 30 Mar 2025 14:33:02 -0700
Subject: [PATCH 3/3] continue button

---
 pokemon-example/.gitignore                  |   2 +
 pokemon-example/eva.py                      | 205 +++++++++----------
 pokemon-example/pokemon_agent_trajectory.py | 209 ++++++++------------
 pokemon-example/pokemon_eva_agent.py        |  28 ++-
 pokemon-example/simple_driver.py            | 100 ++++++++++
 pokemon-example/trajectory_driver.py        |  62 ++++--
 6 files changed, 365 insertions(+), 241 deletions(-)
 create mode 100644 pokemon-example/.gitignore

diff --git a/pokemon-example/.gitignore b/pokemon-example/.gitignore
new file mode 100644
index 0000000..1bbb861
--- /dev/null
+++ b/pokemon-example/.gitignore
@@ -0,0 +1,2 @@
+__pycache__
+logs
\ No newline at end of file
diff --git a/pokemon-example/eva.py b/pokemon-example/eva.py
index 19bfdec..4fcca10 100644
--- a/pokemon-example/eva.py
+++ b/pokemon-example/eva.py
@@ -375,106 +375,120 @@ async def initialize_state(self, morph_instance: 'MorphInstance') -> Instance[S,
         pass
 
 
-async def run(task: VerifiedTask[S, A, R, T], agent: Agent[S, A, R, T], max_steps: int = 100, 
-        verify_every_step: bool = False, ttl_seconds: Optional[int] = None) -> Tuple[VerificationResult[R], Trajectory[S, A, R, T]]:
+async def initialize_run(task: VerifiedTask[S, A, R, T], agent: Agent[S, A, R, T], ttl_seconds: Optional[int] = None) -> Tuple[Instance[S, T], Trajectory[S, A, R, T], 'MorphInstance']:
     """
-    Run an agent on a task until the task is complete or max_steps is reached.
+    Initialize a run by creating the Morph instance and initial state.
+    
+    Returns:
+        Tuple of (initial_state, trajectory, morph_instance)
     """
-    log(LogLevel.INFO, f"Running agent for task", 
-        extra={"task": task.instruction, "max_steps": max_steps, "verify_every_step": verify_every_step})
-
-    agent.set_objective(task.instruction)
-
-    # Start a Morph instance from the task's snapshot
-    log(LogLevel.INFO, f"Starting Morph instance", 
-        extra={"snapshot_id": task.snapshot_id})
+    # Create a new Morph instance from the snapshot
+    log(LogLevel.INFO, "Creating MorphInstance", extra={"snapshot_id": task.snapshot_id})
     morph_instance = MorphInstance(task.snapshot_id, task.metadata, ttl_seconds)
+    log(LogLevel.INFO, "Created MorphInstance")
+
+    # Initialize the state
+    log(LogLevel.INFO, "Initializing state with MorphInstance")
+    initial_state = await agent.initialize_state(morph_instance)
+    log(LogLevel.INFO, "Initialized state")
     
-    try:
-        # Initialize the agent's state and trajectory
-        initial_state = await agent.initialize_state(morph_instance)
-        
-        # Set morph_instance reference
-        if hasattr(initial_state.state, '_morph_instance'):
-            object.__setattr__(initial_state.state, '_morph_instance', morph_instance)
-        
-        if hasattr(agent, 'trajectory') and agent.trajectory is not None:
-            trajectory = agent.trajectory
-        else:
-            trajectory = Trajectory[S, A, R, T]()
-            agent.trajectory = trajectory
+    # Set up trajectory
+    if hasattr(agent, 'trajectory') and agent.trajectory is not None:
+        trajectory = agent.trajectory
+    else:
+        trajectory = Trajectory[S, A, R, T]()
+        agent.trajectory = trajectory
+    
+    # Bind the agent to the instance
+    if hasattr(agent, 'bind_instance'):
+        agent.bind_instance(morph_instance)
+    
+    # Initialize with the initial state
+    trajectory.add_step(initial_state)
+    
+    return initial_state, trajectory, morph_instance
 
-        
-        # Bind the agent to the instance
-        if hasattr(agent, 'bind_instance'):
-            agent.bind_instance(morph_instance)
-        
-        # Initialize with the initial state
-        trajectory.add_step(initial_state)
-        
-        current_state = trajectory.current_state
-        if current_state is None:
-            error_msg = "No initial state available"
-            log(LogLevel.ERROR, error_msg)
-            raise ValueError(error_msg)
-        
-        for step_num in range(max_steps):
-            log(LogLevel.INFO, f"Starting step execution", 
-                extra={"step_num": step_num+1, "max_steps": max_steps})
-            
-            # Execute a step - now with await
-            log(LogLevel.INFO, "Determining next action...")
-            action = await agent.run_step(current_state)
-            log(LogLevel.INFO, f"Selected action", extra={"action": str(action)})
-            
-            # Apply the action to get a new state - now with await
-            log(LogLevel.INFO, f"Applying action", extra={"action": str(action)})
-            new_state_value = await agent.apply_action(current_state.state, action)
-            new_state = current_state.__class__(new_state_value)
-            
-            # Ensure morph_instance reference is preserved
-            if hasattr(new_state.state, '_morph_instance'):
-                object.__setattr__(new_state.state, '_morph_instance', morph_instance)
-            
-            # Record the step
-            trajectory.add_step(new_state, action)
-            
-            # Update current state
-            current_state = new_state
+async def continue_run(task: VerifiedTask[S, A, R, T], agent: Agent[S, A, R, T], 
+                      trajectory: Trajectory[S, A, R, T], morph_instance: 'MorphInstance',
+                      max_steps: int = 100, verify_every_step: bool = False) -> Tuple[VerificationResult[R], Trajectory[S, A, R, T]]:
+    """
+    Continue execution for max_steps more steps from the current state.
+    """
+    current_state = trajectory.current_state
+    if current_state is None:
+        error_msg = "No current state available"
+        log(LogLevel.ERROR, error_msg)
+        raise ValueError(error_msg)
+    
+    start_step = len(trajectory.steps)
+    for step_num in range(max_steps):
+        log(LogLevel.INFO, f"Starting step execution", 
+            extra={"step_num": start_step + step_num + 1})
+        
+        # Execute a step
+        log(LogLevel.INFO, "Determining next action...")
+        action = await agent.run_step(current_state)
+        log(LogLevel.INFO, f"Selected action", extra={"action": str(action)})
+        
+        # Apply the action to get a new state
+        log(LogLevel.INFO, f"Applying action", extra={"action": str(action)})
+        new_state_value = await agent.apply_action(current_state.state, action)
+        new_state = current_state.__class__(new_state_value)
+        
+        # Ensure morph_instance reference is preserved
+        if hasattr(new_state.state, '_morph_instance'):
+            object.__setattr__(new_state.state, '_morph_instance', morph_instance)
+        
+        # Record the step
+        trajectory.add_step(new_state, action)
+        
+        # Update current state
+        current_state = new_state
+        
+        # Check if we should verify
+        if verify_every_step or step_num == max_steps - 1:
+            log(LogLevel.INFO, "Verifying current state...")
+            result = task.verify(current_state, trajectory.actions)
+            trajectory.steps[-1].result = result
             
-            # Check if we should verify
-            if verify_every_step or step_num == max_steps - 1:
-                log(LogLevel.INFO, "Verifying current state...")
-                result = task.verify(current_state, trajectory.actions)
-                trajectory.steps[-1].result = result
-                
-                if result.success:
-                    log(LogLevel.SUCCESS, f"Task completed successfully", 
-                        extra={"steps_taken": step_num+1})
-                    trajectory.summarize()
-                    return result, trajectory
-        
-        # If we reached max steps without success:
-        log(LogLevel.WARNING, f"Reached maximum steps without success", 
-            extra={"max_steps": max_steps})
-        
-        if trajectory.final_result is not None:
-            trajectory.summarize()
-            return trajectory.final_result, trajectory
-        
-        result = VerificationResult(
-            value=None,
-            success=False,
-            message=f"Failed to complete task within {max_steps} steps",
-            details={"last_state": str(current_state.state)}
-        )
-        result.log()
+            if result.success:
+                log(LogLevel.SUCCESS, f"Task completed successfully", 
+                    extra={"steps_taken": start_step + step_num + 1})
+                trajectory.summarize()
+                return result, trajectory
+    
+    # If we reached max steps without success:
+    log(LogLevel.WARNING, f"Reached maximum steps without success", 
+        extra={"max_steps": max_steps})
+    
+    if trajectory.final_result is not None:
         trajectory.summarize()
-        return result, trajectory
+        return trajectory.final_result, trajectory
+    
+    result = VerificationResult(
+        value=None,
+        success=False,
+        message=f"Failed to complete task within {max_steps} steps",
+        details={"last_state": str(current_state.state)}
+    )
+    result.log()
+    trajectory.summarize()
+    return result, trajectory
+
+async def run(task: VerifiedTask[S, A, R, T], agent: Agent[S, A, R, T], max_steps: int = 100, 
+             verify_every_step: bool = False, ttl_seconds: Optional[int] = None) -> Tuple[VerificationResult[R], Trajectory[S, A, R, T]]:
+    """
+    Run an agent on a task until the task is complete or max_steps is reached.
+    """
+    try:
+        initial_state, trajectory, morph_instance = await initialize_run(task, agent, ttl_seconds)
+        return await continue_run(task, agent, trajectory, morph_instance, max_steps, verify_every_step)
         
+    except Exception as e:
+        log(LogLevel.ERROR, f"Error in run: {str(e)}")
+        raise
     finally:
-        # Always clean up the Morph instance
-        morph_instance.stop()
+        log(LogLevel.INFO, "Run completed")
 
 async def run_step(task: VerifiedTask[S, A, R, T], agent: Agent[S, A, R, T], 
              trajectory: Trajectory[S, A, R, T], verify: bool = False) -> Tuple[Instance[S, T], Optional[VerificationResult[R]]]:
@@ -621,11 +635,4 @@ def stop(self) -> None:
     
     def __del__(self) -> None:
         """Ensure the instance is stopped when this object is garbage collected."""
-        try:
-            if hasattr(self, 'instance') and self.instance:
-                self.stop()
-        except (AttributeError, Exception):
-            # Ignore errors during garbage collection
-            pass
-
-
+        pass
diff --git a/pokemon-example/pokemon_agent_trajectory.py b/pokemon-example/pokemon_agent_trajectory.py
index 00c5faf..424516d 100644
--- a/pokemon-example/pokemon_agent_trajectory.py
+++ b/pokemon-example/pokemon_agent_trajectory.py
@@ -94,11 +94,8 @@ def add_step(self, state: Instance[Dict[str, Any], str],
                     location = line.replace("Location:", "").strip()
                     break
         
-        # Get the current step index
-        current_step_index = len(self.steps)
-        
-        # Create snapshot (gets snapshot ID) with the current step index
-        snapshot_id = state.snapshot(step_index=current_step_index)
+        # Create snapshot (gets snapshot ID)
+        snapshot_id = state.snapshot()
         
         # Create specialized step
         step = PokemonTrajectoryStep(
@@ -205,12 +202,8 @@ def __init__(self, state: Dict[str, Any], morph_instance: Optional[MorphInstance
         super().__init__(state)
         self._morph_instance = morph_instance
     
-    def snapshot(self, step_index: int = 0) -> str:
-        """Create a snapshot and return the ID for visualization and rollback.
-        
-        Args:
-            step_index: The current step index in the trajectory
-        """
+    def snapshot(self) -> str:
+        """Create a snapshot and return the ID for visualization and rollback."""
         morph_instance = getattr(self.state, '_morph_instance', None) or self._morph_instance
         
         if morph_instance:
@@ -222,7 +215,6 @@ def snapshot(self, step_index: int = 0) -> str:
                 metadata = {
                     "timestamp": datetime.now().strftime("%Y%m%d_%H%M%S"),
                     "action": self.state.get("last_action", ""),
-                    "step_index": str(step_index),
                 }
                 
                 # Add game state summary to metadata
@@ -239,8 +231,7 @@ def snapshot(self, step_index: int = 0) -> str:
                 log(LogLevel.INFO, "Created snapshot", 
                     extra={
                         "event_type": EVENT_SNAPSHOT_CREATED, 
-                        "snapshot_id": snapshot_id, 
-                        "step_index": metadata.get("step_index", "0")
+                        "snapshot_id": snapshot_id
                     })
                 
                 return snapshot_id
@@ -314,8 +305,6 @@ async def connect(self):
                     log(LogLevel.ERROR, "Max retries reached, giving up")
                     return False
 
-        return False
-
         
         for attempt in range(MAX_RETRIES):
             try:
@@ -520,8 +509,9 @@ async def cleanup(self):
 class PokemonAgent(Agent[Dict[str, Any], str, bool, str]):
     """An agent that plays Pokemon using the Claude API and tracks its trajectory."""
     
-    def __init__(self, mcp_handler: PokemonMCPHandler, model_name="claude-3-7-sonnet-latest", max_tokens=1000, max_history=30):
+    def __init__(self, mcp_handler: PokemonMCPHandler, morph_instance: MorphInstance, model_name="claude-3-7-sonnet-latest", max_tokens=1000, max_history=30):
         """Initialize the Pokemon agent."""
+        log(LogLevel.INFO, "PokemonAgent.__init__ start", extra={"morph_instance_exists": morph_instance is not None})
         super().__init__()
         self.mcp_handler = mcp_handler
         self.anthropic = Anthropic()
@@ -530,6 +520,9 @@ def __init__(self, mcp_handler: PokemonMCPHandler, model_name="claude-3-7-sonnet
         self.temperature = 0.7
         self.message_history = []
         self.max_history = max_history
+        self.morph_instance = morph_instance  # Store morph_instance
+        self.objective = "Explore Pokemon Red"  # Default objective
+        log(LogLevel.INFO, "PokemonAgent stored morph_instance", extra={"morph_instance_exists": self.morph_instance is not None})
         
         # Specialized trajectory
         self.trajectory = PokemonTrajectory()
@@ -546,17 +539,25 @@ def __init__(self, mcp_handler: PokemonMCPHandler, model_name="claude-3-7-sonnet
 
         self.system_prompt = """You are playing Pokemon Red. You can see the game screen and control the game by executing emulator commands: check your tools! for example, 'navigate_to' can help you move in the overworld. 
         
-        Before each action, explain your reasoning briefly, plan your immediate next few steps needed (low level actions, e.g. 'to reach the Cave, i need to go 1. right, 2. right, 3. right, 4. up', not high level goals) to get there, then use the available actions to execute the next step in the game. 
-        
-        The game commands always register perfectly, so if you see no reaction to them, you have made an invalid command and misunderstood the game state. In battles, when you see an attack that isn't effective, you should examine your assumptions and update your beliefs. In general, search the solution space (try different things) before getting stuck in ruts."""
+Before each action, explain your reasoning briefly, plan your immediate next few steps needed (low level tool calls and actions, e.g. 'to reach the Cave from here, I need to go 1. right, 2. right, 3. right, 4. up', not high level goals like '1. explore the Cave 2. ??? 3. win!') to get there, then use the available actions to execute the next step in the game. 
+
+The game commands always register perfectly, so if you see no reaction to them, you have made an invalid command and misunderstood the game state. In battles, when you see an attack that isn't effective, you should examine your assumptions and update your beliefs. In general, search the solution space (try different things) before getting stuck in ruts.
+
+Mistakes you have made before:
+- do not talk to NPCs
+- do not plan with high level goals
+- do not insist on your prior knowledge about what attacks are strong against what types of Pokemon works when the evidence is the opposite
+- you miss the cave, which is a black hole to the side of the pokemon center.
+        """
         
         log(LogLevel.INFO, f"Initialized PokemonAgent", 
             extra={"model": model_name, "max_tokens": max_tokens})
     
     def set_objective(self, objective: str):
         """Set the agent's current objective."""
+        log(LogLevel.INFO, "PokemonAgent.set_objective start", extra={"objective": objective, "morph_instance_exists": hasattr(self, 'morph_instance') and self.morph_instance is not None})
         self.objective = objective
-        log(LogLevel.INFO, f"Setting agent objective", extra={"objective": objective})
+        log(LogLevel.INFO, "PokemonAgent.set_objective end")
     
     async def initialize_state(self, morph_instance: 'MorphInstance') -> PokemonInstance:
         """Initialize the state from a MorphCloud instance."""
@@ -580,7 +581,8 @@ async def initialize_state(self, morph_instance: 'MorphInstance') -> PokemonInst
         instance = PokemonInstance(initial_state, morph_instance)
         
         # Add a starting message to the history
-        initial_message = f"Your current objective is: {self.objective}\n\nYou may now begin playing Pokemon."
+        objective = getattr(self, 'objective', "Explore Pokemon Red")  # Use default if not set
+        initial_message = f"Your current objective is: {objective}\n\nYou may now begin playing Pokemon."
         self.message_history = [{"role": "user", "content": initial_message}]
         
         # Initialize the trajectory with the first step
@@ -599,7 +601,7 @@ async def _update_state(self, state: Dict[str, Any]) -> Dict[str, Any]:
         
         # Update the state
         new_state = {
-            "game_state": game_state.get("game_state", {}),
+            "game_state": game_state.get("game_state", ""),
             "screenshot": screenshot_result.get("screenshot", ""),
             "valid_moves": game_state.get("valid_moves", []),
             "last_action": state.get("last_action", "")
@@ -735,6 +737,14 @@ async def _retry_with_nudge(self, max_retries=3):
                     assistant_content.append({"type": "text", "text": block.text})
                 elif block.type == "tool_use":
                     assistant_content.append({"type": "tool_use", **dict(block)})
+                    # # Add a tool result immediately after each tool use
+                    # tool_result = {
+                    #     "type": "tool_result",
+                    #     "tool_call_id": block.id,
+                    #     "name": block.name,
+                    #     "result": "Tool call received"
+                    # }
+                    # assistant_content.append(tool_result)
                     log(LogLevel.DEBUG, f"Found tool call", 
                         extra={"tool": block.name, "input": json.dumps(block.input)})
             
@@ -786,95 +796,29 @@ async def apply_action(self, state: Dict[str, Any], action: str) -> Dict[str, An
             "screenshot": state.get("screenshot", ""),
             "valid_moves": state.get("valid_moves", [])
         }
-
-        
-        # Make sure morph_instance reference is preserved
-        if "_morph_instance" in state:
-            new_state["_morph_instance"] = state["_morph_instance"]
         
-        # Update the state with fresh game information
-        new_state = await self._update_state(new_state)
-        
-        # Log tool result with standardized event
-        log(LogLevel.INFO, f"Tool result", 
-            extra={
-                "event_type": EVENT_TOOL_RESULT, 
-                "action": action,
-                "game_state": new_state["game_state"]
-            })
-        
-        # Create tool results from the action for Claude history
-        tool_results = []
-        
-        # Extract most recent assistant message to get tool ID
-        if self.message_history and self.message_history[-1]["role"] == "assistant":
-            assistant_content = self.message_history[-1]["content"]
-            tool_use_items = [item for item in assistant_content if isinstance(item, dict) and item.get("type") == "tool_use"]
-            
-            if tool_use_items:
-                tool_use_id = tool_use_items[0].get("id")
-                
-                if tool_use_id:
-                    # Create result content
-                    result_content = []
-                    
-                    # Add text result
-                    result_text = f"Action '{action}' executed."
-                    if "result" in action_result:
-                        result_text += f"\nResult: {action_result['result']}"
-                    
-                    result_content.append({"type": "text", "text": result_text})
-                    
-                    # Add screenshot if available
-                    if new_state["screenshot"]:
-                        result_content.append({
-                            "type": "image",
-                            "source": {
-                                "type": "base64",
-                                "media_type": "image/png",
-                                "data": new_state["screenshot"]
-                            }
-                        })
-                    
-                    # Create a proper tool result
-                    tool_results.append({
-                        "type": "tool_result",
-                        "tool_use_id": tool_use_id,
-                        "content": result_content
-                    })
-                    
-                    # Add the tool results to message history
-                    self.message_history.append({"role": "user", "content": tool_results})
-        
-        # Create PokemonInstance from the new state
-        instance = PokemonInstance(new_state)
-        
-        if isinstance(self.trajectory, PokemonTrajectory):
-            # Use the enhanced method with Pokemon-specific fields
+        # Update trajectory with the current step's data
+        if hasattr(self, 'trajectory'):
             self.trajectory.add_step(
-                state=instance,
+                state=Instance(new_state),
                 action=action,
                 tool_name=self.current_tool_name,
                 tool_input=self.current_tool_input,
                 claude_text=self.current_claude_text
             )
-        else:
-            # Fall back to the base Trajectory method
-            log(LogLevel.WARNING, "Using base Trajectory.add_step instead of PokemonTrajectory", 
-                extra={"trajectory_class": self.trajectory.__class__.__name__})
-            self.trajectory.add_step(state=instance, action=action)
-
+            # Reset step data
+            self.current_tool_name = None
+            self.current_tool_input = None
+            self.current_claude_text = None
         
-        # Reset current step data
-        self.current_tool_name = None
-        self.current_tool_input = None
-        self.current_claude_text = None
+        # Update state with latest game information
+        updated_state = await self._update_state(new_state)
         
-        # Log action completion
-        log(LogLevel.INFO, f"Action completed", 
-            extra={"event_type": EVENT_ACTION_COMPLETED, "action": action})
+        # Make sure morph_instance reference is preserved
+        if "_morph_instance" in state:
+            updated_state["_morph_instance"] = state["_morph_instance"]
         
-        return new_state
+        return updated_state
     
     async def summarize_history(self):
         """Summarize the conversation history to save context space."""
@@ -1190,17 +1134,28 @@ async def run_agent_task(self, snapshot_id: str, steps: int, task_id: Optional[s
                 return
 
             # 5. Create the agent
-            self.agent = PokemonAgent(mcp_handler=mcp_handler)
+            log(LogLevel.INFO, "About to create PokemonAgent", extra={"morph_instance_exists": self.morph_instance is not None})
+            self.agent = PokemonAgent(mcp_handler=mcp_handler, morph_instance=self.morph_instance)
+            log(LogLevel.INFO, "Created PokemonAgent")
+
+            # Initialize agent state
+            log(LogLevel.INFO, "Initializing agent state")
+            await self.agent.initialize_state(self.morph_instance)
+            log(LogLevel.INFO, "Initialized agent state")
             
             # Decide which VerifiedTask to run
             if task_id:
                 # Use a real task from tasks.py
+                log(LogLevel.INFO, "Creating task from task_id", extra={"task_id": task_id})
                 self.running_task = create_pokemon_verified_task(task_id, snapshot_id)
+                log(LogLevel.INFO, "Created task", extra={"instruction": self.running_task.instruction})
                 # The agent's "objective" might be the instruction from the actual task:
+                log(LogLevel.INFO, "Setting agent objective from task")
                 self.agent.set_objective(self.running_task.instruction)
+                log(LogLevel.INFO, "Set agent objective")
             else:
                 # Fallback: create a trivial verification or use the 'objective' string
-                # logger.warning("No task_id provided. Using trivial verification function.")
+                log(LogLevel.INFO, "Creating fallback task with objective", extra={"objective": objective})
                 def dummy_verify_func(gs: Dict[str, Any]) -> bool:
                     return False  # Always returns false => never completes
                 self.running_task = PokemonVerifiedTask.create(
@@ -1210,10 +1165,13 @@ def dummy_verify_func(gs: Dict[str, Any]) -> bool:
                     verification_message="No real verification in place",
                     metadata={"game": "Pokemon Red", "objective": objective}
                 )
+                log(LogLevel.INFO, "Created fallback task")
+                log(LogLevel.INFO, "Setting agent objective from fallback")
                 self.agent.set_objective(objective)
+                log(LogLevel.INFO, "Set agent objective")
 
             # 6. Run the agent with E.V.A.
-            # logger.info(f"Starting E.V.A. run() for task: {self.running_task.instruction}")
+            log(LogLevel.INFO, "Starting E.V.A. run()", extra={"task_instruction": self.running_task.instruction})
             from eva import run
             result, trajectory = await run(
                 task=self.running_task,
@@ -1234,25 +1192,23 @@ def dummy_verify_func(gs: Dict[str, Any]) -> bool:
     
     async def rollback_to_step(self, step_index: int):
         """Roll back to a specific step."""
+        if not self.agent or not self.agent.trajectory:
+            log(LogLevel.ERROR, "No trajectory to roll back")
+            return
+        
+        if step_index < 0 or step_index >= len(self.agent.trajectory.steps):
+            log(LogLevel.ERROR, f"Invalid step index {step_index}")
+            return
+        
         try:
-            log(LogLevel.INFO, f"Rolling back to step {step_index}")
-            
-            if not self.agent or not self.task:
-                log(LogLevel.ERROR, "Cannot rollback - agent or task not initialized")
-                return
-            
-            # Get snapshot ID from the step
-            target_step = self.agent.trajectory.steps[step_index]
-            snapshot_id = target_step.snapshot
+            # Get the snapshot ID from the step
+            step = self.agent.trajectory.steps[step_index]
+            snapshot_id = step.snapshot
             
             if not snapshot_id:
-                log(LogLevel.ERROR, f"No snapshot ID available for step {step_index}")
+                log(LogLevel.ERROR, "No snapshot ID for step")
                 return
             
-            # Clean up existing MorphInstance if any
-            if self.morph_instance:
-                self.morph_instance.stop()
-            
             # Start new MorphVM instance from the snapshot
             log(LogLevel.INFO, f"Starting MorphVM instance from rollback snapshot", 
                 extra={"snapshot_id": snapshot_id})
@@ -1290,7 +1246,7 @@ async def rollback_to_step(self, step_index: int):
                 return
             
             # Create new agent with the same objective
-            self.agent = PokemonAgent(mcp_handler=mcp_handler)
+            self.agent = PokemonAgent(mcp_handler=mcp_handler, morph_instance=self.morph_instance)
 
             self.agent.set_objective(getattr(self.task, 'instruction', "Continue playing Pokemon"))
             
@@ -1299,17 +1255,14 @@ async def rollback_to_step(self, step_index: int):
             for i in range(step_index + 1):
                 new_trajectory.steps.append(self.agent.trajectory.steps[i])
             
+            # Update agent's trajectory
             self.agent.trajectory = new_trajectory
             
-            # Resume the agent
-            self.agent.resume()
-            
-            log(LogLevel.SUCCESS, f"Rollback to step {step_index} completed")
+            log(LogLevel.SUCCESS, f"Successfully rolled back to step {step_index}")
             
         except Exception as e:
-            log(LogLevel.ERROR, f"Error during rollback", extra={"error": str(e)})
-            import traceback
-            log(LogLevel.ERROR, f"Rollback error traceback", extra={"traceback": traceback.format_exc()})
+            log(LogLevel.ERROR, f"Error during rollback: {e}")
+            raise
     
     def start(self):
         """Start the API server."""
@@ -1445,7 +1398,7 @@ async def run_pokemon_without_api(snapshot_id: str, steps: int = 100):
             return
         
         # Create the agent
-        agent = PokemonAgent(mcp_handler=mcp_handler)
+        agent = PokemonAgent(mcp_handler=mcp_handler, morph_instance=morph_instance)
 
 
         # agent.trajectory = PokemonTrajectory()
diff --git a/pokemon-example/pokemon_eva_agent.py b/pokemon-example/pokemon_eva_agent.py
index e947809..5fa660d 100644
--- a/pokemon-example/pokemon_eva_agent.py
+++ b/pokemon-example/pokemon_eva_agent.py
@@ -403,6 +403,8 @@ def __init__(self, mcp_handler: PokemonMCPHandler, model_name="claude-3-7-sonnet
             mcp_handler: Handler for MCP communication
             model_name: Claude model to use
             max_tokens: Maximum tokens to generate
+            max_history: Maximum number of messages to keep in history
+            morph_instance: Optional MorphInstance for snapshotting
         """
         super().__init__()
         self.mcp_handler = mcp_handler
@@ -1057,9 +1059,7 @@ def verify_beat_first_gym(game_state: Dict[str, Any]) -> bool:
         )
         
         # Create a Pokemon agent
-        agent = PokemonAgent(mcp_handler)
-
-        agent.morph_instance = morph_instance
+        agent = PokemonAgent(mcp_handler=mcp_handler, morph_instance=morph_instance)
 
         novnc_url = morph_instance.instance.expose_http_service(
             name="novnc",
@@ -1104,7 +1104,29 @@ def verify_beat_first_gym(game_state: Dict[str, Any]) -> bool:
     parser = argparse.ArgumentParser(description='Run Pokemon agent with custom snapshot ID')
     parser.add_argument('--snapshot-id', type=str, help='Snapshot ID to use for the MorphVM instance')
     parser.add_argument('--steps', type=int, default=200, help='Number of steps to run the agent for')
+    parser.add_argument('--continue', action='store_true', help='Continue from the last state')
     
     args = parser.parse_args()
     
+    # If continuing, use the last snapshot ID
+    if getattr(args, 'continue', False):
+        # Get the latest snapshot ID from the log file
+        log_file = get_latest_eva_log_file()
+        if log_file:
+            with open(log_file, 'r') as f:
+                lines = f.readlines()
+                # Read backwards to find the last snapshot
+                for line in reversed(lines):
+                    try:
+                        data = json.loads(line)
+                        if 'snapshot_id' in data.get('extra', {}):
+                            args.snapshot_id = data['extra']['snapshot_id']
+                            break
+                    except json.JSONDecodeError:
+                        continue
+        
+        if not args.snapshot_id:
+            print("Error: No previous snapshot found to continue from")
+            sys.exit(1)
+    
     asyncio.run(run_pokemon_example(args.snapshot_id, args.steps))
diff --git a/pokemon-example/simple_driver.py b/pokemon-example/simple_driver.py
index 947bfa6..4cc7840 100644
--- a/pokemon-example/simple_driver.py
+++ b/pokemon-example/simple_driver.py
@@ -282,6 +282,7 @@
                     <div class="control-buttons">
                         <button id="play-btn" class="primary">PLAY</button>
                         <button id="stop-btn" class="stop" disabled>STOP</button>
+                        <button id="continue-btn" class="primary">CONTINUE</button>
                     </div>
 
                     <div id="status-display" class="status">Status: Ready</div>
@@ -304,6 +305,7 @@
         const gameIframe = document.getElementById('game-iframe');
         const playBtn = document.getElementById('play-btn');
         const stopBtn = document.getElementById('stop-btn');
+        const continueBtn = document.getElementById('continue-btn');
         const refreshLogsBtn = document.getElementById('refresh-logs-btn');
         const snapshotsList = document.getElementById('snapshots-list');
         const statusDisplay = document.getElementById('status-display');
@@ -465,6 +467,7 @@
         function resetPlayState() {
             playBtn.disabled = false;
             stopBtn.disabled = true;
+            continueBtn.disabled = true;
             statusDisplay.textContent = 'Status: Ready';
             statusDisplay.className = 'status';
         }
@@ -483,6 +486,7 @@
                 // Update UI
                 playBtn.disabled = true;
                 stopBtn.disabled = false;
+                continueBtn.disabled = false;
                 statusDisplay.textContent = 'Status: Starting...';
                 statusDisplay.className = 'status running';
                 
@@ -542,6 +546,44 @@
             }
         });
 
+        // Continue button click handler: asks the server for more steps
+        continueBtn.addEventListener('click', async () => {
+            try {
+                // Update UI
+                playBtn.disabled = true;
+                stopBtn.disabled = false;
+                continueBtn.disabled = true;
+                statusDisplay.textContent = 'Status: Continuing...';
+                statusDisplay.className = 'status running';
+                
+                // Continue the script
+                const response = await fetch('/api/continue', {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/json'
+                    },
+                    body: JSON.stringify({
+                        steps: document.getElementById('steps').value
+                    })
+                });
+                
+                const data = await response.json();
+                
+                if (data.success) {
+                    clearInterval(logUpdateInterval);  // drop any running poller so timers don't stack
+                    logUpdateInterval = setInterval(fetchLogs, 2000);
+                    statusDisplay.textContent = 'Status: Running';
+                } else {
+                    alert(`Failed to continue: ${data.error}`);
+                    resetPlayState();
+                }
+            } catch (error) {
+                console.error('Error continuing script:', error);
+                alert(`Error: ${error.message}`);
+                resetPlayState();
+            }
+        });
+
         // Refresh logs button click handler
         refreshLogsBtn.addEventListener('click', () => {
             fetchLogs();
@@ -850,6 +892,18 @@ def do_POST(self):
                 self.send_header('Content-Type', 'application/json')
                 self.end_headers()
                 self.wfile.write(json.dumps(result).encode())
+
+            elif self.path == '/api/continue':
+                # Parse the request data
+                params = json.loads(post_data)
+                
+                # Continue the Pokemon agent
+                result = continue_pokemon_agent(steps=params.get('steps', 100))
+                
+                self.send_response(200)
+                self.send_header('Content-Type', 'application/json')
+                self.end_headers()
+                self.wfile.write(json.dumps(result).encode())
                 
             else:
                 self.send_response(404)
@@ -930,6 +984,52 @@ def start_pokemon_agent(snapshot_id, steps=10):
         logger.error(f"Traceback: {traceback.format_exc()}")
         return {'success': False, 'error': str(e)}
 
+# Continue the Pokemon agent
+def continue_pokemon_agent(steps=100):
+    """Launch ``pokemon_eva_agent.py --continue`` for ``steps`` more steps.
+
+    ``steps`` may arrive as a string from the request body, so it is coerced to
+    a positive int first. Returns ``{'success': bool}`` plus ``'error'`` on failure.
+    """
+    global active_process
+
+    try:
+        steps = int(steps)
+    except (TypeError, ValueError, OverflowError):
+        return {'success': False, 'error': f'Invalid steps value: {steps!r}'}
+    if steps < 1:
+        return {'success': False, 'error': 'steps must be a positive integer'}
+
+    with process_lock:
+        if active_process is None or active_process.poll() is not None:
+            return {'success': False, 'error': 'No agent is currently running'}
+
+        try:
+            cmd = [sys.executable, 'pokemon_eva_agent.py', '--continue', '--steps', str(steps)]
+            logger.info(f"Continuing Pokemon agent: {' '.join(cmd)}")
+
+            # NOTE(review): rebinding active_process drops the handle to the agent
+            # that is still running — confirm whether it should be stopped first.
+            active_process = subprocess.Popen(
+                cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
+                text=True, bufsize=1,  # line buffered
+            )
+            logger.info(f"Continue process started with PID: {active_process.pid}")
+
+            # Brief pause so an immediate crash is reported to the caller
+            time.sleep(1)
+            exit_code = active_process.poll()
+            if exit_code is not None:
+                logger.error(f"Continue process exited immediately with code: {exit_code}")
+                return {'success': False, 'error': f'Continue process exited immediately with code: {exit_code}'}
+
+            return {'success': True}
+        except Exception as e:
+            logger.error(f"Error continuing Pokemon agent: {e}")
+            import traceback
+            logger.error(f"Traceback: {traceback.format_exc()}")
+            return {'success': False, 'error': str(e)}
+
 # Stop the Pokemon agent
 def stop_pokemon_agent():
     """Stop the currently running Pokemon agent."""
diff --git a/pokemon-example/trajectory_driver.py b/pokemon-example/trajectory_driver.py
index 05098f4..ba7b105 100644
--- a/pokemon-example/trajectory_driver.py
+++ b/pokemon-example/trajectory_driver.py
@@ -333,7 +333,7 @@
                     
                     <div class="form-group">
                         <label for="steps">NUMBER OF STEPS:</label>
-                        <input type="number" id="steps" value="50" min="1">
+                        <input type="number" id="steps" value="5" min="1">
                     </div>
                     
                     <div class="form-group">
@@ -347,6 +347,7 @@
                         <button id="play-btn" class="primary">PLAY</button>
                         <button id="pause-btn" class="secondary" disabled>PAUSE</button>
                         <button id="resume-btn" class="secondary" disabled>RESUME</button>
+                        <button id="continue-btn" class="secondary" disabled>CONTINUE (+100)</button>
                         <button id="rollback-btn" class="secondary" disabled>ROLLBACK TO SELECTED</button>
                         <button id="stop-btn" class="stop" disabled>STOP</button>
                     </div>
@@ -373,6 +374,7 @@
         const playBtn = document.getElementById('play-btn');
         const pauseBtn = document.getElementById('pause-btn');
         const resumeBtn = document.getElementById('resume-btn');
+        const continueBtn = document.getElementById('continue-btn');
         const rollbackBtn = document.getElementById('rollback-btn');
         const stopBtn = document.getElementById('stop-btn');
         const refreshBtn = document.getElementById('refresh-btn');
@@ -529,25 +531,36 @@
 
         // Update UI based on status
         function updateUIForStatus(status) {
-            statusDisplay.textContent = `Status: ${status}`;
-            statusDisplay.className = 'status ' + status;
-
-            if (status === 'running') {
+            clearInterval(pollingInterval);
+            pollingInterval = null;
+            
+            if (status === 'stopped') {
+                statusDisplay.textContent = 'Status: Stopped';
+                statusDisplay.className = 'status stopped';
+                playBtn.disabled = false;
+                pauseBtn.disabled = true;
+                resumeBtn.disabled = true;
+                continueBtn.disabled = true;
+                stopBtn.disabled = true;
+                rollbackBtn.disabled = false;
+            } else if (status === 'running') {
+                statusDisplay.textContent = 'Status: Running';
+                statusDisplay.className = 'status running';
                 playBtn.disabled = true;
                 pauseBtn.disabled = false;
                 resumeBtn.disabled = true;
+                continueBtn.disabled = false;
                 stopBtn.disabled = false;
+                rollbackBtn.disabled = true;
             } else if (status === 'paused') {
+                statusDisplay.textContent = 'Status: Paused';
+                statusDisplay.className = 'status paused';
                 playBtn.disabled = true;
                 pauseBtn.disabled = true;
                 resumeBtn.disabled = false;
+                continueBtn.disabled = false;
                 stopBtn.disabled = false;
-            } else if (status === 'stopped' || status === 'not_initialized') {
-                playBtn.disabled = false;
-                pauseBtn.disabled = true;
-                resumeBtn.disabled = true;
-                stopBtn.disabled = true;
-                rollbackBtn.disabled = true;
+                rollbackBtn.disabled = false;
             }
         }
 
@@ -733,6 +746,33 @@
             }
         });
 
+        // CONTINUE: request 100 more steps from the API, then restart polling
+        continueBtn.addEventListener('click', async () => {
+            try {
+                statusDisplay.textContent = 'Status: Continuing...';
+                const r = await fetch(`${window.API_BASE_URL}/continue`, {
+                    method: 'POST',
+                    headers: { 'Content-Type': 'application/json' },
+                    body: JSON.stringify({ steps: 100 })
+                });
+                const data = await r.json();
+                if (data.success) {
+                    clearInterval(pollingInterval);  // drop the previous poller so timers don't stack
+                    pollingInterval = setInterval(fetchData, window.POLL_INTERVAL * 1000);
+                    statusDisplay.textContent = 'Status: Running';
+                    statusDisplay.className = 'status running';
+                    pauseBtn.disabled = stopBtn.disabled = continueBtn.disabled = false;
+                    resumeBtn.disabled = true;
+                } else {
+                    alert(`Failed to continue: ${data.error}`);
+                    updateUIForStatus('stopped');
+                }
+            } catch (err) {
+                console.error("Continue error:", err);
+                alert("Continue error: " + err.message);
+            }
+        });
+
         rollbackBtn.addEventListener('click', async () => {
             if (selectedStepIndex === null) {
                 alert("Please select a step to roll back to!");