Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,8 @@ wandb/
logs/
verl_checkpoints
verl_checkpoints/
verl.egg-info/
verl.egg-info/

test_memory.md

trajectories/traj_*.json
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[submodule "verl"]
path = verl
url = git@github.com:realtmxi/verl.git
url = https://github.com/realtmxi/verl.git
branch = main
50 changes: 50 additions & 0 deletions openmanus_rl/environments/prompts/alfworld.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,54 @@
Now it's your turn to take an action.
You should first reason step-by-step about the current situation. This reasoning process MUST be enclosed within <think> </think> tags.
Once you've finished your reasoning, you should choose an admissible action for current step and present it within <action> </action> tags.
"""

# Prompt template for the OpenManus-style structured rollout in ALFWorld.
# Placeholders filled at render time: {task_description}, {step_count},
# {history_length}, {action_history}, {current_step}, {current_observation},
# {admissible_actions}. The doubled braces {{...}} inside the <action> tag are
# literal braces after .format() — they are part of the emitted prompt text.
# The model is expected to answer with <think>, optional <reflection>,
# <memory_analysis>, and a final <action> XML section.
ALFWORLD_OPENMANUS_TEMPLATE = """
You are an expert agent operating in the ALFRED Embodied Environment. Your task is to: {task_description}
Prior to this step, you have already taken {step_count} step(s). Below are the most recent {history_length} observations and the corresponding actions you took: {action_history}
You are now at step {current_step} and your current observation is: {current_observation}
Your admissible actions of the current situation are: [{admissible_actions}].

Now it's your turn to take an action. Please output your response using the following separated XML tags:

First, analyze the current situation and plan:
<think>
Analyze the current situation and devise a plan to accomplish the task: {task_description}
What are the key steps needed to complete this task?
Based on the current observation, what should be our immediate next step?
How does this action advance our plan toward completing the task?
</think>

Then, if this is not the first step (step_count > 0), reflect on the last action:
<reflection>
Last observation analysis: Have we made progress toward solving the task?
What did the last action accomplish? Was it successful or did it encounter any issues?
Are we closer to completing the task?
</reflection>

Next, analyze your memory and past experiences:

<memory_analysis>
RAG-style retrieval from history:

[Thinking history - cite specific past reasoning from previous steps]
Example: "At step 3, I reasoned that we needed to find a knife first before attempting to slice..."
Example: "In step 5's thinking, I identified that the fridge typically contains perishable items..."

[Observation/Action history - cite specific observations and outcomes]
Example: "Step 2 observation: 'You are in the kitchen. You see a countertop 1, a cabinet 1...' - this revealed the kitchen layout"
Example: "Step 4 action 'go to fridge 1' succeeded and revealed tomato, lettuce..."
Example: "Step 6 failed with 'Nothing happens' when trying to take knife from drawer 2"

[Milestone tracking]
- Completed: Found target object at step X, Successfully picked up item at step Y
- Current state: Holding [items], Located at [location]
</memory_analysis>

Finally, present your chosen action:

<action>
action_choice: [selected admissible action from the list]
action_parameters: {{relevant details about the action if applicable}}
</action>
"""
5 changes: 4 additions & 1 deletion openmanus_rl/memory/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
from .memory import SimpleMemory
from .memory import SimpleMemory
from .file_memory import FileMemory

__all__ = ['SimpleMemory', 'FileMemory']
127 changes: 127 additions & 0 deletions openmanus_rl/memory/file_memory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
"""
Extended memory system with file persistence (memory.md).
Builds on SimpleMemory to add query and storage capabilities.
"""

from typing import List, Dict, Any, Tuple, Optional
from .memory import SimpleMemory


class FileMemory(SimpleMemory):
    """
    Extended memory that adds file persistence and query capabilities.

    Inherits from SimpleMemory for compatibility and mirrors important
    entries into a markdown file (memory.md by default), keeping an
    in-process cache of the most recent entries for fast querying.
    """

    # Single source of truth for the cache size; previously this limit was
    # hard-coded independently in _load_file_cache and store_to_file.
    CACHE_LIMIT = 100

    def __init__(self, memory_file: str = "memory.md"):
        """
        Args:
            memory_file: Path of the persistent memory file (appended to).
        """
        super().__init__()
        self.memory_file = memory_file
        self.file_cache: List[str] = []  # Most recent entries loaded from file
        self._load_file_cache()

    def _load_file_cache(self, limit: Optional[int] = None):
        """Load recent entries from the memory file into the cache.

        Blank lines are skipped so a cache rebuilt from disk matches the
        cache maintained incrementally by store_to_file().

        Args:
            limit: Maximum number of entries to keep; defaults to CACHE_LIMIT.
        """
        cap = self.CACHE_LIMIT if limit is None else limit
        self.file_cache = []
        try:
            with open(self.memory_file, 'r', encoding='utf-8') as f:
                lines = [line for line in f if line.strip()]
            self.file_cache = lines[-cap:]
        except FileNotFoundError:
            pass  # File doesn't exist yet; start with an empty cache

    def store_to_file(self, content: str, episode: str = "", step: int = 0):
        """
        Append one entry to the memory file and mirror it into the cache.

        Args:
            content: Text to store
            episode: Episode identifier (omitted from metadata when empty)
            step: Step number
        """
        metadata = f"E:{episode}|S:{step}" if episode else f"S:{step}"
        # Exactly one entry per line — no leading blank line — so that
        # _load_file_cache() reproduces this cache when reloading the file.
        entry = f"[{metadata}] {content}\n"
        with open(self.memory_file, 'a', encoding='utf-8') as f:
            f.write(entry)

        self.file_cache.append(entry)
        if len(self.file_cache) > self.CACHE_LIMIT:
            self.file_cache.pop(0)

    def query(self, query: str, limit: int = 3) -> str:
        """
        Query memory for relevant information via case-insensitive substring
        match. Searches the file cache first (more persistent memories),
        then the in-memory records inherited from SimpleMemory.

        Args:
            query: Search query
            limit: Maximum number of results

        Returns:
            Newline-joined matching entries, newest first, or a fixed
            "No relevant memory found" message when nothing matches.
        """
        results: List[str] = []
        query_lower = query.lower()

        # Newest entries first.
        for line in reversed(self.file_cache):
            if query_lower in line.lower():
                results.append(line.strip())
                if len(results) >= limit:
                    break

        # Fall back to in-memory data (structure inherited from SimpleMemory:
        # presumably a list per environment of dict records — TODO confirm).
        if len(results) < limit and self._data:
            for env_data in reversed(self._data):
                for record in reversed(env_data):
                    # A record matches if any of its string fields matches.
                    if any(
                        isinstance(value, str) and query_lower in value.lower()
                        for value in record.values()
                    ):
                        results.append(str(record))
                        if len(results) >= limit:
                            break
                if len(results) >= limit:
                    break

        return "\n".join(results) if results else "No relevant memory found"

    def store_staged(self, staged_data: Dict[str, Any], episode: str = "", step: int = 0):
        """
        Store data from staged processing.

        Persists the plan, explicit memory-store content, and reflection to
        the file, and also records a broadcast copy in the SimpleMemory
        structure for compatibility.

        Args:
            staged_data: Dictionary containing plan, action, reflection, etc.
            episode: Episode identifier
            step: Step number
        """
        # Persist the important stages to the file (skip missing/empty ones).
        if staged_data.get('plan'):
            self.store_to_file(f"[Plan] {staged_data['plan']}", episode, step)

        if staged_data.get('memory_store'):
            self.store_to_file(staged_data['memory_store'], episode, step)

        if staged_data.get('reflection'):
            self.store_to_file(f"[Reflection] {staged_data['reflection']}", episode, step)

        # Also store in the regular memory structure for compatibility.
        # NOTE(review): relies on self.batch_size and self.store() from
        # SimpleMemory — confirm their contracts against memory.py.
        if self._data is not None:
            record = {
                'text_obs': staged_data.get('plan', ''),
                'action': staged_data.get('action', ''),
                'reflection': staged_data.get('reflection', '')
            }
            # Broadcast the same record to every environment in the batch.
            broadcast_record = {k: [v] * self.batch_size for k, v in record.items()}
            self.store(broadcast_record)

    def clear_file(self):
        """Truncate the memory file and drop the cache."""
        with open(self.memory_file, 'w', encoding='utf-8'):
            pass  # Opening in 'w' mode truncates; nothing to write.
        self.file_cache = []

    def get_recent_from_file(self, n: int = 10) -> List[str]:
        """Return the n most recent cached entries (empty list when none)."""
        # Slicing an empty list already yields [] — no special case needed.
        return self.file_cache[-n:]
Empty file.
19 changes: 18 additions & 1 deletion openmanus_rl/multi_turn_rollout/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,19 @@
"""
Multi-turn rollout module.
Modular stage processing with memory.md integration.
"""

from .openmanus_rollout import OpenmanusRollout
from .modular_stages import ModularStageProcessor, DEFAULT_TOOLS
from .rollout_loop import TrajectoryCollector
from .utils import adjust_batch
from .tool_integration import GLOBAL_TOOL_REGISTRY, ToolRegistry, create_simple_tool_wrappers

__all__ = [
'OpenmanusRollout', # VERL-compatible rollout with modular stages
'ModularStageProcessor', # Standalone modular processor
'DEFAULT_TOOLS', # Simple tool functions
'TrajectoryCollector', # Legacy, kept for compatibility
'GLOBAL_TOOL_REGISTRY', # Global tool registry instance
'ToolRegistry', # Tool registry class
'create_simple_tool_wrappers' # Helper for tool wrappers
]
Loading
Loading