Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,8 @@ wandb/
logs/
verl_checkpoints
verl_checkpoints/
verl.egg-info/
verl.egg-info/

test_memory.md

trajectories/traj_*.json
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[submodule "verl"]
path = verl
url = git@github.com:realtmxi/verl.git
url = https://github.com/realtmxi/verl.git
branch = main
50 changes: 50 additions & 0 deletions openmanus_rl/environments/prompts/alfworld.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,54 @@
Now it's your turn to take an action.
You should first reason step-by-step about the current situation. This reasoning process MUST be enclosed within <think> </think> tags.
Once you've finished your reasoning, you should choose an admissible action for current step and present it within <action> </action> tags.
"""

# Prompt template for the OpenManus-style structured rollout in ALFWorld.
# Placeholders filled at render time: {task_description}, {step_count},
# {history_length}, {action_history}, {current_step}, {current_observation},
# {admissible_actions}. The doubled braces {{...}} inside the <action> tag are
# literal braces after .format() — they are part of the emitted prompt text.
# The model is expected to answer with <think>, optional <reflection>,
# <memory_analysis>, and a final <action> XML section.
ALFWORLD_OPENMANUS_TEMPLATE = """
You are an expert agent operating in the ALFRED Embodied Environment. Your task is to: {task_description}
Prior to this step, you have already taken {step_count} step(s). Below are the most recent {history_length} observations and the corresponding actions you took: {action_history}
You are now at step {current_step} and your current observation is: {current_observation}
Your admissible actions of the current situation are: [{admissible_actions}].

Now it's your turn to take an action. Please output your response using the following separated XML tags:

First, analyze the current situation and plan:
<think>
Analyze the current situation and devise a plan to accomplish the task: {task_description}
What are the key steps needed to complete this task?
Based on the current observation, what should be our immediate next step?
How does this action advance our plan toward completing the task?
</think>

Then, if this is not the first step (step_count > 0), reflect on the last action:
<reflection>
Last observation analysis: Have we made progress toward solving the task?
What did the last action accomplish? Was it successful or did it encounter any issues?
Are we closer to completing the task?
</reflection>

Next, analyze your memory and past experiences:

<memory_analysis>
RAG-style retrieval from history:

[Thinking history - cite specific past reasoning from previous steps]
Example: "At step 3, I reasoned that we needed to find a knife first before attempting to slice..."
Example: "In step 5's thinking, I identified that the fridge typically contains perishable items..."

[Observation/Action history - cite specific observations and outcomes]
Example: "Step 2 observation: 'You are in the kitchen. You see a countertop 1, a cabinet 1...' - this revealed the kitchen layout"
Example: "Step 4 action 'go to fridge 1' succeeded and revealed tomato, lettuce..."
Example: "Step 6 failed with 'Nothing happens' when trying to take knife from drawer 2"

[Milestone tracking]
- Completed: Found target object at step X, Successfully picked up item at step Y
- Current state: Holding [items], Located at [location]
</memory_analysis>

Finally, present your chosen action:

<action>
action_choice: [selected admissible action from the list]
action_parameters: {{relevant details about the action if applicable}}
</action>
"""
5 changes: 4 additions & 1 deletion openmanus_rl/memory/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,4 @@
from .memory import SimpleMemory
from .memory import SimpleMemory
from .file_memory import FileMemory

__all__ = ['SimpleMemory', 'FileMemory']
127 changes: 127 additions & 0 deletions openmanus_rl/memory/file_memory.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
"""
Extended memory system with file persistence (memory.md).
Builds on SimpleMemory to add query and storage capabilities.
"""

from typing import List, Dict, Any, Tuple, Optional
from .memory import SimpleMemory


class FileMemory(SimpleMemory):
    """
    Extended memory that adds file persistence and query capabilities.

    Inherits from SimpleMemory for compatibility and mirrors important
    entries into a markdown file (memory.md by default), keeping an
    in-process cache of the most recent entries for fast querying.
    """

    # Single source of truth for the cache size; previously this limit was
    # hard-coded independently in _load_file_cache and store_to_file.
    CACHE_LIMIT = 100

    def __init__(self, memory_file: str = "memory.md"):
        """
        Args:
            memory_file: Path of the persistent memory file (appended to).
        """
        super().__init__()
        self.memory_file = memory_file
        self.file_cache: List[str] = []  # Most recent entries loaded from file
        self._load_file_cache()

    def _load_file_cache(self, limit: Optional[int] = None):
        """Load recent entries from the memory file into the cache.

        Blank lines are skipped so a cache rebuilt from disk matches the
        cache maintained incrementally by store_to_file().

        Args:
            limit: Maximum number of entries to keep; defaults to CACHE_LIMIT.
        """
        cap = self.CACHE_LIMIT if limit is None else limit
        self.file_cache = []
        try:
            with open(self.memory_file, 'r', encoding='utf-8') as f:
                lines = [line for line in f if line.strip()]
            self.file_cache = lines[-cap:]
        except FileNotFoundError:
            pass  # File doesn't exist yet; start with an empty cache

    def store_to_file(self, content: str, episode: str = "", step: int = 0):
        """
        Append one entry to the memory file and mirror it into the cache.

        Args:
            content: Text to store
            episode: Episode identifier (omitted from metadata when empty)
            step: Step number
        """
        metadata = f"E:{episode}|S:{step}" if episode else f"S:{step}"
        # Exactly one entry per line — no leading blank line — so that
        # _load_file_cache() reproduces this cache when reloading the file.
        entry = f"[{metadata}] {content}\n"
        with open(self.memory_file, 'a', encoding='utf-8') as f:
            f.write(entry)

        self.file_cache.append(entry)
        if len(self.file_cache) > self.CACHE_LIMIT:
            self.file_cache.pop(0)

    def query(self, query: str, limit: int = 3) -> str:
        """
        Query memory for relevant information via case-insensitive substring
        match. Searches the file cache first (more persistent memories),
        then the in-memory records inherited from SimpleMemory.

        Args:
            query: Search query
            limit: Maximum number of results

        Returns:
            Newline-joined matching entries, newest first, or a fixed
            "No relevant memory found" message when nothing matches.
        """
        results: List[str] = []
        query_lower = query.lower()

        # Newest entries first.
        for line in reversed(self.file_cache):
            if query_lower in line.lower():
                results.append(line.strip())
                if len(results) >= limit:
                    break

        # Fall back to in-memory data (structure inherited from SimpleMemory:
        # presumably a list per environment of dict records — TODO confirm).
        if len(results) < limit and self._data:
            for env_data in reversed(self._data):
                for record in reversed(env_data):
                    # A record matches if any of its string fields matches.
                    if any(
                        isinstance(value, str) and query_lower in value.lower()
                        for value in record.values()
                    ):
                        results.append(str(record))
                        if len(results) >= limit:
                            break
                if len(results) >= limit:
                    break

        return "\n".join(results) if results else "No relevant memory found"

    def store_staged(self, staged_data: Dict[str, Any], episode: str = "", step: int = 0):
        """
        Store data from staged processing.

        Persists the plan, explicit memory-store content, and reflection to
        the file, and also records a broadcast copy in the SimpleMemory
        structure for compatibility.

        Args:
            staged_data: Dictionary containing plan, action, reflection, etc.
            episode: Episode identifier
            step: Step number
        """
        # Persist the important stages to the file (skip missing/empty ones).
        if staged_data.get('plan'):
            self.store_to_file(f"[Plan] {staged_data['plan']}", episode, step)

        if staged_data.get('memory_store'):
            self.store_to_file(staged_data['memory_store'], episode, step)

        if staged_data.get('reflection'):
            self.store_to_file(f"[Reflection] {staged_data['reflection']}", episode, step)

        # Also store in the regular memory structure for compatibility.
        # NOTE(review): relies on self.batch_size and self.store() from
        # SimpleMemory — confirm their contracts against memory.py.
        if self._data is not None:
            record = {
                'text_obs': staged_data.get('plan', ''),
                'action': staged_data.get('action', ''),
                'reflection': staged_data.get('reflection', '')
            }
            # Broadcast the same record to every environment in the batch.
            broadcast_record = {k: [v] * self.batch_size for k, v in record.items()}
            self.store(broadcast_record)

    def clear_file(self):
        """Truncate the memory file and drop the cache."""
        with open(self.memory_file, 'w', encoding='utf-8'):
            pass  # Opening in 'w' mode truncates; nothing to write.
        self.file_cache = []

    def get_recent_from_file(self, n: int = 10) -> List[str]:
        """Return the n most recent cached entries (empty list when none)."""
        # Slicing an empty list already yields [] — no special case needed.
        return self.file_cache[-n:]
Empty file.
19 changes: 18 additions & 1 deletion openmanus_rl/multi_turn_rollout/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,19 @@
"""
Multi-turn rollout module.
Modular stage processing with memory.md integration.
"""

from .openmanus_rollout import OpenmanusRollout
from .modular_stages import ModularStageProcessor, DEFAULT_TOOLS
from .rollout_loop import TrajectoryCollector
from .utils import adjust_batch
from .tool_integration import GLOBAL_TOOL_REGISTRY, ToolRegistry, create_simple_tool_wrappers

__all__ = [
'OpenmanusRollout', # VERL-compatible rollout with modular stages
'ModularStageProcessor', # Standalone modular processor
'DEFAULT_TOOLS', # Simple tool functions
'TrajectoryCollector', # Legacy, kept for compatibility
'GLOBAL_TOOL_REGISTRY', # Global tool registry instance
'ToolRegistry', # Tool registry class
'create_simple_tool_wrappers' # Helper for tool wrappers
]
Loading
Loading