Merged
4 changes: 1 addition & 3 deletions .env.example
@@ -43,9 +43,7 @@ RETRY_MAX_DELAY=60.0 # Maximum delay in seconds

# Memory Management Configuration
MEMORY_ENABLED=true # Enable/disable memory compression
MEMORY_MAX_CONTEXT_TOKENS=100000 # Maximum context window size
MEMORY_TARGET_TOKENS=30000 # Target working memory size (soft limit)
MEMORY_COMPRESSION_THRESHOLD=25000 # Hard limit - compress when exceeded
MEMORY_COMPRESSION_THRESHOLD=25000 # Token threshold - compress when exceeded
MEMORY_SHORT_TERM_SIZE=100 # Number of recent messages to keep
MEMORY_COMPRESSION_RATIO=0.3 # Target compression ratio (0.3 = 30% of original)

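After this change, `MEMORY_COMPRESSION_THRESHOLD` is the single token trigger for compression. A minimal sketch of that check, using an assumed chars/4 token estimate rather than the project's real counter:

```python
# Sketch only: the "compress when exceeded" rule implied by
# MEMORY_COMPRESSION_THRESHOLD. Token estimation is a rough chars/4
# heuristic here, not the repository's tokenizer.
COMPRESSION_THRESHOLD = 25_000  # mirrors MEMORY_COMPRESSION_THRESHOLD=25000

def estimate_tokens(messages: list[str]) -> int:
    return sum(len(m) // 4 for m in messages)

def should_compress(messages: list[str]) -> bool:
    return estimate_tokens(messages) > COMPRESSION_THRESHOLD
```
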
4 changes: 0 additions & 4 deletions README.md
@@ -87,8 +87,6 @@ MAX_ITERATIONS=100 # Maximum iteration loops

# Memory Management
MEMORY_ENABLED=true
MEMORY_MAX_CONTEXT_TOKENS=100000
MEMORY_TARGET_TOKENS=30000
MEMORY_COMPRESSION_THRESHOLD=25000
MEMORY_SHORT_TERM_SIZE=100
MEMORY_COMPRESSION_RATIO=0.3
@@ -249,8 +247,6 @@ See the full configuration template in `.env.example`. Key options:
| `LITELLM_DROP_PARAMS` | Drop unsupported params | `true` |
| `LITELLM_TIMEOUT` | Request timeout in seconds | `600` |
| `MAX_ITERATIONS` | Maximum agent iterations | `100` |
| `MEMORY_MAX_CONTEXT_TOKENS` | Maximum context window | `100000` |
| `MEMORY_TARGET_TOKENS` | Target working memory size | `30000` |
| `MEMORY_COMPRESSION_THRESHOLD` | Compress when exceeded | `25000` |
| `MEMORY_SHORT_TERM_SIZE` | Recent messages to keep | `100` |
| `RETRY_MAX_ATTEMPTS` | Retry attempts for rate limits | `3` |
7 changes: 2 additions & 5 deletions config.py
@@ -40,13 +40,10 @@ class Config:

# Memory Management Configuration
MEMORY_ENABLED = os.getenv("MEMORY_ENABLED", "true").lower() == "true"
MEMORY_MAX_CONTEXT_TOKENS = int(os.getenv("MEMORY_MAX_CONTEXT_TOKENS", "100000"))
MEMORY_TARGET_TOKENS = int(os.getenv("MEMORY_TARGET_TOKENS", "50000"))
MEMORY_COMPRESSION_THRESHOLD = int(os.getenv("MEMORY_COMPRESSION_THRESHOLD", "40000"))
MEMORY_COMPRESSION_THRESHOLD = int(os.getenv("MEMORY_COMPRESSION_THRESHOLD", "60000"))
MEMORY_SHORT_TERM_SIZE = int(os.getenv("MEMORY_SHORT_TERM_SIZE", "100"))
MEMORY_SHORT_TERM_MIN_SIZE = int(os.getenv("MEMORY_SHORT_TERM_MIN_SIZE", "5"))
MEMORY_SHORT_TERM_MIN_SIZE = int(os.getenv("MEMORY_SHORT_TERM_MIN_SIZE", "6"))
MEMORY_COMPRESSION_RATIO = float(os.getenv("MEMORY_COMPRESSION_RATIO", "0.3"))
MEMORY_PRESERVE_TOOL_CALLS = True
MEMORY_PRESERVE_SYSTEM_PROMPTS = True

# Tool Result Processing Configuration
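A usage note on this hunk: each default only applies when the corresponding variable is unset, so deployments can still override the new values per environment. A hedged illustration of that lookup pattern (values are examples, not recommendations):

```python
import os

# Unset -> falls back to the default of 60000 used in the diff above.
os.environ.pop("MEMORY_COMPRESSION_THRESHOLD", None)
print(int(os.getenv("MEMORY_COMPRESSION_THRESHOLD", "60000")))  # 60000

# Set -> the environment value wins over the default.
os.environ["MEMORY_COMPRESSION_THRESHOLD"] = "25000"
print(int(os.getenv("MEMORY_COMPRESSION_THRESHOLD", "60000")))  # 25000
```
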
1 change: 0 additions & 1 deletion docs/advanced-features.md
@@ -82,7 +82,6 @@ Enable in `.env`:

```bash
MEMORY_ENABLED=true
MEMORY_MAX_CONTEXT_TOKENS=100000
MEMORY_COMPRESSION_THRESHOLD=40000
```

2 changes: 0 additions & 2 deletions docs/configuration.md
@@ -70,8 +70,6 @@ MAX_ITERATIONS=100

```bash
MEMORY_ENABLED=true
MEMORY_MAX_CONTEXT_TOKENS=100000
MEMORY_TARGET_TOKENS=30000
MEMORY_COMPRESSION_THRESHOLD=25000
MEMORY_SHORT_TERM_SIZE=100
MEMORY_COMPRESSION_RATIO=0.3
1 change: 0 additions & 1 deletion docs/examples.md
@@ -198,7 +198,6 @@ For long-running tasks with many iterations:
```bash
# Enable memory management in .env:
MEMORY_ENABLED=true
MEMORY_MAX_CONTEXT_TOKENS=100000

# Run a complex task:
python main.py --mode react --task "Analyze all Python files, find patterns, and generate a detailed report"
32 changes: 1 addition & 31 deletions docs/memory-management.md
@@ -36,14 +36,8 @@ In your `.env` file:
# Enable memory management
MEMORY_ENABLED=true

# Maximum total context size
MEMORY_MAX_CONTEXT_TOKENS=100000

# Trigger compression at this threshold
MEMORY_COMPRESSION_THRESHOLD=40000

# Target size after compression
MEMORY_TARGET_TOKENS=50000
```

### 2. Run Your Agent
@@ -150,14 +144,8 @@ cost = tracker.calculate_cost("claude-3-5-sonnet-20241022")
# Enable/disable memory (default: true)
MEMORY_ENABLED=true

# Maximum total context tokens (default: 100000)
MEMORY_MAX_CONTEXT_TOKENS=100000

# Start compression when context exceeds this (default: 40000)
MEMORY_COMPRESSION_THRESHOLD=40000

# Target size after compression (default: 50000)
MEMORY_TARGET_TOKENS=50000
```

### Advanced Settings
@@ -176,8 +164,6 @@ MEMORY_COMPRESSION_STRATEGY=sliding_window
# Preserve system prompts (default: true)
MEMORY_PRESERVE_SYSTEM_PROMPTS=true

# Preserve tool calls and results (default: true)
MEMORY_PRESERVE_TOOL_CALLS=true
```

### Memory Presets
@@ -420,22 +406,6 @@ config = MemoryConfig(
agent = ReActAgent(llm=llm, tools=tools, memory_config=config)
```

### Example 4: Monitor Budget

```python
memory = MemoryManager(config, llm)

# ... use memory ...

# Check budget status
budget = memory.token_tracker.get_budget_status(max_tokens=50000)

if budget['over_budget']:
print(f"⚠️ Over budget by {budget['total_tokens'] - budget['max_tokens']} tokens")
else:
print(f"✅ {budget['remaining']} tokens remaining ({budget['percentage']:.1f}% used)")
```

## How Compression Works

### Step-by-Step Process
@@ -526,7 +496,7 @@ MEMORY_COMPRESSION_THRESHOLD=40000
1. Use `selective` strategy instead of `sliding_window`
2. Increase `MEMORY_SHORT_TERM_SIZE` to preserve more recent messages
3. Increase `MEMORY_COMPRESSION_RATIO` to keep more content
4. Set `MEMORY_PRESERVE_TOOL_CALLS=true`


### Issue: High compression cost

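The `MEMORY_COMPRESSION_RATIO` setting referenced in the troubleshooting list above controls how much content a compression pass aims to keep. A hedged sketch of that relationship (the actual target calculation lives in `memory/compressor.py`, diffed below):

```python
# Sketch: target size after compression, as implied by MEMORY_COMPRESSION_RATIO.
# With the default ratio of 0.3, compressed output targets ~30% of the original
# estimated token count. Numbers below are illustrative.
def compression_target(original_tokens: int, ratio: float = 0.3) -> int:
    return int(original_tokens * ratio)

print(compression_target(40_000))       # 12000 with the default ratio
print(compression_target(40_000, 0.5))  # 20000 -> keeps more content
```
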
6 changes: 3 additions & 3 deletions memory/code_extractor.py
@@ -1,7 +1,7 @@
"""Code structure extraction using tree-sitter for multiple languages."""

import logging
from typing import Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple

logger = logging.getLogger(__name__)

@@ -167,8 +167,8 @@ class CodeExtractor:

def __init__(self):
"""Initialize code extractor."""
self.parsers: Dict[str, any] = {}
self.languages: Dict[str, any] = {}
self.parsers: Dict[str, Any] = {}
self.languages: Dict[str, Any] = {}

def detect_language(self, filename: str, content: str) -> Optional[str]:
"""Detect programming language from filename or content.
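For context on this hunk: the lowercase builtin `any` is a function, not a type, so `Dict[str, any]` is rejected by static type checkers even though it does not fail at runtime; `typing.Any` is the intended annotation. A small illustration (assumed usage, not code from the repository):

```python
from typing import Any, Dict

# Correct: values may be parser objects of arbitrary type.
parsers: Dict[str, Any] = {}

# Wrong: `any` here is the builtin function any(), which checkers such as
# mypy flag as "not valid as a type".
# parsers_bad: Dict[str, any] = {}
```
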
53 changes: 13 additions & 40 deletions memory/compressor.py
@@ -1,7 +1,7 @@
"""Memory compression using LLM-based summarization."""

import logging
from typing import TYPE_CHECKING, List, Optional, Set, Tuple
from typing import TYPE_CHECKING, List, Optional, Tuple

from config import Config
from llm.base import LLMMessage
@@ -48,16 +48,13 @@ def compress(
messages: List[LLMMessage],
strategy: str = CompressionStrategy.SLIDING_WINDOW,
target_tokens: Optional[int] = None,
orphaned_tool_use_ids: Optional[Set[str]] = None,
) -> CompressedMemory:
"""Compress messages using specified strategy.

Args:
messages: List of messages to compress
strategy: Compression strategy to use
target_tokens: Target token count for compressed output
orphaned_tool_use_ids: Set of tool_use IDs from previous summaries that are
waiting for tool_result in current messages

Returns:
CompressedMemory object
@@ -70,14 +67,11 @@
original_tokens = self._estimate_tokens(messages)
target_tokens = int(original_tokens * Config.MEMORY_COMPRESSION_RATIO)

if orphaned_tool_use_ids is None:
orphaned_tool_use_ids = set()

# Select and apply compression strategy
if strategy == CompressionStrategy.SLIDING_WINDOW:
return self._compress_sliding_window(messages, target_tokens)
elif strategy == CompressionStrategy.SELECTIVE:
return self._compress_selective(messages, target_tokens, orphaned_tool_use_ids)
return self._compress_selective(messages, target_tokens)
elif strategy == CompressionStrategy.DELETION:
return self._compress_deletion(messages)
else:
@@ -147,7 +141,7 @@ def _compress_sliding_window(
)

def _compress_selective(
self, messages: List[LLMMessage], target_tokens: int, orphaned_tool_use_ids: set = None
self, messages: List[LLMMessage], target_tokens: int
) -> CompressedMemory:
"""Compress using selective preservation strategy.

@@ -157,16 +151,12 @@ def _compress_selective(
Args:
messages: Messages to compress
target_tokens: Target token count
orphaned_tool_use_ids: Set of tool_use IDs from previous summaries

Returns:
CompressedMemory object
"""
if orphaned_tool_use_ids is None:
orphaned_tool_use_ids = set()

# Separate preserved vs compressible messages
preserved, to_compress = self._separate_messages(messages, orphaned_tool_use_ids)
preserved, to_compress = self._separate_messages(messages)

if not to_compress:
# Nothing to compress
@@ -255,29 +245,25 @@ def _compress_deletion(self, messages: List[LLMMessage]) -> CompressedMemory:
)

def _separate_messages(
self, messages: List[LLMMessage], orphaned_tool_use_ids_from_summaries: set = None
self, messages: List[LLMMessage]
) -> Tuple[List[LLMMessage], List[LLMMessage]]:
"""Separate messages into preserved and compressible.

Strategy:
1. Preserve system messages (if configured)
2. Preserve protected tools (todo list, etc.) - NEVER compress these
3. Use selective strategy for other messages (system decides based on recency, importance)
4. **Critical rule**: Tool pairs (tool_use + tool_result) must stay together
2. Preserve orphaned tool_use (waiting for tool_result)
3. Preserve protected tools (todo list, etc.) - NEVER compress these
4. Preserve the most recent N messages (MEMORY_SHORT_TERM_MIN_SIZE)
5. **Critical rule**: Tool pairs (tool_use + tool_result) must stay together
- If one is preserved, the other must be preserved too
- If one is compressed, the other must be compressed too
5. **Critical fix**: Preserve tool_result that match orphaned tool_use from previous summaries

Args:
messages: All messages
orphaned_tool_use_ids_from_summaries: Tool_use IDs from previous summaries waiting for results

Returns:
Tuple of (preserved, to_compress)
"""
if orphaned_tool_use_ids_from_summaries is None:
orphaned_tool_use_ids_from_summaries = set()

preserve_indices = set()

# Step 1: Mark system messages for preservation
@@ -293,27 +279,13 @@ def _separate_messages(
for orphan_idx in orphaned_tool_use_indices:
preserve_indices.add(orphan_idx)

# Step 2b: CRITICAL FIX - Preserve tool_result that match orphaned tool_use from previous summaries
# These results finally arrived and must be preserved to match their tool_use
for i, msg in enumerate(messages):
if msg.role == "user" and isinstance(msg.content, list):
for block in msg.content:
if isinstance(block, dict) and block.get("type") == "tool_result":
tool_use_id = block.get("tool_use_id")
if tool_use_id in orphaned_tool_use_ids_from_summaries:
preserve_indices.add(i)
logger.info(
f"Preserving tool_result for orphaned tool_use '{tool_use_id}' from previous summary"
)

# Step 2c: Mark protected tools for preservation (CRITICAL for stateful tools)
# Step 2b: Mark protected tools for preservation (CRITICAL for stateful tools)
protected_pairs = self._find_protected_tool_pairs(messages, tool_pairs)
for assistant_idx, user_idx in protected_pairs:
preserve_indices.add(assistant_idx)
preserve_indices.add(user_idx)

# Step 3: Apply selective preservation strategy (keep recent N messages)
# Preserve last short_term_min_message_count messages by default (sliding window approach)
# Step 3: Preserve the most recent N messages to maintain conversation continuity
preserve_count = min(Config.MEMORY_SHORT_TERM_MIN_SIZE, len(messages))
for i in range(len(messages) - preserve_count, len(messages)):
if i >= 0:
@@ -339,7 +311,8 @@ def _separate_messages(
logger.info(
f"Separated: {len(preserved)} preserved, {len(to_compress)} to compress "
f"({len(tool_pairs)} tool pairs, {len(protected_pairs)} protected, "
f"{len(orphaned_tool_use_indices)} orphaned tool_use)"
f"{len(orphaned_tool_use_indices)} orphaned tool_use, "
f"{preserve_count} recent)"
)
return preserved, to_compress

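The docstring above keeps the rule that tool_use/tool_result pairs must land on the same side of the preserved/compressed split. A minimal sketch of that pairing, assuming Anthropic-style content blocks; this is an illustration, not the repository's implementation:

```python
from typing import Dict, List, Set, Tuple

def find_tool_pairs(messages: List[dict]) -> List[Tuple[int, int]]:
    """Pair each assistant tool_use with the user tool_result that answers it."""
    use_index: Dict[str, int] = {}
    pairs: List[Tuple[int, int]] = []
    for i, msg in enumerate(messages):
        content = msg.get("content")
        if not isinstance(content, list):
            continue
        for block in content:
            if not isinstance(block, dict):
                continue
            if msg.get("role") == "assistant" and block.get("type") == "tool_use":
                use_index[block["id"]] = i
            elif msg.get("role") == "user" and block.get("type") == "tool_result":
                use_id = block.get("tool_use_id")
                if use_id in use_index:
                    pairs.append((use_index[use_id], i))
    return pairs

def keep_pairs_together(preserved: Set[int], pairs: List[Tuple[int, int]]) -> Set[int]:
    """Apply the critical rule: if either index of a pair is kept, keep both."""
    expanded = set(preserved)
    for a, b in pairs:
        if a in expanded or b in expanded:
            expanded.update((a, b))
    return expanded
```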