Merged
4 changes: 1 addition & 3 deletions .env.example
@@ -43,9 +43,7 @@ RETRY_MAX_DELAY=60.0 # Maximum delay in seconds

# Memory Management Configuration
MEMORY_ENABLED=true # Enable/disable memory compression
MEMORY_MAX_CONTEXT_TOKENS=100000 # Maximum context window size
MEMORY_TARGET_TOKENS=30000 # Target working memory size (soft limit)
MEMORY_COMPRESSION_THRESHOLD=25000 # Hard limit - compress when exceeded
MEMORY_COMPRESSION_THRESHOLD=25000 # Token threshold - compress when exceeded
MEMORY_SHORT_TERM_SIZE=100 # Number of recent messages to keep
MEMORY_COMPRESSION_RATIO=0.3 # Target compression ratio (0.3 = 30% of original)

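After this change, `MEMORY_COMPRESSION_THRESHOLD` is the single token trigger for compression. A minimal sketch of that check, using an assumed chars/4 token estimate rather than the project's real counter:

```python
# Sketch only: the "compress when exceeded" rule implied by
# MEMORY_COMPRESSION_THRESHOLD. Token estimation is a rough chars/4
# heuristic here, not the repository's tokenizer.
COMPRESSION_THRESHOLD = 25_000  # mirrors MEMORY_COMPRESSION_THRESHOLD=25000

def estimate_tokens(messages: list[str]) -> int:
    return sum(len(m) // 4 for m in messages)

def should_compress(messages: list[str]) -> bool:
    return estimate_tokens(messages) > COMPRESSION_THRESHOLD
```
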
4 changes: 0 additions & 4 deletions README.md
@@ -87,8 +87,6 @@ MAX_ITERATIONS=100 # Maximum iteration loops

# Memory Management
MEMORY_ENABLED=true
MEMORY_MAX_CONTEXT_TOKENS=100000
MEMORY_TARGET_TOKENS=30000
MEMORY_COMPRESSION_THRESHOLD=25000
MEMORY_SHORT_TERM_SIZE=100
MEMORY_COMPRESSION_RATIO=0.3
@@ -249,8 +247,6 @@ See the full configuration template in `.env.example`. Key options:
| `LITELLM_DROP_PARAMS` | Drop unsupported params | `true` |
| `LITELLM_TIMEOUT` | Request timeout in seconds | `600` |
| `MAX_ITERATIONS` | Maximum agent iterations | `100` |
| `MEMORY_MAX_CONTEXT_TOKENS` | Maximum context window | `100000` |
| `MEMORY_TARGET_TOKENS` | Target working memory size | `30000` |
| `MEMORY_COMPRESSION_THRESHOLD` | Compress when exceeded | `25000` |
| `MEMORY_SHORT_TERM_SIZE` | Recent messages to keep | `100` |
| `RETRY_MAX_ATTEMPTS` | Retry attempts for rate limits | `3` |
7 changes: 2 additions & 5 deletions config.py
@@ -40,13 +40,10 @@ class Config:

# Memory Management Configuration
MEMORY_ENABLED = os.getenv("MEMORY_ENABLED", "true").lower() == "true"
MEMORY_MAX_CONTEXT_TOKENS = int(os.getenv("MEMORY_MAX_CONTEXT_TOKENS", "100000"))
MEMORY_TARGET_TOKENS = int(os.getenv("MEMORY_TARGET_TOKENS", "50000"))
MEMORY_COMPRESSION_THRESHOLD = int(os.getenv("MEMORY_COMPRESSION_THRESHOLD", "40000"))
MEMORY_COMPRESSION_THRESHOLD = int(os.getenv("MEMORY_COMPRESSION_THRESHOLD", "60000"))
MEMORY_SHORT_TERM_SIZE = int(os.getenv("MEMORY_SHORT_TERM_SIZE", "100"))
MEMORY_SHORT_TERM_MIN_SIZE = int(os.getenv("MEMORY_SHORT_TERM_MIN_SIZE", "5"))
MEMORY_SHORT_TERM_MIN_SIZE = int(os.getenv("MEMORY_SHORT_TERM_MIN_SIZE", "6"))
MEMORY_COMPRESSION_RATIO = float(os.getenv("MEMORY_COMPRESSION_RATIO", "0.3"))
MEMORY_PRESERVE_TOOL_CALLS = True
MEMORY_PRESERVE_SYSTEM_PROMPTS = True

# Tool Result Processing Configuration
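A usage note on this hunk: each default only applies when the corresponding variable is unset, so deployments can still override the new values per environment. A hedged illustration of that lookup pattern (values are examples, not recommendations):

```python
import os

# Unset -> falls back to the default of 60000 used in the diff above.
os.environ.pop("MEMORY_COMPRESSION_THRESHOLD", None)
print(int(os.getenv("MEMORY_COMPRESSION_THRESHOLD", "60000")))  # 60000

# Set -> the environment value wins over the default.
os.environ["MEMORY_COMPRESSION_THRESHOLD"] = "25000"
print(int(os.getenv("MEMORY_COMPRESSION_THRESHOLD", "60000")))  # 25000
```
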
1 change: 0 additions & 1 deletion docs/advanced-features.md
@@ -82,7 +82,6 @@ Enable in `.env`:

```bash
MEMORY_ENABLED=true
MEMORY_MAX_CONTEXT_TOKENS=100000
MEMORY_COMPRESSION_THRESHOLD=40000
```

2 changes: 0 additions & 2 deletions docs/configuration.md
@@ -70,8 +70,6 @@ MAX_ITERATIONS=100

```bash
MEMORY_ENABLED=true
MEMORY_MAX_CONTEXT_TOKENS=100000
MEMORY_TARGET_TOKENS=30000
MEMORY_COMPRESSION_THRESHOLD=25000
MEMORY_SHORT_TERM_SIZE=100
MEMORY_COMPRESSION_RATIO=0.3
1 change: 0 additions & 1 deletion docs/examples.md
@@ -198,7 +198,6 @@ For long-running tasks with many iterations:
```bash
# Enable memory management in .env:
MEMORY_ENABLED=true
MEMORY_MAX_CONTEXT_TOKENS=100000

# Run a complex task:
python main.py --mode react --task "Analyze all Python files, find patterns, and generate a detailed report"
32 changes: 1 addition & 31 deletions docs/memory-management.md
@@ -36,14 +36,8 @@ In your `.env` file:
# Enable memory management
MEMORY_ENABLED=true

# Maximum total context size
MEMORY_MAX_CONTEXT_TOKENS=100000

# Trigger compression at this threshold
MEMORY_COMPRESSION_THRESHOLD=40000

# Target size after compression
MEMORY_TARGET_TOKENS=50000
```

### 2. Run Your Agent
@@ -150,14 +144,8 @@ cost = tracker.calculate_cost("claude-3-5-sonnet-20241022")
# Enable/disable memory (default: true)
MEMORY_ENABLED=true

# Maximum total context tokens (default: 100000)
MEMORY_MAX_CONTEXT_TOKENS=100000

# Start compression when context exceeds this (default: 40000)
MEMORY_COMPRESSION_THRESHOLD=40000

# Target size after compression (default: 50000)
MEMORY_TARGET_TOKENS=50000
```

### Advanced Settings
@@ -176,8 +164,6 @@ MEMORY_COMPRESSION_STRATEGY=sliding_window
# Preserve system prompts (default: true)
MEMORY_PRESERVE_SYSTEM_PROMPTS=true

# Preserve tool calls and results (default: true)
MEMORY_PRESERVE_TOOL_CALLS=true
```

### Memory Presets
@@ -420,22 +406,6 @@ config = MemoryConfig(
agent = ReActAgent(llm=llm, tools=tools, memory_config=config)
```

### Example 4: Monitor Budget

```python
memory = MemoryManager(config, llm)

# ... use memory ...

# Check budget status
budget = memory.token_tracker.get_budget_status(max_tokens=50000)

if budget['over_budget']:
print(f"⚠️ Over budget by {budget['total_tokens'] - budget['max_tokens']} tokens")
else:
print(f"✅ {budget['remaining']} tokens remaining ({budget['percentage']:.1f}% used)")
```

## How Compression Works

### Step-by-Step Process
@@ -526,7 +496,7 @@ MEMORY_COMPRESSION_THRESHOLD=40000
1. Use `selective` strategy instead of `sliding_window`
2. Increase `MEMORY_SHORT_TERM_SIZE` to preserve more recent messages
3. Increase `MEMORY_COMPRESSION_RATIO` to keep more content
4. Set `MEMORY_PRESERVE_TOOL_CALLS=true`


### Issue: High compression cost

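The `MEMORY_COMPRESSION_RATIO` setting referenced in the troubleshooting list above controls how much content a compression pass aims to keep. A hedged sketch of that relationship (the actual target calculation lives in `memory/compressor.py`, diffed below):

```python
# Sketch: target size after compression, as implied by MEMORY_COMPRESSION_RATIO.
# With the default ratio of 0.3, compressed output targets ~30% of the original
# estimated token count. Numbers below are illustrative.
def compression_target(original_tokens: int, ratio: float = 0.3) -> int:
    return int(original_tokens * ratio)

print(compression_target(40_000))       # 12000 with the default ratio
print(compression_target(40_000, 0.5))  # 20000 -> keeps more content
```
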
6 changes: 3 additions & 3 deletions memory/code_extractor.py
@@ -1,7 +1,7 @@
"""Code structure extraction using tree-sitter for multiple languages."""

import logging
from typing import Dict, List, Optional, Tuple
from typing import Any, Dict, List, Optional, Tuple

logger = logging.getLogger(__name__)

@@ -167,8 +167,8 @@ class CodeExtractor:

def __init__(self):
"""Initialize code extractor."""
self.parsers: Dict[str, any] = {}
self.languages: Dict[str, any] = {}
self.parsers: Dict[str, Any] = {}
self.languages: Dict[str, Any] = {}

def detect_language(self, filename: str, content: str) -> Optional[str]:
"""Detect programming language from filename or content.
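For context on this hunk: the lowercase builtin `any` is a function, not a type, so `Dict[str, any]` is rejected by static type checkers even though it does not fail at runtime; `typing.Any` is the intended annotation. A small illustration (assumed usage, not code from the repository):

```python
from typing import Any, Dict

# Correct: values may be parser objects of arbitrary type.
parsers: Dict[str, Any] = {}

# Wrong: `any` here is the builtin function any(), which checkers such as
# mypy flag as "not valid as a type".
# parsers_bad: Dict[str, any] = {}
```
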
53 changes: 13 additions & 40 deletions memory/compressor.py
@@ -1,7 +1,7 @@
"""Memory compression using LLM-based summarization."""

import logging
from typing import TYPE_CHECKING, List, Optional, Set, Tuple
from typing import TYPE_CHECKING, List, Optional, Tuple

from config import Config
from llm.base import LLMMessage
@@ -48,16 +48,13 @@ def compress(
messages: List[LLMMessage],
strategy: str = CompressionStrategy.SLIDING_WINDOW,
target_tokens: Optional[int] = None,
orphaned_tool_use_ids: Optional[Set[str]] = None,
) -> CompressedMemory:
"""Compress messages using specified strategy.

Args:
messages: List of messages to compress
strategy: Compression strategy to use
target_tokens: Target token count for compressed output
orphaned_tool_use_ids: Set of tool_use IDs from previous summaries that are
waiting for tool_result in current messages

Returns:
CompressedMemory object
@@ -70,14 +67,11 @@
original_tokens = self._estimate_tokens(messages)
target_tokens = int(original_tokens * Config.MEMORY_COMPRESSION_RATIO)

if orphaned_tool_use_ids is None:
orphaned_tool_use_ids = set()

# Select and apply compression strategy
if strategy == CompressionStrategy.SLIDING_WINDOW:
return self._compress_sliding_window(messages, target_tokens)
elif strategy == CompressionStrategy.SELECTIVE:
return self._compress_selective(messages, target_tokens, orphaned_tool_use_ids)
return self._compress_selective(messages, target_tokens)
elif strategy == CompressionStrategy.DELETION:
return self._compress_deletion(messages)
else:
@@ -147,7 +141,7 @@ def _compress_sliding_window(
)

def _compress_selective(
self, messages: List[LLMMessage], target_tokens: int, orphaned_tool_use_ids: set = None
self, messages: List[LLMMessage], target_tokens: int
) -> CompressedMemory:
"""Compress using selective preservation strategy.

@@ -157,16 +151,12 @@ def _compress_selective(
Args:
messages: Messages to compress
target_tokens: Target token count
orphaned_tool_use_ids: Set of tool_use IDs from previous summaries

Returns:
CompressedMemory object
"""
if orphaned_tool_use_ids is None:
orphaned_tool_use_ids = set()

# Separate preserved vs compressible messages
preserved, to_compress = self._separate_messages(messages, orphaned_tool_use_ids)
preserved, to_compress = self._separate_messages(messages)

if not to_compress:
# Nothing to compress
@@ -255,29 +245,25 @@ def _compress_deletion(self, messages: List[LLMMessage]) -> CompressedMemory:
)

def _separate_messages(
self, messages: List[LLMMessage], orphaned_tool_use_ids_from_summaries: set = None
self, messages: List[LLMMessage]
) -> Tuple[List[LLMMessage], List[LLMMessage]]:
"""Separate messages into preserved and compressible.

Strategy:
1. Preserve system messages (if configured)
2. Preserve protected tools (todo list, etc.) - NEVER compress these
3. Use selective strategy for other messages (system decides based on recency, importance)
4. **Critical rule**: Tool pairs (tool_use + tool_result) must stay together
2. Preserve orphaned tool_use (waiting for tool_result)
3. Preserve protected tools (todo list, etc.) - NEVER compress these
4. Preserve the most recent N messages (MEMORY_SHORT_TERM_MIN_SIZE)
5. **Critical rule**: Tool pairs (tool_use + tool_result) must stay together
- If one is preserved, the other must be preserved too
- If one is compressed, the other must be compressed too
5. **Critical fix**: Preserve tool_result that match orphaned tool_use from previous summaries

Args:
messages: All messages
orphaned_tool_use_ids_from_summaries: Tool_use IDs from previous summaries waiting for results

Returns:
Tuple of (preserved, to_compress)
"""
if orphaned_tool_use_ids_from_summaries is None:
orphaned_tool_use_ids_from_summaries = set()

preserve_indices = set()

# Step 1: Mark system messages for preservation
@@ -293,27 +279,13 @@ def _separate_messages(
for orphan_idx in orphaned_tool_use_indices:
preserve_indices.add(orphan_idx)

# Step 2b: CRITICAL FIX - Preserve tool_result that match orphaned tool_use from previous summaries
# These results finally arrived and must be preserved to match their tool_use
for i, msg in enumerate(messages):
if msg.role == "user" and isinstance(msg.content, list):
for block in msg.content:
if isinstance(block, dict) and block.get("type") == "tool_result":
tool_use_id = block.get("tool_use_id")
if tool_use_id in orphaned_tool_use_ids_from_summaries:
preserve_indices.add(i)
logger.info(
f"Preserving tool_result for orphaned tool_use '{tool_use_id}' from previous summary"
)

# Step 2c: Mark protected tools for preservation (CRITICAL for stateful tools)
# Step 2b: Mark protected tools for preservation (CRITICAL for stateful tools)
protected_pairs = self._find_protected_tool_pairs(messages, tool_pairs)
for assistant_idx, user_idx in protected_pairs:
preserve_indices.add(assistant_idx)
preserve_indices.add(user_idx)

# Step 3: Apply selective preservation strategy (keep recent N messages)
# Preserve last short_term_min_message_count messages by default (sliding window approach)
# Step 3: Preserve the most recent N messages to maintain conversation continuity
preserve_count = min(Config.MEMORY_SHORT_TERM_MIN_SIZE, len(messages))
for i in range(len(messages) - preserve_count, len(messages)):
if i >= 0:
@@ -339,7 +311,8 @@ def _separate_messages(
logger.info(
f"Separated: {len(preserved)} preserved, {len(to_compress)} to compress "
f"({len(tool_pairs)} tool pairs, {len(protected_pairs)} protected, "
f"{len(orphaned_tool_use_indices)} orphaned tool_use)"
f"{len(orphaned_tool_use_indices)} orphaned tool_use, "
f"{preserve_count} recent)"
)
return preserved, to_compress

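The docstring above keeps the rule that tool_use/tool_result pairs must land on the same side of the preserved/compressed split. A minimal sketch of that pairing, assuming Anthropic-style content blocks; this is an illustration, not the repository's implementation:

```python
from typing import Dict, List, Set, Tuple

def find_tool_pairs(messages: List[dict]) -> List[Tuple[int, int]]:
    """Pair each assistant tool_use with the user tool_result that answers it."""
    use_index: Dict[str, int] = {}
    pairs: List[Tuple[int, int]] = []
    for i, msg in enumerate(messages):
        content = msg.get("content")
        if not isinstance(content, list):
            continue
        for block in content:
            if not isinstance(block, dict):
                continue
            if msg.get("role") == "assistant" and block.get("type") == "tool_use":
                use_index[block["id"]] = i
            elif msg.get("role") == "user" and block.get("type") == "tool_result":
                use_id = block.get("tool_use_id")
                if use_id in use_index:
                    pairs.append((use_index[use_id], i))
    return pairs

def keep_pairs_together(preserved: Set[int], pairs: List[Tuple[int, int]]) -> Set[int]:
    """Apply the critical rule: if either index of a pair is kept, keep both."""
    expanded = set(preserved)
    for a, b in pairs:
        if a in expanded or b in expanded:
            expanded.update((a, b))
    return expanded
```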