From f63f4cee8e5d80d8bf5a6ae8cf23e6f5c86db9a6 Mon Sep 17 00:00:00 2001
From: Yixin Luo <18810541851@163.com>
Date: Sat, 17 Jan 2026 23:48:17 +0800
Subject: [PATCH 1/3] feat: introduce tool result smart compact
Signed-off-by: Yixin Luo <18810541851@163.com>
---
.env.example | 7 +
AGENTS.md | 4 +-
CLAUDE.md | 132 +++++-
agent/base.py | 36 +-
agent/react_agent.py | 1 +
config.py | 6 +
docs/tool_result_processing.md | 402 +++++++++++++++++++
main.py | 5 +
memory/code_extractor.py | 428 ++++++++++++++++++++
memory/manager.py | 107 ++++-
memory/tool_result_processor.py | 350 ++++++++++++++++
memory/tool_result_store.py | 336 ++++++++++++++++
memory/types.py | 20 +
test/memory/test_tool_result_processing.py | 445 +++++++++++++++++++++
tools/retrieve_tool_result.py | 86 ++++
tools/web_fetch.py | 13 +-
16 files changed, 2346 insertions(+), 32 deletions(-)
create mode 100644 docs/tool_result_processing.md
create mode 100644 memory/code_extractor.py
create mode 100644 memory/tool_result_processor.py
create mode 100644 memory/tool_result_store.py
create mode 100644 test/memory/test_tool_result_processing.py
create mode 100644 tools/retrieve_tool_result.py
diff --git a/.env.example b/.env.example
index 5a83d3c..3351391 100644
--- a/.env.example
+++ b/.env.example
@@ -49,6 +49,13 @@ MEMORY_COMPRESSION_THRESHOLD=25000 # Hard limit - compress when exceeded
MEMORY_SHORT_TERM_SIZE=100 # Number of recent messages to keep
MEMORY_COMPRESSION_RATIO=0.3 # Target compression ratio (0.3 = 30% of original)
+# Tool Result Processing Configuration
+# Model for summarizing large tool results (uses LiteLLM format)
+# If not set, falls back to smart truncation (no extra API calls)
+# Recommended: use a fast, cheap model like gpt-4o-mini or claude-3-haiku
+# TOOL_RESULT_SUMMARY_MODEL=openai/gpt-4o-mini
+# TOOL_RESULT_SUMMARY_MODEL=anthropic/claude-3-haiku-20240307
+
# Logging Configuration
LOG_DIR=logs # Directory for log files
LOG_LEVEL=DEBUG # Logging level: DEBUG, INFO, WARNING, ERROR, CRITICAL
diff --git a/AGENTS.md b/AGENTS.md
index bf833f1..84530a9 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -54,7 +54,7 @@ python main.py --task "Calculate 1+1"
- `agent/`: agent loops (ReAct, Plan-Execute) and orchestration
- `tools/`: tool implementations (file ops, shell, web search, etc.)
- `llm/`: provider adapters + retry logic
-- `memory/`: memory manager, compression, persistence
+- `memory/`: memory manager, compression, persistence, tool result processing
- `docs/`: user/developer documentation
- `scripts/`: packaging/publishing scripts
- `test/`: tests (some require API keys; memory tests are mostly mocked)
@@ -113,6 +113,7 @@ Unified entrypoint: `./scripts/dev.sh build`
- Packaging & release checklist: `docs/packaging.md`
- Extending tools/agents: `docs/extending.md`
- Memory system: `docs/memory-management.md`, `docs/memory_persistence.md`
+- Tool result processing: `docs/tool_result_processing.md`
- Usage examples: `docs/examples.md`
## Safety & Secrets
@@ -127,3 +128,4 @@ Unified entrypoint: `./scripts/dev.sh build`
- If you change configuration/env vars: update `docs/configuration.md` and `.env.example`.
- If you change packaging/versioning: update `pyproject.toml` and `docs/packaging.md`.
- If you change memory/compression/persistence: add/adjust tests under `test/memory/` and update `docs/memory-management.md` / `docs/memory_persistence.md`.
+- If you change tool result processing: add/adjust tests under `test/memory/test_tool_result_processing.py` and update `docs/tool_result_processing.md`.
diff --git a/CLAUDE.md b/CLAUDE.md
index c317064..84530a9 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -1 +1,131 @@
-AGENTS.md
+# AgenticLoop — Agent Instructions
+
+This file defines the **operational workflow** for making changes in this repo (how to set up, run, test, format, build, and publish). Keep it short, specific, and executable; link to docs for long explanations.
+
+Prerequisites: Python 3.12+ and `uv` (https://github.com/astral-sh/uv).
+
+## Quickstart (Local Dev)
+
+```bash
+./scripts/bootstrap.sh
+source .venv/bin/activate
+./scripts/dev.sh test
+```
+
+Optional (recommended): enable git hooks:
+
+```bash
+pre-commit install
+```
+
+## CI
+
+GitHub Actions runs `./scripts/dev.sh precommit`, `./scripts/dev.sh test -q`, and strict typecheck on PRs.
+
+## Review Checklist
+
+Run these before concluding a change:
+
+```bash
+./scripts/dev.sh precommit
+TYPECHECK_STRICT=1 ./scripts/dev.sh typecheck
+./scripts/dev.sh test -q
+```
+
+Manual doc/workflow checks:
+- README/AGENTS/docs: avoid legacy/removed commands (`LLM_PROVIDER`, `pip install -e`, `requirements.txt`, `setup.py`)
+- Docker examples use `--mode`/`--task`
+- Python 3.12+ + uv-only prerequisites documented consistently
+
+Change impact reminders:
+- CLI changes → update `README.md`, `docs/examples.md`
+- Config changes → update `.env.example`, `docs/configuration.md`
+- Workflow scripts → update `AGENTS.md`, `docs/packaging.md`
+
+Run a quick smoke task (requires a configured provider in `.env`):
+
+```bash
+python main.py --task "Calculate 1+1"
+```
+
+## Repo Map
+
+- `main.py`, `cli.py`, `interactive.py`: CLI entry points and UX
+- `agent/`: agent loops (ReAct, Plan-Execute) and orchestration
+- `tools/`: tool implementations (file ops, shell, web search, etc.)
+- `llm/`: provider adapters + retry logic
+- `memory/`: memory manager, compression, persistence, tool result processing
+- `docs/`: user/developer documentation
+- `scripts/`: packaging/publishing scripts
+- `test/`: tests (some require API keys; memory tests are mostly mocked)
+
+## Commands (Golden Path)
+
+### Install
+
+- Use `./scripts/bootstrap.sh` to create `.venv` and install dependencies.
+- Use `./scripts/dev.sh install` to reinstall dev deps into an existing `.venv`.
+
+### Tests
+
+- All tests: `python -m pytest test/`
+- Memory suite: `python -m pytest test/memory/ -v`
+- Script: `./scripts/test.sh`
+- Unified entrypoint: `./scripts/dev.sh test`
+- Integration tests: set `RUN_INTEGRATION_TESTS=1` (live LLM; may incur cost)
+
+### Format
+
+This repo uses `black` + `isort` (see `pyproject.toml`).
+
+```bash
+python -m black .
+python -m isort .
+```
+
+Script: `./scripts/format.sh`
+Unified entrypoint: `./scripts/dev.sh format`
+
+### Lint / Typecheck
+
+- Lint (format check): `./scripts/dev.sh lint`
+- Pre-commit (recommended): `./scripts/dev.sh precommit`
+- Typecheck (best-effort): `./scripts/dev.sh typecheck` (set `TYPECHECK_STRICT=1` to fail on errors)
+
+### Build (Packaging)
+
+```bash
+./scripts/build.sh
+```
+Unified entrypoint: `./scripts/dev.sh build`
+
+### Publish (Manual / Interactive)
+
+`./scripts/publish.sh` defaults to an interactive confirmation and refuses to run without a TTY unless you pass `--yes`.
+
+- TestPyPI: `./scripts/publish.sh --test`
+- PyPI (manual): `./scripts/publish.sh`
+- Unified entrypoint: `./scripts/dev.sh publish`
+
+## Docs Pointers
+
+- Configuration & `.env`: `docs/configuration.md`
+- Packaging & release checklist: `docs/packaging.md`
+- Extending tools/agents: `docs/extending.md`
+- Memory system: `docs/memory-management.md`, `docs/memory_persistence.md`
+- Tool result processing: `docs/tool_result_processing.md`
+- Usage examples: `docs/examples.md`
+
+## Safety & Secrets
+
+- Never commit `.env` or API keys.
+- Avoid running destructive shell commands; keep file edits scoped and reversible.
+- Publishing/releasing steps require explicit human intent (see `docs/packaging.md`).
+
+## When Changing Key Areas
+
+- If you change CLI flags / behavior: update `README.md` and `docs/examples.md`.
+- If you change configuration/env vars: update `docs/configuration.md` and `.env.example`.
+- If you change packaging/versioning: update `pyproject.toml` and `docs/packaging.md`.
+- If you change memory/compression/persistence: add/adjust tests under `test/memory/` and update `docs/memory-management.md` / `docs/memory_persistence.md`.
+- If you change tool result processing: add/adjust tests under `test/memory/test_tool_result_processing.py` and update `docs/tool_result_processing.md`.
diff --git a/agent/base.py b/agent/base.py
index bd72eb7..69d4835 100644
--- a/agent/base.py
+++ b/agent/base.py
@@ -97,6 +97,7 @@ def _react_loop(
use_memory: bool = True,
save_to_memory: bool = True,
verbose: bool = True,
+ task: str = "",
) -> str:
"""Execute a ReAct (Reasoning + Acting) loop.
@@ -111,6 +112,7 @@ def _react_loop(
use_memory: If True, use self.memory for context; if False, use local messages list
save_to_memory: If True, save messages to self.memory (only when use_memory=True)
verbose: If True, print iteration and tool call information
+ task: Optional task description for context in tool result processing
Returns:
Final answer as a string
@@ -181,19 +183,29 @@ def _react_loop(
result = self.tool_executor.execute_tool_call(tc.name, tc.arguments)
- # Truncate overly large results to prevent context overflow
- MAX_TOOL_RESULT_LENGTH = 8000 # characters
- if len(result) > MAX_TOOL_RESULT_LENGTH:
- truncated_length = MAX_TOOL_RESULT_LENGTH
- result = (
- result[:truncated_length]
- + f"\n\n[... Output truncated. Showing first {truncated_length} characters of {len(result)} total. "
- f"Use grep_content or glob_files for more targeted searches instead of reading large files.]"
+ # Process tool result with intelligent summarization if memory is enabled
+ if use_memory and self.memory:
+ result = self.memory.process_tool_result(
+ tool_name=tc.name,
+ tool_call_id=tc.id,
+ result=result,
+ context=task, # Pass task as context for intelligent summarization
)
- if verbose:
- terminal_ui.print_tool_result(result, truncated=True)
- elif verbose:
- terminal_ui.print_tool_result(result, truncated=False)
+ else:
+ # Fallback: simple truncation for non-memory mode
+ MAX_TOOL_RESULT_LENGTH = 8000 # characters
+ if len(result) > MAX_TOOL_RESULT_LENGTH:
+ truncated_length = MAX_TOOL_RESULT_LENGTH
+ result = (
+ result[:truncated_length]
+ + f"\n\n[... Output truncated. Showing first {truncated_length} characters of {len(result)} total. "
+ f"Use grep_content or glob_files for more targeted searches instead of reading large files.]"
+ )
+
+ if verbose:
+ # Check if result was truncated/processed
+ truncated = "[... " in result or "[Tool Result #" in result
+ terminal_ui.print_tool_result(result, truncated=truncated)
# Log result (truncated)
logger.debug(f"Tool result: {result[:200]}{'...' if len(result) > 200 else ''}")
diff --git a/agent/react_agent.py b/agent/react_agent.py
index 41f2569..717da14 100644
--- a/agent/react_agent.py
+++ b/agent/react_agent.py
@@ -159,6 +159,7 @@ def run(self, task: str) -> str:
use_memory=True,
save_to_memory=True,
verbose=True,
+ task=task,
)
self._print_memory_stats()
diff --git a/config.py b/config.py
index e0ab242..da76f94 100644
--- a/config.py
+++ b/config.py
@@ -40,6 +40,11 @@ class Config:
MEMORY_SHORT_TERM_SIZE = int(os.getenv("MEMORY_SHORT_TERM_SIZE", "100"))
MEMORY_COMPRESSION_RATIO = float(os.getenv("MEMORY_COMPRESSION_RATIO", "0.3"))
+ # Tool Result Processing Configuration
+ # Model for summarizing large tool results (e.g., "openai/gpt-4o-mini", "anthropic/claude-3-haiku-20240307")
+ # If not set, LLM summarization is disabled and falls back to smart truncation
+ TOOL_RESULT_SUMMARY_MODEL = os.getenv("TOOL_RESULT_SUMMARY_MODEL")
+
# Logging Configuration
LOG_DIR = os.getenv("LOG_DIR", "logs")
LOG_LEVEL = os.getenv("LOG_LEVEL", "DEBUG").upper()
@@ -79,6 +84,7 @@ def get_memory_config(cls):
short_term_message_count=cls.MEMORY_SHORT_TERM_SIZE,
compression_ratio=cls.MEMORY_COMPRESSION_RATIO,
enable_compression=cls.MEMORY_ENABLED,
+ tool_result_summary_model=cls.TOOL_RESULT_SUMMARY_MODEL,
)
@classmethod
diff --git a/docs/tool_result_processing.md b/docs/tool_result_processing.md
new file mode 100644
index 0000000..8cea669
--- /dev/null
+++ b/docs/tool_result_processing.md
@@ -0,0 +1,402 @@
+# Tool Result Processing and External Storage
+
+This document describes the intelligent tool result processing and external storage features that help manage large tool outputs and reduce memory pressure.
+
+## Overview
+
+When tools return large outputs (e.g., reading large files, extensive search results), they can quickly consume memory tokens and trigger frequent compression. The tool result processing system addresses this with two strategies:
+
+1. **Intelligent Summarization**: Automatically summarize or truncate large tool results based on tool type
+2. **External Storage**: Store very large results externally and keep only summaries in memory
+
+**These features are always enabled** to ensure optimal memory management.
+
+## Configuration
+
+Configure these features in `MemoryConfig`:
+
+```python
+from memory.types import MemoryConfig
+
+config = MemoryConfig(
+ # Storage threshold
+ tool_result_storage_threshold=10000, # Store externally if > 10k tokens (default)
+ tool_result_storage_path="data/tool_results.db", # SQLite DB path (None = in-memory)
+
+ # Per-tool token budgets
+ tool_result_budgets={
+ "read_file": 1000, # Max 1000 tokens for file reads
+ "grep_content": 800, # Max 800 tokens for search results
+ "execute_shell": 500, # Max 500 tokens for shell output
+ "web_search": 1200, # Max 1200 tokens for web searches
+ "web_fetch": 1500, # Max 1500 tokens for web fetches
+ "glob_files": 600, # Max 600 tokens for file listings
+ "default": 1000, # Default for other tools
+ }
+)
+```
+
+## Processing Strategies
+
+Different tools use different processing strategies:
+
+### 1. Extract Key Sections (Code Files)
+
+For `read_file` on code files:
+- Extracts imports, class definitions, function definitions
+- Omits long comments and repetitive code
+- Preserves line numbers for reference
+
+**Example:**
+```
+[Key sections extracted - 150 lines omitted]
+
+ 1: import os
+ 2: import sys
+ 10: class MyClass:
+ 11: def __init__(self):
+ 25: def important_method(self):
+ 50: def main():
+
+[Use read_file with specific line ranges for full content]
+```
+
+### 2. Preserve Matches (Search Results)
+
+For `grep_content`:
+- Keeps all matching lines with context
+- Preserves file paths and line numbers
+- Truncates only if necessary
+
+**Example:**
+```
+src/main.py:10:def process_data():
+src/utils.py:25:def process_data():
+src/handlers.py:42:def process_data():
+
+[... 50 more lines omitted. Use more specific search patterns.]
+```
+
+### 3. Smart Truncate (General Content)
+
+For `execute_shell`, `web_search`, etc.:
+- Keeps first 60% and last 20% of allowed content
+- Breaks at line boundaries when possible
+- Shows omitted character count
+
+**Example:**
+```
+Command output starts here...
+[first 60% of content]
+
+[... 5000 characters omitted ...]
+
+[last 20% of content]
+...command output ends here
+
+[Use more specific queries to see omitted content]
+```
+
+### 4. LLM Summarization (Complex Outputs)
+
+For `web_fetch` and other complex outputs:
+- Uses a fast, cheap model (e.g. GPT-4o-mini or Claude Haiku, via `TOOL_RESULT_SUMMARY_MODEL`) to generate intelligent summaries
+- Focuses on information relevant to the task
+- Falls back to smart truncate if LLM unavailable
+
+**Example:**
+```
+[LLM Summary of tool output]
+
+The webpage describes three main features:
+1. Authentication using OAuth2
+2. REST API with rate limiting
+3. WebSocket support for real-time updates
+
+Key endpoints: /api/auth, /api/users, /ws/events
+
+[Full output available via external storage]
+```
+
+## External Storage
+
+When tool results exceed the storage threshold (default: 10,000 tokens), they are stored externally:
+
+### Storage Flow
+
+1. **Tool executes** → Returns large result
+2. **Processor evaluates** → Determines result is too large
+3. **Store externally** → Saves full content to SQLite
+4. **Return reference** → Memory gets summary + reference ID
+
+### Reference Format
+
+```
+[Tool Result #read_file_a1b2c3d4]
+Tool: read_file
+Size: 50000 chars (~14285 tokens)
+Stored: 2026-01-17 10:30:00
+
+Summary:
+[Processed summary of the content]
+
+[Full content available via retrieve_tool_result tool - use this ID to access]
+```
+
+### Retrieving Stored Results
+
+The agent always has access to a `retrieve_tool_result` tool (it is registered automatically) for accessing stored results:
+
+```python
+# Agent can call this tool to retrieve full content
+retrieve_tool_result(result_id="read_file_a1b2c3d4")
+```
+
+**Tool description:**
+```
+Retrieve the full content of a tool result that was stored externally.
+Use this when you see a '[Tool Result #...]' reference in the conversation
+and need to access the complete output.
+```
+
+## Usage Examples
+
+### Example 1: Reading a Large File
+
+```python
+# Without processing (old behavior)
+result = read_file("large_file.py") # 20,000 chars
+# → Entire file added to memory (5,700 tokens)
+# → Triggers compression
+
+# With processing (new behavior)
+result = read_file("large_file.py") # 20,000 chars
+# → Key sections extracted (1,000 tokens)
+# → No compression needed
+```
+
+### Example 2: External Storage
+
+```python
+# Very large file
+result = read_file("huge_log.txt") # 100,000 chars (28,500 tokens)
+
+# Result in memory:
+"""
+[Tool Result #read_file_xyz789]
+Tool: read_file
+Size: 100000 chars (~28571 tokens)
+
+Summary:
+Log file contains 5000 entries from 2026-01-15 to 2026-01-17.
+Main events: 3000 INFO, 1500 WARNING, 500 ERROR.
+Most common errors: ConnectionTimeout (200), AuthFailure (150).
+
+[Full content available via retrieve_tool_result tool]
+"""
+# → Only ~200 tokens in memory instead of 28,500!
+
+# Later, if agent needs full content:
+full_content = retrieve_tool_result("read_file_xyz789")
+```
+
+### Example 3: Grep Results
+
+```python
+# Search returns many matches
+result = grep_content(pattern="TODO", path="src/")
+# → 500 matches found
+
+# Processed result:
+"""
+src/main.py:10:# TODO: Refactor this
+src/main.py:25:# TODO: Add error handling
+src/utils.py:15:# TODO: Optimize performance
+...
+[First 50 matches shown]
+
+[... 450 more lines omitted. Use more specific search patterns.]
+"""
+# → Reduced from 2,000 tokens to 800 tokens
+```
+
+## Benefits
+
+### Memory Efficiency
+
+- **Reduced token usage**: 50-90% reduction for large tool results
+- **Less frequent compression**: Fewer compression cycles needed
+- **Better context quality**: More room for important information
+
+### Performance
+
+- **Faster processing**: Less data to compress
+- **Lower costs**: Fewer tokens sent to LLM
+- **Scalable**: Can handle very large tool outputs
+
+### Flexibility
+
+- **Configurable**: Adjust budgets per tool type
+- **Retrievable**: Full content available when needed
+- **Transparent**: Agent knows when content is truncated/stored
+
+## Monitoring
+
+### Check Processing Stats
+
+```python
+# Get memory stats including tool result processing
+stats = agent.memory.get_stats()
+print(f"Tool results stored: {stats['tool_result_stats']['total_results']}")
+print(f"Total tokens saved: {stats['tool_result_stats']['total_tokens']}")
+```
+
+### Check Storage Stats
+
+```python
+# Get external storage statistics
+storage_stats = agent.memory.get_tool_result_stats()
+print(f"Stored results: {storage_stats['total_results']}")
+print(f"Total size: {storage_stats['total_bytes']} bytes")
+print(f"Average access count: {storage_stats['avg_access_count']}")
+```
+
+## Advanced Configuration
+
+### Custom Tool Budgets
+
+```python
+config = MemoryConfig(
+ tool_result_budgets={
+ "read_file": 1500, # Allow more tokens for code files
+ "grep_content": 500, # Restrict search results more
+ "my_custom_tool": 2000, # Custom tool budget
+ }
+)
+```
+
+### Disable for Specific Scenarios
+
+```python
+# NOTE(review): these flags contradict "always enabled" above — confirm they are honored.
+config = MemoryConfig(
+    enable_tool_result_processing=False,  # Keep all content
+    enable_tool_result_storage=False,  # No external storage
+)
+```
+
+### Persistent Storage
+
+```python
+# Use persistent database for tool results
+config = MemoryConfig(
+ tool_result_storage_path="data/tool_results.db", # Persistent
+)
+
+# Or use in-memory (default)
+config = MemoryConfig(
+ tool_result_storage_path=None, # In-memory only
+)
+```
+
+## Cleanup
+
+External storage can be cleaned up periodically:
+
+```python
+# Remove results older than 7 days
+deleted = agent.memory.tool_result_store.cleanup_old_results(days=7)
+print(f"Cleaned up {deleted} old results")
+```
+
+## Implementation Details
+
+### Architecture
+
+```
+Tool Execution
+ ↓
+Raw Result (may be large)
+ ↓
+ToolResultProcessor.process_result()
+ ├─ Small result → Pass through
+ ├─ Medium result → Summarize/truncate
+ └─ Large result → Recommend external storage
+ ↓
+MemoryManager.process_tool_result()
+ ├─ Apply processing
+ └─ Store externally if needed
+ ↓
+Processed Result (optimized for memory)
+ ↓
+Add to Memory
+```
+
+### Files
+
+- `memory/tool_result_processor.py` - Processing strategies
+- `memory/tool_result_store.py` - External storage (SQLite)
+- `memory/manager.py` - Integration with memory system
+- `agent/base.py` - Integration with agent execution
+- `tools/retrieve_tool_result.py` - Retrieval tool
+
+### Database Schema
+
+```sql
+CREATE TABLE tool_results (
+ id TEXT PRIMARY KEY, -- Hash-based ID
+ tool_call_id TEXT NOT NULL, -- Original tool call ID
+ tool_name TEXT NOT NULL, -- Tool that produced result
+ content TEXT NOT NULL, -- Full content
+ content_hash TEXT NOT NULL, -- SHA256 hash (deduplication)
+ summary TEXT, -- Processed summary
+ token_count INTEGER, -- Estimated tokens
+ created_at TIMESTAMP NOT NULL, -- Creation time
+ accessed_at TIMESTAMP, -- Last access time
+ access_count INTEGER DEFAULT 0 -- Number of retrievals
+);
+```
+
+## Best Practices
+
+1. **Set appropriate budgets**: Balance between context quality and memory usage
+2. **Use persistent storage**: For long-running sessions or when results need to persist
+3. **Monitor stats**: Check processing effectiveness regularly
+4. **Clean up old results**: Prevent database bloat
+5. **Test with your workload**: Adjust budgets based on your specific use case
+
+## Troubleshooting
+
+### Issue: Results still too large
+
+**Solution**: Lower the tool-specific budget:
+```python
+config.tool_result_budgets["read_file"] = 500 # Reduce from 1000
+```
+
+### Issue: Important information lost
+
+**Solution**: Increase budget for specific tools:
+```python
+config.tool_result_budgets["my_tool"] = 2000 # Increase budget
+```
+
+### Issue: External storage not working
+
+**Solution**: Check configuration and permissions:
+```python
+# Check database path is writable
+import os
+db_dir = os.path.dirname(config.tool_result_storage_path or "data/tool_results.db")
+assert os.access(db_dir, os.W_OK)
+```
+
+### Issue: Agent can't retrieve stored results
+
+**Solution**: The retrieve_tool_result tool is automatically registered and always available.
+
+## See Also
+
+- [Memory Management](memory-management.md) - Overall memory system
+- [Memory Persistence](memory_persistence.md) - Session persistence
+- [Configuration](configuration.md) - Full configuration options
diff --git a/main.py b/main.py
index e609c60..d904d18 100644
--- a/main.py
+++ b/main.py
@@ -29,6 +29,7 @@
)
from tools.shell import ShellTool
from tools.smart_edit import SmartEditTool
+from tools.retrieve_tool_result import RetrieveToolResultTool
from tools.web_fetch import WebFetchTool
from tools.web_search import WebSearchTool
from utils import get_log_file_path, setup_logger, terminal_ui
@@ -101,6 +102,10 @@ def create_agent(mode: str = "react"):
delegation_tool = DelegationTool(agent)
agent.tool_executor.add_tool(delegation_tool)
+ # Add retrieve_tool_result tool (always available)
+ retrieve_tool = RetrieveToolResultTool(agent.memory)
+ agent.tool_executor.add_tool(retrieve_tool)
+
return agent
diff --git a/memory/code_extractor.py b/memory/code_extractor.py
new file mode 100644
index 0000000..267656e
--- /dev/null
+++ b/memory/code_extractor.py
@@ -0,0 +1,428 @@
+"""Code structure extraction using tree-sitter for multiple languages."""
+
+import logging
+from typing import Dict, List, Optional, Set, Tuple
+
+logger = logging.getLogger(__name__)
+
+# Try to import tree-sitter, but make it optional
+try:
+ from tree_sitter_language_pack import get_language, get_parser
+
+ TREE_SITTER_AVAILABLE = True
+except ImportError:
+ TREE_SITTER_AVAILABLE = False
+ logger.warning(
+ "tree-sitter-language-pack not available. "
+ "Install with: pip install tree-sitter-language-pack"
+ )
+
+
+class CodeExtractor:
+ """Extract key structures from code files using tree-sitter.
+
+ Supports 160+ languages including Python, JavaScript, Java, C++, Rust, Go, etc.
+ Falls back to regex-based extraction if tree-sitter is not available.
+ """
+
+ # Language detection by file extension
+ EXTENSION_TO_LANGUAGE = {
+ ".py": "python",
+ ".js": "javascript",
+ ".jsx": "javascript",
+ ".ts": "typescript",
+ ".tsx": "typescript",
+ ".java": "java",
+ ".c": "c",
+ ".h": "c",
+ ".cpp": "cpp",
+ ".cc": "cpp",
+ ".cxx": "cpp",
+ ".hpp": "cpp",
+ ".rs": "rust",
+ ".go": "go",
+ ".rb": "ruby",
+ ".php": "php",
+ ".swift": "swift",
+ ".kt": "kotlin",
+ ".cs": "c_sharp",
+ ".scala": "scala",
+ ".sh": "bash",
+ ".bash": "bash",
+ ".zsh": "bash",
+ ".lua": "lua",
+ ".r": "r",
+ ".R": "r",
+ ".sql": "sql",
+ ".html": "html",
+ ".css": "css",
+ ".json": "json",
+ ".yaml": "yaml",
+ ".yml": "yaml",
+ ".toml": "toml",
+ ".xml": "xml",
+ ".md": "markdown",
+ }
+
+ # Tree-sitter queries for extracting definitions
+ # These queries work across multiple languages with similar syntax
+ QUERIES = {
+ "python": """
+ (function_definition
+ name: (identifier) @name) @definition.function
+
+ (class_definition
+ name: (identifier) @name) @definition.class
+
+ (import_statement) @import
+ (import_from_statement) @import
+
+ (decorated_definition) @decorator
+ """,
+ "javascript": """
+ (function_declaration
+ name: (identifier) @name) @definition.function
+
+ (class_declaration
+ name: (identifier) @name) @definition.class
+
+ (method_definition
+ name: (property_identifier) @name) @definition.method
+
+ (import_statement) @import
+ (export_statement) @export
+ """,
+ "typescript": """
+ (function_declaration
+ name: (identifier) @name) @definition.function
+
+ (class_declaration
+ name: (type_identifier) @name) @definition.class
+
+ (method_definition
+ name: (property_identifier) @name) @definition.method
+
+ (interface_declaration
+ name: (type_identifier) @name) @definition.interface
+
+ (type_alias_declaration
+ name: (type_identifier) @name) @definition.type
+
+ (import_statement) @import
+ """,
+ "java": """
+ (method_declaration
+ name: (identifier) @name) @definition.method
+
+ (class_declaration
+ name: (identifier) @name) @definition.class
+
+ (interface_declaration
+ name: (identifier) @name) @definition.interface
+
+ (import_declaration) @import
+ """,
+ "rust": """
+ (function_item
+ name: (identifier) @name) @definition.function
+
+ (struct_item
+ name: (type_identifier) @name) @definition.struct
+
+ (enum_item
+ name: (type_identifier) @name) @definition.enum
+
+ (trait_item
+ name: (type_identifier) @name) @definition.trait
+
+ (impl_item) @definition.impl
+
+ (use_declaration) @import
+ """,
+ "go": """
+ (function_declaration
+ name: (identifier) @name) @definition.function
+
+ (method_declaration
+ name: (field_identifier) @name) @definition.method
+
+ (type_declaration) @definition.type
+
+ (import_declaration) @import
+ """,
+ "cpp": """
+ (function_definition
+ declarator: (function_declarator
+ declarator: (identifier) @name)) @definition.function
+
+ (class_specifier
+ name: (type_identifier) @name) @definition.class
+
+ (struct_specifier
+ name: (type_identifier) @name) @definition.struct
+
+ (preproc_include) @import
+ """,
+ }
+
+ def __init__(self):
+ """Initialize code extractor."""
+ self.parsers: Dict[str, any] = {}
+ self.languages: Dict[str, any] = {}
+
+ def detect_language(self, filename: str, content: str) -> Optional[str]:
+ """Detect programming language from filename or content.
+
+ Args:
+ filename: Name of the file
+ content: File content
+
+ Returns:
+ Language name or None if not detected
+ """
+ # Try extension first
+ for ext, lang in self.EXTENSION_TO_LANGUAGE.items():
+ if filename.endswith(ext):
+ return lang
+
+ # Try shebang for scripts
+ if content.startswith("#!"):
+ first_line = content.split("\n")[0].lower()
+ if "python" in first_line:
+ return "python"
+ elif "node" in first_line or "javascript" in first_line:
+ return "javascript"
+ elif "bash" in first_line or "sh" in first_line:
+ return "bash"
+ elif "ruby" in first_line:
+ return "ruby"
+
+ return None
+
+ def _get_parser(self, language: str):
+ """Get or create parser for language.
+
+ Args:
+ language: Language name
+
+ Returns:
+ Parser instance or None if not available
+ """
+ if not TREE_SITTER_AVAILABLE:
+ return None
+
+ if language not in self.parsers:
+ try:
+ self.parsers[language] = get_parser(language)
+ self.languages[language] = get_language(language)
+ logger.debug(f"Loaded tree-sitter parser for {language}")
+ except Exception as e:
+ logger.warning(f"Failed to load parser for {language}: {e}")
+ return None
+
+ return self.parsers.get(language)
+
+ def extract_definitions(
+ self, content: str, language: str, max_items: int = 100
+ ) -> List[Tuple[int, str, str]]:
+ """Extract function/class definitions from code.
+
+ Args:
+ content: Source code content
+ language: Programming language
+ max_items: Maximum number of items to extract
+
+ Returns:
+ List of (line_number, type, line_content) tuples
+ """
+ if not TREE_SITTER_AVAILABLE:
+ return self._extract_definitions_regex(content, language, max_items)
+
+ parser = self._get_parser(language)
+ if not parser:
+ return self._extract_definitions_regex(content, language, max_items)
+
+ try:
+ # Parse the code
+ tree = parser.parse(bytes(content, "utf8"))
+ root_node = tree.root_node
+
+ # Get query for this language
+ query_text = self.QUERIES.get(language)
+ if not query_text:
+ # Fallback to regex for unsupported languages
+ return self._extract_definitions_regex(content, language, max_items)
+
+ # Execute query
+ lang = self.languages[language]
+ query = lang.query(query_text)
+ captures = query.captures(root_node)
+
+ # Extract definitions with line numbers
+ definitions = []
+ lines = content.split("\n")
+
+ for node, capture_name in captures:
+ if len(definitions) >= max_items:
+ break
+
+ line_num = node.start_point[0]
+ if line_num < len(lines):
+ line_content = lines[line_num].strip()
+
+ # Determine type from capture name
+ if "function" in capture_name:
+ def_type = "function"
+ elif "class" in capture_name:
+ def_type = "class"
+ elif "method" in capture_name:
+ def_type = "method"
+ elif "import" in capture_name:
+ def_type = "import"
+ elif "struct" in capture_name:
+ def_type = "struct"
+ elif "interface" in capture_name:
+ def_type = "interface"
+ elif "type" in capture_name:
+ def_type = "type"
+ else:
+ def_type = "definition"
+
+ definitions.append((line_num + 1, def_type, line_content))
+
+ return definitions
+
+ except Exception as e:
+ logger.warning(f"Tree-sitter extraction failed for {language}: {e}")
+ return self._extract_definitions_regex(content, language, max_items)
+
+ def _extract_definitions_regex(
+ self, content: str, language: str, max_items: int
+ ) -> List[Tuple[int, str, str]]:
+ """Fallback regex-based extraction for when tree-sitter is unavailable.
+
+ Args:
+ content: Source code content
+ language: Programming language
+ max_items: Maximum number of items to extract
+
+ Returns:
+ List of (line_number, type, line_content) tuples
+ """
+ import re
+
+ lines = content.split("\n")
+ definitions = []
+
+ # Language-specific patterns
+ patterns = {
+ "python": [
+ (r"^\s*def\s+\w+", "function"),
+ (r"^\s*async\s+def\s+\w+", "function"),
+ (r"^\s*class\s+\w+", "class"),
+ (r"^\s*@\w+", "decorator"),
+ (r"^\s*(import\s+|from\s+.*\s+import\s+)", "import"),
+ ],
+ "javascript": [
+ (r"^\s*function\s+\w+", "function"),
+ (r"^\s*class\s+\w+", "class"),
+ (r"^\s*const\s+\w+\s*=\s*\(.*\)\s*=>", "function"),
+ (r"^\s*(import\s+|export\s+)", "import"),
+ ],
+ "typescript": [
+ (r"^\s*function\s+\w+", "function"),
+ (r"^\s*class\s+\w+", "class"),
+ (r"^\s*interface\s+\w+", "interface"),
+ (r"^\s*type\s+\w+", "type"),
+ (r"^\s*(import\s+|export\s+)", "import"),
+ ],
+ "java": [
+ (r"^\s*(public|private|protected)?\s*(static)?\s*\w+\s+\w+\s*\(", "method"),
+ (r"^\s*(public|private|protected)?\s*class\s+\w+", "class"),
+ (r"^\s*(public|private|protected)?\s*interface\s+\w+", "interface"),
+ (r"^\s*import\s+", "import"),
+ ],
+ "rust": [
+ (r"^\s*fn\s+\w+", "function"),
+ (r"^\s*struct\s+\w+", "struct"),
+ (r"^\s*enum\s+\w+", "enum"),
+ (r"^\s*trait\s+\w+", "trait"),
+ (r"^\s*impl\s+", "impl"),
+ (r"^\s*use\s+", "import"),
+ ],
+ "go": [
+ (r"^\s*func\s+\w+", "function"),
+ (r"^\s*func\s+\(.*\)\s+\w+", "method"),
+ (r"^\s*type\s+\w+\s+struct", "struct"),
+ (r"^\s*type\s+\w+\s+interface", "interface"),
+ (r"^\s*import\s+", "import"),
+ ],
+ "cpp": [
+ (r"^\s*\w+\s+\w+\s*\(.*\)\s*\{?", "function"),
+ (r"^\s*class\s+\w+", "class"),
+ (r"^\s*struct\s+\w+", "struct"),
+ (r"^\s*#include\s+", "import"),
+ ],
+ }
+
+ # Get patterns for this language, or use Python as default
+ lang_patterns = patterns.get(language, patterns["python"])
+
+ for i, line in enumerate(lines):
+ if len(definitions) >= max_items:
+ break
+
+ for pattern, def_type in lang_patterns:
+ if re.match(pattern, line):
+ definitions.append((i + 1, def_type, line.strip()))
+ break
+
+ return definitions
+
+ def format_extracted_code(
+ self, content: str, filename: str, max_tokens: int
+ ) -> str:
+ """Format extracted code with key definitions.
+
+ Args:
+ content: Source code content
+ filename: Name of the file
+ max_tokens: Maximum tokens to use
+
+ Returns:
+ Formatted string with key sections
+ """
+ max_chars = int(max_tokens * 3.5)
+
+ # Detect language
+ language = self.detect_language(filename, content)
+ if not language:
+ # Can't detect language, return truncated content
+ return content[:max_chars]
+
+ # Extract definitions
+ definitions = self.extract_definitions(content, language, max_items=200)
+
+ if not definitions:
+ # No definitions found, return truncated content
+ return content[:max_chars]
+
+ # Format output
+ lines = content.split("\n")
+ output_lines = []
+ current_size = 0
+
+ header = f"[Key sections extracted from {language} code - {len(lines) - len(definitions)} lines omitted]\n\n"
+ current_size += len(header)
+
+ for line_num, def_type, line_content in definitions:
+ formatted = f"{line_num:4d}: {line_content}"
+ if current_size + len(formatted) < max_chars:
+ output_lines.append(formatted)
+ current_size += len(formatted) + 1
+ else:
+ break
+
+ footer = f"\n\n[Extracted {len(output_lines)} {language} definitions. Use read_file with line ranges for full content]"
+
+ return header + "\n".join(output_lines) + footer
diff --git a/memory/manager.py b/memory/manager.py
index 7f5a7a7..1749e59 100644
--- a/memory/manager.py
+++ b/memory/manager.py
@@ -4,11 +4,14 @@
from typing import TYPE_CHECKING, Any, Dict, List, Optional
from llm.base import LLMMessage
+from config import Config
from .compressor import WorkingMemoryCompressor
from .short_term import ShortTermMemory
from .store import MemoryStore
from .token_tracker import TokenTracker
+from .tool_result_processor import ToolResultProcessor
+from .tool_result_store import ToolResultStore
from .types import CompressedMemory, CompressionStrategy, MemoryConfig
logger = logging.getLogger(__name__)
@@ -39,24 +42,38 @@ def __init__(
"""
self.config = config
self.llm = llm
+ self._db_path = db_path
# Always create/use store for persistence
if store is None:
store = MemoryStore(db_path=db_path)
self.store = store
- # Create new session or use existing one
- if session_id is None:
- self.session_id = store.create_session()
- logger.info(f"Created new session: {self.session_id}")
- else:
+ # Lazy session creation: only create when first message is added
+ # If session_id is provided (resuming), use it immediately
+ if session_id is not None:
self.session_id = session_id
+ self._session_created = True
+ else:
+ self.session_id = None
+ self._session_created = False
# Initialize components
self.short_term = ShortTermMemory(max_size=config.short_term_message_count)
self.compressor = WorkingMemoryCompressor(llm, config)
self.token_tracker = TokenTracker()
+ # Initialize tool result processing components (always enabled)
+ self.tool_result_processor = ToolResultProcessor(
+ storage_threshold=config.tool_result_storage_threshold,
+ summary_model=Config.TOOL_RESULT_SUMMARY_MODEL,
+ )
+ storage_path = config.tool_result_storage_path
+ self.tool_result_store = ToolResultStore(db_path=storage_path)
+ logger.info(
+ f"Tool result processing enabled with external storage: {storage_path or 'in-memory'}"
+ )
+
# Storage for compressed memories and system messages
self.summaries: List[CompressedMemory] = []
self.system_messages: List[LLMMessage] = []
@@ -122,6 +139,17 @@ def from_session(
return manager
+ def _ensure_session(self) -> None:
+ """Lazily create session when first needed.
+
+ This avoids creating empty sessions when MemoryManager is instantiated
+ but no messages are ever added (e.g., user exits before running any task).
+ """
+ if not self._session_created:
+ self.session_id = self.store.create_session()
+ self._session_created = True
+ logger.info(f"Created new session: {self.session_id}")
+
def add_message(self, message: LLMMessage, actual_tokens: Dict[str, int] = None) -> None:
"""Add a message to memory and trigger compression if needed.
@@ -130,6 +158,9 @@ def add_message(self, message: LLMMessage, actual_tokens: Dict[str, int] = None)
actual_tokens: Optional dict with actual token counts from LLM response
Format: {"input": int, "output": int}
"""
+ # Ensure session exists before adding messages
+ self._ensure_session()
+
# Track system messages separately
if message.role == "system":
self.system_messages.append(message)
@@ -404,6 +435,68 @@ def _get_orphaned_tool_use_ids_from_summaries(self) -> set:
return orphaned_ids
    def process_tool_result(
        self, tool_name: str, tool_call_id: str, result: str, context: str = ""
    ) -> str:
        """Process a tool result with intelligent summarization and optional external storage.

        The processor shrinks the result using its per-tool strategy; when the
        result exceeds the storage threshold the full text is additionally
        persisted in the ToolResultStore and replaced by a compact reference.

        Args:
            tool_name: Name of the tool that produced the result
            tool_call_id: ID of the tool call
            result: Raw tool result string
            context: Optional context about the task

        Returns:
            Processed result (may be summarized or reference to external storage)
        """
        # Process the result
        processed_result, should_store_externally = self.tool_result_processor.process_result(
            tool_name=tool_name, result=result, context=context
        )

        # Store externally if processor recommends it (threshold already checked in processor)
        if should_store_externally:
            result_tokens = self.tool_result_processor.estimate_tokens(result)
            logger.info(
                f"Storing large tool result externally: {tool_name} "
                f"({result_tokens} tokens > {self.config.tool_result_storage_threshold})"
            )

            # Store full result; the processed (summarized) form doubles as the
            # stored summary so references can display it without re-processing.
            result_id = self.tool_result_store.store_result(
                tool_call_id=tool_call_id,
                tool_name=tool_name,
                content=result,
                summary=processed_result,
                token_count=result_tokens,
            )

            # Return reference instead of full content
            return self.tool_result_store.format_reference(result_id, include_summary=True)

        return processed_result
+
+ def retrieve_tool_result(self, result_id: str) -> Optional[str]:
+ """Retrieve a tool result from external storage.
+
+ Args:
+ result_id: ID returned by process_tool_result
+
+ Returns:
+ Full tool result content, or None if not found
+ """
+ return self.tool_result_store.retrieve_result(result_id)
+
+ def get_tool_result_stats(self) -> Dict[str, Any]:
+ """Get statistics about stored tool results.
+
+ Returns:
+ Dictionary with statistics
+ """
+ stats = self.tool_result_store.get_stats()
+ stats["enabled"] = True
+ return stats
+
def _recalculate_current_tokens(self) -> int:
"""Recalculate current token count from scratch.
@@ -460,7 +553,9 @@ def save_memory(self):
Call this method after completing a task or at key checkpoints.
"""
- if not self.store or not self.session_id:
+ # Skip if no session was created (no messages were ever added)
+ if not self.store or not self._session_created or not self.session_id:
+ logger.debug("Skipping save_memory: no session created")
return
messages = self.short_term.get_messages()
diff --git a/memory/tool_result_processor.py b/memory/tool_result_processor.py
new file mode 100644
index 0000000..8888411
--- /dev/null
+++ b/memory/tool_result_processor.py
@@ -0,0 +1,350 @@
+"""Tool result processing for intelligent summarization and truncation."""
+
+import logging
+import re
+from typing import Dict, Optional
+
+from memory.code_extractor import CodeExtractor
+
+logger = logging.getLogger(__name__)
+
+
class ToolResultProcessor:
    """Intelligently process tool results to reduce memory usage.

    Provides different strategies for different tool types:
    - extract_key_sections: For code files, extract imports, definitions, key logic
    - preserve_matches: For search results, keep all matches with minimal context
    - summarize_output: For complex outputs, use LLM to generate summary
    - smart_truncate: For general content, preserve head and tail
    """

    # Tool-specific processing strategies
    TOOL_STRATEGIES = {
        "read_file": {
            "max_tokens": 1000,
            "strategy": "extract_key_sections",
        },
        "grep_content": {
            "max_tokens": 800,
            "strategy": "preserve_matches",
        },
        "execute_shell": {
            "max_tokens": 500,
            "strategy": "smart_truncate",
        },
        "web_search": {
            "max_tokens": 1200,
            "strategy": "smart_truncate",
        },
        "web_fetch": {
            "max_tokens": 1500,
            "strategy": "summarize_output",
        },
        "glob_files": {
            "max_tokens": 600,
            "strategy": "smart_truncate",
        },
    }

    # Default threshold for external storage (tokens)
    DEFAULT_STORAGE_THRESHOLD = 10000

    def __init__(
        self,
        storage_threshold: int = DEFAULT_STORAGE_THRESHOLD,
        summary_model: Optional[str] = None,
    ):
        """Initialize processor.

        Args:
            storage_threshold: Token threshold for recommending external storage
            summary_model: Optional model name for LLM summarization (e.g., "openai/gpt-4o-mini")
                If None, LLM summarization is disabled and falls back to smart_truncate.
        """
        self.storage_threshold = storage_threshold
        self.summary_model = summary_model
        self.code_extractor = CodeExtractor()

    def process_result(
        self,
        tool_name: str,
        result: str,
        context: str = "",
        force_external: bool = False,
        filename: str = "",
    ) -> tuple[str, bool]:
        """Process tool result with appropriate strategy.

        Args:
            tool_name: Name of the tool that produced the result
            result: Raw tool result string
            context: Optional context about the task (for intelligent summarization)
            force_external: If True, always recommend external storage
            filename: Optional filename for language detection (used by extract_key_sections)

        Returns:
            Tuple of (processed_result, should_store_externally)
        """
        # Get strategy for this tool (unknown tools get a safe default)
        strategy_config = self.TOOL_STRATEGIES.get(
            tool_name, {"max_tokens": 1000, "strategy": "smart_truncate"}
        )
        max_tokens = strategy_config["max_tokens"]
        strategy = strategy_config["strategy"]

        # Estimate tokens (rough: 3.5 chars per token)
        estimated_tokens = len(result) / 3.5

        # If result is small enough, return as-is
        if estimated_tokens <= max_tokens:
            return result, False

        # If result is extremely large, recommend external storage
        should_store_externally = force_external or estimated_tokens > self.storage_threshold

        # Apply processing strategy; LLM summarization requires a configured model
        if strategy == "extract_key_sections":
            processed = self._extract_key_sections(result, max_tokens, filename)
        elif strategy == "preserve_matches":
            processed = self._preserve_matches(result, max_tokens)
        elif strategy == "summarize_output" and self.summary_model:
            processed = self._summarize_with_llm(result, max_tokens, context)
        else:
            processed = self._smart_truncate(result, max_tokens)

        logger.info(
            f"Processed {tool_name} result: {int(estimated_tokens)} -> "
            f"{int(len(processed) / 3.5)} tokens (strategy: {strategy})"
        )

        return processed, should_store_externally

    def _extract_key_sections(self, content: str, max_tokens: int, filename: str = "") -> str:
        """Extract key sections from code files using CodeExtractor.

        Uses tree-sitter for accurate multi-language parsing when available,
        with regex fallback for unsupported languages.

        Preserves:
        - Import statements
        - Class and function definitions
        - Key structural elements (structs, interfaces, traits, etc.)

        Omits:
        - Long comments and docstrings
        - Repetitive code blocks

        Args:
            content: Source code content
            max_tokens: Maximum tokens to use
            filename: Optional filename for language detection
        """
        # Try to detect language from filename
        language = self.code_extractor.detect_language(filename, content) if filename else None

        # If we can detect the language, use CodeExtractor's format_extracted_code
        if language:
            return self.code_extractor.format_extracted_code(content, filename, max_tokens)

        # Fallback: try to detect language from content (e.g., shebang)
        language = self.code_extractor.detect_language("", content)
        if language:
            # Create a dummy filename with the right extension
            ext_map = {v: k for k, v in self.code_extractor.EXTENSION_TO_LANGUAGE.items()}
            dummy_ext = ext_map.get(language, ".py")
            return self.code_extractor.format_extracted_code(
                content, f"file{dummy_ext}", max_tokens
            )

        # Final fallback: use simple Python-focused regex extraction
        return self._extract_key_sections_regex(content, max_tokens)

    def _extract_key_sections_regex(self, content: str, max_tokens: int) -> str:
        """Fallback regex-based extraction for unknown languages.

        Uses simple Python-like patterns as a reasonable default.

        Args:
            content: Source code content
            max_tokens: Maximum tokens to use
        """
        max_chars = int(max_tokens * 3.5)
        lines = content.split("\n")

        # Patterns to identify important lines (Python-focused but catches common patterns)
        important_patterns = [
            r"^\s*import\s+",  # imports
            r"^\s*from\s+.*\s+import\s+",  # from imports
            r"^\s*class\s+\w+",  # class definitions
            r"^\s*def\s+\w+",  # function definitions
            r"^\s*async\s+def\s+\w+",  # async function definitions
            r"^\s*@\w+",  # decorators
            r"^\s*function\s+\w+",  # JS/TS functions
            r"^\s*const\s+\w+\s*=\s*\(.*\)\s*=>",  # arrow functions
            r"^\s*interface\s+\w+",  # TS interfaces
            r"^\s*type\s+\w+",  # TS type aliases
            r"^\s*fn\s+\w+",  # Rust functions
            r"^\s*struct\s+\w+",  # Rust/Go structs
            r"^\s*impl\s+",  # Rust impl blocks
            r"^\s*func\s+",  # Go functions
            r"^\s*#include\s+",  # C/C++ includes
        ]

        important_lines = []
        current_size = 0

        for i, line in enumerate(lines):
            # Check if line matches important patterns
            is_important = any(re.match(pattern, line) for pattern in important_patterns)

            if is_important:
                # Add line with line number
                line_with_num = f"{i+1:4d}: {line}"
                if current_size + len(line_with_num) < max_chars:
                    important_lines.append(line_with_num)
                    current_size += len(line_with_num) + 1
                else:
                    break

        if not important_lines:
            # Fallback to smart truncate if no important lines found
            return self._smart_truncate(content, max_tokens)

        result = "\n".join(important_lines)
        omitted_lines = len(lines) - len(important_lines)

        return (
            f"[Key sections extracted - {omitted_lines} lines omitted]\n\n"
            + result
            + "\n\n[Use read_file with specific line ranges for full content]"
        )

    def _preserve_matches(self, content: str, max_tokens: int) -> str:
        """Preserve search matches with minimal context.

        For grep/search results, keeps all matching lines with line numbers.
        """
        max_chars = int(max_tokens * 3.5)
        lines = content.split("\n")

        # Try to keep all lines if possible
        if len(content) <= max_chars:
            return content

        # If too large, keep first N matches
        preserved_lines = []
        current_size = 0

        for line in lines:
            if current_size + len(line) < max_chars:
                preserved_lines.append(line)
                current_size += len(line) + 1
            else:
                break

        omitted = len(lines) - len(preserved_lines)
        result = "\n".join(preserved_lines)

        if omitted > 0:
            result += f"\n\n[... {omitted} more lines omitted. Use more specific search patterns.]"

        return result

    def _summarize_with_llm(self, content: str, max_tokens: int, context: str) -> str:
        """Use configured LLM model to generate intelligent summary.

        Args:
            content: Content to summarize
            max_tokens: Target token limit for summary
            context: Task context for relevance-aware summarization

        Returns:
            Summarized content or smart_truncate fallback on error
        """
        if not self.summary_model:
            return self._smart_truncate(content, max_tokens)

        try:
            import litellm

            # Limit input to avoid excessive costs (10k chars ~ 2.5k tokens)
            input_limit = 10000
            truncated_content = content[:input_limit]
            if len(content) > input_limit:
                truncated_content += f"\n\n[... {len(content) - input_limit} more characters]"

            prompt = f"""Summarize this tool output concisely, focusing on key information.
Context: {context if context else 'general task'}

Output to summarize:
{truncated_content}

Provide a concise summary (target: {max_tokens} tokens) that captures the essential information."""

            response = litellm.completion(
                model=self.summary_model,
                messages=[{"role": "user", "content": prompt}],
                max_tokens=max_tokens * 2,
            )
            summary = response.choices[0].message.content

            return f"[Summary by {self.summary_model}]\n\n{summary}"

        except Exception as e:
            # Best-effort: never let a summarization failure lose the result.
            logger.warning(
                f"LLM summarization failed ({self.summary_model}): {e}, falling back to truncation"
            )
            return self._smart_truncate(content, max_tokens)

    def _smart_truncate(self, content: str, max_tokens: int) -> str:
        """Smart truncation preserving head and tail.

        Keeps first 60% and last 20% of allowed content.
        This preserves context from both beginning and end.
        """
        max_chars = int(max_tokens * 3.5)

        if len(content) <= max_chars:
            return content

        # Calculate split points
        head_chars = int(max_chars * 0.6)
        tail_chars = int(max_chars * 0.2)

        # Try to break at line boundaries
        head_part = content[:head_chars]
        tail_part = content[-tail_chars:]

        # Find last newline in head
        last_newline = head_part.rfind("\n")
        if last_newline > head_chars * 0.8:  # If newline is reasonably close
            head_part = head_part[:last_newline]

        # Find first newline in tail
        first_newline = tail_part.find("\n")
        if first_newline > 0 and first_newline < tail_chars * 0.2:
            tail_part = tail_part[first_newline + 1 :]

        omitted_chars = len(content) - len(head_part) - len(tail_part)

        return (
            head_part
            + f"\n\n[... {omitted_chars} characters omitted ...]\n\n"
            + tail_part
            + "\n\n[Use more specific queries to see omitted content]"
        )

    def estimate_tokens(self, text: str) -> int:
        """Estimate token count for text.

        Args:
            text: Text to estimate

        Returns:
            Estimated token count
        """
        return int(len(text) / 3.5)
diff --git a/memory/tool_result_store.py b/memory/tool_result_store.py
new file mode 100644
index 0000000..ee7e43c
--- /dev/null
+++ b/memory/tool_result_store.py
@@ -0,0 +1,336 @@
+"""External storage for large tool results."""
+
+import hashlib
+import logging
+import sqlite3
+from datetime import datetime
+from pathlib import Path
+from typing import Optional
+
+logger = logging.getLogger(__name__)
+
+
+class ToolResultStore:
+ """Store large tool results externally with SQLite backend.
+
+ This allows keeping full tool results accessible while only storing
+ summaries in the main memory context.
+ """
+
+ def __init__(self, db_path: Optional[str] = None):
+ """Initialize tool result store.
+
+ Args:
+ db_path: Path to SQLite database file. If None, uses in-memory database.
+ """
+ self.db_path = db_path or ":memory:"
+ self.conn = None
+ self._init_db()
+
+ def _init_db(self):
+ """Initialize database schema."""
+ self.conn = sqlite3.connect(self.db_path, check_same_thread=False)
+ self.conn.row_factory = sqlite3.Row
+
+ self.conn.execute(
+ """
+ CREATE TABLE IF NOT EXISTS tool_results (
+ id TEXT PRIMARY KEY,
+ tool_call_id TEXT NOT NULL,
+ tool_name TEXT NOT NULL,
+ content TEXT NOT NULL,
+ content_hash TEXT NOT NULL,
+ summary TEXT,
+ token_count INTEGER,
+ created_at TIMESTAMP NOT NULL,
+ accessed_at TIMESTAMP,
+ access_count INTEGER DEFAULT 0
+ )
+ """
+ )
+
+ # Index for faster lookups
+ self.conn.execute(
+ """
+ CREATE INDEX IF NOT EXISTS idx_tool_call_id
+ ON tool_results(tool_call_id)
+ """
+ )
+
+ self.conn.execute(
+ """
+ CREATE INDEX IF NOT EXISTS idx_created_at
+ ON tool_results(created_at)
+ """
+ )
+
+ self.conn.commit()
+ logger.info(f"Initialized tool result store at {self.db_path}")
+
+ def store_result(
+ self,
+ tool_call_id: str,
+ tool_name: str,
+ content: str,
+ summary: Optional[str] = None,
+ token_count: Optional[int] = None,
+ ) -> str:
+ """Store a tool result externally.
+
+ Args:
+ tool_call_id: ID of the tool call that produced this result
+ tool_name: Name of the tool
+ content: Full content to store
+ summary: Optional summary (if None, will generate simple summary)
+ token_count: Optional token count of content
+
+ Returns:
+ Result ID for retrieval
+ """
+ # Generate unique ID based on content hash
+ content_hash = hashlib.sha256(content.encode()).hexdigest()[:16]
+ result_id = f"{tool_name}_{content_hash}"
+
+ # Check if already stored
+ existing = self.conn.execute(
+ "SELECT id FROM tool_results WHERE content_hash = ?", (content_hash,)
+ ).fetchone()
+
+ if existing:
+ logger.debug(f"Tool result already stored: {result_id}")
+ return result_id
+
+ # Generate summary if not provided
+ if summary is None:
+ summary = self._generate_simple_summary(content, tool_name)
+
+ # Estimate tokens if not provided
+ if token_count is None:
+ token_count = int(len(content) / 3.5)
+
+ # Store result
+ try:
+ self.conn.execute(
+ """
+ INSERT INTO tool_results
+ (id, tool_call_id, tool_name, content, content_hash, summary, token_count, created_at)
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+ """,
+ (
+ result_id,
+ tool_call_id,
+ tool_name,
+ content,
+ content_hash,
+ summary,
+ token_count,
+ datetime.now(),
+ ),
+ )
+ self.conn.commit()
+ logger.info(
+ f"Stored tool result {result_id}: {len(content)} chars, {token_count} tokens"
+ )
+ return result_id
+
+ except sqlite3.IntegrityError as e:
+ logger.warning(f"Failed to store tool result: {e}")
+ # Return existing ID if duplicate
+ return result_id
+
+ def retrieve_result(self, result_id: str) -> Optional[str]:
+ """Retrieve full content of a stored result.
+
+ Args:
+ result_id: ID returned by store_result()
+
+ Returns:
+ Full content, or None if not found
+ """
+ row = self.conn.execute(
+ "SELECT content FROM tool_results WHERE id = ?", (result_id,)
+ ).fetchone()
+
+ if row:
+ # Update access tracking
+ self.conn.execute(
+ """
+ UPDATE tool_results
+ SET accessed_at = ?, access_count = access_count + 1
+ WHERE id = ?
+ """,
+ (datetime.now(), result_id),
+ )
+ self.conn.commit()
+
+ logger.debug(f"Retrieved tool result {result_id}")
+ return row["content"]
+
+ logger.warning(f"Tool result not found: {result_id}")
+ return None
+
+ def get_summary(self, result_id: str) -> Optional[str]:
+ """Get summary of a stored result without retrieving full content.
+
+ Args:
+ result_id: ID returned by store_result()
+
+ Returns:
+ Summary text, or None if not found
+ """
+ row = self.conn.execute(
+ "SELECT summary, tool_name, token_count FROM tool_results WHERE id = ?",
+ (result_id,),
+ ).fetchone()
+
+ if row:
+ return row["summary"]
+
+ return None
+
+ def get_metadata(self, result_id: str) -> Optional[dict]:
+ """Get metadata about a stored result.
+
+ Args:
+ result_id: ID returned by store_result()
+
+ Returns:
+ Dictionary with metadata, or None if not found
+ """
+ row = self.conn.execute(
+ """
+ SELECT tool_call_id, tool_name, token_count, created_at,
+ accessed_at, access_count, length(content) as content_length
+ FROM tool_results
+ WHERE id = ?
+ """,
+ (result_id,),
+ ).fetchone()
+
+ if row:
+ return dict(row)
+
+ return None
+
+ def format_reference(self, result_id: str, include_summary: bool = True) -> str:
+ """Format a reference to a stored result for inclusion in memory.
+
+ Args:
+ result_id: ID returned by store_result()
+ include_summary: Whether to include the summary
+
+ Returns:
+ Formatted reference string
+ """
+ metadata = self.get_metadata(result_id)
+ if not metadata:
+ return f"[Tool Result #{result_id} - not found]"
+
+ lines = [
+ f"[Tool Result #{result_id}]",
+ f"Tool: {metadata['tool_name']}",
+ f"Size: {metadata['content_length']} chars (~{metadata['token_count']} tokens)",
+ f"Stored: {metadata['created_at']}",
+ ]
+
+ if include_summary:
+ summary = self.get_summary(result_id)
+ if summary:
+ lines.append("")
+ lines.append("Summary:")
+ lines.append(summary)
+
+ lines.append("")
+ lines.append(
+ "[Full content available via retrieve_tool_result tool - use this ID to access]"
+ )
+
+ return "\n".join(lines)
+
+ def _generate_simple_summary(self, content: str, tool_name: str) -> str:
+ """Generate a simple summary of content.
+
+ Args:
+ content: Content to summarize
+ tool_name: Name of tool that produced content
+
+ Returns:
+ Simple summary string
+ """
+ lines = content.split("\n")
+ char_count = len(content)
+ line_count = len(lines)
+
+ # Get first few non-empty lines as preview
+ preview_lines = []
+ for line in lines[:5]:
+ if line.strip():
+ preview_lines.append(line[:100])
+ if len(preview_lines) >= 3:
+ break
+
+ preview = "\n".join(preview_lines)
+ if len(preview) > 300:
+ preview = preview[:297] + "..."
+
+ return f"""Tool: {tool_name}
+Size: {char_count} characters, {line_count} lines
+
+Preview:
+{preview}
+
+[Use retrieve_tool_result to access full content]"""
+
+ def cleanup_old_results(self, days: int = 7) -> int:
+ """Remove results older than specified days.
+
+ Args:
+ days: Remove results older than this many days
+
+ Returns:
+ Number of results removed
+ """
+ cursor = self.conn.execute(
+ """
+ DELETE FROM tool_results
+ WHERE created_at < datetime('now', '-' || ? || ' days')
+ """,
+ (days,),
+ )
+ deleted = cursor.rowcount
+ self.conn.commit()
+
+ if deleted > 0:
+ logger.info(f"Cleaned up {deleted} old tool results (older than {days} days)")
+
+ return deleted
+
+ def get_stats(self) -> dict:
+ """Get statistics about stored results.
+
+ Returns:
+ Dictionary with statistics
+ """
+ row = self.conn.execute(
+ """
+ SELECT
+ COUNT(*) as total_results,
+ SUM(length(content)) as total_bytes,
+ SUM(token_count) as total_tokens,
+ AVG(access_count) as avg_access_count,
+ MAX(created_at) as latest_created
+ FROM tool_results
+ """
+ ).fetchone()
+
+ return dict(row) if row else {}
+
+ def close(self):
+ """Close database connection."""
+ if self.conn:
+ self.conn.close()
+ logger.info("Closed tool result store")
+
+ def __del__(self):
+ """Cleanup on deletion."""
+ self.close()
diff --git a/memory/types.py b/memory/types.py
index 928face..3aee317 100644
--- a/memory/types.py
+++ b/memory/types.py
@@ -32,6 +32,26 @@ class MemoryConfig:
enable_compression: bool = True # Enable/disable compression
compression_model: Optional[str] = None # Model to use for compression (None = same as agent)
+ # Tool result processing (always enabled)
+ tool_result_storage_threshold: int = 10000 # Store externally if result > N tokens
+ tool_result_storage_path: Optional[str] = None # Path to SQLite DB (None = in-memory)
+ tool_result_summary_model: Optional[str] = (
+ None # Model for summarizing large tool results (None = disable)
+ )
+
+ # Tool-specific token budgets (can be overridden per tool)
+ tool_result_budgets: Dict[str, int] = field(
+ default_factory=lambda: {
+ "read_file": 1000,
+ "grep_content": 800,
+ "execute_shell": 500,
+ "web_search": 1200,
+ "web_fetch": 1500,
+ "glob_files": 600,
+ "default": 1000,
+ }
+ )
+
@dataclass
class CompressedMemory:
diff --git a/test/memory/test_tool_result_processing.py b/test/memory/test_tool_result_processing.py
new file mode 100644
index 0000000..879354f
--- /dev/null
+++ b/test/memory/test_tool_result_processing.py
@@ -0,0 +1,445 @@
+"""Tests for tool result processing and external storage."""
+
+import pytest
+
+from llm.base import LLMMessage
+from memory.tool_result_processor import ToolResultProcessor
+from memory.tool_result_store import ToolResultStore
+
+
class TestToolResultProcessor:
    """Test intelligent tool result processing.

    Covers the per-tool strategies (key-section extraction, match
    preservation, smart truncation), the language-detection paths
    (filename, shebang, regex fallback), and the external-storage
    recommendation threshold.
    """

    def test_small_result_passthrough(self):
        """Small results should pass through unchanged."""
        processor = ToolResultProcessor()
        result = "Small result"

        processed, should_store = processor.process_result("read_file", result)

        assert processed == result
        assert should_store is False

    def test_large_result_truncation(self):
        """Large results should be truncated."""
        processor = ToolResultProcessor()
        result = "x" * 10000  # 10k chars

        processed, should_store = processor.process_result("read_file", result)

        assert len(processed) < len(result)
        assert "[... " in processed or "[Key sections" in processed
        assert should_store is False  # Not large enough for external storage

    def test_very_large_result_external_storage(self):
        """Very large results should recommend external storage."""
        processor = ToolResultProcessor()
        # ~14k estimated tokens, above the default 10k storage threshold.
        result = "x" * 50000  # 50k chars (~14k tokens)

        processed, should_store = processor.process_result("read_file", result)

        assert should_store is True

    def test_extract_key_sections_strategy(self):
        """Test key section extraction for code files."""
        processor = ToolResultProcessor()
        code = """
import os
import sys

# This is a comment
# Another comment

class MyClass:
    def __init__(self):
        pass

    def method1(self):
        # Long implementation
        pass

def my_function():
    # Another long implementation
    pass
"""
        processed, _ = processor.process_result("read_file", code * 100)  # Make it large

        # Should extract imports and definitions
        assert (
            "import os" in processed
            or "class MyClass" in processed
            or "def my_function" in processed
        )

    def test_extract_key_sections_with_filename(self):
        """Test key section extraction with explicit filename for language detection."""
        processor = ToolResultProcessor()
        python_code = """
import os
import sys
from typing import List, Dict

class DataProcessor:
    def __init__(self, config: Dict):
        self.config = config

    def process(self, items: List[str]) -> List[str]:
        return [item.upper() for item in items]

def main():
    processor = DataProcessor({})
    result = processor.process(["hello", "world"])
    print(result)
"""
        # Make the code large enough to trigger processing
        large_code = python_code * 50
        processed, _ = processor.process_result(
            "read_file", large_code, filename="data_processor.py"
        )

        # Should extract key sections using CodeExtractor
        assert "[Key sections" in processed or "extracted" in processed.lower()
        assert "DataProcessor" in processed or "def main" in processed

    def test_extract_key_sections_javascript(self):
        """Test key section extraction for JavaScript files."""
        processor = ToolResultProcessor()
        # NOTE(review): the JSX return statements in this fixture look garbled
        # (tags appear stripped in transit). Harmless for this test, which only
        # checks the extraction markers — but confirm against the original
        # fixture.
        js_code = """
import React from 'react';
import { useState, useEffect } from 'react';

function MyComponent(props) {
    const [count, setCount] = useState(0);

    useEffect(() => {
        console.log('Effect running');
    }, [count]);

    return
{count}
;
}

class LegacyComponent extends React.Component {
    render() {
        return Legacy
;
    }
}

export default MyComponent;
"""
        large_code = js_code * 50
        processed, _ = processor.process_result("read_file", large_code, filename="component.js")

        # Should extract JavaScript-specific patterns
        assert "[Key sections" in processed or "extracted" in processed.lower()

    def test_extract_key_sections_rust(self):
        """Test key section extraction for Rust files."""
        processor = ToolResultProcessor()
        rust_code = """
use std::collections::HashMap;
use std::io::{self, Read};

struct Config {
    name: String,
    value: i32,
}

impl Config {
    fn new(name: &str, value: i32) -> Self {
        Config {
            name: name.to_string(),
            value,
        }
    }
}

fn process_data(data: &[u8]) -> Result {
    Ok(String::from_utf8_lossy(data).to_string())
}

trait Processor {
    fn process(&self, input: &str) -> String;
}
"""
        large_code = rust_code * 50
        processed, _ = processor.process_result("read_file", large_code, filename="lib.rs")

        # Should extract Rust-specific patterns
        assert "[Key sections" in processed or "extracted" in processed.lower()

    def test_extract_key_sections_unknown_language_fallback(self):
        """Test fallback to regex extraction for unknown file types."""
        processor = ToolResultProcessor()
        # Code with Python-like patterns but unknown extension
        code = """
import something
from module import thing

class MyClass:
    def method(self):
        pass

def function():
    pass
"""
        large_code = code * 100
        # Use an unknown extension
        processed, _ = processor.process_result(
            "read_file", large_code, filename="file.unknown_extension"
        )

        # Should still extract something (fallback regex)
        assert len(processed) < len(large_code)

    def test_extract_key_sections_shebang_detection(self):
        """Test language detection from shebang when no filename provided."""
        processor = ToolResultProcessor()
        python_script = """#!/usr/bin/env python3

import os
import sys

def main():
    print("Hello, world!")

if __name__ == "__main__":
    main()
"""
        large_code = python_script * 100
        # No filename provided, should detect from shebang
        processed, _ = processor.process_result("read_file", large_code)

        # Should extract something
        assert len(processed) < len(large_code)

    def test_preserve_matches_strategy(self):
        """Test match preservation for grep results."""
        processor = ToolResultProcessor()
        grep_result = "\n".join([f"file{i}.py:10:match line {i}" for i in range(100)])

        processed, _ = processor.process_result("grep_content", grep_result)

        # Should preserve match lines
        assert "file" in processed
        assert "match line" in processed

    def test_smart_truncate_preserves_head_and_tail(self):
        """Test smart truncation preserves both ends."""
        processor = ToolResultProcessor()
        result = "START" + ("x" * 10000) + "END"

        processed, _ = processor.process_result("execute_shell", result)

        # Should have both start and end
        assert "START" in processed
        assert "END" in processed
        assert len(processed) < len(result)

    def test_token_estimation(self):
        """Test token estimation."""
        processor = ToolResultProcessor()

        # Rough estimate: ~3.5 chars per token
        text = "x" * 3500
        tokens = processor.estimate_tokens(text)

        assert 900 < tokens < 1100  # Should be around 1000 tokens
+
+
+class TestToolResultStore:
+ """Test external tool result storage."""
+
+ def test_store_and_retrieve(self):
+ """Test basic store and retrieve."""
+ store = ToolResultStore() # In-memory
+
+ result_id = store.store_result(
+ tool_call_id="call_123",
+ tool_name="read_file",
+ content="Test content",
+ )
+
+ assert result_id is not None
+
+ retrieved = store.retrieve_result(result_id)
+ assert retrieved == "Test content"
+
+ def test_duplicate_content_same_id(self):
+ """Duplicate content should return same ID."""
+ store = ToolResultStore()
+
+ id1 = store.store_result(
+ tool_call_id="call_1",
+ tool_name="read_file",
+ content="Same content",
+ )
+
+ id2 = store.store_result(
+ tool_call_id="call_2",
+ tool_name="read_file",
+ content="Same content",
+ )
+
+ assert id1 == id2
+
+ def test_get_summary(self):
+ """Test getting summary without full content."""
+ store = ToolResultStore()
+
+ result_id = store.store_result(
+ tool_call_id="call_123",
+ tool_name="read_file",
+ content="x" * 10000,
+ summary="Custom summary",
+ )
+
+ summary = store.get_summary(result_id)
+ assert summary == "Custom summary"
+
+ def test_get_metadata(self):
+ """Test getting metadata."""
+ store = ToolResultStore()
+
+ result_id = store.store_result(
+ tool_call_id="call_123",
+ tool_name="read_file",
+ content="Test content",
+ token_count=100,
+ )
+
+ metadata = store.get_metadata(result_id)
+ assert metadata is not None
+ assert metadata["tool_name"] == "read_file"
+ assert metadata["tool_call_id"] == "call_123"
+ assert metadata["token_count"] == 100
+ assert metadata["content_length"] == len("Test content")
+
+ def test_format_reference(self):
+ """Test formatting a reference."""
+ store = ToolResultStore()
+
+ result_id = store.store_result(
+ tool_call_id="call_123",
+ tool_name="read_file",
+ content="Test content",
+ summary="This is a summary",
+ )
+
+ reference = store.format_reference(result_id, include_summary=True)
+
+ assert result_id in reference
+ assert "read_file" in reference
+ assert "This is a summary" in reference
+ assert "retrieve_tool_result" in reference
+
+ def test_access_tracking(self):
+ """Test that access is tracked."""
+ store = ToolResultStore()
+
+ result_id = store.store_result(
+ tool_call_id="call_123",
+ tool_name="read_file",
+ content="Test content",
+ )
+
+ # Retrieve multiple times
+ store.retrieve_result(result_id)
+ store.retrieve_result(result_id)
+
+ metadata = store.get_metadata(result_id)
+ assert metadata["access_count"] == 2
+ assert metadata["accessed_at"] is not None
+
+ def test_get_stats(self):
+ """Test getting storage statistics."""
+ store = ToolResultStore()
+
+ # Store some results
+ for i in range(5):
+ store.store_result(
+ tool_call_id=f"call_{i}",
+ tool_name="read_file",
+ content=f"Content {i}" * 100,
+ token_count=100,
+ )
+
+ stats = store.get_stats()
+ assert stats["total_results"] == 5
+ assert stats["total_tokens"] == 500
+ assert stats["total_bytes"] > 0
+
+ def test_retrieve_nonexistent(self):
+ """Test retrieving non-existent result."""
+ store = ToolResultStore()
+
+ result = store.retrieve_result("nonexistent_id")
+ assert result is None
+
+ def test_cleanup_old_results(self):
+ """Test cleanup of old results."""
+ store = ToolResultStore()
+
+ # Store a result
+ store.store_result(
+ tool_call_id="call_123",
+ tool_name="read_file",
+ content="Test content",
+ )
+
+        # Cleanup with a 0-day cutoff; a just-created row may or may not be removed
+ deleted = store.cleanup_old_results(days=0)
+ assert deleted >= 0 # May be 0 if created just now
+
+ def test_store_with_persistence(self, tmp_path):
+ """Test storage with persistent database."""
+ db_path = str(tmp_path / "test_store.db")
+
+ # Create store and add data
+ store1 = ToolResultStore(db_path=db_path)
+ result_id = store1.store_result(
+ tool_call_id="call_123",
+ tool_name="read_file",
+ content="Persistent content",
+ )
+ store1.close()
+
+ # Reopen and verify data persists
+ store2 = ToolResultStore(db_path=db_path)
+ retrieved = store2.retrieve_result(result_id)
+ assert retrieved == "Persistent content"
+ store2.close()
+
+
+class TestIntegration:
+ """Integration tests for processor + store."""
+
+ def test_processor_and_store_integration(self):
+ """Test processor recommending external storage."""
+ processor = ToolResultProcessor()
+ store = ToolResultStore()
+
+ # Create a very large result
+ large_result = "x" * 50000
+
+ # Process it
+ processed, should_store = processor.process_result("read_file", large_result)
+
+ assert should_store is True
+
+ # Store it
+ result_id = store.store_result(
+ tool_call_id="call_123",
+ tool_name="read_file",
+ content=large_result,
+ summary=processed,
+ )
+
+ # Get reference
+ reference = store.format_reference(result_id)
+
+ # Reference should be much smaller than original
+ assert len(reference) < len(large_result) / 10
+
+ # Should be able to retrieve full content
+ retrieved = store.retrieve_result(result_id)
+ assert retrieved == large_result
diff --git a/tools/retrieve_tool_result.py b/tools/retrieve_tool_result.py
new file mode 100644
index 0000000..d1020b6
--- /dev/null
+++ b/tools/retrieve_tool_result.py
@@ -0,0 +1,86 @@
+"""Tool for retrieving externally stored tool results."""
+
+import logging
+from typing import TYPE_CHECKING, Any, Dict
+
+from .base import BaseTool
+
+if TYPE_CHECKING:
+ from memory import MemoryManager
+
+logger = logging.getLogger(__name__)
+
+
+class RetrieveToolResultTool(BaseTool):
+ """Retrieve full content of externally stored tool results.
+
+ When tool results are too large, they are stored externally and only
+ a summary/reference is kept in memory. This tool allows retrieving
+ the full content when needed.
+ """
+
+ def __init__(self, memory_manager: "MemoryManager"):
+ """Initialize tool.
+
+ Args:
+ memory_manager: MemoryManager instance with tool result storage
+ """
+ self.memory_manager = memory_manager
+
+ @property
+ def name(self) -> str:
+ return "retrieve_tool_result"
+
+ @property
+ def description(self) -> str:
+ return (
+ "Retrieve the full content of a tool result that was stored externally. "
+ "Use this when you see a '[Tool Result #...]' reference in the conversation "
+ "and need to access the complete output. The result_id can be found in the "
+ "reference message (e.g., 'Tool Result #read_file_abc123')."
+ )
+
+ @property
+ def parameters(self) -> Dict[str, Any]:
+ return {
+ "result_id": {
+ "type": "string",
+ "description": (
+ "The ID of the stored tool result (found in the reference message, "
+ "e.g., 'read_file_abc123')"
+ ),
+ }
+ }
+
+ def execute(self, result_id: str) -> str:
+ """Retrieve a stored tool result.
+
+ Args:
+ result_id: ID of the stored result
+
+ Returns:
+ Full content or error message
+ """
+ try:
+ # Retrieve the result
+ content = self.memory_manager.retrieve_tool_result(result_id)
+
+ if content is None:
+ return (
+ f"Error: Tool result '{result_id}' not found in storage. "
+ f"It may have been cleaned up or the ID is incorrect."
+ )
+
+ # Get metadata for context
+ metadata = self.memory_manager.tool_result_store.get_metadata(result_id)
+ header = f"[Retrieved Tool Result #{result_id}]\n"
+ if metadata:
+ header += f"Tool: {metadata['tool_name']}\n"
+ header += f"Size: {metadata['content_length']} characters\n"
+ header += f"Created: {metadata['created_at']}\n\n"
+
+ return header + content
+
+ except Exception as e:
+ logger.error(f"Error retrieving tool result {result_id}: {e}")
+ return f"Error retrieving tool result: {str(e)}"
diff --git a/tools/web_fetch.py b/tools/web_fetch.py
index a901e6b..add3009 100644
--- a/tools/web_fetch.py
+++ b/tools/web_fetch.py
@@ -21,7 +21,6 @@
DEFAULT_TIMEOUT_SECONDS = 30
MAX_TIMEOUT_SECONDS = 120
MAX_REDIRECTS = 5
-MAX_OUTPUT_CHARS = 6000
ALLOWED_PORTS = {80, 443}
BLOCKED_HOSTS = {"localhost"}
BLOCKED_SUFFIXES = (".local",)
@@ -140,7 +139,6 @@ def _execute(
content = content_bytes.decode(encoding, errors="replace")
output, title = self._convert_content(content, content_type, format, url)
- output, output_truncated, output_total_chars = self._truncate_output(output)
metadata = {
"requested_url": url,
@@ -149,10 +147,9 @@ def _execute(
"content_type": content_type_header,
"charset": encoding,
"fetched_bytes": len(content_bytes),
+ "output_chars": len(output),
"redirects": redirects,
"truncated": len(content_bytes) >= MAX_RESPONSE_BYTES,
- "output_truncated": output_truncated,
- "output_total_chars": output_total_chars,
"duration_ms": int((time.time() - start_time) * 1000),
}
@@ -412,11 +409,3 @@ def _render_html(self, html: str, format: str, url: str) -> Tuple[str, str]:
node.drop_tree()
text = tree.text_content()
return " ".join(text.split()), title
-
- def _truncate_output(self, output: str) -> Tuple[str, bool, int]:
- total = len(output)
- if total <= MAX_OUTPUT_CHARS:
- return output, False, total
- suffix = "\n\n[... output truncated ...]"
- cutoff = max(0, MAX_OUTPUT_CHARS - len(suffix))
- return output[:cutoff] + suffix, True, total
From 3e18aae7544f85a621ccf1c0c6a5275f8cc8ddc2 Mon Sep 17 00:00:00 2001
From: Yixin Luo <18810541851@163.com>
Date: Sun, 18 Jan 2026 10:50:38 +0800
Subject: [PATCH 2/3] refactor: centralize configuration access in Config
Signed-off-by: Yixin Luo <18810541851@163.com>
---
agent/base.py | 10 +--
config.py | 55 ++++++------
llm/__init__.py | 2 -
llm/litellm_adapter.py | 8 +-
llm/retry.py | 78 +++-------------
main.py | 5 +-
memory/__init__.py | 3 +-
memory/code_extractor.py | 4 +-
memory/compressor.py | 13 ++-
memory/manager.py | 51 +++++------
memory/store.py | 7 +-
memory/types.py | 48 +---------
test/memory/conftest.py | 19 ++++
test/memory/test_compressor.py | 109 ++++++++++-------------
test/memory/test_integration.py | 130 ++++++++++++++-------------
test/memory/test_memory_manager.py | 138 ++++++++++++++---------------
test/test_memory.py | 26 +++---
17 files changed, 288 insertions(+), 418 deletions(-)
diff --git a/agent/base.py b/agent/base.py
index 69d4835..9e18e13 100644
--- a/agent/base.py
+++ b/agent/base.py
@@ -4,7 +4,7 @@
from typing import TYPE_CHECKING, List, Optional
from llm import LLMMessage, LLMResponse, ToolResult
-from memory import MemoryConfig, MemoryManager
+from memory import MemoryManager
from tools.base import BaseTool
from tools.todo import TodoTool
from utils import get_logger, terminal_ui
@@ -26,7 +26,6 @@ def __init__(
llm: "LiteLLMLLM",
tools: List[BaseTool],
max_iterations: int = 10,
- memory_config: Optional[MemoryConfig] = None,
):
"""Initialize the agent.
@@ -34,7 +33,6 @@ def __init__(
llm: LLM instance to use
max_iterations: Maximum number of agent loop iterations
tools: List of tools available to the agent
- memory_config: Optional memory configuration (None = use defaults)
"""
self.llm = llm
self.max_iterations = max_iterations
@@ -53,10 +51,8 @@ def __init__(
self.tool_executor = ToolExecutor(tools)
- # Initialize memory manager
- if memory_config is None:
- memory_config = MemoryConfig()
- self.memory = MemoryManager(memory_config, llm)
+ # Initialize memory manager (uses Config directly)
+ self.memory = MemoryManager(llm)
@abstractmethod
def run(self, task: str) -> str:
diff --git a/config.py b/config.py
index da76f94..6e96309 100644
--- a/config.py
+++ b/config.py
@@ -1,6 +1,7 @@
"""Configuration management for the agentic system."""
import os
+import random
from dotenv import load_dotenv
@@ -8,7 +9,10 @@
class Config:
- """Configuration for the agentic system."""
+ """Configuration for the agentic system.
+
+ All configuration is centralized here. Access config values directly via Config.XXX.
+ """
# LiteLLM Model Configuration
# Format: provider/model_name (e.g. "anthropic/claude-3-5-sonnet-20241022")
@@ -31,6 +35,8 @@ class Config:
RETRY_MAX_ATTEMPTS = int(os.getenv("RETRY_MAX_ATTEMPTS", "3"))
RETRY_INITIAL_DELAY = float(os.getenv("RETRY_INITIAL_DELAY", "1.0"))
RETRY_MAX_DELAY = float(os.getenv("RETRY_MAX_DELAY", "60.0"))
+ RETRY_EXPONENTIAL_BASE = 2.0
+ RETRY_JITTER = True
# Memory Management Configuration
MEMORY_ENABLED = os.getenv("MEMORY_ENABLED", "true").lower() == "true"
@@ -38,9 +44,14 @@ class Config:
MEMORY_TARGET_TOKENS = int(os.getenv("MEMORY_TARGET_TOKENS", "50000"))
MEMORY_COMPRESSION_THRESHOLD = int(os.getenv("MEMORY_COMPRESSION_THRESHOLD", "40000"))
MEMORY_SHORT_TERM_SIZE = int(os.getenv("MEMORY_SHORT_TERM_SIZE", "100"))
+ MEMORY_SHORT_TERM_MIN_SIZE = int(os.getenv("MEMORY_SHORT_TERM_MIN_SIZE", "5"))
MEMORY_COMPRESSION_RATIO = float(os.getenv("MEMORY_COMPRESSION_RATIO", "0.3"))
+ MEMORY_PRESERVE_TOOL_CALLS = True
+ MEMORY_PRESERVE_SYSTEM_PROMPTS = True
# Tool Result Processing Configuration
+ TOOL_RESULT_STORAGE_THRESHOLD = int(os.getenv("TOOL_RESULT_STORAGE_THRESHOLD", "10000"))
+ TOOL_RESULT_STORAGE_PATH = os.getenv("TOOL_RESULT_STORAGE_PATH")
# Model for summarizing large tool results (e.g., "openai/gpt-4o-mini", "anthropic/claude-3-haiku-20240307")
# If not set, LLM summarization is disabled and falls back to smart truncation
TOOL_RESULT_SUMMARY_MODEL = os.getenv("TOOL_RESULT_SUMMARY_MODEL")
@@ -52,40 +63,26 @@ class Config:
LOG_TO_CONSOLE = os.getenv("LOG_TO_CONSOLE", "false").lower() == "true"
@classmethod
- def get_retry_config(cls):
- """Get retry configuration.
+ def get_retry_delay(cls, attempt: int) -> float:
+ """Calculate delay for a given retry attempt using exponential backoff.
+
+ Args:
+ attempt: Current attempt number (0-indexed)
Returns:
- RetryConfig instance with settings from environment variables
+ Delay in seconds
"""
- from llm.retry import RetryConfig
-
- return RetryConfig(
- max_retries=cls.RETRY_MAX_ATTEMPTS,
- initial_delay=cls.RETRY_INITIAL_DELAY,
- max_delay=cls.RETRY_MAX_DELAY,
- exponential_base=2.0,
- jitter=True,
+ # Calculate exponential backoff
+ delay = min(
+ cls.RETRY_INITIAL_DELAY * (cls.RETRY_EXPONENTIAL_BASE**attempt),
+ cls.RETRY_MAX_DELAY,
)
- @classmethod
- def get_memory_config(cls):
- """Get memory configuration.
+ # Add jitter to avoid thundering herd
+ if cls.RETRY_JITTER:
+ delay = delay * (0.5 + random.random())
- Returns:
- MemoryConfig instance with settings from environment variables
- """
- from memory import MemoryConfig
-
- return MemoryConfig(
- max_context_tokens=cls.MEMORY_MAX_CONTEXT_TOKENS,
- target_working_memory_tokens=cls.MEMORY_TARGET_TOKENS,
- compression_threshold=cls.MEMORY_COMPRESSION_THRESHOLD,
- short_term_message_count=cls.MEMORY_SHORT_TERM_SIZE,
- compression_ratio=cls.MEMORY_COMPRESSION_RATIO,
- enable_compression=cls.MEMORY_ENABLED,
- tool_result_summary_model=cls.TOOL_RESULT_SUMMARY_MODEL,
- )
+ return delay
@classmethod
def validate(cls):
diff --git a/llm/__init__.py b/llm/__init__.py
index 546be3a..6bca1a2 100644
--- a/llm/__init__.py
+++ b/llm/__init__.py
@@ -2,7 +2,6 @@
from .base import LLMMessage, LLMResponse, ToolCall, ToolResult
from .litellm_adapter import LiteLLMLLM
-from .retry import RetryConfig
__all__ = [
"LLMMessage",
@@ -10,5 +9,4 @@
"ToolCall",
"ToolResult",
"LiteLLMLLM",
- "RetryConfig",
]
diff --git a/llm/litellm_adapter.py b/llm/litellm_adapter.py
index 8134531..9e412e6 100644
--- a/llm/litellm_adapter.py
+++ b/llm/litellm_adapter.py
@@ -9,7 +9,7 @@
from utils import get_logger
from .base import LLMMessage, LLMResponse, ToolCall, ToolResult
-from .retry import RetryConfig, with_retry
+from .retry import with_retry
logger = get_logger(__name__)
@@ -31,7 +31,6 @@ def __init__(self, model: str, **kwargs):
**kwargs: Additional configuration:
- api_key: API key (optional, uses env vars by default)
- api_base: Custom base URL
- - retry_config: RetryConfig instance
- drop_params: Drop unsupported params (default: True)
- timeout: Request timeout in seconds
"""
@@ -45,11 +44,6 @@ def __init__(self, model: str, **kwargs):
self.drop_params = kwargs.pop("drop_params", True)
self.timeout = kwargs.pop("timeout", 600)
- # Configure retry behavior
- self.retry_config = kwargs.pop(
- "retry_config", RetryConfig(max_retries=3, initial_delay=1.0, max_delay=60.0)
- )
-
# Configure LiteLLM global settings
litellm.drop_params = self.drop_params
litellm.set_verbose = False # Disable verbose output
diff --git a/llm/retry.py b/llm/retry.py
index 4a54c60..528e909 100644
--- a/llm/retry.py
+++ b/llm/retry.py
@@ -1,61 +1,16 @@
"""Retry utilities for LLM API calls with exponential backoff."""
-import random
import time
from functools import wraps
from typing import Callable, TypeVar
+from config import Config
from utils import get_logger
logger = get_logger(__name__)
T = TypeVar("T")
-class RetryConfig:
- """Configuration for retry behavior."""
-
- def __init__(
- self,
- max_retries: int = 5,
- initial_delay: float = 1.0,
- max_delay: float = 60.0,
- exponential_base: float = 2.0,
- jitter: bool = True,
- ):
- """Initialize retry configuration.
-
- Args:
- max_retries: Maximum number of retry attempts
- initial_delay: Initial delay in seconds
- max_delay: Maximum delay in seconds
- exponential_base: Base for exponential backoff
- jitter: Whether to add random jitter to delays
- """
- self.max_retries = max_retries
- self.initial_delay = initial_delay
- self.max_delay = max_delay
- self.exponential_base = exponential_base
- self.jitter = jitter
-
- def get_delay(self, attempt: int) -> float:
- """Calculate delay for a given retry attempt.
-
- Args:
- attempt: Current attempt number (0-indexed)
-
- Returns:
- Delay in seconds
- """
- # Calculate exponential backoff
- delay = min(self.initial_delay * (self.exponential_base**attempt), self.max_delay)
-
- # Add jitter to avoid thundering herd
- if self.jitter:
- delay = delay * (0.5 + random.random())
-
- return delay
-
-
def is_rate_limit_error(error: Exception) -> bool:
"""Check if an error is a rate limit error.
@@ -113,11 +68,10 @@ def is_retryable_error(error: Exception) -> bool:
return any(indicator in error_str for indicator in retryable_indicators)
-def with_retry(config: RetryConfig = None):
+def with_retry():
"""Decorator to add retry logic with exponential backoff.
- Args:
- config: RetryConfig instance, uses defaults if None
+ Uses Config.RETRY_* settings for retry configuration.
Returns:
Decorator function
@@ -126,37 +80,31 @@ def with_retry(config: RetryConfig = None):
def decorator(func: Callable[..., T]) -> Callable[..., T]:
@wraps(func)
def wrapper(*args, **kwargs) -> T:
- # Try to get config from instance (self) if available
- retry_config = config
- if retry_config is None and args and hasattr(args[0], "retry_config"):
- retry_config = args[0].retry_config
- if retry_config is None:
- retry_config = RetryConfig()
-
last_error = None
+ max_retries = Config.RETRY_MAX_ATTEMPTS
- for attempt in range(retry_config.max_retries + 1):
+ for attempt in range(max_retries + 1):
try:
return func(*args, **kwargs)
except Exception as e:
last_error = e
# Don't retry on last attempt
- if attempt == retry_config.max_retries:
+ if attempt == max_retries:
break
# Only retry if error is retryable
if not is_retryable_error(e):
raise
- # Calculate delay
- delay = retry_config.get_delay(attempt)
+ # Calculate delay using Config
+ delay = Config.get_retry_delay(attempt)
# Log retry attempt
error_type = "Rate limit" if is_rate_limit_error(e) else "Retryable"
logger.warning(f"{error_type} error: {str(e)}")
logger.warning(
- f"Retrying in {delay:.1f}s... (attempt {attempt + 1}/{retry_config.max_retries})"
+ f"Retrying in {delay:.1f}s... (attempt {attempt + 1}/{max_retries})"
)
# Wait before retry
@@ -170,13 +118,12 @@ def wrapper(*args, **kwargs) -> T:
return decorator
-def retry_with_backoff(func: Callable[..., T], *args, config: RetryConfig = None, **kwargs) -> T:
+def retry_with_backoff(func: Callable[..., T], *args, **kwargs) -> T:
"""Execute a function with retry logic.
Args:
func: Function to execute
*args: Positional arguments for func
- config: RetryConfig instance
**kwargs: Keyword arguments for func
Returns:
@@ -185,8 +132,5 @@ def retry_with_backoff(func: Callable[..., T], *args, config: RetryConfig = None
Raises:
Last exception if all retries fail
"""
- if config is None:
- config = RetryConfig()
-
- decorated_func = with_retry(config)(func)
+ decorated_func = with_retry()(func)
return decorated_func(*args, **kwargs)
diff --git a/main.py b/main.py
index d904d18..e59c9c7 100644
--- a/main.py
+++ b/main.py
@@ -27,9 +27,9 @@
GitStashTool,
GitStatusTool,
)
+from tools.retrieve_tool_result import RetrieveToolResultTool
from tools.shell import ShellTool
from tools.smart_edit import SmartEditTool
-from tools.retrieve_tool_result import RetrieveToolResultTool
from tools.web_fetch import WebFetchTool
from tools.web_search import WebSearchTool
from utils import get_log_file_path, setup_logger, terminal_ui
@@ -75,11 +75,10 @@ def create_agent(mode: str = "react"):
GitCleanTool(),
]
- # Create LLM instance with LiteLLM
+ # Create LLM instance with LiteLLM (retry config is read from Config directly)
llm = LiteLLMLLM(
model=Config.LITELLM_MODEL,
api_base=Config.LITELLM_API_BASE,
- retry_config=Config.get_retry_config(),
drop_params=Config.LITELLM_DROP_PARAMS,
timeout=Config.LITELLM_TIMEOUT,
)
diff --git a/memory/__init__.py b/memory/__init__.py
index 986011c..0905848 100644
--- a/memory/__init__.py
+++ b/memory/__init__.py
@@ -9,10 +9,9 @@
from .short_term import ShortTermMemory
from .store import MemoryStore
from .token_tracker import TokenTracker
-from .types import CompressedMemory, CompressionStrategy, MemoryConfig
+from .types import CompressedMemory, CompressionStrategy
__all__ = [
- "MemoryConfig",
"CompressedMemory",
"CompressionStrategy",
"MemoryManager",
diff --git a/memory/code_extractor.py b/memory/code_extractor.py
index 267656e..8c31856 100644
--- a/memory/code_extractor.py
+++ b/memory/code_extractor.py
@@ -379,9 +379,7 @@ def _extract_definitions_regex(
return definitions
- def format_extracted_code(
- self, content: str, filename: str, max_tokens: int
- ) -> str:
+ def format_extracted_code(self, content: str, filename: str, max_tokens: int) -> str:
"""Format extracted code with key definitions.
Args:
diff --git a/memory/compressor.py b/memory/compressor.py
index 19a3b91..48a8ca0 100644
--- a/memory/compressor.py
+++ b/memory/compressor.py
@@ -3,9 +3,10 @@
import logging
from typing import TYPE_CHECKING, List, Optional, Set, Tuple
+from config import Config
from llm.base import LLMMessage
-from .types import CompressedMemory, CompressionStrategy, MemoryConfig
+from .types import CompressedMemory, CompressionStrategy
logger = logging.getLogger(__name__)
@@ -34,15 +35,13 @@ class WorkingMemoryCompressor:
Provide a concise but comprehensive summary that captures the essential information. Be specific and include concrete details. Target length: {target_tokens} tokens."""
- def __init__(self, llm: "LiteLLMLLM", config: MemoryConfig):
+ def __init__(self, llm: "LiteLLMLLM"):
"""Initialize compressor.
Args:
llm: LLM instance to use for summarization
- config: Memory configuration
"""
self.llm = llm
- self.config = config
def compress(
self,
@@ -69,7 +68,7 @@ def compress(
if target_tokens is None:
# Calculate target based on config compression ratio
original_tokens = self._estimate_tokens(messages)
- target_tokens = int(original_tokens * self.config.compression_ratio)
+ target_tokens = int(original_tokens * Config.MEMORY_COMPRESSION_RATIO)
if orphaned_tool_use_ids is None:
orphaned_tool_use_ids = set()
@@ -283,7 +282,7 @@ def _separate_messages(
# Step 1: Mark system messages for preservation
for i, msg in enumerate(messages):
- if self.config.preserve_system_prompts and msg.role == "system":
+ if Config.MEMORY_PRESERVE_SYSTEM_PROMPTS and msg.role == "system":
preserve_indices.add(i)
# Step 2: Find tool pairs and orphaned tool_use messages
@@ -315,7 +314,7 @@ def _separate_messages(
# Step 3: Apply selective preservation strategy (keep recent N messages)
# Preserve last short_term_min_message_count messages by default (sliding window approach)
- preserve_count = min(self.config.short_term_min_message_count, len(messages))
+ preserve_count = min(Config.MEMORY_SHORT_TERM_MIN_SIZE, len(messages))
for i in range(len(messages) - preserve_count, len(messages)):
if i >= 0:
preserve_indices.add(i)
diff --git a/memory/manager.py b/memory/manager.py
index 1749e59..c419460 100644
--- a/memory/manager.py
+++ b/memory/manager.py
@@ -3,8 +3,8 @@
import logging
from typing import TYPE_CHECKING, Any, Dict, List, Optional
-from llm.base import LLMMessage
from config import Config
+from llm.base import LLMMessage
from .compressor import WorkingMemoryCompressor
from .short_term import ShortTermMemory
@@ -12,7 +12,7 @@
from .token_tracker import TokenTracker
from .tool_result_processor import ToolResultProcessor
from .tool_result_store import ToolResultStore
-from .types import CompressedMemory, CompressionStrategy, MemoryConfig
+from .types import CompressedMemory, CompressionStrategy
logger = logging.getLogger(__name__)
@@ -25,7 +25,6 @@ class MemoryManager:
def __init__(
self,
- config: MemoryConfig,
llm: "LiteLLMLLM",
store: Optional[MemoryStore] = None,
session_id: Optional[str] = None,
@@ -34,13 +33,11 @@ def __init__(
"""Initialize memory manager.
Args:
- config: Memory configuration
llm: LLM instance for compression
store: Optional MemoryStore for persistence (if None, creates default store)
session_id: Optional session ID (if resuming session)
db_path: Path to database file (default: data/memory.db)
"""
- self.config = config
self.llm = llm
self._db_path = db_path
@@ -58,17 +55,17 @@ def __init__(
self.session_id = None
self._session_created = False
- # Initialize components
- self.short_term = ShortTermMemory(max_size=config.short_term_message_count)
- self.compressor = WorkingMemoryCompressor(llm, config)
+ # Initialize components using Config directly
+ self.short_term = ShortTermMemory(max_size=Config.MEMORY_SHORT_TERM_SIZE)
+ self.compressor = WorkingMemoryCompressor(llm)
self.token_tracker = TokenTracker()
# Initialize tool result processing components (always enabled)
self.tool_result_processor = ToolResultProcessor(
- storage_threshold=config.tool_result_storage_threshold,
+ storage_threshold=Config.TOOL_RESULT_STORAGE_THRESHOLD,
summary_model=Config.TOOL_RESULT_SUMMARY_MODEL,
)
- storage_path = config.tool_result_storage_path
+ storage_path = Config.TOOL_RESULT_STORAGE_PATH
self.tool_result_store = ToolResultStore(db_path=storage_path)
logger.info(
f"Tool result processing enabled with external storage: {storage_path or 'in-memory'}"
@@ -112,11 +109,8 @@ def from_session(
if not session_data:
raise ValueError(f"Session {session_id} not found")
- # Get config (use loaded config or default)
- config = session_data["config"] or MemoryConfig()
-
- # Create manager
- manager = cls(config=config, llm=llm, store=store, session_id=session_id)
+ # Create manager (config is now read from Config class directly)
+ manager = cls(llm=llm, store=store, session_id=session_id)
# Restore state
manager.system_messages = session_data["system_messages"]
@@ -195,7 +189,7 @@ def add_message(self, message: LLMMessage, actual_tokens: Dict[str, int] = None)
# Log memory state for debugging
logger.debug(
f"Memory state: {self.current_tokens} tokens, "
- f"{self.short_term.count()}/{self.config.short_term_message_count} messages, "
+ f"{self.short_term.count()}/{Config.MEMORY_SHORT_TERM_SIZE} messages, "
f"full={self.short_term.is_full()}"
)
@@ -209,8 +203,8 @@ def add_message(self, message: LLMMessage, actual_tokens: Dict[str, int] = None)
# Log why compression was NOT triggered
logger.debug(
f"Compression check: current={self.current_tokens}, "
- f"threshold={self.config.compression_threshold}, "
- f"target={self.config.target_working_memory_tokens}, "
+ f"threshold={Config.MEMORY_COMPRESSION_THRESHOLD}, "
+ f"target={Config.MEMORY_TARGET_TOKENS}, "
f"short_term_full={self.short_term.is_full()}"
)
@@ -323,12 +317,15 @@ def _should_compress(self) -> tuple[bool, Optional[str]]:
Returns:
Tuple of (should_compress, reason)
"""
- if not self.config.enable_compression:
+ if not Config.MEMORY_ENABLED:
return False, "compression_disabled"
# Hard limit: must compress
- if self.current_tokens > self.config.compression_threshold:
- return True, f"hard_limit ({self.current_tokens} > {self.config.compression_threshold})"
+ if self.current_tokens > Config.MEMORY_COMPRESSION_THRESHOLD:
+ return (
+ True,
+ f"hard_limit ({self.current_tokens} > {Config.MEMORY_COMPRESSION_THRESHOLD})",
+ )
# CRITICAL: Compress when short-term memory is full to prevent eviction
# If we don't compress, the next message will cause deque to evict the oldest message,
@@ -336,15 +333,15 @@ def _should_compress(self) -> tuple[bool, Optional[str]]:
if self.short_term.is_full():
return (
True,
- f"short_term_full ({self.short_term.count()}/{self.config.short_term_message_count} messages, "
+ f"short_term_full ({self.short_term.count()}/{Config.MEMORY_SHORT_TERM_SIZE} messages, "
f"current tokens: {self.current_tokens})",
)
# Soft limit: compress if over target token count
- if self.current_tokens > self.config.target_working_memory_tokens:
+ if self.current_tokens > Config.MEMORY_TARGET_TOKENS:
return (
True,
- f"soft_limit ({self.current_tokens} > {self.config.target_working_memory_tokens})",
+ f"soft_limit ({self.current_tokens} > {Config.MEMORY_TARGET_TOKENS})",
)
return False, None
@@ -396,7 +393,7 @@ def _calculate_target_tokens(self) -> int:
Target token count
"""
original_tokens = self.current_tokens
- target = int(original_tokens * self.config.compression_ratio)
+ target = int(original_tokens * Config.MEMORY_COMPRESSION_RATIO)
return max(target, 500) # Minimum 500 tokens for summary
def _get_orphaned_tool_use_ids_from_summaries(self) -> set:
@@ -459,7 +456,7 @@ def process_tool_result(
result_tokens = self.tool_result_processor.estimate_tokens(result)
logger.info(
f"Storing large tool result externally: {tool_name} "
- f"({result_tokens} tokens > {self.config.tool_result_storage_threshold})"
+ f"({result_tokens} tokens > {Config.TOOL_RESULT_STORAGE_THRESHOLD})"
)
# Store full result
@@ -540,7 +537,7 @@ def get_stats(self) -> Dict[str, Any]:
"short_term_count": self.short_term.count(),
"summary_count": len(self.summaries),
"total_cost": self.token_tracker.get_total_cost(self.llm.model),
- "budget_status": self.token_tracker.get_budget_status(self.config.max_context_tokens),
+ "budget_status": self.token_tracker.get_budget_status(Config.MEMORY_MAX_CONTEXT_TOKENS),
}
def save_memory(self):
diff --git a/memory/store.py b/memory/store.py
index a5a6cde..a53cec9 100644
--- a/memory/store.py
+++ b/memory/store.py
@@ -9,7 +9,7 @@
from typing import Any, Dict, List, Optional
from llm.base import LLMMessage
-from memory.types import CompressedMemory, MemoryConfig
+from memory.types import CompressedMemory
logger = logging.getLogger(__name__)
@@ -61,14 +61,11 @@ def _init_db(self):
conn.commit()
logger.debug("Database schema initialized")
- def create_session(
- self, metadata: Optional[Dict[str, Any]] = None, config: Optional[MemoryConfig] = None
- ) -> str:
+ def create_session(self, metadata: Optional[Dict[str, Any]] = None) -> str:
"""Create a new session.
Args:
metadata: Optional session metadata (description, tags, etc.)
- config: Memory configuration for this session
Returns:
Session ID (UUID)
diff --git a/memory/types.py b/memory/types.py
index 3aee317..f4dc81c 100644
--- a/memory/types.py
+++ b/memory/types.py
@@ -2,57 +2,11 @@
from dataclasses import dataclass, field
from datetime import datetime
-from typing import Any, Dict, List, Optional
+from typing import Any, Dict, List
from llm.base import LLMMessage
-@dataclass
-class MemoryConfig:
- """Configuration for memory management system."""
-
- # Token budgets
- max_context_tokens: int = 100000 # Maximum context window
- target_working_memory_tokens: int = 30000 # Soft limit - trigger compression at this level
- compression_threshold: int = 40000 # Hard limit - must compress regardless of message count
-
- # Memory windows
- short_term_message_count: int = 100 # Keep last N messages in short-term memory
- short_term_min_message_count: int = 5 # Keep at least N messages in short-term memory
-
- # Compression settings
- compression_ratio: float = 0.3 # Target 30% of original size
- preserve_tool_calls: bool = True # Always preserve tool-related messages
- preserve_system_prompts: bool = True # Always preserve system prompts
-
- # Cost management
- max_cost_dollars: Optional[float] = None # Optional budget limit
-
- # Feature flags
- enable_compression: bool = True # Enable/disable compression
- compression_model: Optional[str] = None # Model to use for compression (None = same as agent)
-
- # Tool result processing (always enabled)
- tool_result_storage_threshold: int = 10000 # Store externally if result > N tokens
- tool_result_storage_path: Optional[str] = None # Path to SQLite DB (None = in-memory)
- tool_result_summary_model: Optional[str] = (
- None # Model for summarizing large tool results (None = disable)
- )
-
- # Tool-specific token budgets (can be overridden per tool)
- tool_result_budgets: Dict[str, int] = field(
- default_factory=lambda: {
- "read_file": 1000,
- "grep_content": 800,
- "execute_shell": 500,
- "web_search": 1200,
- "web_fetch": 1500,
- "glob_files": 600,
- "default": 1000,
- }
- )
-
-
@dataclass
class CompressedMemory:
"""Represents a compressed memory segment."""
diff --git a/test/memory/conftest.py b/test/memory/conftest.py
index 772797f..5e88bc0 100644
--- a/test/memory/conftest.py
+++ b/test/memory/conftest.py
@@ -2,9 +2,28 @@
import pytest
+from config import Config
from llm.base import LLMMessage, LLMResponse
+@pytest.fixture
+def set_memory_config(monkeypatch):
+ """Fixture to temporarily set memory configuration values.
+
+ Usage:
+ def test_something(set_memory_config, mock_llm):
+ set_memory_config(MEMORY_SHORT_TERM_SIZE=5, MEMORY_COMPRESSION_THRESHOLD=100)
+ manager = MemoryManager(mock_llm)
+ ...
+ """
+
+ def _set_config(**kwargs):
+ for key, value in kwargs.items():
+ monkeypatch.setattr(Config, key, value)
+
+ return _set_config
+
+
class MockLLM:
"""Mock LLM for testing without API calls."""
diff --git a/test/memory/test_compressor.py b/test/memory/test_compressor.py
index eb53d93..fb3d1fd 100644
--- a/test/memory/test_compressor.py
+++ b/test/memory/test_compressor.py
@@ -2,7 +2,7 @@
from llm.base import LLMMessage
from memory.compressor import WorkingMemoryCompressor
-from memory.types import CompressionStrategy, MemoryConfig
+from memory.types import CompressionStrategy
class TestCompressorBasics:
@@ -10,16 +10,13 @@ class TestCompressorBasics:
def test_initialization(self, mock_llm):
"""Test compressor initialization."""
- config = MemoryConfig()
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ compressor = WorkingMemoryCompressor(mock_llm)
assert compressor.llm == mock_llm
- assert compressor.config == config
def test_compress_empty_messages(self, mock_llm):
"""Test compressing empty message list."""
- config = MemoryConfig()
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ compressor = WorkingMemoryCompressor(mock_llm)
result = compressor.compress([])
@@ -28,8 +25,7 @@ def test_compress_empty_messages(self, mock_llm):
def test_compress_single_message(self, mock_llm):
"""Test compressing a single message."""
- config = MemoryConfig()
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ compressor = WorkingMemoryCompressor(mock_llm)
messages = [LLMMessage(role="user", content="Hello")]
result = compressor.compress(messages, strategy=CompressionStrategy.SLIDING_WINDOW)
@@ -43,8 +39,7 @@ class TestCompressionStrategies:
def test_sliding_window_strategy(self, mock_llm, simple_messages):
"""Test sliding window compression strategy."""
- config = MemoryConfig()
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ compressor = WorkingMemoryCompressor(mock_llm)
result = compressor.compress(
simple_messages, strategy=CompressionStrategy.SLIDING_WINDOW, target_tokens=100
@@ -58,8 +53,7 @@ def test_sliding_window_strategy(self, mock_llm, simple_messages):
def test_deletion_strategy(self, mock_llm, simple_messages):
"""Test deletion compression strategy."""
- config = MemoryConfig()
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ compressor = WorkingMemoryCompressor(mock_llm)
result = compressor.compress(simple_messages, strategy=CompressionStrategy.DELETION)
@@ -69,10 +63,10 @@ def test_deletion_strategy(self, mock_llm, simple_messages):
assert result.compressed_tokens == 0
assert result.metadata["strategy"] == "deletion"
- def test_selective_strategy_with_tools(self, mock_llm, tool_use_messages):
+ def test_selective_strategy_with_tools(self, set_memory_config, mock_llm, tool_use_messages):
"""Test selective compression with tool messages."""
- config = MemoryConfig(short_term_min_message_count=2)
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ set_memory_config(MEMORY_SHORT_TERM_MIN_SIZE=2)
+ compressor = WorkingMemoryCompressor(mock_llm)
result = compressor.compress(
tool_use_messages, strategy=CompressionStrategy.SELECTIVE, target_tokens=200
@@ -83,10 +77,10 @@ def test_selective_strategy_with_tools(self, mock_llm, tool_use_messages):
# Tool pairs should be preserved
assert len(result.preserved_messages) > 0
- def test_selective_strategy_preserves_system_messages(self, mock_llm):
+ def test_selective_strategy_preserves_system_messages(self, set_memory_config, mock_llm):
"""Test that selective strategy preserves system messages."""
- config = MemoryConfig(preserve_system_prompts=True)
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ set_memory_config(MEMORY_PRESERVE_SYSTEM_PROMPTS=True)
+ compressor = WorkingMemoryCompressor(mock_llm)
messages = [
LLMMessage(role="system", content="System prompt"),
@@ -108,8 +102,7 @@ class TestToolPairDetection:
def test_find_tool_pairs_basic(self, mock_llm, tool_use_messages):
"""Test basic tool pair detection."""
- config = MemoryConfig()
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ compressor = WorkingMemoryCompressor(mock_llm)
pairs, orphaned = compressor._find_tool_pairs(tool_use_messages)
@@ -124,8 +117,7 @@ def test_find_tool_pairs_basic(self, mock_llm, tool_use_messages):
def test_find_tool_pairs_multiple(self, mock_llm):
"""Test finding multiple tool pairs."""
- config = MemoryConfig()
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ compressor = WorkingMemoryCompressor(mock_llm)
messages = []
for i in range(3):
@@ -161,8 +153,7 @@ def test_find_tool_pairs_multiple(self, mock_llm):
def test_find_tool_pairs_with_mismatches(self, mock_llm, mismatched_tool_messages):
"""Test tool pair detection with mismatched pairs."""
- config = MemoryConfig()
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ compressor = WorkingMemoryCompressor(mock_llm)
pairs, orphaned = compressor._find_tool_pairs(mismatched_tool_messages)
@@ -171,10 +162,10 @@ def test_find_tool_pairs_with_mismatches(self, mock_llm, mismatched_tool_message
# Should have one orphaned tool_use (tool_1)
assert len(orphaned) == 1
- def test_tool_pairs_preserved_together(self, mock_llm, tool_use_messages):
+ def test_tool_pairs_preserved_together(self, set_memory_config, mock_llm, tool_use_messages):
"""Test that when a tool pair is found, both messages are preserved together."""
- config = MemoryConfig(short_term_min_message_count=1)
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ set_memory_config(MEMORY_SHORT_TERM_MIN_SIZE=1)
+ compressor = WorkingMemoryCompressor(mock_llm)
preserved, to_compress = compressor._separate_messages(tool_use_messages)
@@ -204,8 +195,7 @@ class TestProtectedTools:
def test_find_protected_tool_pairs(self, mock_llm, protected_tool_messages):
"""Test finding protected tool pairs (manage_todo_list)."""
- config = MemoryConfig()
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ compressor = WorkingMemoryCompressor(mock_llm)
# First find all pairs
all_pairs, orphaned = compressor._find_tool_pairs(protected_tool_messages)
@@ -217,10 +207,12 @@ def test_find_protected_tool_pairs(self, mock_llm, protected_tool_messages):
assert len(protected_pairs) > 0
assert len(orphaned) == 0
- def test_protected_tools_always_preserved(self, mock_llm, protected_tool_messages):
+ def test_protected_tools_always_preserved(
+ self, set_memory_config, mock_llm, protected_tool_messages
+ ):
"""Test that protected tools are never compressed."""
- config = MemoryConfig(short_term_min_message_count=0) # Don't preserve anything by default
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ set_memory_config(MEMORY_SHORT_TERM_MIN_SIZE=0) # Don't preserve anything by default
+ compressor = WorkingMemoryCompressor(mock_llm)
preserved, to_compress = compressor._separate_messages(protected_tool_messages)
@@ -239,10 +231,12 @@ def test_protected_tools_always_preserved(self, mock_llm, protected_tool_message
assert found_protected, "Protected tool should always be preserved"
- def test_non_protected_tools_can_be_compressed(self, mock_llm, tool_use_messages):
+ def test_non_protected_tools_can_be_compressed(
+ self, set_memory_config, mock_llm, tool_use_messages
+ ):
"""Test that non-protected tools can be compressed."""
- config = MemoryConfig(short_term_min_message_count=0)
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ set_memory_config(MEMORY_SHORT_TERM_MIN_SIZE=0)
+ compressor = WorkingMemoryCompressor(mock_llm)
preserved, to_compress = compressor._separate_messages(tool_use_messages)
@@ -254,10 +248,10 @@ def test_non_protected_tools_can_be_compressed(self, mock_llm, tool_use_messages
class TestMessageSeparation:
"""Test message separation logic."""
- def test_separate_messages_basic(self, mock_llm, simple_messages):
+ def test_separate_messages_basic(self, set_memory_config, mock_llm, simple_messages):
"""Test basic message separation."""
- config = MemoryConfig(short_term_min_message_count=2)
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ set_memory_config(MEMORY_SHORT_TERM_MIN_SIZE=2)
+ compressor = WorkingMemoryCompressor(mock_llm)
preserved, to_compress = compressor._separate_messages(simple_messages)
@@ -266,10 +260,10 @@ def test_separate_messages_basic(self, mock_llm, simple_messages):
# Total should equal original
assert len(preserved) + len(to_compress) == len(simple_messages)
- def test_separate_preserves_recent_messages(self, mock_llm, simple_messages):
+ def test_separate_preserves_recent_messages(self, set_memory_config, mock_llm, simple_messages):
"""Test that most recent messages are preserved."""
- config = MemoryConfig(short_term_min_message_count=2)
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ set_memory_config(MEMORY_SHORT_TERM_MIN_SIZE=2)
+ compressor = WorkingMemoryCompressor(mock_llm)
preserved, to_compress = compressor._separate_messages(simple_messages)
@@ -278,10 +272,10 @@ def test_separate_preserves_recent_messages(self, mock_llm, simple_messages):
for msg in last_n_messages:
assert msg in preserved
- def test_tool_pair_preservation_rule(self, mock_llm, tool_use_messages):
+ def test_tool_pair_preservation_rule(self, set_memory_config, mock_llm, tool_use_messages):
"""Test that tool pairs are preserved together (critical rule)."""
- config = MemoryConfig(short_term_min_message_count=1)
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ set_memory_config(MEMORY_SHORT_TERM_MIN_SIZE=1)
+ compressor = WorkingMemoryCompressor(mock_llm)
preserved, to_compress = compressor._separate_messages(tool_use_messages)
@@ -330,8 +324,7 @@ class TestTokenEstimation:
def test_estimate_tokens_simple_text(self, mock_llm):
"""Test token estimation for simple text messages."""
- config = MemoryConfig()
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ compressor = WorkingMemoryCompressor(mock_llm)
messages = [LLMMessage(role="user", content="Hello world")]
tokens = compressor._estimate_tokens(messages)
@@ -341,8 +334,7 @@ def test_estimate_tokens_simple_text(self, mock_llm):
def test_estimate_tokens_long_text(self, mock_llm):
"""Test token estimation for long text."""
- config = MemoryConfig()
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ compressor = WorkingMemoryCompressor(mock_llm)
long_content = "This is a long message. " * 100
messages = [LLMMessage(role="user", content=long_content)]
@@ -354,8 +346,7 @@ def test_estimate_tokens_long_text(self, mock_llm):
def test_estimate_tokens_with_tool_content(self, mock_llm, tool_use_messages):
"""Test token estimation with tool content."""
- config = MemoryConfig()
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ compressor = WorkingMemoryCompressor(mock_llm)
tokens = compressor._estimate_tokens(tool_use_messages)
@@ -364,8 +355,7 @@ def test_estimate_tokens_with_tool_content(self, mock_llm, tool_use_messages):
def test_extract_text_content_from_dict(self, mock_llm):
"""Test extracting text content from dict-based content."""
- config = MemoryConfig()
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ compressor = WorkingMemoryCompressor(mock_llm)
msg = LLMMessage(
role="assistant",
@@ -384,8 +374,7 @@ class TestCompressionMetrics:
def test_compression_ratio_calculation(self, mock_llm, simple_messages):
"""Test that compression ratio is calculated correctly."""
- config = MemoryConfig()
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ compressor = WorkingMemoryCompressor(mock_llm)
result = compressor.compress(
simple_messages, strategy=CompressionStrategy.SLIDING_WINDOW, target_tokens=50
@@ -398,8 +387,7 @@ def test_compression_ratio_calculation(self, mock_llm, simple_messages):
def test_token_savings_calculation(self, mock_llm, simple_messages):
"""Test token savings calculation."""
- config = MemoryConfig()
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ compressor = WorkingMemoryCompressor(mock_llm)
result = compressor.compress(simple_messages, strategy=CompressionStrategy.SLIDING_WINDOW)
@@ -409,8 +397,7 @@ def test_token_savings_calculation(self, mock_llm, simple_messages):
def test_savings_percentage_calculation(self, mock_llm, simple_messages):
"""Test savings percentage calculation."""
- config = MemoryConfig()
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ compressor = WorkingMemoryCompressor(mock_llm)
result = compressor.compress(simple_messages, strategy=CompressionStrategy.SLIDING_WINDOW)
@@ -423,8 +410,7 @@ class TestCompressionErrors:
def test_compression_with_llm_error(self, mock_llm, simple_messages):
"""Test compression behavior when LLM call fails."""
- config = MemoryConfig()
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ compressor = WorkingMemoryCompressor(mock_llm)
# Make LLM raise an error
def error_call(*args, **kwargs):
@@ -442,8 +428,7 @@ def error_call(*args, **kwargs):
def test_unknown_strategy_fallback(self, mock_llm, simple_messages):
"""Test fallback to default strategy for unknown strategy."""
- config = MemoryConfig()
- compressor = WorkingMemoryCompressor(mock_llm, config)
+ compressor = WorkingMemoryCompressor(mock_llm)
# Use invalid strategy name
result = compressor.compress(simple_messages, strategy="invalid_strategy")
diff --git a/test/memory/test_integration.py b/test/memory/test_integration.py
index 84b890c..e2f97e5 100644
--- a/test/memory/test_integration.py
+++ b/test/memory/test_integration.py
@@ -5,7 +5,7 @@
"""
from llm.base import LLMMessage
-from memory import MemoryConfig, MemoryManager
+from memory import MemoryManager
from memory.types import CompressionStrategy
@@ -15,13 +15,13 @@ class TestToolCallResultIntegration:
This is the critical test suite for the bug mentioned by the user.
"""
- def test_tool_pairs_survive_compression_cycle(self, mock_llm):
+ def test_tool_pairs_survive_compression_cycle(self, set_memory_config, mock_llm):
"""Test that tool pairs remain matched through compression cycles."""
- config = MemoryConfig(
- short_term_message_count=6,
- short_term_min_message_count=2,
+ set_memory_config(
+ MEMORY_SHORT_TERM_SIZE=6,
+ MEMORY_SHORT_TERM_MIN_SIZE=2,
)
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
# Add a sequence of tool calls
messages = []
@@ -62,13 +62,13 @@ def test_tool_pairs_survive_compression_cycle(self, mock_llm):
context = manager.get_context_for_llm()
self._verify_tool_pairs_matched(context)
- def test_tool_pairs_with_multiple_compressions(self, mock_llm):
+ def test_tool_pairs_with_multiple_compressions(self, set_memory_config, mock_llm):
"""Test tool pairs remain matched through multiple compression cycles."""
- config = MemoryConfig(
- short_term_message_count=4,
- short_term_min_message_count=2,
+ set_memory_config(
+ MEMORY_SHORT_TERM_SIZE=4,
+ MEMORY_SHORT_TERM_MIN_SIZE=2,
)
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
# Add messages in multiple batches, triggering multiple compressions
for batch in range(3):
@@ -106,10 +106,10 @@ def test_tool_pairs_with_multiple_compressions(self, mock_llm):
context = manager.get_context_for_llm()
self._verify_tool_pairs_matched(context)
- def test_interleaved_tool_calls(self, mock_llm):
+ def test_interleaved_tool_calls(self, set_memory_config, mock_llm):
"""Test tool pairs when tool calls are interleaved."""
- config = MemoryConfig(short_term_message_count=10)
- manager = MemoryManager(config, mock_llm)
+ set_memory_config(MEMORY_SHORT_TERM_SIZE=10)
+ manager = MemoryManager(mock_llm)
# Add interleaved tool calls (assistant makes multiple tool calls at once)
manager.add_message(LLMMessage(role="user", content="Complex request"))
@@ -140,10 +140,10 @@ def test_interleaved_tool_calls(self, mock_llm):
context = manager.get_context_for_llm()
self._verify_tool_pairs_matched(context)
- def test_orphaned_tool_use_detection(self, mock_llm):
+ def test_orphaned_tool_use_detection(self, set_memory_config, mock_llm):
"""Test detection of orphaned tool_use (no matching result)."""
- config = MemoryConfig(short_term_message_count=5)
- manager = MemoryManager(config, mock_llm)
+ set_memory_config(MEMORY_SHORT_TERM_SIZE=5)
+ manager = MemoryManager(mock_llm)
# Add tool_use without result
manager.add_message(LLMMessage(role="user", content="Request"))
@@ -179,10 +179,10 @@ def test_orphaned_tool_use_detection(self, mock_llm):
if orphans:
print(f"Detected orphaned tool_use: {orphans}")
- def test_orphaned_tool_result_detection(self, mock_llm):
+ def test_orphaned_tool_result_detection(self, set_memory_config, mock_llm):
"""Test detection of orphaned tool_result (no matching use)."""
- config = MemoryConfig(short_term_message_count=5)
- manager = MemoryManager(config, mock_llm)
+ set_memory_config(MEMORY_SHORT_TERM_SIZE=5)
+ manager = MemoryManager(mock_llm)
# Add tool_result without use (this shouldn't happen but let's test it)
manager.add_message(LLMMessage(role="user", content="Request"))
@@ -239,13 +239,13 @@ def _verify_tool_pairs_matched(self, messages):
class TestCompressionIntegration:
"""Integration tests for compression behavior."""
- def test_full_conversation_lifecycle(self, mock_llm):
+ def test_full_conversation_lifecycle(self, set_memory_config, mock_llm):
"""Test a complete conversation lifecycle with multiple compressions."""
- config = MemoryConfig(
- short_term_message_count=8,
- target_working_memory_tokens=200,
+ set_memory_config(
+ MEMORY_SHORT_TERM_SIZE=8,
+ MEMORY_TARGET_TOKENS=200,
)
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
# Simulate a long conversation
for i in range(20):
@@ -264,13 +264,13 @@ def test_full_conversation_lifecycle(self, mock_llm):
context = manager.get_context_for_llm()
assert len(context) < 40 # Compressed from 40 messages
- def test_mixed_content_conversation(self, mock_llm):
+ def test_mixed_content_conversation(self, set_memory_config, mock_llm):
"""Test conversation with mixed text and tool content."""
- config = MemoryConfig(
- short_term_message_count=6,
- short_term_min_message_count=2,
+ set_memory_config(
+ MEMORY_SHORT_TERM_SIZE=6,
+ MEMORY_SHORT_TERM_MIN_SIZE=2,
)
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
# Mix of text and tool messages
manager.add_message(LLMMessage(role="user", content="Text message 1"))
@@ -303,13 +303,13 @@ def test_mixed_content_conversation(self, mock_llm):
context = manager.get_context_for_llm()
assert len(context) > 0
- def test_system_message_persistence(self, mock_llm):
+ def test_system_message_persistence(self, set_memory_config, mock_llm):
"""Test that system messages persist through compressions."""
- config = MemoryConfig(
- short_term_message_count=5,
- preserve_system_prompts=True,
+ set_memory_config(
+ MEMORY_SHORT_TERM_SIZE=5,
+ MEMORY_PRESERVE_SYSTEM_PROMPTS=True,
)
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
system_msg = LLMMessage(role="system", content="You are a helpful assistant.")
manager.add_message(system_msg)
@@ -327,13 +327,15 @@ def test_system_message_persistence(self, mock_llm):
class TestEdgeCaseIntegration:
"""Integration tests for edge cases."""
- def test_compression_with_no_compressible_content(self, mock_llm, protected_tool_messages):
+ def test_compression_with_no_compressible_content(
+ self, set_memory_config, mock_llm, protected_tool_messages
+ ):
"""Test compression when all content is protected."""
- config = MemoryConfig(
- short_term_message_count=10, # Large enough to avoid auto-compression
- short_term_min_message_count=0,
+ set_memory_config(
+ MEMORY_SHORT_TERM_SIZE=10, # Large enough to avoid auto-compression
+ MEMORY_SHORT_TERM_MIN_SIZE=0,
)
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
# Add only protected tool messages
for msg in protected_tool_messages:
@@ -353,13 +355,13 @@ def test_compression_with_no_compressible_content(self, mock_llm, protected_tool
found_protected = True
assert found_protected or len(result.preserved_messages) > 0
- def test_rapid_compression_cycles(self, mock_llm):
+ def test_rapid_compression_cycles(self, set_memory_config, mock_llm):
"""Test many rapid compression cycles."""
- config = MemoryConfig(
- short_term_message_count=2,
- target_working_memory_tokens=50,
+ set_memory_config(
+ MEMORY_SHORT_TERM_SIZE=2,
+ MEMORY_TARGET_TOKENS=50,
)
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
# Add messages rapidly, triggering many compressions
for i in range(20):
@@ -373,10 +375,10 @@ def test_rapid_compression_cycles(self, mock_llm):
context = manager.get_context_for_llm()
assert context is not None
- def test_alternating_compression_strategies(self, mock_llm):
+ def test_alternating_compression_strategies(self, set_memory_config, mock_llm):
"""Test using different compression strategies on same manager."""
- config = MemoryConfig(short_term_message_count=5)
- manager = MemoryManager(config, mock_llm)
+ set_memory_config(MEMORY_SHORT_TERM_SIZE=5)
+ manager = MemoryManager(mock_llm)
# Add messages and compress with sliding window
for i in range(4):
@@ -405,10 +407,10 @@ def test_alternating_compression_strategies(self, mock_llm):
assert manager.compression_count == 2
assert len(manager.summaries) == 2
- def test_empty_content_blocks(self, mock_llm):
+ def test_empty_content_blocks(self, set_memory_config, mock_llm):
"""Test handling of empty content blocks."""
- config = MemoryConfig(short_term_message_count=5)
- manager = MemoryManager(config, mock_llm)
+ set_memory_config(MEMORY_SHORT_TERM_SIZE=5)
+ manager = MemoryManager(mock_llm)
# Add message with empty content blocks
manager.add_message(
@@ -427,13 +429,13 @@ def test_empty_content_blocks(self, mock_llm):
# Test passes if no error occurred
assert context is not None
- def test_very_long_single_message(self, mock_llm):
+ def test_very_long_single_message(self, set_memory_config, mock_llm):
"""Test handling of a very long single message."""
- config = MemoryConfig(
- short_term_message_count=5,
- target_working_memory_tokens=100,
+ set_memory_config(
+ MEMORY_SHORT_TERM_SIZE=5,
+ MEMORY_TARGET_TOKENS=100,
)
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
# Add very long message
long_content = "This is a very long message. " * 500
@@ -446,10 +448,10 @@ def test_very_long_single_message(self, mock_llm):
class TestMemoryReset:
"""Test reset functionality in various scenarios."""
- def test_reset_after_compression(self, mock_llm, simple_messages):
+ def test_reset_after_compression(self, set_memory_config, mock_llm, simple_messages):
"""Test reset after compression has occurred."""
- config = MemoryConfig(short_term_message_count=3)
- manager = MemoryManager(config, mock_llm)
+ set_memory_config(MEMORY_SHORT_TERM_SIZE=3)
+ manager = MemoryManager(mock_llm)
# Add messages and compress
for msg in simple_messages:
@@ -464,13 +466,13 @@ def test_reset_after_compression(self, mock_llm, simple_messages):
assert len(manager.summaries) == 0
assert manager.short_term.count() == 0
- def test_reuse_after_reset(self, mock_llm):
+ def test_reuse_after_reset(self, set_memory_config, mock_llm):
"""Test that manager can be reused after reset."""
- config = MemoryConfig(
- short_term_message_count=10, # Large enough to avoid compression
- target_working_memory_tokens=100000,
+ set_memory_config(
+ MEMORY_SHORT_TERM_SIZE=10, # Large enough to avoid compression
+ MEMORY_TARGET_TOKENS=100000,
)
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
# First use
for i in range(5):
diff --git a/test/memory/test_memory_manager.py b/test/memory/test_memory_manager.py
index 5fa8594..acffd4a 100644
--- a/test/memory/test_memory_manager.py
+++ b/test/memory/test_memory_manager.py
@@ -1,7 +1,7 @@
"""Unit tests for MemoryManager."""
from llm.base import LLMMessage
-from memory import MemoryConfig, MemoryManager
+from memory import MemoryManager
from memory.types import CompressionStrategy
@@ -10,10 +10,8 @@ class TestMemoryManagerBasics:
def test_initialization(self, mock_llm):
"""Test MemoryManager initialization."""
- config = MemoryConfig()
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
- assert manager.config == config
assert manager.llm == mock_llm
assert manager.current_tokens == 0
assert manager.compression_count == 0
@@ -22,8 +20,7 @@ def test_initialization(self, mock_llm):
def test_add_system_message(self, mock_llm):
"""Test that system messages are stored separately."""
- config = MemoryConfig()
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
system_msg = LLMMessage(role="system", content="You are a helpful assistant.")
manager.add_message(system_msg)
@@ -35,8 +32,7 @@ def test_add_system_message(self, mock_llm):
def test_add_user_message(self, mock_llm):
"""Test adding user messages."""
- config = MemoryConfig()
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
user_msg = LLMMessage(role="user", content="Hello")
manager.add_message(user_msg)
@@ -46,8 +42,7 @@ def test_add_user_message(self, mock_llm):
def test_add_assistant_message(self, mock_llm):
"""Test adding assistant messages."""
- config = MemoryConfig()
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
assistant_msg = LLMMessage(role="assistant", content="Hi there!")
manager.add_message(assistant_msg)
@@ -57,8 +52,7 @@ def test_add_assistant_message(self, mock_llm):
def test_get_context_structure(self, mock_llm, simple_messages):
"""Test context structure with system, summaries, and recent messages."""
- config = MemoryConfig()
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
# Add system message
system_msg = LLMMessage(role="system", content="You are helpful.")
@@ -76,8 +70,7 @@ def test_get_context_structure(self, mock_llm, simple_messages):
def test_reset(self, mock_llm, simple_messages):
"""Test resetting memory manager."""
- config = MemoryConfig()
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
# Add some messages
for msg in simple_messages:
@@ -96,14 +89,14 @@ def test_reset(self, mock_llm, simple_messages):
class TestMemoryCompression:
"""Test compression triggering and behavior."""
- def test_compression_on_short_term_full(self, mock_llm):
+ def test_compression_on_short_term_full(self, set_memory_config, mock_llm):
"""Test compression triggers when short-term memory is full."""
- config = MemoryConfig(
- short_term_message_count=5,
- target_working_memory_tokens=100000, # Very high to avoid soft limit
- compression_threshold=200000, # Very high to avoid hard limit
+ set_memory_config(
+ MEMORY_SHORT_TERM_SIZE=5,
+ MEMORY_TARGET_TOKENS=100000, # Very high to avoid soft limit
+ MEMORY_COMPRESSION_THRESHOLD=200000, # Very high to avoid hard limit
)
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
# Add messages until short-term is full
for i in range(5):
@@ -115,14 +108,14 @@ def test_compression_on_short_term_full(self, mock_llm):
# After compression, short-term is cleared so it's not full
assert not manager.short_term.is_full()
- def test_compression_on_soft_limit(self, mock_llm):
+ def test_compression_on_soft_limit(self, set_memory_config, mock_llm):
"""Test compression triggers on soft limit (target tokens)."""
- config = MemoryConfig(
- target_working_memory_tokens=50, # Very low to trigger easily
- compression_threshold=10000,
- short_term_message_count=100, # Large enough to not trigger on count
+ set_memory_config(
+ MEMORY_TARGET_TOKENS=50, # Very low to trigger easily
+ MEMORY_COMPRESSION_THRESHOLD=10000,
+ MEMORY_SHORT_TERM_SIZE=100, # Large enough to not trigger on count
)
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
# Add messages until we exceed target tokens
long_message = "This is a long message. " * 50
@@ -131,14 +124,14 @@ def test_compression_on_soft_limit(self, mock_llm):
# Should trigger compression
assert manager.compression_count >= 1
- def test_compression_on_hard_limit(self, mock_llm):
+ def test_compression_on_hard_limit(self, set_memory_config, mock_llm):
"""Test compression triggers on hard limit (compression threshold)."""
- config = MemoryConfig(
- target_working_memory_tokens=10000,
- compression_threshold=100, # Very low to trigger easily
- short_term_message_count=100,
+ set_memory_config(
+ MEMORY_TARGET_TOKENS=10000,
+ MEMORY_COMPRESSION_THRESHOLD=100, # Very low to trigger easily
+ MEMORY_SHORT_TERM_SIZE=100,
)
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
# Add long message to exceed hard limit
long_message = "This is a very long message. " * 100
@@ -146,14 +139,14 @@ def test_compression_on_hard_limit(self, mock_llm):
assert manager.compression_count >= 1
- def test_compression_creates_summary(self, mock_llm, simple_messages):
+ def test_compression_creates_summary(self, set_memory_config, mock_llm, simple_messages):
"""Test that compression creates a summary."""
- config = MemoryConfig(
- short_term_message_count=3,
- target_working_memory_tokens=100000,
- compression_threshold=200000,
+ set_memory_config(
+ MEMORY_SHORT_TERM_SIZE=3,
+ MEMORY_TARGET_TOKENS=100000,
+ MEMORY_COMPRESSION_THRESHOLD=200000,
)
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
# Add messages to trigger compression
for msg in simple_messages:
@@ -169,8 +162,7 @@ def test_compression_creates_summary(self, mock_llm, simple_messages):
def test_get_stats(self, mock_llm, simple_messages):
"""Test getting memory statistics."""
- config = MemoryConfig()
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
for msg in simple_messages:
manager.add_message(msg)
@@ -191,15 +183,15 @@ def test_get_stats(self, mock_llm, simple_messages):
class TestToolCallMatching:
"""Test tool_use and tool_result matching scenarios."""
- def test_tool_pairs_preserved_together(self, mock_llm, tool_use_messages):
+ def test_tool_pairs_preserved_together(self, set_memory_config, mock_llm, tool_use_messages):
"""Test that tool_use and tool_result pairs are preserved together."""
- config = MemoryConfig(
- short_term_message_count=3,
- short_term_min_message_count=2,
- target_working_memory_tokens=100000,
- compression_threshold=200000,
+ set_memory_config(
+ MEMORY_SHORT_TERM_SIZE=3,
+ MEMORY_SHORT_TERM_MIN_SIZE=2,
+ MEMORY_TARGET_TOKENS=100000,
+ MEMORY_COMPRESSION_THRESHOLD=200000,
)
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
# Add tool messages
for msg in tool_use_messages:
@@ -229,13 +221,15 @@ def test_tool_pairs_preserved_together(self, mock_llm, tool_use_messages):
tool_use_ids == tool_result_ids
), f"Mismatched tool calls: tool_use_ids={tool_use_ids}, tool_result_ids={tool_result_ids}"
- def test_mismatched_tool_calls_detected(self, mock_llm, mismatched_tool_messages):
+ def test_mismatched_tool_calls_detected(
+ self, set_memory_config, mock_llm, mismatched_tool_messages
+ ):
"""Test behavior with mismatched tool_use/tool_result pairs."""
- config = MemoryConfig(
- short_term_message_count=4,
- short_term_min_message_count=2,
+ set_memory_config(
+ MEMORY_SHORT_TERM_SIZE=4,
+ MEMORY_SHORT_TERM_MIN_SIZE=2,
)
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
# Add mismatched tool messages
for msg in mismatched_tool_messages:
@@ -269,13 +263,15 @@ def test_mismatched_tool_calls_detected(self, mock_llm, mismatched_tool_messages
f"Detected mismatch - missing results: {missing_results}, missing uses: {missing_uses}"
)
- def test_protected_tool_always_preserved(self, mock_llm, protected_tool_messages):
+ def test_protected_tool_always_preserved(
+ self, set_memory_config, mock_llm, protected_tool_messages
+ ):
"""Test that protected tools (like manage_todo_list) are always preserved."""
- config = MemoryConfig(
- short_term_message_count=10, # Large enough to avoid auto-compression
- short_term_min_message_count=1,
+ set_memory_config(
+ MEMORY_SHORT_TERM_SIZE=10, # Large enough to avoid auto-compression
+ MEMORY_SHORT_TERM_MIN_SIZE=1,
)
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
# Add protected tool messages
for msg in protected_tool_messages:
@@ -305,13 +301,13 @@ def test_protected_tool_always_preserved(self, mock_llm, protected_tool_messages
assert found_protected, "Protected tool 'manage_todo_list' should be preserved"
- def test_multiple_tool_pairs_in_sequence(self, mock_llm):
+ def test_multiple_tool_pairs_in_sequence(self, set_memory_config, mock_llm):
"""Test multiple consecutive tool_use/tool_result pairs."""
- config = MemoryConfig(
- short_term_message_count=10,
- short_term_min_message_count=2,
+ set_memory_config(
+ MEMORY_SHORT_TERM_SIZE=10,
+ MEMORY_SHORT_TERM_MIN_SIZE=2,
)
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
# Create multiple tool pairs
messages = []
@@ -372,16 +368,14 @@ class TestEdgeCases:
def test_empty_memory_compression(self, mock_llm):
"""Test compressing empty memory."""
- config = MemoryConfig()
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
result = manager.compress()
assert result is None
def test_single_message_compression(self, mock_llm):
"""Test compressing with only one message."""
- config = MemoryConfig()
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
manager.add_message(LLMMessage(role="user", content="Hello"))
result = manager.compress()
@@ -390,8 +384,7 @@ def test_single_message_compression(self, mock_llm):
def test_actual_token_counts(self, mock_llm):
"""Test using actual token counts from LLM response."""
- config = MemoryConfig()
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
# Add message with actual token counts
msg = LLMMessage(role="assistant", content="Response")
@@ -403,10 +396,10 @@ def test_actual_token_counts(self, mock_llm):
assert stats["total_input_tokens"] >= 100
assert stats["total_output_tokens"] >= 50
- def test_compression_with_mixed_content(self, mock_llm):
+ def test_compression_with_mixed_content(self, set_memory_config, mock_llm):
"""Test compression with mixed text and tool content."""
- config = MemoryConfig(short_term_message_count=5)
- manager = MemoryManager(config, mock_llm)
+ set_memory_config(MEMORY_SHORT_TERM_SIZE=5)
+ manager = MemoryManager(mock_llm)
messages = [
LLMMessage(role="user", content="Text only"),
@@ -433,8 +426,7 @@ def test_compression_with_mixed_content(self, mock_llm):
def test_strategy_auto_selection(self, mock_llm, tool_use_messages, simple_messages):
"""Test automatic strategy selection based on message content."""
- config = MemoryConfig()
- manager = MemoryManager(config, mock_llm)
+ manager = MemoryManager(mock_llm)
# Add tool messages - should select SELECTIVE strategy
for msg in tool_use_messages:
diff --git a/test/test_memory.py b/test/test_memory.py
index 465884d..307d91d 100644
--- a/test/test_memory.py
+++ b/test/test_memory.py
@@ -10,8 +10,9 @@
# Add parent directory to path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from config import Config
from llm import LLMMessage
-from memory import MemoryConfig, MemoryManager
+from memory import MemoryManager
class MockLLM:
@@ -41,22 +42,21 @@ def main():
print("Memory Management System Demo")
print("=" * 60)
- # Create memory manager with custom config
- config = MemoryConfig(
- max_context_tokens=10000,
- target_working_memory_tokens=500, # Low threshold for demo
- compression_threshold=400, # Trigger compression quickly
- short_term_message_count=5,
- compression_ratio=0.3,
- )
+ # Configure memory settings directly via Config class
+ # (In production, these would be set via environment variables)
+ Config.MEMORY_MAX_CONTEXT_TOKENS = 10000
+ Config.MEMORY_TARGET_TOKENS = 500 # Low threshold for demo
+ Config.MEMORY_COMPRESSION_THRESHOLD = 400 # Trigger compression quickly
+ Config.MEMORY_SHORT_TERM_SIZE = 5
+ Config.MEMORY_COMPRESSION_RATIO = 0.3
mock_llm = MockLLM()
- memory = MemoryManager(config, mock_llm)
+ memory = MemoryManager(mock_llm)
print("\nConfiguration:")
- print(f" Target tokens: {config.target_working_memory_tokens}")
- print(f" Compression threshold: {config.compression_threshold}")
- print(f" Short-term size: {config.short_term_message_count}")
+ print(f" Target tokens: {Config.MEMORY_TARGET_TOKENS}")
+ print(f" Compression threshold: {Config.MEMORY_COMPRESSION_THRESHOLD}")
+ print(f" Short-term size: {Config.MEMORY_SHORT_TERM_SIZE}")
# Add system message
print("\n1. Adding system message...")
From 60bd2e144d75a5bdf69c994274bd9865fc027d5b Mon Sep 17 00:00:00 2001
From: Yixin Luo <18810541851@163.com>
Date: Sun, 18 Jan 2026 10:53:52 +0800
Subject: [PATCH 3/3] fix: remove unused imports to pass ruff linting
Co-Authored-By: Claude Opus 4.5
---
memory/code_extractor.py | 2 +-
memory/tool_result_processor.py | 3 +--
memory/tool_result_store.py | 1 -
test/memory/test_tool_result_processing.py | 3 ---
4 files changed, 2 insertions(+), 7 deletions(-)
diff --git a/memory/code_extractor.py b/memory/code_extractor.py
index 8c31856..4a22d8f 100644
--- a/memory/code_extractor.py
+++ b/memory/code_extractor.py
@@ -1,7 +1,7 @@
"""Code structure extraction using tree-sitter for multiple languages."""
import logging
-from typing import Dict, List, Optional, Set, Tuple
+from typing import Dict, List, Optional, Tuple
logger = logging.getLogger(__name__)
diff --git a/memory/tool_result_processor.py b/memory/tool_result_processor.py
index 8888411..eefb94c 100644
--- a/memory/tool_result_processor.py
+++ b/memory/tool_result_processor.py
@@ -2,7 +2,6 @@
import logging
import re
-from typing import Dict, Optional
from memory.code_extractor import CodeExtractor
@@ -217,7 +216,7 @@ def _extract_key_sections_regex(self, content: str, max_tokens: int) -> str:
return (
f"[Key sections extracted - {omitted_lines} lines omitted]\n\n"
+ result
- + f"\n\n[Use read_file with specific line ranges for full content]"
+ + "\n\n[Use read_file with specific line ranges for full content]"
)
def _preserve_matches(self, content: str, max_tokens: int) -> str:
diff --git a/memory/tool_result_store.py b/memory/tool_result_store.py
index ee7e43c..319ce72 100644
--- a/memory/tool_result_store.py
+++ b/memory/tool_result_store.py
@@ -4,7 +4,6 @@
import logging
import sqlite3
from datetime import datetime
-from pathlib import Path
from typing import Optional
logger = logging.getLogger(__name__)
diff --git a/test/memory/test_tool_result_processing.py b/test/memory/test_tool_result_processing.py
index 879354f..0a43f58 100644
--- a/test/memory/test_tool_result_processing.py
+++ b/test/memory/test_tool_result_processing.py
@@ -1,8 +1,5 @@
"""Tests for tool result processing and external storage."""
-import pytest
-
-from llm.base import LLMMessage
from memory.tool_result_processor import ToolResultProcessor
from memory.tool_result_store import ToolResultStore