diff --git a/.github/workflows/bump-plugin.yml b/.github/workflows/bump-plugin.yml
new file mode 100644
index 0000000..473eff7
--- /dev/null
+++ b/.github/workflows/bump-plugin.yml
@@ -0,0 +1,55 @@
+name: Bump Plugin Version
+
+on:
+ workflow_dispatch:
+ inputs:
+ bump_type:
+ description: Version bump type
+ required: true
+ default: patch
+ type: choice
+ options:
+ - patch
+ - minor
+ - major
+
+permissions:
+ contents: write
+
+jobs:
+ bump-plugin-version:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout repository
+ uses: actions/checkout@v4
+ with:
+ token: ${{ secrets.GITHUB_TOKEN }}
+
+ - name: Setup Node.js
+ uses: actions/setup-node@v4
+ with:
+ node-version: '20'
+
+ - name: Configure Git user
+ run: |
+ git config user.name "github-actions[bot]"
+ git config user.email "github-actions[bot]@users.noreply.github.com"
+
+ - name: Bump plugin version
+ run: cd openclaw-plugin && npm version ${{ github.event.inputs.bump_type }} --no-git-tag-version
+
+ - name: Extract new version
+ run: |
+ VERSION=$(node -p "require('./openclaw-plugin/package.json').version")
+ echo "VERSION=${VERSION}" >> "$GITHUB_ENV"
+
+ - name: Commit version bump
+ run: |
+ git add openclaw-plugin/package.json openclaw-plugin/package-lock.json
+ git commit -m "chore(plugin): bump to v${VERSION}"
+
+ - name: Create plugin tag
+ run: git tag "plugin-v${VERSION}"
+
+ - name: Push commit and tags
+ run: git push origin "HEAD:${{ github.ref_name }}" "plugin-v${VERSION}"
diff --git a/.github/workflows/release-plugin.yml b/.github/workflows/release-plugin.yml
new file mode 100644
index 0000000..0962a02
--- /dev/null
+++ b/.github/workflows/release-plugin.yml
@@ -0,0 +1,39 @@
+name: Release Plugin
+
+on:
+ push:
+ tags:
+ - 'plugin-v*'
+
+permissions:
+ contents: read
+ id-token: write
+
+jobs:
+ publish-npm:
+ name: Publish OpenClaw Plugin to npm
+ runs-on: ubuntu-latest
+ defaults:
+ run:
+ working-directory: openclaw-plugin
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Setup Node.js
+ uses: actions/setup-node@v4
+ with:
+ node-version: '20'
+ registry-url: 'https://registry.npmjs.org'
+
+ - name: Extract version from tag
+ id: get_version
+ run: echo "VERSION=${GITHUB_REF#refs/tags/plugin-v}" >> "$GITHUB_OUTPUT"
+
+ - name: Install dependencies
+ run: npm ci
+
+ - name: Sync package version
+ run: npm version ${{ steps.get_version.outputs.VERSION }} --no-git-tag-version --allow-same-version
+
+ - name: Publish to npm with provenance
+ run: npm publish --provenance --access public
diff --git a/.gitignore b/.gitignore
index 55ae470..e4d3908 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,4 @@ dist/
*/.DS_Store
*.DS_Store
+node_modules/
diff --git a/README.md b/README.md
index 8115e47..3ca6457 100644
--- a/README.md
+++ b/README.md
@@ -86,17 +86,37 @@ We also evaluated on academic RAG (Qwen3-32B, 4×A6000) and production MoE infer
### OpenClaw
+**Option A: Native Plugin** (recommended — zero external dependencies)
+
+```bash
+openclaw plugins install @contextpilot-ai/contextpilot
+```
+
+Then enable in `~/.openclaw/openclaw.json`:
+
+```json
+{
+ "plugins": {
+ "slots": { "contextEngine": "contextpilot" },
+ "entries": { "contextpilot": { "enabled": true } }
+ }
+}
+```
+
+Restart OpenClaw. Done — ContextPilot runs in-process, no proxy needed.
+
+**Option B: HTTP Proxy** (for self-hosted models or custom backends)
+
```bash
pip install contextpilot
-# Start proxy (points to your LLM backend)
python -m contextpilot.server.http_server \
--port 8765 --infer-api-url http://localhost:30000 # SGLang
# or: --infer-api-url https://api.anthropic.com # Anthropic
# or: --infer-api-url https://api.openai.com # OpenAI
```
-Then set OpenClaw's base URL to `http://localhost:8765/v1`. See the [full OpenClaw integration guide](docs/guides/openclaw.md) for UI setup, config file examples, and self-hosted model instructions.
+Then set OpenClaw's base URL to `http://localhost:8765/v1`. See the [full OpenClaw integration guide](docs/guides/openclaw.md) for details.
---
diff --git a/contextpilot/__init__.py b/contextpilot/__init__.py
index 4a6b312..a2523d7 100644
--- a/contextpilot/__init__.py
+++ b/contextpilot/__init__.py
@@ -55,7 +55,7 @@
MEM0_AVAILABLE,
)
-__version__ = "0.3.5.post2"
+__version__ = "0.4.1"
__all__ = [
# High-level pipeline API
diff --git a/contextpilot/dedup/block_dedup.py b/contextpilot/dedup/block_dedup.py
index 5d7c6b7..826966f 100644
--- a/contextpilot/dedup/block_dedup.py
+++ b/contextpilot/dedup/block_dedup.py
@@ -18,6 +18,7 @@
class DedupResult:
blocks_deduped: int = 0
blocks_total: int = 0
+ system_blocks_matched: int = 0
chars_before: int = 0
chars_after: int = 0
chars_saved: int = 0
@@ -94,11 +95,17 @@ def _dedup_text(
result: DedupResult,
min_block_chars: int,
chunk_modulus: int,
+ pre_seen: Optional[Dict[str, Tuple[int, str, int]]] = None,
) -> Optional[str]:
"""Core dedup loop shared by all entry points.
Returns the deduped text if any blocks were deduped, or None otherwise.
"""
+ if pre_seen:
+ for h, origin in pre_seen.items():
+ if h not in seen_blocks:
+ seen_blocks[h] = origin
+
blocks = _content_defined_chunking(text, chunk_modulus)
if len(blocks) < 2:
for b in blocks:
@@ -121,9 +128,11 @@ def _dedup_text(
result.blocks_total += 1
if h in seen_blocks and seen_blocks[h][0] != msg_idx:
- _, orig_fn, _ = seen_blocks[h]
+ orig_msg_idx, orig_fn, _ = seen_blocks[h]
first_line = block.strip().split("\n")[0][:80]
ref = f'[... "{first_line}" — identical to earlier {orig_fn} result, see above ...]'
+ if orig_msg_idx == -1:
+ result.system_blocks_matched += 1
chars_saved = len(block) - len(ref)
if chars_saved > 0:
new_blocks.append(ref)
@@ -148,11 +157,32 @@ def _dedup_text(
return None
+def _prescan_system_blocks(
+ system_content: Optional[str],
+ min_block_chars: int,
+ chunk_modulus: int,
+) -> Dict[str, Tuple[int, str, int]]:
+ """Hash and register dedup-eligible blocks from system prompt content."""
+ pre_seen: Dict[str, Tuple[int, str, int]] = {}
+ if not isinstance(system_content, str) or not system_content.strip():
+ return pre_seen
+
+ blocks = _content_defined_chunking(system_content, chunk_modulus)
+ for block_idx, block in enumerate(blocks):
+ if len(block.strip()) < min_block_chars:
+ continue
+ h = _hash_block(block)
+ if h not in pre_seen:
+ pre_seen[h] = (-1, "system prompt", block_idx)
+ return pre_seen
+
+
def dedup_chat_completions(
body: dict,
min_block_chars: int = MIN_BLOCK_CHARS,
min_content_chars: int = MIN_CONTENT_CHARS,
chunk_modulus: int = CHUNK_MODULUS,
+ system_content: Optional[str] = None,
) -> DedupResult:
messages = body.get("messages")
if not isinstance(messages, list) or not messages:
@@ -160,6 +190,7 @@ def dedup_chat_completions(
tool_names = _build_tool_name_map_openai(messages)
seen_blocks: Dict[str, Tuple[int, str, int]] = {}
+ pre_seen = _prescan_system_blocks(system_content, min_block_chars, chunk_modulus)
result = DedupResult()
for idx, msg in enumerate(messages):
@@ -174,8 +205,14 @@ def dedup_chat_completions(
fn_name = tool_names.get(tc_id, msg.get("name", "")) or "tool"
new_content = _dedup_text(
- content, seen_blocks, idx, fn_name, result,
- min_block_chars, chunk_modulus,
+ content,
+ seen_blocks,
+ idx,
+ fn_name,
+ result,
+ min_block_chars,
+ chunk_modulus,
+ pre_seen=pre_seen,
)
if new_content is not None:
original_len = len(content)
@@ -190,7 +227,13 @@ def dedup_chat_completions(
)
_dedup_assistant_code_blocks(
- messages, seen_blocks, result, min_block_chars, min_content_chars, chunk_modulus
+ messages,
+ seen_blocks,
+ result,
+ min_block_chars,
+ min_content_chars,
+ chunk_modulus,
+ pre_seen=pre_seen,
)
return result
@@ -206,6 +249,7 @@ def _dedup_assistant_code_blocks(
min_block_chars: int,
min_content_chars: int,
chunk_modulus: int,
+ pre_seen: Optional[Dict[str, Tuple[int, str, int]]] = None,
) -> None:
for idx, msg in enumerate(messages):
if not isinstance(msg, dict) or msg.get("role") != "assistant":
@@ -249,8 +293,14 @@ def _dedup_assistant_code_blocks(
continue
new_code = _dedup_text(
- code, seen_blocks, idx, "assistant", result,
- min_block_chars, chunk_modulus,
+ code,
+ seen_blocks,
+ idx,
+ "assistant",
+ result,
+ min_block_chars,
+ chunk_modulus,
+ pre_seen=pre_seen,
)
if new_code is not None:
start, end = match.start(2), match.end(2)
@@ -273,6 +323,7 @@ def dedup_responses_api(
min_block_chars: int = MIN_BLOCK_CHARS,
min_content_chars: int = MIN_CONTENT_CHARS,
chunk_modulus: int = CHUNK_MODULUS,
+ system_content: Optional[str] = None,
) -> DedupResult:
input_items = body.get("input")
if not isinstance(input_items, list) or not input_items:
@@ -280,6 +331,7 @@ def dedup_responses_api(
fn_names = _build_tool_name_map_responses(input_items)
seen_blocks: Dict[str, Tuple[int, str, int]] = {}
+ pre_seen = _prescan_system_blocks(system_content, min_block_chars, chunk_modulus)
result = DedupResult()
for idx, item in enumerate(input_items):
@@ -294,8 +346,14 @@ def dedup_responses_api(
fn_name = fn_names.get(call_id, call_id) or "tool"
new_output = _dedup_text(
- output, seen_blocks, idx, fn_name, result,
- min_block_chars, chunk_modulus,
+ output,
+ seen_blocks,
+ idx,
+ fn_name,
+ result,
+ min_block_chars,
+ chunk_modulus,
+ pre_seen=pre_seen,
)
if new_output is not None:
original_len = len(output)
diff --git a/contextpilot/server/http_server.py b/contextpilot/server/http_server.py
index c71af9a..cd3681f 100644
--- a/contextpilot/server/http_server.py
+++ b/contextpilot/server/http_server.py
@@ -25,6 +25,7 @@
import os
import re
import uuid
+from dataclasses import dataclass, field as dc_field
from typing import List, Dict, Any, Optional, cast
from contextlib import asynccontextmanager
@@ -102,15 +103,13 @@
# ── Conversation-aware intercept state ────────────────────────────────────
# Tracks which tool results have already been processed, enabling
-# skip-old / dedup-new / reorder-new behaviour. Single-conversation
-# model (one user at a time). Resets when the system prompt changes.
-
-from dataclasses import dataclass, field as dc_field
+# skip-old / dedup-new / reorder-new behaviour. Per-session model
+# keyed by (system prompt + first user message). Resets on compaction.
@dataclass
class _InterceptConvState:
- """Global intercept state for the current conversation."""
+ """Per-session intercept state for a single conversation."""
# Cached copy of the full messages array after modification (reorder/dedup).
# On subsequent turns, old messages are replaced with these cached versions
@@ -132,7 +131,9 @@ class _InterceptConvState:
last_message_count: int = 0
-_intercept_state = _InterceptConvState()
+_intercept_states: dict[str, _InterceptConvState] = {}
+_intercept_states_lock = asyncio.Lock()
+_MAX_TRACKED_SESSIONS = 64 # FIFO eviction threshold (insertion order — hits don't update recency, so not true LRU)
# TTFT tracking for averages across a session
_ttft_history: List[float] = []
@@ -612,10 +613,14 @@ def _to_output(reordered):
if request.deduplicate:
tracker = get_conversation_tracker()
docs_list = result.get("reordered_contexts") or contexts
- doc_contents_list = None
+ doc_contents_list: Optional[List[Optional[Dict[int, str]]]] = None
if _id_to_str:
doc_contents_list = [
- {did: _id_to_str[did] for did in ctx if did in _id_to_str}
+ {
+ did: _id_to_str[did]
+ for did in ctx
+ if isinstance(did, int) and did in _id_to_str
+ }
for ctx in docs_list
]
dedup_results = tracker.deduplicate_batch(
@@ -627,6 +632,8 @@ def _to_output(reordered):
)
if doc_contents_list:
for dc in doc_contents_list:
+ if dc is None:
+ continue
for did, content in dc.items():
if did in _id_to_str and content != _id_to_str[did]:
_id_to_str[did] = content
@@ -698,10 +705,14 @@ def _to_output(reordered):
if request.deduplicate:
tracker = get_conversation_tracker()
reordered_raw = result.get("reordered_contexts") or contexts
- doc_contents_list = None
+ doc_contents_list: Optional[List[Optional[Dict[int, str]]]] = None
if _id_to_str:
doc_contents_list = [
- {did: _id_to_str[did] for did in ctx if did in _id_to_str}
+ {
+ did: _id_to_str[did]
+ for did in ctx
+ if isinstance(did, int) and did in _id_to_str
+ }
for ctx in reordered_raw
]
dedup_results = tracker.deduplicate_batch(
@@ -713,6 +724,8 @@ def _to_output(reordered):
)
if doc_contents_list:
for dc in doc_contents_list:
+ if dc is None:
+ continue
for did, content in dc.items():
if did in _id_to_str and content != _id_to_str[did]:
_id_to_str[did] = content
@@ -882,13 +895,12 @@ async def reset_index():
_id_to_str, \
_next_str_id, \
_intercept_index, \
- _intercept_state
+ _intercept_states
# Reset conversation tracker
reset_conversation_tracker()
- # Reset intercept conversation state
- _intercept_state = _InterceptConvState()
+ _intercept_states.clear()
_intercept_index = None
# Reset string-to-ID mapping
@@ -1186,31 +1198,112 @@ def _hash_text(text: str) -> str:
return hashlib.sha256(text.encode("utf-8", errors="replace")).hexdigest()[:16]
-def _get_intercept_state(body: Dict[str, Any]) -> _InterceptConvState:
- """Return the global intercept state, resetting if the conversation changed.
+def _extract_system_content_for_dedup(body: Dict[str, Any]) -> Optional[str]:
+ """Extract system prompt text for cross-layer block deduplication."""
+ parts: List[str] = []
+
+ def _append_content(content: Any) -> None:
+ if isinstance(content, str):
+ text = content.strip()
+ if text:
+ parts.append(text)
+ return
+ if isinstance(content, list):
+ for block in content:
+ if not isinstance(block, dict):
+ continue
+ btype = block.get("type")
+ if btype in ("text", "input_text"):
+ text = block.get("text", "")
+ if isinstance(text, str) and text.strip():
+ parts.append(text.strip())
+
+ _append_content(body.get("system"))
+
+ messages = body.get("messages")
+ if isinstance(messages, list):
+ for msg in messages:
+ if isinstance(msg, dict) and msg.get("role") == "system":
+ _append_content(msg.get("content"))
+
+ input_items = body.get("input")
+ if isinstance(input_items, list):
+ for item in input_items:
+ if not isinstance(item, dict):
+ continue
+ if item.get("role") == "system" or item.get("type") == "system":
+ _append_content(item.get("content"))
+ elif item.get("type") == "message" and item.get("role") == "system":
+ _append_content(item.get("content"))
+
+ if not parts:
+ return None
+ return "\n\n".join(parts)
+
+
+def _session_fingerprint(body: Dict[str, Any]) -> str:
+ """Stable session key from system prompt + first user message."""
+ msgs = body.get("messages") or []
+ parts_to_hash: list[str] = []
+
+ system = body.get("system")
+ if system:
+ parts_to_hash.append(str(system)[:500])
- Detection: in a multi-turn agent conversation the messages array only
- grows. If the count drops, either a new session started or the host
- compacted old messages. Either way, reset all state: the old KV cache
- entries are gone (compaction rewrites content), so cached_messages,
- seen_doc_hashes, and reorder state are all invalid.
+ for msg in msgs[:5]:
+ if isinstance(msg, dict) and msg.get("role") == "system":
+ parts_to_hash.append(str(msg.get("content", ""))[:500])
+ elif isinstance(msg, dict) and msg.get("role") == "user":
+ content = msg.get("content", "")
+ if isinstance(content, list):
+ # OpenAI format: [{type: text, text: "..."}]
+ text_parts = [p.get("text", "") for p in content if isinstance(p, dict)]
+ content = "".join(text_parts)
+ parts_to_hash.append(str(content))
+ break
+
+ if not parts_to_hash:
+ return _hash_text(json.dumps(msgs[:2], sort_keys=True))
+
+ return _hash_text("\x00".join(parts_to_hash))
+
+
+async def _get_intercept_state(body: Dict[str, Any]) -> _InterceptConvState:
+ """Return per-session intercept state, creating or resetting as needed.
+
+ If the message count drops, the host compacted old messages or a new
+ session started — either way, reset: the old KV cache entries are gone.
"""
- global _intercept_state
+ session_key = _session_fingerprint(body)
msg_count = len(body.get("messages") or [])
- if msg_count < _intercept_state.last_message_count:
- logger.info(
- f"Intercept: message count dropped "
- f"({msg_count} < {_intercept_state.last_message_count}), "
- f"resetting all state (compaction or new session)"
- )
- _intercept_state = _InterceptConvState()
- # Skip reorder for the first post-compaction tool result:
- # prefix cache is fully invalidated, nothing to align with.
- # Go straight to dedup mode so docs are registered for future turns.
- _intercept_state.first_tool_result_done = True
- _intercept_state.system_processed = True
- _intercept_state.last_message_count = msg_count
- return _intercept_state
+
+ async with _intercept_states_lock:
+ state = _intercept_states.get(session_key)
+
+ if state is None:
+ state = _InterceptConvState()
+ state.system_processed = True
+ logger.info(
+ f"Intercept: new session {session_key[:8]}… "
+ f"({msg_count} msgs, {len(_intercept_states)} active sessions)"
+ )
+ if len(_intercept_states) >= _MAX_TRACKED_SESSIONS:
+ oldest_key = next(iter(_intercept_states))
+ del _intercept_states[oldest_key]
+ logger.info(f"Intercept: evicted session {oldest_key[:8]}…")
+ _intercept_states[session_key] = state
+ elif msg_count < state.last_message_count:
+ logger.info(
+ f"Intercept: session {session_key[:8]}… message count dropped "
+ f"({msg_count} < {state.last_message_count}), "
+ f"resetting state (compaction or restart)"
+ )
+ state = _InterceptConvState()
+ state.system_processed = True
+ _intercept_states[session_key] = state
+
+ state.last_message_count = msg_count
+ return state
def _deduplicate_docs(docs: List[str], state: _InterceptConvState) -> tuple:
@@ -1261,6 +1354,21 @@ def _strip_external_content_ids(body: Any) -> Any:
_OPENAI_CHAT = "openai_chat"
_ANTHROPIC_MESSAGES = "anthropic_messages"
+_HOP_BY_HOP = frozenset(
+ (
+ "host",
+ "connection",
+ "keep-alive",
+ "transfer-encoding",
+ "te",
+ "trailer",
+ "upgrade",
+ "proxy-authorization",
+ "proxy-authenticate",
+ "content-length",
+ )
+)
+
def _doc_preview(doc: str, max_len: int = 60) -> str:
"""Truncate a document string for log preview."""
@@ -1368,14 +1476,12 @@ async def _intercept_and_forward(request: Request, api_format: str):
total_reordered = 0
total_deduped = 0
total_slimmed = 0
- tool_results_skipped = 0 # TODO: never incremented — wire up or remove
- _chars_before_slim = 0
- _chars_after_slim = 0
+ chars_before_slim = 0
+ chars_after_slim = 0
system_count = 0
tool_result_count = 0
- reorder_details = [] # collect per-source reorder info
+ reorder_details = []
_dedup_result = DedupResult()
- state = _intercept_state
# ── Debug: log conversation shape, divergence, and tool_result details ──
_debug_messages = body.get("messages") or []
@@ -1383,12 +1489,11 @@ async def _intercept_and_forward(request: Request, api_format: str):
# Per-message hashes for this request
_debug_msg_hashes = []
- if logger.isEnabledFor(logging.DEBUG):
- for m in _debug_messages:
- h = hashlib.sha256(
- json.dumps(m, sort_keys=True, ensure_ascii=False).encode()
- ).hexdigest()[:12]
- _debug_msg_hashes.append(h)
+ for m in _debug_messages:
+ h = hashlib.sha256(
+ json.dumps(m, sort_keys=True, ensure_ascii=False).encode()
+ ).hexdigest()[:12]
+ _debug_msg_hashes.append(h)
# Build tool_call_id → function name mapping from assistant messages
_tool_call_names = {}
@@ -1421,7 +1526,7 @@ async def _intercept_and_forward(request: Request, api_format: str):
_chars = len(_content_str)
_is_compacted = "[compacted:" in _content_str
_preview = _content_str[:150].replace("\n", "\\n")
- logger.info(
+ logger.debug(
f" msg[{idx}] role={_role} fn={_fn_label} "
f"tool_call_id={_tc_id} "
f"chars={_chars} compacted={_is_compacted} "
@@ -1439,55 +1544,27 @@ async def _intercept_and_forward(request: Request, api_format: str):
_chars = len(_tc_str)
_is_compacted = "[compacted:" in _tc_str
_preview = _tc_str[:150].replace("\n", "\\n")
- logger.info(
+ logger.debug(
f" msg[{idx}].content[{bi}] type=tool_result "
f"tool_use_id={_tu_id} chars={_chars} "
f"compacted={_is_compacted} preview: {_preview}"
)
- global _debug_prev_msg_hashes
- if "_debug_prev_msg_hashes" not in globals():
- _debug_prev_msg_hashes = []
-
- _prev_n = len(_debug_prev_msg_hashes)
- if _prev_n > 0 and _prev_n <= _debug_msg_count:
- _first_diff = None
- for idx in range(_prev_n):
- if _debug_msg_hashes[idx] != _debug_prev_msg_hashes[idx]:
- _first_diff = idx
- break
- if _first_diff is not None:
- _diff_msg = _debug_messages[_first_diff]
- _diff_role = _diff_msg.get("role", "?")
- _diff_content = str(_diff_msg.get("content", ""))
- logger.warning(
- f"Intercept PREFIX MISMATCH at msg[{_first_diff}] "
- f"(role={_diff_role}), "
- f"hash was {_debug_prev_msg_hashes[_first_diff]} "
- f"now {_debug_msg_hashes[_first_diff]}. "
- f"Content preview ({len(_diff_content)} chars): "
- f"{_diff_content[:300]}..."
- )
- else:
- logger.info(
- f"Intercept: {_debug_msg_count} msgs (prev={_prev_n}), "
- f"prefix[:{_prev_n}] MATCH, "
- f"{_debug_msg_count - _prev_n} new msgs"
- )
- else:
- logger.info(f"Intercept: {_debug_msg_count} msgs (first request or reset)")
-
- _debug_prev_msg_hashes = list(_debug_msg_hashes)
+ logger.info(
+ f"Intercept: session={_session_fingerprint(body)[:8]} {_debug_msg_count} msgs"
+ )
# ── Format handler (strategy pattern) ────────────────────────────
handler = get_format_handler(api_format)
+ state = _InterceptConvState()
+ state.last_message_count = _debug_msg_count
if config.enabled:
try:
- # body is already a fresh copy from _strip_external_content_ids
+ body = copy.deepcopy(body)
# ── Conversation-aware state (single-conversation model) ──
- state = _get_intercept_state(body)
+ state = await _get_intercept_state(body)
# ── Replace old messages with cached (modified) versions ──
# On subsequent turns, the host sends original (unmodified)
@@ -1497,11 +1574,39 @@ async def _intercept_and_forward(request: Request, api_format: str):
if old_msg_count > 0:
msgs = body.get("messages", [])
if len(msgs) >= old_msg_count:
- msgs[:old_msg_count] = copy.deepcopy(state.cached_messages)
- logger.info(
- f"Intercept: replaced {old_msg_count} old messages "
- f"with cached versions for prefix cache consistency"
- )
+ prefix_ok = True
+ mismatch_idx = -1
+ for _ci in range(old_msg_count):
+ _cached_h = hashlib.sha256(
+ json.dumps(
+ state.cached_messages[_ci],
+ sort_keys=True,
+ ensure_ascii=False,
+ ).encode()
+ ).hexdigest()[:16]
+ _current_h = hashlib.sha256(
+ json.dumps(
+ msgs[_ci], sort_keys=True, ensure_ascii=False
+ ).encode()
+ ).hexdigest()[:16]
+ if _cached_h != _current_h:
+ prefix_ok = False
+ mismatch_idx = _ci
+ break
+ if prefix_ok:
+ msgs[:old_msg_count] = copy.deepcopy(state.cached_messages)
+ logger.info(
+ f"Intercept: replaced {old_msg_count} old "
+ f"messages with cached versions for prefix "
+ f"cache consistency"
+ )
+ else:
+ logger.info(
+ f"Intercept: prefix mismatch at msg[{mismatch_idx}], "
+ f"skipping cached message replay "
+ f"(different session/user)"
+ )
+ old_msg_count = 0
handler.restore_system(body, state.cached_system)
multi = handler.extract_all(body, config)
@@ -1523,7 +1628,7 @@ async def _intercept_and_forward(request: Request, api_format: str):
}
)
handler.reconstruct_system(
- body, extraction, reordered_docs, sys_idx, config
+ body, extraction, reordered_docs, sys_idx
)
total_reordered += len(extraction.documents)
system_count = 1
@@ -1570,8 +1675,8 @@ async def _intercept_and_forward(request: Request, api_format: str):
f"previous tool result ({orig_chars} chars). "
f"Refer to the earlier result above.]"
]
- _chars_before_slim += orig_chars
- _chars_after_slim += len(new_docs[0])
+ chars_before_slim += orig_chars
+ chars_after_slim += len(new_docs[0])
total_slimmed += deduped
reorder_details.append(
{
@@ -1626,13 +1731,8 @@ async def _intercept_and_forward(request: Request, api_format: str):
single_doc.tool_call_id
)
- if (
- total_reordered > 0
- or total_deduped > 0
- or total_slimmed > 0
- or tool_results_skipped > 0
- ):
- saved = _chars_before_slim - _chars_after_slim
+ if total_reordered > 0 or total_deduped > 0 or total_slimmed > 0:
+ saved = chars_before_slim - chars_after_slim
saved_tokens = saved // 4 if saved > 0 else 0
logger.info(
f"Intercept ({api_format}): reordered {total_reordered}, "
@@ -1642,17 +1742,27 @@ async def _intercept_and_forward(request: Request, api_format: str):
_dedup_result = DedupResult()
try:
+ system_content = _extract_system_content_for_dedup(body)
if api_format == _OPENAI_CHAT:
- _dedup_result = dedup_chat_completions(body, chunk_modulus=_chunk_modulus)
+ _dedup_result = dedup_chat_completions(
+ body,
+ chunk_modulus=_chunk_modulus,
+ system_content=system_content,
+ )
elif "input" in body and isinstance(body.get("input"), list):
- _dedup_result = dedup_responses_api(body, chunk_modulus=_chunk_modulus)
+ _dedup_result = dedup_responses_api(
+ body,
+ chunk_modulus=_chunk_modulus,
+ system_content=system_content,
+ )
if _dedup_result.chars_saved > 0:
- _chars_before_slim += _dedup_result.chars_before
- _chars_after_slim += _dedup_result.chars_after
+ chars_before_slim += _dedup_result.chars_before
+ chars_after_slim += _dedup_result.chars_after
logger.info(
f"Dedup ({api_format}): "
f"blocks={_dedup_result.blocks_deduped}/{_dedup_result.blocks_total}, "
+ f"system_matches={_dedup_result.system_blocks_matched}, "
f"saved {_dedup_result.chars_saved:,} chars"
)
except Exception as dedup_err:
@@ -1697,22 +1807,6 @@ async def _intercept_and_forward(request: Request, api_format: str):
else:
target_url = f"{infer_api_url}{handler.target_path()}"
- # Build outbound headers: forward everything except X-ContextPilot-*
- # and hop-by-hop headers that must not be forwarded by proxies.
- _HOP_BY_HOP = frozenset(
- (
- "host",
- "connection",
- "keep-alive",
- "transfer-encoding",
- "te",
- "trailer",
- "upgrade",
- "proxy-authorization",
- "proxy-authenticate",
- "content-length",
- )
- )
if _cloud_mode and _cloud_adapter is not None and _cloud_api_key:
outbound_headers = _cloud_adapter.get_auth_headers(_cloud_api_key)
else:
@@ -1732,7 +1826,6 @@ async def _intercept_and_forward(request: Request, api_format: str):
total_reordered > 0
or total_deduped > 0
or total_slimmed > 0
- or tool_results_skipped > 0
or _dedup_result.chars_saved > 0
)
if _has_activity:
@@ -1743,10 +1836,9 @@ async def _intercept_and_forward(request: Request, api_format: str):
"total_documents": total_reordered,
"documents_deduplicated": total_deduped,
"documents_slimmed": total_slimmed,
- "chars_before_slim": _chars_before_slim,
- "chars_after_slim": _chars_after_slim,
- "chars_saved": _chars_before_slim - _chars_after_slim,
- "tool_results_skipped": tool_results_skipped,
+ "chars_before_slim": chars_before_slim,
+ "chars_after_slim": chars_after_slim,
+ "chars_saved": chars_before_slim - chars_after_slim,
"message_count": state.last_message_count,
"sources": {
"system": system_count,
@@ -1756,6 +1848,7 @@ async def _intercept_and_forward(request: Request, api_format: str):
"dedup": {
"blocks_deduped": _dedup_result.blocks_deduped,
"blocks_total": _dedup_result.blocks_total,
+ "system_blocks_matched": _dedup_result.system_blocks_matched,
"chars_saved": _dedup_result.chars_saved,
},
}
@@ -1785,7 +1878,7 @@ async def _stream_with_headers():
async for chunk in resp.content.iter_any():
if not _ttft_logged:
_ttft_ms = (time.monotonic() - _request_start) * 1000
- _saved = _chars_before_slim - _chars_after_slim
+ _saved = chars_before_slim - chars_after_slim
_log_ttft(_ttft_ms, total_slimmed, _saved)
_ttft_logged = True
yield chunk
@@ -1795,12 +1888,9 @@ async def _stream_with_headers():
status, fwd_headers = cast(tuple[int, Dict[str, str]], first_event)
async def _stream_content_only():
- try:
- async for event in stream_iter:
- if isinstance(event, bytes):
- yield event
- finally:
- await stream_iter.aclose()
+ async for event in stream_iter:
+ if isinstance(event, bytes):
+ yield event
return StreamingResponse(
_stream_content_only(),
@@ -1814,13 +1904,9 @@ async def _stream_content_only():
target_url, json=body, headers=outbound_headers
) as resp:
_ttft_ms = (time.monotonic() - _request_start) * 1000
- _saved = _chars_before_slim - _chars_after_slim
+ _saved = chars_before_slim - chars_after_slim
_log_ttft(_ttft_ms, total_slimmed, _saved)
- try:
- result = await resp.json()
- except (json.JSONDecodeError, aiohttp.ContentTypeError):
- text = await resp.text()
- raise HTTPException(status_code=resp.status, detail=text[:500])
+ result = await resp.json()
# ── Cloud mode: track cache metrics from response ──
if (
@@ -1858,7 +1944,7 @@ async def _stream_content_only():
except aiohttp.ClientError as e:
logger.error(f"Error forwarding intercepted request: {e}")
- raise HTTPException(status_code=502, detail="Backend connection error")
+ raise HTTPException(status_code=502, detail=f"Backend error: {str(e)}")
@app.post("/v1/chat/completions")
@@ -1944,6 +2030,7 @@ async def proxy_engine(path: str, request: Request):
dedup_result = DedupResult()
try:
+ system_content = _extract_system_content_for_dedup(body)
if path == "responses" or (
"input" in body and isinstance(body.get("input"), list)
):
@@ -1987,13 +2074,22 @@ async def proxy_engine(path: str, request: Request):
f"{len(fco_items)} function_call_output:\n"
+ "\n".join(fco_summary)
)
- dedup_result = dedup_responses_api(body, chunk_modulus=_chunk_modulus)
+ dedup_result = dedup_responses_api(
+ body,
+ chunk_modulus=_chunk_modulus,
+ system_content=system_content,
+ )
elif "messages" in body and isinstance(body.get("messages"), list):
- dedup_result = dedup_chat_completions(body, chunk_modulus=_chunk_modulus)
+ dedup_result = dedup_chat_completions(
+ body,
+ chunk_modulus=_chunk_modulus,
+ system_content=system_content,
+ )
if dedup_result.chars_saved > 0:
logger.info(
f"Passthrough dedup /v1/{path}: "
f"block={dedup_result.blocks_deduped}/{dedup_result.blocks_total} "
+ f"system_matches={dedup_result.system_blocks_matched} "
f"(saved {dedup_result.chars_saved:,} chars)"
)
except Exception as pe:
@@ -2153,7 +2249,13 @@ def main():
os.environ["CONTEXTPILOT_CLOUD_API_KEY"] = args.cloud_api_key
# Also set global config for direct access
- global _max_tokens, _infer_api_url, _tokenizer, _model_name, _stateless_mode, _chunk_modulus
+ global \
+ _max_tokens, \
+ _infer_api_url, \
+ _tokenizer, \
+ _model_name, \
+ _stateless_mode, \
+ _chunk_modulus
_max_tokens = args.max_tokens
_infer_api_url = args.infer_api_url.rstrip("/")
_stateless_mode = args.stateless
diff --git a/docs/guides/openclaw.md b/docs/guides/openclaw.md
index fc19202..28e30db 100644
--- a/docs/guides/openclaw.md
+++ b/docs/guides/openclaw.md
@@ -6,19 +6,35 @@
-ContextPilot acts as a transparent HTTP proxy. OpenClaw sends requests to the proxy instead of directly to the LLM API. The proxy deduplicates shared content across tool results, reorders documents, and forwards to the backend.
+ContextPilot acts as a transparent HTTP proxy or a native plugin. In either mode, it deduplicates shared content across tool results, reorders documents, and maximizes cache hits.
-## Why This Matters for OpenClaw
+## Installation
-OpenClaw's search and memory retrieval results appear as **tool_result messages** in the conversation history, not in the system prompt. When multiple search results are returned, their ordering affects the LLM's attention and response quality.
+### Plugin (Recommended)
-ContextPilot:
-1. **Reorder**: Reorders documents within tool results to maximize prefix cache hits (multi-doc tool results)
-2. **Dedup**: ContextBlock-level and content-level deduplication across tool results — identical content replaced with back-references, reducing prefill tokens
+The native OpenClaw plugin runs in-process with zero external dependencies.
-Results from reorder and dedup are cached and reapplied on subsequent turns to keep the prefix consistent across the conversation (prefix cache alignment). See [Cache Synchronization](cache_sync.md) for how ContextPilot stays in sync with the inference engine's cache.
+1. **Install the plugin**:
+
+```bash
+openclaw plugins install @contextpilot-ai/contextpilot
+```
+
+2. **Enable in `~/.openclaw/openclaw.json`**:
+
+```json
+{
+ "plugins": {
+ "slots": { "contextEngine": "contextpilot" },
+ "entries": { "contextpilot": { "enabled": true } }
+ }
+}
+```
+
+3. **Restart OpenClaw**. ContextPilot now intercepts every LLM call, optimizes the context, and injects cache control markers (for Anthropic) automatically.
+
+### Alternative: HTTP Proxy
-## Setup
### Quick Start (one command)
diff --git a/openclaw-plugin/README.md b/openclaw-plugin/README.md
new file mode 100644
index 0000000..fc49e5c
--- /dev/null
+++ b/openclaw-plugin/README.md
@@ -0,0 +1,137 @@
+# @contextpilot-ai/contextpilot
+
+OpenClaw plugin for [ContextPilot](https://github.com/EfficientContext/ContextPilot) — faster long-context inference via in-process context optimization. **Zero external dependencies** — no Python, no proxy server, just install and go.
+
+## What It Does
+
+ContextPilot registers as an OpenClaw **Context Engine** and optimizes every LLM request by:
+
+1. **Extracting** documents from tool results
+2. **Reordering** documents for maximum prefix cache sharing across turns
+3. **Deduplicating** repeated content blocks with compact reference hints
+4. **Injecting** cache control markers (Anthropic `cache_control: { type: "ephemeral" }`)
+
+All processing happens in-process — no external services needed.
+
+## Installation
+
+### From npm (when published)
+
+```bash
+openclaw plugins install @contextpilot-ai/contextpilot
+```
+
+### From local path (development)
+
+Add to `~/.openclaw/openclaw.json`:
+
+```json
+{
+ "plugins": {
+ "load": {
+ "paths": [
+ "/path/to/ContextPilot/openclaw-plugin"
+ ]
+ }
+ }
+}
+```
+
+## Configuration
+
+In `~/.openclaw/openclaw.json`, enable the plugin and set it as the context engine:
+
+```json
+{
+ "plugins": {
+ "slots": {
+ "contextEngine": "contextpilot"
+ },
+ "entries": {
+ "contextpilot": {
+ "enabled": true,
+ "config": {
+ "scope": "all"
+ }
+ }
+ }
+ },
+ "tools": {
+ "allow": ["contextpilot"]
+ }
+}
+```
+
+### Scope Options
+
+| Scope | Tool Results | Description |
+|:------|:------------:|:------------|
+| `all` (default) | Optimized | Optimize all tool results |
+| `tool_results` | Optimized | Same as `all` |
+
+> **Note:** System prompt optimization is not currently available — OpenClaw's context engine API does not expose the system prompt to plugins.
+
+## How It Works
+
+```
+OpenClaw agent request
+ ↓
+ContextPilot Context Engine (assemble hook)
+ ├─ Convert OpenClaw message format (toolResult → tool_result)
+ ├─ Extract documents from tool results
+ ├─ Reorder for prefix cache sharing
+ ├─ Deduplicate repeated blocks
+ ├─ Inject cache_control markers
+ ↓
+Optimized context → LLM Backend
+```
+
+The plugin registers as an OpenClaw Context Engine using `api.registerContextEngine()`. The `assemble()` hook intercepts context assembly before each LLM call.
+
+## Files
+
+```
+openclaw-plugin/
+├── openclaw.plugin.json # Plugin manifest (id: "contextpilot")
+├── package.json # npm package (@contextpilot-ai/contextpilot)
+├── src/
+│ ├── index.ts # Plugin entry point
+│ └── engine/
+│ ├── cache-control.ts # Cache control injection
+│ ├── dedup.ts # Content deduplication
+│ ├── extract.ts # Document extraction
+│ └── live-index.ts # Reordering engine
+└── tsconfig.json
+```
+
+## Agent Tool
+
+| Tool | Description |
+|------|-------------|
+| `contextpilot_status` | Check engine status, request count, and chars saved |
+
+> **Note:** The status tool is registered but may not be visible to agents due to OpenClaw plugin API limitations.
+
+## Verifying It Works
+
+Check the gateway logs:
+
+```
+[ContextPilot] Stats: 5 requests, 28,356 chars saved (~7,089 tokens, ~$0.0213)
+```
+
+## Expected Savings
+
+Savings depend on conversation length and repeated content:
+
+| Scenario | Chars Saved | Token Reduction |
+|:---------|------------:|----------------:|
+| Short session (few tool calls) | 0-5K | ~0-5% |
+| Medium session (10+ file reads) | 20-50K | ~10-20% |
+| Long session (repeated large files) | 100K+ | ~30-50% |
+
+Run `./benchmark.sh` to measure the savings on your own workload.
+
+## License
+
+Apache-2.0
diff --git a/openclaw-plugin/benchmark.sh b/openclaw-plugin/benchmark.sh
new file mode 100755
index 0000000..7a72732
--- /dev/null
+++ b/openclaw-plugin/benchmark.sh
@@ -0,0 +1,175 @@
+#!/bin/bash
+#
+# ContextPilot Benchmark
+# Measures how many characters/tokens ContextPilot saves on a live session
+#
+
+# Exit immediately on any failing command so a broken setup never runs the agent.
+set -e
+
+# Live OpenClaw config, and the backup copy restored by the EXIT trap below.
+OPENCLAW_CONFIG="$HOME/.openclaw/openclaw.json"
+BACKUP_CONFIG="$HOME/.openclaw/openclaw.json.bak"
+
+# Resolve this script's directory so the benchmark works from any CWD.
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+# File the agent is repeatedly asked to read; repeated reads exercise dedup.
+TEST_FILE="${SCRIPT_DIR}/src/engine/dedup.ts"
+
+echo "=========================================="
+echo "ContextPilot Benchmark"
+echo "=========================================="
+
+# Backup config
+cp "$OPENCLAW_CONFIG" "$BACKUP_CONFIG"
+
+# Restore the user's original config and stop the gateway no matter how the
+# script exits (normal completion, a `set -e` failure, or an interrupt).
+cleanup() {
+  echo ""
+  echo "Restoring config..."
+  cp "$BACKUP_CONFIG" "$OPENCLAW_CONFIG"
+  rm -f "$BACKUP_CONFIG"
+  openclaw gateway stop 2>/dev/null || true
+}
+trap cleanup EXIT
+
+# Point the contextEngine slot at the plugin and flip its enabled flag on.
+enable_contextpilot() {
+  python3 << 'PYTHON'
+import json
+import os
+
+config_path = os.path.expanduser("~/.openclaw/openclaw.json")
+with open(config_path) as fh:
+    config = json.load(fh)
+
+plugins = config.setdefault('plugins', {})
+plugins.setdefault('slots', {})['contextEngine'] = 'contextpilot'
+plugins.setdefault('entries', {}).setdefault('contextpilot', {})['enabled'] = True
+
+with open(config_path, 'w') as fh:
+    json.dump(config, fh, indent=2)
+PYTHON
+}
+
+# Undo enable_contextpilot: drop the slot assignment and disable the entry.
+disable_contextpilot() {
+  python3 << 'PYTHON'
+import json
+import os
+
+config_path = os.path.expanduser("~/.openclaw/openclaw.json")
+with open(config_path) as fh:
+    config = json.load(fh)
+
+if 'plugins' in config:
+    config['plugins'].get('slots', {}).pop('contextEngine', None)
+    entries = config['plugins'].get('entries', {})
+    if 'contextpilot' in entries:
+        entries['contextpilot']['enabled'] = False
+
+with open(config_path, 'w') as fh:
+    json.dump(config, fh, indent=2)
+PYTHON
+}
+
+restart_gateway() {
+ local logfile=$1
+
+ # Force stop any existing gateway
+ echo " Stopping existing gateway..."
+ openclaw gateway stop 2>/dev/null || true
+ sleep 1
+ pkill -9 -f "openclaw-gateway" 2>/dev/null || true
+ sleep 2
+
+ # Verify it's stopped
+ if pgrep -f "openclaw-gateway" > /dev/null 2>&1; then
+ echo " WARNING: Gateway still running, force killing..."
+ pkill -9 -f "openclaw-gateway" 2>/dev/null || true
+ sleep 2
+ fi
+
+ rm -f "$logfile"
+ echo " Starting gateway..."
+ nohup openclaw gateway > "$logfile" 2>&1 &
+
+ # Wait for gateway to start
+ for i in {1..15}; do
+ if pgrep -f "openclaw-gateway" > /dev/null 2>&1; then
+ sleep 3
+ echo " Gateway running (PID $(pgrep -f openclaw-gateway | head -1))"
+ return 0
+ fi
+ sleep 1
+ done
+
+ echo "ERROR: Gateway failed to start"
+ cat "$logfile" | tail -20
+ return 1
+}
+
+run_test() {
+ local session_id="bench-$$-$(date +%s)"
+ echo " Session: $session_id"
+
+ # Read the same file 3 times to trigger dedup
+ timeout 60 openclaw agent --agent main --session-id "$session_id" \
+ --message "Read $TEST_FILE and count functions" 2>/dev/null || true
+
+ timeout 60 openclaw agent --agent main --session-id "$session_id" \
+ --message "Read $TEST_FILE again, list exports" 2>/dev/null || true
+
+ timeout 60 openclaw agent --agent main --session-id "$session_id" \
+ --message "Read $TEST_FILE one more time, summarize" 2>/dev/null || true
+}
+
+extract_stats() {
+ local logfile=$1
+ # Try the captured log first
+ local stats=$(grep "ContextPilot.*Stats:" "$logfile" 2>/dev/null | tail -1)
+ if [ -n "$stats" ]; then
+ echo "$stats"
+ return
+ fi
+ # Fall back to main openclaw log
+ local today=$(date +%Y-%m-%d)
+ local mainlog="/tmp/openclaw/openclaw-${today}.log"
+ grep "ContextPilot.*Stats:" "$mainlog" 2>/dev/null | tail -1 || echo ""
+}
+
+# ==========================================
+# Test WITH ContextPilot
+# ==========================================
+echo ""
+echo "Test: WITH ContextPilot enabled"
+echo "----------------------------------------"
+enable_contextpilot
+
+LOG_WITH="/tmp/gw-with.log"
+if ! restart_gateway "$LOG_WITH"; then
+  exit 1
+fi
+
+echo "  Running 3 file reads in same session..."
+run_test
+echo "  Done."
+
+# Give the gateway a moment to flush its stats line to the log.
+sleep 2
+WITH_STATS=$(extract_stats "$LOG_WITH")
+
+if [ -n "$WITH_STATS" ]; then
+  # Parse a line like:
+  #   [ContextPilot] Stats: 5 requests, 28,356 chars saved (~7,089 tokens, ~$0.0213)
+  CHARS_SAVED=$(echo "$WITH_STATS" | sed 's/.*Stats:[^,]*, //' | sed 's/ chars saved.*//' | tr -d ',')
+  TOKENS_SAVED=$(echo "$WITH_STATS" | grep -oP '~\K[0-9,]+(?= tokens)' | tr -d ',')
+
+  echo ""
+  echo "=========================================="
+  echo "RESULTS"
+  echo "=========================================="
+  echo ""
+  echo "ContextPilot deduplicated:"
+  echo "  Chars saved: ${CHARS_SAVED:-0}"
+  echo "  Tokens saved: ~${TOKENS_SAVED:-0}"
+
+  if [ -n "$TOKENS_SAVED" ] && [ "$TOKENS_SAVED" -gt 0 ] 2>/dev/null; then
+    COST=$(echo "scale=4; $TOKENS_SAVED * 0.003 / 1000" | bc 2>/dev/null || echo "N/A")
+    echo "  Est. cost saved: ~\$$COST (at \$3/MTok)"
+  fi
+
+  echo ""
+  echo "This represents content that was deduplicated"
+  echo "across repeated file reads in the session."
+else
+  # Hoist today's main log path once, quoted, instead of rebuilding it inline.
+  MAIN_LOG="/tmp/openclaw/openclaw-$(date +%Y-%m-%d).log"
+  echo ""
+  echo "No ContextPilot stats found in logs."
+  echo ""
+  echo "Debug info:"
+  echo "  Gateway log: $LOG_WITH"
+  echo "  Main log: $MAIN_LOG"
+  echo ""
+  echo "Gateway log contents:"
+  # Direct grep on the file (no useless cat); skip bracketed log-prefix lines.
+  grep -v "^\[" "$LOG_WITH" 2>/dev/null | head -10
+  echo ""
+  echo "Searching main log for ContextPilot..."
+  grep -i "contextpilot" "$MAIN_LOG" 2>/dev/null | tail -5 || echo "  (none found)"
+fi
diff --git a/openclaw-plugin/openclaw.plugin.json b/openclaw-plugin/openclaw.plugin.json
new file mode 100644
index 0000000..a8c336e
--- /dev/null
+++ b/openclaw-plugin/openclaw.plugin.json
@@ -0,0 +1,18 @@
+{
+ "id": "contextpilot",
+ "name": "ContextPilot",
+ "description": "Faster long-context inference via context reuse — reorders, deduplicates, and injects cache control for maximum prefix cache sharing.",
+  "version": "0.3.0",
+ "configSchema": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "scope": {
+ "type": "string",
+ "enum": ["all", "tool_results"],
+ "description": "Which messages ContextPilot optimizes",
+ "default": "all"
+ }
+ }
+ }
+}
diff --git a/openclaw-plugin/package-lock.json b/openclaw-plugin/package-lock.json
new file mode 100644
index 0000000..d487a21
--- /dev/null
+++ b/openclaw-plugin/package-lock.json
@@ -0,0 +1,22 @@
+{
+  "name": "@contextpilot-ai/contextpilot",
+ "version": "0.3.0",
+ "lockfileVersion": 3,
+ "requires": true,
+ "packages": {
+ "": {
+      "name": "@contextpilot-ai/contextpilot",
+ "version": "0.3.0",
+ "license": "Apache-2.0",
+ "dependencies": {
+ "@sinclair/typebox": "^0.34.49"
+ }
+ },
+ "node_modules/@sinclair/typebox": {
+ "version": "0.34.49",
+ "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.34.49.tgz",
+ "integrity": "sha512-brySQQs7Jtn0joV8Xh9ZV/hZb9Ozb0pmazDIASBkYKCjXrXU3mpcFahmK/z4YDhGkQvP9mWJbVyahdtU5wQA+A==",
+ "license": "MIT"
+ }
+ }
+}
diff --git a/openclaw-plugin/package.json b/openclaw-plugin/package.json
new file mode 100644
index 0000000..97e81b7
--- /dev/null
+++ b/openclaw-plugin/package.json
@@ -0,0 +1,39 @@
+{
+ "name": "@contextpilot-ai/contextpilot",
+ "version": "0.3.0",
+ "description": "ContextPilot plugin for OpenClaw — faster long-context inference via in-process context reuse. Zero external dependencies.",
+ "type": "module",
+ "license": "Apache-2.0",
+ "author": "ContextPilot Contributors",
+ "repository": {
+ "type": "git",
+ "url": "https://github.com/EfficientContext/ContextPilot.git",
+ "directory": "openclaw-plugin"
+ },
+ "keywords": [
+ "openclaw",
+ "openclaw-plugin",
+ "contextpilot",
+ "kv-cache",
+ "context-reuse",
+ "prompt-cache",
+ "dedup",
+ "llm"
+ ],
+ "openclaw": {
+ "extensions": [
+ "./src/index.ts"
+ ]
+ },
+ "publishConfig": {
+ "access": "public"
+ },
+ "files": [
+ "src/",
+ "openclaw.plugin.json",
+ "README.md"
+ ],
+ "dependencies": {
+ "@sinclair/typebox": "^0.34.49"
+ }
+}
diff --git a/openclaw-plugin/src/engine/cache-control.ts b/openclaw-plugin/src/engine/cache-control.ts
new file mode 100644
index 0000000..57c174e
--- /dev/null
+++ b/openclaw-plugin/src/engine/cache-control.ts
@@ -0,0 +1,167 @@
+export const MIN_CONTENT_LENGTH_FOR_CACHE = 1024;
+export const CACHE_CONTROL_EPHEMERAL = { type: 'ephemeral' } as const;
+
+type CacheControl = typeof CACHE_CONTROL_EPHEMERAL;
+
+interface TextBlock extends Record {
+ type?: unknown;
+ text?: unknown;
+ cache_control?: CacheControl;
+}
+
+interface ToolResultBlock extends Record {
+ type?: unknown;
+ content?: unknown;
+ cache_control?: CacheControl;
+}
+
+interface MessageBlock extends Record {
+ role?: unknown;
+ content?: unknown;
+}
+
+function isRecord(value: unknown): value is Record {
+ return typeof value === 'object' && value !== null;
+}
+
+function injectSystemCacheControl(
+ body: Record,
+ cc: CacheControl
+): Record {
+ const system = body.system;
+ if (system === undefined || system === null) {
+ return body;
+ }
+
+ if (typeof system === 'string') {
+ body.system = [{ type: 'text', text: system, cache_control: cc }];
+ return body;
+ }
+
+ if (Array.isArray(system) && system.length > 0) {
+ const lastBlock = system[system.length - 1];
+ if (isRecord(lastBlock)) {
+ lastBlock.cache_control = cc;
+ }
+ }
+
+ return body;
+}
+
+// Mark a tool_result block as cacheable when its text payload is large enough:
+// string payloads get the marker on the block itself, array payloads on their
+// final text element (so the whole prefix up to it is cacheable).
+function maybeAddCacheControlToToolResult(block: ToolResultBlock, cc: CacheControl): void {
+  const payload = block.content ?? '';
+
+  if (typeof payload === 'string') {
+    if (payload.length >= MIN_CONTENT_LENGTH_FOR_CACHE) {
+      block.cache_control = cc;
+    }
+    return;
+  }
+
+  if (!Array.isArray(payload)) {
+    return;
+  }
+
+  // Count only the characters held by text blocks.
+  let charCount = 0;
+  for (const inner of payload) {
+    if (isRecord(inner) && inner.type === 'text' && typeof inner.text === 'string') {
+      charCount += inner.text.length;
+    }
+  }
+
+  if (payload.length === 0 || charCount < MIN_CONTENT_LENGTH_FOR_CACHE) {
+    return;
+  }
+
+  // Attach the marker to the last text block, scanning from the end.
+  for (let idx = payload.length - 1; idx >= 0; idx -= 1) {
+    const candidate = payload[idx];
+    if (isRecord(candidate) && candidate.type === 'text') {
+      (candidate as TextBlock).cache_control = cc;
+      return;
+    }
+  }
+}
+
+function injectToolResultCacheControl(
+ body: Record,
+ cc: CacheControl
+): Record {
+ const messages = body.messages;
+ if (!Array.isArray(messages) || messages.length === 0) {
+ return body;
+ }
+
+ for (const msg of messages) {
+ if (!isRecord(msg)) {
+ continue;
+ }
+
+ const message = msg as MessageBlock;
+
+ if (message.role === 'toolResult') {
+ const toolResultContent = message.content ?? '';
+ let totalChars = 0;
+
+ if (typeof toolResultContent === 'string') {
+ totalChars = toolResultContent.length;
+ } else if (Array.isArray(toolResultContent)) {
+ totalChars = toolResultContent.reduce((sum, inner) => {
+ if (isRecord(inner) && inner.type === 'text') {
+ return sum + (typeof inner.text === 'string' ? inner.text.length : 0);
+ }
+ return sum;
+ }, 0);
+ }
+
+ if (totalChars >= MIN_CONTENT_LENGTH_FOR_CACHE) {
+ (message as any).cache_control = cc;
+ }
+ continue;
+ }
+
+ if (message.role !== 'user' || !Array.isArray(message.content)) {
+ continue;
+ }
+
+ for (const block of message.content) {
+ if (!isRecord(block)) {
+ continue;
+ }
+ if (block.type !== 'tool_result' && block.type !== 'toolResult') {
+ continue;
+ }
+ maybeAddCacheControlToToolResult(block as ToolResultBlock, cc);
+ }
+ }
+
+ return body;
+}
+
+// Deep-clone the request body, then mark the system prompt and large tool
+// results with Anthropic `cache_control: { type: "ephemeral" }` breakpoints.
+// The input object is never mutated.
+export function injectAnthropicCacheControl(body: Record<string, unknown>): Record<string, unknown> {
+  if (!body || typeof body !== 'object') {
+    return body ?? {};
+  }
+  const copiedBody = structuredClone(body);
+  injectSystemCacheControl(copiedBody, CACHE_CONTROL_EPHEMERAL);
+  injectToolResultCacheControl(copiedBody, CACHE_CONTROL_EPHEMERAL);
+  return copiedBody;
+}
+
+// No-op for OpenAI-style bodies: no explicit cache markers are injected for
+// this provider; the body is returned unchanged (same reference).
+export function injectOpenAICacheControl(body: Record<string, unknown>): Record<string, unknown> {
+  return body;
+}
+
+export function injectCacheControl(
+ body: Record,
+ provider: 'anthropic' | 'openai'
+): Record {
+ if (provider === 'anthropic') {
+ return injectAnthropicCacheControl(body);
+ }
+ return injectOpenAICacheControl(body);
+}
diff --git a/openclaw-plugin/src/engine/compute-distance.ts b/openclaw-plugin/src/engine/compute-distance.ts
new file mode 100644
index 0000000..5ae024f
--- /dev/null
+++ b/openclaw-plugin/src/engine/compute-distance.ts
@@ -0,0 +1,214 @@
+// Flattened, chunk-id-sorted view of many contexts (CSR-style): context i
+// occupies indices [offsets[i], offsets[i] + lengths[i]) of the flat arrays.
+export interface PreparedContextsCpu {
+  chunkIds: number[];          // chunk ids, sorted ascending within each context
+  originalPositions: number[]; // position each chunk held in its original context
+  lengths: number[];           // number of chunks per context
+  offsets: number[];           // prefix sums over lengths; offsets[n] = total elements
+}
+
+// Distance between two document orderings:
+//   (1 - |A ∩ B| / max(|A|, |B|)) + alpha * (mean |posA - posB| over shared docs).
+// Identical orderings give 0; disjoint or empty ones give 1. Duplicate doc ids
+// within one context keep only their last position (Map.set overwrites).
+export function computeDistanceSingle(
+  contextA: number[],
+  contextB: number[],
+  alpha: number = 0.001
+): number {
+  // An empty ordering shares nothing: maximal distance.
+  if (contextA.length === 0 || contextB.length === 0) {
+    return 1.0;
+  }
+
+  const posA = new Map<number, number>();
+  const posB = new Map<number, number>();
+
+  for (let pos = 0; pos < contextA.length; pos += 1) {
+    posA.set(contextA[pos], pos);
+  }
+  for (let pos = 0; pos < contextB.length; pos += 1) {
+    posB.set(contextB[pos], pos);
+  }
+
+  let intersectionSize = 0;
+  let positionDiffSum = 0;
+
+  for (const [docId, aPos] of posA) {
+    const bPos = posB.get(docId);
+    if (bPos === undefined) {
+      continue;
+    }
+
+    intersectionSize += 1;
+    positionDiffSum += Math.abs(aPos - bPos);
+  }
+
+  if (intersectionSize === 0) {
+    return 1.0;
+  }
+
+  const maxSize = Math.max(contextA.length, contextB.length);
+  const overlapTerm = 1.0 - intersectionSize / maxSize;
+  const positionTerm = alpha * (positionDiffSum / intersectionSize);
+
+  return overlapTerm + positionTerm;
+}
+
+// Full distance matrix: result[i][j] = distance(queries[i], targets[j]).
+export function computeDistancesBatch(
+  queries: number[][],
+  targets: number[][],
+  alpha: number = 0.001
+): number[][] {
+  // Degenerate shapes yield a zero-filled matrix of the right dimensions.
+  if (queries.length === 0 || targets.length === 0) {
+    return queries.map(() => new Array(targets.length).fill(0));
+  }
+
+  return queries.map((query) =>
+    targets.map((target) => computeDistanceSingle(query, target, alpha))
+  );
+}
+
+// Flatten the contexts into CSR-style parallel arrays, with each context's
+// (chunkId, originalPosition) pairs sorted by chunkId so pairwise distances
+// can later be computed with a linear two-pointer merge.
+export function prepareContextsForCpu(contexts: number[][]): PreparedContextsCpu {
+  const n = contexts.length;
+  const sortedData: Array<Array<[number, number]>> = new Array(n);
+  const lengths: number[] = new Array(n).fill(0);
+
+  for (let idx = 0; idx < n; idx += 1) {
+    const ctx = contexts[idx];
+    if (ctx.length === 0) {
+      sortedData[idx] = [];
+      lengths[idx] = 0;
+      continue;
+    }
+
+    // Pair each chunk id with its original position, then sort by id.
+    const pairs: Array<[number, number]> = new Array(ctx.length);
+    for (let origPos = 0; origPos < ctx.length; origPos += 1) {
+      pairs[origPos] = [ctx[origPos], origPos];
+    }
+    pairs.sort((a, b) => a[0] - b[0]);
+
+    sortedData[idx] = pairs;
+    lengths[idx] = pairs.length;
+  }
+
+  // offsets[i] is where context i's run starts in the flattened arrays.
+  const offsets: number[] = new Array(n + 1).fill(0);
+  for (let i = 0; i < n; i += 1) {
+    offsets[i + 1] = offsets[i] + lengths[i];
+  }
+
+  const totalElements = offsets[n];
+  const chunkIds: number[] = new Array(totalElements).fill(0);
+  const originalPositions: number[] = new Array(totalElements).fill(0);
+
+  for (let i = 0; i < n; i += 1) {
+    const pairs = sortedData[i];
+    const start = offsets[i];
+    for (let j = 0; j < pairs.length; j += 1) {
+      const [chunkId, origPos] = pairs[j];
+      chunkIds[start + j] = chunkId;
+      originalPositions[start + j] = origPos;
+    }
+  }
+
+  return {
+    chunkIds,
+    originalPositions,
+    lengths,
+    offsets
+  };
+}
+
+// Distance between prepared contexts i and j via a two-pointer sweep over
+// their chunk-id-sorted runs; same formula as computeDistanceSingle.
+export function computeDistanceOptimized(
+  chunkIds: number[],
+  originalPositions: number[],
+  lengths: number[],
+  offsets: number[],
+  i: number,
+  j: number,
+  alpha: number
+): number {
+  const sizeI = lengths[i];
+  const sizeJ = lengths[j];
+
+  // An empty side shares nothing: maximal distance.
+  if (sizeI === 0 || sizeJ === 0) {
+    return 1.0;
+  }
+
+  const startI = offsets[i];
+  const startJ = offsets[j];
+  const stopI = startI + sizeI;
+  const stopJ = startJ + sizeJ;
+
+  let shared = 0;
+  let driftSum = 0;
+  let cursorI = startI;
+  let cursorJ = startJ;
+
+  // Merge walk: both runs are sorted by chunk id, so advance the smaller side.
+  while (cursorI < stopI && cursorJ < stopJ) {
+    const idI = chunkIds[cursorI];
+    const idJ = chunkIds[cursorJ];
+
+    if (idI === idJ) {
+      shared += 1;
+      driftSum += Math.abs(originalPositions[cursorI] - originalPositions[cursorJ]);
+      cursorI += 1;
+      cursorJ += 1;
+    } else if (idI < idJ) {
+      cursorI += 1;
+    } else {
+      cursorJ += 1;
+    }
+  }
+
+  const overlapTerm = 1.0 - shared / Math.max(sizeI, sizeJ);
+  const positionTerm = shared === 0 ? 0.0 : alpha * (driftSum / shared);
+
+  return overlapTerm + positionTerm;
+}
+
+// Condensed (upper-triangular, pdist-style) distance matrix over all context
+// pairs. Pairs are visited in (i, j>i) order, which matches the condensed
+// index n*i - i*(i+1)/2 + j - i - 1 exactly, so a running write cursor is used.
+export function computeDistanceMatrixCpu(
+  contexts: number[][],
+  alpha: number = 0.001
+): Float64Array {
+  const n = contexts.length;
+  const pairCount = (n * (n - 1)) / 2;
+
+  if (pairCount === 0) {
+    return new Float64Array(0);
+  }
+
+  const prepared = prepareContextsForCpu(contexts);
+  const condensed = new Float64Array(pairCount);
+
+  let nextSlot = 0;
+  for (let a = 0; a < n; a += 1) {
+    for (let b = a + 1; b < n; b += 1) {
+      condensed[nextSlot] = computeDistanceOptimized(
+        prepared.chunkIds,
+        prepared.originalPositions,
+        prepared.lengths,
+        prepared.offsets,
+        a,
+        b,
+        alpha
+      );
+      nextSlot += 1;
+    }
+  }
+
+  return condensed;
+}
diff --git a/openclaw-plugin/src/engine/conversation-tracker.ts b/openclaw-plugin/src/engine/conversation-tracker.ts
new file mode 100644
index 0000000..38f85fe
--- /dev/null
+++ b/openclaw-plugin/src/engine/conversation-tracker.ts
@@ -0,0 +1,239 @@
+// Outcome of deduplicating one request's docs against its conversation history.
+export interface DeduplicationResult {
+  originalDocs: number[];              // docs exactly as submitted
+  overlappingDocs: number[];           // docs already seen earlier in the chain
+  newDocs: number[];                   // docs never seen before
+  referenceHints: string[];            // one rendered hint per overlapping doc
+  deduplicatedDocs: number[];          // docs to actually send (the new ones)
+  docSourceTurns: Map<number, string>; // overlapping doc id -> request id that introduced it
+  isNewConversation: boolean;          // true when no tracked parent was found
+}
+
+// One tracked request: its docs, parent link, and position in the conversation.
+export interface RequestHistory {
+  requestId: string;
+  docs: number[];
+  parentRequestId: string | null;
+  turnNumber: number;
+  timestamp: number; // seconds since epoch (Date.now() / 1000)
+}
+
+// Counters reported by ConversationTracker.getStats().
+export interface ConversationTrackerStats {
+  totalRequests: number;
+  totalDedupCalls: number;
+  totalDocsDeduplicated: number;
+  activeRequests: number; // entries currently held in the tracker
+}
+
+export class ConversationTracker {
+ private _requests: Map;
+ private _hintTemplate: string;
+ private _maxTrackedRequests: number;
+ private _stats: {
+ totalRequests: number;
+ totalDedupCalls: number;
+ totalDocsDeduplicated: number;
+ };
+
+ constructor(hintTemplate?: string, maxTrackedRequests: number = 256) {
+ this._requests = new Map();
+ this._hintTemplate =
+ hintTemplate ?? "Please refer to [Doc {doc_id}] from the previous conversation turn.";
+ this._maxTrackedRequests = maxTrackedRequests;
+ this._stats = {
+ totalRequests: 0,
+ totalDedupCalls: 0,
+ totalDocsDeduplicated: 0
+ };
+ }
+
+ registerRequest(requestId: string, docs: number[], parentRequestId?: string | null): RequestHistory {
+ let turnNumber = 1;
+ if (parentRequestId && this._requests.has(parentRequestId)) {
+ turnNumber = this._requests.get(parentRequestId)!.turnNumber + 1;
+ }
+
+ const history: RequestHistory = {
+ requestId,
+ docs: [...docs],
+ parentRequestId: parentRequestId ?? null,
+ turnNumber,
+ timestamp: Date.now() / 1000
+ };
+
+ this._requests.set(requestId, history);
+ this._stats.totalRequests += 1;
+
+ // LRU eviction: remove oldest entries when over limit
+ if (this._requests.size > this._maxTrackedRequests) {
+ const oldest = this._requests.keys().next().value;
+ if (oldest !== undefined) {
+ this._requests.delete(oldest);
+ }
+ }
+
+ return history;
+ }
+
+ getConversationChain(requestId: string): RequestHistory[] {
+ const chain: RequestHistory[] = [];
+ let currentId: string | null = requestId;
+
+ while (currentId && this._requests.has(currentId)) {
+ const history: RequestHistory = this._requests.get(currentId)!;
+ chain.push(history);
+ currentId = history.parentRequestId;
+ }
+
+ chain.reverse();
+ return chain;
+ }
+
+ getAllPreviousDocs(parentRequestId: string): [Set, Map] {
+ const allDocs = new Set();
+ const docSources = new Map();
+
+ const chain = this.getConversationChain(parentRequestId);
+
+ for (const history of chain) {
+ for (const docId of history.docs) {
+ if (!allDocs.has(docId)) {
+ allDocs.add(docId);
+ docSources.set(docId, history.requestId);
+ }
+ }
+ }
+
+ return [allDocs, docSources];
+ }
+
+ deduplicate(
+ requestId: string,
+ docs: number[],
+ parentRequestId?: string | null,
+ hintTemplate?: string
+ ): DeduplicationResult {
+ this._stats.totalDedupCalls += 1;
+
+ if (!parentRequestId || !this._requests.has(parentRequestId)) {
+ this.registerRequest(requestId, docs, null);
+
+ return {
+ originalDocs: docs,
+ overlappingDocs: [],
+ newDocs: docs,
+ referenceHints: [],
+ deduplicatedDocs: docs,
+ docSourceTurns: new Map(),
+ isNewConversation: true
+ };
+ }
+
+ const [previousDocs, docSources] = this.getAllPreviousDocs(parentRequestId);
+
+ const overlappingDocs: number[] = [];
+ const newDocs: number[] = [];
+ const docSourceTurns = new Map();
+
+ for (const docId of docs) {
+ if (previousDocs.has(docId)) {
+ overlappingDocs.push(docId);
+ const sourceRequestId = docSources.get(docId);
+ if (sourceRequestId !== undefined) {
+ docSourceTurns.set(docId, sourceRequestId);
+ }
+ } else {
+ newDocs.push(docId);
+ }
+ }
+
+ const template = hintTemplate ?? this._hintTemplate;
+ const referenceHints: string[] = [];
+
+ for (const docId of overlappingDocs) {
+ const sourceRequest = docSources.get(docId);
+ const sourceHistory = sourceRequest ? this._requests.get(sourceRequest) : undefined;
+ const turnNumber = sourceHistory ? String(sourceHistory.turnNumber) : "previous";
+
+ const hint = template
+ .replaceAll("{doc_id}", String(docId))
+ .replaceAll("{turn_number}", turnNumber)
+ .replaceAll("{source_request}", sourceRequest ?? "previous");
+
+ referenceHints.push(hint);
+ }
+
+ this.registerRequest(requestId, docs, parentRequestId);
+ this._stats.totalDocsDeduplicated += overlappingDocs.length;
+
+ return {
+ originalDocs: docs,
+ overlappingDocs,
+ newDocs,
+ referenceHints,
+ deduplicatedDocs: newDocs,
+ docSourceTurns,
+ isNewConversation: false
+ };
+ }
+
+ deduplicateBatch(
+ requestIds: string[],
+ docsList: number[][],
+ parentRequestIds?: Array,
+ hintTemplate?: string
+ ): DeduplicationResult[] {
+ const effectiveParentRequestIds =
+ parentRequestIds ?? new Array(requestIds.length).fill(null);
+
+ const results: DeduplicationResult[] = [];
+ const n = Math.min(requestIds.length, docsList.length, effectiveParentRequestIds.length);
+
+ for (let i = 0; i < n; i += 1) {
+ const result = this.deduplicate(
+ requestIds[i],
+ docsList[i],
+ effectiveParentRequestIds[i],
+ hintTemplate
+ );
+ results.push(result);
+ }
+
+ return results;
+ }
+
+ removeRequest(requestId: string): boolean {
+ return this._requests.delete(requestId);
+ }
+
+ clearConversation(requestId: string): number {
+ const chain = this.getConversationChain(requestId);
+ let count = 0;
+
+ for (const history of chain) {
+ if (this.removeRequest(history.requestId)) {
+ count += 1;
+ }
+ }
+
+ return count;
+ }
+
+ reset(): void {
+ this._requests.clear();
+ this._stats = {
+ totalRequests: 0,
+ totalDedupCalls: 0,
+ totalDocsDeduplicated: 0
+ };
+ }
+
+ getStats(): ConversationTrackerStats {
+ return {
+ ...this._stats,
+ activeRequests: this._requests.size
+ };
+ }
+
+ getRequestHistory(requestId: string): RequestHistory | null {
+ return this._requests.get(requestId) ?? null;
+ }
+}
diff --git a/openclaw-plugin/src/engine/dedup.ts b/openclaw-plugin/src/engine/dedup.ts
new file mode 100644
index 0000000..1af124b
--- /dev/null
+++ b/openclaw-plugin/src/engine/dedup.ts
@@ -0,0 +1,581 @@
+import * as crypto from 'node:crypto';
+
+// Blocks shorter than this many characters are never replaced by a
+// reference, and payloads shorter than MIN_CONTENT_CHARS are never scanned.
+export const MIN_BLOCK_CHARS = 80;
+export const MIN_CONTENT_CHARS = 500;
+
+// Content-defined chunking: a line whose hash is divisible by CHUNK_MODULUS
+// closes a chunk, bounded below/above by the min/max line counts.
+export const CHUNK_MODULUS = 13;
+export const CHUNK_MIN_LINES = 5;
+export const CHUNK_MAX_LINES = 40;
+
+// Aggregate statistics for one dedup pass over a single request body.
+export interface DedupResult {
+  blocksDeduped: number;
+  blocksTotal: number;
+  systemBlocksMatched: number;
+  charsBefore: number;
+  charsAfter: number;
+  charsSaved: number;
+}
+
+// Per-call overrides for the module-level thresholds above.
+export interface DedupOptions {
+  minBlockChars?: number;
+  minContentChars?: number;
+  chunkModulus?: number;
+}
+
+// [source message index (-1 = system-prompt pre-scan), source label, block index]
+type SeenBlock = [number, string, number];
+
+// Minimal structural shapes of the OpenAI chat-completions payloads we read.
+interface OpenAIToolCall {
+  id?: string;
+  function?: {
+    name?: string;
+  };
+}
+
+interface OpenAIAssistantMessage {
+  role?: string;
+  tool_calls?: OpenAIToolCall[];
+}
+
+interface OpenAIToolMessage {
+  role?: string;
+  content?: unknown;
+  tool_call_id?: string;
+  name?: string;
+}
+
+interface OpenAIChatMessage extends OpenAIToolMessage {
+  tool_calls?: OpenAIToolCall[];
+}
+
+interface TextContentBlock {
+  type?: string;
+  text?: string;
+}
+
+interface ChatCompletionsBody {
+  messages?: OpenAIChatMessage[];
+}
+
+// Minimal structural shapes of the OpenAI Responses-API payloads we read.
+interface ResponsesFunctionCallItem {
+  type?: string;
+  call_id?: string;
+  name?: string;
+}
+
+interface ResponsesFunctionCallOutputItem {
+  type?: string;
+  call_id?: string;
+  output?: unknown;
+}
+
+interface ResponsesApiBody {
+  input?: ResponsesFunctionCallOutputItem[];
+}
+
+// Fenced code block: (opening fence + optional language)(body)(closing fence).
+const CODE_BLOCK_RE = /(```[\w]*\n)([\s\S]*?)(```)/g;
+
+function emptyDedupResult(): DedupResult {
+ return {
+ blocksDeduped: 0,
+ blocksTotal: 0,
+ systemBlocksMatched: 0,
+ charsBefore: 0,
+ charsAfter: 0,
+ charsSaved: 0
+ };
+}
+
+/**
+ * DJB2-style string hash (h = h*33 + c per UTF-16 unit), truncated to an
+ * unsigned 32-bit integer. Indexed loop is deliberate: `for..of` would walk
+ * code points rather than the charCodeAt units hashed here.
+ */
+export function hashString(str: string): number {
+  let acc = 5381;
+  for (let pos = 0; pos < str.length; pos += 1) {
+    acc = (Math.imul(acc, 33) + str.charCodeAt(pos)) | 0;
+  }
+  return acc >>> 0;
+}
+
+/**
+ * Map OpenAI tool_call ids to their function names by scanning assistant
+ * messages. Used to label dedup reference lines with the originating tool.
+ * (Restored the stripped `<string, string>` type arguments on `Record`.)
+ */
+export function buildToolNameMapOpenai(messages: OpenAIAssistantMessage[]): Record<string, string> {
+  const mapping: Record<string, string> = {};
+  for (const msg of messages) {
+    if (!msg || typeof msg !== 'object' || msg.role !== 'assistant') {
+      continue;
+    }
+
+    for (const tc of msg.tool_calls || []) {
+      if (!tc || typeof tc !== 'object') {
+        continue;
+      }
+      const tcId = tc.id || '';
+      const fn = tc.function;
+      if (fn && typeof fn === 'object' && fn.name) {
+        mapping[tcId] = fn.name;
+      }
+    }
+  }
+  return mapping;
+}
+
+/**
+ * Map Responses-API call_ids to function names by scanning function_call
+ * items. Items missing either field are skipped.
+ * (Restored the stripped `<string, string>` type arguments on `Record`.)
+ */
+export function buildToolNameMapResponses(items: ResponsesFunctionCallItem[]): Record<string, string> {
+  const mapping: Record<string, string> = {};
+  for (const item of items) {
+    if (item && typeof item === 'object' && item.type === 'function_call') {
+      const callId = item.call_id || '';
+      const name = item.name || '';
+      if (callId && name) {
+        mapping[callId] = name;
+      }
+    }
+  }
+  return mapping;
+}
+
+/**
+ * Split text into variable-size chunks at content-defined boundaries: a line
+ * closes a chunk when its trimmed hash is divisible by `chunkModulus` (and at
+ * least CHUNK_MIN_LINES have accumulated), or at CHUNK_MAX_LINES. Identical
+ * content therefore chunks identically, enabling hash-based dedup.
+ */
+export function contentDefinedChunking(
+  text: string,
+  chunkModulus: number = CHUNK_MODULUS
+): string[] {
+  const lines = text.split('\n');
+  if (lines.length <= CHUNK_MIN_LINES) {
+    return [text];
+  }
+
+  const blocks: string[] = [];
+  let current: string[] = [];
+
+  for (const line of lines) {
+    current.push(line);
+    // NOTE(review): `& 0xFFFFFFFF` coerces the already-unsigned hash to a
+    // *signed* 32-bit value for inputs >= 2^31. Divisibility still behaves
+    // (JS `%` yields -0 for negative multiples and -0 === 0), but boundary
+    // placement depends on this exact coercion — do not "simplify" it away.
+    const lineHash = hashString(line.trim()) & 0xFFFFFFFF;
+    const isBoundary = (
+      lineHash % chunkModulus === 0 && current.length >= CHUNK_MIN_LINES
+    ) || current.length >= CHUNK_MAX_LINES;
+
+    if (isBoundary) {
+      blocks.push(current.join('\n'));
+      current = [];
+    }
+  }
+
+  if (current.length > 0) {
+    // Fold a too-short trailing remainder into the previous chunk so no
+    // chunk (other than a lone first one) is below the minimum size.
+    if (blocks.length > 0 && current.length < CHUNK_MIN_LINES) {
+      blocks[blocks.length - 1] += `\n${current.join('\n')}`;
+    } else {
+      blocks.push(current.join('\n'));
+    }
+  }
+
+  return blocks;
+}
+
+/**
+ * Identity hash for a chunk: SHA-256 of the trimmed block, truncated to the
+ * first 20 hex characters. Leading/trailing whitespace does not affect it.
+ */
+export function hashBlock(block: string): string {
+  const normalized = block.trim();
+  const fullDigest = crypto.createHash('sha256').update(normalized, 'utf8').digest('hex');
+  return fullDigest.slice(0, 20);
+}
+
+/**
+ * Resolve the two overloaded calling conventions — (body, systemContent?,
+ * opts?) and (body, opts?) — into one normalized { systemContent, opts }.
+ */
+function normalizeDedupArgs(
+  systemContentOrOpts?: string | DedupOptions,
+  maybeOpts?: DedupOptions
+): { systemContent: string | undefined; opts: DedupOptions } {
+  const calledWithSystemContent = typeof systemContentOrOpts === 'string';
+  const systemContent = calledWithSystemContent ? systemContentOrOpts : undefined;
+  const opts = calledWithSystemContent
+    ? (maybeOpts ?? {})
+    : (systemContentOrOpts ?? {});
+  return { systemContent, opts };
+}
+
+/**
+ * Copy system-prompt pre-scan entries into `seenBlocks` without overwriting
+ * hashes already attributed to a conversation message. No-op when no
+ * pre-scan was performed.
+ * (Restored the stripped `Map<string, SeenBlock>` type arguments.)
+ */
+function mergePreSeen(
+  seenBlocks: Map<string, SeenBlock>,
+  preSeen?: Map<string, SeenBlock>
+): void {
+  if (!preSeen) {
+    return;
+  }
+
+  for (const [hash, origin] of preSeen.entries()) {
+    if (!seenBlocks.has(hash)) {
+      seenBlocks.set(hash, origin);
+    }
+  }
+}
+
+/**
+ * Deduplicate one text payload against previously seen blocks.
+ *
+ * Chunks `text`, replaces any sufficiently large chunk whose hash was seen
+ * in an *earlier* message (or the system-prompt pre-scan) with a one-line
+ * reference, and registers new chunk hashes in `seenBlocks`. Returns the
+ * rewritten text, or `undefined` when nothing was replaced (callers keep the
+ * original). Mutates `seenBlocks` and `result` in place.
+ * (Restored the stripped `Map<string, SeenBlock>` type arguments; logic
+ * unchanged.)
+ */
+function dedupText(
+  text: string,
+  seenBlocks: Map<string, SeenBlock>,
+  msgIdx: number,
+  fnName: string,
+  result: DedupResult,
+  minBlockChars: number,
+  chunkModulus: number,
+  preSeen?: Map<string, SeenBlock>
+): string | undefined {
+  mergePreSeen(seenBlocks, preSeen);
+
+  const blocks = contentDefinedChunking(text, chunkModulus);
+  if (blocks.length < 2) {
+    // Single-chunk payloads are never rewritten, but their hashes are still
+    // registered so later messages can dedup against them.
+    for (const block of blocks) {
+      if (block.trim().length >= minBlockChars) {
+        const h = hashBlock(block);
+        result.blocksTotal += 1;
+        if (!seenBlocks.has(h)) {
+          seenBlocks.set(h, [msgIdx, fnName, 0]);
+        }
+      }
+    }
+    return undefined;
+  }
+
+  const newBlocks: string[] = [];
+  let dedupedInThis = 0;
+
+  for (let blockIdx = 0; blockIdx < blocks.length; blockIdx++) {
+    const block = blocks[blockIdx];
+    if (block.trim().length < minBlockChars) {
+      newBlocks.push(block);
+      continue;
+    }
+
+    const h = hashBlock(block);
+    result.blocksTotal += 1;
+
+    const seen = seenBlocks.get(h);
+    // Only dedup across messages; repeats within the same message are kept.
+    if (seen && seen[0] !== msgIdx) {
+      const [origMsgIdx, origFn] = seen;
+      const firstLine = block.trim().split('\n')[0].slice(0, 80);
+      const ref = `[..., "${firstLine}" — identical to earlier ${origFn} result, see above ...]`;
+      const charsSaved = block.length - ref.length;
+      if (charsSaved > 0) {
+        if (origMsgIdx === -1) {
+          // -1 marks blocks that came from the system-prompt pre-scan.
+          result.systemBlocksMatched += 1;
+        }
+        newBlocks.push(ref);
+        dedupedInThis += 1;
+        result.blocksDeduped += 1;
+      } else {
+        // The reference would be longer than the block; keep the original.
+        newBlocks.push(block);
+      }
+    } else {
+      if (!seen) {
+        seenBlocks.set(h, [msgIdx, fnName, blockIdx]);
+      }
+      newBlocks.push(block);
+    }
+  }
+
+  if (dedupedInThis > 0) {
+    // NOTE(review): chunks are re-joined with a blank line, so the rewritten
+    // text does not round-trip the original line spacing — presumably
+    // intentional for readability; confirm.
+    return newBlocks.join('\n\n');
+  }
+
+  // Nothing replaced: still register unseen block hashes for later calls.
+  for (let blockIdx = 0; blockIdx < blocks.length; blockIdx++) {
+    const block = blocks[blockIdx];
+    if (block.trim().length >= minBlockChars) {
+      const h = hashBlock(block);
+      if (!seenBlocks.has(h)) {
+        seenBlocks.set(h, [msgIdx, fnName, blockIdx]);
+      }
+    }
+  }
+
+  return undefined;
+}
+
+function prescanSystemBlocks(
+ systemContent: string,
+ minBlockChars: number,
+ chunkModulus: number
+): Map {
+ const preSeen = new Map();
+ if (typeof systemContent !== 'string' || !systemContent.trim()) {
+ return preSeen;
+ }
+
+ const blocks = contentDefinedChunking(systemContent, chunkModulus);
+ for (let blockIdx = 0; blockIdx < blocks.length; blockIdx++) {
+ const block = blocks[blockIdx];
+ if (block.trim().length < minBlockChars) {
+ continue;
+ }
+ const h = hashBlock(block);
+ if (!preSeen.has(h)) {
+ preSeen.set(h, [-1, 'system prompt', blockIdx]);
+ }
+ }
+
+ return preSeen;
+}
+
+/**
+ * Deduplicate large fenced code blocks inside assistant messages against
+ * previously seen content, mutating `messages` in place.
+ *
+ * For each assistant message the dominant text payload is chosen (string
+ * content, or the longest fenced-code text block in array content); each
+ * sufficiently large fence body is then rewritten via dedupText. Fences are
+ * processed last-to-first so earlier match offsets stay valid while the
+ * string is spliced.
+ * (Restored the stripped `Map<string, SeenBlock>` type arguments; logic
+ * unchanged.)
+ */
+export function dedupAssistantCodeBlocks(
+  messages: OpenAIChatMessage[],
+  seenBlocks: Map<string, SeenBlock>,
+  result: DedupResult,
+  minBlockChars: number,
+  minContentChars: number,
+  chunkModulus: number,
+  preSeen?: Map<string, SeenBlock>
+): void {
+  for (let idx = 0; idx < messages.length; idx++) {
+    const msg = messages[idx];
+    if (!msg || typeof msg !== 'object' || msg.role !== 'assistant') {
+      continue;
+    }
+
+    const rawContent = msg.content;
+    let content = '';
+    let isListContent = false;
+    let textBlockIdx = -1;
+
+    if (typeof rawContent === 'string') {
+      content = rawContent;
+    } else if (Array.isArray(rawContent)) {
+      // Pick the longest text block that actually contains a code fence.
+      for (let blockIdx = 0; blockIdx < rawContent.length; blockIdx++) {
+        const block = rawContent[blockIdx] as { type?: string; text?: string };
+        if (block?.type !== 'text' || typeof block.text !== 'string') {
+          continue;
+        }
+        if (block.text.includes('```') && block.text.length > content.length) {
+          content = block.text;
+          textBlockIdx = blockIdx;
+          isListContent = true;
+        }
+      }
+
+      if (!content) {
+        continue;
+      }
+    } else {
+      continue;
+    }
+
+    if (content.length < minContentChars) {
+      continue;
+    }
+
+    const matches = Array.from(content.matchAll(CODE_BLOCK_RE));
+    if (matches.length === 0) {
+      continue;
+    }
+
+    let modified = false;
+    let newContent = content;
+
+    // Iterate in reverse: splicing later fences does not shift the offsets
+    // of earlier ones, so match.index remains valid throughout.
+    for (let i = matches.length - 1; i >= 0; i--) {
+      const match = matches[i];
+      const code = match[2] ?? '';
+      if (code.trim().length < minBlockChars) {
+        continue;
+      }
+
+      const dedupedCode = dedupText(
+        code,
+        seenBlocks,
+        idx,
+        'assistant',
+        result,
+        minBlockChars,
+        chunkModulus,
+        preSeen
+      );
+
+      if (dedupedCode === undefined) {
+        continue;
+      }
+
+      // Replace only the fence *body*; the opening/closing fences stay.
+      const start = match.index + (match[1]?.length ?? 0);
+      const end = start + code.length;
+      const originalLen = end - start;
+
+      newContent = `${newContent.slice(0, start)}${dedupedCode}${newContent.slice(end)}`;
+      result.charsBefore += originalLen;
+      result.charsAfter += dedupedCode.length;
+      result.charsSaved += (originalLen - dedupedCode.length);
+      modified = true;
+    }
+
+    if (!modified) {
+      continue;
+    }
+
+    if (isListContent && Array.isArray(msg.content) && textBlockIdx >= 0) {
+      const textBlock = msg.content[textBlockIdx] as { type?: string; text?: string };
+      if (textBlock && textBlock.type === 'text') {
+        textBlock.text = newContent;
+      }
+    } else {
+      msg.content = newContent;
+    }
+  }
+}
+
+export function dedupChatCompletions(body: ChatCompletionsBody, opts?: DedupOptions): DedupResult;
+export function dedupChatCompletions(
+  body: ChatCompletionsBody,
+  systemContent?: string,
+  opts?: DedupOptions
+): DedupResult;
+/**
+ * Deduplicate repeated blocks across tool results (and assistant code
+ * fences) in an OpenAI chat-completions body, mutating it in place.
+ * Optionally pre-scans `systemContent` so tool output repeating the system
+ * prompt is also collapsed. Returns aggregate savings statistics.
+ */
+export function dedupChatCompletions(
+  body: ChatCompletionsBody,
+  systemContentOrOpts?: string | DedupOptions,
+  maybeOpts?: DedupOptions
+): DedupResult {
+  const normalized = normalizeDedupArgs(systemContentOrOpts, maybeOpts);
+  const systemContent = normalized.systemContent;
+  const opts = normalized.opts;
+  const minBlockChars = opts.minBlockChars ?? MIN_BLOCK_CHARS;
+  const minContentChars = opts.minContentChars ?? MIN_CONTENT_CHARS;
+  const chunkModulus = opts.chunkModulus ?? CHUNK_MODULUS;
+
+  const messages = body?.messages;
+  if (!Array.isArray(messages) || messages.length === 0) {
+    return emptyDedupResult();
+  }
+
+  const toolNames = buildToolNameMapOpenai(messages);
+  // Typed explicitly (was a bare `new Map()`), matching dedupText's parameter.
+  const seenBlocks = new Map<string, SeenBlock>();
+  const preSeen = systemContent
+    ? prescanSystemBlocks(systemContent, minBlockChars, chunkModulus)
+    : undefined;
+  mergePreSeen(seenBlocks, preSeen);
+  const result = emptyDedupResult();
+
+  for (let idx = 0; idx < messages.length; idx++) {
+    const msg = messages[idx];
+    if (!msg || typeof msg !== 'object') {
+      continue;
+    }
+    // NOTE(review): 'toolResult' is not a standard OpenAI role — presumably
+    // accepted for a non-OpenAI client shape; confirm before removing.
+    if (msg.role !== 'tool' && msg.role !== 'toolResult') {
+      continue;
+    }
+
+    let content = msg.content ?? '';
+    if (Array.isArray(content)) {
+      // Flatten array-of-text-blocks content into one string for chunking.
+      const textParts: string[] = [];
+      for (const block of content) {
+        if (!block || typeof block !== 'object') {
+          continue;
+        }
+        const textBlock = block as TextContentBlock;
+        if (textBlock.type === 'text' && typeof textBlock.text === 'string') {
+          textParts.push(textBlock.text);
+        }
+      }
+      content = textParts.join('\n');
+    }
+    if (typeof content !== 'string' || content.length < minContentChars) {
+      continue;
+    }
+
+    const toolCallId = msg.tool_call_id || '';
+    const fnName = toolNames[toolCallId] || msg.name || 'tool';
+    const dedupedContent = dedupText(
+      content,
+      seenBlocks,
+      idx,
+      fnName,
+      result,
+      minBlockChars,
+      chunkModulus,
+      preSeen
+    );
+
+    if (dedupedContent === undefined) {
+      continue;
+    }
+
+    const originalLen = content.length;
+    if (Array.isArray(msg.content)) {
+      // Write the deduped text back into the *first* text block only.
+      // NOTE(review): `content` was flattened from all text blocks, but only
+      // the first block is overwritten here — confirm this is intended.
+      const textBlockIdx = msg.content.findIndex(
+        (block) => !!block && typeof block === 'object' && (block as TextContentBlock).type === 'text'
+      );
+      if (textBlockIdx >= 0) {
+        const textBlock = msg.content[textBlockIdx];
+        if (textBlock && typeof textBlock === 'object') {
+          (textBlock as TextContentBlock).text = dedupedContent;
+        }
+      }
+    } else {
+      msg.content = dedupedContent;
+    }
+
+    const newLen = dedupedContent.length;
+    result.charsBefore += originalLen;
+    result.charsAfter += newLen;
+    result.charsSaved += (originalLen - newLen);
+  }
+
+  dedupAssistantCodeBlocks(
+    messages,
+    seenBlocks,
+    result,
+    minBlockChars,
+    minContentChars,
+    chunkModulus,
+    preSeen
+  );
+
+  return result;
+}
+
+export function dedupResponsesApi(body: ResponsesApiBody, opts?: DedupOptions): DedupResult;
+export function dedupResponsesApi(
+  body: ResponsesApiBody,
+  systemContent?: string,
+  opts?: DedupOptions
+): DedupResult;
+/**
+ * Deduplicate repeated blocks across function_call_output items in an
+ * OpenAI Responses-API body, mutating it in place. Mirrors
+ * dedupChatCompletions for the `input` item list.
+ */
+export function dedupResponsesApi(
+  body: ResponsesApiBody,
+  systemContentOrOpts?: string | DedupOptions,
+  maybeOpts?: DedupOptions
+): DedupResult {
+  const normalized = normalizeDedupArgs(systemContentOrOpts, maybeOpts);
+  const systemContent = normalized.systemContent;
+  const opts = normalized.opts;
+  const minBlockChars = opts.minBlockChars ?? MIN_BLOCK_CHARS;
+  const minContentChars = opts.minContentChars ?? MIN_CONTENT_CHARS;
+  const chunkModulus = opts.chunkModulus ?? CHUNK_MODULUS;
+
+  const inputItems = body?.input;
+  if (!Array.isArray(inputItems) || inputItems.length === 0) {
+    return emptyDedupResult();
+  }
+
+  const fnNames = buildToolNameMapResponses(inputItems);
+  // Typed explicitly (was a bare `new Map()`), matching dedupText's parameter.
+  const seenBlocks = new Map<string, SeenBlock>();
+  const preSeen = systemContent
+    ? prescanSystemBlocks(systemContent, minBlockChars, chunkModulus)
+    : undefined;
+  mergePreSeen(seenBlocks, preSeen);
+  const result = emptyDedupResult();
+
+  for (let idx = 0; idx < inputItems.length; idx++) {
+    const item = inputItems[idx];
+    if (!item || typeof item !== 'object' || item.type !== 'function_call_output') {
+      continue;
+    }
+
+    const output = item.output ?? '';
+    if (typeof output !== 'string' || output.length < minContentChars) {
+      continue;
+    }
+
+    const callId = item.call_id || '';
+    const fnName = fnNames[callId] || callId || 'tool';
+    const dedupedOutput = dedupText(
+      output,
+      seenBlocks,
+      idx,
+      fnName,
+      result,
+      minBlockChars,
+      chunkModulus,
+      preSeen
+    );
+
+    if (dedupedOutput === undefined) {
+      continue;
+    }
+
+    const originalLen = output.length;
+    item.output = dedupedOutput;
+    const newLen = dedupedOutput.length;
+    result.charsBefore += originalLen;
+    result.charsAfter += newLen;
+    result.charsSaved += (originalLen - newLen);
+  }
+
+  return result;
+}
diff --git a/openclaw-plugin/src/engine/engine.test.ts b/openclaw-plugin/src/engine/engine.test.ts
new file mode 100644
index 0000000..be258a4
--- /dev/null
+++ b/openclaw-plugin/src/engine/engine.test.ts
@@ -0,0 +1,768 @@
+import { describe, expect, it } from "vitest";
+import {
+ injectAnthropicCacheControl,
+ injectCacheControl,
+ injectOpenAICacheControl,
+} from "./cache-control.js";
+import {
+ buildToolNameMapOpenai,
+ contentDefinedChunking,
+ dedupChatCompletions,
+ dedupResponsesApi,
+ hashBlock,
+} from "./dedup.js";
+import {
+ extractAllOpenai,
+ extractDocuments,
+ extractFromAnthropicMessages,
+ extractFromAnthropicToolResults,
+ extractFromOpenaiChat,
+ extractFromOpenaiToolResults,
+ extractSingleDocsFromOpenaiToolResults,
+ getFormatHandler,
+ parseInterceptHeaders,
+ reconstructAnthropicToolResult,
+ reconstructContent,
+ reconstructOpenaiToolResult,
+} from "./extract.js";
+import { ReorderState, reorderDocuments } from "./reorder.js";
+
+// Default header-derived config used by most extract tests.
+const DEFAULT_CONFIG = parseInterceptHeaders({});
+
+// NOTE(review): the fixture strings below appear to have had XML-like markup
+// stripped during an export ("Doc A content hereDoc B content here" was
+// presumably a tagged document list). Restore from version control before
+// trusting these fixtures.
+const OPENAI_CHAT_BODY = {
+  model: "claude-sonnet-4-6",
+  messages: [
+    {
+      role: "system",
+      content:
+        "Doc A content hereDoc B content hereDoc C content here",
+    },
+    { role: "user", content: "What do these docs say?" },
+  ],
+};
+
+const ANTHROPIC_MESSAGES_BODY = {
+  model: "claude-sonnet-4-6",
+  system:
+    "Doc A content hereDoc B content here",
+  messages: [{ role: "user", content: "Summarize the documents." }],
+};
+
+// Two 600-char runs separated by blank lines: long enough to pass the
+// dedup content threshold and chunk into multiple blocks.
+const LARGE_CONTENT = "x".repeat(600) + "\n".repeat(20) + "y".repeat(600);
+
+// Chat body with two tool results carrying identical content — the basic
+// cross-message dedup scenario.
+const DEDUP_BODY = {
+  messages: [
+    {
+      role: "assistant",
+      content: "",
+      tool_calls: [
+        { id: "call_1", function: { name: "read_file", arguments: "{}" } },
+        { id: "call_2", function: { name: "read_file", arguments: "{}" } },
+      ],
+    },
+    { role: "tool", tool_call_id: "call_1", content: LARGE_CONTENT },
+    { role: "tool", tool_call_id: "call_2", content: LARGE_CONTENT },
+  ],
+};
+
+// Build 20 distinct ~70-char lines so chunking yields multiple blocks and
+// different prefixes yield entirely different content.
+function makeLargeContent(prefix: string): string {
+  const filler = "z".repeat(60);
+  const lines: string[] = [];
+  for (let i = 0; i < 20; i += 1) {
+    lines.push(`${prefix} line ${i} ${filler}`);
+  }
+  return lines.join("\n");
+}
+
+describe("extract", () => {
+ it("parseInterceptHeaders parses X-ContextPilot-* headers and defaults", () => {
+ const parsed = parseInterceptHeaders({
+ "X-ContextPilot-Enabled": "0",
+ "x-contextpilot-mode": "xml_tag",
+ "x-contextpilot-tag": "context",
+ "x-contextpilot-separator": "===",
+ "x-contextpilot-alpha": "0.05",
+ "x-contextpilot-linkage": "single",
+ "x-contextpilot-scope": "invalid",
+ });
+
+ expect(parsed).toEqual({
+ enabled: false,
+ mode: "xml_tag",
+ tag: "context",
+ separator: "===",
+ alpha: 0.05,
+ linkageMethod: "single",
+ scope: "all",
+ });
+
+ const defaults = parseInterceptHeaders({});
+ expect(defaults.enabled).toBe(true);
+ expect(defaults.mode).toBe("auto");
+ expect(defaults.tag).toBe("document");
+ expect(defaults.separator).toBe("---");
+ expect(defaults.alpha).toBe(0.001);
+ expect(defaults.linkageMethod).toBe("average");
+ expect(defaults.scope).toBe("all");
+ });
+
+ it("extractDocuments extracts XML-tagged documents", () => {
+ const text =
+ "AB";
+ const extraction = extractDocuments(text, DEFAULT_CONFIG);
+ expect(extraction).not.toBeNull();
+ expect(extraction?.mode).toBe("xml_tag");
+ expect(extraction?.documents).toEqual(["A", "B"]);
+ expect(extraction?.wrapperTag).toBe("documents");
+ expect(extraction?.itemTag).toBe("document");
+ });
+
+ it("extractDocuments extracts numbered documents", () => {
+ const extraction = extractDocuments(
+ "[1] First doc\n[2] Second doc",
+ parseInterceptHeaders({ "x-contextpilot-mode": "numbered" }),
+ );
+ expect(extraction).not.toBeNull();
+ expect(extraction?.mode).toBe("numbered");
+ expect(extraction?.documents).toEqual(["First doc", "Second doc"]);
+ });
+
+ it("extractDocuments extracts JSON results documents", () => {
+ const extraction = extractDocuments(
+ JSON.stringify({ results: [{ url: "a.com" }, { url: "b.com" }] }),
+ parseInterceptHeaders({ "x-contextpilot-mode": "json_results" }),
+ );
+ expect(extraction).not.toBeNull();
+ expect(extraction?.mode).toBe("json_results");
+ expect(extraction?.documents).toEqual(["a.com", "b.com"]);
+ });
+
+ it("extractDocuments auto mode resolves XML > numbered > JSON", () => {
+ const xml = extractDocuments(
+ "[1] one[2] two",
+ DEFAULT_CONFIG,
+ );
+ expect(xml?.mode).toBe("xml_tag");
+
+ const numbered = extractDocuments("[1] one\n[2] two", DEFAULT_CONFIG);
+ expect(numbered?.mode).toBe("numbered");
+
+ const json = extractDocuments(
+ JSON.stringify({ results: [{ url: "one" }, { url: "two" }] }),
+ DEFAULT_CONFIG,
+ );
+ expect(json?.mode).toBe("json_results");
+ });
+
+ it("extractDocuments returns null for fewer than two docs", () => {
+ const numberedSingle = extractDocuments(
+ "[1] Only one",
+ parseInterceptHeaders({ "x-contextpilot-mode": "numbered" }),
+ );
+ expect(numberedSingle).toBeNull();
+
+ const jsonSingle = extractDocuments(
+ JSON.stringify({ results: [{ url: "only-one" }] }),
+ parseInterceptHeaders({ "x-contextpilot-mode": "json_results" }),
+ );
+ expect(jsonSingle).toBeNull();
+ });
+
+ it("reconstructContent rebuilds XML while preserving tags", () => {
+ const extraction = extractDocuments(
+ "prefixABsuffix",
+ DEFAULT_CONFIG,
+ );
+ expect(extraction).not.toBeNull();
+ if (!extraction) {
+ throw new Error("expected extraction");
+ }
+
+ const rebuilt = reconstructContent(extraction, ["B", "A"]);
+ expect(rebuilt).toContain("prefix");
+ expect(rebuilt).toContain("suffix");
+ expect(rebuilt).toContain("");
+ expect(rebuilt).toContain("B");
+ expect(rebuilt).toContain("A");
+ });
+
+ it("reconstructContent rebuilds numbered format", () => {
+ const extraction = extractDocuments(
+ "Lead\n[1] First\n[2] Second",
+ parseInterceptHeaders({ "x-contextpilot-mode": "numbered" }),
+ );
+ expect(extraction).not.toBeNull();
+ if (!extraction) {
+ throw new Error("expected extraction");
+ }
+
+ const rebuilt = reconstructContent(extraction, ["Second", "First"]);
+ expect(rebuilt).toContain("Lead");
+ expect(rebuilt).toContain("[1] Second");
+ expect(rebuilt).toContain("[2] First");
+ });
+
+ it("extractFromOpenaiChat extracts from system message", () => {
+ const extraction = extractFromOpenaiChat(OPENAI_CHAT_BODY, DEFAULT_CONFIG);
+ expect(extraction).not.toBeNull();
+ expect(extraction?.[1]).toBe(0);
+ expect(extraction?.[0].documents).toEqual([
+ "Doc A content here",
+ "Doc B content here",
+ "Doc C content here",
+ ]);
+ });
+
+ it("extractFromAnthropicMessages extracts from system string", () => {
+ const extraction = extractFromAnthropicMessages(
+ ANTHROPIC_MESSAGES_BODY,
+ DEFAULT_CONFIG,
+ );
+ expect(extraction).not.toBeNull();
+ expect(extraction?.documents).toEqual([
+ "Doc A content here",
+ "Doc B content here",
+ ]);
+ });
+
+ it("extractFromOpenaiToolResults extracts tool-result documents", () => {
+ const body = {
+ messages: [
+ { role: "tool", content: "AB" },
+ ],
+ };
+ const extractions = extractFromOpenaiToolResults(body, DEFAULT_CONFIG);
+ expect(extractions).toHaveLength(1);
+ expect(extractions[0]?.[0].documents).toEqual(["A", "B"]);
+ expect(extractions[0]?.[1]).toEqual({
+ msgIndex: 0,
+ blockIndex: -1,
+ innerBlockIndex: -1,
+ });
+ });
+
+ it("extractFromAnthropicToolResults extracts tool_result blocks", () => {
+ const body = {
+ messages: [
+ {
+ role: "user",
+ content: [
+ {
+ type: "tool_result",
+ content:
+ "AB",
+ },
+ ],
+ },
+ ],
+ };
+ const extractions = extractFromAnthropicToolResults(body, DEFAULT_CONFIG);
+ expect(extractions).toHaveLength(1);
+ expect(extractions[0]?.[0].documents).toEqual(["A", "B"]);
+ expect(extractions[0]?.[1]).toEqual({
+ msgIndex: 0,
+ blockIndex: 0,
+ innerBlockIndex: -1,
+ });
+ });
+
+ it("FormatHandler OpenAI returns a working handler", () => {
+ const handler = getFormatHandler("openai_chat");
+ expect(handler.targetPath()).toBe("/v1/chat/completions");
+
+ const body = structuredClone(OPENAI_CHAT_BODY);
+ const all = handler.extractAll(body, DEFAULT_CONFIG);
+ expect(all.systemExtraction).not.toBeNull();
+ expect(all.hasExtractions).toBe(true);
+
+ if (!all.systemExtraction) {
+ throw new Error("expected system extraction");
+ }
+
+ handler.reconstructSystem(
+ body,
+ all.systemExtraction[0],
+ ["Doc C content here", "Doc B content here", "Doc A content here"],
+ all.systemExtraction[1],
+ );
+ expect(body.messages[0]?.content).toContain("Doc C content here");
+ });
+
+ it("FormatHandler Anthropic returns a working handler", () => {
+ const handler = getFormatHandler("anthropic_messages");
+ expect(handler.targetPath()).toBe("/v1/messages");
+
+ const body = structuredClone(ANTHROPIC_MESSAGES_BODY);
+ const all = handler.extractAll(body, DEFAULT_CONFIG);
+ expect(all.systemExtraction).not.toBeNull();
+ expect(all.hasExtractions).toBe(true);
+
+ if (!all.systemExtraction) {
+ throw new Error("expected system extraction");
+ }
+
+ handler.reconstructSystem(
+ body,
+ all.systemExtraction[0],
+ ["Doc B content here", "Doc A content here"],
+ all.systemExtraction[1],
+ );
+ expect(body.system).toContain("Doc B content here");
+ });
+
+ it("extractAllOpenai extracts from both system and tool results", () => {
+ const body = {
+ messages: [
+ {
+ role: "system",
+ content:
+ "Sys ASys B",
+ },
+ {
+ role: "tool",
+ content:
+ "Tool ATool B",
+ },
+ ],
+ };
+
+ const all = extractAllOpenai(body, DEFAULT_CONFIG);
+ expect(all.systemExtraction).not.toBeNull();
+ expect(all.toolExtractions).toHaveLength(1);
+ expect(all.totalDocuments).toBe(4);
+ });
+
+ it("extractSingleDocsFromOpenaiToolResults extracts single long docs", () => {
+ const body = {
+ messages: [
+ {
+ role: "tool",
+ tool_call_id: "call_99",
+ content: `Result:\n${"r".repeat(240)}`,
+ },
+ ],
+ };
+
+ const extracted = extractSingleDocsFromOpenaiToolResults(body, DEFAULT_CONFIG);
+ expect(extracted).toHaveLength(1);
+ expect(extracted[0]?.[0].toolCallId).toBe("call_99");
+ expect(extracted[0]?.[0].content.length).toBeGreaterThanOrEqual(200);
+ expect(extracted[0]?.[0].contentHash).toMatch(/^[0-9a-f]{64}$/);
+ });
+
+ it("reconstructOpenaiToolResult reconstructs a tool result in-place", () => {
+ const body = {
+ messages: [
+ {
+ role: "tool",
+ content:
+ "AB",
+ },
+ ],
+ };
+
+ const extractions = extractFromOpenaiToolResults(body, DEFAULT_CONFIG);
+ expect(extractions).toHaveLength(1);
+ const first = extractions[0];
+ if (!first) {
+ throw new Error("expected extraction");
+ }
+
+ reconstructOpenaiToolResult(body, first[0], ["B", "A"], first[1]);
+ expect(body.messages[0]?.content).toContain("B");
+ expect(body.messages[0]?.content).toContain("A");
+ });
+
+ it("reconstructAnthropicToolResult reconstructs a tool result in-place", () => {
+ const body = {
+ messages: [
+ {
+ role: "user",
+ content: [
+ {
+ type: "tool_result",
+ content:
+ "AB",
+ },
+ ],
+ },
+ ],
+ };
+
+ const extractions = extractFromAnthropicToolResults(body, DEFAULT_CONFIG);
+ expect(extractions).toHaveLength(1);
+ const first = extractions[0];
+ if (!first) {
+ throw new Error("expected extraction");
+ }
+
+ reconstructAnthropicToolResult(body, first[0], ["B", "A"], first[1]);
+ expect(body.messages[0]?.content[0]?.content).toContain("B");
+ expect(body.messages[0]?.content[0]?.content).toContain("A");
+ });
+});
+
+describe("dedup", () => {
+ it("contentDefinedChunking splits text into multiple blocks at boundaries", () => {
+ const text = Array.from({ length: 12 }, (_, i) => `line-${i}`).join("\n");
+ const blocks = contentDefinedChunking(text, 1);
+ expect(blocks).toHaveLength(2);
+ expect(blocks[0]?.split("\n")).toHaveLength(5);
+ expect(blocks[1]?.split("\n")).toHaveLength(7);
+ });
+
+ it("contentDefinedChunking returns one block for short text", () => {
+ const short = "a\nb\nc\nd\ne";
+ const blocks = contentDefinedChunking(short);
+ expect(blocks).toEqual([short]);
+ });
+
+ it("hashBlock is consistent and returns 20-char hex", () => {
+ const h1 = hashBlock(" abc\n");
+ const h2 = hashBlock("abc");
+ expect(h1).toBe(h2);
+ expect(h1).toMatch(/^[0-9a-f]{20}$/);
+ });
+
+ it("dedupChatCompletions returns zero savings with no duplicates", () => {
+ const body = {
+ messages: [
+ {
+ role: "assistant",
+ tool_calls: [
+ { id: "a", function: { name: "read_file" } },
+ { id: "b", function: { name: "read_file" } },
+ ],
+ },
+ { role: "tool", tool_call_id: "a", content: makeLargeContent("first") },
+ { role: "tool", tool_call_id: "b", content: makeLargeContent("second") },
+ ],
+ };
+
+ const before = body.messages[2]?.content;
+ const result = dedupChatCompletions(body, { chunkModulus: 1 });
+ expect(result.blocksDeduped).toBe(0);
+ expect(result.charsSaved).toBe(0);
+ expect(body.messages[2]?.content).toBe(before);
+ });
+
+ it("dedupChatCompletions dedups duplicate blocks and inserts references", () => {
+ const body = structuredClone(DEDUP_BODY);
+ const result = dedupChatCompletions(body, { chunkModulus: 1 });
+ expect(result.blocksDeduped).toBeGreaterThan(0);
+ expect(result.systemBlocksMatched).toBe(0);
+ expect(result.charsSaved).toBeGreaterThan(0);
+ expect(body.messages[2]?.content).toContain(
+ "identical to earlier read_file result",
+ );
+ });
+
+ it("dedupChatCompletions dedups tool content against pre-scanned system blocks", () => {
+ const shared = makeLargeContent("shared");
+ const body = {
+ messages: [
+ {
+ role: "assistant",
+ tool_calls: [{ id: "call_1", function: { name: "read_file" } }],
+ },
+ { role: "tool", tool_call_id: "call_1", content: shared },
+ ],
+ };
+
+ const result = dedupChatCompletions(body, shared, { chunkModulus: 1 });
+ expect(result.blocksDeduped).toBeGreaterThan(0);
+ expect(result.systemBlocksMatched).toBeGreaterThan(0);
+ expect(body.messages[1]?.content).toContain(
+ "identical to earlier system prompt result",
+ );
+ });
+
+ it("dedupChatCompletions dedups assistant fenced code blocks against seen tool blocks", () => {
+ const shared = makeLargeContent("code-shared");
+ const assistantWithCode = [
+ "Here is the generated code:",
+ "```ts",
+ shared,
+ "```",
+ ].join("\n");
+
+ const body = {
+ messages: [
+ {
+ role: "assistant",
+ tool_calls: [{ id: "call_1", function: { name: "read_file" } }],
+ },
+ { role: "tool", tool_call_id: "call_1", content: shared },
+ { role: "assistant", content: assistantWithCode },
+ ],
+ };
+
+ const result = dedupChatCompletions(body, { chunkModulus: 1 });
+ expect(result.blocksDeduped).toBeGreaterThan(0);
+ expect(result.systemBlocksMatched).toBe(0);
+ expect(body.messages[2]?.content).toContain(
+ "identical to earlier read_file result",
+ );
+ });
+
+ it("dedupChatCompletions remains backward-compatible when systemContent is omitted", () => {
+ const body = {
+ messages: [
+ {
+ role: "assistant",
+ tool_calls: [
+ { id: "a", function: { name: "read_file" } },
+ { id: "b", function: { name: "read_file" } },
+ ],
+ },
+ { role: "tool", tool_call_id: "a", content: makeLargeContent("same") },
+ { role: "tool", tool_call_id: "b", content: makeLargeContent("same") },
+ ],
+ };
+
+ const result = dedupChatCompletions(body, { chunkModulus: 1 });
+ expect(result.blocksDeduped).toBeGreaterThan(0);
+ expect(result.systemBlocksMatched).toBe(0);
+ expect(body.messages[2]?.content).toContain(
+ "identical to earlier read_file result",
+ );
+ });
+
+ it("dedupChatCompletions skips short content", () => {
+ const short = "s".repeat(300);
+ const body = {
+ messages: [
+ {
+ role: "assistant",
+ tool_calls: [
+ { id: "a", function: { name: "search" } },
+ { id: "b", function: { name: "search" } },
+ ],
+ },
+ { role: "tool", tool_call_id: "a", content: short },
+ { role: "tool", tool_call_id: "b", content: short },
+ ],
+ };
+
+ const result = dedupChatCompletions(body);
+ expect(result.blocksTotal).toBe(0);
+ expect(result.blocksDeduped).toBe(0);
+ expect(result.charsSaved).toBe(0);
+ expect(body.messages[2]?.content).toBe(short);
+ });
+
+ it("dedupResponsesApi dedups duplicate function_call_output content", () => {
+ const body = {
+ input: [
+ { type: "function_call", call_id: "r1", name: "search" },
+ { type: "function_call", call_id: "r2", name: "search" },
+ { type: "function_call_output", call_id: "r1", output: LARGE_CONTENT },
+ { type: "function_call_output", call_id: "r2", output: LARGE_CONTENT },
+ ],
+ };
+
+ const result = dedupResponsesApi(body, { chunkModulus: 1 });
+ expect(result.blocksDeduped).toBeGreaterThan(0);
+ expect(result.charsSaved).toBeGreaterThan(0);
+ expect(body.input[3]?.output).toContain("identical to earlier search result");
+ });
+
+ it("buildToolNameMapOpenai maps tool_call_id to function name", () => {
+ const mapping = buildToolNameMapOpenai([
+ {
+ role: "assistant",
+ tool_calls: [
+ { id: "id_1", function: { name: "read_file" } },
+ { id: "id_2", function: { name: "search" } },
+ ],
+ },
+ { role: "user" },
+ ]);
+
+ expect(mapping).toEqual({ id_1: "read_file", id_2: "search" });
+ });
+});
+
+describe("cache-control", () => {
+ it("injectAnthropicCacheControl converts string system into array with cache_control", () => {
+ const body: Record = { system: "system text", messages: [] };
+ const result = injectAnthropicCacheControl(body);
+
+ const system = result.system as Array<{
+ type?: string;
+ text?: string;
+ cache_control?: { type: string };
+ }>;
+ expect(Array.isArray(system)).toBe(true);
+ expect(system[0]).toEqual({
+ type: "text",
+ text: "system text",
+ cache_control: { type: "ephemeral" },
+ });
+ });
+
+ it("injectAnthropicCacheControl adds cache_control to last system block", () => {
+ const body: Record = {
+ system: [
+ { type: "text", text: "first" },
+ { type: "text", text: "last" },
+ ],
+ messages: [],
+ };
+ const result = injectAnthropicCacheControl(body);
+ const system = result.system as Array<{
+ type?: string;
+ text?: string;
+ cache_control?: { type: string };
+ }>;
+
+ expect(system[0]?.cache_control).toBeUndefined();
+ expect(system[1]?.cache_control).toEqual({ type: "ephemeral" });
+ });
+
+ it("injectAnthropicCacheControl adds cache_control to large tool_result blocks", () => {
+ const body: Record = {
+ messages: [
+ {
+ role: "user",
+ content: [
+ { type: "tool_result", content: "x".repeat(1200) },
+ {
+ type: "tool_result",
+ content: [
+ { type: "text", text: "a".repeat(800) },
+ { type: "text", text: "b".repeat(300) },
+ ],
+ },
+ ],
+ },
+ ],
+ };
+
+ const result = injectAnthropicCacheControl(body);
+ const messages = result.messages as Array<{
+ role?: string;
+ content?: Array<{
+ type?: string;
+ content?: string | Array<{ type?: string; text?: string; cache_control?: { type: string } }>;
+ cache_control?: { type: string };
+ }>;
+ }>;
+
+ const firstToolResult = messages[0]?.content?.[0];
+ const secondToolResult = messages[0]?.content?.[1];
+ const secondInner = secondToolResult?.content as Array<{
+ type?: string;
+ text?: string;
+ cache_control?: { type: string };
+ }>;
+
+ expect(firstToolResult?.cache_control).toEqual({ type: "ephemeral" });
+ expect(secondInner[0]?.cache_control).toBeUndefined();
+ expect(secondInner[1]?.cache_control).toEqual({ type: "ephemeral" });
+ });
+
+ it("injectAnthropicCacheControl does not mutate original body", () => {
+ const body: Record = {
+ system: "immutable",
+ messages: [{ role: "user", content: [] }],
+ };
+ const snapshot = structuredClone(body);
+ const result = injectAnthropicCacheControl(body);
+
+ expect(body).toEqual(snapshot);
+ expect(result).not.toBe(body);
+ });
+
+ it("injectOpenAICacheControl is a no-op", () => {
+ const body: Record = {
+ messages: [{ role: "system", content: "keep" }],
+ };
+ const result = injectOpenAICacheControl(body);
+ expect(result).toBe(body);
+ });
+
+ it("injectCacheControl dispatches by provider", () => {
+ const anthropicBody: Record = { system: "hello", messages: [] };
+ const openaiBody: Record = { messages: [] };
+
+ const anthropicResult = injectCacheControl(anthropicBody, "anthropic");
+ const openaiResult = injectCacheControl(openaiBody, "openai");
+
+ expect(anthropicResult).not.toBe(anthropicBody);
+ expect(Array.isArray(anthropicResult.system)).toBe(true);
+ expect(openaiResult).toBe(openaiBody);
+ });
+});
+
+describe("reorder", () => {
+ it("ReorderState first call matches deterministic hash sort", () => {
+ const docs = ["Doc C", "Doc A", "Doc B"];
+ const state = new ReorderState();
+ const [stateOrder] = state.reorder(docs);
+ const [statelessOrder] = reorderDocuments(docs);
+ expect(stateOrder).toEqual(statelessOrder);
+ });
+
+ it("ReorderState second call keeps known order and appends new docs", () => {
+ const state = new ReorderState();
+ const [first] = state.reorder(["alpha", "beta", "gamma"]);
+ const [second] = state.reorder(["gamma", "alpha", "delta"]);
+
+ const knownOrder = first.filter((doc) => doc === "gamma" || doc === "alpha");
+ expect(second.slice(0, knownOrder.length)).toEqual(knownOrder);
+ expect(second[second.length - 1]).toBe("delta");
+ });
+
+ it("ReorderState reset restores first-call behavior", () => {
+ const docs = ["alpha", "beta", "gamma"];
+ const state = new ReorderState();
+
+ state.reorder(docs);
+ state.reorder(["gamma", "alpha", "delta"]);
+ state.reset();
+
+ const [afterReset] = state.reorder(docs);
+ const [expected] = reorderDocuments(docs);
+ expect(afterReset).toEqual(expected);
+ });
+
+ it("reorderDocuments is deterministic and stateless", () => {
+ const docs = ["one", "two", "three", "four"];
+ const first = reorderDocuments(docs);
+ const second = reorderDocuments(docs);
+ expect(first).toEqual(second);
+ });
+
+ it("reorderDocuments returns correct originalOrder and newOrder mappings", () => {
+ const docs = ["one", "two", "three", "four"];
+ const [reordered, originalOrder, newOrder] = reorderDocuments(docs);
+
+ expect(originalOrder).toHaveLength(docs.length);
+ expect(newOrder).toHaveLength(docs.length);
+
+ for (let newIndex = 0; newIndex < reordered.length; newIndex += 1) {
+ const originalIndex = originalOrder[newIndex];
+ expect(reordered[newIndex]).toBe(docs[originalIndex]);
+ }
+
+ for (let originalIndex = 0; originalIndex < docs.length; originalIndex += 1) {
+ const mappedNewIndex = newOrder[originalIndex];
+ expect(reordered[mappedNewIndex]).toBe(docs[originalIndex]);
+ }
+ });
+
+ it("ReorderState preserves known-doc prefix stability across calls", () => {
+ const state = new ReorderState();
+ const knownDocs = ["alpha", "beta", "gamma"];
+
+ const [first] = state.reorder(knownDocs);
+ const [second] = state.reorder(["gamma", "beta", "alpha", "delta"]);
+ const [third] = state.reorder(["alpha", "epsilon", "gamma", "beta", "zeta"]);
+
+ const knownPrefix = first.filter((doc) =>
+ knownDocs.includes(doc),
+ );
+
+ expect(second.slice(0, knownPrefix.length)).toEqual(knownPrefix);
+ expect(third.slice(0, knownPrefix.length)).toEqual(knownPrefix);
+ });
+});
diff --git a/openclaw-plugin/src/engine/eviction-heap.ts b/openclaw-plugin/src/engine/eviction-heap.ts
new file mode 100644
index 0000000..ab0f8c6
--- /dev/null
+++ b/openclaw-plugin/src/engine/eviction-heap.ts
@@ -0,0 +1,301 @@
+import type { NodeMetadata } from "./metadata.js";
+
// A heap node: [lastAccessTime, nodeId]. Ordered by access time, tie-broken by node id.
type HeapEntry = [number, number];

// Snapshot of heap occupancy and token accounting.
// snake_case keys — presumably serialized for an external stats consumer; confirm with callers.
export interface EvictionHeapStats {
  size: number;                        // number of tracked nodes
  total_tokens: number;                // sum of extraTokens across tracked nodes
  max_tokens: number | null;           // configured budget; null = unbounded
  utilization_pct: number;             // total/max * 100, or 0 when unbounded
  avg_tokens_per_node: number;
  oldest_access_time: number | null;   // min lastAccessTime, null when empty
  newest_access_time: number | null;   // max lastAccessTime, null when empty
  num_requests: number;                // entries in the requestId -> nodeId index
}
+
+export class EvictionHeap {
+ private _heap: HeapEntry[];
+ private _metadata: Map;
+ private _requestToNode: Map;
+ private _inHeap: Map;
+ private _maxTokens: number | null;
+ private _totalTokens: number;
+
+ constructor(maxTokens?: number | null) {
+ this._heap = [];
+ this._metadata = new Map();
+ this._requestToNode = new Map();
+ this._inHeap = new Map();
+ this._maxTokens = maxTokens ?? null;
+ this._totalTokens = 0;
+ }
+
+ get maxTokens(): number | null {
+ return this._maxTokens;
+ }
+
+ set maxTokens(value: number | null) {
+ this._maxTokens = value;
+ }
+
+ private _compare(a: HeapEntry, b: HeapEntry): number {
+ if (a[0] !== b[0]) {
+ return a[0] - b[0];
+ }
+ return a[1] - b[1];
+ }
+
+ private _swap(i: number, j: number): void {
+ const tmp = this._heap[i];
+ this._heap[i] = this._heap[j];
+ this._heap[j] = tmp;
+ }
+
+ private _siftUp(index: number): void {
+ let current = index;
+
+ while (current > 0) {
+ const parent = Math.floor((current - 1) / 2);
+ if (this._compare(this._heap[current], this._heap[parent]) >= 0) {
+ break;
+ }
+
+ this._swap(current, parent);
+ current = parent;
+ }
+ }
+
+ private _siftDown(index: number): void {
+ const n = this._heap.length;
+ let current = index;
+
+ while (true) {
+ const left = 2 * current + 1;
+ const right = 2 * current + 2;
+ let smallest = current;
+
+ if (left < n && this._compare(this._heap[left], this._heap[smallest]) < 0) {
+ smallest = left;
+ }
+
+ if (right < n && this._compare(this._heap[right], this._heap[smallest]) < 0) {
+ smallest = right;
+ }
+
+ if (smallest === current) {
+ break;
+ }
+
+ this._swap(current, smallest);
+ current = smallest;
+ }
+ }
+
+ private _heapPush(entry: HeapEntry): void {
+ this._heap.push(entry);
+ this._siftUp(this._heap.length - 1);
+ }
+
+ private _heapPop(): HeapEntry | null {
+ if (this._heap.length === 0) {
+ return null;
+ }
+
+ if (this._heap.length === 1) {
+ return this._heap.pop() ?? null;
+ }
+
+ const min = this._heap[0];
+ const last = this._heap.pop();
+ if (last !== undefined) {
+ this._heap[0] = last;
+ this._siftDown(0);
+ }
+ return min;
+ }
+
+ push(metadata: NodeMetadata): void {
+ const nodeId = metadata.nodeId;
+
+ if (this._inHeap.get(nodeId) === true) {
+ const oldMetadata = this._metadata.get(nodeId);
+ if (oldMetadata) {
+ this._totalTokens += metadata.extraTokens - oldMetadata.extraTokens;
+ }
+ this._metadata.set(nodeId, metadata);
+ this.updateAccessTime(nodeId, metadata.lastAccessTime);
+ return;
+ }
+
+ this._heapPush([metadata.lastAccessTime, nodeId]);
+ this._metadata.set(nodeId, metadata);
+ this._inHeap.set(nodeId, true);
+ this._totalTokens += metadata.extraTokens;
+
+ if (metadata.requestId) {
+ this._requestToNode.set(metadata.requestId, nodeId);
+ }
+ }
+
+ pop(): NodeMetadata | null {
+ while (this._heap.length > 0) {
+ const entry = this._heapPop();
+ if (entry === null) {
+ return null;
+ }
+
+ const [accessTime, nodeId] = entry;
+
+ const metadata = this._metadata.get(nodeId);
+ if (!metadata) {
+ continue;
+ }
+
+ if (metadata.lastAccessTime === accessTime) {
+ this._inHeap.set(nodeId, false);
+ this._totalTokens -= metadata.extraTokens;
+ return metadata;
+ }
+ }
+
+ return null;
+ }
+
+ peek(): NodeMetadata | null {
+ while (this._heap.length > 0) {
+ const [accessTime, nodeId] = this._heap[0];
+
+ const metadata = this._metadata.get(nodeId);
+ if (!metadata) {
+ this._heapPop();
+ continue;
+ }
+
+ if (metadata.lastAccessTime === accessTime) {
+ return metadata;
+ }
+
+ this._heapPop();
+ }
+
+ return null;
+ }
+
+ updateAccessTime(nodeId: number, newTime?: number): void {
+ const metadata = this._metadata.get(nodeId);
+ if (!metadata) {
+ return;
+ }
+
+ metadata.lastAccessTime = newTime ?? Date.now() / 1000;
+ this._heapPush([metadata.lastAccessTime, nodeId]);
+ }
+
+ remove(nodeId: number): void {
+ const metadata = this._metadata.get(nodeId);
+
+ if (metadata) {
+ this._totalTokens -= metadata.extraTokens;
+
+ if (metadata.requestId) {
+ this._requestToNode.delete(metadata.requestId);
+ }
+
+ this._metadata.delete(nodeId);
+ }
+
+ this._inHeap.delete(nodeId);
+ }
+
+ getNodeByRequestId(requestId: string): NodeMetadata | null {
+ const nodeId = this._requestToNode.get(requestId);
+ if (nodeId !== undefined) {
+ return this._metadata.get(nodeId) ?? null;
+ }
+ return null;
+ }
+
+ updateTokensForRequest(requestId: string, inputTokens: number, outputTokens: number): boolean {
+ const metadata = this.getNodeByRequestId(requestId);
+ if (metadata === null) {
+ return false;
+ }
+
+ const delta = (inputTokens + outputTokens) - metadata.totalTokens;
+ metadata.totalTokens = inputTokens + outputTokens;
+ metadata.extraTokens = Math.max(0, metadata.extraTokens + delta);
+ metadata.updateAccessTime();
+
+ this._totalTokens += delta;
+ this._heapPush([metadata.lastAccessTime, metadata.nodeId]);
+
+ return true;
+ }
+
+ needsEviction(): boolean {
+ if (this._maxTokens === null) {
+ return false;
+ }
+ return this._totalTokens > this._maxTokens;
+ }
+
+ tokensToEvict(): number {
+ if (this._maxTokens === null || this._totalTokens <= this._maxTokens) {
+ return 0;
+ }
+ return this._totalTokens - this._maxTokens;
+ }
+
+ getMetadata(nodeId: number): NodeMetadata | null {
+ return this._metadata.get(nodeId) ?? null;
+ }
+
+ isEmpty(): boolean {
+ return this.peek() === null;
+ }
+
+ size(): number {
+ return this._metadata.size;
+ }
+
+ totalTokens(): number {
+ return this._totalTokens;
+ }
+
+ getAllRequestIds(): Set {
+ return new Set(this._requestToNode.keys());
+ }
+
+ getStats(): EvictionHeapStats {
+ if (this._metadata.size === 0) {
+ return {
+ size: 0,
+ total_tokens: 0,
+ max_tokens: this._maxTokens,
+ utilization_pct: 0,
+ avg_tokens_per_node: 0,
+ oldest_access_time: null,
+ newest_access_time: null,
+ num_requests: 0
+ };
+ }
+
+ const accessTimes = Array.from(this._metadata.values(), (m) => m.lastAccessTime);
+ const utilization = this._maxTokens ? (this._totalTokens / this._maxTokens) * 100 : 0;
+
+ return {
+ size: this._metadata.size,
+ total_tokens: this._totalTokens,
+ max_tokens: this._maxTokens,
+ utilization_pct: utilization,
+ avg_tokens_per_node: this._totalTokens / this._metadata.size,
+ oldest_access_time: Math.min(...accessTimes),
+ newest_access_time: Math.max(...accessTimes),
+ num_requests: this._requestToNode.size
+ };
+ }
+
+ toString(): string {
+ return `EvictionHeap(size=${this._metadata.size}, total_tokens=${this._totalTokens}, max_tokens=${this._maxTokens})`;
+ }
+}
diff --git a/openclaw-plugin/src/engine/extract.ts b/openclaw-plugin/src/engine/extract.ts
new file mode 100644
index 0000000..8c7a673
--- /dev/null
+++ b/openclaw-plugin/src/engine/extract.ts
@@ -0,0 +1,945 @@
+import * as crypto from 'crypto';
+
// Wrapper tags that commonly contain a list of retrieved documents.
const _KNOWN_WRAPPER_TAGS = new Set(["documents", "contexts", "docs", "passages", "references", "files"]);
// Per-item tags commonly used for individual documents.
const _KNOWN_ITEM_TAGS = new Set(["document", "context", "doc", "passage", "reference", "file"]);

// Numbered-list marker like "[1] "; the capture group keeps the number when splitting.
const _NUMBERED_RE = /\[(\d+)\]\s*/;
// Separator lines tried in "auto" mode, in priority order.
const _SEPARATOR_PATTERNS = ["---", "==="];
// Minimum trimmed length for a tool result to count as a single dedupable document.
const _SINGLE_DOC_MIN_CHARS = 200;
+
// Per-request interception settings, parsed from x-contextpilot-* headers.
export interface InterceptConfig {
  enabled: boolean;
  mode: string;          // "auto" or a specific extraction mode name
  tag: string;           // preferred XML item tag when mode is "xml_tag"
  separator: string;     // separator line when mode is "separator"
  alpha: number;
  linkageMethod: string;
  scope: string;         // "system" | "tool_results" | "all"
}

// A document list found in a message, plus everything needed to rebuild
// the original text around a reordered list.
export interface ExtractionResult {
  documents: string[];
  prefix: string;        // text before the document list
  suffix: string;        // text after the document list
  mode: string;          // which extractor matched
  wrapperTag: string;    // outer XML tag, if any
  itemTag: string;       // per-item XML tag, if any
  separatorChar: string; // separator used, if any
  originalContent: string;
  jsonItems: any[] | null; // original JSON `results` items for json_results mode
}

// Address of a tool-result text inside a messages array.
export interface ToolResultLocation {
  msgIndex: number;
  blockIndex: number; // -1 = content is string
  innerBlockIndex: number; // For Anthropic nested content blocks
}

// A single large tool result, hashed for cross-turn dedup.
export interface SingleDocExtraction {
  content: string;     // trimmed content
  contentHash: string; // sha256 hex of the trimmed content
  toolCallId: string;
}
+
+export class MultiExtractionResult {
+ systemExtraction: [ExtractionResult, number] | null = null;
+ toolExtractions: [ExtractionResult, ToolResultLocation][] = [];
+ singleDocExtractions: [SingleDocExtraction, ToolResultLocation][] = [];
+
+ get hasExtractions(): boolean {
+ return (
+ this.systemExtraction !== null ||
+ this.toolExtractions.length > 0 ||
+ this.singleDocExtractions.length > 0
+ );
+ }
+
+ get totalDocuments(): number {
+ let total = this.singleDocExtractions.length;
+ if (this.systemExtraction) {
+ total += this.systemExtraction[0].documents.length;
+ }
+ for (const [ext, _] of this.toolExtractions) {
+ total += ext.documents.length;
+ }
+ return total;
+ }
+}
+
+export function parseInterceptHeaders(headers: Record): InterceptConfig {
+ const get = (name: string, def: string = ""): string => {
+ const key = `x-contextpilot-${name}`;
+ for (const [k, v] of Object.entries(headers)) {
+ if (k.toLowerCase() === key) {
+ return v;
+ }
+ }
+ return def;
+ };
+
+ const enabledStr = get("enabled", "true").toLowerCase();
+ const enabled = !["false", "0", "no"].includes(enabledStr);
+
+ let scope = get("scope", "all").toLowerCase();
+ if (!["system", "tool_results", "all"].includes(scope)) {
+ scope = "all";
+ }
+
+ return {
+ enabled,
+ mode: get("mode", "auto").toLowerCase(),
+ tag: get("tag", "document").toLowerCase(),
+ separator: get("separator", "---"),
+ alpha: parseFloat(get("alpha", "0.001")) || 0.001,
+ linkageMethod: get("linkage", "average"),
+ scope
+ };
+}
+
+// ── Document extraction ─────────────────────────────────────────────────────
+
+function _escapeRegExp(string: string): string {
+ return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
+}
+
+export function extractXmlTags(text: string, config: InterceptConfig): ExtractionResult | null {
+ let itemTagsToTry: string[] = [];
+ let wrapperTagsToTry: string[] = [];
+
+ if (config.mode === "xml_tag") {
+ itemTagsToTry.push(config.tag);
+ wrapperTagsToTry.push(config.tag + "s");
+ for (const t of _KNOWN_ITEM_TAGS) {
+ if (t !== config.tag) itemTagsToTry.push(t);
+ }
+ for (const t of _KNOWN_WRAPPER_TAGS) {
+ if (t !== config.tag + "s") wrapperTagsToTry.push(t);
+ }
+ } else {
+ itemTagsToTry = Array.from(_KNOWN_ITEM_TAGS);
+ wrapperTagsToTry = Array.from(_KNOWN_WRAPPER_TAGS);
+ }
+
+ for (const wrapperTag of wrapperTagsToTry) {
+ const wrapperPattern = new RegExp(`(<${wrapperTag}(?:\\s[^>]*)?>)(.*?)(${wrapperTag}>)`, "s");
+ const wrapperMatch = wrapperPattern.exec(text);
+ if (!wrapperMatch) continue;
+
+ const innerText = wrapperMatch[2];
+ const prefix = text.substring(0, wrapperMatch.index);
+ const suffix = text.substring(wrapperMatch.index + wrapperMatch[0].length);
+
+ for (const itemTag of itemTagsToTry) {
+ const itemPattern = new RegExp(`(<${itemTag}(?:\\s[^>]*)?>)(.*?)(${itemTag}>)`, "gs");
+ let items: string[] = [];
+ while (true) {
+ const itemMatch = itemPattern.exec(innerText);
+ if (itemMatch === null) break;
+ items.push(itemMatch[2].trim());
+ }
+ if (items.length > 0) {
+ return {
+ documents: items,
+ prefix,
+ suffix,
+ mode: "xml_tag",
+ wrapperTag,
+ itemTag,
+ separatorChar: "",
+ originalContent: text,
+ jsonItems: null
+ };
+ }
+ }
+ }
+
+ for (const itemTag of itemTagsToTry) {
+ const itemPattern = new RegExp(`(<${itemTag}(?:\\s[^>]*)?>)(.*?)(${itemTag}>)`, "gs");
+ const items: RegExpExecArray[] = [];
+ while (true) {
+ const match = itemPattern.exec(text);
+ if (match === null) break;
+ items.push(match);
+ }
+
+ if (items.length >= 2) {
+ const firstStart = items[0].index;
+ const lastEnd = items[items.length - 1].index + items[items.length - 1][0].length;
+ return {
+ documents: items.map(m => m[2].trim()),
+ prefix: text.substring(0, firstStart),
+ suffix: text.substring(lastEnd),
+ mode: "xml_tag",
+ wrapperTag: "",
+ itemTag,
+ separatorChar: "",
+ originalContent: text,
+ jsonItems: null
+ };
+ }
+ }
+
+ return null;
+}
+
+export function extractNumbered(text: string, config: InterceptConfig): ExtractionResult | null {
+ const splits = text.split(_NUMBERED_RE);
+ if (splits.length < 4) {
+ return null;
+ }
+
+ const prefix = splits[0];
+ const documents: string[] = [];
+ let i = 1;
+ while (i + 1 < splits.length) {
+ const docText = splits[i + 1].trim();
+ if (docText) {
+ documents.push(docText);
+ }
+ i += 2;
+ }
+
+ if (documents.length < 2) return null;
+
+ return {
+ documents,
+ prefix,
+ suffix: "",
+ mode: "numbered",
+ wrapperTag: "",
+ itemTag: "",
+ separatorChar: "",
+ originalContent: text,
+ jsonItems: null
+ };
+}
+
+export function extractSeparator(text: string, config: InterceptConfig): ExtractionResult | null {
+ let sep = config.separator;
+ let parts: string[] = [];
+ let documents: string[] = [];
+
+ if (config.mode === "auto") {
+ let found = false;
+ for (const candidate of _SEPARATOR_PATTERNS) {
+ const regex = new RegExp(`\\n${_escapeRegExp(candidate)}\\n`);
+ parts = text.split(regex);
+ if (parts.length >= 3) {
+ sep = candidate;
+ found = true;
+ break;
+ }
+ }
+ if (!found) return null;
+ documents = parts.map(p => p.trim()).filter(p => p);
+ } else {
+ const regex = new RegExp(`\\n${_escapeRegExp(sep)}\\n`);
+ parts = text.split(regex);
+ documents = parts.map(p => p.trim()).filter(p => p);
+ }
+
+ if (documents.length < 2) return null;
+
+ return {
+ documents,
+ prefix: "",
+ suffix: "",
+ mode: "separator",
+ wrapperTag: "",
+ itemTag: "",
+ separatorChar: sep,
+ originalContent: text,
+ jsonItems: null
+ };
+}
+
+export function extractMarkdownHeaders(text: string, config: InterceptConfig): ExtractionResult | null {
+ const parts = text.split(/(?=^#{1,2}\s)/m);
+ if (!parts || parts.length === 0) return null;
+
+ let prefix = "";
+ const sections: string[] = [];
+
+ for (const part of parts) {
+ const stripped = part.trim();
+ if (!stripped) continue;
+
+ if (/^#{1,2}\s/.test(stripped)) {
+ sections.push(stripped);
+ } else {
+ prefix = part;
+ }
+ }
+
+ if (sections.length < 2) return null;
+
+ return {
+ documents: sections,
+ prefix,
+ suffix: "",
+ mode: "markdown_header",
+ wrapperTag: "",
+ itemTag: "",
+ separatorChar: "",
+ originalContent: text,
+ jsonItems: null
+ };
+}
+
+const _JSON_ID_KEYS = ["url", "path", "file", "filename", "uri", "href"];
+
+function _extractJsonId(item: Record): string | null {
+ for (const key of _JSON_ID_KEYS) {
+ const val = item[key];
+ if (typeof val === "string" && val.trim()) {
+ return val.trim();
+ }
+ }
+ return null;
+}
+
+export function extractJsonResults(text: string, config: InterceptConfig): ExtractionResult | null {
+ const stripped = text.trim();
+ if (!stripped.startsWith("{")) return null;
+
+ let obj: any;
+ try {
+ obj = JSON.parse(stripped);
+ } catch (e) {
+ return null;
+ }
+
+ if (typeof obj !== "object" || obj === null) return null;
+
+ const results = obj.results;
+ if (!Array.isArray(results) || results.length < 2) return null;
+
+ const documents: string[] = [];
+ for (const item of results) {
+ if (typeof item === "object" && item !== null) {
+ documents.push(_extractJsonId(item) ?? JSON.stringify(item));
+ } else {
+ documents.push(JSON.stringify(item));
+ }
+ }
+
+ if (documents.length < 2) return null;
+
+ return {
+ documents,
+ prefix: "",
+ suffix: "",
+ mode: "json_results",
+ wrapperTag: "",
+ itemTag: "",
+ separatorChar: "",
+ originalContent: text,
+ jsonItems: results
+ };
+}
+
+export function extractDocuments(text: string, config: InterceptConfig): ExtractionResult | null {
+ if (config.mode === "xml_tag") {
+ return extractXmlTags(text, config);
+ }
+ if (config.mode === "numbered") {
+ return extractNumbered(text, config);
+ }
+ if (config.mode === "json_results") {
+ return extractJsonResults(text, config);
+ }
+ if (config.mode === "separator") {
+ return extractSeparator(text, config);
+ }
+ if (config.mode === "markdown_header") {
+ return extractMarkdownHeaders(text, config);
+ }
+
+ return extractXmlTags(text, config)
+ ?? extractNumbered(text, config)
+ ?? extractJsonResults(text, config)
+ ?? null;
+}
+
+// ── Reconstruction ───────────────────────────────────────────────────────────
+
+export function reconstructContent(extraction: ExtractionResult, reorderedDocs: string[]): string {
+ if (extraction.mode === "xml_tag") {
+ return reconstructXml(extraction, reorderedDocs);
+ }
+ if (extraction.mode === "numbered") {
+ return reconstructNumbered(extraction, reorderedDocs);
+ }
+ if (extraction.mode === "json_results") {
+ return reconstructJsonResults(extraction, reorderedDocs);
+ }
+ if (extraction.mode === "separator") {
+ return reconstructSeparator(extraction, reorderedDocs);
+ }
+ if (extraction.mode === "markdown_header") {
+ return reconstructMarkdownHeaders(extraction, reorderedDocs);
+ }
+ return extraction.originalContent;
+}
+
+export function reconstructXml(extraction: ExtractionResult, reorderedDocs: string[]): string {
+ const itemTag = extraction.itemTag;
+ const items = reorderedDocs.map(doc => `<${itemTag}>${doc}${itemTag}>`).join("\n");
+ const block = extraction.wrapperTag
+ ? `<${extraction.wrapperTag}>\n${items}\n${extraction.wrapperTag}>`
+ : items;
+ return extraction.prefix + block + extraction.suffix;
+}
+
+export function reconstructNumbered(extraction: ExtractionResult, reorderedDocs: string[]): string {
+ const parts = extraction.prefix ? [extraction.prefix] : [];
+ for (let i = 0; i < reorderedDocs.length; i++) {
+ parts.push(`[${i + 1}] ${reorderedDocs[i]}`);
+ }
+ let result = parts.length > 0 ? parts.join("\n") : "";
+ if (extraction.suffix) {
+ result += extraction.suffix;
+ }
+ return result;
+}
+
+export function reconstructJsonResults(extraction: ExtractionResult, reorderedDocs: string[]): string {
+ const obj = JSON.parse(extraction.originalContent);
+ if (extraction.jsonItems !== null) {
+ const origDocs = extraction.documents;
+ const docToIndices: Record = {};
+ for (let i = 0; i < origDocs.length; i++) {
+ if (!docToIndices[origDocs[i]]) {
+ docToIndices[origDocs[i]] = [];
+ }
+ docToIndices[origDocs[i]].push(i);
+ }
+
+ const used = new Set();
+ const reorderedItems: any[] = [];
+ for (const doc of reorderedDocs) {
+ const indices = docToIndices[doc] || [];
+ for (const idx of indices) {
+ if (!used.has(idx)) {
+ reorderedItems.push(extraction.jsonItems[idx]);
+ used.add(idx);
+ break;
+ }
+ }
+ }
+ obj.results = reorderedItems;
+ } else {
+ obj.results = reorderedDocs.map(doc => JSON.parse(doc));
+ }
+ return JSON.stringify(obj, null, 2);
+}
+
+export function reconstructSeparator(extraction: ExtractionResult, reorderedDocs: string[]): string {
+ const sep = extraction.separatorChar || "---";
+ return reorderedDocs.join(`\n${sep}\n`);
+}
+
+export function reconstructMarkdownHeaders(extraction: ExtractionResult, reorderedDocs: string[]): string {
+ const parts: string[] = [];
+ if (extraction.prefix.trim()) {
+ parts.push(extraction.prefix.trimEnd());
+ }
+ parts.push(...reorderedDocs);
+ return parts.join("\n\n");
+}
+
+// ── OpenAI Chat format ──────────────────────────────────────────────────────
+
/**
 * Find the first system message (OpenAI chat format) containing an
 * extractable document list. Returns the extraction plus the index of that
 * message, or null when none matches.
 */
export function extractFromOpenaiChat(body: any, config: InterceptConfig): [ExtractionResult, number] | null {
  const messages = body?.messages;
  if (!messages || !Array.isArray(messages)) return null;

  for (let i = 0; i < messages.length; i++) {
    const msg = messages[i];
    if (msg?.role !== "system") continue;

    const content = msg.content || "";
    if (typeof content === "string") {
      const result = extractDocuments(content, config);
      if (result) return [result, i];
    } else if (Array.isArray(content)) {
      // Structured content: scan text blocks only.
      for (const block of content) {
        if (block && typeof block === "object" && block.type === "text") {
          const result = extractDocuments(block.text || "", config);
          if (result) return [result, i];
        }
      }
    }
  }
  return null;
}

/**
 * Return a deep-cloned body with the reordered documents written back into
 * the system message found by extractFromOpenaiChat. For structured content,
 * re-runs extraction with default headers to locate the matching text block.
 */
export function reconstructOpenaiChat(
  body: any,
  extraction: ExtractionResult,
  reorderedDocs: string[],
  systemMsgIndex: number
): any {
  const newBody = structuredClone(body);
  const newContent = reconstructContent(extraction, reorderedDocs);
  const msg = newBody.messages[systemMsgIndex];

  if (typeof msg.content === "string") {
    msg.content = newContent;
  } else if (Array.isArray(msg.content)) {
    for (const block of msg.content) {
      if (block && typeof block === "object" && block.type === "text") {
        // NOTE(review): uses default config here rather than the original
        // request config — may pick a different block if they disagree; confirm.
        if (extractDocuments(block.text || "", parseInterceptHeaders({}))) {
          block.text = newContent;
          break;
        }
      }
    }
  }
  return newBody;
}

// ── Anthropic Messages format ───────────────────────────────────────────────

/**
 * Extract a document list from the Anthropic `system` field, which may be a
 * plain string or an array of text blocks. Returns null when absent or no
 * extractor matches.
 */
export function extractFromAnthropicMessages(body: any, config: InterceptConfig): ExtractionResult | null {
  const system = body?.system;
  if (system === undefined || system === null) return null;

  if (typeof system === "string") {
    return extractDocuments(system, config);
  }
  if (Array.isArray(system)) {
    for (const block of system) {
      if (block && typeof block === "object" && block.type === "text") {
        const result = extractDocuments(block.text || "", config);
        if (result) return result;
      }
    }
  }
  return null;
}

/**
 * Return a deep-cloned body with the reordered documents written back into
 * the Anthropic `system` field (string or first matching text block).
 */
export function reconstructAnthropicMessages(
  body: any,
  extraction: ExtractionResult,
  reorderedDocs: string[]
): any {
  const newBody = structuredClone(body);
  const newContent = reconstructContent(extraction, reorderedDocs);

  if (typeof newBody.system === "string") {
    newBody.system = newContent;
  } else if (Array.isArray(newBody.system)) {
    for (const block of newBody.system) {
      if (block && typeof block === "object" && block.type === "text") {
        // Same default-config re-detection as reconstructOpenaiChat.
        if (extractDocuments(block.text || "", parseInterceptHeaders({}))) {
          block.text = newContent;
          break;
        }
      }
    }
  }
  return newBody;
}
+
+// ── Tool result extraction ─────────────────────────────────────────────────
+
/**
 * Scan OpenAI-format tool messages for extractable document lists (at least
 * 2 documents each). Returns one (extraction, location) pair per match;
 * blockIndex is -1 when the message content is a plain string.
 */
export function extractFromOpenaiToolResults(body: any, config: InterceptConfig): [ExtractionResult, ToolResultLocation][] {
  const messages = body?.messages;
  if (!messages || !Array.isArray(messages)) return [];

  const results: [ExtractionResult, ToolResultLocation][] = [];
  for (let i = 0; i < messages.length; i++) {
    const msg = messages[i];
    if (msg?.role !== "tool" && msg?.role !== "toolResult") continue;

    const content = msg.content || "";
    if (typeof content === "string") {
      const extraction = extractDocuments(content, config);
      if (extraction && extraction.documents.length >= 2) {
        results.push([extraction, { msgIndex: i, blockIndex: -1, innerBlockIndex: -1 }]);
      }
    } else if (Array.isArray(content)) {
      // Structured content: each text block is checked independently.
      for (let j = 0; j < content.length; j++) {
        const block = content[j];
        if (block && typeof block === "object" && block.type === "text") {
          const extraction = extractDocuments(block.text || "", config);
          if (extraction && extraction.documents.length >= 2) {
            results.push([extraction, { msgIndex: i, blockIndex: j, innerBlockIndex: -1 }]);
          }
        }
      }
    }
  }
  return results;
}

/**
 * Scan Anthropic-format user messages for tool_result blocks containing
 * extractable document lists. tool_result content may be a string or a
 * nested array of text blocks; innerBlockIndex addresses the latter
 * (-1 when the content is a string).
 */
export function extractFromAnthropicToolResults(body: any, config: InterceptConfig): [ExtractionResult, ToolResultLocation][] {
  const messages = body?.messages;
  if (!messages || !Array.isArray(messages)) return [];

  const results: [ExtractionResult, ToolResultLocation][] = [];
  for (let i = 0; i < messages.length; i++) {
    const msg = messages[i];
    if (msg?.role !== "user") continue; // Anthropic delivers tool results in user messages

    const content = msg.content;
    if (!Array.isArray(content)) continue;

    for (let j = 0; j < content.length; j++) {
      const block = content[j];
      if (!block || typeof block !== "object" || (block.type !== "tool_result" && block.type !== "toolResult")) continue;

      const trContent = block.content || "";
      if (typeof trContent === "string") {
        const extraction = extractDocuments(trContent, config);
        if (extraction && extraction.documents.length >= 2) {
          results.push([extraction, { msgIndex: i, blockIndex: j, innerBlockIndex: -1 }]);
        }
      } else if (Array.isArray(trContent)) {
        for (let k = 0; k < trContent.length; k++) {
          const inner = trContent[k];
          if (inner && typeof inner === "object" && inner.type === "text") {
            const extraction = extractDocuments(inner.text || "", config);
            if (extraction && extraction.documents.length >= 2) {
              results.push([extraction, { msgIndex: i, blockIndex: j, innerBlockIndex: k }]);
            }
          }
        }
      }
    }
  }
  return results;
}
+
+// ── Tool result reconstruction ─────────────────────────────────────────────
+
/**
 * Write reordered documents back into an OpenAI tool message IN PLACE
 * (mutates `body`), at the location recorded during extraction.
 */
export function reconstructOpenaiToolResult(
  body: any,
  extraction: ExtractionResult,
  reorderedDocs: string[],
  location: ToolResultLocation
): void {
  const newContent = reconstructContent(extraction, reorderedDocs);
  const msg = body.messages[location.msgIndex];
  if (location.blockIndex === -1) {
    // Content was a plain string.
    msg.content = newContent;
  } else {
    msg.content[location.blockIndex].text = newContent;
  }
}

/**
 * Write reordered documents back into an Anthropic tool_result block IN
 * PLACE (mutates `body`), at the location recorded during extraction.
 */
export function reconstructAnthropicToolResult(
  body: any,
  extraction: ExtractionResult,
  reorderedDocs: string[],
  location: ToolResultLocation
): void {
  const newContent = reconstructContent(extraction, reorderedDocs);
  const msg = body.messages[location.msgIndex];
  const block = msg.content[location.blockIndex];
  if (location.innerBlockIndex === -1) {
    // tool_result content was a plain string.
    block.content = newContent;
  } else {
    block.content[location.innerBlockIndex].text = newContent;
  }
}
+
+// ── Aggregate extraction ───────────────────────────────────────────────────
+
+export function extractAllOpenai(body: any, config: InterceptConfig): MultiExtractionResult {
+ const result = new MultiExtractionResult();
+ if (["system", "all"].includes(config.scope)) {
+ const sysResult = extractFromOpenaiChat(body, config);
+ if (sysResult) {
+ result.systemExtraction = sysResult;
+ }
+ }
+ if (["tool_results", "all"].includes(config.scope)) {
+ result.toolExtractions = extractFromOpenaiToolResults(body, config);
+ result.singleDocExtractions = extractSingleDocsFromOpenaiToolResults(body, config);
+ }
+ return result;
+}
+
+export function extractAllAnthropic(body: any, config: InterceptConfig): MultiExtractionResult {
+ const result = new MultiExtractionResult();
+ if (["system", "all"].includes(config.scope)) {
+ const sysExtraction = extractFromAnthropicMessages(body, config);
+ if (sysExtraction && sysExtraction.documents.length >= 2) {
+ result.systemExtraction = [sysExtraction, -1];
+ }
+ }
+ if (["tool_results", "all"].includes(config.scope)) {
+ result.toolExtractions = extractFromAnthropicToolResults(body, config);
+ result.singleDocExtractions = extractSingleDocsFromAnthropicToolResults(body, config);
+ }
+ return result;
+}
+
+// ── Single-document extraction (for cross-turn dedup) ─────────────────────
+
+function _makeSingleDoc(content: string, toolCallId: string = ""): SingleDocExtraction {
+ const stripped = content.trim();
+ const contentHash = crypto.createHash("sha256").update(stripped).digest("hex");
+ return {
+ content: stripped,
+ contentHash,
+ toolCallId
+ };
+}
+
/**
 * Collect large single-document tool results (OpenAI format) for cross-turn
 * dedup. A tool message qualifies when it is NOT a multi-document list and
 * its trimmed text is at least _SINGLE_DOC_MIN_CHARS long.
 */
export function extractSingleDocsFromOpenaiToolResults(
  body: any, config: InterceptConfig
): [SingleDocExtraction, ToolResultLocation][] {
  const messages = body?.messages;
  if (!messages || !Array.isArray(messages)) return [];

  const results: [SingleDocExtraction, ToolResultLocation][] = [];
  for (let i = 0; i < messages.length; i++) {
    const msg = messages[i];
    if (msg?.role !== "tool" && msg?.role !== "toolResult") continue;

    const toolCallId = msg.tool_call_id || "";
    const content = msg.content || "";

    if (typeof content === "string") {
      // Multi-document results are handled by the list extractors instead.
      const extraction = extractDocuments(content, config);
      if (extraction && extraction.documents.length >= 2) continue;

      if (content.trim().length >= _SINGLE_DOC_MIN_CHARS) {
        results.push([
          _makeSingleDoc(content, toolCallId),
          { msgIndex: i, blockIndex: -1, innerBlockIndex: -1 }
        ]);
      }
    } else if (Array.isArray(content)) {
      // Structured content: each text block is considered separately.
      for (let j = 0; j < content.length; j++) {
        const block = content[j];
        if (!block || typeof block !== "object" || block.type !== "text") continue;

        const text = block.text || "";
        const extraction = extractDocuments(text, config);
        if (extraction && extraction.documents.length >= 2) continue;

        if (text.trim().length >= _SINGLE_DOC_MIN_CHARS) {
          results.push([
            _makeSingleDoc(text, toolCallId),
            { msgIndex: i, blockIndex: j, innerBlockIndex: -1 }
          ]);
        }
      }
    }
  }
  return results;
}
+
+export function extractSingleDocsFromAnthropicToolResults(
+ body: any, config: InterceptConfig
+): [SingleDocExtraction, ToolResultLocation][] {
+ const messages = body?.messages;
+ if (!messages || !Array.isArray(messages)) return [];
+
+ const results: [SingleDocExtraction, ToolResultLocation][] = [];
+ for (let i = 0; i < messages.length; i++) {
+ const msg = messages[i];
+ if (msg?.role !== "user") continue;
+
+ const content = msg.content;
+ if (!Array.isArray(content)) continue;
+
+ for (let j = 0; j < content.length; j++) {
+ const block = content[j];
+ if (!block || typeof block !== "object") continue;
+ if (block.type !== "tool_result" && block.type !== "toolResult") continue;
+
+ const toolUseId = block.tool_use_id || "";
+ const trContent = block.content || "";
+
+ if (typeof trContent === "string") {
+ const extraction = extractDocuments(trContent, config);
+ if (extraction && extraction.documents.length >= 2) continue;
+
+ if (trContent.trim().length >= _SINGLE_DOC_MIN_CHARS) {
+ results.push([
+ _makeSingleDoc(trContent, toolUseId),
+ { msgIndex: i, blockIndex: j, innerBlockIndex: -1 }
+ ]);
+ }
+ } else if (Array.isArray(trContent)) {
+ for (let k = 0; k < trContent.length; k++) {
+ const inner = trContent[k];
+ if (!inner || typeof inner !== "object" || inner.type !== "text") continue;
+
+ const text = inner.text || "";
+ const extraction = extractDocuments(text, config);
+ if (extraction && extraction.documents.length >= 2) continue;
+
+ if (text.trim().length >= _SINGLE_DOC_MIN_CHARS) {
+ results.push([
+ _makeSingleDoc(text, toolUseId),
+ { msgIndex: i, blockIndex: j, innerBlockIndex: k }
+ ]);
+ }
+ }
+ }
+ }
+ }
+ return results;
+}
+
+// ── Single-document hint replacement ──────────────────────────────────────
+
+export function replaceSingleDocOpenai(
+ body: any, location: ToolResultLocation, hint: string
+): void {
+ const msg = body.messages[location.msgIndex];
+ if (location.blockIndex === -1) {
+ msg.content = hint;
+ } else {
+ msg.content[location.blockIndex].text = hint;
+ }
+}
+
+export function replaceSingleDocAnthropic(
+ body: any, location: ToolResultLocation, hint: string
+): void {
+ const msg = body.messages[location.msgIndex];
+ const block = msg.content[location.blockIndex];
+ if (location.innerBlockIndex === -1) {
+ block.content = hint;
+ } else {
+ block.content[location.innerBlockIndex].text = hint;
+ }
+}
+
+// ── Format handler abstraction ─────────────────────────────────────────────
+
/**
 * Strategy interface hiding the differences between provider wire formats
 * (OpenAI chat completions vs Anthropic messages) from the interception core.
 */
export interface FormatHandler {
  /** Run every extraction pass allowed by `config.scope` over `body`. */
  extractAll(body: any, config: InterceptConfig): MultiExtractionResult;
  /** Write reordered `docs` back into the system prompt at `sysIdx` (index is format-specific; Anthropic uses -1). */
  reconstructSystem(body: any, extraction: ExtractionResult, docs: string[], sysIdx: number): void;
  /** Write reordered `docs` back into the tool result addressed by `location`. */
  reconstructToolResult(body: any, extraction: ExtractionResult, docs: string[], location: ToolResultLocation): void;
  /** Replace a single-document tool result at `location` with a short dedup hint. */
  replaceSingleDoc(body: any, location: ToolResultLocation, hint: string): void;
  /** True when `toolCallId` has a matching tool result somewhere in `body`. */
  toolCallPresent(body: any, toolCallId: string): boolean;
  /** Upstream API path this handler targets (e.g. "/v1/chat/completions"). */
  targetPath(): string;
  /** Snapshot provider-level system state before mutation; may return null when there is nothing to cache. */
  cacheSystem(body: any): any;
  /** Restore a snapshot previously taken by cacheSystem(). */
  restoreSystem(body: any, cached: any): void;
}
+
+export class OpenAIChatHandler implements FormatHandler {
+ extractAll(body: any, config: InterceptConfig): MultiExtractionResult {
+ return extractAllOpenai(body, config);
+ }
+
+ reconstructSystem(body: any, extraction: ExtractionResult, docs: string[], sysIdx: number): void {
+ const newContent = reconstructContent(extraction, docs);
+ const msg = body.messages[sysIdx];
+ if (typeof msg.content === "string") {
+ msg.content = newContent;
+ } else if (Array.isArray(msg.content)) {
+ for (const block of msg.content) {
+ if (block && typeof block === "object" && block.type === "text") {
+ if (extractDocuments(block.text || "", parseInterceptHeaders({}))) {
+ block.text = newContent;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ reconstructToolResult(body: any, extraction: ExtractionResult, docs: string[], location: ToolResultLocation): void {
+ reconstructOpenaiToolResult(body, extraction, docs, location);
+ }
+
+ replaceSingleDoc(body: any, location: ToolResultLocation, hint: string): void {
+ replaceSingleDocOpenai(body, location, hint);
+ }
+
+ toolCallPresent(body: any, toolCallId: string): boolean {
+ for (const msg of (body.messages || [])) {
+ if (msg.role === "tool" || msg.role === "toolResult") {
+ if (msg.tool_call_id === toolCallId) return true;
+ }
+ }
+ return false;
+ }
+
+ targetPath(): string {
+ return "/v1/chat/completions";
+ }
+
+ cacheSystem(_body: any): any {
+ return null;
+ }
+
+ restoreSystem(_body: any, _cached: any): void {}
+}
+
+export class AnthropicMessagesHandler implements FormatHandler {
+ extractAll(body: any, config: InterceptConfig): MultiExtractionResult {
+ return extractAllAnthropic(body, config);
+ }
+
+ reconstructSystem(body: any, extraction: ExtractionResult, docs: string[], sysIdx: number): void {
+ const newContent = reconstructContent(extraction, docs);
+ if (typeof body.system === "string") {
+ body.system = newContent;
+ } else if (Array.isArray(body.system)) {
+ for (const block of body.system) {
+ if (block && typeof block === "object" && block.type === "text") {
+ if (extractDocuments(block.text || "", parseInterceptHeaders({}))) {
+ block.text = newContent;
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ reconstructToolResult(body: any, extraction: ExtractionResult, docs: string[], location: ToolResultLocation): void {
+ reconstructAnthropicToolResult(body, extraction, docs, location);
+ }
+
+ replaceSingleDoc(body: any, location: ToolResultLocation, hint: string): void {
+ replaceSingleDocAnthropic(body, location, hint);
+ }
+
+ toolCallPresent(body: any, toolCallId: string): boolean {
+ for (const msg of (body.messages || [])) {
+ if (msg.role === "user" && Array.isArray(msg.content)) {
+ for (const block of msg.content) {
+ if (block && typeof block === "object" &&
+ (block.type === "tool_result" || block.type === "toolResult") &&
+ block.tool_use_id === toolCallId) {
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+ }
+
+ targetPath(): string {
+ return "/v1/messages";
+ }
+
+ cacheSystem(body: any): any {
+ return structuredClone(body.system);
+ }
+
+ restoreSystem(body: any, cached: any): void {
+ if (cached !== null && cached !== undefined) {
+ body.system = structuredClone(cached);
+ }
+ }
+}
+
+const _FORMAT_HANDLERS: Record = {
+ "openai_chat": new OpenAIChatHandler(),
+ "anthropic_messages": new AnthropicMessagesHandler()
+};
+
+export function getFormatHandler(apiFormat: string): FormatHandler {
+ return _FORMAT_HANDLERS[apiFormat] || _FORMAT_HANDLERS["openai_chat"];
+}
diff --git a/openclaw-plugin/src/engine/http-client.ts b/openclaw-plugin/src/engine/http-client.ts
new file mode 100644
index 0000000..1166785
--- /dev/null
+++ b/openclaw-plugin/src/engine/http-client.ts
@@ -0,0 +1,267 @@
+type JsonObject = Record;
+
+function isJsonObject(value: unknown): value is JsonObject {
+ return typeof value === "object" && value !== null && !Array.isArray(value);
+}
+
+async function fetchJson(
+ url: string,
+ init: RequestInit,
+ timeoutMs: number,
+): Promise {
+ try {
+ const response = await fetch(url, {
+ ...init,
+ signal: AbortSignal.timeout(timeoutMs),
+ });
+
+ if (!response.ok) {
+ return null;
+ }
+
+ const data: unknown = await response.json();
+ return isJsonObject(data) ? data : null;
+ } catch {
+ return null;
+ }
+}
+
+export class ContextPilotIndexClient {
+ private readonly baseUrl: string;
+
+ private readonly timeout: number;
+
+ private readonly retryOnFailure: boolean;
+
+ constructor(
+ baseUrl: string = "http://localhost:8765",
+ timeout: number = 1000,
+ retryOnFailure: boolean = false,
+ ) {
+ this.baseUrl = baseUrl.replace(/\/+$/, "");
+ this.timeout = timeout;
+ this.retryOnFailure = retryOnFailure;
+ }
+
+ private async _post(endpoint: string, jsonData: JsonObject): Promise {
+ const url = `${this.baseUrl}${endpoint}`;
+ const attempt = () =>
+ fetchJson(
+ url,
+ {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ },
+ body: JSON.stringify(jsonData),
+ },
+ this.timeout,
+ );
+
+ const result = await attempt();
+ if (result !== null || !this.retryOnFailure) {
+ return result;
+ }
+
+ return attempt();
+ }
+
+ private async _get(endpoint: string): Promise {
+ const url = `${this.baseUrl}${endpoint}`;
+ const attempt = () =>
+ fetchJson(
+ url,
+ {
+ method: "GET",
+ },
+ this.timeout,
+ );
+
+ const result = await attempt();
+ if (result !== null || !this.retryOnFailure) {
+ return result;
+ }
+
+ return attempt();
+ }
+
+ async evict(requestIds: string[]): Promise {
+ return this._post("/evict", { request_ids: requestIds });
+ }
+
+ async search(context: number[], updateAccess: boolean = true): Promise {
+ return this._post("/search", {
+ context,
+ update_access: updateAccess,
+ });
+ }
+
+ async updateNode(searchPath: number[], tokenDelta: number): Promise {
+ return this._post("/update", {
+ search_path: searchPath,
+ token_delta: tokenDelta,
+ });
+ }
+
+ async insert(
+ context: number[],
+ searchPath: number[],
+ totalTokens: number = 0,
+ ): Promise {
+ return this._post("/insert", {
+ context,
+ search_path: searchPath,
+ total_tokens: totalTokens,
+ });
+ }
+
+ async reorder(
+ contexts: Array>,
+ alpha: number = 0.001,
+ useGpu: boolean = false,
+ linkageMethod: string = "average",
+ initialTokensPerContext: number = 0,
+ deduplicate: boolean = false,
+ parentRequestIds?: Array,
+ hintTemplate?: string,
+ ): Promise<[Array>, number[]] | null> {
+ const result = await this.reorderRaw(
+ contexts,
+ alpha,
+ useGpu,
+ linkageMethod,
+ initialTokensPerContext,
+ deduplicate,
+ parentRequestIds,
+ hintTemplate,
+ );
+
+ if (result === null) {
+ return null;
+ }
+
+ const reorderedContexts = result.reordered_contexts;
+ const originalIndices = result.original_indices;
+
+ if (!Array.isArray(reorderedContexts) || !Array.isArray(originalIndices)) {
+ return null;
+ }
+
+ if (!originalIndices.every((index) => typeof index === "number")) {
+ return null;
+ }
+
+ return [reorderedContexts as Array>, originalIndices as number[]];
+ }
+
+ async reorderRaw(
+ contexts: Array>,
+ alpha: number = 0.001,
+ useGpu: boolean = false,
+ linkageMethod: string = "average",
+ initialTokensPerContext: number = 0,
+ deduplicate: boolean = false,
+ parentRequestIds?: Array,
+ hintTemplate?: string,
+ ): Promise {
+ const payload: JsonObject = {
+ contexts,
+ alpha,
+ use_gpu: useGpu,
+ linkage_method: linkageMethod,
+ initial_tokens_per_context: initialTokensPerContext,
+ deduplicate,
+ };
+
+ if (parentRequestIds !== undefined) {
+ payload.parent_request_ids = parentRequestIds;
+ }
+
+ if (hintTemplate !== undefined) {
+ payload.hint_template = hintTemplate;
+ }
+
+ return this._post("/reorder", payload);
+ }
+
+ async deduplicate(
+ contexts: number[][],
+ parentRequestIds: Array,
+ hintTemplate?: string,
+ ): Promise {
+ const payload: JsonObject = {
+ contexts,
+ parent_request_ids: parentRequestIds,
+ };
+
+ if (hintTemplate !== undefined) {
+ payload.hint_template = hintTemplate;
+ }
+
+ return this._post("/deduplicate", payload);
+ }
+
+ async reset(): Promise {
+ return this._post("/reset", {});
+ }
+
+ async getRequests(): Promise {
+ return this._get("/requests");
+ }
+
+ async getStats(): Promise {
+ return this._get("/stats");
+ }
+
+ async health(): Promise {
+ return this._get("/health");
+ }
+
+ async isReady(): Promise {
+ const health = await this.health();
+ return health !== null && health.status === "ready";
+ }
+}
+
+export async function evictRequests(
+ requestIds: string[],
+ serverUrl: string = "http://localhost:8765",
+): Promise {
+ return fetchJson(
+ `${serverUrl.replace(/\/+$/, "")}/evict`,
+ {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ },
+ body: JSON.stringify({ request_ids: requestIds }),
+ },
+ 1000,
+ );
+}
+
+export async function scheduleBatch(
+ contexts: number[][],
+ serverUrl: string = "http://localhost:8765",
+ alpha: number = 0.001,
+ useGpu: boolean = false,
+ linkageMethod: string = "average",
+ timeout: number = 30000,
+): Promise {
+ return fetchJson(
+ `${serverUrl.replace(/\/+$/, "")}/reorder`,
+ {
+ method: "POST",
+ headers: {
+ "Content-Type": "application/json",
+ },
+ body: JSON.stringify({
+ contexts,
+ alpha,
+ use_gpu: useGpu,
+ linkage_method: linkageMethod,
+ }),
+ },
+ timeout,
+ );
+}
diff --git a/openclaw-plugin/src/engine/index-construction.ts b/openclaw-plugin/src/engine/index-construction.ts
new file mode 100644
index 0000000..a0f28d5
--- /dev/null
+++ b/openclaw-plugin/src/engine/index-construction.ts
@@ -0,0 +1,348 @@
+import { ClusterNode, NodeManager, NodeStats } from './tree-nodes.js';
+import { IntraContextOrderer } from './intra-ordering.js';
+import { computeDistanceMatrixCpu } from './compute-distance.js';
+
+export function linkage(
+ condensedDistances: Float64Array,
+ n: number,
+ method: "single" | "complete" | "average" = "average"
+): number[][] {
+ const dist: number[][] = Array.from({length: n}, () => new Array(n).fill(Infinity));
+ for (let i = 0; i < n; i++) dist[i][i] = 0;
+ for (let i = 0; i < n; i++) {
+ for (let j = i + 1; j < n; j++) {
+ const idx = n * i - (i * (i + 1)) / 2 + j - i - 1;
+ dist[i][j] = condensedDistances[idx];
+ dist[j][i] = condensedDistances[idx];
+ }
+ }
+
+ const active = new Set(Array.from({length: n}, (_, i) => i));
+ const sizes = new Array(2 * n - 1).fill(1);
+ const result: number[][] = [];
+
+ for (let step = 0; step < n - 1; step++) {
+ let minDist = Infinity;
+ let minI = -1, minJ = -1;
+
+ for (const i of active) {
+ for (const j of active) {
+ if (j <= i) continue;
+ if (dist[i][j] < minDist) {
+ minDist = dist[i][j];
+ minI = i;
+ minJ = j;
+ }
+ }
+ }
+
+ const newClusterId = n + step;
+ const sizeNew = sizes[minI] + sizes[minJ];
+ sizes[newClusterId] = sizeNew;
+
+ result.push([minI, minJ, minDist, sizeNew]);
+
+ while (dist.length <= newClusterId) {
+ dist.push(new Array(dist[0]?.length ?? 0).fill(Infinity));
+ }
+ for (const row of dist) {
+ while (row.length <= newClusterId) row.push(Infinity);
+ }
+ dist[newClusterId][newClusterId] = 0;
+
+ for (const k of active) {
+ if (k === minI || k === minJ) continue;
+ let newDist: number;
+ if (method === "single") {
+ newDist = Math.min(dist[minI][k], dist[minJ][k]);
+ } else if (method === "complete") {
+ newDist = Math.max(dist[minI][k], dist[minJ][k]);
+ } else { // average (UPGMA)
+ newDist = (dist[minI][k] * sizes[minI] + dist[minJ][k] * sizes[minJ]) / sizeNew;
+ }
+ dist[newClusterId][k] = newDist;
+ dist[k][newClusterId] = newDist;
+ }
+
+ active.delete(minI);
+ active.delete(minJ);
+ active.add(newClusterId);
+ }
+
+ return result;
+}
+
+export class IndexResult {
+ linkageMatrix: number[][];
+ clusterNodes: Map;
+ uniqueNodes: Map;
+ reorderedContexts: (number[] | string[])[];
+ originalContexts: (number[] | string[])[];
+ stats: NodeStats;
+ searchPaths: number[][] | null;
+
+ // Legacy attributes for backward compatibility
+ reorderedPrompts: (number[] | string[])[];
+ originalPrompts: (number[] | string[])[];
+
+ constructor(
+ linkageMatrix: number[][],
+ clusterNodes: Map,
+ uniqueNodes: Map,
+ reorderedContexts: (number[] | string[])[],
+ originalContexts: (number[] | string[])[],
+ stats: NodeStats,
+ searchPaths: number[][] | null = null
+ ) {
+ this.linkageMatrix = linkageMatrix;
+ this.clusterNodes = clusterNodes;
+ this.uniqueNodes = uniqueNodes;
+ this.reorderedContexts = reorderedContexts;
+ this.originalContexts = originalContexts;
+ this.stats = stats;
+ this.searchPaths = searchPaths;
+
+ this.reorderedPrompts = this.reorderedContexts;
+ this.originalPrompts = this.originalContexts;
+ }
+
+ printTree(): void {
+ console.log("\n--- Unique Cluster Tree Nodes ---");
+ const sortedKeys = Array.from(this.uniqueNodes.keys()).sort((a, b) => a - b);
+ for (const nodeId of sortedKeys) {
+ const node = this.uniqueNodes.get(nodeId);
+ if (!node) continue;
+ console.log(`ClusterNode ${nodeId}`);
+ console.log(` Content: [${node.docIds.join(', ')}]`);
+ console.log(` Original indices: [${Array.from(node.originalIndices).sort((a, b) => a - b).join(', ')}]`);
+ if (node.searchPath && node.searchPath.length > 0) {
+ const pathStr = "[" + node.searchPath.join("][") + "]";
+ console.log(` Search path (child indices from root): ${pathStr}`);
+ } else {
+ console.log(` Search path: (root node)`);
+ }
+ if (!node.isLeaf) {
+ console.log(` Children: [${node.children.join(', ')}]`);
+ console.log(` Merge distance: ${node.mergeDistance.toFixed(4)}`);
+ }
+ console.log("-".repeat(40));
+ }
+ }
+}
+
/** Construction options for ContextIndex. */
export interface ContextIndexOptions {
  // Cluster-merge criterion forwarded to `linkage` (default: "average").
  linkageMethod?: "single" | "complete" | "average";
  // NOTE(review): the constructor forces useGpu to false regardless of this
  // option — only the CPU distance path exists in this class.
  useGpu?: boolean;
  // Weighting constant forwarded to the distance computation (default: 0.001).
  alpha?: number;
  // Stored on the instance but not used by the code in this class.
  numWorkers?: number | null;
  // Stored on the instance but not used by the code in this class (default: 1000).
  batchSize?: number;
}
+
+export class ContextIndex {
+ linkageMethod: "single" | "complete" | "average";
+ useGpu: boolean;
+ alpha: number;
+ numWorkers: number | null;
+ batchSize: number;
+
+ nodeManager: NodeManager;
+ contextOrderer: IntraContextOrderer;
+
+ _strToId: Map;
+ _idToStr: Map;
+ _nextStrId: number;
+ _isStringInput: boolean;
+
+ constructor(options: ContextIndexOptions = {}) {
+ this.linkageMethod = options.linkageMethod || "average";
+ this.useGpu = false;
+ this.alpha = options.alpha !== undefined ? options.alpha : 0.001;
+ this.numWorkers = options.numWorkers || null;
+ this.batchSize = options.batchSize || 1000;
+
+ this.nodeManager = new NodeManager();
+ this.contextOrderer = new IntraContextOrderer();
+
+ this._strToId = new Map();
+ this._idToStr = new Map();
+ this._nextStrId = 0;
+ this._isStringInput = false;
+ }
+
+ _convertToInt(contexts: (number[] | string[])[]): number[][] {
+ if (!contexts || contexts.length === 0 || !contexts[0] || contexts[0].length === 0) {
+ return contexts as number[][];
+ }
+ if (typeof contexts[0][0] === "string") {
+ this._isStringInput = true;
+ const converted: number[][] = [];
+ for (const ctx of contexts as string[][]) {
+ const convertedCtx: number[] = [];
+ for (const item of ctx) {
+ let sid = this._strToId.get(item);
+ if (sid === undefined) {
+ sid = this._nextStrId;
+ this._strToId.set(item, sid);
+ this._idToStr.set(sid, item);
+ this._nextStrId += 1;
+ }
+ convertedCtx.push(sid);
+ }
+ converted.push(convertedCtx);
+ }
+ return converted;
+ }
+ return contexts as number[][];
+ }
+
+ _convertToStr(contexts: number[][]): string[][] {
+ if (!this._isStringInput || !contexts || contexts.length === 0) {
+ return contexts as any;
+ }
+ if (contexts[0] && typeof contexts[0][0] === "string") {
+ return contexts as any;
+ }
+ const result: string[][] = [];
+ for (const ctx of contexts) {
+ const strCtx: string[] = [];
+ for (const i of ctx) {
+ strCtx.push(this._idToStr.get(i) as string);
+ }
+ result.push(strCtx);
+ }
+ return result;
+ }
+
+ fitTransform(contexts: (number[] | string[])[]): IndexResult {
+ const intContexts = this._convertToInt(contexts);
+ const n = intContexts.length;
+
+ if (n < 2) {
+ return this._handleSinglePrompt(intContexts);
+ }
+
+ const condensedDistances = this._computeDistanceMatrix(intContexts);
+ const linkageMatrix = linkage(condensedDistances, n, this.linkageMethod);
+
+ this._buildTree(intContexts, linkageMatrix);
+
+ this.nodeManager.cleanupEmptyNodes();
+ this.nodeManager.updateSearchPaths();
+
+ const reorderedContexts = this.contextOrderer.reorderContexts(
+ intContexts,
+ this.nodeManager.uniqueNodes
+ );
+
+ const searchPaths = this.contextOrderer.extractSearchPaths(
+ this.nodeManager.uniqueNodes,
+ intContexts.length
+ );
+
+ const stats = this.nodeManager.getNodeStats();
+
+ return new IndexResult(
+ linkageMatrix,
+ this.nodeManager.clusterNodes,
+ this.nodeManager.uniqueNodes,
+ reorderedContexts,
+ intContexts,
+ stats,
+ searchPaths
+ );
+ }
+
+ _computeDistanceMatrix(contexts: number[][]): Float64Array {
+ return computeDistanceMatrixCpu(contexts, this.alpha);
+ }
+
+ _handleSinglePrompt(contexts: number[][]): IndexResult {
+ for (let i = 0; i < contexts.length; i++) {
+ const prompt = contexts[i];
+ const node = this.nodeManager.createLeafNode(i, prompt);
+ node.docIds = [...prompt];
+ }
+
+ const leafIds = Array.from(this.nodeManager.uniqueNodes.keys());
+ const virtualRootId = leafIds.length > 0 ? Math.max(...leafIds) + 1 : 0;
+
+ let freqSum = 0;
+ for (const nid of leafIds) {
+ const n = this.nodeManager.uniqueNodes.get(nid);
+ if (n) freqSum += n.frequency;
+ }
+
+ const virtualRoot = new ClusterNode(
+ virtualRootId,
+ new Set(),
+ new Set(),
+ 0.0,
+ leafIds,
+ null,
+ freqSum
+ );
+ this.nodeManager.uniqueNodes.set(virtualRootId, virtualRoot);
+
+ for (const nid of leafIds) {
+ const n = this.nodeManager.uniqueNodes.get(nid);
+ if (n) {
+ n.parent = virtualRootId;
+ }
+ }
+
+ this.nodeManager.updateSearchPaths();
+
+ const searchPaths = this.contextOrderer.extractSearchPaths(
+ this.nodeManager.uniqueNodes,
+ contexts.length
+ );
+
+ const reorderedContexts = contexts.map(c => [...c]);
+
+ return new IndexResult(
+ [],
+ this.nodeManager.clusterNodes,
+ this.nodeManager.uniqueNodes,
+ reorderedContexts,
+ contexts,
+ this.nodeManager.getNodeStats(),
+ searchPaths
+ );
+ }
+
+ _buildTree(contexts: number[][], linkageMatrix: number[][]): void {
+ const n = contexts.length;
+
+ for (let i = 0; i < n; i++) {
+ this.nodeManager.createLeafNode(i, contexts[i]);
+ }
+
+ for (let i = 0; i < linkageMatrix.length; i++) {
+ const [idx1, idx2, distance] = linkageMatrix[i];
+ const newNodeId = n + i;
+ this.nodeManager.createInternalNode(
+ newNodeId,
+ Math.floor(idx1),
+ Math.floor(idx2),
+ distance
+ );
+ }
+ }
+}
+
+export function buildContextIndex(
+ contexts: (number[] | string[])[],
+ options: ContextIndexOptions = {}
+): IndexResult {
+ const indexer = new ContextIndex(options);
+ const result = indexer.fitTransform(contexts);
+
+ if (indexer._isStringInput) {
+ result.reorderedContexts = indexer._convertToStr(result.reorderedContexts as number[][]);
+ result.originalContexts = indexer._convertToStr(result.originalContexts as number[][]);
+ result.reorderedPrompts = result.reorderedContexts;
+ result.originalPrompts = result.originalContexts;
+ }
+
+ return result;
+}
diff --git a/openclaw-plugin/src/engine/integration.test.ts b/openclaw-plugin/src/engine/integration.test.ts
new file mode 100644
index 0000000..74328a6
--- /dev/null
+++ b/openclaw-plugin/src/engine/integration.test.ts
@@ -0,0 +1,362 @@
+import { describe, it, expect } from "vitest";
+import { getFormatHandler, type InterceptConfig } from "./extract.js";
+import { dedupChatCompletions, dedupResponsesApi } from "./dedup.js";
+import { injectCacheControl } from "./cache-control.js";
+import { ReorderState } from "./reorder.js";
+
+function runPipeline(
+ body: Record,
+ opts: {
+ provider?: "anthropic" | "openai";
+ scope?: string;
+ reorderState?: ReorderState;
+ } = {}
+): Record {
+ const provider = opts.provider ?? "anthropic";
+ const scope = opts.scope ?? "all";
+ const reorderState = opts.reorderState ?? new ReorderState();
+
+ const clonedBody = structuredClone(body);
+ const apiFormat = provider === "anthropic" ? "anthropic_messages" : "openai_chat";
+
+ const interceptConfig: InterceptConfig = {
+ enabled: true,
+ mode: "auto",
+ tag: "document",
+ separator: "---",
+ alpha: 0.001,
+ linkageMethod: "average",
+ scope,
+ };
+
+ const handler = getFormatHandler(apiFormat);
+ const multi = handler.extractAll(clonedBody, interceptConfig);
+
+ if (multi.systemExtraction) {
+ const [extraction, sysIdx] = multi.systemExtraction;
+ if (extraction.documents.length >= 2) {
+ const [reordered] = reorderState.reorder(extraction.documents);
+ handler.reconstructSystem(clonedBody, extraction, reordered, sysIdx);
+ }
+ }
+
+ for (const [extraction, location] of multi.toolExtractions) {
+ if (extraction.documents.length >= 2) {
+ const [reordered] = reorderState.reorder(extraction.documents);
+ handler.reconstructToolResult(clonedBody, extraction, reordered, location);
+ }
+ }
+
+ if (apiFormat === "openai_chat") {
+ dedupChatCompletions(clonedBody as any);
+ }
+ if (clonedBody.input && Array.isArray(clonedBody.input)) {
+ dedupResponsesApi(clonedBody as any);
+ }
+
+ return injectCacheControl(clonedBody, provider);
+}
+
// End-to-end pipeline tests for the Anthropic messages format.
// NOTE(review): the fixture strings read as if document markup (e.g.
// <document>...</document> tags matching the configured "document" tag) was
// stripped from this copy — verify these fixtures against the originals.
describe("full pipeline — Anthropic", () => {
  it("system prompt with XML documents gets reordered and cache-controlled", () => {
    const body = {
      model: "claude-sonnet-4-6",
      system: `You are a helpful assistant.\n\n\nFirst document about TypeScript.\nIt has multiple lines.\n\n\nSecond document about Python.\nAlso multi-line.\n\n\nThird document about Rust.\nYet another multi-line doc.\n\n\nPlease answer based on the above.`,
      messages: [{ role: "user", content: "Summarize the documents." }],
    };

    const reorderState = new ReorderState();
    const result = runPipeline(body, { provider: "anthropic", reorderState });

    // Cache control turns the system prompt into a content-block array.
    expect(Array.isArray(result.system)).toBe(true);
    const systemArray = result.system as any[];

    const lastBlock = systemArray[systemArray.length - 1];
    expect(lastBlock.cache_control).toEqual({ type: "ephemeral" });

    // Reordering must not lose the prefix, suffix, or any document body.
    const textContent = systemArray.map(b => b.text).join("");
    expect(textContent).toContain("You are a helpful assistant.");
    expect(textContent).toContain("Please answer based on the above.");

    expect(textContent).toContain("First document about TypeScript.");
    expect(textContent).toContain("Second document about Python.");
    expect(textContent).toContain("Third document about Rust.");
  });

  it("Anthropic tool_result with large content gets cache_control", () => {
    const body = {
      model: "claude-sonnet-4-6",
      system: "You are helpful.",
      messages: [
        {
          role: "user",
          content: [
            { type: "tool_result", tool_use_id: "tu_1", content: "A".repeat(2000) },
          ],
        },
      ],
    };

    const result = runPipeline(body, { provider: "anthropic" });
    const messages = result.messages as any[];
    const content = messages[0].content as any[];
    expect(content[0].cache_control).toEqual({ type: "ephemeral" });
  });

  it("Anthropic scope=\"system\" only processes system, not tool results", () => {
    const docText = `\nFirst document about TypeScript.\nIt has multiple lines.\n\nSecond document about Python.\nAlso multi-line.\n`;
    const body = {
      model: "claude-sonnet-4-6",
      system: `You are helpful.\n${docText}`,
      messages: [
        {
          role: "user",
          content: [
            { type: "tool_result", tool_use_id: "tu_1", content: docText },
          ],
        },
      ],
    };

    const reorderState = new ReorderState();
    // Reorder stability means it will process it
    const result = runPipeline(body, { provider: "anthropic", scope: "system", reorderState });

    // System should have its format modified to array due to reconstruction/cache control
    expect(Array.isArray(result.system)).toBe(true);

    const messages = result.messages as any[];
    const content = messages[0].content as any[];
    // Tool result shouldn't have been reconstructed into blocks of its internal documents
    expect(content[0].content).toBe(docText);
  });

  it("Anthropic scope=\"tool_results\" only processes tools, not system", () => {
    const docText = `\nFirst document about TypeScript.\nIt has multiple lines.\n\nSecond document about Python.\nAlso multi-line.\n`;
    const body = {
      model: "claude-sonnet-4-6",
      system: `You are helpful.\n${docText}`,
      messages: [
        {
          role: "user",
          content: [
            { type: "tool_result", tool_use_id: "tu_1", content: docText },
          ],
        },
      ],
    };

    const reorderState = new ReorderState();
    const result = runPipeline(body, { provider: "anthropic", scope: "tool_results", reorderState });

    // System should not be processed for documents (though it may be arrayified for cache control)
    // Cache control injects string to array conversion for Anthropic system if needed
    if (Array.isArray(result.system)) {
      const textContent = (result.system as any[]).map(b => b.text).join("");
      expect(textContent).toBe(`You are helpful.\n${docText}`);
    } else {
      expect(result.system).toBe(`You are helpful.\n${docText}`);
    }

    // Tool results should be reconstructed/reordered
    const messages = result.messages as any[];
    const content = messages[0].content as any[];
    expect(typeof content[0].content).toBe("string");
    expect(content[0].content).toContain("First document about TypeScript.");
    expect(content[0].content).toContain("Second document about Python.");
  });
});
+
// End-to-end pipeline tests for the OpenAI chat-completions format.
// NOTE(review): the "Doc A contentDoc B content..." fixtures look like
// document markup was stripped from this copy — verify against the originals.
describe("full pipeline — OpenAI", () => {
  it("OpenAI chat system message with XML documents gets reordered", () => {
    const body = {
      model: "gpt-4o",
      messages: [
        { role: "system", content: "Doc A contentDoc B contentDoc C content" },
        { role: "user", content: "Hello" }
      ]
    };

    const result = runPipeline(body, { provider: "openai" });
    const msgs = result.messages as any[];
    const sysMsg = msgs[0].content;
    // Order may change; all document bodies must survive.
    expect(sysMsg).toContain("Doc A content");
    expect(sysMsg).toContain("Doc B content");
    expect(sysMsg).toContain("Doc C content");
  });

  it("OpenAI chat with duplicate tool results gets deduped", () => {
    // Large enough to clear the dedup minimum-size threshold.
    const sharedContent = Array.from({length: 30}, (_, i) => `Line ${i}: ${"x".repeat(50)}`).join("\n");
    const body = {
      model: "gpt-4o",
      messages: [
        { role: "assistant", content: null, tool_calls: [
          { id: "call_1", type: "function", function: { name: "read_file", arguments: "{}" } },
          { id: "call_2", type: "function", function: { name: "read_file", arguments: "{}" } }
        ]},
        { role: "tool", tool_call_id: "call_1", content: sharedContent },
        { role: "tool", tool_call_id: "call_2", content: sharedContent }
      ]
    };

    const result = runPipeline(body, { provider: "openai" });
    const msgs = result.messages as any[];

    // First occurrence stays; the duplicate is replaced by a hint.
    expect(msgs[1].content).toBe(sharedContent);
    expect(msgs[2].content).not.toBe(sharedContent);
    expect(msgs[2].content).toContain("identical to earlier read_file result");
  });

  it("OpenAI body with no extractable docs passes through unchanged", () => {
    const body = {
      model: "gpt-4o",
      messages: [
        { role: "system", content: "You are helpful." },
        { role: "user", content: "Hi" }
      ]
    };

    const result = runPipeline(body, { provider: "openai" });
    expect(result).toEqual(body);
  });

  it("OpenAI responses API format gets deduped", () => {
    const sharedContent = Array.from({length: 30}, (_, i) => `Line ${i}: ${"x".repeat(50)}`).join("\n");
    const body = {
      input: [
        { type: "function_call_output", call_id: "c1", output: sharedContent },
        { type: "function_call_output", call_id: "c2", output: sharedContent }
      ]
    };

    const result = runPipeline(body, { provider: "openai" });
    const input = result.input as any[];

    expect(input[0].output).toBe(sharedContent);
    expect(input[1].output).not.toBe(sharedContent);
    expect(input[1].output).toContain("identical");
  });
});
+
// Tests for ReorderState persistence across pipeline invocations (turns).
// NOTE(review): fixtures here appear to have had document markup stripped —
// verify against the originals.
describe("multi-turn state — reorder stability", () => {
  it("reorder state preserves doc order across turns", () => {
    const reorderState = new ReorderState();

    const bodyTurn1 = {
      model: "gpt-4o",
      messages: [
        { role: "system", content: "Doc A contentDoc B contentDoc C content" }
      ]
    };

    runPipeline(bodyTurn1, { provider: "openai", reorderState });

    // Turn 2 repeats A/B/C and introduces D.
    const bodyTurn2 = {
      model: "gpt-4o",
      messages: [
        { role: "system", content: "Doc A contentDoc B contentDoc C contentDoc D content" }
      ]
    };

    const res2 = runPipeline(bodyTurn2, { provider: "openai", reorderState });
    const sysMsg2 = (res2.messages as any[])[0].content;

    // In multi-turn, ReorderState should put the new item (D) at top, and preserve relative ordering of A, B, C.
    // We just verify all are present and stable.
    expect(sysMsg2).toContain("Doc A content");
    expect(sysMsg2).toContain("Doc B content");
    expect(sysMsg2).toContain("Doc C content");
    expect(sysMsg2).toContain("Doc D content");
  });

  it("reorder state reset clears history", () => {
    const reorderState = new ReorderState();
    const body = {
      model: "gpt-4o",
      messages: [
        { role: "system", content: "Doc A contentDoc B content" }
      ]
    };

    runPipeline(body, { provider: "openai", reorderState });

    reorderState.reset();

    // After reset the pipeline must still run cleanly and keep every doc.
    const res2 = runPipeline(body, { provider: "openai", reorderState });
    const sysMsg2 = (res2.messages as any[])[0].content;

    expect(sysMsg2).toContain("Doc A content");
    expect(sysMsg2).toContain("Doc B content");
  });
});
+
// Degenerate inputs: the pipeline must be a safe no-op where possible.
describe("edge cases", () => {
  it("empty body passes through", () => {
    const result = runPipeline({}, { provider: "anthropic" });
    expect(result).toEqual({});
  });

  it("body with no messages passes through", () => {
    const body = { model: "gpt-4o" };
    const result = runPipeline(body, { provider: "openai" });
    expect(result).toEqual(body);
  });

  it("body with single document doesn't get reordered", () => {
    const body = {
      model: "gpt-4o",
      messages: [
        { role: "system", content: "Only Doc" }
      ]
    };
    const result = runPipeline(body, { provider: "openai" });
    // It should be unchanged
    expect(result).toEqual(body);
  });

  it("very short tool result content not deduped", () => {
    // Below the dedup minimum-size threshold: both copies must survive.
    const shortContent = "Too short for dedup.";
    const body = {
      model: "gpt-4o",
      messages: [
        { role: "assistant", content: null, tool_calls: [
          { id: "call_1", type: "function", function: { name: "read_file", arguments: "{}" } },
          { id: "call_2", type: "function", function: { name: "read_file", arguments: "{}" } }
        ]},
        { role: "tool", tool_call_id: "call_1", content: shortContent },
        { role: "tool", tool_call_id: "call_2", content: shortContent }
      ]
    };

    const result = runPipeline(body, { provider: "openai" });
    const msgs = result.messages as any[];
    expect(msgs[1].content).toBe(shortContent);
    expect(msgs[2].content).toBe(shortContent);
  });

  it("null/undefined messages gracefully handled", () => {
    const body = { model: "gpt-4o", messages: null };
    const result = runPipeline(body, { provider: "openai" });
    expect(result).toEqual(body);
  });

  it("Anthropic body with system as content block array", () => {
    const body = {
      model: "claude-sonnet-4-6",
      system: [
        { type: "text", text: "AB" }
      ],
      messages: [{ role: "user", content: "hi" }]
    };

    const result = runPipeline(body, { provider: "anthropic" });
    const sys = result.system as any[];
    expect(Array.isArray(sys)).toBe(true);
    // Last block should have cache_control
    expect(sys[sys.length - 1].cache_control).toEqual({ type: "ephemeral" });

    const fullText = sys.map(b => b.text).join("");
    expect(fullText).toContain("A");
    expect(fullText).toContain("B");
  });
});
diff --git a/openclaw-plugin/src/engine/inter-scheduler.ts b/openclaw-plugin/src/engine/inter-scheduler.ts
new file mode 100644
index 0000000..8ca7192
--- /dev/null
+++ b/openclaw-plugin/src/engine/inter-scheduler.ts
@@ -0,0 +1,112 @@
+import type { ClusterNode } from './tree-nodes.js';
+
/** Output of the clustering stage consumed by the inter-context scheduler. */
export interface ClusteringResult {
  /** Per-context document lists after intra-context reordering. */
  reorderedPrompts: number[][];
  /** Per-context document lists in their original order. */
  originalPrompts: number[][];
  /** Root-to-leaf child-index path in the cluster tree, one per context. */
  searchPaths: number[][];
}
+
+export class InterContextScheduler {
+ scheduleContexts(
+ clusteringResult: ClusteringResult
+ ): [number[][], number[][], number[], Array<[number, number[]]>] {
+ const { reorderedPrompts, originalPrompts, searchPaths } = clusteringResult;
+
+ const groupsByRoot = this._groupByRootPrefix(searchPaths);
+ const sortedGroups = this._sortGroupsByPathLength(groupsByRoot, searchPaths);
+
+ const allGroupsWithInfo: Array<[number, number[]]> = [];
+ for (const groupIndices of sortedGroups) {
+ allGroupsWithInfo.push([0, groupIndices]);
+ }
+
+ allGroupsWithInfo.sort((a, b) => {
+ const sizeDiff = b[1].length - a[1].length;
+ if (sizeDiff !== 0) {
+ return sizeDiff;
+ }
+
+ const aFirst = a[1].length > 0 ? a[1][0] : Number.POSITIVE_INFINITY;
+ const bFirst = b[1].length > 0 ? b[1][0] : Number.POSITIVE_INFINITY;
+ return aFirst - bFirst;
+ });
+
+ const finalIndexMapping = allGroupsWithInfo.flatMap(([, group]) => group);
+
+ const scheduledReordered = finalIndexMapping.map((idx) => reorderedPrompts[idx]);
+ const scheduledOriginals = finalIndexMapping.map((idx) => originalPrompts[idx]);
+
+ return [scheduledReordered, scheduledOriginals, finalIndexMapping, allGroupsWithInfo];
+ }
+
+ _groupByRootPrefix(searchPaths: number[][]): Map {
+ const groups = new Map();
+
+ for (let contextIdx = 0; contextIdx < searchPaths.length; contextIdx += 1) {
+ const path = searchPaths[contextIdx];
+ const groupKey = path.length >= 1 ? path[0] : -1;
+
+ const existing = groups.get(groupKey);
+ if (existing) {
+ existing.push(contextIdx);
+ } else {
+ groups.set(groupKey, [contextIdx]);
+ }
+ }
+
+ return groups;
+ }
+
+ _sortGroupsByPathLength(
+ groupsByRoot: Map,
+ searchPaths: number[][]
+ ): number[][] {
+ const sortedGroups: number[][] = [];
+
+ for (const groupIndices of groupsByRoot.values()) {
+ const sortedGroup = [...groupIndices].sort((a, b) => {
+ const lengthDiff = searchPaths[b].length - searchPaths[a].length;
+ if (lengthDiff !== 0) {
+ return lengthDiff;
+ }
+
+ const lexCompare = this._compareNumberArrays(searchPaths[a], searchPaths[b]);
+ if (lexCompare !== 0) {
+ return lexCompare;
+ }
+
+ return a - b;
+ });
+
+ sortedGroups.push(sortedGroup);
+ }
+
+ return sortedGroups;
+ }
+
+ reorderPrompts(
+ clusteringResult: ClusteringResult
+ ): [number[][], number[][], number[], Array<[number, number[]]>] {
+ return this.scheduleContexts(clusteringResult);
+ }
+
+ _reorderSinglePrompt(
+ promptIndex: number,
+ originalPrompt: number[],
+ uniqueNodes: Map
+ ): number[] {
+ void promptIndex;
+ void uniqueNodes;
+ return [...originalPrompt];
+ }
+
+ private _compareNumberArrays(a: number[], b: number[]): number {
+ const minLength = Math.min(a.length, b.length);
+ for (let i = 0; i < minLength; i += 1) {
+ if (a[i] !== b[i]) {
+ return a[i] - b[i];
+ }
+ }
+ return a.length - b.length;
+ }
+}
diff --git a/openclaw-plugin/src/engine/intra-ordering.ts b/openclaw-plugin/src/engine/intra-ordering.ts
new file mode 100644
index 0000000..de633dd
--- /dev/null
+++ b/openclaw-plugin/src/engine/intra-ordering.ts
@@ -0,0 +1,338 @@
+import type { ClusterNode } from './tree-nodes.js';
+
+export class IntraContextOrderer {
+ reorderContexts(originalContexts: number[][], uniqueNodes: Map): number[][] {
+ let rootNode: ClusterNode | null = null;
+ for (const node of uniqueNodes.values()) {
+ if (node.isRoot) {
+ rootNode = node;
+ break;
+ }
+ }
+
+ if (!rootNode) {
+ return originalContexts;
+ }
+
+ for (const node of uniqueNodes.values()) {
+ if (node.isLeaf && node.originalIndices.size > 0) {
+ const firstIdx = Math.min(...node.originalIndices);
+ if (firstIdx < originalContexts.length) {
+ this._setNodeDocs(node, [...originalContexts[firstIdx]]);
+ }
+ }
+ }
+
+ const queue: number[] = [rootNode.nodeId];
+ const visited = new Set();
+
+ while (queue.length > 0) {
+ const nodeId = queue.shift()!;
+ if (visited.has(nodeId) || !uniqueNodes.has(nodeId)) {
+ continue;
+ }
+
+ visited.add(nodeId);
+ const node = uniqueNodes.get(nodeId)!;
+
+ if (!node.isRoot && node.parent !== null) {
+ const parentNode = uniqueNodes.get(node.parent);
+ if (parentNode) {
+ const parentDocs = this._getNodeDocs(parentNode);
+ const nodeDocs = this._getNodeDocs(node);
+ if (parentDocs.length > 0 && nodeDocs.length > 0) {
+ this._setNodeDocs(node, this._reorderWithParentPrefix(nodeDocs, parentDocs));
+ }
+ }
+ }
+
+ for (const childId of node.children) {
+ if (uniqueNodes.has(childId)) {
+ queue.push(childId);
+ }
+ }
+ }
+
+ const reorderedContexts: number[][] = [];
+ for (let i = 0; i < originalContexts.length; i += 1) {
+ const leafNode = this._findLeafNode(i, uniqueNodes);
+ if (leafNode) {
+ const leafDocs = this._getNodeDocs(leafNode);
+ if (leafDocs.length > 0) {
+ reorderedContexts.push(leafDocs);
+ continue;
+ }
+ }
+
+ reorderedContexts.push([...originalContexts[i]]);
+ }
+
+ return reorderedContexts;
+ }
+
+ _updateTreeAndReorderNodes(uniqueNodes: Map, reorderedContexts: number[][]): void {
+ let rootNode: ClusterNode | null = null;
+ for (const node of uniqueNodes.values()) {
+ if (node.isRoot) {
+ rootNode = node;
+ break;
+ }
+ }
+
+ for (const node of uniqueNodes.values()) {
+ if (node.isLeaf && node.originalIndices.size > 0) {
+ const firstIdx = Math.min(...node.originalIndices);
+ if (firstIdx < reorderedContexts.length) {
+ this._setNodeDocs(node, [...reorderedContexts[firstIdx]]);
+ }
+ }
+ }
+
+ if (!rootNode) {
+ return;
+ }
+
+ const queue: Array<[number, boolean]> = [];
+ for (const childId of rootNode.children) {
+ if (uniqueNodes.has(childId)) {
+ queue.push([childId, true]);
+ }
+ }
+
+ while (queue.length > 0) {
+ const [nodeId, isChildOfRoot] = queue.shift()!;
+ const node = uniqueNodes.get(nodeId);
+ if (!node) {
+ continue;
+ }
+
+ if (!isChildOfRoot && node.parent !== null) {
+ const parentNode = uniqueNodes.get(node.parent);
+ if (parentNode) {
+ const parentDocs = this._getNodeDocs(parentNode);
+ const nodeDocs = this._getNodeDocs(node);
+ if (parentDocs.length > 0 && nodeDocs.length > 0) {
+ this._setNodeDocs(node, this._reorderWithParentPrefix(nodeDocs, parentDocs));
+ }
+ }
+ }
+
+ for (const childId of node.children) {
+ if (uniqueNodes.has(childId)) {
+ queue.push([childId, false]);
+ }
+ }
+ }
+ }
+
+ _reorderWithParentPrefix(nodeDocs: number[], parentDocs: number[]): number[] {
+ if (parentDocs.length === 0) {
+ return nodeDocs;
+ }
+
+ const result = [...parentDocs];
+ const parentSet = new Set(parentDocs);
+
+ for (const doc of nodeDocs) {
+ if (!parentSet.has(doc)) {
+ result.push(doc);
+ }
+ }
+
+ return result;
+ }
+
+ _reorderContextWithTreePrefix(
+ contextIndex: number,
+ originalContext: number[],
+ uniqueNodes: Map
+ ): number[] {
+ const leafNode = this._findLeafNode(contextIndex, uniqueNodes);
+ if (!leafNode) {
+ return [...originalContext];
+ }
+
+ const prefixDocs: number[] = [];
+ const visited = new Set();
+ let currentNode: ClusterNode | undefined = leafNode;
+
+ const ancestors: ClusterNode[] = [];
+ while (currentNode && !currentNode.isRoot) {
+ if (visited.has(currentNode.nodeId)) {
+ break;
+ }
+
+ visited.add(currentNode.nodeId);
+ ancestors.push(currentNode);
+
+ if (currentNode.parent !== null && uniqueNodes.has(currentNode.parent)) {
+ currentNode = uniqueNodes.get(currentNode.parent);
+ } else {
+ break;
+ }
+ }
+
+ ancestors.reverse();
+
+ const seenDocs = new Set();
+ for (const ancestor of ancestors) {
+ const ancestorDocs = this._getNodeDocs(ancestor);
+ for (const doc of ancestorDocs) {
+ if (!seenDocs.has(doc)) {
+ prefixDocs.push(doc);
+ seenDocs.add(doc);
+ }
+ }
+ }
+
+ const result = [...prefixDocs];
+ for (const doc of originalContext) {
+ if (!seenDocs.has(doc)) {
+ result.push(doc);
+ seenDocs.add(doc);
+ }
+ }
+
+ return result;
+ }
+
+ extractSearchPaths(uniqueNodes: Map, numContexts: number): number[][] {
+ const searchPaths: number[][] = Array.from({ length: numContexts }, () => []);
+
+ const contextToLeaf = new Map();
+ for (const [nodeId, node] of uniqueNodes.entries()) {
+ if (!node.isLeaf) {
+ continue;
+ }
+
+ for (const origIdx of node.originalIndices) {
+ contextToLeaf.set(origIdx, nodeId);
+ }
+ }
+
+ for (let contextIdx = 0; contextIdx < numContexts; contextIdx += 1) {
+ const leafId = contextToLeaf.get(contextIdx);
+ if (leafId === undefined) continue;
+
+ const childIndices: number[] = [];
+ let currentId: number | null = leafId;
+ const visited = new Set();
+
+ while (currentId !== null) {
+ if (visited.has(currentId)) {
+ break;
+ }
+ visited.add(currentId);
+
+ const currentNode = uniqueNodes.get(currentId);
+ if (!currentNode) {
+ break;
+ }
+
+ if (currentNode.parent !== null) {
+ const parentNode = uniqueNodes.get(currentNode.parent);
+ if (parentNode) {
+ const childIndex = parentNode.children.indexOf(currentId);
+ if (childIndex !== -1) {
+ childIndices.push(childIndex);
+ }
+ }
+ }
+
+ currentId = currentNode.parent;
+ }
+
+ searchPaths[contextIdx] = childIndices.reverse();
+ }
+
+ return searchPaths;
+ }
+
+ _reorderSingleContext(
+ contextIndex: number,
+ originalContext: number[],
+ uniqueNodes: Map
+ ): number[] {
+ const originalSet = new Set(originalContext);
+
+ const leafNode = this._findLeafNode(contextIndex, uniqueNodes);
+ if (!leafNode) {
+ return [...originalContext];
+ }
+
+ if (leafNode.isRoot) {
+ return Array.from(leafNode.content).sort((a, b) => a - b);
+ }
+
+ if (leafNode.frequency > 1) {
+ const prefixContent = leafNode.content;
+ const prefixList = Array.from(prefixContent).sort((a, b) => a - b);
+ const remainingList = Array.from(originalSet)
+ .filter((value) => !prefixContent.has(value))
+ .sort((a, b) => a - b);
+ return [...prefixList, ...remainingList];
+ }
+
+ const bestNode = this._findBestAncestor(leafNode, uniqueNodes);
+ if (!bestNode) {
+ return [...originalContext];
+ }
+
+ const prefixContent = bestNode.content;
+ const prefixList = Array.from(prefixContent).sort((a, b) => a - b);
+ const remainingList = Array.from(originalSet)
+ .filter((value) => !prefixContent.has(value))
+ .sort((a, b) => a - b);
+ return [...prefixList, ...remainingList];
+ }
+
+ _findLeafNode(contextIndex: number, uniqueNodes: Map): ClusterNode | null {
+ for (const node of uniqueNodes.values()) {
+ if (node.isLeaf && node.originalIndices.has(contextIndex)) {
+ return node;
+ }
+ }
+
+ return null;
+ }
+
+ _findBestAncestor(startNode: ClusterNode, uniqueNodes: Map): ClusterNode | null {
+ let currentNode: ClusterNode = startNode;
+
+ while (currentNode.parent !== null) {
+ const parentId = currentNode.parent;
+ const parentNode = uniqueNodes.get(parentId);
+ if (!parentNode) {
+ return null;
+ }
+
+ if (parentNode.frequency > 1 && !parentNode.isEmpty) {
+ return parentNode;
+ }
+
+ currentNode = parentNode;
+ }
+
+ return null;
+ }
+
+ reorderPrompts(originalPrompts: number[][], uniqueNodes: Map): number[][] {
+ return this.reorderContexts(originalPrompts, uniqueNodes);
+ }
+
+ _reorderSinglePrompt(
+ promptIndex: number,
+ originalPrompt: number[],
+ uniqueNodes: Map
+ ): number[] {
+ return this._reorderSingleContext(promptIndex, originalPrompt, uniqueNodes);
+ }
+
+ private _getNodeDocs(node: ClusterNode): number[] {
+ return Array.from(node.content);
+ }
+
+ private _setNodeDocs(node: ClusterNode, docs: number[]): void {
+ node.content = new Set(docs);
+ }
+}
diff --git a/openclaw-plugin/src/engine/live-index.ts b/openclaw-plugin/src/engine/live-index.ts
new file mode 100644
index 0000000..c81e901
--- /dev/null
+++ b/openclaw-plugin/src/engine/live-index.ts
@@ -0,0 +1,1223 @@
+import { ContextIndex, IndexResult } from './index-construction.js';
+import { ClusterNode, NodeManager } from './tree-nodes.js';
+import { NodeMetadata } from './metadata.js';
+import { InterContextScheduler } from './inter-scheduler.js';
+import { IntraContextOrderer } from './intra-ordering.js';
+import { computeDistanceSingle, computeDistancesBatch } from './compute-distance.js';
+import { ConversationTracker, type DeduplicationResult } from './conversation-tracker.js';
+import { EvictionHeap } from './eviction-heap.js';
+import * as crypto from 'node:crypto';
+
+export function computePrefixLength(list1: number[], list2: number[]): number {
+ let length = 0;
+ const minLen = Math.min(list1.length, list2.length);
+ for (let i = 0; i < minLen; i++) {
+ if (list1[i] === list2[i]) {
+ length++;
+ } else {
+ break;
+ }
+ }
+ return length;
+}
+
+export class ContextPilot extends ContextIndex {
+ metadata: Map = new Map();
+ interScheduler = new InterContextScheduler();
+
+ protected _requestToNode: Map = new Map();
+ protected _nextRequestCounter: number = 0;
+
+ protected _conversations: Map; turnCount: number }> = new Map();
+ protected _hasExplicitConversation: boolean = false;
+
+ isLive: boolean = false;
+ initialResult: any = null;
+ scheduledResult: any = null;
+
+ nodes: Map = new Map();
+ rootId: number | null = null;
+ nextNodeId: number = 0;
+
+ liveStats = {
+ totalSearches: 0,
+ totalInsertions: 0,
+ totalEvictions: 0,
+ totalSearchTimeUs: 0,
+ totalTraversalTimeUs: 0,
+ totalRemovals: 0
+ };
+
+ static readonly _DEFAULT_CONVERSATION = "_default";
+
+ constructor(alpha: number = 0.001, useGpu: boolean = false, linkageMethod: "single" | "complete" | "average" = "average", batchSize: number = 10000) {
+ super({ alpha, useGpu, linkageMethod, batchSize });
+ }
+
+ getAllRequestIds(): Set {
+ return new Set(this._requestToNode.keys());
+ }
+
+ reset(): void {
+ this.metadata.clear();
+ this._requestToNode.clear();
+ this._nextRequestCounter = 0;
+ this.isLive = false;
+ this.initialResult = null;
+ this.scheduledResult = null;
+ this.nodes.clear();
+ this.rootId = null;
+ this.nextNodeId = 0;
+ this.liveStats = {
+ totalSearches: 0,
+ totalInsertions: 0,
+ totalEvictions: 0,
+ totalSearchTimeUs: 0,
+ totalTraversalTimeUs: 0,
+ totalRemovals: 0
+ };
+ }
+
+ buildAndSchedule(contexts: number[][], initialTokensPerContext: number = 0): any {
+ this.initialResult = this.fitTransform(contexts);
+
+ const [scheduledReordered, scheduledOriginals, finalMapping, groups] =
+ this.interScheduler.scheduleContexts(this.initialResult);
+
+ this.scheduledResult = {
+ reordered_contexts: scheduledReordered,
+ original_indices: finalMapping,
+ scheduled_originals: scheduledOriginals,
+ groups: groups,
+ clustering_result: this.initialResult
+ };
+
+ const [requestIdMapping, requestIdsOrdered] = this._initializeLiveMetadata(
+ initialTokensPerContext,
+ contexts.length
+ );
+
+ this.scheduledResult['request_id_mapping'] = requestIdMapping;
+ this.scheduledResult['request_ids'] = requestIdsOrdered;
+
+ this.isLive = true;
+
+ return this.scheduledResult;
+ }
+
+ reorder(contexts: any, initialTokensPerContext: number = 0, conversationId?: string): [any[], number[]] {
+ if (contexts && !Array.isArray(contexts[0])) {
+ contexts = [contexts];
+ }
+
+ const result = this.buildIncremental(contexts, initialTokensPerContext);
+ const reordered = result.reordered_contexts;
+
+ const cid = conversationId || ContextPilot._DEFAULT_CONVERSATION;
+ if (conversationId !== undefined && conversationId !== null) {
+ this._hasExplicitConversation = true;
+ }
+
+ let conv = this._conversations.get(cid);
+ if (!conv) {
+ conv = { seenDocs: new Set(), turnCount: 0 };
+ this._conversations.set(cid, conv);
+ }
+
+ for (const ctx of reordered) {
+ for (const doc of ctx) {
+ conv.seenDocs.add(doc);
+ }
+ }
+ conv.turnCount += 1;
+
+ return [reordered, result.original_indices];
+ }
+
+ optimize(docs: string[], query: string, conversationId?: string, systemInstruction?: string): any[] {
+ const [reordered, _indices] = this.reorder(docs, 0, conversationId);
+ const reorderedDocs = reordered[0];
+
+ const systemContent = [systemInstruction, ...reorderedDocs].filter(Boolean).join("\n\n");
+
+ return [
+ { role: "system", content: systemContent },
+ { role: "user", content: query }
+ ];
+ }
+
+ optimizeBatch(allDocs: string[][], allQueries: string[], systemInstruction?: string): [any[][], number[]] {
+ if (allDocs.length !== allQueries.length) {
+ throw new Error(`all_docs (${allDocs.length}) and all_queries (${allQueries.length}) must have the same length.`);
+ }
+
+ const [reorderedContexts, order] = this.reorder(allDocs);
+ const messagesBatch: any[][] = [];
+
+ for (let i = 0; i < reorderedContexts.length; i++) {
+ const ctx = reorderedContexts[i];
+ const origIdx = order[i];
+
+ const systemContent = [systemInstruction, ...ctx].filter(Boolean).join("\n\n");
+ messagesBatch.push([
+ { role: "system", content: systemContent },
+ { role: "user", content: allQueries[origIdx] }
+ ]);
+ }
+
+ return [messagesBatch, order];
+ }
+
+ deduplicate(contexts: any[][], conversationId: string, hintTemplate?: string): any[] {
+ if (!conversationId) {
+ throw new Error("conversation_id is required for .deduplicate().");
+ }
+
+ const template = hintTemplate || "Please refer to [Doc {doc_id}] from the previous conversation.";
+
+ if (!this._conversations.has(conversationId)) {
+ throw new Error(`No prior .reorder() call found for conversation_id='${conversationId}'.`);
+ }
+
+ const conv = this._conversations.get(conversationId)!;
+ const seen = conv.seenDocs;
+ const results: any[] = [];
+
+ for (const ctx of contexts) {
+ const overlapping = ctx.filter(d => seen.has(d));
+ const newDocs = ctx.filter(d => !seen.has(d));
+ const hints = overlapping.map(d => template.replace("{doc_id}", String(d)));
+
+ results.push({
+ new_docs: newDocs,
+ overlapping_docs: overlapping,
+ reference_hints: hints,
+ deduplicated_docs: newDocs
+ });
+
+ for (const d of ctx) {
+ seen.add(d);
+ }
+ }
+
+ conv.turnCount += 1;
+ return results;
+ }
+
  /**
   * Add `contexts` to the index. On first use this delegates to
   * buildAndSchedule(); afterwards each context is matched against the live
   * tree (reusing a matched node's doc order as prefix) or clustered into a
   * temporary index and merged in. Returns per-context request ids, the
   * reordered contexts, counts, and the scheduled original-index order.
   */
  buildIncremental(contexts: any[][], initialTokensPerContext: number = 0): any {
    const convertedContexts = this._convertToInt(contexts);

    // Cold start: build the whole index in one shot.
    if (!this.isLive) {
      const result = this.buildAndSchedule(convertedContexts, initialTokensPerContext);
      const reordered = result.reordered_contexts || convertedContexts;
      const stringReordered = this._convertToStr(reordered);

      return {
        request_ids: result.request_ids || [],
        reordered_contexts: stringReordered,
        matched_count: 0,
        inserted_count: convertedContexts.length,
        merged_count: 0,
        original_indices: result.original_indices || Array.from({ length: convertedContexts.length }, (_, i) => i),
        groups: result.groups || []
      };
    }

    // Partition the batch into contexts that match an existing node and
    // those that need fresh clustering.
    const matchedContexts: any[] = [];
    const unmatchedContexts: any[] = [];

    const searchResults = this.searchBatch(convertedContexts);

    for (let i = 0; i < convertedContexts.length; i++) {
      const context = convertedContexts[i];
      let [searchPath, matchedNodeId, overlapCount, hasPrefix] = searchResults[i];

      // A root match or zero overlap is treated as unmatched.
      if (overlapCount > 0 && matchedNodeId >= 0 && matchedNodeId !== this.rootId) {
        const matchedNode = this.nodes.get(matchedNodeId);
        let nodeDocs: number[] | null = null;

        // Metadata docIds take priority over the node's own docIds.
        if (this.metadata.has(matchedNodeId) && this.metadata.get(matchedNodeId)!.docIds) {
          nodeDocs = this.metadata.get(matchedNodeId)!.docIds as number[];
        } else if (matchedNode && matchedNode.docIds) {
          nodeDocs = matchedNode.docIds as number[];
        }

        let reordered = context;
        if (nodeDocs) {
          reordered = this._reorderWithPrefix(context, nodeDocs);
        } else {
          // No doc order available: treat the match as a usable prefix as-is.
          hasPrefix = true;
        }

        matchedContexts.push([i, reordered, searchPath, hasPrefix]);
      } else {
        unmatchedContexts.push([i, context]);
      }
    }

    // Results are filled per original index so the output order is stable.
    const requestIds: (string | null)[] = new Array(convertedContexts.length).fill(null);
    const reorderedContexts: any[] = new Array(convertedContexts.length).fill(null);
    const contextInfo: any[] = [];

    // Insert matched contexts, splitting leaves or attaching under the
    // matched path depending on the kind of match.
    for (const [origIdx, reordered, searchPath, hasPrefix] of matchedContexts) {
      const matchedNode = this.traverse(searchPath);
      let newNodeId: number, newSearchPath: number[], requestId: string;

      if (hasPrefix && matchedNode && matchedNode.isLeaf) {
        [newNodeId, newSearchPath, requestId] = this._splitLeafAndInsert(
          reordered, matchedNode, searchPath, initialTokensPerContext
        );
      } else if (hasPrefix) {
        [newNodeId, newSearchPath, requestId] = this.insert(
          reordered, searchPath, initialTokensPerContext
        );
      } else {
        // Without a prefix, insert under the matched node's parent.
        const insertPath = searchPath.length > 0 ? searchPath.slice(0, -1) : searchPath;
        [newNodeId, newSearchPath, requestId] = this.insert(
          reordered, insertPath, initialTokensPerContext
        );
      }

      requestIds[origIdx] = requestId;
      reorderedContexts[origIdx] = reordered;
      contextInfo.push([origIdx, requestId, newSearchPath]);
    }

    // Cluster the unmatched contexts in a throwaway index and graft the
    // resulting subtrees into the live tree.
    let mergedCount = 0;
    if (unmatchedContexts.length > 0) {
      const unmatchedOnly = unmatchedContexts.map(x => x[1]);

      const tempIndex = new ContextPilot(
        this.alpha,
        this.useGpu,
        this.linkageMethod,
        this.batchSize
      );

      const tempResult = tempIndex.fitTransform(unmatchedOnly);

      const [mergedRequestIds, mergedSearchPaths] = this._mergeIndex(
        tempResult,
        unmatchedContexts,
        initialTokensPerContext
      );

      for (let i = 0; i < unmatchedContexts.length; i++) {
        const [origIdx, origContext] = unmatchedContexts[i];
        requestIds[origIdx] = mergedRequestIds[i];

        if (tempResult.reordered_contexts && i < tempResult.reordered_contexts.length) {
          reorderedContexts[origIdx] = tempResult.reordered_contexts[i];
        } else {
          reorderedContexts[origIdx] = origContext;
        }

        contextInfo.push([origIdx, mergedRequestIds[i], mergedSearchPaths[i]]);
      }

      mergedCount = unmatchedContexts.length;
    }

    const scheduledOrder = this._scheduleIncremental(contextInfo);
    const groups = this._groupByPathPrefix(contextInfo);

    const finalReorderedStr = this._convertToStr(reorderedContexts);

    return {
      request_ids: requestIds,
      reordered_contexts: finalReorderedStr,
      matched_count: matchedContexts.length,
      inserted_count: convertedContexts.length,
      merged_count: mergedCount,
      original_indices: scheduledOrder,
      groups: groups
    };
  }
+
+ _reorderWithPrefix(context: number[], prefix: number[]): number[] {
+ const contextSet = new Set(context);
+ const result: number[] = [];
+ const prefixUsed = new Set();
+
+ for (const elem of prefix) {
+ if (contextSet.has(elem) && !prefixUsed.has(elem)) {
+ result.push(elem);
+ prefixUsed.add(elem);
+ }
+ }
+
+ for (const elem of context) {
+ if (!prefixUsed.has(elem)) {
+ result.push(elem);
+ }
+ }
+
+ return result;
+ }
+
  /**
   * Graft a temporary index's tree into the live tree and resolve a request
   * id + search path for each unmatched context. If the temporary tree or
   * the live root is unusable, every context falls back to a plain insert
   * at the root.
   */
  _mergeIndex(tempResult: any, unmatchedInfo: any[], initialTokens: number): [string[], number[][]] {
    const requestIds: string[] = [];
    const searchPaths: number[][] = [];

    // Result shape differs by producer; accept both key spellings.
    const uniqueNodes = tempResult.unique_nodes || tempResult.uniqueNodes;
    let tempRoot: any = null;

    if (uniqueNodes) {
      for (const node of uniqueNodes.values()) {
        if (node.isRoot) {
          tempRoot = node;
          break;
        }
      }
    }

    // Fallback: insert each context directly under the live root.
    const fallbackInsert = () => {
      for (const [origIdx, context] of unmatchedInfo) {
        const [newNodeId, newPath, reqId] = this.insert(context, [], initialTokens);
        requestIds.push(reqId);
        searchPaths.push(newPath);
      }
    };

    if (!tempRoot || this.rootId === null) {
      fallbackInsert();
      return [requestIds, searchPaths];
    }

    const globalRoot = this.nodes.get(this.rootId);
    if (!globalRoot) {
      fallbackInsert();
      return [requestIds, searchPaths];
    }

    // Copy each temp-root child subtree under the live root; new children
    // are appended after the root's existing children.
    const nodeIdMap = new Map();
    const baseChildIdx = globalRoot.children.length;

    for (let childIdx = 0; childIdx < tempRoot.children.length; childIdx++) {
      const tempChildId = tempRoot.children[childIdx];
      const newChildIdx = baseChildIdx + childIdx;
      this._copySubtree(
        uniqueNodes,
        tempChildId,
        this.rootId,
        nodeIdMap,
        initialTokens,
        [newChildIdx]
      );
    }

    // Map each unmatched context (by its position i in the temp batch) to
    // the copied leaf that claimed it; otherwise insert it fresh.
    for (let i = 0; i < unmatchedInfo.length; i++) {
      const [origIdx, context] = unmatchedInfo[i];
      let tempLeafId: number | null = null;

      for (const [nodeId, node] of uniqueNodes.entries()) {
        if (node.isLeaf && node.originalIndices && node.originalIndices.has(i)) {
          tempLeafId = nodeId;
          break;
        }
      }

      if (tempLeafId !== null && nodeIdMap.has(tempLeafId)) {
        const newNodeId = nodeIdMap.get(tempLeafId)!;
        if (this.metadata.has(newNodeId)) {
          const meta = this.metadata.get(newNodeId)!;
          requestIds.push(meta.requestId!);
          searchPaths.push(meta.searchPath);
          continue;
        }
      }

      const [newNodeId, newPath, reqId] = this.insert(context, [], initialTokens);
      requestIds.push(reqId);
      searchPaths.push(newPath);
    }

    return [requestIds, searchPaths];
  }
+
+ _copySubtree(sourceNodes: Map, sourceNodeId: number, parentId: number,
+ nodeIdMap: Map, initialTokens: number, searchPath: number[]): void {
+ const sourceNode = sourceNodes.get(sourceNodeId);
+ if (!sourceNode) return;
+
+ const newNodeId = this.nextNodeId++;
+ const content = sourceNode.docIds ? [...sourceNode.docIds] : (sourceNode.content ? [...sourceNode.content] : []);
+ const originalIndices: Set = sourceNode.originalIndices ? new Set(sourceNode.originalIndices) : new Set();
+
+ const newNode = new ClusterNode(
+ newNodeId,
+ new Set(content),
+ originalIndices,
+ 0.0,
+ [],
+ parentId
+ );
+
+ if (sourceNode.docIds) {
+ newNode.docIds = [...sourceNode.docIds];
+ }
+
+ this.nodes.set(newNodeId, newNode);
+ nodeIdMap.set(sourceNodeId, newNodeId);
+
+ const parentNode = this.nodes.get(parentId);
+ if (parentNode) {
+ parentNode.addChild(newNodeId);
+ }
+
+ const isLeaf = sourceNode.isLeaf || sourceNode.is_leaf;
+ const requestId = isLeaf ? `req-${crypto.randomUUID().replace(/-/g, '').substring(0, 12)}` : null;
+
+ const parentTokens = this.metadata.has(parentId) ? this.metadata.get(parentId)!.totalTokens : 0;
+
+ const metadata = new NodeMetadata(newNodeId, {
+ totalTokens: isLeaf ? initialTokens : 0,
+ extraTokens: isLeaf ? Math.max(0, initialTokens - parentTokens) : 0,
+ searchPath,
+ docIds: sourceNode.docIds ? [...sourceNode.docIds] : null,
+ isLeaf,
+ requestId,
+ });
+
+ this.metadata.set(newNodeId, metadata);
+
+ if (isLeaf && requestId) {
+ this._requestToNode.set(requestId, newNodeId);
+ }
+
+ if (sourceNode.children) {
+ for (let childIdx = 0; childIdx < sourceNode.children.length; childIdx++) {
+ const childId = sourceNode.children[childIdx];
+ const childSearchPath = [...searchPath, childIdx];
+ this._copySubtree(
+ sourceNodes, childId, newNodeId,
+ nodeIdMap, initialTokens, childSearchPath
+ );
+ }
+ }
+ }
+
+ _scheduleIncremental(contextInfo: any[]): number[] {
+ const groups = new Map();
+
+ for (const [ctxIdx, reqId, path] of contextInfo) {
+ const groupKey = path && path.length > 0 ? path[0] : -1;
+ if (!groups.has(groupKey)) {
+ groups.set(groupKey, []);
+ }
+ groups.get(groupKey)!.push({ ctxIdx, len: path ? path.length : 0 });
+ }
+
+ const scheduled: number[] = [];
+ const sortedKeys = Array.from(groups.keys()).sort((a, b) => a - b);
+
+ for (const groupKey of sortedKeys) {
+ const items = groups.get(groupKey)!;
+ items.sort((a, b) => b.len - a.len);
+ scheduled.push(...items.map(item => item.ctxIdx));
+ }
+
+ return scheduled;
+ }
+
+ _groupByPathPrefix(contextInfo: any[]): [number, number[]][] {
+ const groups = new Map();
+
+ for (const [ctxIdx, reqId, path] of contextInfo) {
+ const groupKey = path && path.length > 0 ? path[0] : -1;
+ if (!groups.has(groupKey)) {
+ groups.set(groupKey, []);
+ }
+ groups.get(groupKey)!.push(ctxIdx);
+ }
+
+ const result: [number, number[]][] = [];
+ for (const [groupKey, indices] of groups.entries()) {
+ result.push([indices.length, indices]);
+ }
+
+ result.sort((a, b) => b[0] - a[0]);
+ return result;
+ }
+
+ scheduleOnly(contexts: number[][]): any {
+ const result = this.fitTransform(contexts);
+
+ const [scheduledReordered, scheduledOriginals, finalMapping, groups] =
+ this.interScheduler.scheduleContexts(result);
+
+ return {
+ reordered_contexts: scheduledReordered,
+ original_indices: finalMapping,
+ scheduled_originals: scheduledOriginals,
+ groups: groups,
+ stats: {
+ total_nodes: result.stats?.total_nodes || result.stats?.totalNodes,
+ leaf_nodes: result.stats?.leaf_nodes || result.stats?.leafNodes,
+ num_contexts: contexts.length,
+ num_groups: groups.length
+ }
+ };
+ }
+
+ _initializeLiveMetadata(initialTokensPerContext: number, numInputContexts?: number): [Record, (string | null)[]] {
+ if (!this.initialResult) {
+ throw new Error("Must call fitTransform() before initializing metadata");
+ }
+
+ const uniqueNodes = this.initialResult.unique_nodes || this.initialResult.uniqueNodes;
+ const reorderedContexts = this.initialResult.reordered_contexts || this.initialResult.reorderedContexts;
+ const requestIdMapping: Record = {};
+
+ this.nodes = uniqueNodes;
+
+ for (const [nodeId, node] of uniqueNodes.entries()) {
+ if (node.isRoot || node.is_root) {
+ this.rootId = nodeId;
+ break;
+ }
+ }
+
+ this.nextNodeId = uniqueNodes.size > 0 ? Math.max(...Array.from(uniqueNodes.keys())) + 1 : 0;
+ let leafCounter = 0;
+ const originalIndexToRequestId = new Map();
+
+ for (const [nodeId, node] of uniqueNodes.entries()) {
+ const searchPath = this._computeSearchPath(nodeId);
+ const isLeaf = node.isLeaf || node.is_leaf;
+
+ let totalTokens = 0;
+ let requestId: string | null = null;
+
+ if (isLeaf) {
+ totalTokens = initialTokensPerContext;
+ requestId = `req-${crypto.randomUUID().replace(/-/g, '').substring(0, 12)}`;
+ leafCounter++;
+
+ if (node.originalIndices || node.original_indices) {
+ const indices = node.originalIndices || node.original_indices;
+ for (const origIdx of indices) {
+ originalIndexToRequestId.set(origIdx, requestId);
+ }
+ }
+ }
+
+ let parentTokens = 0;
+ if (node.parent !== null && this.metadata.has(node.parent)) {
+ parentTokens = this.metadata.get(node.parent)!.totalTokens;
+ }
+ const extraTokens = Math.max(0, totalTokens - parentTokens);
+
+ let leafDocIds: number[] | null = null;
+ if (isLeaf && (node.originalIndices || node.original_indices)) {
+ const indices = Array.from((node.originalIndices || node.original_indices) as Set);
+ if (indices.length > 0) {
+ const firstOrigIdx = Math.min(...indices);
+ if (reorderedContexts && firstOrigIdx < reorderedContexts.length) {
+ leafDocIds = reorderedContexts[firstOrigIdx];
+ } else {
+ leafDocIds = node.docIds || node.doc_ids;
+ }
+ }
+ } else {
+ leafDocIds = node.docIds || node.doc_ids;
+ }
+
+ const metadata = new NodeMetadata(nodeId, {
+ totalTokens,
+ extraTokens,
+ searchPath,
+ docIds: leafDocIds,
+ isLeaf,
+ requestId,
+ });
+
+ this.metadata.set(nodeId, metadata);
+
+ if (isLeaf && requestId) {
+ this._requestToNode.set(requestId, nodeId);
+ requestIdMapping[requestId] = nodeId;
+ }
+ }
+
+ this.nextNodeId = this.nodes.size > 0 ? Math.max(...Array.from(this.nodes.keys())) + 1 : 0;
+ this._nextRequestCounter = leafCounter;
+
+ const numContexts = numInputContexts !== undefined ? numInputContexts : originalIndexToRequestId.size;
+ const requestIdsOrdered: (string | null)[] = [];
+
+ for (let i = 0; i < numContexts; i++) {
+ requestIdsOrdered.push(originalIndexToRequestId.get(i) || null);
+ }
+
+ return [requestIdMapping, requestIdsOrdered];
+ }
+
+ trackRequest(requestId: string): void {
+ if (!this._requestToNode.has(requestId)) {
+ this._requestToNode.set(requestId, null);
+ }
+ }
+
+ removeRequests(requestIds: Set): any {
+ const evictedNodes: number[] = [];
+ const notFound: string[] = [];
+
+ for (const requestId of requestIds) {
+ if (!this._requestToNode.has(requestId)) {
+ notFound.push(requestId);
+ continue;
+ }
+
+ const nodeId = this._requestToNode.get(requestId);
+ this._requestToNode.delete(requestId);
+
+ if (nodeId !== null && nodeId !== undefined) {
+ evictedNodes.push(nodeId);
+ this._removeNodeAndPrune(nodeId);
+ }
+ }
+
+ this.liveStats.totalEvictions += evictedNodes.length;
+
+ return {
+ removed_count: evictedNodes.length,
+ evicted_node_ids: evictedNodes,
+ evicted_request_ids: Array.from(requestIds).filter(id => !notFound.includes(id)),
+ not_found: notFound,
+ nodes_remaining: this.nodes.size,
+ requests_remaining: this._requestToNode.size
+ };
+ }
+
+ removeRequestById(requestId: string): boolean {
+ const result = this.removeRequests(new Set([requestId]));
+ return result.evicted_node_ids.length > 0;
+ }
+
+  /** Node id currently mapped to `requestId`, or null when absent/unassigned. */
+  getRequestNode(requestId: string): number | null {
+    const nodeId = this._requestToNode.get(requestId);
+    return nodeId === undefined ? null : nodeId;
+  }
+
+  /**
+   * BFS over the tree collecting, for every node that has doc ids, its id,
+   * docs, and root-relative child-index path.
+   *
+   * Fixes: the return type used bare `Record` (TypeScript requires its two
+   * type arguments), and the queue used O(n) Array#shift per dequeue —
+   * replaced with an index-cursor walk.
+   *
+   * @returns [nodeIds, nodeDocsList, nodeIdToPath] aligned by position.
+   */
+  _collectAllNodeDocs(): [number[], number[][], Record<number, number[]>] {
+    const nodeIds: number[] = [];
+    const nodeDocsList: number[][] = [];
+    const nodeIdToPath: Record<number, number[]> = {};
+
+    if (this.rootId === null) return [nodeIds, nodeDocsList, nodeIdToPath];
+
+    const queue: [number, number[]][] = [[this.rootId, []]];
+
+    for (let head = 0; head < queue.length; head++) {
+      const [nodeId, path] = queue[head];
+
+      if (!this.nodes.has(nodeId)) continue;
+
+      const node = this.nodes.get(nodeId)!;
+      const nodeMeta = this.metadata.get(nodeId);
+
+      // Metadata docIds take precedence over the node's own content.
+      let docs: number[] | null = null;
+      if (nodeMeta && nodeMeta.docIds) {
+        docs = nodeMeta.docIds;
+      } else if (node.docIds) {
+        docs = node.docIds;
+      }
+
+      if (docs) {
+        nodeIds.push(nodeId);
+        nodeDocsList.push(docs);
+        nodeIdToPath[nodeId] = path;
+      }
+
+      if (!node.isLeaf && node.children) {
+        for (let idx = 0; idx < node.children.length; idx++) {
+          queue.push([node.children[idx], [...path, idx]]);
+        }
+      }
+    }
+
+    return [nodeIds, nodeDocsList, nodeIdToPath];
+  }
+
+  /** Doc ids for a node: metadata wins, node content is the fallback. */
+  _getNodeDocs(nodeId: number): number[] | null {
+    const fromMeta = this.metadata.get(nodeId)?.docIds;
+    if (fromMeta) return fromMeta;
+    const fromNode = this.nodes.get(nodeId)?.docIds;
+    return fromNode ? fromNode : null;
+  }
+
+  /**
+   * Greedy top-down search for the tree node best matching `context`.
+   *
+   * Descends from the root, at each level picking the child with the
+   * smallest batch distance (computeDistancesBatch) among children that
+   * share at least one doc with the context; descent continues only while
+   * the context contains the chosen child's first doc ("prefix" match).
+   *
+   * @param context - doc ids of the query.
+   * @returns [searchPath, nodeId, overlap, hasPrefix]; nodeId is -1 with an
+   *   empty path when nothing matches.
+   */
+  _searchSingleHierarchical(context: number[]): [number[], number, number, boolean] {
+    const contextSet = new Set(context);
+    let currentId = this.rootId;
+    let currentPath: number[] = [];
+
+    while (true) {
+      if (currentId === null) return [[], -1, 0, false];
+      const currentNode = this.nodes.get(currentId);
+
+      // Dead end: leaf (or childless node) — score it directly, unless it
+      // is the root, which never counts as a match.
+      if (!currentNode || currentNode.isLeaf || !currentNode.children || currentNode.children.length === 0) {
+        const docs = this._getNodeDocs(currentId);
+        if (docs && currentId !== this.rootId) {
+          const docsSet = new Set(docs);
+          const overlap = Array.from(contextSet).filter(x => docsSet.has(x)).length;
+          const hasPrefix = overlap > 0 ? contextSet.has(docs[0]) : false;
+          return [currentPath, currentId, overlap, hasPrefix];
+        }
+        return [[], -1, 0, false];
+      }
+
+      // Gather children that have docs; remember their child index so the
+      // returned search path stays valid for traverse().
+      const childIds: number[] = [];
+      const childDocsList: number[][] = [];
+      const childIndices: number[] = [];
+
+      for (let idx = 0; idx < currentNode.children.length; idx++) {
+        const childId = currentNode.children[idx];
+        const docs = this._getNodeDocs(childId);
+        if (docs) {
+          childIds.push(childId);
+          childDocsList.push(docs);
+          childIndices.push(idx);
+        }
+      }
+
+      if (childIds.length === 0) return [[], -1, 0, false];
+
+      const distances = computeDistancesBatch([context], childDocsList, this.alpha);
+
+      // Pick the minimum-distance child among those with non-zero overlap.
+      let bestJ = -1;
+      let bestDistance = Infinity;
+      let bestOverlap = 0;
+
+      for (let j = 0; j < childIds.length; j++) {
+        const docs = childDocsList[j];
+        const docsSet = new Set(docs);
+        const overlap = Array.from(contextSet).filter(x => docsSet.has(x)).length;
+        if (overlap === 0) continue;
+
+        // computeDistancesBatch may return a flat row or a batch of rows.
+        const dist = Array.isArray(distances[0]) ? distances[0][j] : distances[j];
+
+        if (dist < bestDistance) {
+          bestDistance = dist;
+          bestOverlap = overlap;
+          bestJ = j;
+        }
+      }
+
+      // No overlapping child: fall back to the current (non-root) node.
+      if (bestJ < 0) {
+        if (currentId !== this.rootId) {
+          const docs = this._getNodeDocs(currentId);
+          if (docs) {
+            const docsSet2 = new Set(docs);
+            const overlap = Array.from(contextSet).filter(x => docsSet2.has(x)).length;
+            return [currentPath, currentId, overlap, true];
+          }
+        }
+        return [[], -1, 0, false];
+      }
+
+      const bestChildId = childIds[bestJ];
+      const bestChildIdx = childIndices[bestJ];
+      const bestDocs = childDocsList[bestJ];
+      const childPath = [...currentPath, bestChildIdx];
+
+      // Descend only when the context contains the child's first doc;
+      // otherwise return the child as a non-prefix match.
+      if (contextSet.has(bestDocs[0])) {
+        const bestChildNode = this.nodes.get(bestChildId);
+        if (bestChildNode && !bestChildNode.isLeaf && bestChildNode.children && bestChildNode.children.length > 0) {
+          currentId = bestChildId;
+          currentPath = childPath;
+          continue;
+        }
+        return [childPath, bestChildId, bestOverlap, true];
+      }
+      return [childPath, bestChildId, bestOverlap, false];
+    }
+  }
+
+  /** Search every context against the tree, recording aggregate timing stats. */
+  searchBatch(contexts: number[][]): [number[], number, number, boolean][] {
+    const t0 = globalThis.performance ? globalThis.performance.now() : Date.now();
+
+    // No tree or nothing to search: one empty result per context.
+    if (this.rootId === null || contexts.length === 0) {
+      return contexts.map(() => [[], -1, 0, false]);
+    }
+
+    const results: [number[], number, number, boolean][] = [];
+    for (const ctx of contexts) {
+      results.push(this._searchSingleHierarchical(ctx));
+    }
+
+    const t1 = globalThis.performance ? globalThis.performance.now() : Date.now();
+    this.liveStats.totalSearches += contexts.length;
+    this.liveStats.totalSearchTimeUs += (t1 - t0) * 1000;
+
+    return results;
+  }
+
+  /** Single-context convenience wrapper; optionally bumps the node's LRU clock. */
+  search(context: number[], updateAccess: boolean = true): [number[], number, number, boolean] {
+    const [result] = this.searchBatch([context]);
+    const nodeId = result[1];
+
+    if (updateAccess && nodeId >= 0) {
+      const meta = this.metadata.get(nodeId);
+      if (meta) meta.updateAccessTime();
+    }
+
+    return result;
+  }
+
+  /**
+   * Follow a child-index path from the root and return the node it lands
+   * on, or null when there is no root or the path no longer resolves.
+   *
+   * NOTE(review): the failure paths return before the timing sample is
+   * taken, so only successful traversals contribute to
+   * totalTraversalTimeUs — confirm that is intended.
+   */
+  traverse(searchPath: number[]): ClusterNode | null {
+    const startTime = globalThis.performance ? globalThis.performance.now() : Date.now();
+
+    if (this.rootId === null) return null;
+
+    let currentId = this.rootId;
+
+    for (const childIdx of searchPath) {
+      if (!this.nodes.has(currentId)) return null;
+
+      const currentNode = this.nodes.get(currentId)!;
+
+      // Index out of range means the tree changed since the path was built.
+      if (!currentNode.children || childIdx >= currentNode.children.length) {
+        return null;
+      }
+
+      currentId = currentNode.children[childIdx];
+    }
+
+    const endTime = globalThis.performance ? globalThis.performance.now() : Date.now();
+    const elapsedUs = (endTime - startTime) * 1000;
+    this.liveStats.totalTraversalTimeUs += elapsedUs;
+
+    return this.nodes.get(currentId) || null;
+  }
+
+  /**
+   * Insert a context under the node addressed by `searchPath` (falling back
+   * to the root when the path no longer resolves), delegating to the leaf
+   * or internal insertion strategy.
+   *
+   * Fixes: the original sampled start/end timestamps but never recorded the
+   * elapsed time anywhere — the dead timing code is removed.
+   *
+   * @returns [newNodeId, newSearchPath, requestId] for the created leaf.
+   */
+  insert(context: number[], searchPath: number[], totalTokens: number = 0): [number, number[], string] {
+    let matchedNode = this.traverse(searchPath);
+
+    // Stale path (e.g. after pruning): anchor at the root instead.
+    if (!matchedNode) {
+      matchedNode = this.nodes.get(this.rootId!)!;
+      searchPath = [];
+    }
+
+    let newNodeId: number, newSearchPath: number[], requestId: string;
+
+    if (matchedNode.isLeaf) {
+      [newNodeId, newSearchPath, requestId] = this._insertAtLeaf(
+        context, matchedNode, searchPath, totalTokens
+      );
+    } else {
+      [newNodeId, newSearchPath, requestId] = this._insertAtInternal(
+        context, matchedNode, searchPath, totalTokens
+      );
+    }
+
+    this.liveStats.totalInsertions += 1;
+
+    return [newNodeId, newSearchPath, requestId];
+  }
+
+  /**
+   * Create a new leaf directly under an internal node.
+   *
+   * Generates a fresh request id, appends the leaf as the parent's last
+   * child, and records metadata where extraTokens is the leaf's token count
+   * beyond its parent's total.
+   *
+   * @returns [newNodeId, newSearchPath, requestId]
+   */
+  _insertAtInternal(context: number[], parentNode: ClusterNode, searchPath: number[], totalTokens: number): [number, number[], string] {
+    // 12-hex-char request handle derived from a UUID.
+    const requestId = `req-${crypto.randomUUID().replace(/-/g, '').substring(0, 12)}`;
+
+    const newNodeId = this.nextNodeId++;
+    const newNode = new ClusterNode(
+      newNodeId,
+      new Set(context),
+      new Set([newNodeId]),
+      0.0,
+      [],
+      parentNode.nodeId
+    );
+
+    this.nodes.set(newNodeId, newNode);
+    parentNode.addChild(newNodeId);
+
+    const parentTokens = this.metadata.has(parentNode.nodeId) ? this.metadata.get(parentNode.nodeId)!.totalTokens : 0;
+    // The new leaf is the parent's last child.
+    const newSearchPath = [...searchPath, parentNode.children.length - 1];
+
+    const metadata = new NodeMetadata(newNodeId, {
+      totalTokens,
+      extraTokens: Math.max(0, totalTokens - parentTokens),
+      searchPath: newSearchPath,
+      docIds: context,
+      isLeaf: true,
+      requestId,
+    });
+
+    this.metadata.set(newNodeId, metadata);
+    this._requestToNode.set(requestId, newNodeId);
+
+    return [newNodeId, newSearchPath, requestId];
+  }
+
+  /**
+   * Insert a new leaf as a SIBLING of an existing leaf (both end up under
+   * the leaf's parent, or under the root when the leaf has no parent).
+   *
+   * @returns [newLeafId, newSearchPath, requestId]
+   */
+  _insertAtLeaf(context: number[], leafNode: ClusterNode, searchPath: number[], totalTokens: number): [number, number[], string] {
+    const requestId = `req-${crypto.randomUUID().replace(/-/g, '').substring(0, 12)}`;
+
+    let parentNode: ClusterNode;
+    let parentSearchPath: number[];
+
+    if (leafNode.parent === null) {
+      // Orphan leaf: attach the sibling under the root.
+      parentNode = this.nodes.get(this.rootId!)!;
+      parentSearchPath = [];
+    } else {
+      // Drop the last step of the path to address the parent.
+      parentNode = this.nodes.get(leafNode.parent)!;
+      parentSearchPath = searchPath.length > 0 ? searchPath.slice(0, -1) : [];
+    }
+
+    const newLeafId = this.nextNodeId++;
+    const newLeaf = new ClusterNode(
+      newLeafId,
+      new Set(context),
+      new Set([newLeafId]),
+      0.0,
+      [],
+      parentNode.nodeId
+    );
+
+    this.nodes.set(newLeafId, newLeaf);
+    parentNode.addChild(newLeafId);
+
+    const newSearchPath = [...parentSearchPath, parentNode.children.length - 1];
+    const parentTokens = this.metadata.has(parentNode.nodeId) ? this.metadata.get(parentNode.nodeId)!.totalTokens : 0;
+
+    const newMetadata = new NodeMetadata(newLeafId, {
+      totalTokens,
+      extraTokens: Math.max(0, totalTokens - parentTokens),
+      searchPath: newSearchPath,
+      docIds: context,
+      isLeaf: true,
+      requestId,
+    });
+
+    this.metadata.set(newLeafId, newMetadata);
+    this._requestToNode.set(requestId, newLeafId);
+
+    return [newLeafId, newSearchPath, requestId];
+  }
+
+  /**
+   * Split an existing leaf on the shared doc-id prefix with `context` and
+   * insert the new context next to it.
+   *
+   * When a non-empty, non-identical shared prefix exists, a new internal
+   * node holding the prefix replaces the leaf in its parent's child slot;
+   * the old leaf becomes the internal node's child 0 and a new leaf for
+   * `context` becomes child 1. Otherwise falls back to _insertAtLeaf.
+   *
+   * @returns [newLeafId, newLeafPath, requestId]
+   */
+  _splitLeafAndInsert(context: number[], leafNode: ClusterNode, searchPath: number[], totalTokens: number): [number, number[], string] {
+    const matchedDocs = this._getNodeDocs(leafNode.nodeId);
+
+    if (!matchedDocs) {
+      return this._insertAtLeaf(context, leafNode, searchPath, totalTokens);
+    }
+
+    // Element-wise common prefix of the two doc lists.
+    const sharedPrefix: number[] = [];
+    for (let i = 0; i < Math.min(matchedDocs.length, context.length); i++) {
+      if (matchedDocs[i] === context[i]) {
+        sharedPrefix.push(matchedDocs[i]);
+      } else {
+        break;
+      }
+    }
+
+    if (sharedPrefix.length === 0) {
+      return this._insertAtLeaf(context, leafNode, searchPath, totalTokens);
+    }
+
+    // Identical doc sets: no point splitting — insert a plain sibling.
+    if (sharedPrefix.length === matchedDocs.length && new Set(matchedDocs).size === new Set(context).size &&
+      [...new Set(matchedDocs)].every(d => new Set(context).has(d))) {
+      return this._insertAtLeaf(context, leafNode, searchPath, totalTokens);
+    }
+
+    let parentId = leafNode.parent;
+    if (parentId === null) {
+      parentId = this.rootId!;
+    }
+    const parentNode = this.nodes.get(parentId)!;
+    const parentSearchPath = searchPath.length > 0 ? searchPath.slice(0, -1) : [];
+
+    const leafChildIdx = parentNode.children.indexOf(leafNode.nodeId);
+
+    // New internal node carrying the shared prefix; it takes over the
+    // leaf's slot in the parent so existing paths up to here stay valid.
+    const newInternalId = this.nextNodeId++;
+    const allContent = new Set([...leafNode.content, ...context]);
+
+    const newInternal = new ClusterNode(
+      newInternalId,
+      allContent,
+      new Set(),
+      0.0,
+      [leafNode.nodeId],
+      parentId
+    );
+    newInternal.docIds = [...sharedPrefix];
+
+    this.nodes.set(newInternalId, newInternal);
+
+    parentNode.children[leafChildIdx] = newInternalId;
+    leafNode.parent = newInternalId;
+
+    const parentTokens = this.metadata.has(parentId) ? this.metadata.get(parentId)!.totalTokens : 0;
+    const leafMeta = this.metadata.get(leafNode.nodeId);
+    const leafTotal = leafMeta ? leafMeta.totalTokens : 0;
+
+    // Apportion the leaf's tokens to the internal node proportionally to
+    // how much of the leaf's doc list the shared prefix covers.
+    let internalTokens = parentTokens;
+    if (matchedDocs && matchedDocs.length > 0) {
+      const prefixRatio = sharedPrefix.length / matchedDocs.length;
+      internalTokens = Math.floor(parentTokens + (leafTotal - parentTokens) * prefixRatio);
+    }
+
+    const internalPath = [...parentSearchPath, leafChildIdx];
+
+    const internalMeta = new NodeMetadata(newInternalId, {
+      totalTokens: internalTokens,
+      extraTokens: Math.max(0, internalTokens - parentTokens),
+      searchPath: internalPath,
+      docIds: [...sharedPrefix],
+      isLeaf: false,
+      requestId: null,
+    });
+    this.metadata.set(newInternalId, internalMeta);
+
+    // Old leaf: re-base its extra tokens and path under the new internal.
+    if (leafMeta) {
+      leafMeta.extraTokens = Math.max(0, leafTotal - internalTokens);
+      leafMeta.searchPath = [...internalPath, 0];
+    }
+
+    const requestId = `req-${crypto.randomUUID().replace(/-/g, '').substring(0, 12)}`;
+    const newLeafId = this.nextNodeId++;
+
+    const newLeaf = new ClusterNode(
+      newLeafId,
+      new Set(context),
+      new Set([newLeafId]),
+      0.0,
+      [],
+      newInternalId
+    );
+    newLeaf.docIds = [...context];
+
+    this.nodes.set(newLeafId, newLeaf);
+    newInternal.addChild(newLeafId);
+
+    // The new leaf sits at child index 1 (old leaf is index 0).
+    const newLeafPath = [...internalPath, 1];
+
+    const newLeafMeta = new NodeMetadata(newLeafId, {
+      totalTokens,
+      extraTokens: Math.max(0, totalTokens - internalTokens),
+      searchPath: newLeafPath,
+      docIds: [...context],
+      isLeaf: true,
+      requestId,
+    });
+
+    this.metadata.set(newLeafId, newLeafMeta);
+    this._requestToNode.set(requestId, newLeafId);
+
+    return [newLeafId, newLeafPath, requestId];
+  }
+
+  /**
+   * Adjust token counts on the node addressed by `searchPath`.
+   * Positive deltas add tokens; non-positive deltas remove |delta| tokens.
+   * @returns false when the path does not resolve to a tracked node.
+   */
+  updateNode(searchPath: number[], tokenDelta: number): boolean {
+    const target = this.traverse(searchPath);
+    if (!target) return false;
+
+    const meta = this.metadata.get(target.nodeId);
+    if (!meta) return false;
+
+    if (tokenDelta > 0) {
+      meta.addTokens(tokenDelta);
+    } else {
+      meta.removeTokens(Math.abs(tokenDelta));
+    }
+
+    return true;
+  }
+
+  /** Back-compat alias for _removeNodeAndPrune; the prune count is dropped. */
+  _removeNode(nodeId: number): void {
+    void this._removeNodeAndPrune(nodeId);
+  }
+
+  /**
+   * Remove `nodeId`, detach it from its parent, and recursively prune
+   * ancestors that are left with no children (the root is never pruned).
+   *
+   * @returns the number of EXTRA ancestor nodes pruned; the removed node
+   *   itself is not counted.
+   */
+  _removeNodeAndPrune(nodeId: number): number {
+    if (!this.nodes.has(nodeId)) {
+      return 0;
+    }
+
+    let nodesPruned = 0;
+    const node = this.nodes.get(nodeId)!;
+    const parentId = node.parent;
+
+    if (parentId !== null && this.nodes.has(parentId)) {
+      const parent = this.nodes.get(parentId)!;
+      const idx = parent.children.indexOf(nodeId);
+      if (idx > -1) {
+        parent.children.splice(idx, 1);
+      }
+
+      // Parent emptied out: prune it too, cascading up the ancestry.
+      if (parent.children.length === 0 && !parent.isRoot) {
+        nodesPruned += 1;
+        nodesPruned += this._removeNodeAndPrune(parentId);
+      }
+    }
+
+    this.nodes.delete(nodeId);
+
+    this.metadata.delete(nodeId);
+
+    return nodesPruned;
+  }
+
+  /**
+   * Rebuild the child-index path from the root to `nodeId` by walking
+   * parent links upward; cycles or broken links end the walk early.
+   */
+  _computeSearchPath(nodeId: number): number[] {
+    if (nodeId === this.rootId) return [];
+
+    const reversedPath: number[] = [];
+    const seen = new Set<number>();
+    let cursor: number | null = nodeId;
+
+    while (cursor !== this.rootId && cursor !== null) {
+      if (seen.has(cursor)) break;  // cycle guard
+      seen.add(cursor);
+
+      const node = this.nodes.get(cursor);
+      if (!node || node.parent === null) break;
+
+      const parent = this.nodes.get(node.parent);
+      if (!parent) break;
+
+      const step = parent.children.indexOf(cursor);
+      if (step === -1) break;
+
+      reversedPath.push(step);
+      cursor = node.parent;
+    }
+
+    return reversedPath.reverse();
+  }
+
+  /** Longest element-wise common prefix of two numeric lists. */
+  _findCommonPrefix(list1: number[], list2: number[]): number[] {
+    const limit = Math.min(list1.length, list2.length);
+    let cut = 0;
+    while (cut < limit && list1[cut] === list2[cut]) {
+      cut += 1;
+    }
+    return list1.slice(0, cut);
+  }
+
+  /** Aggregate snapshot of tree size, token totals, and search timing. */
+  getStats(): any {
+    let totalTokens = 0;
+    this.metadata.forEach((meta) => {
+      totalTokens += meta.extraTokens;
+    });
+
+    const searches = this.liveStats.totalSearches;
+
+    return {
+      num_nodes: this.nodes.size,
+      active_nodes: this.metadata.size,
+      total_tokens: totalTokens,
+      num_requests: this._requestToNode.size,
+      total_searches: searches,
+      total_insertions: this.liveStats.totalInsertions,
+      total_removals: this.liveStats.totalRemovals,
+      avg_search_time_us: searches > 0 ? this.liveStats.totalSearchTimeUs / searches : 0
+    };
+  }
+}
diff --git a/openclaw-plugin/src/engine/metadata.ts b/openclaw-plugin/src/engine/metadata.ts
new file mode 100644
index 0000000..d2bf562
--- /dev/null
+++ b/openclaw-plugin/src/engine/metadata.ts
@@ -0,0 +1,82 @@
+// Optional construction parameters for NodeMetadata; each field falls back
+// to a default in the constructor.
+export interface NodeMetadataInit {
+  totalTokens?: number;       // total tokens attributed to the node
+  extraTokens?: number;       // tokens beyond what the parent already covers
+  lastAccessTime?: number;    // unix seconds; defaults to "now"
+  searchPath?: number[];      // child-index path from the root
+  isActive?: boolean;
+  isLeaf?: boolean;
+  docIds?: number[] | null;
+  requestId?: string | null;  // set only for request-backed leaves
+}
+
+/**
+ * Per-node bookkeeping: token accounting, access recency, and the link back
+ * to the request that created a leaf.
+ */
+export class NodeMetadata {
+  nodeId: number;
+  totalTokens: number;
+  extraTokens: number;
+  lastAccessTime: number;
+  searchPath: number[];
+  isActive: boolean;
+  isLeaf: boolean;
+  docIds: number[] | null;
+  requestId: string | null;
+
+  constructor(nodeId: number, init: NodeMetadataInit = {}) {
+    this.nodeId = nodeId;
+    this.totalTokens = init.totalTokens ?? 0;
+    this.extraTokens = init.extraTokens ?? 0;
+    // Stored in unix SECONDS, matching lessThan() comparisons.
+    this.lastAccessTime = init.lastAccessTime ?? Date.now() / 1000;
+    this.searchPath = init.searchPath ?? [];
+    this.isActive = init.isActive ?? true;
+    this.isLeaf = init.isLeaf ?? false;
+    this.docIds = init.docIds ?? null;
+    this.requestId = init.requestId ?? null;
+  }
+
+  /** Stamp the node as just-used (LRU clock). */
+  updateAccessTime(): void {
+    this.lastAccessTime = Date.now() / 1000;
+  }
+
+  /** Add tokens to both counters and refresh the access time. */
+  addTokens(delta: number): void {
+    this.totalTokens += delta;
+    this.extraTokens += delta;
+    this.updateAccessTime();
+  }
+
+  /**
+   * Remove up to `delta` tokens: drain this node's own (extra) tokens
+   * first, then fall back to the total for the remainder.
+   * @returns how many tokens were actually removed.
+   */
+  removeTokens(delta: number): number {
+    if (delta <= 0) return 0;
+
+    const fromExtra = Math.min(delta, this.extraTokens);
+    this.extraTokens -= fromExtra;
+    this.totalTokens -= fromExtra;
+
+    const fromTotal = Math.min(delta - fromExtra, this.totalTokens);
+    this.totalTokens -= fromTotal;
+
+    return fromExtra + fromTotal;
+  }
+
+  /** True when no tokens remain attributed to this node. */
+  isEmpty(): boolean {
+    return this.totalTokens <= 0;
+  }
+
+  /** LRU ordering: older access time sorts first. */
+  lessThan(other: NodeMetadata): boolean {
+    return this.lastAccessTime < other.lastAccessTime;
+  }
+
+  toString(): string {
+    const req = this.requestId ? `, request_id=${this.requestId}` : "";
+    const body =
+      `id=${this.nodeId}, ` +
+      `total_tokens=${this.totalTokens}, ` +
+      `extra_tokens=${this.extraTokens}, ` +
+      `is_leaf=${this.isLeaf}${req}, ` +
+      `active=${this.isActive}`;
+    return `NodeMetadata(${body})`;
+  }
+}
diff --git a/openclaw-plugin/src/engine/reorder.ts b/openclaw-plugin/src/engine/reorder.ts
new file mode 100644
index 0000000..9b4d90f
--- /dev/null
+++ b/openclaw-plugin/src/engine/reorder.ts
@@ -0,0 +1,109 @@
+import * as crypto from 'node:crypto';
+
+// One document plus the bookkeeping needed to reorder it deterministically.
+interface IndexedDoc {
+  doc: string;               // original document text
+  hash: string;              // 16-hex-char SHA-256 prefix of the trimmed text
+  originalIndex: number;     // position in the caller-supplied array
+  previousPosition: number;  // position in the previous ordering; Infinity = unseen
+}
+
+/** 16-hex-char SHA-256 fingerprint of a document's trimmed text. */
+function hashDoc(doc: string): string {
+  const digest = crypto.createHash('sha256').update(doc.trim()).digest('hex');
+  return digest.slice(0, 16);
+}
+
+/**
+ * Build the forward/backward index maps for a reordered document list.
+ * originalOrder[i] = original index of the doc now at position i;
+ * newOrder[j] = new position of the doc originally at index j.
+ */
+function buildIndexMappings(entries: IndexedDoc[], total: number): [number[], number[]] {
+  const originalOrder: number[] = [];
+  const newOrder: number[] = new Array(total);
+
+  entries.forEach((entry, position) => {
+    originalOrder.push(entry.originalIndex);
+    newOrder[entry.originalIndex] = position;
+  });
+
+  return [originalOrder, newOrder];
+}
+
+/** Wrap docs with their hash and original position; previousPosition starts unset. */
+function indexDocuments(docs: string[]): IndexedDoc[] {
+  const indexed: IndexedDoc[] = [];
+  for (let originalIndex = 0; originalIndex < docs.length; originalIndex += 1) {
+    indexed.push({
+      doc: docs[originalIndex],
+      hash: hashDoc(docs[originalIndex]),
+      originalIndex,
+      previousPosition: Number.POSITIVE_INFINITY
+    });
+  }
+  return indexed;
+}
+
+export function reorderDocuments(docs: string[]): [string[], number[], number[]] {
+ const indexed = indexDocuments(docs);
+ indexed.sort((a, b) => {
+ const byHash = a.hash.localeCompare(b.hash);
+ if (byHash !== 0) {
+ return byHash;
+ }
+ return a.originalIndex - b.originalIndex;
+ });
+
+ const reorderedDocs = indexed.map((entry) => entry.doc);
+ const [originalOrder, newOrder] = buildIndexMappings(indexed, docs.length);
+ return [reorderedDocs, originalOrder, newOrder];
+}
+
+/**
+ * Stateful reorderer: documents seen in the previous call keep that call's
+ * relative order; unseen documents are appended in content-hash order.
+ *
+ * Fixes: both `Map` declarations were missing their required type
+ * arguments (TypeScript error).
+ */
+export class ReorderState {
+  // Hashes of the previously emitted ordering.
+  private previousOrder: string[] = [];
+
+  // hash -> last doc text seen for it. NOTE(review): currently write-only
+  // and unbounded; kept for API parity — confirm whether it is still needed.
+  private hashToDoc: Map<string, string> = new Map();
+
+  /** @returns [reorderedDocs, originalOrder, newOrder] (see buildIndexMappings). */
+  reorder(docs: string[]): [string[], number[], number[]] {
+    const indexed = indexDocuments(docs);
+    const previousPositions = new Map<string, number>();
+
+    // First occurrence wins when the previous ordering held duplicate hashes.
+    for (let i = 0; i < this.previousOrder.length; i += 1) {
+      const hash = this.previousOrder[i];
+      if (!previousPositions.has(hash)) {
+        previousPositions.set(hash, i);
+      }
+    }
+
+    const known: IndexedDoc[] = [];
+    const unknown: IndexedDoc[] = [];
+
+    for (const entry of indexed) {
+      const previousPosition = previousPositions.get(entry.hash);
+      if (previousPosition === undefined) {
+        unknown.push(entry);
+        continue;
+      }
+
+      known.push({ ...entry, previousPosition });
+    }
+
+    // Known docs: previous position first, original index as tie-breaker.
+    known.sort((a, b) => {
+      if (a.previousPosition !== b.previousPosition) {
+        return a.previousPosition - b.previousPosition;
+      }
+      return a.originalIndex - b.originalIndex;
+    });
+
+    // New docs: deterministic hash order, same tie-breaker.
+    unknown.sort((a, b) => {
+      const byHash = a.hash.localeCompare(b.hash);
+      if (byHash !== 0) {
+        return byHash;
+      }
+      return a.originalIndex - b.originalIndex;
+    });
+
+    const reordered = [...known, ...unknown];
+
+    this.previousOrder = reordered.map((entry) => entry.hash);
+    for (const entry of reordered) {
+      this.hashToDoc.set(entry.hash, entry.doc);
+    }
+
+    const reorderedDocs = reordered.map((entry) => entry.doc);
+    const [originalOrder, newOrder] = buildIndexMappings(reordered, docs.length);
+    return [reorderedDocs, originalOrder, newOrder];
+  }
+
+  /** Forget all history; the next reorder() behaves like the first call. */
+  reset(): void {
+    this.previousOrder = [];
+    this.hashToDoc.clear();
+  }
+}
diff --git a/openclaw-plugin/src/engine/tree-nodes.ts b/openclaw-plugin/src/engine/tree-nodes.ts
new file mode 100644
index 0000000..7cf7561
--- /dev/null
+++ b/openclaw-plugin/src/engine/tree-nodes.ts
@@ -0,0 +1,334 @@
+/**
+ * Node in the hierarchical clustering tree. `content` holds the doc-id set;
+ * leaves have no children, roots have no parent.
+ *
+ * Fixes: `Set` was used without its required type argument on the fields
+ * and constructor parameters (TypeScript error) — now `Set<number>`.
+ */
+export class ClusterNode {
+  nodeId: number;
+  content: Set<number>;          // doc ids represented by this node
+  originalIndices: Set<number>;  // leaf ids merged into this node
+  distance: number;
+  children: number[];
+  parent: number | null;
+  frequency: number;
+  mergeDistance: number;
+  searchPath: number[];          // child-index path from the root
+
+  constructor(
+    nodeId: number,
+    content: Set<number>,
+    originalIndices: Set<number> = new Set([nodeId]),
+    distance: number = 0.0,
+    children: number[] = [],
+    parent: number | null = null,
+    frequency: number = 1
+  ) {
+    this.nodeId = nodeId;
+    // Defensive copy: callers may keep mutating the set they passed in.
+    this.content = new Set(content);
+    this.originalIndices = originalIndices;
+    this.distance = distance;
+    this.children = children;
+    this.parent = parent;
+    this.frequency = frequency;
+    this.mergeDistance = distance;
+    this.searchPath = [];
+  }
+
+  get isLeaf(): boolean {
+    return this.children.length === 0;
+  }
+
+  get isRoot(): boolean {
+    return this.parent === null;
+  }
+
+  get isEmpty(): boolean {
+    return this.content.size === 0;
+  }
+
+  /** Doc ids in ascending order (recomputed on each access). */
+  get docIds(): number[] {
+    return Array.from(this.content).sort((a, b) => a - b);
+  }
+
+  set docIds(value: number[]) {
+    this.content = new Set(value);
+  }
+
+  /** Append a child id, ignoring duplicates and self-links. */
+  addChild(childId: number): void {
+    if (!this.children.includes(childId) && childId !== this.nodeId) {
+      this.children.push(childId);
+    }
+  }
+
+  removeChild(childId: number): void {
+    const idx = this.children.indexOf(childId);
+    if (idx !== -1) {
+      this.children.splice(idx, 1);
+    }
+  }
+
+  updateFrequency(additionalFrequency: number): void {
+    this.frequency += additionalFrequency;
+  }
+
+  /**
+   * Merge another node into this one: content becomes the INTERSECTION of
+   * the two doc-id sets, indices are unioned, frequencies added.
+   */
+  mergeWith(otherNode: ClusterNode): void {
+    this.content = new Set(Array.from(this.content).filter((v) => otherNode.content.has(v)));
+    this.originalIndices = new Set([...this.originalIndices, ...otherNode.originalIndices]);
+    this.frequency += otherNode.frequency;
+  }
+
+  getDepth(): number {
+    return this.searchPath.length;
+  }
+}
+
+// Aggregate counts returned by NodeManager.getNodeStats().
+export interface NodeStats {
+  totalNodes: number;
+  leafNodes: number;      // nodes with no children
+  rootNodes: number;      // nodes with no parent
+  internalNodes: number;  // totalNodes - leafNodes
+}
+
+export class NodeManager {
+ clusterNodes: Map;
+ uniqueNodes: Map;
+ redirects: Map;
+ contentToNodeId: Map;
+
+ constructor() {
+ this.clusterNodes = new Map();
+ this.uniqueNodes = new Map();
+ this.redirects = new Map();
+ this.contentToNodeId = new Map();
+ }
+
+ private contentKey(content: Set): string {
+ return Array.from(content).sort((a, b) => a - b).join(',');
+ }
+
+ createLeafNode(nodeId: number, promptContent: Iterable): ClusterNode {
+ const contentSet = new Set(promptContent);
+ const key = this.contentKey(contentSet);
+
+ const canonicalId = this.contentToNodeId.get(key);
+ if (canonicalId !== undefined) {
+ const canonicalNode = this.uniqueNodes.get(canonicalId);
+ if (!canonicalNode) {
+ throw new Error(`Missing canonical leaf node for id ${canonicalId}`);
+ }
+
+ canonicalNode.updateFrequency(1);
+ canonicalNode.originalIndices.add(nodeId);
+
+ this.redirects.set(nodeId, canonicalId);
+ this.clusterNodes.set(nodeId, canonicalNode);
+ return canonicalNode;
+ }
+
+ const node = new ClusterNode(nodeId, contentSet);
+ this.clusterNodes.set(nodeId, node);
+ this.uniqueNodes.set(nodeId, node);
+ this.contentToNodeId.set(key, nodeId);
+ return node;
+ }
+
+ createInternalNode(
+ nodeId: number,
+ child1Id: number,
+ child2Id: number,
+ distance: number
+ ): ClusterNode {
+ const canonicalChild1Id = this.redirects.get(child1Id) ?? child1Id;
+ const canonicalChild2Id = this.redirects.get(child2Id) ?? child2Id;
+
+ if (canonicalChild1Id === canonicalChild2Id) {
+ this.redirects.set(nodeId, canonicalChild1Id);
+ const canonicalNode = this.uniqueNodes.get(canonicalChild1Id);
+ if (!canonicalNode) {
+ throw new Error(`Missing canonical child node for id ${canonicalChild1Id}`);
+ }
+ this.clusterNodes.set(nodeId, canonicalNode);
+ return canonicalNode;
+ }
+
+ const child1 = this.uniqueNodes.get(canonicalChild1Id);
+ const child2 = this.uniqueNodes.get(canonicalChild2Id);
+ if (!child1 || !child2) {
+ throw new Error(
+ `Missing child nodes for internal node ${nodeId}: ${canonicalChild1Id}, ${canonicalChild2Id}`
+ );
+ }
+
+ const intersectionContent = new Set(
+ Array.from(child1.content).filter((v) => child2.content.has(v))
+ );
+ const key = this.contentKey(intersectionContent);
+
+ const existingId = this.contentToNodeId.get(key);
+ if (existingId !== undefined && intersectionContent.size > 0) {
+ if (existingId !== canonicalChild1Id && existingId !== canonicalChild2Id) {
+ const existingNode = this.uniqueNodes.get(existingId);
+ if (!existingNode) {
+ throw new Error(`Missing existing node for id ${existingId}`);
+ }
+
+ existingNode.addChild(canonicalChild1Id);
+ existingNode.addChild(canonicalChild2Id);
+ existingNode.frequency = Math.max(
+ existingNode.frequency,
+ child1.frequency + child2.frequency
+ );
+ existingNode.originalIndices = new Set([
+ ...existingNode.originalIndices,
+ ...child1.originalIndices,
+ ...child2.originalIndices
+ ]);
+
+ child1.parent = existingId;
+ child2.parent = existingId;
+
+ this.redirects.set(nodeId, existingId);
+ this.clusterNodes.set(nodeId, existingNode);
+ return existingNode;
+ }
+ }
+
+ const combinedIndices = new Set([...child1.originalIndices, ...child2.originalIndices]);
+ const node = new ClusterNode(
+ nodeId,
+ intersectionContent,
+ combinedIndices,
+ distance,
+ [canonicalChild1Id, canonicalChild2Id],
+ null,
+ child1.frequency + child2.frequency
+ );
+
+ this.clusterNodes.set(nodeId, node);
+ this.uniqueNodes.set(nodeId, node);
+
+ if (intersectionContent.size > 0) {
+ this.contentToNodeId.set(key, nodeId);
+ }
+
+ child1.parent = nodeId;
+ child2.parent = nodeId;
+
+ return node;
+ }
+
+ cleanupEmptyNodes(): void {
+ const emptyNodeIds = Array.from(this.uniqueNodes.entries())
+ .filter(([_, node]) => node.isEmpty)
+ .map(([nodeId]) => nodeId);
+
+ if (emptyNodeIds.length === 0) {
+ return;
+ }
+
+ const sortedEmptyIds = emptyNodeIds.sort((a, b) => b - a);
+
+ for (const emptyId of sortedEmptyIds) {
+ const emptyNode = this.uniqueNodes.get(emptyId);
+ if (!emptyNode) {
+ continue;
+ }
+
+ const parentId = emptyNode.parent;
+ const childrenIds = [...emptyNode.children];
+
+ if (parentId !== null) {
+ const parentNode = this.uniqueNodes.get(parentId);
+ if (parentNode) {
+ parentNode.removeChild(emptyId);
+ for (const childId of childrenIds) {
+ if (this.uniqueNodes.has(childId)) {
+ parentNode.addChild(childId);
+ }
+ }
+ }
+ }
+
+ for (const childId of childrenIds) {
+ const childNode = this.uniqueNodes.get(childId);
+ if (childNode) {
+ childNode.parent = parentId;
+ }
+ }
+
+ this.uniqueNodes.delete(emptyId);
+ }
+
+ for (const node of this.uniqueNodes.values()) {
+ if (node.parent !== null && !this.uniqueNodes.has(node.parent)) {
+ node.parent = null;
+ }
+ }
+ }
+
+ getNodeStats(): NodeStats {
+ const totalNodes = this.uniqueNodes.size;
+ let leafNodes = 0;
+ let rootNodes = 0;
+
+ for (const node of this.uniqueNodes.values()) {
+ if (node.isLeaf) {
+ leafNodes += 1;
+ }
+ if (node.isRoot) {
+ rootNodes += 1;
+ }
+ }
+
+ return {
+ totalNodes,
+ leafNodes,
+ rootNodes,
+ internalNodes: totalNodes - leafNodes
+ };
+ }
+
+ updateSearchPaths(): void {
+ const rootNodes = Array.from(this.uniqueNodes.values()).filter((node) => node.isRoot);
+
+ if (rootNodes.length === 0) {
+ return;
+ }
+
+ if (rootNodes.length === 1) {
+ const root = rootNodes[0];
+ root.searchPath = [];
+ this._updatePathsFromNode(root);
+ return;
+ }
+
+ const currentMaxId = Math.max(...Array.from(this.uniqueNodes.keys()));
+ const virtualRootId = currentMaxId + 1;
+ const virtualRoot = new ClusterNode(
+ virtualRootId,
+ new Set(),
+ new Set(),
+ 0.0,
+ rootNodes.map((node) => node.nodeId),
+ null,
+ rootNodes.reduce((sum, node) => sum + node.frequency, 0)
+ );
+ virtualRoot.searchPath = [];
+
+ this.uniqueNodes.set(virtualRootId, virtualRoot);
+
+ for (const node of rootNodes) {
+ node.parent = virtualRootId;
+ }
+
+ this._updatePathsFromNode(virtualRoot);
+ }
+
+ _updatePathsFromNode(node: ClusterNode): void {
+ for (let childIndex = 0; childIndex < node.children.length; childIndex += 1) {
+ const childId = node.children[childIndex];
+ const childNode = this.uniqueNodes.get(childId);
+ if (!childNode) {
+ continue;
+ }
+
+ childNode.searchPath = [...node.searchPath, childIndex];
+ this._updatePathsFromNode(childNode);
+ }
+ }
+}
diff --git a/openclaw-plugin/src/index.ts b/openclaw-plugin/src/index.ts
new file mode 100644
index 0000000..adf30ba
--- /dev/null
+++ b/openclaw-plugin/src/index.ts
@@ -0,0 +1,259 @@
+import { Type } from "@sinclair/typebox";
+import { delegateCompactionToRuntime } from "openclaw/plugin-sdk/core";
+import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
+
+import { injectCacheControl } from "./engine/cache-control.js";
+import { dedupChatCompletions } from "./engine/dedup.js";
+import { getFormatHandler, type InterceptConfig } from "./engine/extract.js";
+import { ContextPilot } from "./engine/live-index.js";
+
+type Scope = "all" | "system" | "tool_results";
+
+/** Narrow an untyped config value to a Scope, defaulting to "all". */
+function parseScope(value: unknown): Scope {
+  const knownScopes: readonly Scope[] = ["all", "system", "tool_results"];
+  if (typeof value === "string" && (knownScopes as readonly string[]).includes(value)) {
+    return value as Scope;
+  }
+  return "all";
+}
+
+/**
+ * Ask the engine for a cache-friendly ordering of docs. The engine is
+ * expected to return a nested array whose first element's first entry is
+ * the reordered string list; anything else falls back to the input order.
+ */
+function reorderWithEngine(engine: ContextPilot, docs: string[]): string[] {
+  const [reordered] = engine.reorder(docs);
+  if (Array.isArray(reordered) && Array.isArray(reordered[0])) {
+    const candidate = reordered[0];
+    if (candidate.every((entry) => typeof entry === "string")) {
+      return candidate as string[];
+    }
+  }
+  return docs;
+}
+
+// Minimal message shape consumed by assemble(); content may be a plain
+// string or an array of content blocks.
+interface Message {
+ role: string;
+ content: unknown;
+}
+
+// Loose view of a content block; only "text" blocks are read here.
+interface TextBlock {
+ type?: string;
+ text?: string;
+}
+
+// Optional tool-use id fields: snake_case wire form or camelCase runtime form.
+interface ToolUseIdCarrier {
+ tool_use_id?: unknown;
+ toolUseId?: unknown;
+}
+
+/**
+ * Flatten message content to plain text. Strings pass through unchanged;
+ * arrays contribute the text of their "text" blocks joined with newlines;
+ * anything else yields the empty string.
+ */
+function normalizeMessageContent(content: unknown): string {
+  if (typeof content === "string") {
+    return content;
+  }
+  if (!Array.isArray(content)) {
+    return "";
+  }
+
+  return content
+    .filter((block): block is TextBlock => Boolean(block) && typeof block === "object")
+    .filter((block) => block.type === "text" && typeof block.text === "string")
+    .map((block) => block.text as string)
+    .join("\n");
+}
+
+/**
+ * Pull a tool-use id off a message, accepting either the snake_case wire
+ * field or the camelCase runtime field, and falling back to a positional
+ * placeholder ("tool_<idx>") when neither is a non-empty string.
+ */
+function extractToolUseId(message: Message, idx: number): string {
+  const carrier = message as Message & ToolUseIdCarrier;
+  for (const candidate of [carrier.tool_use_id, carrier.toolUseId]) {
+    if (typeof candidate === "string" && candidate) {
+      return candidate;
+    }
+  }
+  return `tool_${idx}`;
+}
+
+export default definePluginEntry({
+ id: "contextpilot",
+ name: "ContextPilot",
+ description: "Optimizes context via reordering, deduplication, and cache control injection.",
+ register: (api) => {
+ const config = {
+ scope: parseScope(api.pluginConfig?.scope),
+ };
+
+ const engine = new ContextPilot(0.001, false, "average");
+
+ let assembleCount = 0;
+ let totalCharsSaved = 0;
+
+ api.registerContextEngine("contextpilot", () => ({
+ info: {
+ id: "contextpilot",
+ name: "ContextPilot",
+ ownsCompaction: false,
+ },
+
+ async ingest() {
+ return { ingested: true };
+ },
+
+ async assemble({ messages, system }: { messages: Message[]; system?: string }) {
+ const interceptConfig: InterceptConfig = {
+ enabled: true,
+ mode: "auto",
+ tag: "document",
+ separator: "---",
+ alpha: 0.001,
+ linkageMethod: "average",
+ scope: config.scope,
+ };
+
+ const convertedMessages = messages.map((msg, idx) => {
+ if (msg.role === "toolResult") {
+ const content = normalizeMessageContent(msg.content);
+ return {
+ role: "user",
+ content: [{
+ type: "tool_result",
+ tool_use_id: extractToolUseId(msg, idx),
+ content: content,
+ }],
+ };
+ }
+ return msg;
+ });
+
+ const convertedBody: Record = {
+ messages: convertedMessages,
+ system: system,
+ };
+
+ const handler = getFormatHandler("anthropic_messages");
+ const multi = handler.extractAll(convertedBody, interceptConfig);
+
+ const reorderDocs = (docs: string[]): string[] => {
+ if (docs.length < 2) {
+ return docs;
+ }
+ return reorderWithEngine(engine, docs);
+ };
+
+ if (multi.systemExtraction) {
+ const [extraction, sysIdx] = multi.systemExtraction;
+ if (extraction.documents.length >= 2) {
+ const reordered = reorderDocs(extraction.documents);
+ handler.reconstructSystem(convertedBody, extraction, reordered, sysIdx);
+ }
+ }
+
+ for (const [extraction, location] of multi.toolExtractions) {
+ if (extraction.documents.length >= 2) {
+ const reordered = reorderDocs(extraction.documents);
+ handler.reconstructToolResult(convertedBody, extraction, reordered, location);
+ }
+ }
+
+ const convertedMessageList = Array.isArray(convertedBody.messages)
+ ? (convertedBody.messages as Array<{ content?: unknown }>)
+ : [];
+
+ const finalMessages = convertedMessageList.map((msg, idx) => {
+ const original = messages[idx];
+ if (original?.role === "toolResult") {
+ const block = Array.isArray(msg.content)
+ ? msg.content[0]
+ : null;
+ const extractedContent = block && typeof block === "object"
+ ? (block as { content?: unknown }).content
+ : undefined;
+
+ if (Array.isArray(original.content)) {
+ const newContentArray = original.content.map((entry) => {
+ if (
+ entry
+ && typeof entry === "object"
+ && (entry as TextBlock).type === "text"
+ && typeof extractedContent === "string"
+ ) {
+ return {
+ ...(entry as Record),
+ text: extractedContent,
+ };
+ }
+ return entry;
+ });
+ return { ...original, content: newContentArray };
+ } else if (typeof extractedContent === "string") {
+ return { ...original, content: extractedContent };
+ }
+ return original;
+ }
+ return msg;
+ });
+
+ const finalBody: Record = {
+ messages: finalMessages,
+ system: system,
+ };
+
+ const dedupResult = dedupChatCompletions(finalBody, system);
+ totalCharsSaved += dedupResult.charsSaved;
+
+ const optimizedBody = injectCacheControl(finalBody, "anthropic");
+
+ assembleCount++;
+
+ if (dedupResult.charsSaved > 0 || assembleCount % 5 === 0) {
+ const estimatedTokensSaved = Math.round(totalCharsSaved / 4);
+ const estimatedCostSaved = (estimatedTokensSaved * 0.003 / 1000).toFixed(4);
+ console.error(`[ContextPilot] Stats: ${assembleCount} requests, ${totalCharsSaved.toLocaleString()} chars saved (~${estimatedTokensSaved.toLocaleString()} tokens, ~$${estimatedCostSaved})`);
+ }
+
+ return {
+ messages: (optimizedBody.messages as Message[]) || messages,
+ system: optimizedBody.system as string | undefined,
+ estimatedTokens: 0,
+ };
+ },
+
+ async compact(params) {
+ return await delegateCompactionToRuntime(params);
+ },
+ }));
+
+ api.registerTool({
+ name: "contextpilot_status",
+ description: "Report ContextPilot engine state",
+ parameters: Type.Object({}),
+ async execute(_toolCallId: string, _params: unknown) {
+ const stats = engine.getStats();
+ const lines = [
+ "ContextPilot Engine Status:",
+ ` Scope: ${config.scope}`,
+ ` Contexts assembled: ${assembleCount}`,
+ ` Total chars saved: ${totalCharsSaved.toLocaleString()}`,
+ ` Live index: ${engine.isLive ? "active" : "warming"}`,
+ ` Nodes: ${Number(stats.num_nodes ?? 0)}`,
+ ` Active nodes: ${Number(stats.active_nodes ?? 0)}`,
+ ` Requests tracked: ${Number(stats.num_requests ?? 0)}`,
+ ` Total searches: ${Number(stats.total_searches ?? 0)}`,
+ ` Total insertions: ${Number(stats.total_insertions ?? 0)}`,
+ ` Total removals: ${Number(stats.total_removals ?? 0)}`,
+ ` Avg search time (us): ${Number(stats.avg_search_time_us ?? 0).toFixed(2)}`,
+ ];
+
+ return {
+ content: [
+ {
+ type: "text" as const,
+ text: lines.join("\n"),
+ },
+ ],
+ };
+ },
+ });
+ },
+});
diff --git a/openclaw-plugin/test-e2e.ts b/openclaw-plugin/test-e2e.ts
new file mode 100644
index 0000000..6b2f6f6
--- /dev/null
+++ b/openclaw-plugin/test-e2e.ts
@@ -0,0 +1,188 @@
+#!/usr/bin/env npx tsx
+/**
+ * E2E test for ContextPilot plugin
+ *
+ * Run: npx tsx test-e2e.ts
+ * Requires: ANTHROPIC_API_KEY in environment
+ */
+
+import { ContextPilot } from './src/engine/live-index.js';
+import { getFormatHandler, type InterceptConfig } from './src/engine/extract.js';
+import { injectCacheControl } from './src/engine/cache-control.js';
+import { dedupChatCompletions } from './src/engine/dedup.js';
+
+// The E2E test issues a real Messages API request; fail fast without a key.
+const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY;
+if (!ANTHROPIC_API_KEY) {
+ console.error('Error: ANTHROPIC_API_KEY not set');
+ process.exit(1);
+}
+
+// Simulated system prompt with multiple documents (like Claude Code's context)
+// NOTE(review): the blank lines inside this template look like stripped
+// document-wrapper markers; extraction in "auto" mode may depend on them —
+// confirm against the extract handler before trusting this fixture.
+const systemPromptWithDocs = `You are a helpful coding assistant.
+
+
+
+# File: src/index.ts
+export function main() {
+ console.log("Hello world");
+ const result = processData(getData());
+ return result;
+}
+
+function getData() {
+ return { items: [1, 2, 3, 4, 5] };
+}
+
+function processData(data: { items: number[] }) {
+ return data.items.map(x => x * 2);
+}
+
+
+# File: src/utils.ts
+export function formatOutput(data: number[]): string {
+ return data.join(', ');
+}
+
+export function validateInput(input: unknown): boolean {
+ return Array.isArray(input) && input.every(x => typeof x === 'number');
+}
+
+export function calculateSum(numbers: number[]): number {
+ return numbers.reduce((a, b) => a + b, 0);
+}
+
+
+# File: README.md
+# My Project
+
+This is a sample project demonstrating the ContextPilot optimization.
+
+## Installation
+npm install
+
+## Usage
+npm start
+
+## Features
+- Data processing
+- Input validation
+- Output formatting
+
+
+
+Answer questions about the code above.`;
+
+// Build Anthropic Messages API request body
+// A single short user turn keeps the test fast; max_tokens caps the reply.
+const requestBody = {
+ model: 'claude-sonnet-4-6',
+ max_tokens: 256,
+ system: systemPromptWithDocs,
+ messages: [
+ {
+ role: 'user',
+ content: 'What does the main function do? Be brief.'
+ }
+ ]
+};
+
+// Drives the full pipeline against the fixture request: extract documents,
+// reorder them, inject cache_control, then issue one real API call and
+// print the response plus cache-usage and engine counters.
+async function runTest() {
+ console.log('=== ContextPilot E2E Test ===\n');
+
+ // 1. Initialize engine
+ const engine = new ContextPilot(0.001, false, 'average');
+ console.log('1. Engine initialized');
+
+ // 2. Extract documents
+ const interceptConfig: InterceptConfig = {
+ enabled: true,
+ mode: 'auto',
+ tag: 'document',
+ separator: '---',
+ alpha: 0.001,
+ linkageMethod: 'average',
+ scope: 'all'
+ };
+
+ // Clone so the original requestBody can be compared for size afterwards.
+ const body = structuredClone(requestBody);
+ const handler = getFormatHandler('anthropic_messages');
+ const multi = handler.extractAll(body, interceptConfig);
+
+ console.log(`2. Extracted ${multi.totalDocuments} documents from system prompt`);
+
+ // 3. Reorder documents
+ if (multi.systemExtraction) {
+ const [extraction, sysIdx] = multi.systemExtraction;
+ console.log(` Original order: ${extraction.documents.map((_, i) => i).join(', ')}`);
+
+ if (extraction.documents.length >= 2) {
+ // engine.reorder is expected to return a nested array whose first
+ // element's first entry is the reordered string list.
+ const [reordered] = engine.reorder(extraction.documents);
+ if (Array.isArray(reordered) && Array.isArray(reordered[0])) {
+ const reorderedDocs = reordered[0] as string[];
+ handler.reconstructSystem(body, extraction, reorderedDocs, sysIdx);
+ console.log('3. Documents reordered for prefix cache optimization');
+ }
+ }
+ }
+
+ // 4. Inject cache control
+ const optimizedBody = injectCacheControl(body, 'anthropic');
+ console.log('4. Cache control markers injected');
+
+ // 5. Show optimization summary
+ const originalLen = JSON.stringify(requestBody).length;
+ const optimizedLen = JSON.stringify(optimizedBody).length;
+ console.log(`\n=== Optimization Summary ===`);
+ console.log(`Original request size: ${originalLen} chars`);
+ console.log(`Optimized request size: ${optimizedLen} chars`);
+ console.log(`Cache control added: ${JSON.stringify(optimizedBody).includes('cache_control')}`);
+
+ // 6. Make real API call (network access and a valid key are required).
+ console.log('\n=== Making API Call ===');
+ console.log('Calling Anthropic API with optimized request...\n');
+
+ const response = await fetch('https://api.anthropic.com/v1/messages', {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json',
+ 'x-api-key': ANTHROPIC_API_KEY,
+ 'anthropic-version': '2023-06-01',
+ 'anthropic-beta': 'prompt-caching-2024-07-31'
+ },
+ body: JSON.stringify(optimizedBody)
+ });
+
+ const result = await response.json();
+
+ // API errors come back as a JSON body with an `error` field.
+ if (result.error) {
+ console.error('API Error:', result.error);
+ process.exit(1);
+ }
+
+ console.log('=== Response ===');
+ console.log('Model:', result.model);
+ console.log('Stop reason:', result.stop_reason);
+ console.log('\nAssistant:', result.content?.[0]?.text || '(no text)');
+
+ // Cache token fields only appear when prompt caching actually engaged.
+ console.log('\n=== Usage ===');
+ console.log('Input tokens:', result.usage?.input_tokens);
+ console.log('Output tokens:', result.usage?.output_tokens);
+ if (result.usage?.cache_creation_input_tokens) {
+ console.log('Cache creation tokens:', result.usage.cache_creation_input_tokens);
+ }
+ if (result.usage?.cache_read_input_tokens) {
+ console.log('Cache read tokens:', result.usage.cache_read_input_tokens);
+ }
+
+ console.log('\n=== Engine Stats ===');
+ const stats = engine.getStats();
+ console.log('Nodes:', stats.num_nodes);
+ console.log('Active nodes:', stats.active_nodes);
+ console.log('Total insertions:', stats.total_insertions);
+
+ console.log('\n✓ E2E test complete');
+}
+
+// Entry point: surface any failure and exit non-zero so CI notices.
+runTest().catch(err => {
+ console.error('Test failed:', err);
+ process.exit(1);
+});
diff --git a/openclaw-plugin/tsconfig.json b/openclaw-plugin/tsconfig.json
new file mode 100644
index 0000000..017a5f9
--- /dev/null
+++ b/openclaw-plugin/tsconfig.json
@@ -0,0 +1,15 @@
+{
+ "compilerOptions": {
+ "target": "ES2022",
+ "module": "ES2022",
+ "moduleResolution": "bundler",
+ "strict": true,
+ "esModuleInterop": true,
+ "skipLibCheck": true,
+ "outDir": "dist",
+ "declaration": true,
+ "resolveJsonModule": true
+ },
+ "include": ["src/**/*.ts"],
+ "exclude": ["node_modules", "dist"]
+}
diff --git a/pyproject.toml b/pyproject.toml
index a1abfef..8682152 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
[project]
name = "contextpilot"
-version = "0.4.0"
+version = "0.4.1"
description = "Fast Long-Context Inference via Context Reuse"
readme = "README.md"
requires-python = ">=3.10"
@@ -68,7 +68,7 @@ filterwarnings = [
]
[tool.bumpver]
-current_version = "0.4.0"
+current_version = "0.4.1"
version_pattern = "MAJOR.MINOR.PATCH"
commit_message = "Bump version {old_version} -> {new_version}"
commit = true
diff --git a/tests/test_http_intercept.py b/tests/test_http_intercept.py
index d746595..a7c01e8 100644
--- a/tests/test_http_intercept.py
+++ b/tests/test_http_intercept.py
@@ -118,18 +118,19 @@ def client(mock_session):
original_session = http_mod._aiohttp_session
original_url = http_mod._infer_api_url
original_intercept_index = http_mod._intercept_index
- original_state = http_mod._intercept_state
+ original_states = http_mod._intercept_states.copy()
http_mod._aiohttp_session = mock_session
http_mod._infer_api_url = "http://mock-backend:30000"
http_mod._intercept_index = None # reset so each test starts fresh
- http_mod._intercept_state = http_mod._InterceptConvState()
+ http_mod._intercept_states.clear()
try:
yield TestClient(app, raise_server_exceptions=False)
finally:
http_mod._aiohttp_session = original_session
http_mod._infer_api_url = original_url
http_mod._intercept_index = original_intercept_index
- http_mod._intercept_state = original_state
+ http_mod._intercept_states.clear()
+ http_mod._intercept_states.update(original_states)
# ============================================================================
@@ -146,7 +147,7 @@ def _warmup(client, path, body):
resp = client.post(path, json=body)
assert resp.status_code == 200
# Keep _intercept_index primed, but reset conversation tracking.
- http_mod._intercept_state = http_mod._InterceptConvState()
+ http_mod._intercept_states.clear()
return resp
@@ -431,6 +432,44 @@ def test_auth_headers_forwarded(self, client, mock_session):
class TestToolResultIntercept:
+ def test_cross_layer_block_dedup_with_system_prompt(self, client, mock_session):
+ """Tool result blocks are deduped against system prompt content."""
+ # 70 repeated lines — presumably large enough to cross the dedup
+ # block-size threshold; confirm against the dedup configuration.
+ shared = "\n".join(
+ [
+ f"memory chunk line {i:03d}: repeated text for cross-layer dedup"
+ for i in range(70)
+ ]
+ )
+ # The tool result duplicates the system prompt verbatim, so the
+ # cross-layer dedup pass should replace it with a back-reference.
+ body = {
+ "model": "gpt-4",
+ "messages": [
+ {"role": "system", "content": shared},
+ {"role": "user", "content": "read file"},
+ {
+ "role": "assistant",
+ "content": None,
+ "tool_calls": [
+ {
+ "id": "tc_sys",
+ "type": "function",
+ "function": {"name": "read", "arguments": "{}"},
+ }
+ ],
+ },
+ {"role": "tool", "tool_call_id": "tc_sys", "content": shared},
+ ],
+ }
+
+ resp = client.post("/v1/chat/completions", json=body)
+ assert resp.status_code == 200
+
+ forwarded = mock_session._last_json
+ # NOTE(review): "earlier system prompt result" is presumably the
+ # marker text the dedup layer substitutes for the duplicated block —
+ # confirm against the dedup implementation's replacement string.
+ assert "earlier system prompt result" in forwarded["messages"][3]["content"]
+
+ meta = _cp_meta(resp)
+ dedup_meta = meta.get("dedup", {})
+ assert dedup_meta.get("system_blocks_matched", 0) > 0
+
def test_openai_tool_result_forwarded(self, client, mock_session):
"""OpenAI tool results with docs are extracted and forwarded."""
body = {
@@ -1004,8 +1043,7 @@ def _make_body(marker_id):
assert resp1.status_code == 200
content1 = mock_session._last_json["messages"][3]["content"]
- # Reset intercept state for clean comparison
- http_mod._intercept_state = http_mod._InterceptConvState()
+ http_mod._intercept_states.clear()
# Request 2 with different id "bbbb"
resp2 = client.post("/v1/chat/completions", json=_make_body("cccc2222dddd3333"))