diff --git a/.algo-profile/README.md b/.algo-profile/README.md
new file mode 100644
index 000000000..11e7f79e4
--- /dev/null
+++ b/.algo-profile/README.md
@@ -0,0 +1,10 @@
+# Algorithm Profile - EverOS
+
+## Optimization
+
+- [Content-Addressed Manifest Delta Sync](optimization/content-addressed-manifest-delta-sync.md) - O(N) first run, O(Δ) incremental, used in docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md
+- [Bounded Top-K Heap](optimization/bounded-top-k-heap.md) - O(M log K), used in docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md
+
+## Structures
+
+- [LRU Retrieval Cache](structures/lru-retrieval-cache.md) - O(1) average lookup, used in docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md
diff --git a/.algo-profile/optimization/bounded-top-k-heap.md b/.algo-profile/optimization/bounded-top-k-heap.md
new file mode 100644
index 000000000..dfb78c2f7
--- /dev/null
+++ b/.algo-profile/optimization/bounded-top-k-heap.md
@@ -0,0 +1,20 @@
+---
+algorithm: Bounded Top-K Heap
+category: optimization
+complexity_time: O(M log K)
+complexity_space: O(K)
+used_in: docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md
+date: 2026-05-16
+---
+
+## Why This Was Chosen
+
+When multiple candidate sources are merged, the system only needs the best K snippets, not a full sort of the entire candidate pool. A bounded min-heap keeps the strongest candidates while avoiding the extra cost of sorting low-value items that will never be shown to Hermes.
+
+## Implementation Notes
+
+Use this only at the merge boundary where candidate sets from collection search, local cache, or memory providers are combined. If the source already returns a stable top-k list, the heap can be skipped; otherwise keep the heap small and enforce K as a hard cap.
+
+## Reference
+
+[Heap / Priority Queue](https://github.com/trekhleb/javascript-algorithms)
diff --git a/.algo-profile/optimization/content-addressed-manifest-delta-sync.md b/.algo-profile/optimization/content-addressed-manifest-delta-sync.md
new file mode 100644
index 000000000..a7977b1c2
--- /dev/null
+++ b/.algo-profile/optimization/content-addressed-manifest-delta-sync.md
@@ -0,0 +1,20 @@
+---
+algorithm: Content-Addressed Manifest Delta Sync
+category: optimization
+complexity_time: O(N) first run, O(Δ) incremental
+complexity_space: O(N)
+used_in: docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md
+date: 2026-05-16
+---
+
+## Why This Was Chosen
+
+The xAI knowledge bundle is repo-shaped data, so a manifest keyed by content hash lets the host distinguish unchanged files from changed ones without re-uploading everything. That keeps the initial build linear while making steady-state refreshes proportional to the actual delta instead of the full corpus.
+
+## Implementation Notes
+
+The manifest should store path, content hash, upload state, and a stable bundle hash so a successful upload can become the new baseline atomically. Removed files should be tombstoned rather than silently forgotten, which keeps reconciliation explicit on the next sync run.
+
+## Reference
+
+[Big-O Reference](https://github.com/trekhleb/javascript-algorithms)
diff --git a/.algo-profile/structures/lru-retrieval-cache.md b/.algo-profile/structures/lru-retrieval-cache.md
new file mode 100644
index 000000000..4bcce6969
--- /dev/null
+++ b/.algo-profile/structures/lru-retrieval-cache.md
@@ -0,0 +1,20 @@
+---
+algorithm: LRU Retrieval Cache
+category: structures
+complexity_time: O(1)
+complexity_space: O(capacity)
+used_in: docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md
+date: 2026-05-16
+---
+
+## Why This Was Chosen
+
+Repeated Hermes queries against the same collection bundle should reuse prior retrieval results instead of hitting the collection on every turn. An LRU-style cache gives bounded memory with constant-time average lookup and eviction, which matches the read-heavy, hot-query pattern of the remote NixOS lane.
+
+## Implementation Notes
+
+Cache keys should include collection id, bundle hash, normalized query hash, top_k, and filter serialization so different auth contexts do not collide. A TTL layer should sit on top of the LRU policy so stale entries disappear even if the bundle hash does not change.
+
+## Reference
+
+[Data Structures Reference](https://github.com/trekhleb/javascript-algorithms)
diff --git a/.claude/hooks/commit-boundary-check.sh b/.claude/hooks/commit-boundary-check.sh
new file mode 100755
index 000000000..fc741fd98
--- /dev/null
+++ b/.claude/hooks/commit-boundary-check.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+# commit-boundary-check.sh
+#
+# PreToolUse hook for EverOS. Drains the Claude Code hook payload from stdin
+# without parsing it, so the check runs on every matched tool call, and warns
+# when a staged change set crosses multiple top-level directories.
+#
+# Rationale: PR #31 (Raven v2 closure) accidentally bundled 6 independent lanes
+# (raven, hermes use-case, skillhub, upstream-return, ci, chores) into one
+# 27-commit PR. EverOS convention from that retrospective: one component,
+# one PR. This hook is a soft nudge, not a block — cross-cutting work (lint
+# sweeps, dependency bumps, .gitignore policy) still proceeds.
+
+set -eu
+
+# Drain the hook payload from stdin and discard it. Nothing below parses the
+# JSON; the check relies on git state alone.
+cat >/dev/null 2>&1 || true
+
+# The command Claude Code is about to run lives in the payload's tool input,
+# but the payload format varies across CC versions. The hook stays scope-safe
+# by running unconditionally and only acting when staged changes exist.
+
+# Find the repo root from cwd so the hook works from worktrees too.
+repo_root=$(git rev-parse --show-toplevel 2>/dev/null || true)
+if [ -z "$repo_root" ]; then
+  exit 0
+fi
+
+cd "$repo_root"
+
+# What is staged for the next commit?
+staged=$(git diff --cached --name-only 2>/dev/null || true)
+if [ -z "$staged" ]; then
+  exit 0
+fi
+
+# Reduce each staged path to its first segment; root-level files collapse to
+# the _root_ marker. Drop .github, .claude, and _root_ so a legitimate
+# root-doc or tooling fix doesn't trip the warning by itself.
+top_dirs=$(printf '%s\n' "$staged" \
+  | awk -F/ 'NF>1 {print $1} NF==1 {print "_root_"}' \
+  | sort -u \
+  | grep -Ev '^(\.github|\.claude|_root_)$' || true)
+
+count=$(printf '%s\n' "$top_dirs" | grep -c . || true)  # grep -c exits 1 on zero matches
+
+if [ "${count:-0}" -ge 2 ]; then
+  cat >&2 <<EOF
+⚠ commit-boundary-check: this staged change set crosses $count top-level directories:
+$(printf '%s\n' "$top_dirs" | sed 's/^/  - /')
+
+EverOS convention (post-PR-#31 retrospective): one component, one PR.
+If these directories are part of the same logical change (e.g., a feature
+that genuinely spans methods/ and use-cases/), proceed.
+Otherwise, consider:
+  git reset HEAD <files-from-other-lanes>
+and committing the lanes separately.
+
+This is a warning, not a block: the pending command still runs.
+EOF
+fi
+
+exit 0
diff --git a/.claude/settings.json b/.claude/settings.json
new file mode 100644
index 000000000..1c3625f43
--- /dev/null
+++ b/.claude/settings.json
@@ -0,0 +1,16 @@
+{
+  "$schema": "https://json.schemastore.org/claude-code-settings.json",
+  "hooks": {
+    "PreToolUse": [
+      {
+        "matcher": "Bash",
+        "hooks": [
+          {
+            "type": "command",
+            "command": "\"$CLAUDE_PROJECT_DIR\"/.claude/hooks/commit-boundary-check.sh"
+          }
+        ]
+      }
+    ]
+  }
+}
diff --git a/.claude/skills/everos-prompts-sync/SKILL.md b/.claude/skills/everos-prompts-sync/SKILL.md
new file mode 100644
index 000000000..40870327a
--- /dev/null
+++ b/.claude/skills/everos-prompts-sync/SKILL.md
@@ -0,0 +1,97 @@
+---
+name: everos-prompts-sync
+description: Use when editing EverCore prompts under methods/EverCore/src/memory_layer/prompts/en or .../zh, or before opening a PR that touches that tree. Verifies EN/ZH file-name mirror + export-symbol parity, surfaces missing zh files and divergent top-level prompt constants, and defers to the existing src/devops_scripts/i18n/i18n_tool.py for code-comment translation drift.
+---
+
+# everos-prompts-sync
+
+Keeps `methods/EverCore/src/memory_layer/prompts/{en,zh}/` in lockstep at the
+**file-name + symbol-export** layer. Does **not** judge translation quality —
+that's a content review.
+
+This skill encodes a recurring EverCore failure mode: a contributor adds a new
+prompt constant to `en/` and forgets to add a matching entry under `zh/`,
+which silently breaks imports the moment a tenant uses the ZH locale.
+
+## When to invoke
+
+- A diff under `methods/EverCore/src/memory_layer/prompts/` is in flight.
+- A new prompt constant is being added to `en/<file>.py`.
+- A PR is about to be opened and the prompt tree has any change at all.
+
+If neither side of `prompts/` changed, skip — this skill has nothing to say.
+
+## Procedure
+
+1. **Confirm scope.** From repo root:
+
+   ```bash
+   cd methods/EverCore/src/memory_layer/prompts
+   ```
+
+2. **File-name mirror.** Both directories must have identical file lists
+   (excluding `__pycache__`):
+
+   ```bash
+   diff <(ls en/ | grep -v __pycache__) <(ls zh/ | grep -v __pycache__)
+   ```
+
+   Any difference is a bug. The fix is **always** to add the missing file to
+   the side that lacks it. The new file can be a translation OR an explicit
+   re-export from the other side (the existing convention — see
+   `zh/agent_prompts.py` for the re-export pattern).
+
+3. **Export-symbol parity.** For each file pair `en/X.py` and `zh/X.py`,
+   their public exports must be the same set:
+
+   ```bash
+   python -c "
+   import ast, pathlib
+   def syms(p):
+       # Top-level UPPER_CASE assignments only; prompt files hold non-ASCII text, so read as UTF-8.
+       tree = ast.parse(p.read_text(encoding='utf-8'))
+       return {t.id for n in tree.body if isinstance(n, ast.Assign)
+               for t in n.targets if isinstance(t, ast.Name) and t.id.isupper()}
+   for f in sorted(pathlib.Path('en').glob('*.py')):
+       if f.name == '__init__.py': continue
+       z = pathlib.Path('zh') / f.name
+       if not z.exists(): print(f'MISSING zh: {f.name}'); continue
+       en_syms, zh_syms = syms(f), syms(z)
+       missing_in_zh = sorted(en_syms - zh_syms)
+       missing_in_en = sorted(zh_syms - en_syms)
+       if missing_in_zh: print(f'{f.name}: zh missing {missing_in_zh}')
+       if missing_in_en: print(f'{f.name}: en missing {missing_in_en}')
+   "
+   ```
+
+   Re-exports count: `zh/agent_prompts.py` doing
+   `from ...en.agent_prompts import FOO, BAR` exposes `FOO` and `BAR` as
+   ZH symbols — that satisfies parity even though the strings live on the EN
+   side only. The AST scan above catches direct top-level assignments;
+   re-exports need either a `__all__` list or a wider AST walk if you want to
+   be exhaustive.
+
+4. **Report.** Output one of:
+   - `PASS: EN/ZH prompt parity OK` (no further action)
+   - `FAIL: <list of mismatches>` (fix before merge)
+
+5. **Adjacent tooling.** This skill does **not** translate Chinese code
+   comments to English. That's `src/devops_scripts/i18n/i18n_tool.py`, which
+   is already wired into `make lint`. Use that for code-comment drift, this
+   skill for prompt-constant drift.
+
+## What this skill explicitly does NOT do
+
+- Translate prompts from EN to ZH or vice versa. That's a human/LLM content
+  task, not a parity check.
+- Validate template variables (`{messages_json}`, `{new_count}`, etc.) match
+  between EN and ZH versions. That's a deeper content check worth a separate
+  skill if it turns out to be a recurring failure mode.
+- Block commits. This is informational. Wire it into a hook only after the
+  false-positive rate is known to be near zero.
+
+## Recurrence threshold
+
+If the parity check has surfaced the same root cause **three times** (e.g.,
+"forgot to add zh re-export when adding a new EN prompt constant"), upgrade
+this skill into a pre-commit hook under `.claude/hooks/`.
diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md
index d4008706c..d29042db6 100644
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@@ -1,6 +1,6 @@
 ## Summary
 
-<!-- What changed, and why? -->
+<!-- What changed, why, and what evidence shows it is correct? -->
 
 ## Area
 
@@ -15,7 +15,7 @@
 
 ## Verification
 
-<!-- List commands run, manual checks, screenshots, or reasons verification was not needed. -->
+<!-- List exact commands, checks, screenshots, or a clear reason verification was not needed. -->
 
 ```text
 
@@ -27,6 +27,7 @@
 - [ ] I updated docs, examples, or setup notes when behavior changed.
 - [ ] I added or updated tests when the change affects behavior.
 - [ ] I did not commit secrets, `.env` files, dependency folders, or generated output.
+- [ ] I listed the exact evidence, checks, or blocker for this change.
 - [ ] Active relative links in Markdown files resolve.
 
 ## Notes for Reviewers
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
index cb06530f6..4ac6a6eb5 100644
--- a/.github/workflows/docs.yml
+++ b/.github/workflows/docs.yml
@@ -94,6 +94,7 @@ jobs:
               Path("methods/README.md"),
               Path("benchmarks/README.md"),
               Path("AGENTS.md"),
+              Path(".github/PULL_REQUEST_TEMPLATE.md"),
               Path(".github/CONTRIBUTING.md"),
               Path(".github/CODE_OF_CONDUCT.md"),
               Path(".github/SECURITY.md"),
diff --git a/.gitignore b/.gitignore
index 16db8a8bb..fe04fcbd2 100755
--- a/.gitignore
+++ b/.gitignore
@@ -206,7 +206,10 @@ src/memory_layer/memory_extractor/profile_memory_extractor keep llm merge.py
 #LLM related
 AGENTS.mk
 docs/api_docs/profile_extraction_fields.md
-.claude/
+# Claude Code: session-local artifacts only; skills/hooks/commands stay versioned
+.claude/settings.local.json
+.claude/worktrees/
+.claude/verify/
 .cursor/*
 
 #tmp_data
@@ -226,4 +229,22 @@ use-cases/**/package-lock.json
 
 # Local playwright + goal state traces
 .playwright-mcp/
-.goal/
\ No newline at end of file
+.goal/
+
+# Hermes EverOS Raven SkillHub dogfood artifacts (runtime / local traces)
+# These are intentionally generated during dogfood smoke tests but should not pollute git
+.kilo/
+use-cases/hermes-everos-memory/raven/.local-runs/
+
+# Windburn system artifacts (runtime, not temporary)
+.kilo/raven-aliases.zsh
+
+# Multica Ultimate Workbench runtime
+.automations/
+.automations/**/*.json
+
+# Codex session artifacts (already covered but be explicit)
+.codex/.codex-global-state.json
+.codex/.codex-global-state.json.bak
+.sisyphus/
+.gstack/
diff --git a/benchmarks/EverMemBench/.gitignore b/benchmarks/EverMemBench/.gitignore
index 6c52f99af..720ccabda 100755
--- a/benchmarks/EverMemBench/.gitignore
+++ b/benchmarks/EverMemBench/.gitignore
@@ -26,9 +26,8 @@ htmlcov/
 .cursor/
 .DS_Store
 
-# Claude Code
+# Claude Code session artifacts (CLAUDE.md is versioned, .claude/ is session-local)
 .claude/
-CLAUDE.md
 
 # Logs
 *.log
diff --git a/benchmarks/EverMemBench/CLAUDE.md b/benchmarks/EverMemBench/CLAUDE.md
new file mode 100644
index 000000000..bb37026fb
--- /dev/null
+++ b/benchmarks/EverMemBench/CLAUDE.md
@@ -0,0 +1,83 @@
+# benchmarks/EverMemBench — Local CLAUDE.md
+
+Local-only context. Root `CLAUDE.md` and `AGENTS.md` cover the cross-repo map.
+
+## What this module is
+
+A multi-person group-chat memory evaluation framework. Pits 5 memory systems
+(**Memos, Mem0, Memobase, EverCore, Zep**) plus an LLM long-context baseline
+against the **EverMemBench-Dynamic** dataset on HuggingFace
+(`EverMind-AI/EverMemBench-Dynamic`).
+
+Pipeline: **Add → Search → Answer → Evaluate**. Two question types: multiple
+choice (direct comparison) and open-ended (LLM-judge).
+
+## Internal map
+
+```text
+eval/
+├── cli.py        main entry — orchestrates the 4-stage pipeline
+├── config/       YAML configs per memory system + per dataset slice
+└── src/          stage implementations (add/search/answer/evaluate)
+
+tools/
+└── analyze_results.py   post-run accuracy + breakdown reporter
+```
+
+## Hard rules
+
+- **Numbers are reportable.** Any code change that affects retrieval, answer
+  generation, or evaluation logic must report a paired before/after run in
+  the PR. Treat this like HyperMem — research artifact, not utility code.
+- **Datasets are not in-repo.** Source comes from HuggingFace. Do not vendor
+  the full dataset; cache it via `datasets` library or a pinned snapshot
+  path.
+- **OpenRouter is the default LLM gateway.** `LLM_API_KEY` in `.env` must
+  point at OpenRouter (or compatible). Per-system keys (`MEMOS_API_KEY`,
+  `MEM0_API_KEY`, etc.) are only needed for the systems being benchmarked.
+- **Smoke mode exists for a reason.** Use it (`--smoke` or equivalent in
+  CLI) before any full run. Full runs are expensive.
+
+## Working commands
+
+```bash
+# from this directory:
+cp env.template .env       # fill in LLM_API_KEY + system-specific keys
+pip install -r requirements.txt
+# install only the SDKs for systems you are evaluating:
+pip install mem0ai memobase zep-cloud   # subset as needed
+
+# pipeline (smoke first, then full):
+python -m eval.cli add    --config eval/config/<config>.yaml --smoke
+python -m eval.cli search --config eval/config/<config>.yaml --smoke
+python -m eval.cli answer --config eval/config/<config>.yaml --smoke
+python -m eval.cli evaluate --config eval/config/<config>.yaml --smoke
+
+# post-run analysis:
+python tools/analyze_results.py <run-output-dir>
+```
+
+## Common gotchas
+
+- **Message format differs per system.** Memos wants
+  `[Group: X][Speaker: Y]content`; Mem0 wants `run_id="${user_id}_${groupId}"`
+  plus `name=<Speaker>`. The README has the full matrix — do not paper over the
+  differences with a generic adapter.
+- **Timestamp handling is per-system.** Memos uses native `chat_time`, Mem0
+  uses Unix timestamps per batch. Misaligned timestamps silently kill recall.
+- **Rate limits matter.** OpenRouter and the memory system providers all rate
+  limit. `aiolimiter` is wired in — do not bypass it.
+
+## Cross-directory contract
+
+- `methods/EverCore/` is one of the systems under evaluation. EverCore DTO
+  changes can break the EverCore adapter here; treat the EverCore HTTP API
+  as a frozen contract for benchmark runs.
+- `methods/HyperMem/` may be added as a benchmark target via its `main/`
+  entry; add adapters in `eval/src/` not by importing HyperMem internals.
+
+## What does NOT belong here
+
+- Memory system implementations. Adapters only.
+- Live agent demos — that is `use-cases/`.
+- The dataset itself — keep it on HuggingFace.
diff --git a/docs/research/2026-05-16-claude-code-large-codebases-methodology.md b/docs/research/2026-05-16-claude-code-large-codebases-methodology.md
new file mode 100644
index 000000000..cdcb3ab76
--- /dev/null
+++ b/docs/research/2026-05-16-claude-code-large-codebases-methodology.md
@@ -0,0 +1,126 @@
+# Claude Code 大型代码库最佳实践方法论
+
+> 来源：[How Claude Code works in large codebases: Best practices and where to start](https://claude.com/blog/how-claude-code-works-in-large-codebases-best-practices-and-where-to-start)
+> 提取时间：2026-05-16
+> 来源仓库：Fearvox/ds-research-vault（目标路径：knowledge/ai-agents/coding-practices/claude-code-large-codebases/）
+
+---
+
+## 核心方法论（7 条）
+
+### 1. CLAUDE.md 文件优先
+
+- **作用**：每个会话自动加载的上下文文件，给 Claude 提供代码库知识
+- **层级**：
+  - 根目录 `CLAUDE.md`：大图、整体架构
+  - 子目录 `CLAUDE.md`：局部约定、子模块规范
+- **原则**：保持聚焦在广泛适用的内容上，避免变成性能负担
+- **加载机制**：Claude 会自动向上遍历目录树，加载路径上所有 `CLAUDE.md`
+
+### 2. Hooks 让设置自我进化
+
+- **传统认知**：Hooks 是防止 Claude 做错事的脚本
+- **高阶用法**：持续改进的催化剂
+  - `Stop` hook：会话结束后反思发生了什么，提出 `CLAUDE.md` 更新（上下文新鲜时）
+  - `SessionStart` hook：动态加载团队特定上下文，无需手动配置
+  - 确定性检查：lint、格式化等，比依赖 Claude 记忆更一致
+
+### 3. Skills 按需提供专业知识
+
+- **问题**：大型代码库有几十种任务类型，不需要每个会话都加载所有专业知识
+- **解决方案**：渐进式披露（Progressive Disclosure）
+  - Skills 按需加载，只在任务需要时注入
+  - 示例：安全审查时加载安全 review skill；文档更新时加载文档处理 skill
+  - 避免上下文竞争，节省 token
+
+### 4. 在子目录初始化，而非仓库根目录
+
+- **原则**：Claude 在 scoped 到与任务实际相关的代码部分时效果最好
+- **Monorepo 注意**：虽然工具默认假设根目录访问，但 Claude 会自动向上查找 `CLAUDE.md`
+- **实践**：在子目录工作，根级上下文不会丢失
+
+### 5. 按子目录限定测试和 Lint 命令
+
+- **问题**：Claude 改了一个服务就跑完整测试套件 → 超时 + 浪费上下文
+- **方案**：在子目录级 `CLAUDE.md` 指定适用于该部分的命令
+- **适用**：服务导向的代码库（每个目录有自己的测试和构建命令）
+- **编译型语言 monorepo**：跨目录依赖深，子目录 scoping 更难，可能需要项目特定构建配置
+
+### 6. 目录结构不工作时，构建代码库地图
+
+- **场景**：代码未组织在常规目录结构中
+- **方案**：在仓库根目录放轻量 markdown 文件，每行描述一个顶级文件夹的内容
+- **作用**：给 Claude 一张目录表，在打开文件前先扫描
+- **分层方法**：
+  - 根文件：只描述最高层结构
+  - 子目录 `CLAUDE.md`：提供下一层细节，随 Claude 遍历树时按需加载
+- **简单场景**：用 `@` 提及特定文件或目录也能达到同样效果
+
+### 7. 运行 LSP 服务器，让 Claude 按符号搜索
+
+- **问题**：对大型代码库常用函数名做 grep → 返回数千个匹配，Claude 烧上下文去搞清楚哪个有意义
+- **方案**：用 LSP（语言服务器协议）按符号搜索
+  - LSP 只返回指向同一符号的引用
+  - 过滤在 Claude 读任何文件之前就完成了
+- **配置要求**：
+  - 为你的语言安装代码智能插件
+  - 对应的语言服务器二进制文件
+  - Claude Code 文档覆盖了可用插件和故障排除
+
+---
+
+## 上下文工程总结
+
+| 层次 | 工具 | 用途 |
+|---|---|---|
+| **持久层** | `CLAUDE.md`（根 + 子目录） | 每个会话自动加载，项目知识库 |
+| **行为层** | Hooks（SessionStart/Stop/PreToolUse/PostToolUse） | 自动化、规则执行、动态上下文注入 |
+| **专业层** | Skills（按需加载） | 特定领域知识，避免上下文膨胀 |
+| **导航层** | 代码库地图（markdown 索引） | 快速理解目录结构 |
+| **搜索层** | LSP 服务器（符号搜索） | 精确查找，减少无效匹配 |
+
+---
+
+## 与 Windburn 认知缓存的映射
+
+| Claude Code 方法论 | Windburn 认知缓存对应 |
+|---|---|
+| `CLAUDE.md` 持久上下文 | **Source 层**（Research Vault、repo docs、source-of-truth） |
+| Hooks 动态注入 | **Perception 层**（实时观察、工具反馈） |
+| Skills 按需加载 | **Procedural 层**（可用 skills、repo 路由） |
+| 子目录 scoping | **Belief 层**（假设 + 证据 + 置信度） |
+| LSP 符号搜索 | **Episodic 层**（发生了什么，按序） |
+
+---
+
+## 实践建议（基于 EverOS 仓库）
+
+### 当前状态
+
+- ✅ `CLAUDE.md` 已存在（232 行，覆盖 runtime artifacts）
+- ✅ `.codex/AGENTS.md` 已配置（Windburn 通信配置）
+- ⚠️ 子目录 `CLAUDE.md` 可能缺失（methods/、benchmarks/、use-cases/ 等）
+
+### 建议补充
+
+1. **子目录 CLAUDE.md**：
+   - `methods/EverCore/CLAUDE.md` — EverCore 特定约定
+   - `benchmarks/EverMemBench/CLAUDE.md` — 评估运行命令
+   - `use-cases/hermes-everos-memory/CLAUDE.md` — Hermes 集成规范
+
+2. **Hooks 配置**（参考 `~/.codex/AGENTS.md`）：
+   - `Stop` hook：会话结束更新 `docs/superpowers/goal.md`
+   - `SessionStart` hook：注入当前 git status 和 TODO
+
+3. **Skills 按需加载**：
+   - 已安装：`gsd-*` 系列（项目管理）
+   - 建议：为 EverCore 开发创建 `evercore-dev` skill
+
+---
+
+## 参考资料
+
+- 原博客：<https://claude.com/blog/how-claude-code-works-in-large-codebases-best-practices-and-where-to-start>
+- Claude Code 文档：<https://docs.anthropic.com/en/docs/claude-code>
+- Windburn 认知缓存：<https://github.com/Fearvox/multica-ultimate-workbench/blob/main/docs/windburn-cognitive-cache-direction.md>
+- Multica Ultimate Workbench：<https://github.com/Fearvox/multica-ultimate-workbench>
diff --git a/docs/superpowers/goal.md b/docs/superpowers/goal.md
new file mode 100644
index 000000000..b3af8baf3
--- /dev/null
+++ b/docs/superpowers/goal.md
@@ -0,0 +1,166 @@
+# Hermes SuperGrok NixOS Goal
+
+Short `/goal` capsule:
+
+```text
+Read and execute docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md as the source of truth. Turn Hermes SuperGrok OAuth, xAI collection sync, and NixOS host control into a clean three-plane implementation with content-addressed delta sync, bounded top-k retrieval, and a local retrieval cache. Fix the knowledge anchors up front, then build `everos-ops-mcp` as the reusable ops plane. Keep auth boundaries strict, preserve the existing EverOS memory provider, and prove the remote lane with live smokes before calling it done.
+```
+
+## Role
+
+You are the implementation captain for the Hermes SuperGrok on NixOS lane.
+
+Your job is not to redesign the auth model again. Your job is to turn the approved spec into a working remote lane that can:
+
+- log into Hermes with SuperGrok OAuth,
+- refresh the xAI knowledge bundle from NixOS,
+- retrieve context through Hermes hooks and plugin boundaries,
+- and keep the management key, OAuth state, and host control separate.
+
+## Starting State
+
+The current repo already has:
+
+- a committed design spec at `docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md`;
+- a populated xAI `Windburn` collection with the repo knowledge bundle;
+- an existing EverOS memory provider path under `use-cases/hermes-everos-memory`;
+- a remote EverCore/NixOS packet under `use-cases/hermes-everos-memory/deploy/nixos/`;
+- an `.algo-profile/` history for the content-addressed sync, bounded top-k merge, and retrieval cache choices;
+- a fixed knowledge-anchor contract rooted at `research-vault`, `dash-knowledge-vault`, and `dash-kv-view-full`;
+- a new `everos-ops-mcp` package under `use-cases/hermes-everos-memory/apps/`.
+
+Do not treat any of those as optional context. They are the baseline.
+
+## Hard Boundaries
+
+1. Do not mix SuperGrok OAuth with xAI collection management credentials.
+2. Do not expose raw tokens, private paths, or host/IP details in model-visible context.
+3. Do not replace the existing EverOS memory provider.
+4. Do not turn the knowledge sync path into a browser-driven manual workflow.
+5. Do not claim incremental sync unless a no-op delta path is proven on an unchanged source tree.
+6. Do not claim retrieval is ready unless cache hit, cache miss, and stale-bundle behavior are all tested.
+7. Do not touch unrelated workspace junk in `.goal/`, `.kilo/`, `.playwright-mcp/`, or local run output.
+8. Do not widen the knowledge surface beyond the agreed anchors before the ops plane is stable.
+
+## Primary Objective
+
+Deliver a remote-first Hermes knowledge lane on NixOS where:
+
+- Hermes session auth is handled by SuperGrok OAuth,
+- xAI knowledge uploads are handled by a collection-scoped management key on the host,
+- the canonical knowledge anchors are the three agreed roots above,
+- the sync job is content-addressed and delta-aware,
+- retrieval uses a local cache plus bounded top-k merge,
+- `everos-ops-mcp` exposes the reusable ops/status surface,
+- and every plane can fail independently without collapsing the others.
+
+## Required Outputs
+
+The implementation should produce:
+
+- a NixOS-hosted sync service and timer for the knowledge bundle,
+- a manifest/delta engine that skips unchanged documents,
+- a Hermes-facing retrieval layer that injects concise, provenanced context,
+- cache and receipt artifacts for sync and retrieval,
+- a reusable `everos-ops-mcp` backend with public-safe anchor/status tools,
+- validation scripts or smokes that prove session, sync, cache, delta, and red-gate behavior,
+- and any small docs updates needed to keep the operator flow legible.
+
+## Phase Plan
+
+### Phase 0 - Live State Verification
+
+Confirm the real starting conditions before any edits:
+
+- Hermes xAI OAuth login is still available locally,
+- the `Windburn` collection is present and readable,
+- the NixOS remote packet still matches the intended host shape,
+- the knowledge bundle source roots are the ones we want to sync,
+- and the current workspace is not carrying a hidden breakage in the relevant paths.
+
+Gate: no implementation until the live state matches the plan.
+
+### Phase 1 - Knowledge Sync Plane
+
+Build the host-owned sync path first:
+
+- normalize the approved source roots,
+- walk the source tree once,
+- sanitize and hash each document,
+- write a manifest row per path with content hash and upload state,
+- diff the new manifest against the last successful manifest,
+- upload only added or changed documents in stable path order,
+- tombstone deletions explicitly,
+- and publish the new manifest pointer only after the upload succeeds.
+
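+This path should be `O(N)` on the first build. On refresh the walk-and-hash pass still touches every document, but upload work should be `O(Δ)`: zero when the tree is unchanged and proportional to the changed documents otherwise.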
+
+Gate: an unchanged tree must produce a no-op diff and skip upload.
+
+### Phase 2 - Retrieval Plane
+
+Build the Hermes-facing retrieval path on top of the sync plane:
+
+- add a short-lived SQLite retrieval cache,
+- key it by collection id, bundle hash, normalized query hash, top_k, and stable filter serialization,
+- inject only top-k snippets with provenance,
+- and use a bounded min-heap when multiple candidate sources must be merged.
+
+This keeps repeated turns cheap and keeps the read path from degenerating into full sorts or repeated collection lookups.
+
+Gate: repeated queries against the same bundle should hit cache, and merged candidate lists should preserve only the strongest `K` items without a full re-sort.
+
+### Phase 3 - Hooks and Safety
+
+Wire the policy layer around the retrieval path:
+
+- `pre_tool_call` blocks mis-scoped or dangerous calls,
+- `pre_llm_call` injects the retrieved context and current health state,
+- `transform_tool_result` redacts secrets, paths, and oversized output,
+- `post_tool_call` records a compact receipt with tool, duration, status, and collection revision.
+
+Keep `execute_code` limited to mechanical packaging and validation work.
+
+Gate: no secret or private-path material appears in model-visible output or receipts.
+
+### Phase 4 - Validation and Proof
+
+Prove each plane independently:
+
+- session smoke: Hermes can log into xAI with SuperGrok OAuth and start a turn,
+- sync smoke: the host can build, upload, and refresh the `Windburn` collection,
+- cache smoke: repeated reads reuse the retrieval cache when the bundle hash is unchanged,
+- delta smoke: unchanged sources produce a no-op manifest diff,
+- top-k smoke: merged candidates stay bounded at `K`,
+- failure smoke: missing secrets, expired auth, and retrieval timeouts degrade cleanly.
+
+Gate: no plane can be marked PASS from inference alone.
+
+## Decision Order
+
+When trade-offs appear, prefer this order:
+
+1. Strict auth separation.
+2. Remote host reliability.
+3. Incremental sync efficiency.
+4. Retrieval latency.
+5. Cosmetic cleanup.
+
+## Exit Conditions
+
+Stop when all of the following are true:
+
+- the NixOS sync plane works end to end,
+- the Hermes retrieval path works end to end,
+- the cache and delta gates pass,
+- the failure cases degrade cleanly,
+- and the implementation is small enough that the operator can reason about the trust boundaries in one pass.
+
+## Final Deliverable
+
+When this goal is complete, the repo should have a single, truthful story:
+
+- SuperGrok OAuth runs the Hermes session,
+- the host-managed xAI key refreshes the collection,
+- Hermes hooks handle context injection and redaction,
+- and the EverOS memory provider remains the durable local memory layer.
diff --git a/docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md b/docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md
new file mode 100644
index 000000000..29a94fc77
--- /dev/null
+++ b/docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md
@@ -0,0 +1,248 @@
+# Hermes SuperGrok on NixOS: Three Auth Planes
+
+## Status
+
+Draft design, approved at the architecture level.
+
+## Context
+
+Hermes now supports `xai-oauth`, and the local session shows a successful SuperGrok login with `model.provider=xai-oauth` and default model `grok-4.3`.
+
+At the same time, the remote NixOS path needs the sanitized repository knowledge bundle that was already prepared for the `Windburn` xAI collection. That bundle is not the same thing as the interactive chat session: it is a durable knowledge corpus that must be refreshable on a remote host.
+
+This design keeps those concerns separate as three auth planes (items 1-3), plus a public-safe ops plane (item 4):
+
+1. Hermes session auth for interactive model turns.
+2. xAI collection auth for knowledge upload and refresh.
+3. NixOS host auth for scheduling, file ownership, and deployment control.
+4. A reusable `everos-ops-mcp` plane for public-safe anchor and ops status, which can be mounted by ChatGPT custom apps without exposing secrets.
+
+## Goals
+
+- Use SuperGrok OAuth only for Hermes model sessions.
+- Keep xAI collection management credentials out of the Hermes session context.
+- Keep the knowledge corpus anchored to the three agreed source roots before any broader expansion.
+- Make the knowledge corpus reproducible and incrementally refreshable on NixOS.
+- Inject retrieved knowledge into Hermes through hooks and tool boundaries, not through ad hoc prompt stuffing.
+- Support both scheduled and manual sync on the remote host.
+- Keep raw tokens, private paths, and host details out of model-visible context and receipts.
+
+## Non-Goals
+
+- Replacing the existing EverOS memory provider.
+- Making the browser part of the steady-state auth path.
+- Mirroring every repo file into xAI.
+- Letting model output mutate the knowledge corpus directly.
+- Sharing one secret across the session, collection, and host planes.
+- Growing the knowledge surface beyond the agreed anchors before the ops plane is stable.
+
+## Decision
+
+The recommended implementation is a host-owned sync service on NixOS plus a Hermes plugin/hook layer:
+
+- Hermes uses `xai-oauth` for chat and model calls.
+- A separate NixOS service uses a collection-scoped xAI management key to build and refresh the knowledge bundle.
+- A Hermes plugin uses collection search plus local cache to retrieve relevant snippets before a turn.
+- Hermes hooks redact, gate, and record tool activity.
+- `execute_code` is allowed only for mechanical packaging and validation work, not for auth-sensitive calls.
+- A separate `everos-ops-mcp` service exposes public-safe anchor/status and bounded smoke operations for ChatGPT custom app wiring.
+
+This gives one clean operator flow on NixOS without turning OAuth into a universal credential.
+
+## Architecture
+
+```mermaid
+flowchart LR
+  subgraph Host["NixOS host"]
+    S["systemd timer/service"]
+    P["knowledge packer"]
+    C["Hermes plugin + hooks"]
+    L["SQLite retrieval cache"]
+    R["JSONL receipts"]
+  end
+
+  H["Hermes session"]
+  O["SuperGrok / xai-oauth"]
+  K["xAI collection: Windburn"]
+  E["EverOS memory provider"]
+
+  H --> O
+  S --> P
+  P --> K
+  C --> K
+  C --> L
+  C --> R
+  C --> H
+  E --> H
+```
+
+## Components
+
+### 1. Hermes session plane
+
+Hermes runs with `xai-oauth` as the model provider. The OAuth state stays in Hermes-managed auth state, not in the knowledge bundle and not in the xAI collection sync service.
+
+This plane is only for interactive turns and tool orchestration. It must not be used as a transport for collection management keys.
+
+### 2. Knowledge sync plane
+
+The NixOS host owns a dedicated sync service and timer. The service:
+
+- reads the sanitized knowledge source set,
+- builds a manifest and bundle hash,
+- uploads or refreshes the `Windburn` collection,
+- writes an audit receipt,
+- marks the corpus stale or healthy.
+
+The sync service uses a collection-scoped xAI management key that lives in a host secret file wired in as the unit's `EnvironmentFile`. The key is readable by the sync unit only.
+
+The canonical knowledge anchors are fixed up front and treated as the shared source contract for every knowledge-related surface:
+
+- `research-vault` -> `/Users/0xvox/Documents/Evensong/research-vault`
+- `dash-knowledge-vault` -> `/Users/0xvox/Desktop/dash-knowledge-vault`
+- `dash-kv-view-full` -> `/Users/0xvox/Desktop/dash-kv-view-full`
+
+These anchors are the input set for the future knowledge MCP layer and the current ops surface. They should stay stable unless the operator explicitly expands the corpus.
+
+The bundle itself is the reproducible artifact. It should contain the same repo knowledge corpus that was already prepared for xAI, plus enough metadata to make incremental updates safe:
+
+- source roots
+- doc list
+- checksums
+- sanitization timestamp
+- bundle hash
+- upload time
+
+### 3. NixOS control plane
+
+NixOS controls when sync runs, where the bundle lives, and which service user owns the artifacts. The host may trigger sync in two ways:
+
+- a `systemd timer` for steady-state refresh
+- a manual `systemctl start` / operator-triggered run for catch-up or re-upload
+
+The timer is the default path and should refresh on an hourly cadence unless host config overrides it. Manual runs use the same service so the behavior stays identical.
+
+### 4. Ops MCP plane
+
+`everos-ops-mcp` is a reusable ChatGPT-facing MCP server for the high-value operational surface around this lane.
+
+Its first job is not broad automation. Its first job is public-safe observability and bounded execution:
+
+- report token-file health without printing token contents,
+- report anchor presence and freshness,
+- expose a narrow allowlist of smoke commands for the Hermes/EverOS lane,
+- and keep future knowledge and signals MCP servers composable rather than monolithic.
+
+This plane should stay separate from the collection sync key and from Hermes session auth. ChatGPT can mount it as a custom MCP app, but it should not become the universal credential bucket.
+
+## Data Flow
+
+1. The operator logs into Hermes with SuperGrok OAuth.
+2. NixOS starts or resumes the sync service on a timer or manual trigger.
+3. The packer gathers the approved knowledge sources and produces a sanitized bundle.
+4. The sync service uploads the bundle to the existing `Windburn` collection.
+5. When Hermes starts a turn, the plugin checks local cache and collection health.
+6. If the cache misses or the corpus is stale, the plugin queries the collection.
+7. The plugin injects a short, provenance-bearing context block into the next turn.
+8. Hooks redact sensitive output and write receipts.
+9. If sync fails, Hermes keeps working with the last known corpus or with no collection context rather than failing the whole session.
+
+## Auth Boundaries
+
+- SuperGrok OAuth may authenticate the Hermes session, but it never authenticates collection writes.
+- The collection management key may upload and refresh the collection, but it never authenticates Hermes chat.
+- Host control credentials may start and supervise the service, but they never enter the model context.
+- No plane should read the others’ secret material unless a wrapper explicitly resolves it inside the trusted host process.
+
+## Caching and Context Retrieval
+
+The design uses two caches:
+
+- a short-lived SQLite retrieval cache for collection search results
+- Hermes conversation caching via the existing `x-grok-conv-id` behavior when xAI transport is in use
+
+Cache keys should include the collection name, bundle hash, normalized query hash, top_k, and stable filter serialization. That makes invalidation straightforward when the corpus changes and keeps repeated turns on the same bundle at `O(1)` average cache lookup cost.
+
+Context retrieval should stay small and focused:
+
+- top-k snippets only
+- provenance on every snippet
+- no raw documents unless a user explicitly asks
+- no private paths or token material in the injected text
+
+The EverOS memory provider remains the durable local turn-memory layer. The xAI collection is a separate knowledge corpus, not a replacement for local memory.
+
+## Sync Algorithm
+
+The NixOS sync job should be content-addressed instead of rebuild-everything:
+
+- normalize the approved source root list once
+- walk the source tree once
+- ignore generated outputs and public-surface junk
+- hash each source document after sanitization
+- store a manifest row per document path with its content hash and upload state
+- diff the new manifest against the last successful manifest with a path -> hash map
+- upload only the added or changed documents in stable path order
+- mark deletions as tombstones in the manifest so the next run can reconcile them safely
+- publish the new manifest pointer only after the upload succeeds
+
+That keeps the first run at `O(N)` but makes incremental refreshes proportional to the changed set, `O(Δ)`, instead of resending the entire corpus.
+
+If the plugin merges multiple candidate sources at read time, it should keep only the best `K` results in a bounded min-heap rather than sorting the full candidate list. That keeps the merge step at `O(M log K)` instead of `O(M log M)`.
+
+## Hooks and Sandbox
+
+Use Hermes hooks for policy, not business logic:
+
+- `pre_tool_call` blocks dangerous or mis-scoped tool calls.
+- `pre_llm_call` injects retrieved knowledge and current health state.
+- `transform_tool_result` redacts secrets, paths, and oversized outputs before they reach the model.
+- `post_tool_call` records a receipt with tool name, duration, status, and collection revision.
+
+Use `execute_code` only for mechanical work such as bundle generation, manifest checks, mock uploads, and offline validation. It should not hold raw collection secrets or perform browser-based auth.
+
+If a future implementation wants a sandboxed helper for packaging, that helper must read secrets only from trusted host files and must never echo them to stdout or into the model context.
+
+## Error Handling
+
+- If Hermes OAuth expires, the session should fail closed and ask for re-authentication.
+- If the xAI sync key is missing, the sync service should stop before any upload attempt.
+- If upload fails mid-run, the bundle should be marked stale and the previous healthy corpus should remain usable.
+- If retrieval fails, Hermes should continue with the EverOS memory provider or no external collection context.
+- If a receipt write fails, the turn may continue, but the sync service must surface a visible health warning so the host does not silently drift.
+
+## Testing
+
+The implementation should prove each plane independently:
+
+- session smoke: Hermes can log into xAI with SuperGrok OAuth and start a turn
+- sync smoke: the NixOS service can build, upload, and refresh the `Windburn` collection
+- cache smoke: repeated queries hit the local retrieval cache when the bundle hash is unchanged
+- delta smoke: an unchanged source tree produces a no-op manifest diff and skips upload
+- top-k smoke: merged candidates preserve only the strongest `K` results without a full re-sort
+- hook smoke: secret/path redaction works before model-visible output
+- failure smoke: missing secrets, expired auth, and retrieval timeouts degrade cleanly
+
+The existing repo already has good patterns for this style of proof:
+
+- local provider load and smoke commands in `use-cases/hermes-everos-memory`
+- remote health and full smoke patterns for the NixOS service
+- packet-based receipts for Raven / Hermes / EverOS work
+
+## Rollout
+
+1. Confirm the Hermes xAI OAuth session works on the target NixOS host.
+2. Add the knowledge sync service and timer.
+3. Wire the retrieval plugin and hooks.
+4. Add cache and receipt files.
+5. Run the session, sync, cache, delta, top-k, and failure smokes.
+6. Treat the remote lane as `PASS` only when the auth planes stay separated and the knowledge corpus can be refreshed again without reworking the architecture.
+
+## Success Criteria
+
+- Hermes uses SuperGrok OAuth for model turns on NixOS.
+- The remote host can refresh the xAI knowledge corpus without exposing the management key to the session.
+- Incremental refreshes reuse the manifest diff path when the source tree is unchanged.
+- Retrieved knowledge enters the prompt through hooks, not ad hoc manual copy/paste.
+- The system continues to function when sync is stale or temporarily unavailable.
+- The design stays compatible with the existing EverOS memory provider and remote EverCore packet.
diff --git a/methods/EverCore/CLAUDE.md b/methods/EverCore/CLAUDE.md
new file mode 100644
index 000000000..33cccea5f
--- /dev/null
+++ b/methods/EverCore/CLAUDE.md
@@ -0,0 +1,94 @@
+# methods/EverCore — Local CLAUDE.md
+
+Local-only context for working inside this directory. Root `CLAUDE.md` and
+`AGENTS.md` already cover the cross-repo project map and the canonical Quick
+Commands — do not duplicate them here.
+
+## What this module is
+
+`memsys` (pyproject name) is the long-term memory operating system for agents.
+Multi-tenant, fully async, FastAPI-backed, layered over
+MongoDB + Elasticsearch + Milvus + Redis.
+Public API surface lives at
+`src/infra_layer/adapters/input/api/`.
+
+## Internal layer map
+
+```text
+src/
+├── core/           cross-cutting infra (DI, tenants, middleware, cache, queue,
+│                   lifespan, rate_limit, observation, capability, oxm, lock)
+├── memory_layer/   the memory pipeline — LLM, prompts (en/zh), extractors
+│                   (memory_extractor, memcell_extractor), profile_indexer,
+│                   profile_manager, cluster_manager
+├── agentic_layer/  memory_manager.py orchestrates the layers above
+├── biz_layer/      business policies on top of memory primitives
+├── infra_layer/    HTTP, persistence, vector store, embedding adapters
+├── api_specs/      DTOs / request-response contracts
+├── service/        service-level wiring
+├── migrations/     mongodb + postgresql schema migrations
+└── devops_scripts/ sensitive_info scrubbing, milvus_admin, data_fix, i18n
+```
+
+Read order for a new task: `agentic_layer/memory_manager.py` → the layer it
+touches → `core/` only if you hit a DI / tenant / lifespan question.
+
+## Hard rules in this module
+
++ **Async everywhere.** No sync I/O in request paths. If a library is sync-only,
+  push it to a thread pool via the existing `core/` helpers.
++ **Tenant scoping is not optional.** Every query, write, and cache key must
+  carry tenant context resolved through `core/tenants/`. Cross-tenant leakage
+  is a P0 bug.
++ **Prompts EN/ZH must stay in lockstep.** `src/memory_layer/prompts/en/` and
+  `src/memory_layer/prompts/zh/` are mirrors. Adding a prompt to one without
+  the other should be treated as a lint failure.
++ **Public DTOs are a contract.** Files under `src/api_specs/dtos/` are
+  consumed by `use-cases/` and external clients. Breaking changes need a
+  migration note in `docs/CHANGELOG.md`.
+
+## Working commands (precise — Root CLAUDE.md has the broad strokes)
+
+```bash
+# from this directory:
+docker compose up -d           # boot mongo + es + milvus + redis (first time)
+uv sync                        # install / refresh deps
+uv run python src/run.py       # boot the API
+make test                      # full pytest run
+uv run pytest tests/test_memory_manager_multi_type_search.py -x -vv
+                               # single-file iteration with -x stop-on-first-fail
+make lint                      # ruff + black + i18n sync check
+uv run pyright                 # type check (config in pyrightconfig.json)
+```
+
+## Common gotchas
+
++ Milvus standalone takes ~30s to become healthy. `docker compose ps` will
+  show "starting" — wait for "healthy" before `python src/run.py`.
++ `env.template` defaults to OpenRouter → `x-ai/grok-4-fast`. Local runs that
+  hit the actual LLM need a real `LLM_API_KEY` (OpenRouter or DeepSeek key).
++ The 202 Accepted path in `SimpleMemoryManager` is the async-ingest contract
+  — do not collapse it to 200. See `tests/test_simple_memory_manager.py`.
++ Multi-type search (recall + extract) has hybrid dedup logic in
+  `agentic_layer/memory_manager.py` — `test_memory_manager_multi_type_search.py`
+  pins the invariant.
+
+## Cross-directory contract
+
+Things outside `methods/EverCore/` that depend on this module:
+
++ `use-cases/hermes-everos-memory/` mounts EverCore as the memory provider via
+  the public HTTP API. Changing routes under `src/infra_layer/adapters/input/api/`
+  needs a heads-up in that use case.
++ `benchmarks/EverMemBench/` exercises the recall + extract paths. Schema
+  changes in DTOs require regenerating any frozen benchmark inputs.
++ `methods/EverCore/examples/openclaw-plugin/` is the JS plugin reference; the
+  `engine.js` / `types.js` contract mirrors the Python DTOs.
+
+## What does NOT belong here
+
++ New cross-cutting frameworks (auth plane, retrieval cache, MCP server, etc.).
+  Those go to their own top-level lane and consume EverCore through the public
+  API. Do not bolt them into `src/`.
++ Repository-wide planning state. `.planning/`, `.goal/`, and `.remember/` are
+  root-level. Subdirectory CLAUDE.md files stay focused on this module only.
diff --git a/methods/HyperMem/CLAUDE.md b/methods/HyperMem/CLAUDE.md
new file mode 100644
index 000000000..e8e7f8d7c
--- /dev/null
+++ b/methods/HyperMem/CLAUDE.md
@@ -0,0 +1,86 @@
+# methods/HyperMem — Local CLAUDE.md
+
+Local-only context. Root `CLAUDE.md` and `AGENTS.md` cover the cross-repo map.
+
+## What this module is
+
+Official implementation of the **ACL 2026** paper *HyperMem: Hypergraph Memory
+for Long-Term Conversations*. Three-level hypergraph (**topics → episodes →
+facts**) with weighted hyperedges, retrieved via coarse-to-fine top-down
+traversal. LoCoMo headline number: **92.73% LLM-as-judge accuracy** (vs.
+HyperGraphRAG 86.49%, MemOS 75.80%).
+
+This is research code with a publication frozen behind it. Treat it as a
+reference implementation — refactors that change numerics need to re-run the
+LoCoMo eval before merge.
+
+## Internal map
+
+```text
+hypermem/
+├── main/         entry points for construction + retrieval + evaluation
+├── structure.py  hypergraph data structures (topics, episodes, facts, edges)
+├── types.py      typed schemas
+├── config.py     run config
+├── extractors/   episode detection + topic aggregation + fact extraction
+├── llm/          LLM client adapters
+├── prompts/      extractor + retrieval prompts
+└── utils/        shared helpers
+
+scripts/
+├── run_eval.sh           one-shot eval runner
+├── serve_embedding.sh    local embedding service
+└── serve_reranker.sh     local reranker service
+```
+
+Read order for a new task: `structure.py` → `main/` → the extractor or
+retrieval stage you are touching → `prompts/` only if changing prompt schema.
+
+## Hard rules
+
+- **Numerics are paper-load-bearing.** Changes that touch propagation
+  (`λ = 0.5`), attention weighting, BM25-dense RRF fusion, or top-k thresholds
+  must re-run the LoCoMo eval and report the delta in the PR.
+- **Python 3.12+.** ML stack (torch, transformers, sentence-transformers) —
+  CPU works for smoke; GPU recommended for full eval.
+- **Embedding + reranker services are external.** `scripts/serve_*.sh` boots
+  them locally. Do not vendor the model weights into the repo.
+
+## Working commands
+
+```bash
+# from this directory:
+python -m venv .venv && source .venv/bin/activate
+pip install -r requirements.txt
+
+# boot the embedding + reranker services in separate terminals:
+bash scripts/serve_embedding.sh
+bash scripts/serve_reranker.sh
+
+# run a full eval (LoCoMo or your dataset):
+bash scripts/run_eval.sh
+```
+
+## Common gotchas
+
+- The hypergraph construction is **streaming** — episode boundary detection
+  runs as the dialogue is ingested. Do not batch-rewrite that loop without
+  re-validating boundary placement on the paper's eval set.
+- BM25 + dense RRF fusion is implementation-sensitive. Changing the k constant
+  in RRF (default 60) shifts retrieval and downstream accuracy.
+- Hyperedge weights are in `[0, 1]` and used as attention logits before
+  softmax. Negative or unbounded values silently break propagation.
+
+## Cross-directory contract
+
+- `benchmarks/EverMemBench/` may import HyperMem as one of the memory systems
+  under evaluation; keep the public `main/` entry signatures stable.
+- HyperMem does not depend on `methods/EverCore/`. They are independent
+  memory architectures, both reachable as benchmark targets.
+
+## What does NOT belong here
+
+- Production multi-tenant memory APIs — that is EverCore's role.
+- New benchmark datasets — put those in `benchmarks/`.
+- Hermes / use-case integrations — they should consume HyperMem through a
+  benchmark adapter, not import internal modules directly.
diff --git a/use-cases/hermes-everos-memory/CLAUDE.md b/use-cases/hermes-everos-memory/CLAUDE.md
new file mode 100644
index 000000000..7b43a63f2
--- /dev/null
+++ b/use-cases/hermes-everos-memory/CLAUDE.md
@@ -0,0 +1,108 @@
+# use-cases/hermes-everos-memory — Local CLAUDE.md
+
+Local-only context. Root `CLAUDE.md` and `AGENTS.md` cover the cross-repo map.
+
+## What this use case is
+
+Hermes `MemoryProvider` integration that mounts **EverCore** (HTTP API at
+`http://127.0.0.1:1995` by default) as the memory backend for Hermes sessions.
+Covers prefetch (pre-turn recall), `sync_turn` (post-turn persistence) with
+auto-flush, and explicit memory tools (search, store, health, flush).
+
+This is also the staging ground for the **Hermes SuperGrok NixOS** lane (see
+`docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md`).
+
+## Internal map
+
+```text
+__init__.py          thin Hermes interface shim (Python class entry)
+bin/
+  everos-memory.mjs    operator/dev CLI (Node/Bun) — health/search/sync-smoke
+  skillhub-packet.mjs  SkillHub fixture validator
+  skillhub-mock-api.mjs SkillHub mock API server
+  raven-run.mjs        Raven run packet validate/render
+  mock-openai-compatible.mjs  Mock OpenAI-compatible server
+scripts/
+  install-local.sh     installs provider into Hermes profile (no activation)
+  skillhub-api-smoke.sh  HTTP smoke against SkillHub mock
+deploy/
+  nixos/               remote workhorse deploy packet (DEPLOY_PACKET.md,
+                       README.md, evercore-remote-workhorse.nix)
+skillhub/fixtures/     read-only views + install-packet fixtures
+raven/fixtures/        doomsday + dogfood run fixtures
+plugin.yaml            Hermes plugin manifest
+package.json           Node scripts: health / search / sync-smoke /
+                       skillhub:* / raven:* / mock-openai:* / test
+justfile               just-runner shortcuts
+```
+
+## Hard rules
+
+- **EverCore lifecycle is not our problem.** This package does not start
+  EverCore. The expectation is documented in `README.md`: bring EverCore up
+  first with `cd methods/EverCore && uv run python src/run.py --host 127.0.0.1
+  --port 1995`.
+- **Configuration is env-var driven only.** No hard-coded URLs or user IDs.
+  See `EVEROS_*` vars in `README.md`. Defaults stay loopback-friendly.
+- **Remote deploy stays loopback-bound by default.** `deploy/nixos/` keeps
+  EverCore on `127.0.0.1`; CCR / external clients reach it through reverse
+  proxy, not direct binding.
+- **One component, one PR.** This lane is about to expand into Hermes
+  SuperGrok + NixOS sync service + retrieval plugin + `everos-ops-mcp`.
+  Each of those is a separate PR. **No multi-component PRs.** See the
+  commit-boundary hook in root `.claude/`.
+
+## Working commands
+
+```bash
+# from this directory:
+npm run health        # ping EverCore at EVEROS_API_BASE_URL
+npm run search        # smoke a search call
+npm run sync-smoke    # round-trip sync_turn
+
+# SkillHub mock:
+npm run skillhub:serve     # boot mock API
+npm run skillhub:check     # validate config-only
+npm run skillhub:sample    # validate fixture
+npm run skillhub:smoke     # HTTP smoke
+
+# Raven:
+npm run raven:sample       # validate doomsday-run fixture
+npm run raven:render       # render fixture to terminal
+
+# Self-test:
+npm test                   # everos-memory self-test
+
+# Install into Hermes (no activation):
+bash scripts/install-local.sh
+```
+
+## Common gotchas
+
+- **EverCore must be reachable.** `EVEROS_API_BASE_URL=http://127.0.0.1:1995`
+  is the default. If EverCore is on a remote host, set this explicitly — do
+  not assume tunnels.
+- **`EVEROS_AUTO_FLUSH=1` and `EVEROS_SYNC_INLINE=1` are CLI-friendly
+  defaults.** They make recall immediately searchable, at the cost of an
+  extra round trip. Production / long-running sessions may want them `0`.
+- **`memory_types` is comma-separated.** Default is
+  `episodic_memory,profile`. Adding a third type means EverCore must support
+  it on the search method.
+
+## Cross-directory contract
+
+- **Consumes** `methods/EverCore/` through its HTTP API only. No Python imports.
+- **Surfaces** to Hermes through `__init__.py` (Python provider class) +
+  `plugin.yaml` (manifest). Hermes loads the class at startup.
+- **Does not** depend on `methods/HyperMem/` or `benchmarks/`.
+- **Future Hermes SuperGrok lane** will add: NixOS sync service, retrieval
+  plugin, `everos-ops-mcp`. Those will live here under
+  `deploy/`, a new `plugin/` subtree, and a new `ops-mcp/` subtree
+  respectively — each landing in its own PR.
+
+## What does NOT belong here
+
+- EverCore feature changes — those are in `methods/EverCore/`.
+- New memory architectures — those are in `methods/`.
+- Repo-wide planning state. `.planning/`, `.goal/`, `.remember/` stay at root.
+- Multi-component PRs that mix sync + plugin + MCP + docs. **Always split.**
diff --git a/use-cases/hermes-everos-memory/__init__.py b/use-cases/hermes-everos-memory/__init__.py
index 4c059f430..d1e8bbd2f 100644
--- a/use-cases/hermes-everos-memory/__init__.py
+++ b/use-cases/hermes-everos-memory/__init__.py
@@ -141,12 +141,16 @@ def add_agent_messages(
             },
         )
 
-    def flush_agent(self, *, user_id: str, session_id: str) -> dict:
-        return self.request(
-            "POST",
-            "/api/v1/memories/agent/flush",
-            {"user_id": user_id, "session_id": session_id},
-        )
+    def flush_agent(self, *, user_id: str, session_id: Optional[str] = None) -> dict:
+        # EverCore treats session_id as optional on the flush endpoint.
+        # When unset, omit it from the payload so the server uses its
+        # default (a freshly-allocated session per flush) rather than
+        # coalescing flushes into a shared empty-string session across
+        # agents/runs. Copilot review on PR #104.
+        payload: Dict[str, Any] = {"user_id": user_id}
+        if session_id:
+            payload["session_id"] = session_id
+        return self.request("POST", "/api/v1/memories/agent/flush", payload)
 
 
 class EverOSMemoryProvider(MemoryProvider):
@@ -326,9 +330,13 @@ def handle_tool_call(self, tool_name: str, args: Dict[str, Any], **kwargs) -> st
                 self._flush_session(session_id)
                 return json.dumps({"result": "stored", "data": data.get("data")}, ensure_ascii=False)
             if tool_name == "everos_flush":
+                # Pass session_id as-is (None when unset); flush_agent
+                # omits it from the payload so EverCore uses its default
+                # rather than an empty-string shared session (Copilot
+                # review on PR #104).
                 data = self._client.flush_agent(
                     user_id=self._user_id,
-                    session_id=self._session_id or "",
+                    session_id=self._session_id or None,
                 )
                 return json.dumps({"result": "flushed", "data": data.get("data")}, ensure_ascii=False)
         except urllib.error.URLError as exc:
diff --git a/use-cases/hermes-everos-memory/deploy/nixos/evercore-remote-workhorse.nix b/use-cases/hermes-everos-memory/deploy/nixos/evercore-remote-workhorse.nix
index 9c002fe34..8a4fcf933 100644
--- a/use-cases/hermes-everos-memory/deploy/nixos/evercore-remote-workhorse.nix
+++ b/use-cases/hermes-everos-memory/deploy/nixos/evercore-remote-workhorse.nix
@@ -122,11 +122,11 @@ in
     virtualisation.docker.enable = true;
 
     users.groups = lib.mkIf cfg.createUser {
-      ${cfg.group} = { };
+      "${cfg.group}" = { };
     };
 
     users.users = lib.mkIf cfg.createUser {
-      ${cfg.user} = {
+      "${cfg.user}" = {
         isSystemUser = true;
         group = cfg.group;
         extraGroups = [ "docker" ];