diff --git a/.algo-profile/README.md b/.algo-profile/README.md new file mode 100644 index 000000000..11e7f79e4 --- /dev/null +++ b/.algo-profile/README.md @@ -0,0 +1,10 @@ +# Algorithm Profile - EverOS + +## Optimization + +- [Content-Addressed Manifest Delta Sync](optimization/content-addressed-manifest-delta-sync.md) - O(N) first run, O(Δ) incremental, used in docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md +- [Bounded Top-K Heap](optimization/bounded-top-k-heap.md) - O(M log K), used in docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md + +## Structures + +- [LRU Retrieval Cache](structures/lru-retrieval-cache.md) - O(1) average lookup, used in docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md diff --git a/.algo-profile/optimization/bounded-top-k-heap.md b/.algo-profile/optimization/bounded-top-k-heap.md new file mode 100644 index 000000000..dfb78c2f7 --- /dev/null +++ b/.algo-profile/optimization/bounded-top-k-heap.md @@ -0,0 +1,20 @@ +--- +algorithm: Bounded Top-K Heap +category: optimization +complexity_time: O(M log K) +complexity_space: O(K) +used_in: docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md +date: 2026-05-16 +--- + +## Why This Was Chosen + +When multiple candidate sources are merged, the system only needs the best K snippets, not a full sort of the entire candidate pool. A bounded min-heap keeps the strongest candidates while avoiding the extra cost of sorting low-value items that will never be shown to Hermes. + +## Implementation Notes + +Use this only at the merge boundary where candidate sets from collection search, local cache, or memory providers are combined. If the source already returns a stable top-k list, the heap can be skipped; otherwise keep the heap small and enforce K as a hard cap. 
+ +## Reference + +[Heap / Priority Queue](https://github.com/trekhleb/javascript-algorithms) diff --git a/.algo-profile/optimization/content-addressed-manifest-delta-sync.md b/.algo-profile/optimization/content-addressed-manifest-delta-sync.md new file mode 100644 index 000000000..a7977b1c2 --- /dev/null +++ b/.algo-profile/optimization/content-addressed-manifest-delta-sync.md @@ -0,0 +1,20 @@ +--- +algorithm: Content-Addressed Manifest Delta Sync +category: optimization +complexity_time: O(N) first run, O(Δ) incremental +complexity_space: O(N) +used_in: docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md +date: 2026-05-16 +--- + +## Why This Was Chosen + +The xAI knowledge bundle is repo-shaped data, so a manifest keyed by content hash lets the host distinguish unchanged files from changed ones without re-uploading everything. That keeps the initial build linear while making steady-state refreshes proportional to the actual delta instead of the full corpus. + +## Implementation Notes + +The manifest should store path, content hash, upload state, and a stable bundle hash so a successful upload can become the new baseline atomically. Removed files should be tombstoned rather than silently forgotten, which keeps reconciliation explicit on the next sync run. 
+ +## Reference + +[Big-O Reference](https://github.com/trekhleb/javascript-algorithms) diff --git a/.algo-profile/structures/lru-retrieval-cache.md b/.algo-profile/structures/lru-retrieval-cache.md new file mode 100644 index 000000000..4bcce6969 --- /dev/null +++ b/.algo-profile/structures/lru-retrieval-cache.md @@ -0,0 +1,20 @@ +--- +algorithm: LRU Retrieval Cache +category: structures +complexity_time: O(1) +complexity_space: O(capacity) +used_in: docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md +date: 2026-05-16 +--- + +## Why This Was Chosen + +Repeated Hermes queries against the same collection bundle should reuse prior retrieval results instead of hitting the collection on every turn. An LRU-style cache gives bounded memory with constant-time average lookup and eviction, which matches the read-heavy, hot-query pattern of the remote NixOS lane. + +## Implementation Notes + +Cache keys should include collection id, bundle hash, normalized query hash, top_k, and filter serialization so different auth contexts do not collide. A TTL layer should sit on top of the LRU policy so stale entries disappear even if the bundle hash does not change. + +## Reference + +[Data Structures Reference](https://github.com/trekhleb/javascript-algorithms) diff --git a/.claude/hooks/commit-boundary-check.sh b/.claude/hooks/commit-boundary-check.sh new file mode 100755 index 000000000..fc741fd98 --- /dev/null +++ b/.claude/hooks/commit-boundary-check.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# commit-boundary-check.sh +# +# PreToolUse hook for EverOS. Drains the Claude Code hook payload from stdin +# without parsing it, so it runs on every hooked tool call (not only git commit / +# gh pr create), and warns when staged changes span multiple top-level directories. +# +# Rationale: PR #31 (Raven v2 closure) accidentally bundled 6 independent lanes +# (raven, hermes use-case, skillhub, upstream-return, ci, chores) into one +# 27-commit PR.
EverOS convention from that retrospective: one component, +# one PR. This hook is a soft nudge, not a block — cross-cutting work (lint +# sweeps, dependency bumps, .gitignore policy) still proceeds. + +set -eu + +# Drain the hook payload from stdin without parsing it; the decision below is +# made from git state alone, not from the command Claude Code is about to run. +cat >/dev/null 2>&1 || true + +# The actual command Claude Code is about to run is exposed via tool input. +# Hook payload format varies across CC versions; we keep this hook scope-safe +# by running unconditionally and only acting when staged changes exist. + +# Find the repo root from cwd so the hook works from worktrees too. +repo_root=$(git rev-parse --show-toplevel 2>/dev/null || true) +if [ -z "$repo_root" ]; then + exit 0 +fi + +cd "$repo_root" + +# What is staged for the next commit? +staged=$(git diff --cached --name-only 2>/dev/null || true) +if [ -z "$staged" ]; then + exit 0 +fi + +# Extract the first path segment of each staged file. Drop the .github and +# .claude dirs and every root-level file (collapsed to _root_: README, +# .gitignore, ...) so a root-doc fix doesn't trip the warning by itself. +top_dirs=$(echo "$staged" \ + | awk -F/ 'NF>1 {print $1} NF==1 {print "_root_"}' \ + | sort -u \ + | grep -Ev '^(\.github|\.claude|_root_)$' || true) + +count=$(echo "$top_dirs" | grep -c . || true) + +if [ "${count:-0}" -ge 2 ]; then + cat >&2 < +and committing the lanes separately. + +This is a warning, not a block. Re-run the commit command to proceed.
+EOF +fi + +exit 0 diff --git a/.claude/settings.json b/.claude/settings.json new file mode 100644 index 000000000..1c3625f43 --- /dev/null +++ b/.claude/settings.json @@ -0,0 +1,16 @@ +{ + "$schema": "https://json.schemastore.org/claude-code-settings.json", + "hooks": { + "PreToolUse": [ + { + "matcher": "Bash", + "hooks": [ + { + "type": "command", + "command": ".claude/hooks/commit-boundary-check.sh" + } + ] + } + ] + } +} diff --git a/.claude/skills/everos-prompts-sync/SKILL.md b/.claude/skills/everos-prompts-sync/SKILL.md new file mode 100644 index 000000000..40870327a --- /dev/null +++ b/.claude/skills/everos-prompts-sync/SKILL.md @@ -0,0 +1,97 @@ +--- +name: everos-prompts-sync +description: Use when editing EverCore prompts under methods/EverCore/src/memory_layer/prompts/en or .../zh, or before opening a PR that touches that tree. Verifies EN/ZH file-name mirror + export-symbol parity, surfaces missing zh files and divergent __all__ lists, and falls back to the existing src/devops_scripts/i18n/i18n_tool.py for code-comment translation drift. +--- + +# everos-prompts-sync + +Keeps `methods/EverCore/src/memory_layer/prompts/{en,zh}/` in lockstep at the +**file-name + symbol-export** layer. Does **not** judge translation quality — +that's a content review. + +This skill encodes a recurring EverCore failure mode: a contributor adds a new +prompt constant to `en/` and forgets to add a matching entry under `zh/`, +which silently breaks imports the moment a tenant uses the ZH locale. + +## When to invoke + +- A diff under `methods/EverCore/src/memory_layer/prompts/` is in flight. +- A new prompt constant is being added to `en/.py`. +- A PR is about to be opened and the prompt tree has any change at all. + +If neither side of `prompts/` changed, skip — this skill has nothing to say. + +## Procedure + +1. **Confirm scope.** From repo root: + + ```bash + cd methods/EverCore/src/memory_layer/prompts + ``` + +2. 
**File-name mirror.** Both directories must have identical file lists + (excluding `__pycache__`): + + ```bash + diff <(ls en/ | grep -v __pycache__) <(ls zh/ | grep -v __pycache__) + ``` + + Any difference is a bug. The fix is **always** to add the missing file to + the side that lacks it. The new file can be a translation OR an explicit + re-export from the other side (the existing convention — see + `zh/agent_prompts.py` for the re-export pattern). + +3. **Export-symbol parity.** For each file pair `en/X.py` and `zh/X.py`, + their public exports must be the same set: + + ```bash + python -c " + import ast, sys, pathlib + for f in pathlib.Path('en').glob('*.py'): + if f.name == '__init__.py': continue + z = pathlib.Path('zh') / f.name + if not z.exists(): print(f'MISSING zh: {f.name}'); continue + def syms(p): + tree = ast.parse(p.read_text()) + return {t.id for n in tree.body if isinstance(n, ast.Assign) + for t in n.targets if isinstance(t, ast.Name) and t.id.isupper()} + en_syms, zh_syms = syms(f), syms(z) + if en_syms != zh_syms: + missing_in_zh = en_syms - zh_syms + missing_in_en = zh_syms - en_syms + if missing_in_zh: print(f'{f.name}: zh missing {missing_in_zh}') + if missing_in_en: print(f'{f.name}: en missing {missing_in_en}') + " + ``` + + Re-exports count: `zh/agent_prompts.py` doing + `from ...en.agent_prompts import FOO, BAR` exposes `FOO` and `BAR` as + ZH symbols — that satisfies parity even though the strings live on the EN + side only. The AST scan above sees only direct top-level assignments, so it + prints a false `zh missing` for a re-export file; confirm those by hand, or + extend the scan to read `__all__` and `from ... import` names. + +4. **Report.** Output one of: + - `PASS: EN/ZH prompt parity OK` (no further action) + - `FAIL: <mismatch list>` (fix before merge) + +5. **Adjacent tooling.** This skill does **not** translate Chinese code + comments to English. That's `src/devops_scripts/i18n/i18n_tool.py`, which + is already wired into `make lint`.
Use that for code-comment drift, this + skill for prompt-constant drift. + +## What this skill explicitly does NOT do + +- Translate prompts from EN to ZH or vice versa. That's a human/LLM content + task, not a parity check. +- Validate template variables (`{messages_json}`, `{new_count}`, etc.) match + between EN and ZH versions. That's a deeper content check worth a separate + skill if it turns out to be a recurring failure mode. +- Block commits. This is informational. Wire it into a hook only after the + false-positive rate is known to be near zero. + +## Recurrence threshold + +If the parity check has surfaced the same root cause **three times** (e.g., +"forgot to add zh re-export when adding a new EN prompt constant"), upgrade +this skill into a pre-commit hook under `.claude/hooks/`. diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index d4008706c..d29042db6 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,6 +1,6 @@ ## Summary - + ## Area @@ -15,7 +15,7 @@ ## Verification - + ```text @@ -27,6 +27,7 @@ - [ ] I updated docs, examples, or setup notes when behavior changed. - [ ] I added or updated tests when the change affects behavior. - [ ] I did not commit secrets, `.env` files, dependency folders, or generated output. +- [ ] I listed the exact evidence, checks, or blocker for this change. - [ ] Active relative links in Markdown files resolve. 
## Notes for Reviewers diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index cb06530f6..4ac6a6eb5 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -94,6 +94,7 @@ jobs: Path("methods/README.md"), Path("benchmarks/README.md"), Path("AGENTS.md"), + Path(".github/PULL_REQUEST_TEMPLATE.md"), Path(".github/CONTRIBUTING.md"), Path(".github/CODE_OF_CONDUCT.md"), Path(".github/SECURITY.md"), diff --git a/.gitignore b/.gitignore index 16db8a8bb..fe04fcbd2 100755 --- a/.gitignore +++ b/.gitignore @@ -206,7 +206,10 @@ src/memory_layer/memory_extractor/profile_memory_extractor keep llm merge.py #LLM related AGENTS.mk docs/api_docs/profile_extraction_fields.md -.claude/ +# Claude Code: session-local artifacts only; skills/hooks/commands stay versioned +.claude/settings.local.json +.claude/worktrees/ +.claude/verify/ .cursor/* #tmp_data @@ -226,4 +229,22 @@ use-cases/**/package-lock.json # Local playwright + goal state traces .playwright-mcp/ -.goal/ \ No newline at end of file +.goal/ + +# Hermes EverOS Raven SkillHub dogfood artifacts (runtime / local traces) +# These are intentionally generated during dogfood smoke tests but should not pollute git +.kilo/ +use-cases/hermes-everos-memory/raven/.local-runs/ + +# Windburn system artifacts (runtime, not temporary) +.kilo/raven-aliases.zsh + +# Multica Ultimate Workbench runtime +.automations/ +.automations/**/*.json + +# Codex session artifacts (already covered but be explicit) +.codex/.codex-global-state.json +.codex/.codex-global-state.json.bak +.sisyphus/ +.gstack/ diff --git a/benchmarks/EverMemBench/.gitignore b/benchmarks/EverMemBench/.gitignore index 6c52f99af..720ccabda 100755 --- a/benchmarks/EverMemBench/.gitignore +++ b/benchmarks/EverMemBench/.gitignore @@ -26,9 +26,8 @@ htmlcov/ .cursor/ .DS_Store -# Claude Code +# Claude Code session artifacts (CLAUDE.md is versioned, .claude/ is session-local) .claude/ -CLAUDE.md # Logs *.log diff --git 
a/benchmarks/EverMemBench/CLAUDE.md b/benchmarks/EverMemBench/CLAUDE.md new file mode 100644 index 000000000..bb37026fb --- /dev/null +++ b/benchmarks/EverMemBench/CLAUDE.md @@ -0,0 +1,83 @@ +# benchmarks/EverMemBench — Local CLAUDE.md + +Local-only context. Root `CLAUDE.md` and `AGENTS.md` cover the cross-repo map. + +## What this module is + +A multi-person group-chat memory evaluation framework. Pits 5 memory systems +(**Memos, Mem0, Memobase, EverCore, Zep**) plus an LLM long-context baseline +against the **EverMemBench-Dynamic** dataset on HuggingFace +(`EverMind-AI/EverMemBench-Dynamic`). + +Pipeline: **Add → Search → Answer → Evaluate**. Two question types: multiple +choice (direct comparison) and open-ended (LLM-judge). + +## Internal map + +```text +eval/ +├── cli.py main entry — orchestrates the 4-stage pipeline +├── config/ YAML configs per memory system + per dataset slice +└── src/ stage implementations (add/search/answer/evaluate) + +tools/ +└── analyze_results.py post-run accuracy + breakdown reporter +``` + +## Hard rules + +- **Numbers are reportable.** Any code change that affects retrieval, answer + generation, or evaluation logic must report a paired before/after run in + the PR. Treat this like HyperMem — research artifact, not utility code. +- **Datasets are not in-repo.** Source comes from HuggingFace. Do not vendor + the full dataset; cache it via `datasets` library or a pinned snapshot + path. +- **OpenRouter is the default LLM gateway.** `LLM_API_KEY` in `.env` must + point at OpenRouter (or compatible). Per-system keys (`MEMOS_API_KEY`, + `MEM0_API_KEY`, etc.) are only needed for the systems being benchmarked. +- **Smoke mode exists for a reason.** Use it (`--smoke` or equivalent in + CLI) before any full run. Full runs are expensive. 
+ +## Working commands + +```bash +# from this directory: +cp env.template .env # fill in LLM_API_KEY + system-specific keys +pip install -r requirements.txt +# install only the SDKs for systems you are evaluating: +pip install mem0ai memobase zep-cloud # subset as needed + +# pipeline (smoke first, then full): +python -m eval.cli add --config eval/config/<config>.yaml --smoke +python -m eval.cli search --config eval/config/<config>.yaml --smoke +python -m eval.cli answer --config eval/config/<config>.yaml --smoke +python -m eval.cli evaluate --config eval/config/<config>.yaml --smoke + +# post-run analysis: +python tools/analyze_results.py +``` + +## Common gotchas + +- **Message format differs per system.** Memos wants + `[Group: X][Speaker: Y]content`; Mem0 wants `run_id="${user_id}_${groupId}"` + plus `name=<speaker>`. The README has the full matrix — do not paper over the + differences with a generic adapter. +- **Timestamp handling is per-system.** Memos uses native `chat_time`, Mem0 + uses Unix timestamps per batch. Misaligned timestamps silently kill recall. +- **Rate limits matter.** OpenRouter and the memory system providers all rate + limit. `aiolimiter` is wired in — do not bypass it. + +## Cross-directory contract + +- `methods/EverCore/` is one of the systems under evaluation. EverCore DTO + changes can break the EverCore adapter here; treat the EverCore HTTP API + as a frozen contract for benchmark runs. +- `methods/HyperMem/` may be added as a benchmark target via its `main/` + entry; add adapters in `eval/src/` not by importing HyperMem internals. + +## What does NOT belong here + +- Memory system implementations. Adapters only. +- Live agent demos — that is `use-cases/`. +- The dataset itself — keep it on HuggingFace.
diff --git a/docs/research/2026-05-16-claude-code-large-codebases-methodology.md b/docs/research/2026-05-16-claude-code-large-codebases-methodology.md new file mode 100644 index 000000000..cdcb3ab76 --- /dev/null +++ b/docs/research/2026-05-16-claude-code-large-codebases-methodology.md @@ -0,0 +1,126 @@ +# Claude Code 大型代码库最佳实践方法论 + +> 来源:[How Claude Code works in large codebases: Best practices and where to start](https://claude.com/blog/how-claude-code-works-in-large-codebases-best-practices-and-where-to-start) +> 提取时间:2026-05-16 +> 来源仓库:Fearvox/ds-research-vault(目标路径:knowledge/ai-agents/coding-practices/claude-code-large-codebases/) + +--- + +## 核心方法论(7 条) + +### 1. CLAUDE.md 文件优先 + +- **作用**:每个会话自动加载的上下文文件,给 Claude 提供代码库知识 +- **层级**: + - 根目录 `CLAUDE.md`:大图、整体架构 + - 子目录 `CLAUDE.md`:局部约定、子模块规范 +- **原则**:保持聚焦在广泛适用的内容上,避免变成性能负担 +- **加载机制**:Claude 会自动向上遍历目录树,加载路径上所有 `CLAUDE.md` + +### 2. Hooks 让设置自我进化 + +- **传统认知**:Hooks 是防止 Claude 做错事的脚本 +- **高阶用法**:持续改进的催化剂 + - `Stop` hook:会话结束后反思发生了什么,提出 `CLAUDE.md` 更新(上下文新鲜时) + - `Start` hook:动态加载团队特定上下文,无需手动配置 + - 确定性检查:lint、格式化等,比依赖 Claude 记忆更一致 + +### 3. Skills 按需提供专业知识 + +- **问题**:大型代码库有几十种任务类型,不需要每个会话都加载所有专业知识 +- **解决方案**:渐进式披露(Progressive Disclosure) + - Skills 按需加载,只在任务需要时注入 + - 示例:安全审查时加载安全 review skill;文档更新时加载文档处理 skill + - 避免上下文竞争,节省 token + +### 4. 在子目录初始化,而非仓库根目录 + +- **原则**:Claude 在 scoped 到与任务实际相关的代码部分时效果最好 +- **Monorepo 注意**:虽然工具默认假设根目录访问,但 Claude 会自动向上查找 `CLAUDE.md` +- **实践**:在子目录工作,根级上下文不会丢失 + +### 5. 按子目录限定测试和 Lint 命令 + +- **问题**:Claude 改了一个服务就跑完整测试套件 → 超时 + 浪费上下文 +- **方案**:在子目录级 `CLAUDE.md` 指定适用于该部分的命令 +- **适用**:服务导向的代码库(每个目录有自己的测试和构建命令) +- **编译型语言 monorepo**:跨目录依赖深,子目录 scoping 更难,可能需要项目特定构建配置 + +### 6. 目录结构不工作时,构建代码库地图 + +- **场景**:代码未组织在常规目录结构中 +- **方案**:在仓库根目录放轻量 markdown 文件,每行描述一个顶级文件夹的内容 +- **作用**:给 Claude 一张目录表,在打开文件前先扫描 +- **分层方法**: + - 根文件:只描述最高层结构 + - 子目录 `CLAUDE.md`:提供下一层细节,随 Claude 遍历树时按需加载 +- **简单场景**:用 `@` 提及特定文件或目录也能达到同样效果 + +### 7. 
运行 LSP 服务器,让 Claude 按符号搜索 + +- **问题**:对大型代码库常用函数名做 grep → 返回数千个匹配,Claude 烧上下文去搞清楚哪个有意义 +- **方案**:用 LSP(语言服务器协议)按符号搜索 + - LSP 只返回指向同一符号的引用 + - 过滤在 Claude 读任何文件之前就完成了 +- **配置要求**: + - 为你的语言安装代码智能插件 + - 对应的语言服务器二进制文件 + - Claude Code 文档覆盖了可用插件和故障排除 + +--- + +## 上下文工程总结 + +| 层次 | 工具 | 用途 | +|---|---|---| +| **持久层** | `CLAUDE.md`(根 + 子目录) | 每个会话自动加载,项目知识库 | +| **行为层** | Hooks(Start/Stop/PreToolUse/PostToolUse) | 自动化、规则执行、动态上下文注入 | +| **专业层** | Skills(按需加载) | 特定领域知识,避免上下文膨胀 | +| **导航层** | 代码库地图(markdown 索引) | 快速理解目录结构 | +| **搜索层** | LSP 服务器(符号搜索) | 精确查找,减少无效匹配 | + +--- + +## 与 Windburn 认知缓存的映射 + +| Claude Code 方法论 | Windburn 认知缓存对应 | +|---|---| +| `CLAUDE.md` 持久上下文 | **Source 层**(Research Vault、repo docs、source-of-truth) | +| Hooks 动态注入 | **Perception 层**(实时观察、工具反馈) | +| Skills 按需加载 | **Procedural 层**(可用 skills、repo 路由) | +| 子目录 scoping | **Belief 层**(假设 + 证据 + 置信度) | +| LSP 符号搜索 | **Episodic 层**(发生了什么,按序) | + +--- + +## 实践建议(基于 EverOS 仓库) + +### 当前状态 + +- ✅ `CLAUDE.md` 已存在(232 行,覆盖 runtime artifacts) +- ✅ `.codex/AGENTS.md` 已配置(Windburn 通信配置) +- ⚠️ 子目录 `CLAUDE.md` 可能缺失(methods/、benchmarks/、use-cases/ 等) + +### 建议补充 + +1. **子目录 CLAUDE.md**: + - `methods/EverCore/CLAUDE.md` — EverCore 特定约定 + - `benchmarks/EverMemBench/CLAUDE.md` — 评估运行命令 + - `use-cases/hermes-everos-memory/CLAUDE.md` — Hermes 集成规范 + +2. **Hooks 配置**(参考 `~/.codex/AGENTS.md`): + - `Stop` hook:会话结束更新 `docs/superpowers/goal.md` + - `Start` hook:注入当前 git status 和 TODO + +3. **Skills 按需加载**: + - 已安装:`gsd-*` 系列(项目管理) + - 建议:为 EverCore 开发创建 `evercore-dev` skill + +--- + +## 参考资料 + +- 原博客: +- Claude Code 文档: +- Windburn 认知缓存: +- Multica Ultimate Workbench: diff --git a/docs/superpowers/goal.md b/docs/superpowers/goal.md new file mode 100644 index 000000000..b3af8baf3 --- /dev/null +++ b/docs/superpowers/goal.md @@ -0,0 +1,166 @@ +# Hermes SuperGrok NixOS Goal + +Short `/goal` capsule: + +```text +Read and execute docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md as the source of truth. 
Turn Hermes SuperGrok OAuth, xAI collection sync, and NixOS host control into a clean three-plane implementation with content-addressed delta sync, bounded top-k retrieval, and a local retrieval cache. Fix the knowledge anchors up front, then build `everos-ops-mcp` as the reusable ops plane. Keep auth boundaries strict, preserve the existing EverOS memory provider, and prove the remote lane with live smokes before calling it done. +``` + +## Role + +You are the implementation captain for the Hermes SuperGrok on NixOS lane. + +Your job is not to redesign the auth model again. Your job is to turn the approved spec into a working remote lane that can: + +- log into Hermes with SuperGrok OAuth, +- refresh the xAI knowledge bundle from NixOS, +- retrieve context through Hermes hooks and plugin boundaries, +- and keep the management key, OAuth state, and host control separate. + +## Starting State + +The current repo already has: + +- a committed design spec at `docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md`; +- a populated xAI `Windburn` collection with the repo knowledge bundle; +- an existing EverOS memory provider path under `use-cases/hermes-everos-memory`; +- a remote EverCore/NixOS packet under `use-cases/hermes-everos-memory/deploy/nixos/`; +- an `.algo-profile/` history for the content-addressed sync, bounded top-k merge, and retrieval cache choices. +- a fixed knowledge-anchor contract rooted at `research-vault`, `dash-knowledge-vault`, and `dash-kv-view-full`; +- a new `everos-ops-mcp` package under `use-cases/hermes-everos-memory/apps/`. + +Do not treat any of those as optional context. They are the baseline. + +## Hard Boundaries + +1. Do not mix SuperGrok OAuth with xAI collection management credentials. +2. Do not expose raw tokens, private paths, or host/IP details in model-visible context. +3. Do not replace the existing EverOS memory provider. +4. Do not turn the knowledge sync path into a browser-driven manual workflow. 
+5. Do not claim incremental sync unless a no-op delta path is proven on an unchanged source tree. +6. Do not claim retrieval is ready unless cache hit, cache miss, and stale-bundle behavior are all tested. +7. Do not touch unrelated workspace junk in `.goal/`, `.kilo/`, `.playwright-mcp/`, or local run output. +8. Do not widen the knowledge surface beyond the agreed anchors before the ops plane is stable. + +## Primary Objective + +Deliver a remote-first Hermes knowledge lane on NixOS where: + +- Hermes session auth is handled by SuperGrok OAuth, +- xAI knowledge uploads are handled by a collection-scoped management key on the host, +- the canonical knowledge anchors are the three agreed roots above, +- the sync job is content-addressed and delta-aware, +- retrieval uses a local cache plus bounded top-k merge, +- `everos-ops-mcp` exposes the reusable ops/status surface, +- and every plane can fail independently without collapsing the others. + +## Required Outputs + +The implementation should produce: + +- a NixOS-hosted sync service and timer for the knowledge bundle, +- a manifest/delta engine that skips unchanged documents, +- a Hermes-facing retrieval layer that injects concise, provenanced context, +- cache and receipt artifacts for sync and retrieval, +- a reusable `everos-ops-mcp` backend with public-safe anchor/status tools, +- validation scripts or smokes that prove session, sync, cache, delta, and red-gate behavior, +- and any small docs updates needed to keep the operator flow legible. + +## Phase Plan + +### Phase 0 - Live State Verification + +Confirm the real starting conditions before any edits: + +- Hermes xAI OAuth login is still available locally, +- the `Windburn` collection is present and readable, +- the NixOS remote packet still matches the intended host shape, +- the knowledge bundle source roots are the ones we want to sync, +- and the current workspace is not carrying a hidden breakage in the relevant paths. 
+ +Gate: no implementation until the live state matches the plan. + +### Phase 1 - Knowledge Sync Plane + +Build the host-owned sync path first: + +- normalize the approved source roots, +- walk the source tree once, +- sanitize and hash each document, +- write a manifest row per path with content hash and upload state, +- diff the new manifest against the last successful manifest, +- upload only added or changed documents in stable path order, +- tombstone deletions explicitly, +- and publish the new manifest pointer only after the upload succeeds. + +This path should be `O(N)` on the first build and `O(Δ)` on refresh when the tree is unchanged or only lightly changed. + +Gate: an unchanged tree must produce a no-op diff and skip upload. + +### Phase 2 - Retrieval Plane + +Build the Hermes-facing retrieval path on top of the sync plane: + +- add a short-lived SQLite retrieval cache, +- key it by collection id, bundle hash, normalized query hash, top_k, and stable filter serialization, +- inject only top-k snippets with provenance, +- and use a bounded min-heap when multiple candidate sources must be merged. + +This keeps repeated turns cheap and keeps the read path from degenerating into full sorts or repeated collection lookups. + +Gate: repeated queries against the same bundle should hit cache, and merged candidate lists should preserve only the strongest `K` items without a full resort. + +### Phase 3 - Hooks and Safety + +Wire the policy layer around the retrieval path: + +- `pre_tool_call` blocks mis-scoped or dangerous calls, +- `pre_llm_call` injects the retrieved context and current health state, +- `transform_tool_result` redacts secrets, paths, and oversized output, +- `post_tool_call` records a compact receipt with tool, duration, status, and collection revision. + +Keep `execute_code` limited to mechanical packaging and validation work. + +Gate: no secret or private-path material appears in model-visible output or receipts. 
+ +### Phase 4 - Validation and Proof + +Prove each plane independently: + +- session smoke: Hermes can log into xAI with SuperGrok OAuth and start a turn, +- sync smoke: the host can build, upload, and refresh the `Windburn` collection, +- cache smoke: repeated reads reuse the retrieval cache when the bundle hash is unchanged, +- delta smoke: unchanged sources produce a no-op manifest diff, +- top-k smoke: merged candidates stay bounded at `K`, +- failure smoke: missing secrets, expired auth, and retrieval timeouts degrade cleanly. + +Gate: no plane can be marked PASS from inference alone. + +## Decision Order + +When trade-offs appear, prefer this order: + +1. Strict auth separation. +2. Remote host reliability. +3. Incremental sync efficiency. +4. Retrieval latency. +5. Cosmetic cleanup. + +## Exit Conditions + +Stop when all of the following are true: + +- the NixOS sync plane works end to end, +- the Hermes retrieval path works end to end, +- the cache and delta gates pass, +- the failure cases degrade cleanly, +- and the implementation is small enough that the operator can reason about the trust boundaries in one pass. + +## Final Deliverable + +When this goal is complete, the repo should have a single, truthful story: + +- SuperGrok OAuth runs the Hermes session, +- the host-managed xAI key refreshes the collection, +- Hermes hooks handle context injection and redaction, +- and the EverOS memory provider remains the durable local memory layer. diff --git a/docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md b/docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md new file mode 100644 index 000000000..29a94fc77 --- /dev/null +++ b/docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md @@ -0,0 +1,248 @@ +# Hermes SuperGrok on NixOS: Three Auth Planes + +## Status + +Draft design, approved at the architecture level. 
+ +## Context + +Hermes now supports `xai-oauth`, and the local session shows a successful SuperGrok login with `model.provider=xai-oauth` and default model `grok-4.3`. + +At the same time, the remote NixOS path needs the sanitized repository knowledge bundle that was already prepared for the `Windburn` xAI collection. That bundle is not the same thing as the interactive chat session: it is a durable knowledge corpus that must be refreshable on a remote host. + +This design keeps those concerns separate: + +1. Hermes session auth for interactive model turns. +2. xAI collection auth for knowledge upload and refresh. +3. NixOS host auth for scheduling, file ownership, and deployment control. +4. A reusable `everos-ops-mcp` plane for public-safe anchor and ops status, which can be mounted by ChatGPT custom apps without exposing secrets. + +## Goals + +- Use SuperGrok OAuth only for Hermes model sessions. +- Keep xAI collection management credentials out of the Hermes session context. +- Keep the knowledge corpus anchored to the three agreed source roots before any broader expansion. +- Make the knowledge corpus reproducible and incrementally refreshable on NixOS. +- Inject retrieved knowledge into Hermes through hooks and tool boundaries, not through ad hoc prompt stuffing. +- Support both scheduled and manual sync on the remote host. +- Keep raw tokens, private paths, and host details out of model-visible context and receipts. + +## Non-Goals + +- Replacing the existing EverOS memory provider. +- Making the browser part of the steady-state auth path. +- Mirroring every repo file into xAI. +- Letting model output mutate the knowledge corpus directly. +- Sharing one secret across the session, collection, and host planes. +- Growing the knowledge surface beyond the agreed anchors before the ops plane is stable. 
+ +## Decision + +The recommended implementation is a host-owned sync service on NixOS plus a Hermes plugin/hook layer: + +- Hermes uses `xai-oauth` for chat and model calls. +- A separate NixOS service uses a collection-scoped xAI management key to build and refresh the knowledge bundle. +- A Hermes plugin uses collection search plus local cache to retrieve relevant snippets before a turn. +- Hermes hooks redact, gate, and record tool activity. +- `execute_code` is allowed only for mechanical packaging and validation work, not for auth-sensitive calls. +- A separate `everos-ops-mcp` service exposes public-safe anchor/status and bounded smoke operations for ChatGPT custom app wiring. + +This gives one clean operator flow on NixOS without turning OAuth into a universal credential. + +## Architecture + +```mermaid +flowchart LR + subgraph Host["NixOS host"] + S["systemd timer/service"] + P["knowledge packer"] + C["Hermes plugin + hooks"] + L["SQLite retrieval cache"] + R["JSONL receipts"] + end + + H["Hermes session"] + O["SuperGrok / xai-oauth"] + K["xAI collection: Windburn"] + E["EverOS memory provider"] + + H --> O + S --> P + P --> K + C --> K + C --> L + C --> R + C --> H + E --> H +``` + +## Components + +### 1. Hermes session plane + +Hermes runs with `xai-oauth` as the model provider. The OAuth state stays in Hermes-managed auth state, not in the knowledge bundle and not in the xAI collection sync service. + +This plane is only for interactive turns and tool orchestration. It must not be used as a transport for collection management keys. + +### 2. Knowledge sync plane + +The NixOS host owns a dedicated sync service and timer. The service: + +- reads the sanitized knowledge source set, +- builds a manifest and bundle hash, +- uploads or refreshes the `Windburn` collection, +- writes an audit receipt, +- marks the corpus stale or healthy. 
+ +The sync service uses a collection-scoped xAI management key that lives in a host secret file wired in as the unit's `EnvironmentFile`. The key is readable by the sync unit only. + +The canonical knowledge anchors are fixed up front and treated as the shared source contract for every knowledge-related surface: + +- `research-vault` -> `/Users/0xvox/Documents/Evensong/research-vault` +- `dash-knowledge-vault` -> `/Users/0xvox/Desktop/dash-knowledge-vault` +- `dash-kv-view-full` -> `/Users/0xvox/Desktop/dash-kv-view-full` + +These anchors are the input set for the future knowledge MCP layer and the current ops surface. They should stay stable unless the operator explicitly expands the corpus. + +The bundle itself is the reproducible artifact. It should contain the same repo knowledge corpus that was already prepared for xAI, plus enough metadata to make incremental updates safe: + +- source roots +- doc list +- checksums +- sanitization timestamp +- bundle hash +- upload time + +### 3. NixOS control plane + +NixOS controls when sync runs, where the bundle lives, and which service user owns the artifacts. The host may trigger sync in two ways: + +- a `systemd timer` for steady-state refresh +- a manual `systemctl start` / operator-triggered run for catch-up or re-upload + +The timer is the default path and should refresh on an hourly cadence unless host config overrides it. Manual runs use the same service so the behavior stays identical. + +### 4. Ops MCP plane + +`everos-ops-mcp` is a reusable ChatGPT-facing MCP server for the high-value operational surface around this lane. + +Its first job is not broad automation. Its first job is public-safe observability and bounded execution: + +- report token-file health without printing token contents, +- report anchor presence and freshness, +- expose a narrow allowlist of smoke commands for the Hermes/EverOS lane, +- and keep future knowledge and signals MCP servers composable rather than monolithic. 
+ +This plane should stay separate from the collection sync key and from Hermes session auth. ChatGPT can mount it as a custom MCP app, but it should not become the universal credential bucket. + +## Data Flow + +1. The operator logs into Hermes with SuperGrok OAuth. +2. NixOS starts or resumes the sync service on a timer or manual trigger. +3. The packer gathers the approved knowledge sources and produces a sanitized bundle. +4. The sync service uploads the bundle to the existing `Windburn` collection. +5. When Hermes starts a turn, the plugin checks local cache and collection health. +6. If the cache misses or the corpus is stale, the plugin queries the collection. +7. The plugin injects a short, provenance-bearing context block into the next turn. +8. Hooks redact sensitive output and write receipts. +9. If sync fails, Hermes keeps working with the last known corpus or with no collection context rather than failing the whole session. + +## Auth Boundaries + +- SuperGrok OAuth may authenticate the Hermes session, but it never authenticates collection writes. +- The collection management key may upload and refresh the collection, but it never authenticates Hermes chat. +- Host control credentials may start and supervise the service, but they never enter the model context. +- No plane should read the others’ secret material unless a wrapper explicitly resolves it inside the trusted host process. + +## Caching and Context Retrieval + +The design uses two caches: + +- a short-lived SQLite retrieval cache for collection search results +- Hermes conversation caching via the existing `x-grok-conv-id` behavior when xAI transport is in use + +Cache keys should include the collection name, bundle hash, normalized query hash, top_k, and stable filter serialization. That makes invalidation straightforward when the corpus changes and keeps repeated turns on the same bundle at `O(1)` average cache lookup cost. 
+ +Context retrieval should stay small and focused: + +- top-k snippets only +- provenance on every snippet +- no raw documents unless a user explicitly asks +- no private paths or token material in the injected text + +The EverOS memory provider remains the durable local turn-memory layer. The xAI collection is a separate knowledge corpus, not a replacement for local memory. + +## Sync Algorithm + +The NixOS sync job should be content-addressed instead of rebuild-everything: + +- normalize the approved source root list once +- walk the source tree once +- ignore generated outputs and public-surface junk +- hash each source document after sanitization +- store a manifest row per document path with its content hash and upload state +- diff the new manifest against the last successful manifest with a path -> hash map +- upload only the added or changed documents in stable path order +- mark deletions as tombstones in the manifest so the next run can reconcile them safely +- publish the new manifest pointer only after the upload succeeds + +That keeps the first run at `O(N)` but makes incremental refreshes proportional to the changed set, `O(Δ)`, instead of resending the entire corpus. + +If the plugin merges multiple candidate sources at read time, it should keep only the best `K` results in a bounded min-heap rather than sorting the full candidate list. That keeps the merge step at `O(M log K)` instead of `O(M log M)`. + +## Hooks and Sandbox + +Use Hermes hooks for policy, not business logic: + +- `pre_tool_call` blocks dangerous or mis-scoped tool calls. +- `pre_llm_call` injects retrieved knowledge and current health state. +- `transform_tool_result` redacts secrets, paths, and oversized outputs before they reach the model. +- `post_tool_call` records a receipt with tool name, duration, status, and collection revision. + +Use `execute_code` only for mechanical work such as bundle generation, manifest checks, mock uploads, and offline validation. 
It should not hold raw collection secrets or perform browser-based auth. + +If a future implementation wants a sandboxed helper for packaging, that helper must read secrets only from trusted host files and must never echo them to stdout or into the model context. + +## Error Handling + +- If Hermes OAuth expires, the session should fail closed and ask for re-authentication. +- If the xAI sync key is missing, the sync service should stop before any upload attempt. +- If upload fails mid-run, the bundle should be marked stale and the previous healthy corpus should remain usable. +- If retrieval fails, Hermes should continue with the EverOS memory provider or no external collection context. +- If a receipt write fails, the turn may continue, but the sync service must surface a visible health warning so the host does not silently drift. + +## Testing + +The implementation should prove each plane independently: + +- session smoke: Hermes can log into xAI with SuperGrok OAuth and start a turn +- sync smoke: the NixOS service can build, upload, and refresh the `Windburn` collection +- cache smoke: repeated queries hit the local retrieval cache when the bundle hash is unchanged +- delta smoke: an unchanged source tree produces a no-op manifest diff and skips upload +- top-k smoke: merged candidates preserve only the strongest `K` results without sorting the full candidate list +- hook smoke: secret/path redaction works before model-visible output +- failure smoke: missing secrets, expired auth, and retrieval timeouts degrade cleanly + +The existing repo already has good patterns for this style of proof: + +- local provider load and smoke commands in `use-cases/hermes-everos-memory` +- remote health and full smoke patterns for the NixOS service +- packet-based receipts for Raven / Hermes / EverOS work + +## Rollout + +1. Confirm the Hermes xAI OAuth session works on the target NixOS host. +2. Add the knowledge sync service and timer. +3. Wire the retrieval plugin and hooks. +4. 
Add cache and receipt files. +5. Run the session, sync, cache, delta, top-k, and failure smokes. +6. Treat the remote lane as `PASS` only when the auth planes stay separated and the knowledge corpus can be refreshed again without reworking the architecture. + +## Success Criteria + +- Hermes uses SuperGrok OAuth for model turns on NixOS. +- The remote host can refresh the xAI knowledge corpus without exposing the management key to the session. +- Incremental refreshes reuse the manifest diff path when the source tree is unchanged. +- Retrieved knowledge enters the prompt through hooks, not ad hoc manual copy/paste. +- The system continues to function when sync is stale or temporarily unavailable. +- The design stays compatible with the existing EverOS memory provider and remote EverCore packet. diff --git a/methods/EverCore/CLAUDE.md b/methods/EverCore/CLAUDE.md new file mode 100644 index 000000000..33cccea5f --- /dev/null +++ b/methods/EverCore/CLAUDE.md @@ -0,0 +1,94 @@ +# methods/EverCore — Local CLAUDE.md + +Local-only context for working inside this directory. Root `CLAUDE.md` and +`AGENTS.md` already cover the cross-repo project map and the canonical Quick +Commands — do not duplicate them here. + +## What this module is + +`memsys` (pyproject name) is the long-term memory operating system for agents. +Multi-tenant, fully async, FastAPI-backed, layered over MongoDB + Elasticsearch + +Milvus + Redis. Public API surface lives at +`src/infra_layer/adapters/input/api/`. 
+ +## Internal layer map + +```text +src/ +├── core/ cross-cutting infra (DI, tenants, middleware, cache, queue, +│ lifespan, rate_limit, observation, capability, oxm, lock) +├── memory_layer/ the memory pipeline — LLM, prompts (en/zh), extractors +│ (memory_extractor, memcell_extractor), profile_indexer, +│ profile_manager, cluster_manager +├── agentic_layer/ memory_manager.py orchestrates the layers above +├── biz_layer/ business policies on top of memory primitives +├── infra_layer/ HTTP, persistence, vector store, embedding adapters +├── api_specs/ DTOs / request-response contracts +├── service/ service-level wiring +├── migrations/ mongodb + postgresql schema migrations +└── devops_scripts/ sensitive_info scrubbing, milvus_admin, data_fix, i18n +``` + +Read order for a new task: `agentic_layer/memory_manager.py` → the layer it +touches → `core/` only if you hit a DI / tenant / lifespan question. + +## Hard rules in this module + ++ **Async everywhere.** No sync I/O in request paths. If a library is sync-only, + push it to a thread pool via the existing `core/` helpers. ++ **Tenant scoping is not optional.** Every query, write, and cache key must + carry tenant context resolved through `core/tenants/`. Cross-tenant leakage + is a P0 bug. ++ **Prompts EN/ZH must stay in lockstep.** `src/memory_layer/prompts/en/` and + `src/memory_layer/prompts/zh/` are mirrors. Adding a prompt to one without + the other is a lint failure target. ++ **Public DTOs are a contract.** Files under `src/api_specs/dtos/` are + consumed by `use-cases/` and external clients. Breaking changes need a + migration note in `docs/CHANGELOG.md`. 
+ +## Working commands (precise — Root CLAUDE.md has the broad strokes) + +```bash +# from this directory: +docker compose up -d # boot mongo + es + milvus + redis (first time) +uv sync # install / refresh deps +uv run python src/run.py # boot the API +make test # full pytest run +uv run pytest tests/test_memory_manager_multi_type_search.py -x -vv + # single-file iteration with -x stop-on-first-fail +make lint # ruff + black + i18n sync check +uv run pyright # type check (config in pyrightconfig.json) +``` + +## Common gotchas + ++ Milvus standalone takes ~30s to become healthy. `docker compose ps` will + show "starting" — wait for "healthy" before `python src/run.py`. ++ `env.template` defaults to OpenRouter → `x-ai/grok-4-fast`. Local runs that + hit the actual LLM need a real `LLM_API_KEY` (OpenRouter or DeepSeek key). ++ The 202 Accepted path in `SimpleMemoryManager` is the async-ingest contract + — do not collapse it to 200. See `tests/test_simple_memory_manager.py`. ++ Multi-type search (recall + extract) has hybrid dedup logic in + `agentic_layer/memory_manager.py` — `test_memory_manager_multi_type_search.py` + pins the invariant. + +## Cross-directory contract + +Things outside `methods/EverCore/` that depend on this module: + ++ `use-cases/hermes-everos-memory/` mounts EverCore as the memory provider via + the public HTTP API. Changing routes under `src/infra_layer/adapters/input/api/` + needs a heads-up in that use case. ++ `benchmarks/EverMemBench/` exercises the recall + extract paths. Schema + changes in DTOs require regenerating any frozen benchmark inputs. ++ `methods/EverCore/examples/openclaw-plugin/` is the JS plugin reference; the + `engine.js` / `types.js` contract mirrors the Python DTOs. + +## What does NOT belong here + ++ New cross-cutting frameworks (auth plane, retrieval cache, MCP server, etc.). + Those go to their own top-level lane and consume EverCore through the public + API. Do not bolt them into `src/`. 
++ Repository-wide planning state. `.planning/`, `.goal/`, and `.remember/` are + root-level. Subdirectory CLAUDE.md files stay focused on this module only. diff --git a/methods/HyperMem/CLAUDE.md b/methods/HyperMem/CLAUDE.md new file mode 100644 index 000000000..e8e7f8d7c --- /dev/null +++ b/methods/HyperMem/CLAUDE.md @@ -0,0 +1,86 @@ +# methods/HyperMem — Local CLAUDE.md + +Local-only context. Root `CLAUDE.md` and `AGENTS.md` cover the cross-repo map. + +## What this module is + +Official implementation of the **ACL 2026** paper *HyperMem: Hypergraph Memory +for Long-Term Conversations*. Three-level hypergraph (**topics → episodes → +facts**) with weighted hyperedges, retrieved via coarse-to-fine top-down +traversal. LoCoMo headline number: **92.73% LLM-as-judge accuracy** (vs. +HyperGraphRAG 86.49%, MemOS 75.80%). + +This is research code with a publication frozen behind it. Treat it as a +reference implementation — refactors that change numerics need to re-run the +LoCoMo eval before merge. + +## Internal map + +```text +hypermem/ +├── main/ entry points for construction + retrieval + evaluation +├── structure.py hypergraph data structures (topics, episodes, facts, edges) +├── types.py typed schemas +├── config.py run config +├── extractors/ episode detection + topic aggregation + fact extraction +├── llm/ LLM client adapters +├── prompts/ extractor + retrieval prompts +└── utils/ shared helpers + +scripts/ +├── run_eval.sh one-shot eval runner +├── serve_embedding.sh local embedding service +└── serve_reranker.sh local reranker service +``` + +Read order for a new task: `structure.py` → `main/` → the extractor or +retrieval stage you are touching → `prompts/` only if changing prompt schema. + +## Hard rules + +- **Numerics are paper-load-bearing.** Changes that touch propagation + (`λ = 0.5`), attention weighting, BM25-dense RRF fusion, or top-k thresholds + must re-run the LoCoMo eval and report the delta in the PR. 
+- **Python 3.12+.** ML stack (torch, transformers, sentence-transformers) — + CPU works for smoke; GPU recommended for full eval. +- **Embedding + reranker services are external.** `scripts/serve_*.sh` boots + them locally. Do not vendor the model weights into the repo. + +## Working commands + +```bash +# from this directory: +python -m venv .venv && source .venv/bin/activate +pip install -r requirements.txt + +# boot the embedding + reranker services in separate terminals: +bash scripts/serve_embedding.sh +bash scripts/serve_reranker.sh + +# run a full eval (LoCoMo or your dataset): +bash scripts/run_eval.sh +``` + +## Common gotchas + +- The hypergraph construction is **streaming** — episode boundary detection + runs as the dialogue is ingested. Do not batch-rewrite that loop without + re-validating boundary placement on the paper's eval set. +- BM25 + dense RRF fusion is implementation-sensitive. Changing the k constant + in RRF (default 60) shifts retrieval and downstream accuracy. +- Hyperedge weights are in `[0, 1]` and used as attention logits before + softmax. Negative or unbounded values silently break propagation. + +## Cross-directory contract + +- `benchmarks/EverMemBench/` may import HyperMem as one of the memory systems + under evaluation; keep the public `main/` entry signatures stable. +- HyperMem does not depend on `methods/EverCore/`. They are independent + memory architectures, both reachable as benchmark targets. + +## What does NOT belong here + +- Production multi-tenant memory APIs — that is EverCore's role. +- New benchmark datasets — put those in `benchmarks/`. +- Hermes / use-case integrations — they should consume HyperMem through a + benchmark adapter, not import internal modules directly. 
diff --git a/use-cases/hermes-everos-memory/CLAUDE.md b/use-cases/hermes-everos-memory/CLAUDE.md new file mode 100644 index 000000000..7b43a63f2 --- /dev/null +++ b/use-cases/hermes-everos-memory/CLAUDE.md @@ -0,0 +1,108 @@ +# use-cases/hermes-everos-memory — Local CLAUDE.md + +Local-only context. Root `CLAUDE.md` and `AGENTS.md` cover the cross-repo map. + +## What this use case is + +Hermes `MemoryProvider` integration that mounts **EverCore** (HTTP API at +`http://127.0.0.1:1995` by default) as the memory backend for Hermes sessions. +Covers prefetch (pre-turn recall), `sync_turn` (post-turn persistence) with +auto-flush, and explicit memory tools (search, store, health, flush). + +This is also the staging ground for the **Hermes SuperGrok NixOS** lane (see +`docs/superpowers/specs/2026-05-16-hermes-supergrok-nixos-auth-plane-design.md`). + +## Internal map + +```text +__init__.py thin Hermes interface shim (Python class entry) +bin/ + everos-memory.mjs operator/dev CLI (Node/Bun) — health/search/sync-smoke + skillhub-packet.mjs SkillHub fixture validator + skillhub-mock-api.mjs SkillHub mock API server + raven-run.mjs Raven run packet validate/render + mock-openai-compatible.mjs Mock OpenAI-compatible server +scripts/ + install-local.sh installs provider into Hermes profile (no activation) + skillhub-api-smoke.sh HTTP smoke against SkillHub mock +deploy/ + nixos/ remote workhorse deploy packet (DEPLOY_PACKET.md, + README.md, evercore-remote-workhorse.nix) +skillhub/fixtures/ read-only views + install-packet fixtures +raven/fixtures/ doomsday + dogfood run fixtures +plugin.yaml Hermes plugin manifest +package.json Node scripts: health / search / sync-smoke / + skillhub:* / raven:* / mock-openai:* / test +justfile just-runner shortcuts +``` + +## Hard rules + +- **EverCore lifecycle is not our problem.** This package does not start + EverCore. 
The expectation is documented in `README.md`: bring EverCore up + first with `cd methods/EverCore && uv run python src/run.py --host 127.0.0.1 + --port 1995`. +- **Configuration is env-var driven only.** No hard-coded URLs or user IDs. + See `EVEROS_*` vars in `README.md`. Defaults stay loopback-friendly. +- **Remote deploy stays loopback-bound by default.** `deploy/nixos/` keeps + EverCore on `127.0.0.1`; CCR / external clients reach it through reverse + proxy, not direct binding. +- **One component, one PR.** This lane is about to expand into Hermes + SuperGrok + NixOS sync service + retrieval plugin + `everos-ops-mcp`. + Each of those is a separate PR. **No multi-component PRs.** See the + commit-boundary hook in root `.claude/`. + +## Working commands + +```bash +# from this directory: +npm run health # ping EverCore at EVEROS_API_BASE_URL +npm run search # smoke a search call +npm run sync-smoke # round-trip sync_turn + +# SkillHub mock: +npm run skillhub:serve # boot mock API +npm run skillhub:check # validate config-only +npm run skillhub:sample # validate fixture +npm run skillhub:smoke # HTTP smoke + +# Raven: +npm run raven:sample # validate doomsday-run fixture +npm run raven:render # render fixture to terminal + +# Self-test: +npm test # everos-memory self-test + +# Install into Hermes (no activation): +bash scripts/install-local.sh +``` + +## Common gotchas + +- **EverCore must be reachable.** `EVEROS_API_BASE_URL=http://127.0.0.1:1995` + is the default. If EverCore is on a remote host, set this explicitly — do + not assume tunnels. +- **`EVEROS_AUTO_FLUSH=1` and `EVEROS_SYNC_INLINE=1` are CLI-friendly + defaults.** They make recall immediately searchable, at the cost of an + extra round trip. Production or long-running sessions may want them set to `0`. +- **`memory_types` is comma-separated.** Default is + `episodic_memory,profile`. Adding a third type means EverCore must support + it on the search method. 
+ +## Cross-directory contract + +- **Consumes** `methods/EverCore/` through its HTTP API only. No Python imports. +- **Surfaces** to Hermes through `__init__.py` (Python provider class) + + `plugin.yaml` (manifest). Hermes loads the class at startup. +- **Does not** depend on `methods/HyperMem/` or `benchmarks/`. +- **Future Hermes SuperGrok lane** will add: NixOS sync service, retrieval + plugin, `everos-ops-mcp`. Those will live here under + `deploy/`, a new `plugin/` subtree, and a new `ops-mcp/` subtree + respectively — each landing in its own PR. + +## What does NOT belong here + +- EverCore feature changes — those are in `methods/EverCore/`. +- New memory architectures — those are in `methods/`. +- Repo-wide planning state. `.planning/`, `.goal/`, `.remember/` stay at root. +- Multi-component PRs that mix sync + plugin + MCP + docs. **Always split.** diff --git a/use-cases/hermes-everos-memory/__init__.py b/use-cases/hermes-everos-memory/__init__.py index 4c059f430..d1e8bbd2f 100644 --- a/use-cases/hermes-everos-memory/__init__.py +++ b/use-cases/hermes-everos-memory/__init__.py @@ -141,12 +141,16 @@ def add_agent_messages( }, ) - def flush_agent(self, *, user_id: str, session_id: str) -> dict: - return self.request( - "POST", - "/api/v1/memories/agent/flush", - {"user_id": user_id, "session_id": session_id}, - ) + def flush_agent(self, *, user_id: str, session_id: Optional[str] = None) -> dict: + # EverCore treats session_id as optional on the flush endpoint. + # When unset, omit it from the payload so the server uses its + # default (a freshly-allocated session per flush) rather than + # coalescing flushes into a shared empty-string session across + # agents/runs. Copilot review on PR #104. 
+ payload: Dict[str, Any] = {"user_id": user_id} + if session_id: + payload["session_id"] = session_id + return self.request("POST", "/api/v1/memories/agent/flush", payload) class EverOSMemoryProvider(MemoryProvider): @@ -326,9 +330,13 @@ def handle_tool_call(self, tool_name: str, args: Dict[str, Any], **kwargs) -> st self._flush_session(session_id) return json.dumps({"result": "stored", "data": data.get("data")}, ensure_ascii=False) if tool_name == "everos_flush": + # Pass session_id as-is (None when unset); flush_agent + # omits it from the payload so EverCore uses its default + # rather than an empty-string shared session (Copilot + # review on PR #104). data = self._client.flush_agent( user_id=self._user_id, - session_id=self._session_id or "", + session_id=self._session_id or None, ) return json.dumps({"result": "flushed", "data": data.get("data")}, ensure_ascii=False) except urllib.error.URLError as exc: diff --git a/use-cases/hermes-everos-memory/deploy/nixos/evercore-remote-workhorse.nix b/use-cases/hermes-everos-memory/deploy/nixos/evercore-remote-workhorse.nix index 9c002fe34..8a4fcf933 100644 --- a/use-cases/hermes-everos-memory/deploy/nixos/evercore-remote-workhorse.nix +++ b/use-cases/hermes-everos-memory/deploy/nixos/evercore-remote-workhorse.nix @@ -122,11 +122,11 @@ in virtualisation.docker.enable = true; users.groups = lib.mkIf cfg.createUser { - ${cfg.group} = { }; + "${cfg.group}" = { }; }; users.users = lib.mkIf cfg.createUser { - ${cfg.user} = { + "${cfg.user}" = { isSystemUser = true; group = cfg.group; extraGroups = [ "docker" ];