diff --git a/.claude/skills/grok-multi-agent-api/SKILL.md b/.claude/skills/grok-multi-agent-api/SKILL.md new file mode 100644 index 0000000..8aa76af --- /dev/null +++ b/.claude/skills/grok-multi-agent-api/SKILL.md @@ -0,0 +1,153 @@ +--- +name: grok-multi-agent-api +description: 'xAI Grok Multi-Agent API reference for developing and maintaining this plugin. Triggers: "multi-agent api", "grok api", "agent_count", "reasoning effort", "openai sdk usage", "grok-4.20-multi-agent", "api configuration"' +version: 1.0.0 +--- + +# xAI Grok 4.20 Multi-Agent API Reference + +Reference for the Realtime Multi-agent Research API that this plugin wraps. Use this when modifying `src/bridge/grok_bridge.py`, `src/agent/grok_agent.py`, or any bridge code that communicates with xAI/OpenRouter. + +## Model ID + +``` +grok-4.20-multi-agent +``` + +> **Note:** This plugin currently uses `x-ai/grok-4.20-multi-agent-beta` via OpenRouter. The direct xAI API uses `grok-4.20-multi-agent`. Both refer to the same underlying model. + +## API Endpoints + +| Provider | Base URL | Endpoint | +|----------|----------|----------| +| xAI Direct | `https://api.x.ai/v1` | `/responses` | +| OpenRouter | `https://openrouter.ai/api/v1` | `/chat/completions` | + +**This plugin uses OpenRouter** as the gateway. The bridge sends requests to OpenRouter, which proxies to xAI. 
+ +## Agent Count Configuration + +| SDK / API | Parameter | 4 Agents | 16 Agents | +|-----------|-----------|----------|-----------| +| xAI SDK | `agent_count` | `4` | `16` | +| OpenAI SDK | `reasoning.effort` | `"low"` or `"medium"` | `"high"` or `"xhigh"` | +| Vercel AI SDK | `reasoningEffort` | `"low"` or `"medium"` | `"high"` or `"xhigh"` | +| REST API | `reasoning.effort` | `"low"` or `"medium"` | `"high"` or `"xhigh"` | + +- **4 agents**: Quick research, focused queries, lower cost +- **16 agents**: Deep research, complex multi-faceted topics, higher token usage + +In this plugin's bridge code (`grok_bridge.py`), agent count is sent as `extra_body={"agent_count": N}` via the OpenAI SDK. + +## Built-in Tools + +xAI provides server-side tools that can be enabled per request: + +| Tool | Description | +|------|-------------| +| `web_search` | Web search | +| `x_search` | X/Twitter search | +| `code_execution` | Code execution | +| `collections_search` | Collections search | + +When enabled, the server runs the agent loop automatically, invoking tools until the final answer is generated. These incur additional cost. + +**Important for this plugin:** The bridge currently does NOT pass through built-in tools — it uses the agents for pure reasoning over provided file context. If adding tool support, pass them in the `tools` parameter. + +## Output Behavior + +- Only the **leader agent's** final response and tool calls are returned to the caller +- Sub-agent state (intermediate reasoning, tool calls, outputs) is encrypted +- Encrypted sub-agent state is included only when `use_encrypted_content=True` (xAI SDK) +- This keeps default responses clean while preserving context for multi-turn + +## Multi-turn Conversations + +Use `previous_response_id` to chain turns. The agents use prior context for more targeted follow-up answers. 
+ +## API Limitations + +- **No Chat Completions API on the direct xAI endpoint** — there you must use the Responses API (`/responses`) or the xAI SDK; this plugin instead calls OpenRouter's `/chat/completions` route (see the OpenRouter example below) +- **No `max_tokens`** — parameter is not supported +- **No client-side/custom tools** — only built-in tools and remote MCP tools supported +- **Only leader output exposed** — sub-agent details are encrypted unless explicitly requested + +## Example: Direct xAI API (Python OpenAI SDK) + +```python +import os +from openai import OpenAI + +client = OpenAI( + api_key=os.getenv("XAI_API_KEY"), + base_url="https://api.x.ai/v1", +) + +# 4-agent setup +response = client.responses.create( + model="grok-4.20-multi-agent", + reasoning={"effort": "low"}, + input=[ + {"role": "user", "content": "Analyze this code..."}, + ], +) + +# 16-agent setup +response = client.responses.create( + model="grok-4.20-multi-agent", + reasoning={"effort": "high"}, + input=[ + {"role": "user", "content": "Deep analysis..."}, + ], +) +``` + +## Example: Via OpenRouter (This Plugin's Path) + +```python +from openai import OpenAI + +client = OpenAI( + api_key=os.getenv("OPENROUTER_API_KEY"), + base_url="https://openrouter.ai/api/v1", +) + +response = client.chat.completions.create( + model="x-ai/grok-4.20-multi-agent-beta", + extra_body={"agent_count": 4}, # or 16 + messages=[ + {"role": "system", "content": "You are..."}, + {"role": "user", "content": "Analyze..."}, + ], +) +``` + +## Prompting Best Practices + +When constructing system prompts for the multi-agent model: + +1. **Set scope and depth explicitly** — "Compare X across dimensions A, B, C" not "Tell me about X" +2. **Request structured output** — "Present as a comparison table with categories..." +3. **Specify sources/perspectives** — "Cite academic papers from 2024-2025" +4. **Break complex research into turns** — Start broad, narrow with follow-ups +5. 
**Provide context** — Include relevant constraints and prior knowledge + +## Pricing Considerations + +All tokens from **both leader and sub-agents** are billed (input, output, reasoning). Server-side tool calls by any agent also count. A single multi-agent request may use significantly more tokens than a standard request. Monitor via `usage` and `server_side_tool_usage` fields. + +## Streaming + +The xAI SDK supports streaming with `include=["verbose_streaming"]`: + +```python +chat = client.chat.create( + model="grok-4.20-multi-agent", + include=["verbose_streaming"], +) +for response, chunk in chat.stream(): + if chunk.content: + print(chunk.content, end="", flush=True) +``` + +This plugin's bridge does not currently stream — it waits for the full response. Streaming support would require changes to `grok_bridge.py:call_grok()` and `src/bridge/index.js`. diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..27a7d6d --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,97 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +A dual-platform plugin (Claude Code + OpenClaw) that bridges to xAI's **Grok 4.20 Multi-Agent Beta** via the **OpenRouter** API. It gives AI coding agents access to Grok's multi-agent swarm (4 or 16 agents) with ~2M token context for code analysis, refactoring, and generation. + +## Build & Development Commands + +```bash +# Build (copies Python bridge + Node wrapper to dist/) +npm run build + +# Test (only checks CLI --help flag) +npm test + +# Lint +npm run lint + +# Clean +npm run clean + +# Install to local platforms +./install.sh openclaw # copies to ~/.openclaw/ +./install.sh claude # copies to ~/.claude/plugins/grok-swarm/ +./install.sh both # both platforms + +# Python deps +pip3 install -r requirements.txt +``` + +Requires Node.js >= 18 and Python 3.8+. 
+ +## Architecture + +Layered bridge pattern — each layer has a single responsibility: + +``` +Plugin Layer (TypeScript/manifests) + ↓ registers tools and skills +CLI Wrapper (Node.js — src/bridge/index.js) + ↓ timeout enforcement, process spawning +Python Bridge (src/bridge/grok_bridge.py) + ↓ OpenAI SDK → OpenRouter API +xAI Grok 4.20 Multi-Agent Beta +``` + +**Key modules:** + +- `src/bridge/grok_bridge.py` — Core API logic: key resolution, mode-based system prompts, file context assembly, code block parsing. The `call_grok()` function is the central entry point. +- `src/bridge/cli.py` — Unified CLI that dispatches to grok_bridge with argparse. +- `src/bridge/apply.py` — Parses annotated code blocks and writes files to disk. Supports three annotation formats: `lang:path`, `FILE:` marker, and `# filename.py` comments. +- `src/bridge/index.js` — Node.js wrapper that enforces timeouts on Python subprocess. +- `src/bridge/oauth_setup.py` — PKCE OAuth flow for OpenRouter (keeps keys out of LLM context). +- `src/bridge/usage_tracker.py` — Persistent token/cost tracking. +- `src/agent/grok_agent.py` — Autonomous loop: discover files → call Grok → apply changes → verify → iterate. +- `src/shared/patterns.py` — Centralized regex patterns for filename detection, shared between bridge and agent. +- `src/plugin/index.ts` — OpenClaw plugin: registers `grok_swarm` (single call) and `grok_swarm_agent` (autonomous loop) tools. + +## API Key Resolution Priority + +`grok_bridge.py:get_api_key()` checks in order: +1. `OPENROUTER_API_KEY` environment variable +2. `~/.config/grok-swarm/config.json` +3. `~/.claude/grok-swarm.local.md` +4. OpenClaw auth profiles + +## Thinking Levels + +- **Low** (default): 4-agent swarm — faster, cheaper +- **High**: 16-agent swarm — triggered by phrases like "16 agent swarm", "high thinking mode", or `--thinking high` + +## File Annotation Formats + +Code blocks can be annotated three ways for `apply.py` to write them: +1. 
Fenced block with language:path — ` ```python:src/main.py ` +2. `FILE: path/to/file.py` marker inside the block +3. Comment header — `# filename.py` (uses `shared/patterns.py` regex) + +## Task Tracking + +Uses **bd (beads)** — not TodoWrite or markdown lists: +```bash +bd ready # Find available work +bd show <id> # View issue details +bd update <id> --claim +bd close <id> +``` + +## Code Duplication Note + +`skills/grok-refactor/bridge/` and `skills/grok-refactor/shared/` are copies of `src/bridge/` and `src/shared/` respectively (not symlinks). Changes to bridge/shared code must be applied in both locations. + +## Version Locations + +Version is defined in multiple places and must be kept in sync: `package.json`, `VERSION`, `pyproject.toml`, `CLAWHUB.md`, `.claude-plugin/marketplace.json`, and `platforms/claude/.claude-plugin/plugin.json`. Use `<VERSION>` as the canonical placeholder when referencing version numbers. diff --git a/TASKS.md b/TASKS.md index 2c485d1..8849ef2 100644 --- a/TASKS.md +++ b/TASKS.md @@ -1,196 +1,34 @@ -# Grok Multi-Agent — Project Status +# Tasks — Grok Multi-Agent -**Last Updated:** 2026-03-19 +## Completed -## Overview +- [x] Define project scope and done criteria +- [x] Build core bridge skill (`~/.openclaw/skills/grok-refactor/`) +- [x] Build OpenClaw plugin (`~/.openclaw/extensions/grok-swarm/`) +- [x] Create project scaffold (`~/projects/grok-multi-agent/`) +- [x] Write comprehensive README +- [x] Write CLAWHUB publication metadata +- [x] Write CHANGELOG +- [x] Document test results +- [x] Initialize git repository +- [x] Create requirements.txt and .gitignore -Grok Swarm is a dual-platform OpenClaw + Claude Code integration that bridges to xAI's Grok 4.20 Multi-Agent Beta. Built during Grok Swarm development sprint (2026-03-16 to 2026-03-19). 
+## Pending ---- +- [ ] Expand SKILL.md with full parameter reference and examples +- [ ] Create GitHub repository for ClawHub publishing +- [ ] Publish to npm +- [ ] Publish to ClawHub -## Completed ✅ +## Project Status -### Core Implementation -- [x] Grok 4.20 Multi-Agent Beta bridge (`grok_bridge.py`) -- [x] Node.js wrapper (`index.js`) -- [x] OpenClaw plugin (`src/plugin/`) -- [x] Claude Code skill (`platforms/claude/`) -- [x] File writing capability (`--write-files`, `--output-dir`) -- [x] Morph LLM MCP integration (`--use-morph`) -- [x] API key resolution (env → config file → OpenClaw profiles) +**Status:** Documentation Complete ✅ +**Location:** `~/projects/grok-multi-agent/` +**Skill:** `~/.openclaw/skills/grok-refactor/` +**Plugin:** `~/.openclaw/extensions/grok-swarm/` -### Documentation -- [x] User story ("The Story" section in README) -- [x] Dual-platform quick start (Claude Code + OpenClaw) -- [x] API key resolution precedence documented -- [x] File writing patterns documented -- [x] Morph LLM integration documented +## Next Steps -### Repository -- [x] GitHub repo: https://github.com/KHAEntertainment/grok-multiagent-plugin -- [x] Branches: `master`, `claude-plugin` (active) -- [x] File writing feature (Traycer's recommendation) -- [x] CodeRabbit reviews applied - ---- - -## In Progress 🚧 - -### Packaging & Distribution -- [x] Package as NPM module (`@khaentertainment/grok-swarm`) -- [x] Package as ClawHub skill -- [x] Package as Claude Code Marketplace Plugin (via GitHub Repo) -- [x] GitHub Actions workflow for npm auto-publish on tag push -- [ ] Test new packages/install methods -- [ ] Verify ClawHub install flow works end-to-end -- [ ] Verify Claude Code marketplace install flow works end-to-end - -### Documentation Gaps -- [ ] Installation video/screenshot walkthrough -- [ ] Troubleshooting section expansion - ---- - -## Planned Features (GitHub Issues) - -| Issue | Title | Priority | -|-------|-------|----------| -| #9 | Interactive TUI Setup Flow 
for Claude Code | Medium | -| #10 | High Thinking Mode - 16 Agent Swarm via Toggle | Medium | -| #11 | Cost/Usage Dashboard for Token and Credit Tracking | High | -| #12 | Grounding System Prompt for Agentic Assistant Context | High | -| #13 | Secure Credential Management for Claude Code | High | - -### Feature Details - -#### #11 — Cost/Usage Dashboard -**Why High Priority:** Grok 4.20 can burn through credits quickly. Users need visibility into token usage and costs. - -**MVP Approach:** -- Slash command: `/grok-swarm:stats` -- Store usage in `~/.config/grok-swarm/usage.json` -- Use OpenRouter API for quota status - -**Future:** TUI dashboard + Telegram Mini App - -#### #12 — Grounding System Prompt -**Why High Priority:** Grok needs consistent context about its role as an agentic assistant. - -**Approach:** -- Default system prompt establishing agentic role -- User-configurable via `~/.config/grok-swarm/system-prompt.txt` -- Merge with per-request prompts - -#### #9 — Interactive TUI Setup -**Approach:** Leverage Claude Code's TUI generation abilities for first-run setup. - -#### #10 — High Thinking Mode -**Approach:** Add `--thinking high` flag to enable 16-agent mode. 
- ---- - -## Project Structure - -``` -grok-multiagent-plugin/ -├── src/ -│ ├── bridge/ -│ │ ├── grok_bridge.py # Python API bridge -│ │ ├── cli.py # Unified CLI -│ │ ├── apply.py # File writing parser -│ │ └── index.js # Node wrapper -│ └── plugin/ -│ ├── index.ts # OpenClaw plugin -│ ├── openclaw.plugin.json -│ └── package.json -├── platforms/ -│ └── claude/ -│ ├── .claude-plugin/ -│ │ └── plugin.json -│ └── skills/ -│ └── grok-swarm/ -│ └── SKILL.md -├── scripts/ -│ ├── build.sh -│ ├── install.sh -│ └── setup.sh -├── README.md -├── LICENSE -├── CHANGELOG.md -├── CLAWHUB.md -├── pyproject.toml -└── requirements.txt -``` - ---- - -## Install Methods - -### NPM -```bash -npm install @openclaw/grok-swarm -``` - -### Claude Code Marketplace -``` -/plugin install grok-swarm@khaentertainment -``` - -### ClawHub -```bash -clawhub install grok-swarm -``` - -### Manual / Git Clone -```bash -git clone https://github.com/KHAEntertainment/grok-multiagent-plugin.git -cd grok-multiagent-plugin -./install.sh -``` - ---- - -## Dependencies - -- **Runtime:** Python 3.8+, Node.js 18+ -- **API:** OpenRouter API key with Grok 4.20 access -- **Optional:** Morph LLM MCP for partial file edits - ---- - -## Release Process - -### Versioning -This project follows semver. Version is maintained in: -- `package.json` (`version` field) -- `platforms/claude/.claude-plugin/plugin.json` (`version` field) -- `skills/grok-refactor/openclaw.plugin.json` (`version` field) - -### Publishing (NPM) - -1. **Bump version** in all three files above -2. **Create a git tag**: `git tag v1.3.0 && git push origin v1.3.0` -3. GitHub Actions automatically publishes to npm - -The `NPM_SECRET` secret must be set in the repo (GitHub → Settings → Secrets). - -### Claude Code Marketplace -Updates via git tag + GitHub repo updates. Claude Code plugins are installed directly from the repo URL. - -### OpenClaw (ClawHub) -Use `clawhub publish` or the ClawHub CLI after npm publish. 
- ---- - -## Team - -- **Billy** — Product owner, reviewer -- **Barry** — Implementation, documentation - ---- - -## Links - -- Repo: https://github.com/KHAEntertainment/grok-multiagent-plugin -- Issues: https://github.com/KHAEntertainment/grok-multiagent-plugin/issues -- Discord: https://discord.com/invite/clawd +1. Expand SKILL.md in the skill folder for ClawHub publication +2. Set up GitHub repo +3. Publish package diff --git a/skills/grok-refactor/bridge/grok_bridge.py b/skills/grok-refactor/bridge/grok_bridge.py index 808974d..38f00c6 100644 --- a/skills/grok-refactor/bridge/grok_bridge.py +++ b/skills/grok-refactor/bridge/grok_bridge.py @@ -48,6 +48,15 @@ "high": 16, } +# Modes that typically return text (not annotated code blocks) +TEXT_MODES = {"analyze", "reason", "orchestrate"} + +# PGP armored block detection — xAI multi-agent returns encrypted sub-agent state +PGP_BLOCK_PATTERN = re.compile( + r"-----BEGIN PGP MESSAGE-----.*?-----END PGP MESSAGE-----", + re.DOTALL, +) + # Plain-language phrases that trigger High Thinking mode automatically HIGH_THINKING_PHRASES = [ "16 agent swarm", @@ -246,6 +255,11 @@ def _safe_dest(output_path, file_path): return dest +def strip_pgp_blocks(text): + """Remove PGP-armored encrypted sub-agent state from response.""" + return PGP_BLOCK_PATTERN.sub("", text).strip() + + def parse_and_write_files(response_text, output_dir): """ Scan response for fenced code blocks with filename annotations and write to disk. 
@@ -508,31 +522,36 @@ def main(): ) # Output + # Normalize response once - strip PGP blocks before any writes + cleaned_result = strip_pgp_blocks(result) + if args.output: - Path(args.output).write_text(result) + Path(args.output).write_text(cleaned_result) print(f"Written to: {args.output}", file=sys.stderr) if args.write_files: - written = parse_and_write_files(result, args.output_dir) + written = parse_and_write_files(cleaned_result, args.output_dir) if written: total_bytes = sum(b for _, b in written) print(f"Wrote {len(written)} files to {args.output_dir}") for rel_path, byte_count in written: print(f" {rel_path} ({byte_count:,} bytes)") print(f"Total: {total_bytes:,} bytes") + elif args.mode in TEXT_MODES: + # Text mode: no annotated files is a normal outcome + print(cleaned_result) else: - # Save full response as a fallback so no output is lost + # Code mode: no files is unexpected but not fatal fallback_dir = Path(args.output_dir) fallback_dir.mkdir(parents=True, exist_ok=True) fallback_path = fallback_dir / "grok-response.txt" - fallback_path.write_text(result, encoding="utf-8") + fallback_path.write_text(cleaned_result, encoding="utf-8") print( - f"ERROR: No annotated files found in model response.\n" + f"WARNING: No annotated files found in model response.\n" f"Full response saved to: {fallback_path}\n" f"Tip: ask Grok to annotate code blocks with ```lang:path/to/file or # FILE: path/to/file", file=sys.stderr, ) - sys.exit(1) elif not args.output: print(result) diff --git a/src/bridge/grok_bridge.py b/src/bridge/grok_bridge.py index 0638886..503ebf4 100644 --- a/src/bridge/grok_bridge.py +++ b/src/bridge/grok_bridge.py @@ -39,6 +39,15 @@ "high": 16, } +# Modes that typically return text (not annotated code blocks) +TEXT_MODES = {"analyze", "reason", "orchestrate"} + +# PGP armored block detection — xAI multi-agent returns encrypted sub-agent state +PGP_BLOCK_PATTERN = re.compile( + r"-----BEGIN PGP MESSAGE-----.*?-----END PGP MESSAGE-----", + re.DOTALL, 
+) + # Plain-language phrases that trigger High Thinking mode automatically HIGH_THINKING_PHRASES = [ "16 agent swarm", @@ -201,6 +210,11 @@ def _safe_dest(output_path, file_path): return dest +def strip_pgp_blocks(text): + """Remove PGP-armored encrypted sub-agent state from response.""" + return PGP_BLOCK_PATTERN.sub("", text).strip() + + def parse_and_write_files(response_text, output_dir): """ Scan response for fenced code blocks with filename annotations and write to disk. @@ -462,22 +476,34 @@ def main(): ) # Output + # Normalize response once - strip PGP blocks before any writes + cleaned_result = strip_pgp_blocks(result) + if args.output: - Path(args.output).write_text(result) + Path(args.output).write_text(cleaned_result) print(f"Written to: {args.output}", file=sys.stderr) if args.write_files: - written = parse_and_write_files(result, args.output_dir) + written = parse_and_write_files(cleaned_result, args.output_dir) if written: total_bytes = sum(b for _, b in written) print(f"Wrote {len(written)} files to {args.output_dir}") for rel_path, byte_count in written: print(f" {rel_path} ({byte_count:,} bytes)") print(f"Total: {total_bytes:,} bytes") + elif args.mode in TEXT_MODES: + # Text mode: no annotated files is a normal outcome + print(cleaned_result) else: + # Code mode: no files is unexpected but not fatal + fallback_dir = Path(args.output_dir) + fallback_dir.mkdir(parents=True, exist_ok=True) + fallback_path = fallback_dir / "grok-response.txt" + fallback_path.write_text(cleaned_result, encoding="utf-8") print( - "No annotated files found in model response to write to disk.\n" - "Re-run without --write-files to see the full response.", + f"WARNING: No annotated files found in model response.\n" + f"Full response saved to: {fallback_path}\n" + f"Tip: ask Grok to annotate code blocks with ```lang:path/to/file or # FILE: path/to/file", file=sys.stderr, ) elif not args.output: diff --git a/tasks.md b/tasks.md index 8849ef2..e2a37ed 100644 --- a/tasks.md 
+++ b/tasks.md @@ -22,7 +22,7 @@ ## Project Status -**Status:** Documentation Complete ✅ +**Status:** Documentation In Progress ⚠️ (expand SKILL.md pending) **Location:** `~/projects/grok-multi-agent/` **Skill:** `~/.openclaw/skills/grok-refactor/` **Plugin:** `~/.openclaw/extensions/grok-swarm/`