diff --git a/.gitignore b/.gitignore index 979bc17c73..19f0c55a16 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,8 @@ bin/gstack-global-discover .openclaw/ .hermes/ .gbrain/ +.cognition/ +.devin/ .context/ extension/.auth.json # xterm assets are vendored from npm at build time; not source-of-truth. diff --git a/AGENTS.md b/AGENTS.md index bea9c7fd72..68b48cd0a4 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -103,6 +103,31 @@ bun run skill:check # health dashboard for all skills helper resolves state roots through `CLAUDE_PLUGIN_DATA` / `GSTACK_HOME` so plugin installs work on every platform. +## Multi-agent support + +gstack works with multiple AI coding agents via typed host configs in `hosts/`. +Each agent gets its own generated SKILL.md output: + +| Host | Generate | Skill location | +| --- | --- | --- | +| Claude Code (default) | `bun run gen:skill-docs --host claude` | `/SKILL.md` (in-tree) | +| OpenAI Codex CLI | `bun run gen:skill-docs --host codex` | `.agents/skills/gstack-*/` | +| Factory Droid | `bun run gen:skill-docs --host factory` | `.factory/skills/gstack-*/` | +| OpenCode | `bun run gen:skill-docs --host opencode` | `.opencode/skills/gstack-*/` | +| Cursor | `bun run gen:skill-docs --host cursor` | `.cursor/skills/gstack-*/` | +| **Devin (Cognition AI)** | `bun run gen:skill-docs --host devin` | `.devin/skills/gstack-*/` | + +For Devin specifically, see [`docs/DEVIN.md`](docs/DEVIN.md). Two surfaces share +the same skills: + +- **Devin for Terminal CLI** (local) reads from `.devin/skills/` (project) and + `~/.config/devin/skills/` (global). Run `./setup --host devin` for global + install on your machine, or `./setup --host devin --local` from inside any + project repo to commit per-repo. +- **Devin remote/cloud sessions** clone the repo into a fresh Ubuntu VM and also + discover skills under `.devin/skills/`. The canonical commit-to-repo install is + `./setup --host devin --local` from inside your project repo. 
+ ## Key conventions - SKILL.md files are **generated** from `.tmpl` templates. Edit the template, not the output. diff --git a/devin-setup/SKILL.md.tmpl b/devin-setup/SKILL.md.tmpl new file mode 100644 index 0000000000..92a48996c2 --- /dev/null +++ b/devin-setup/SKILL.md.tmpl @@ -0,0 +1,234 @@ +--- +name: devin-setup +preamble-tier: 1 +version: 1.0.0 +description: | + Devin-only one-shot setup. Registers /ship and /land-and-deploy as Devin + Playbooks for this user/org, creates an initial Knowledge Note linking the + project's ETHOS.md and AGENTS.md, and verifies that `devin_mcp` is reachable + for sub-session orchestration. Run this once per Devin workspace after + cloning a gstack-equipped repo. + Use when: "devin setup", "register playbooks", "set up gstack for devin", + "wire devin_mcp", "set up knowledge notes for this repo". +triggers: + - devin setup + - register playbooks + - setup knowledge notes +allowed-tools: + - Bash + - Read + - Grep + - Glob +--- + +{{PREAMBLE}} + +# /devin-setup — Wire gstack into this Devin workspace + +You are helping the user wire gstack into their Devin workspace. This is the +**Devin-equivalent of `/setup-gbrain`**: it ensures the cross-session memory +(Knowledge Notes) and orchestration (Playbooks, sub-sessions) are ready before +the user runs `/ship`, `/land-and-deploy`, `/review`, or any of the +review-army-style skills. + +This skill is idempotent — running it again is safe and only adds missing +pieces. + +## When to invoke + +Run this once per Devin workspace, ideally right after `./setup --host devin --local` +generates the `.devin/skills/` tree. Repeat anytime gstack changes how it +uses Devin's MCP surface. + +## Instructions + +### Step 1: Verify devin_mcp is reachable + +The whole skill assumes the agent has access to the `devin_mcp` tool with +`devin_session_create`, `devin_knowledge_manage`, and `devin_playbook_manage`. 
+ +Try a no-op call: + +``` +tool_name = "devin_knowledge_manage" +tool_args = { + "action": "list", + "first": 1 +} +``` + +If this returns an authorization / "tool unavailable" error, stop this skill +(skip all remaining steps) and tell the user: "devin_mcp is not enabled for this Devin account. Phase 2 features +(sub-session second opinions, Knowledge Notes memory, Playbook registration) +require `devin_mcp` access. Continuing without these — gstack still works, +but cross-session memory and outside-voice reviews will be no-ops." + +If the call succeeds, continue. + +### Step 2: Derive the repo identifier + +```bash +REPO_URL=$(git config --get remote.origin.url) +REPO_SLUG=$(echo "$REPO_URL" | sed -E 's#(git@github\.com:|https?://github\.com/)##; s#\.git$##') +echo "REPO_SLUG: $REPO_SLUG" +``` + +Remember `$REPO_SLUG` — every Knowledge Note created by this skill will be +pinned to it. + +### Step 3: Register /ship as a Devin Playbook + +Read the generated /ship skill markdown: + +```bash +SHIP_SKILL=".devin/skills/gstack-ship/SKILL.md" +[ -f "$SHIP_SKILL" ] || SHIP_SKILL=".agents/skills/gstack-ship/SKILL.md" +[ -f "$SHIP_SKILL" ] || SHIP_SKILL=".github/skills/gstack-ship/SKILL.md" +[ -f "$SHIP_SKILL" ] || SHIP_SKILL=".cognition/skills/gstack-ship/SKILL.md" # legacy +cat "$SHIP_SKILL" | head -1 +``` + +If found, register as a Playbook: + +``` +tool_name = "devin_playbook_manage" +tool_args = { + "action": "create", + "title": "gstack: /ship", + "body": "<full contents of $SHIP_SKILL>", + "macro": "!ship" +} +``` + +Before creating, list existing playbooks via +`{"action": "list", "first": 100}` and check whether one titled exactly +`gstack: /ship` already exists. If it does, prefer +`{"action": "update", "playbook_id": "<existing playbook id>"}` over creating a duplicate +(v3 PUT is full-replace — pass the full body). 
+ +### Step 4: Register /land-and-deploy as a Devin Playbook + +Same procedure as Step 3, but for `/land-and-deploy`: + +```bash +LAND_SKILL=".devin/skills/gstack-land-and-deploy/SKILL.md" +[ -f "$LAND_SKILL" ] || LAND_SKILL=".agents/skills/gstack-land-and-deploy/SKILL.md" +[ -f "$LAND_SKILL" ] || LAND_SKILL=".github/skills/gstack-land-and-deploy/SKILL.md" +[ -f "$LAND_SKILL" ] || LAND_SKILL=".cognition/skills/gstack-land-and-deploy/SKILL.md" # legacy +``` + +Register with: + +``` +tool_name = "devin_playbook_manage" +tool_args = { + "action": "create", + "title": "gstack: /land-and-deploy", + "body": "<full contents of $LAND_SKILL>", + "macro": "!land_and_deploy" +} +``` + +Same dedup-check (list → update if exists) as Step 3. + +### Step 5: Seed the project's Knowledge Notes + +Create the first project-pinned Knowledge Note describing this repo's gstack +ETHOS, so future sessions get context on the project's design philosophy when +they start a skill that loads Knowledge Notes. + +Read the local ETHOS file: + +```bash +ETHOS_FILE="ETHOS.md" +[ -f "$ETHOS_FILE" ] || ETHOS_FILE=".devin/skills/gstack/ETHOS.md" +[ -f "$ETHOS_FILE" ] || ETHOS_FILE=".cognition/skills/gstack/ETHOS.md" # legacy +[ -f "$ETHOS_FILE" ] && head -200 "$ETHOS_FILE" +``` + +If ETHOS.md exists, register it as a Knowledge Note: + +``` +tool_name = "devin_knowledge_manage" +tool_args = { + "action": "create", + "name": "gstack: ETHOS for <REPO_SLUG>", + "body": "<contents of $ETHOS_FILE>\n\n---\nPinned by /devin-setup. This note describes the design philosophy that should guide all gstack skill outputs in this repo.", + "trigger": "When starting a new gstack skill in this repo, especially /office-hours, /plan-ceo-review, /investigate, or /retro.", + "pinned_repo": "<REPO_SLUG>" +} +``` + +Dedup check: list with `{"action": "list", "search": "ETHOS for <REPO_SLUG>", "pinned_repo": "<REPO_SLUG>"}` +and update if it exists. 
+ +If `AGENTS.md` exists in the repo, register it as a second Knowledge Note: + +``` +tool_name = "devin_knowledge_manage" +tool_args = { + "action": "create", + "name": "gstack: AGENTS.md for <REPO_SLUG>", + "body": "<full contents of AGENTS.md>", + "trigger": "When starting any gstack skill in this repo, to load the slash-command catalog and platform conventions.", + "pinned_repo": "<REPO_SLUG>" +} +``` + +### Step 6: Print a summary + +Output exactly this block (filling in the values you observed): + +``` +gstack ↔ Devin wiring complete + +Repo: <REPO_SLUG> +Playbooks registered: + - gstack: /ship (macro !ship, id: <playbook_id>) + - gstack: /land-and-deploy (macro !land_and_deploy, id: <playbook_id>) +Knowledge Notes seeded: + - gstack: ETHOS for <REPO_SLUG> (id: <note_id>) + - gstack: AGENTS.md for <REPO_SLUG> (id: <note_id>) +Sub-session orchestration: + - devin_session_create: REACHABLE + - devin_knowledge_manage: REACHABLE + - devin_playbook_manage: REACHABLE + +Next steps: + - Run /ship (or type !ship) to land your next change. + - Run /review on any branch — it will spawn parallel review-army sub-sessions. + - Run /office-hours for product brainstorming with cross-session memory. +``` + +If any step failed, replace the corresponding line with `FAILED: <reason>` +and continue with the rest. Setup is best-effort — partial wiring is fine. + +### Step 7: (optional) Set up a recurring health check + +Ask: + +> Want a weekly Devin schedule that runs `/health` on this repo every Monday at 09:00? +> A) Yes, schedule weekly /health +> B) No thanks + +If A, register the schedule via `devin_mcp`: + +``` +tool_name = "devin_schedule_manage" +tool_args = { + "action": "create", + "name": "gstack weekly /health for <REPO_SLUG>", + "prompt": "Run /health on this repo and post the result.", + "frequency": "0 9 * * 1", + "schedule_type": "recurring", + "notify_on": "failure", + "agent": "devin" +} +``` + +If the schedule manager is unavailable, note and continue. + +## Done + +Setup complete. 
All future gstack skills in this repo will use Knowledge Notes +for memory and sub-sessions for second-opinion reviews automatically. diff --git a/docs/DEVIN.md b/docs/DEVIN.md new file mode 100644 index 0000000000..18d9c1b99a --- /dev/null +++ b/docs/DEVIN.md @@ -0,0 +1,240 @@ +# gstack on Devin (Cognition AI) + +> Status: Phase 1 (MVP) — host config registered, all ~40 skills generate as +> Devin-compatible Agent Skills. Phase 2 (sub-sessions / Knowledge Notes) and +> Phase 3 (browser test mode + recordings) are tracked in +> [`docs/designs/DEVIN_INTEGRATION.md`](designs/DEVIN_INTEGRATION.md). + +## TL;DR + +### Devin for Terminal CLI on your Ubuntu / macOS machine (one-liner) + +```bash +curl -fsSL https://raw.githubusercontent.com/louisCalderon888/gstack/devin/1777864006-add-devin-host/scripts/install-devin.sh | bash +``` + +Installs Bun, Node 20 (via nvm), clones the fork to `~/dev/gstack`, runs setup, +and symlinks all 43 gstack skills into `~/.config/devin/skills/`. After it +finishes, open a new terminal and run `devin` from any project — `/gstack-*` +slash commands appear in the autocomplete dropdown. + +The installer is safe to re-run; it cleans stale symlinks from previous attempts. + +### Or commit per-repo (cloud / remote VM sessions) + +```bash +# From inside YOUR project repo (not a clone of gstack): +git clone https://github.com/garrytan/gstack /tmp/gstack +/tmp/gstack/setup --host devin --local + +git add .devin/skills +git commit -m "chore: add gstack skills for Devin" +git push +``` + +That's it. The next Devin session you start against the repo will discover +all ~40 gstack skills automatically and you can invoke them with +`/review`, `/qa`, `/ship`, `@skills:office-hours`, etc. 
+ +## How Devin discovers skills + +Two surfaces share the same SKILL.md format: + +### Devin for Terminal CLI (local) + +From the [official docs](https://cli.devin.ai/docs/extensibility/skills/overview), +the CLI scans these paths: + +``` +# Project-specific (committed to git) +.devin/skills/<name>/SKILL.md ← gstack writes here for Devin +.agents/skills/<name>/SKILL.md (Codex sidecar; CLI also reads it) +.windsurf/skills/<name>/SKILL.md + +# Global (per-user, never committed) +~/.config/devin/skills/<name>/SKILL.md ← gstack symlinks here on `--host devin` +~/.agents/skills/<name>/SKILL.md +``` + +### Devin remote / cloud sessions + +Remote sessions clone your repo into a fresh Ubuntu VM and discover skills +from `.devin/skills/`, `.agents/skills/`, and `.windsurf/skills/` per the +open Agent Skills standard. Same files — just whichever directory you +committed to. + +### Why `.devin/skills/` + +gstack writes Devin-formatted skills into `.devin/skills/` because: + +1. It is the **canonical Devin path** documented for both surfaces (CLI and remote sessions). +2. It does not collide with the Codex sidecar at `.agents/skills/` when + you also generate for Codex on the same repo (e.g. via `--host all`). +3. The CLI scans it without any extra configuration. + +If you only target Devin and don't run Codex, `.agents/skills/` is also a +valid path — the CLI reads it, and it is the canonical Codex location too, +so you'd reuse a single set of files for both hosts. Set +`localSkillRoot: '.agents/skills/gstack'` and `hostSubdir: '.agents'` in +`hosts/devin.ts` to switch. + +## Why Devin needs its own host config + +Devin reads the open [Agent Skills standard](https://github.com/openai/agent-skills), +which is the same spec Codex and Factory use. 
The generated SKILL.md files are +*almost* drop-in compatible — but five things differ: + +| Concern | Codex output | Devin output | +| --- | --- | --- | +| Sidecar metadata file | Writes `agents/openai.yaml` (Codex needs it for fast indexing) | None — Devin parses `SKILL.md` directly | +| `triggers:` field semantics | gstack's "voice triggers" array (kept as-is) | Devin's invocation-gate field — `["user"]` vs `["user","model"]`. We synthesize this only for sensitive skills (careful, freeze, guard, unfreeze) and drop the template's voice-trigger array everywhere else | +| Co-author trailer | `Co-Authored-By: codex ` | `Co-Authored-By: Devin ` | +| Cross-model orchestrators | Suppresses `claude -p` invocations | Same suppressions PLUS skips the `/codex` skill itself (Phase 2 reintroduces this via `devin_mcp` sub-sessions) | +| Boundary instruction | "Don't read other agents' SKILL.md" prose tailored to OpenAI Codex | Devin-tuned: explicitly enumerate the seven scanned dirs and call out which to ignore | + +Everything else — the preamble bash blocks, the resolver expansions, the +$GSTACK_ROOT handling — is shared with Codex via gstack's existing +`HostConfig` system. 
+ +## What gets generated + +``` +.devin/skills/ +├── gstack/ # Root skill (browse-as-the-agent surface) +├── gstack-autoplan/ # /autoplan +├── gstack-benchmark/ # /benchmark +├── gstack-benchmark-models/ +├── gstack-browse/ # /browse — headless browser +├── gstack-canary/ +├── gstack-careful/ # triggers: ["user"] (sensitive) +├── gstack-claude/ +├── gstack-context-restore/ +├── gstack-context-save/ +├── gstack-cso/ +├── gstack-design-consultation/ +├── gstack-design-html/ +├── gstack-design-review/ +├── gstack-design-shotgun/ +├── gstack-devex-review/ +├── gstack-document-release/ +├── gstack-freeze/ # triggers: ["user"] (sensitive) +├── gstack-guard/ # triggers: ["user"] (sensitive) +├── gstack-health/ +├── gstack-investigate/ +├── gstack-land-and-deploy/ +├── gstack-landing-report/ +├── gstack-learn/ +├── gstack-make-pdf/ +├── gstack-office-hours/ +├── gstack-open-gstack-browser/ +├── gstack-plan-ceo-review/ +├── gstack-plan-design-review/ +├── gstack-plan-devex-review/ +├── gstack-plan-eng-review/ +├── gstack-plan-tune/ +├── gstack-qa/ +├── gstack-qa-only/ +├── gstack-retro/ +├── gstack-review/ +├── gstack-scrape/ +├── gstack-setup-browser-cookies/ +├── gstack-setup-deploy/ +├── gstack-ship/ +├── gstack-skillify/ +├── gstack-unfreeze/ # triggers: ["user"] (sensitive) +└── gstack-upgrade/ +``` + +Each directory contains a single `SKILL.md` with frontmatter: + +```yaml +--- +name: review # Slash command becomes /review +description: | + Pre-landing PR review. Analyzes diff against the base branch... +allowed-tools: + - Bash + - Read + - Edit + - Write + - Grep + - Glob + - Agent + - AskUserQuestion + - WebSearch +--- +``` + +(Sensitive skills also carry `triggers: ["user"]` so Devin only invokes them +when you explicitly ask.) 
+ +## Skills NOT generated for Devin + +These skills target architecture that doesn't exist on a Devin VM, so they're +in `generation.skipSkills` for the Devin host: + +| Skill | Why skipped on Devin | +| --- | --- | +| `/codex` | Wraps `codex exec` to get a second opinion from OpenAI Codex; on Devin we'll use `devin_mcp` sub-sessions instead (Phase 2). | +| `/pair-agent` | Pairs OpenClaw/Codex with the gstack browser sidebar over an ngrok tunnel; Devin already has its own browser/computer tool. | +| `/setup-gbrain` | Sets up gbrain cross-machine memory sync; Devin has Knowledge Notes (Phase 2 port). | + +Resolvers that spawn other CLIs (`DESIGN_OUTSIDE_VOICES`, `ADVERSARIAL_STEP`, +`CODEX_SECOND_OPINION`, `CODEX_PLAN_REVIEW`, `REVIEW_ARMY`) are not suppressed +for Devin (`suppressedResolvers` is empty in `hosts/devin.ts`). Instead of +shelling out to another CLI, each resolver emits Devin-native instructions that +spawn child Devin sessions through `devin_mcp` (`devin_session_create`) for the +same kind of jury-of-models review. + +## Binaries (`browse`, `design`, `make-pdf`) + +Devin VMs don't have a global gstack install, so the Bun-compiled binaries +that some skills depend on (`browse` for `/qa`, `design` for `/design-shotgun`, +`make-pdf` for `/make-pdf`) need to be reachable from inside the repo. + +For Phase 1, these skills will fall back to "no binary available — use Devin's +native browser/screenshot tools" prose if they can't find `$GSTACK_ROOT`. The +binary skills are still generated (so Devin sees them in the skill index and +can read the workflow), but the binary-dependent steps will degrade gracefully. + +For full binary support on Devin, vendor gstack as a git submodule or check the +binaries directly into your repo. See +[`docs/designs/DEVIN_INTEGRATION.md`](designs/DEVIN_INTEGRATION.md) for the +options under consideration. 
+ +## Local development + +If you run the Devin for Terminal CLI on your machine, the global install path +follows the XDG convention documented at +[cli.devin.ai/docs/extensibility/skills](https://cli.devin.ai/docs/extensibility/skills/creating-skills): + +``` +# Linux / macOS +~/.config/devin/skills/gstack-*/SKILL.md + +# Windows +%APPDATA%\devin\skills\gstack-*\SKILL.md +``` + +`./setup --host devin` (without `--local`) regenerates `.devin/skills/` in +the gstack source tree and symlinks each `gstack-/` into +`~/.config/devin/skills/`. After running it once, every Devin CLI session you +start — from any directory — will see all gstack slash commands. + +For Windows users, run setup inside WSL2 with the gstack repo cloned to +your WSL home directory (e.g. `~/dev/gstack`), not under `/mnt/c/...`. +gstack itself supports a `GSTACK_CLAUDE_BIN=wsl` override for the Claude +binary; the same approach works for `devin` when it's installed inside WSL. + +## Roadmap + +- **Phase 1 (this PR):** host config + Devin-formatted skill generation. + Slash commands work in any Devin session (CLI or remote VM) against a repo + with `.devin/skills/gstack-*/`, or globally on a CLI machine with + `~/.config/devin/skills/gstack-*/`. +- **Phase 2:** rewire `claude -p`-based "second opinion" resolvers to + `devin_mcp` sub-sessions; port `gbrain` memory layer to Knowledge Notes; + register `/ship` and `/land-and-deploy` as Devin Playbooks. +- **Phase 3:** wire `/qa` and `/design-review` to Devin's + `enter_test_mode` + `recording_*` so the skills produce annotated + videos as test evidence. 
diff --git a/hosts/claude.ts b/hosts/claude.ts index f805da040e..c66da0c213 100644 --- a/hosts/claude.ts +++ b/hosts/claude.ts @@ -19,7 +19,10 @@ const claude: HostConfig = { generation: { generateMetadata: false, - skipSkills: ['claude'], // Claude outside-voice skill is for non-Claude hosts + skipSkills: [ + 'claude', // Claude outside-voice skill is for non-Claude hosts + 'devin-setup', // Devin-only setup skill (registers playbooks + Knowledge Notes) + ], }, pathRewrites: [], // Claude is the primary host — no rewrites needed diff --git a/hosts/codex.ts b/hosts/codex.ts index 7dc80ea877..567bb970f4 100644 --- a/hosts/codex.ts +++ b/hosts/codex.ts @@ -21,7 +21,10 @@ const codex: HostConfig = { generation: { generateMetadata: true, metadataFormat: 'openai.yaml', - skipSkills: ['codex'], // Codex skill is a Claude wrapper around codex exec + skipSkills: [ + 'codex', // Codex skill is a Claude wrapper around codex exec + 'devin-setup', // Devin-only setup skill + ], }, pathRewrites: [ diff --git a/hosts/cursor.ts b/hosts/cursor.ts index 48e3a0f14c..52a15f5b72 100644 --- a/hosts/cursor.ts +++ b/hosts/cursor.ts @@ -19,7 +19,7 @@ const cursor: HostConfig = { generation: { generateMetadata: false, - skipSkills: ['codex'], + skipSkills: ['codex', 'devin-setup'], }, pathRewrites: [ diff --git a/hosts/devin.ts b/hosts/devin.ts new file mode 100644 index 0000000000..4446f7def3 --- /dev/null +++ b/hosts/devin.ts @@ -0,0 +1,120 @@ +import type { HostConfig } from '../scripts/host-config'; + +/** + * Devin (Cognition AI) host config. + * + * Two surfaces share the same SKILL.md format: + * - Devin for Terminal CLI scans `.devin/skills/<name>/SKILL.md` (project-local) + * and `~/.config/devin/skills/<name>/SKILL.md` (global, XDG-style). + * - Devin remote/cloud sessions clone the user's repo into an ephemeral VM and + * also discover skills under `.devin/skills/`, `.agents/skills/`, and + * `.windsurf/skills/` per the open Agent Skills standard. 
+ * + * Devin reads these frontmatter fields: + * - `name` — slash-command name (e.g. `gstack-review` → `/gstack-review`) + * - `description` — short summary shown in the slash-command dropdown + * - `allowed-tools` — restrict tools Devin may use while the skill is active + * - `argument-hint` — hint shown alongside the skill name + * - `triggers` — `["user"]` to prevent auto-invocation; default `["user","model"]` + * + * gstack emits Devin-formatted skills to `.devin/skills/gstack-<name>/SKILL.md` + * (the canonical Devin path; does not collide with the Codex sidecar at + * `.agents/skills/gstack-*` when both hosts are generated together via + * `--host all`). Devin CLI and remote VM sessions both discover them + * automatically. + * + * References: + * - https://cli.devin.ai/docs/extensibility/skills/overview + * - https://cli.devin.ai/docs/extensibility/skills/creating-skills + * - https://docs.devin.ai/product-guides/skills (remote VM) + */ +const devin: HostConfig = { + name: 'devin', + displayName: 'Devin (Cognition AI)', + cliCommand: 'devin', + cliAliases: ['devin-cli', 'cognition'], + + globalRoot: '.devin/skills/gstack', /* NOTE(review): the docs in this change give the global install path as ~/.config/devin/skills/ — confirm whether this should be '.config/devin/skills/gstack' or whether setup maps it elsewhere. */ + localSkillRoot: '.devin/skills/gstack', + hostSubdir: '.devin', + usesEnvVars: true, + + frontmatter: { + mode: 'allowlist', + // Note: the templates carry a `triggers:` array of voice/keyword phrases — + // semantically that's gstack's "voice triggers" hint, NOT Devin's `triggers` + // field (which is `["user"]` / `["user","model"]` to gate invocation). + // We drop the template's `triggers` from the allowlist here and synthesize + // a fresh, Devin-compliant `triggers` only for sensitive skills below. + keepFields: ['name', 'description', 'argument-hint', 'allowed-tools'], + descriptionLimit: null, + // Skills marked `sensitive: true` (careful, freeze, guard, unfreeze) must + // not auto-activate. Devin honors `triggers: ["user"]` to require explicit + // user invocation (slash command or @skills mention). 
+ conditionalFields: [ + { if: { sensitive: true }, add: { triggers: '["user"]' } }, + ], + }, + + generation: { + generateMetadata: false, + skipSkills: [ + 'codex', // Codex skill is a Claude wrapper around `codex exec`; Devin replaces it via `devin_mcp`. + 'pair-agent', // Pairs OpenClaw/Codex with the gstack browser sidebar; Devin uses its own computer/browser tools. + 'setup-gbrain', // GBrain memory layer; Devin uses Knowledge Notes instead (see /devin-setup). + ], + }, + + pathRewrites: [ + { from: '~/.claude/skills/gstack', to: '$GSTACK_ROOT' }, + { from: '.claude/skills/gstack', to: '.devin/skills/gstack' }, + { from: '.claude/skills/review', to: '.devin/skills/gstack/review' }, + { from: '.claude/skills', to: '.devin/skills' }, + ], + + // Devin understands the standard tool names directly (Bash, Read, Grep, Edit, + // Write, etc.), so we don't rewrite them into prose like Factory does. + toolRewrites: {}, + + // Phase 2: cross-model "second opinion" orchestrators and gbrain memory + // are no longer suppressed. Each resolver branches on `ctx.host === 'devin'` + // (see scripts/resolvers/devin-helpers.ts) and emits Devin-native prose: + // - DESIGN_OUTSIDE_VOICES, ADVERSARIAL_STEP, CODEX_SECOND_OPINION, + // CODEX_PLAN_REVIEW, REVIEW_ARMY → spawn child Devin sessions via + // `devin_mcp` `devin_session_create`. + // - GBRAIN_CONTEXT_LOAD, GBRAIN_SAVE_RESULTS → search/create Knowledge + // Notes via `devin_mcp` `devin_knowledge_manage`. + // Leaving suppressedResolvers empty opts the Devin host into the new + // behavior. To re-suppress a specific resolver (e.g. for a future Devin + // variant without sub-session permissions), add its name here. 
+ suppressedResolvers: [], + + runtimeRoot: { + globalSymlinks: ['bin', 'browse/dist', 'browse/bin', 'design/dist', 'gstack-upgrade', 'ETHOS.md'], + globalFiles: { + 'review': ['checklist.md', 'TODOS-format.md'], + }, + }, + + // Repo-local sidecar: when a user vendors gstack into their repo (via + // submodule or clone-on-setup), this is where the install script symlinks + // shared assets so binaries and helper files are reachable from the skills. + sidecar: { + path: '.devin/skills/gstack', + symlinks: ['bin', 'browse', 'review', 'qa', 'ETHOS.md'], + }, + + install: { + prefixable: false, + linkingStrategy: 'symlink-generated', + }, + + coAuthorTrailer: 'Co-Authored-By: Devin ', + learningsMode: 'basic', + // Boundary instruction: Devin sessions clone the repo and may also see + // sibling skill directories from other hosts. Tell Devin to ignore those + // so it only follows the Devin-formatted skills. + boundaryInstruction: 'IMPORTANT: Read and execute SKILL.md files only from `.devin/skills/` (project-local) or `~/.config/devin/skills/` (global). 
Ignore files under `.claude/skills/`, `.codex/skills/`, `.cursor/skills/`, `.factory/skills/`, `.kiro/skills/`, `.opencode/`, `.windsurf/skills/`, `.agents/skills/`, `.cognition/skills/`, and any sibling `agents/openai.yaml` metadata — those are formatted for other AI systems or older Devin layouts and will waste your time.', +}; + +export default devin; diff --git a/hosts/factory.ts b/hosts/factory.ts index 08ac2f9a13..b6f679eb08 100644 --- a/hosts/factory.ts +++ b/hosts/factory.ts @@ -25,7 +25,10 @@ const factory: HostConfig = { generation: { generateMetadata: false, - skipSkills: ['codex'], // Codex skill is a Claude wrapper around codex exec + skipSkills: [ + 'codex', // Codex skill is a Claude wrapper around codex exec + 'devin-setup', // Devin-only setup skill + ], }, pathRewrites: [ diff --git a/hosts/gbrain.ts b/hosts/gbrain.ts index ae777f2f18..72ca6c6b36 100644 --- a/hosts/gbrain.ts +++ b/hosts/gbrain.ts @@ -24,7 +24,7 @@ const gbrain: HostConfig = { generation: { generateMetadata: false, - skipSkills: ['codex'], + skipSkills: ['codex', 'devin-setup'], includeSkills: [], }, diff --git a/hosts/hermes.ts b/hosts/hermes.ts index 43598989df..2b3b60e715 100644 --- a/hosts/hermes.ts +++ b/hosts/hermes.ts @@ -19,7 +19,7 @@ const hermes: HostConfig = { generation: { generateMetadata: false, - skipSkills: ['codex'], + skipSkills: ['codex', 'devin-setup'], includeSkills: [], }, diff --git a/hosts/index.ts b/hosts/index.ts index cc1c213b53..98d9470091 100644 --- a/hosts/index.ts +++ b/hosts/index.ts @@ -16,9 +16,10 @@ import cursor from './cursor'; import openclaw from './openclaw'; import hermes from './hermes'; import gbrain from './gbrain'; +import devin from './devin'; /** All registered host configs. Add new hosts here. 
*/ -export const ALL_HOST_CONFIGS: HostConfig[] = [claude, codex, factory, kiro, opencode, slate, cursor, openclaw, hermes, gbrain]; +export const ALL_HOST_CONFIGS: HostConfig[] = [claude, codex, factory, kiro, opencode, slate, cursor, openclaw, hermes, gbrain, devin]; /** Map from host name to config. */ export const HOST_CONFIG_MAP: Record = Object.fromEntries( @@ -65,4 +66,4 @@ export function getExternalHosts(): HostConfig[] { } // Re-export individual configs for direct import -export { claude, codex, factory, kiro, opencode, slate, cursor, openclaw, hermes, gbrain }; +export { claude, codex, factory, kiro, opencode, slate, cursor, openclaw, hermes, gbrain, devin }; diff --git a/hosts/kiro.ts b/hosts/kiro.ts index 31adc7c724..a9d1fd73d7 100644 --- a/hosts/kiro.ts +++ b/hosts/kiro.ts @@ -19,7 +19,10 @@ const kiro: HostConfig = { generation: { generateMetadata: false, - skipSkills: ['codex'], // Codex skill is a Claude wrapper around codex exec + skipSkills: [ + 'codex', // Codex skill is a Claude wrapper around codex exec + 'devin-setup', // Devin-only setup skill + ], }, pathRewrites: [ diff --git a/hosts/openclaw.ts b/hosts/openclaw.ts index f8268b5c7e..f83e623aa1 100644 --- a/hosts/openclaw.ts +++ b/hosts/openclaw.ts @@ -22,7 +22,7 @@ const openclaw: HostConfig = { generation: { generateMetadata: false, - skipSkills: ['codex'], + skipSkills: ['codex', 'devin-setup'], includeSkills: [], }, diff --git a/hosts/opencode.ts b/hosts/opencode.ts index 3ad0901ec1..f8d0c911ef 100644 --- a/hosts/opencode.ts +++ b/hosts/opencode.ts @@ -19,7 +19,7 @@ const opencode: HostConfig = { generation: { generateMetadata: false, - skipSkills: ['codex'], + skipSkills: ['codex', 'devin-setup'], }, pathRewrites: [ diff --git a/hosts/slate.ts b/hosts/slate.ts index 0c29cf8f64..8803e3093e 100644 --- a/hosts/slate.ts +++ b/hosts/slate.ts @@ -19,7 +19,7 @@ const slate: HostConfig = { generation: { generateMetadata: false, - skipSkills: ['codex'], + skipSkills: ['codex', 'devin-setup'], 
}, pathRewrites: [ diff --git a/scripts/install-devin.sh b/scripts/install-devin.sh new file mode 100755 index 0000000000..8da89b74c6 --- /dev/null +++ b/scripts/install-devin.sh @@ -0,0 +1,151 @@ +#!/usr/bin/env bash +# gstack one-shot installer for Devin for Terminal CLI on Linux/macOS. +# +# Usage (paste this single line into your terminal): +# +# curl -fsSL https://raw.githubusercontent.com/louisCalderon888/gstack/devin/1777864006-add-devin-host/scripts/install-devin.sh | bash +# +# Or, if you already cloned the repo: +# +# bash ~/dev/gstack/scripts/install-devin.sh +# +# What it does: +# 1. Verifies / installs Bun (gstack's package manager + runner). +# 2. Verifies / installs Node 20 LTS via nvm (Playwright requires Node ≥18). +# 3. Clones or pulls the gstack fork into ~/dev/gstack on the Devin branch. +# 4. Removes stale gstack entries under ~/.cognition/skills and +# ~/.config/devin/skills from earlier install attempts (other skills are kept). +# 5. Runs `./setup --host devin` (degrades gracefully if Playwright fails on +# newer Linux releases like Ubuntu 26.04). +# 6. Verifies that ~/.config/devin/skills/ is populated and that a sample +# SKILL.md is reachable through the symlinks. +# +# Safe to re-run: every step is idempotent. + +set -e + +REPO_URL="${GSTACK_REPO_URL:-https://github.com/louisCalderon888/gstack}" +REPO_BRANCH="${GSTACK_REPO_BRANCH:-devin/1777864006-add-devin-host}" +GSTACK_DIR="${GSTACK_DIR:-$HOME/dev/gstack}" + +bold() { printf '\033[1m%s\033[0m\n' "$*"; } +info() { printf ' %s\n' "$*"; } +warn() { printf ' \033[33m%s\033[0m\n' "$*"; } +fail() { printf ' \033[31m%s\033[0m\n' "$*" >&2; } + +bold "==> 1. Checking Bun" +if ! command -v bun >/dev/null 2>&1; then + info "Bun not found — installing from https://bun.sh ..." + curl -fsSL https://bun.sh/install | bash + # Pick up bun in the current shell. + export BUN_INSTALL="$HOME/.bun" + export PATH="$BUN_INSTALL/bin:$PATH" +fi +if ! command -v bun >/dev/null 2>&1; then + fail "Bun install failed. 
Open a new terminal and re-run, or install manually: https://bun.sh" + exit 1 +fi +info "bun $(bun --version)" + +bold "==> 2. Checking Node.js (need ≥18 for Playwright)" +NODE_OK=0 +if command -v node >/dev/null 2>&1; then + NODE_MAJOR="$(node -p 'process.versions.node.split(".")[0]' 2>/dev/null || echo 0)" + if [ "$NODE_MAJOR" -ge 18 ] 2>/dev/null; then + info "node $(node --version)" + NODE_OK=1 + fi +fi +if [ "$NODE_OK" -eq 0 ]; then + if ! command -v nvm >/dev/null 2>&1 && [ ! -s "$HOME/.nvm/nvm.sh" ]; then + info "Installing nvm (Node Version Manager)..." + curl -fsSL https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.1/install.sh | bash + fi + export NVM_DIR="$HOME/.nvm" + # shellcheck disable=SC1090,SC1091 + [ -s "$NVM_DIR/nvm.sh" ] && . "$NVM_DIR/nvm.sh" --no-use # --no-use: skip nvm's auto-activation, which can return non-zero (and trip `set -e`) before any Node is installed + info "Installing Node 20 LTS via nvm..." + nvm install 20 >/dev/null + nvm use 20 >/dev/null + info "node $(node --version)" +fi + +bold "==> 3. Cloning / updating gstack at $GSTACK_DIR" +if [ ! -d "$GSTACK_DIR/.git" ]; then + mkdir -p "$(dirname "$GSTACK_DIR")" + info "Cloning $REPO_URL ..." + git clone "$REPO_URL" "$GSTACK_DIR" +fi +cd "$GSTACK_DIR" +info "Fetching latest commits..." +git fetch origin --quiet +info "Checking out branch: $REPO_BRANCH" +git checkout "$REPO_BRANCH" --quiet +git pull --ff-only origin "$REPO_BRANCH" --quiet +info "HEAD: $(git log --oneline -1)" + +bold "==> 4. Cleaning stale / broken symlinks" +# Phase 1 (pre-fix) wrote to ~/.cognition/skills. The Devin CLI doesn't read that +# path. Remove gstack's leftovers (only) so they don't confuse future debugging. 
+if [ -d "$HOME/.cognition/skills" ]; then + info "Removing legacy gstack entries from ~/.cognition/skills/" + find "$HOME/.cognition/skills" -mindepth 1 -maxdepth 1 \( -name 'gstack' -o -name 'gstack-*' \) -exec rm -rf {} \; + rmdir "$HOME/.cognition/skills" "$HOME/.cognition" 2>/dev/null || true +fi +# Remove only gstack's own entries from ~/.config/devin/skills so the setup script +# recreates valid symlinks without deleting skills the user installed separately. +# Re-running setup wouldn't auto-prune broken ones (e.g. symlinks pointing at the old .cognition/ path). 
+if [ -d "$HOME/.config/devin/skills" ]; then + info "Removing stale gstack entries from ~/.config/devin/skills/ (will be repopulated by setup)" + find "$HOME/.config/devin/skills" -mindepth 1 -maxdepth 1 \( -name 'gstack' -o -name 'gstack-*' \) -exec rm -rf {} \; +fi + +bold "==> 5. Installing dependencies" +# `bun install --frozen-lockfile` first so we always match committed lockfile, +# then fall back to a regular install if the lockfile is out of date. +bun install --frozen-lockfile 2>/dev/null || bun install + +bold "==> 6. Running ./setup --host devin" +# The setup script handles Playwright failures on Ubuntu 26.04 gracefully now — +# if Chromium can't be installed, it prints a warning and continues so CLI skills +# still get installed. Browser-dependent skills (/qa, /browse, /canary, +# /design-review, /setup-browser-cookies) will explain workarounds at runtime. +./setup --host devin + +bold "==> 7. Verifying installation" +SKILL_COUNT="$(find "$HOME/.config/devin/skills" -mindepth 1 -maxdepth 1 -name 'gstack*' 2>/dev/null | wc -l)" +info "gstack skills installed in ~/.config/devin/skills/: $SKILL_COUNT" +if [ "$SKILL_COUNT" -lt 40 ]; then + fail "Expected ≥40 gstack skills, got $SKILL_COUNT. Setup may have failed." + exit 1 +fi + +# Spot-check a couple of skills by name + file existence. +for SKILL in gstack-review gstack-ship gstack-office-hours gstack-devin-setup gstack-qa; do + if [ ! -f "$HOME/.config/devin/skills/$SKILL/SKILL.md" ]; then + fail "Missing: ~/.config/devin/skills/$SKILL/SKILL.md" + exit 1 + fi +done +info "Spot checks passed: gstack-review, gstack-ship, gstack-office-hours, gstack-devin-setup, gstack-qa all reachable." + +# Optional Playwright smoke test (don't fail the whole installer on this). +bold "==> 8. Browser smoke test (optional)" +if "$GSTACK_DIR/browse/dist/browse" goto https://example.com >/dev/null 2>&1; then + info "Browser binary works — /qa, /browse, /canary, /design-review are fully usable." +else + warn "Browser binary couldn't launch Chromium on this host." + warn "All CLI-only skills still work in Devin CLI. 
To enable browser skills, retry:" + warn " cd $GSTACK_DIR && PLAYWRIGHT_BROWSERS_VALIDATE_HOST_REQUIREMENTS=0 bunx playwright install chromium" + warn "Or install system Chromium: sudo snap install chromium" +fi + +echo "" +bold "Installation complete." +echo "" +echo "Next steps:" +echo " 1. Close this terminal and open a new one (so PATH updates take effect)." +echo " 2. cd into any project, run \`devin\`, type / and you'll see all gstack-* skills." +echo " 3. The first time you start a Devin workspace, run /gstack-devin-setup to" +echo " register /ship and /land-and-deploy as Playbooks and seed Knowledge Notes." +echo "" diff --git a/scripts/resolvers/design.ts b/scripts/resolvers/design.ts index fc6d6ecee6..a65f2202cf 100644 --- a/scripts/resolvers/design.ts +++ b/scripts/resolvers/design.ts @@ -1,5 +1,6 @@ import type { TemplateContext } from './types'; import { AI_SLOP_BLACKLIST, OPENAI_HARD_REJECTIONS, OPENAI_LITMUS_CHECKS } from './constants'; +import { devinSubSessionBlock, isDevinHost } from './devin-helpers'; export function generateDesignReviewLite(ctx: TemplateContext): string { const litmusList = OPENAI_LITMUS_CHECKS.map((item, i) => `${i + 1}. ${item}`).join(' '); @@ -550,6 +551,85 @@ export function generateDesignOutsideVoices(ctx: TemplateContext): string { const isDesignReview = ctx.skillName === 'design-review'; const isDesignConsultation = ctx.skillName === 'design-consultation'; + // Devin host: replace `codex exec` design voice with a Devin sub-session. + // Same OpenAI-style rubric (hard rejections + litmus + hard rules) but the + // child session is a fresh-context Devin agent, not a Codex CLI invocation. + if (isDevinHost(ctx)) { + let devinPrompt: string; + if (isPlanDesignReview) { + devinPrompt = `You are an independent senior product designer reviewing a development plan. Read the plan file at [plan-file-path] (the parent session will tell you the absolute path). 
+ +Evaluate against these criteria: + +HARD REJECTION — flag if ANY apply: +${rejectionList} + +LITMUS CHECKS — answer YES or NO for each: +${litmusList} + +HARD RULES — first classify as MARKETING/LANDING PAGE vs APP UI vs HYBRID, then flag violations of the matching rule set: +- MARKETING: First viewport as one composition, brand-first hierarchy, full-bleed hero, 2-3 intentional motions, composition-first layout +- APP UI: Calm surface hierarchy, dense but readable, utility language, minimal chrome +- UNIVERSAL: CSS variables for colors, no default font stacks, one job per section, cards earn existence + +For each finding: what's wrong, what will happen if it ships unresolved, and the specific fix. Be opinionated. No hedging. Reference plan-section or file:line when applicable.`; + } else if (isDesignReview) { + devinPrompt = `You are an independent senior product designer doing a source-code design audit. Review the frontend source code in this repo. + +Evaluate against design hard rules: +- Spacing: systematic (design tokens / CSS variables) or magic numbers? +- Typography: expressive purposeful fonts or default stacks? +- Color: CSS variables with defined system, or hardcoded hex scattered? +- Responsive: breakpoints defined? calc(100svh - header) for heroes? Mobile tested? +- A11y: ARIA landmarks, alt text, contrast ratios, 44px touch targets? +- Motion: 2-3 intentional animations, or zero / ornamental only? +- Cards: used only when card IS the interaction? No decorative card grids? + +First classify as MARKETING/LANDING PAGE vs APP UI vs HYBRID, then apply matching rules. + +LITMUS CHECKS — answer YES/NO: +${litmusList} + +HARD REJECTION — flag if ANY apply: +${rejectionList} + +Be specific. Reference file:line for every finding.`; + } else if (isDesignConsultation) { + devinPrompt = `Given this product context (parent session will paste the product brief), propose a complete design direction. 
You are a senior independent designer — be opinionated, specific, surprising. + +Required output: +- Visual thesis: one sentence describing mood, material, and energy +- Typography: specific font names (NOT defaults — no Inter / Roboto / Arial / system) + hex colors +- Color system: CSS variables for background, surface, primary text, muted text, accent +- Layout: composition-first, not component-first. First viewport as poster, not document +- Differentiation: 2 deliberate departures from category norms +- Anti-slop guarantees: no purple gradients, no 3-column icon grids, no centered everything, no decorative blobs + +Be opinionated. Be specific. Do not hedge. This is YOUR design direction — own it.`; + } else { + devinPrompt = `You are an independent senior product designer. The parent session will provide the design context to review. Apply the standard gstack design rubric (hard rejections + litmus + hard rules) and report findings with severity (critical/high/medium) and specific fixes. + +LITMUS CHECKS — answer YES/NO: +${litmusList} + +HARD REJECTION — flag if ANY apply: +${rejectionList} + +Be opinionated. Reference file:line or plan-section for every finding.`; + } + + const role = isDesignConsultation ? 'design direction proposer' : 'outside-voice design reviewer'; + const heading = `Outside Voice — Design Review (Devin sub-session${isDesignReview ? ', automatic' : ', optional'})`; + return devinSubSessionBlock({ + heading, + role, + sessionTitle: `gstack: design outside-voice (${ctx.skillName})`, + tags: ['design-review', 'outside-voice'], + prompt: devinPrompt, + presentationHeader: 'DESIGN OUTSIDE VOICE (Devin sub-session)', + }); + } + // Determine opt-in behavior and reasoning effort const isAutomatic = isDesignReview; // design-review runs automatically const reasoningEffort = isDesignConsultation ? 
'medium' : 'high'; // creative vs analytical diff --git a/scripts/resolvers/devin-helpers.ts b/scripts/resolvers/devin-helpers.ts new file mode 100644 index 0000000000..5a457ff34a --- /dev/null +++ b/scripts/resolvers/devin-helpers.ts @@ -0,0 +1,355 @@ +/** + * Devin-native resolver helpers. + * + * Devin hosts replace `claude -p` / `codex exec` "second opinion" calls with + * **child Devin sessions** spawned via the `devin_mcp` tool. They replace + * `gbrain` cross-session memory with **Devin Knowledge Notes**, also via + * `devin_mcp`. + * + * Each helper returns a markdown prose block — exactly the same shape as the + * Claude/Codex variants — but the prose tells Devin to use its native MCP + * tools instead of shelling out to another agent CLI. The resolvers in + * review.ts, design.ts, gbrain.ts, and review-army.ts branch on + * `ctx.host === 'devin'` and emit these blocks. + * + * Why prose, not code: gstack skills are prompts read by an LLM, not scripts + * executed by an interpreter. The LLM (Devin) decides when to call MCP tools + * based on what the prose tells it to do. We emit instructions, not code. + * + * IMPORTANT: All sub-session calls are non-blocking from a quality perspective + * — if a sub-session fails, times out, or returns no useful output, the parent + * skill continues. Quality enhancements never gate shipping. + */ + +import type { TemplateContext } from './types'; + +interface DevinSubSessionParams { + /** Section heading shown to the user, e.g. "Phase 3.5: Cross-Model Second Opinion". */ + heading: string; + /** Short role for the child session, e.g. "adversarial reviewer" or "outside voice". */ + role: string; + /** Title used as the child session's title. */ + sessionTitle: string; + /** Tags applied to the child session. Always prefixed with "gstack". */ + tags: string[]; + /** The prompt the child session will receive. May reference [PLACEHOLDERS] the parent fills in. 
*/ + prompt: string; + /** Header used when presenting the child's output verbatim, e.g. "SECOND OPINION (Devin sub-session)". */ + presentationHeader: string; + /** Marks this as one of N parallel sub-sessions. Accepted for forward compatibility only: the generated prose is currently the same either way. */ + parallel?: boolean; +} + +/** + * Generate prose that tells Devin to spawn a child session and present its output. + * + * The prose covers: when to spawn (gated by user opt-in if applicable), how to + * spawn (devin_session_create), how to wait for completion, how to read output, + * how to handle errors, and how to present the result. + * + * @param p - Heading, role, session title, tags, prompt, and presentation header + * that are interpolated into the returned markdown. + * @returns A markdown block of instructions; nothing is executed here. + */ +export function devinSubSessionBlock(p: DevinSubSessionParams): string { + // JSON.stringify quotes and escapes each tag so the rendered tool_args stay + // valid JSON even if a tag ever contains a quote or backslash. + const tagList = ['gstack', ...p.tags].map((t) => JSON.stringify(t)).join(', '); + return `## ${p.heading} + +Spawn an independent **Devin sub-session** to act as a fresh, unbiased ${p.role}. +The child session runs in its own VM with no access to this conversation — +genuine independence, like a second engineer reading the diff cold. + +**Step 1: Derive the repo identifier.** + +\`\`\`bash +REPO_URL=$(git config --get remote.origin.url) +# Convert to owner/repo format. Examples: +# git@github.com:owner/repo.git -> owner/repo +# https://github.com/owner/repo.git -> owner/repo +REPO_SLUG=$(echo "$REPO_URL" | sed -E 's#(git@github\\.com:|https?://github\\.com/)##; s#\\.git$##') +echo "REPO_SLUG: $REPO_SLUG" +\`\`\` + +**Step 2: Assemble the prompt.** The child has no conversation history, so +front-load all relevant context (problem statement, key answers, codebase +notes) into a single self-contained prompt. The prompt template: + +\`\`\` +${p.prompt} +\`\`\` + +**Step 3: Spawn the child session via \`devin_mcp\`.** Use: + +\`\`\` +tool_name = "devin_session_create" +tool_args = { + "sessions": [ + { + "prompt": "", + "title": "${p.sessionTitle}", + "tags": [${tagList}], 
+ "repos": [""] + } + ] +} +\`\`\` + +The response returns a session_id without the \`devin-\` prefix. Save the +prefixed form: \`SESSION_ID="devin-"\`. + +**Step 4: Wait for the child to make progress.** Poll every 60 seconds via +\`devin_mcp\` \`tool_name="devin_session_interact"\` with +\`{"action": "get", "session_id": "$SESSION_ID"}\`. Stop polling when: +- \`status_enum\` is \`"blocked"\` (the child is waiting on a question or has + finished its work and is asking for confirmation), OR +- \`status_enum\` is \`"stopped"\` (the child terminated itself), OR +- 5 minutes have elapsed (hard deadline — sub-sessions are quality enhancements, + not gates). + +**Step 5: Read the final output.** Once the child is blocked or stopped, fetch +its messages via \`devin_session_interact\` with +\`{"action": "get_messages", "session_id": "$SESSION_ID", "first": 50}\`. The +final \`devin_message\` events contain the review output. + +**Step 6: Terminate the child** with +\`{"action": "terminate", "session_id": "$SESSION_ID", "archive_on_terminate": true}\` +to release the child's VM and keep the inbox clean. + +**Step 7: Present findings under a \`${p.presentationHeader}\` header.** +Show the child's final output verbatim — do not truncate or summarize. + +**Error handling (all non-blocking — ${p.role} review is informational, not a gate):** +- If \`devin_session_create\` returns a permissions / quota error, note: "Devin + sub-session unavailable — quota or permission issue. Continuing without + ${p.role} review." +- If the 5-minute deadline expires before the child blocks/stops: terminate + the child via \`devin_session_interact\` action="terminate" and continue. +- If the child errors out internally (status_enum=\`stopped\` with no useful + output): note and continue. +- If \`devin_mcp\` is not available in this Devin environment (e.g. running on + a Devin instance without sub-session permissions): note and skip the + ${p.role} review entirely. 
+ +Sub-session output is **informational** — the parent skill ALWAYS continues +regardless of whether the child ran, succeeded, or said anything useful.`; +} + +interface DevinKnowledgeLoadParams { + /** Skill name for context-specific guidance (e.g. "investigate" branches into data-research). */ + skillName: string; + /** Optional special-case instruction appended after the main load block. */ + extraInstruction?: string; +} + +/** + * Replacement for {{GBRAIN_CONTEXT_LOAD}} on Devin hosts. + * + * Tells Devin to search Knowledge Notes (via devin_knowledge_manage) for + * relevant context BEFORE starting the skill. Same semantics as the gbrain + * search step, but uses Devin's native cross-session memory. + */ +export function devinKnowledgeLoadBlock(p: DevinKnowledgeLoadParams): string { + let base = `## Knowledge Notes Context Load (Devin) + +Before starting this skill, search **Devin Knowledge Notes** for relevant +context. Knowledge Notes persist across sessions — they replace gbrain on Devin. + +**Step 1: Derive the repo identifier** (for repo-pinned note filtering): + +\`\`\`bash +REPO_URL=$(git config --get remote.origin.url) +REPO_SLUG=$(echo "$REPO_URL" | sed -E 's#(git@github\\.com:|https?://github\\.com/)##; s#\\.git$##') +\`\`\` + +**Step 2: Extract 2-4 keywords** from the user's request (nouns, error names, +file paths, technical terms). Example: for "the login page is broken after +deploy", keywords are "login broken deploy". + +**Step 3: Search via \`devin_mcp\`.** Use: + +\`\`\` +tool_name = "devin_knowledge_manage" +tool_args = { + "action": "list", + "search": "", + "pinned_repo": "", + "first": 10 +} +\`\`\` + +The response includes a list of notes with \`note_id\`, \`name\`, \`trigger\`, and +a snippet of the body. If no results, broaden to the single most specific +keyword and search again. If still no results, also try with \`pinned_repo\` +omitted (org-wide notes). 
+ +**Step 4: Read the top relevant notes.** For each note that looks relevant, +fetch the full body via: + +\`\`\` +tool_name = "devin_knowledge_manage" +tool_args = { + "action": "get", + "note_id": "" +} +\`\`\` + +Read the top 3 most relevant notes for context. + +**Step 5: Use this knowledge to inform your analysis.** Cite specific notes +when your analysis builds on them ("Per knowledge note 'Login deploy +checklist'..."). If no relevant notes exist, proceed without — Knowledge Notes +are an enhancement, not a prerequisite.`; + + if (p.skillName === 'investigate') { + base += `\n\n**Investigate-specific:** If the user's request is about tracking, +extracting, or researching structured data (e.g., "track this data", "extract +from emails", "build a tracker"), look for Knowledge Notes tagged +\`data-research\` or with \`trigger\` mentioning structured-data extraction. +Apply their methodology if found.`; + } + + if (p.extraInstruction) { + base += `\n\n${p.extraInstruction}`; + } + + return base; +} + +interface DevinKnowledgeSaveParams { + skillName: string; +} + +/** + * Replacement for {{GBRAIN_SAVE_RESULTS}} on Devin hosts. + * + * Tells Devin to persist the skill's output as a Knowledge Note (via + * devin_knowledge_manage create). Maps each skill to a note name + trigger + * convention. + */ +export function devinKnowledgeSaveBlock(p: DevinKnowledgeSaveParams): string { + // Per-skill save metadata (mirror of gbrain's skillSaveMap). 
+ const skillMeta: Record< + string, + { titlePrefix: string; trigger: string; folder?: string } + > = { + 'office-hours': { + titlePrefix: 'Office Hours', + trigger: 'When the user asks about the project vision, scope, or design rationale.', + folder: 'gstack/office-hours', + }, + investigate: { + titlePrefix: 'Investigation', + trigger: 'When the user reports an issue similar to the one investigated, or asks about the root cause.', + folder: 'gstack/investigations', + }, + 'plan-ceo-review': { + titlePrefix: 'CEO Plan', + trigger: 'When the user asks about scope or strategic decisions for this feature.', + folder: 'gstack/plans', + }, + retro: { + titlePrefix: 'Retro', + trigger: 'When the user asks about engineering velocity, recent shipping patterns, or per-person contributions.', + folder: 'gstack/retros', + }, + 'plan-eng-review': { + titlePrefix: 'Eng Review', + trigger: 'When the user asks about architecture decisions or technical tradeoffs for this feature.', + folder: 'gstack/plans', + }, + ship: { + titlePrefix: 'Release', + trigger: 'When the user asks about what shipped in this version or the deploy details.', + folder: 'gstack/releases', + }, + cso: { + titlePrefix: 'Security Audit', + trigger: 'When the user asks about the security posture, threat model, or audit findings.', + folder: 'gstack/security', + }, + 'design-consultation': { + titlePrefix: 'Design System', + trigger: 'When the user asks about the design system, brand decisions, or design tokens for this project.', + folder: 'gstack/design', + }, + }; + + const meta = skillMeta[p.skillName] || { + titlePrefix: 'Skill Output', + trigger: `When the user asks about results from /${p.skillName}.`, + folder: 'gstack', + }; + + return `## Save Results to Knowledge Notes (Devin) + +After completing this skill, persist the results to **Devin Knowledge Notes** +for future reference across sessions. 
+ +**Step 1: Derive the repo identifier** (for repo-pinning): + +\`\`\`bash +REPO_URL=$(git config --get remote.origin.url) +REPO_SLUG=$(echo "$REPO_URL" | sed -E 's#(git@github\\.com:|https?://github\\.com/)##; s#\\.git$##') +\`\`\` + +**Step 2: Assemble the note body.** The body should be the full markdown +output of this skill (decisions, findings, recommendations). Include enough +context that a future Devin session can use the note without needing to +re-derive everything. + +**Step 3: Dedup check.** Before creating anything, search for existing notes with +similar names via \`{"action": "list", "search": "", +"pinned_repo": ""}\`. If a near-duplicate exists, prefer +\`{"action": "update", "note_id": ""}\` over creating a new one and skip +Step 4. v3 PUT is full-replace, so include the merged content. + +**Step 4: Create the note via \`devin_mcp\`** (only when Step 3 found no +near-duplicate). Use: + +\`\`\` +tool_name = "devin_knowledge_manage" +tool_args = { + "action": "create", + "name": "${meta.titlePrefix}: ", + "body": "", + "trigger": "${meta.trigger}", + "pinned_repo": "" +} +\`\`\` + +Replace \`\` with a short topic (e.g., the issue summary, +feature name, or version). Keep the name under 80 characters. + +**Step 5: Entity enrichment (optional).** If the skill output mentions actual +person names or company names, search Knowledge Notes for each. If no note +exists, create a stub note with \`{"action": "create", "name": "", "body": "Stub note — mentioned in /${p.skillName} output for +$REPO_SLUG.", "trigger": "When the user asks about ."}\`. +Only enrich actual person/company names — skip product names, section +headings, technical terms, and file paths. + +**Error handling:** If \`devin_knowledge_manage\` is unavailable (e.g. Devin +sub-session quota issues, missing permissions), note and skip — saving is an +enhancement, not a prerequisite. The skill output is already in the parent +session's transcript.`; +} + +/** + * Plan-review variant of devinSubSessionBlock — used by CODEX_PLAN_REVIEW. 
+ * Identical mechanics but distinct framing for plan-mode reviews. + */ +export function devinPlanReviewBlock(opts: { + heading: string; + prompt: string; +}): string { + return devinSubSessionBlock({ + heading: opts.heading, + role: 'outside-voice plan reviewer', + sessionTitle: 'gstack: outside-voice plan review', + tags: ['plan-review', 'outside-voice'], + prompt: opts.prompt, + presentationHeader: 'OUTSIDE VOICE (Devin sub-session)', + }); +} + +/** + * Convenience: detect whether a TemplateContext is a Devin host. + * Useful for inline branching in resolvers. + */ +export function isDevinHost(ctx: TemplateContext): boolean { + return ctx.host === 'devin'; +} diff --git a/scripts/resolvers/gbrain.ts b/scripts/resolvers/gbrain.ts index c6e54423ba..67b89878fb 100644 --- a/scripts/resolvers/gbrain.ts +++ b/scripts/resolvers/gbrain.ts @@ -11,8 +11,16 @@ * Compatible with GBrain >= v0.10.0 (search CLI, doctor --fast --json, entity enrichment). */ import type { TemplateContext } from './types'; +import { devinKnowledgeLoadBlock, devinKnowledgeSaveBlock, isDevinHost } from './devin-helpers'; export function generateGBrainContextLoad(ctx: TemplateContext): string { + // Devin host: replace gbrain CLI calls with Knowledge Notes search via + // devin_knowledge_manage. Knowledge Notes persist across sessions and play + // the same cross-session-memory role as gbrain. + if (isDevinHost(ctx)) { + return devinKnowledgeLoadBlock({ skillName: ctx.skillName }); + } + let base = `## Brain Context Load Before starting this skill, search your brain for relevant context: @@ -37,6 +45,13 @@ Any non-zero exit code from gbrain commands should be treated as a transient fai } export function generateGBrainSaveResults(ctx: TemplateContext): string { + // Devin host: replace gbrain put_page calls with Knowledge Notes create via + // devin_knowledge_manage. Same per-skill mapping (office-hours, investigate, + // ship, etc.) but persisted as Devin Knowledge Notes pinned to the repo. 
+ if (isDevinHost(ctx)) { + return devinKnowledgeSaveBlock({ skillName: ctx.skillName }); + } + const skillSaveMap: Record = { 'office-hours': 'Save the design document as a brain page:\n```bash\ngbrain put_page --title "Office Hours: " --tags "design-doc," <<\'EOF\'\n\nEOF\n```', 'investigate': 'Save the root cause analysis as a brain page:\n```bash\ngbrain put_page --title "Investigation: " --tags "investigation," <<\'EOF\'\n\nEOF\n```', diff --git a/scripts/resolvers/review-army.ts b/scripts/resolvers/review-army.ts index 516ce3c8d4..6068acfb03 100644 --- a/scripts/resolvers/review-army.ts +++ b/scripts/resolvers/review-army.ts @@ -8,8 +8,13 @@ * 4. Feed merged findings into the existing Fix-First pipeline * * Shipped as Release 2 of the self-learning roadmap (SELF_LEARNING_V0.md). + * + * On Devin hosts, parallel specialist subagents are replaced with parallel + * **child Devin sessions** spawned via `devin_session_create` (one call, + * multiple session specs in the `sessions` array). Findings merge identically. */ import type { TemplateContext } from './types'; +import { isDevinHost } from './devin-helpers'; function generateSpecialistSelection(ctx: TemplateContext): string { const isShip = ctx.skillName === 'ship'; @@ -81,7 +86,79 @@ Note which specialists were selected, gated, and skipped. Print the selection: "Dispatching N specialists: [names]. Skipped: [names] (scope not detected). Gated: [names] (0 findings in N+ reviews)."`; } +function generateSpecialistDispatchDevin(ctx: TemplateContext): string { + return `### Dispatch specialists in parallel via Devin sub-sessions + +For each selected specialist, spawn a **child Devin session** via the +\`devin_mcp\` tool with \`tool_name="devin_session_create"\`. Send all session +creation requests in a **single call** by passing multiple specs in the +\`sessions\` array — Devin's REST v3 will create them concurrently. This is +the Devin equivalent of "launch all specialists in a single message". 
+ +Each spec in the \`sessions\` array: +- \`prompt\`: the specialist prompt assembled below (checklist + stack + learnings + JSON output instructions) +- \`title\`: \`"Review Army — {specialist}"\` +- \`tags\`: \`["gstack", "review-army", "{specialist}"]\` +- \`repos\`: \`[""]\` (derive from \`git config --get remote.origin.url\`) + +**Each specialist sub-session prompt assembly:** + +The prompt for each specialist contains: + +1. The specialist's checklist content (you already read the file above) +2. Stack context: "This is a {STACK} project." +3. Past learnings for this domain (if any exist): + +\`\`\`bash +${ctx.paths.binDir}/gstack-learnings-search --type pitfall --query "{specialist domain}" --limit 5 2>/dev/null || true +\`\`\` + +If learnings are found, include them: "Past learnings for this domain: {learnings}" + +4. Instructions: + +"You are a specialist code reviewer for the gstack Review Army. Read the +checklist below, then run \`git diff origin/\` (the parent will tell you +the base branch, typically main or master) to get the full diff. Apply the +checklist against the diff. + +For each finding, output a JSON object on its own line: +{\\"severity\\":\\"CRITICAL|INFORMATIONAL\\",\\"confidence\\":N,\\"path\\":\\"file\\",\\"line\\":N,\\"category\\":\\"category\\",\\"summary\\":\\"description\\",\\"fix\\":\\"recommended fix\\",\\"fingerprint\\":\\"path:line:category\\",\\"specialist\\":\\"name\\"} + +Required fields: severity, confidence, path, category, summary, specialist. +Optional: line, fix, fingerprint, evidence, test_stub. + +If you can write a test that would catch this issue, include it in the \`test_stub\` field. +Use the detected test framework ({TEST_FW}). Write a minimal skeleton — describe/it/test +blocks with clear intent. Skip test_stub for architectural or design-only findings. + +If no findings: output \`NO FINDINGS\` and nothing else. +Do not output anything else — no preamble, no summary, no commentary. 
+ +Stack context: {STACK} +Past learnings: {learnings or 'none'} + +CHECKLIST: +{checklist content}" + +**Polling:** After spawning all specialists, poll their status every 60 seconds +via \`devin_session_interact\` with \`{"action": "get", "session_id": "devin-"}\`. +Continue when each child reaches \`status_enum="blocked"\` or \`"stopped"\`, +or when 10 minutes have elapsed (hard deadline). + +**Read each child's output** via \`{"action": "get_messages", "session_id": "devin-", "first": 50}\` +and parse the JSON findings from the final \`devin_message\` events. + +**Terminate each child** via \`{"action": "terminate", "session_id": "devin-", "archive_on_terminate": true}\` +to release VMs. + +**Failure handling:** If a sub-session fails or times out, log the failure and +continue with results from successful specialists. Specialists are additive — +partial results are better than no results.`; +} + function generateSpecialistDispatch(ctx: TemplateContext): string { + if (isDevinHost(ctx)) return generateSpecialistDispatchDevin(ctx); return `### Dispatch specialists in parallel For each selected specialist, launch an independent subagent via the Agent tool. @@ -204,6 +281,39 @@ function generateRedTeam(ctx: TemplateContext): string { const isShip = ctx.skillName === 'ship'; const stepMerge = isShip ? '9.2' : '4.6'; const fixFirstRef = isShip ? 'the Fix-First flow (item 4)' : 'Step 5 Fix-First'; + + if (isDevinHost(ctx)) { + return `### Red Team dispatch (conditional, Devin sub-session) + +**Activation:** Only if DIFF_LINES > 200 OR any specialist produced a CRITICAL finding. + +If activated, spawn one more **child Devin session** via \`devin_mcp\` +\`devin_session_create\` (single spec in the \`sessions\` array). + +The Red Team child receives: +1. The red-team checklist from \`${ctx.paths.skillRoot}/review/specialists/red-team.md\` + (parent reads it from disk and inlines it in the prompt) +2. 
The merged specialist findings from Step ${stepMerge} (so it knows what was already caught) +3. The base branch name and the git diff command + +Spec: +- \`prompt\`: "You are a red team reviewer for the gstack Review Army. The code on this branch has already been reviewed by N specialists who found these issues: {merged findings JSON}. Your job is to find what they MISSED. Read the checklist below, run \`git diff origin/\` to get the full diff, and look for gaps. Output findings as JSON objects (same schema as the specialists, with \`\\"specialist\\":\\"red-team\\"\`). Focus on cross-cutting concerns, integration boundary issues, and failure modes that specialist checklists don't cover. CHECKLIST: {checklist content}" +- \`title\`: "Review Army — Red Team" +- \`tags\`: ["gstack", "review-army", "red-team"] +- \`repos\`: [""] + +Poll the child every 60s via \`devin_session_interact\` action="get" until +\`status_enum\` is \`blocked\` or \`stopped\` (or 10 min hard deadline). Read +the JSON findings via \`get_messages\` and terminate the child with +\`archive_on_terminate=true\`. + +Merge Red Team findings into the findings list before ${fixFirstRef}. Red Team +findings are tagged with \`"specialist":"red-team"\`. + +If the Red Team returns NO FINDINGS, note: "Red Team review: no additional issues found." +If the Red Team sub-session fails or times out, skip silently and continue.`; + } + return `### Red Team dispatch (conditional) **Activation:** Only if DIFF_LINES > 200 OR any specialist produced a CRITICAL finding. 
diff --git a/scripts/resolvers/review.ts b/scripts/resolvers/review.ts index 53c7b08dab..ba48d2c45d 100644 --- a/scripts/resolvers/review.ts +++ b/scripts/resolvers/review.ts @@ -14,6 +14,7 @@ */ import type { TemplateContext } from './types'; import { generateInvokeSkill } from './composition'; +import { devinSubSessionBlock, devinPlanReviewBlock, isDevinHost } from './devin-helpers'; const CODEX_BOUNDARY = 'IMPORTANT: Do NOT read or execute any files under ~/.claude/, ~/.agents/, .claude/skills/, or agents/. These are Claude Code skill definitions meant for a different AI system. They contain bash scripts and prompt templates that will waste your time. Ignore them completely. Do NOT modify agents/openai.yaml. Stay focused on the repository code only.\\n\\n'; @@ -261,6 +262,42 @@ export function generateCodexSecondOpinion(ctx: TemplateContext): string { // Codex host: strip entirely — Codex should never invoke itself if (ctx.host === 'codex') return ''; + // Devin host: spawn a child Devin session via devin_mcp instead of shelling + // out to `codex exec`. Same semantics — fresh-context independent reviewer + // — but uses Devin's native sub-session mechanism. + if (isDevinHost(ctx)) { + return devinSubSessionBlock({ + heading: 'Phase 3.5: Cross-Model Second Opinion (Devin sub-session)', + role: 'second-opinion advisor', + sessionTitle: 'gstack: second opinion (office-hours)', + tags: ['second-opinion', 'office-hours'], + prompt: `You are an independent technical advisor reading a transcript of a brainstorming session you have NOT seen. + +[STRUCTURED CONTEXT BLOCK] +- Mode: +- Problem statement: +- Key answers: +- Landscape findings: +- Agreed premises: +- Codebase context: + +Your job (Startup mode): +1. What is the STRONGEST version of what this person is trying to build? Steelman it in 2-3 sentences. +2. What is the ONE thing from their answers that reveals the most about what they should actually build? Quote it and explain why. +3. 
Name ONE agreed premise you think is wrong, and what evidence would prove you right. +4. If you had 48 hours and one engineer to build a prototype, what would you build? Be specific — tech stack, features, what you'd skip. Be direct. Be terse. No preamble. + +Your job (Builder mode): +1. What is the COOLEST version of this they haven't considered? +2. What's the ONE thing from their answers that reveals what excites them most? Quote it. +3. What existing open source project or tool gets them 50% of the way there — and what's the 50% they'd need to build? +4. If you had a weekend to build this, what would you build first? Be specific. Be direct. No preamble. + +Use the appropriate set based on the Mode in the context block.`, + presentationHeader: 'SECOND OPINION (Devin sub-session)', + }); + } + return `## Phase 3.5: Cross-Model Second Opinion (optional) **Binary check first:** @@ -415,6 +452,56 @@ export function generateAdversarialStep(ctx: TemplateContext): string { const isShip = ctx.skillName === 'ship'; const stepNum = isShip ? '11' : '5.7'; + // Devin host: spawn a child Devin session for adversarial review. Replaces + // both the Claude Agent-tool subagent and the `codex exec` adversarial pass + // with a single fresh-context Devin sub-session. + if (isDevinHost(ctx)) { + return `## Step ${stepNum}: Adversarial review (Devin sub-session, always-on) + +Every diff gets adversarial review from a fresh-context Devin sub-session. +LOC is not a proxy for risk — a 5-line auth change can be critical. + +` + devinSubSessionBlock({ + heading: 'Adversarial Review', + role: 'adversarial reviewer (attacker + chaos engineer)', + sessionTitle: 'gstack: adversarial review', + tags: ['adversarial-review', 'pre-landing'], + prompt: `You are an independent senior engineer doing adversarial review on this branch. 
+ +Read the diff first: +\`\`\`bash +git diff origin/ +\`\`\` + +(Replace \`\` with the actual base branch the parent session detected — typically \`main\` or \`master\`. The parent will pass this in the prompt.) + +Your job is to find ways this code will fail in production. Think like an attacker and a chaos engineer. Look for: +- Edge cases, race conditions, security holes +- Resource leaks, failure modes, silent data corruption +- Logic errors that produce wrong results silently +- Error handling that swallows failures +- Trust boundary violations + +Be adversarial. Be thorough. No compliments — just the problems. + +For each finding, classify as FIXABLE (you know how to fix it) or INVESTIGATE (needs human judgment). + +After listing findings, end your output with ONE line in the canonical format: + Recommendation: because + +Examples: +- "Recommendation: Fix the unbounded retry at queue.ts:78 because it'll DoS the worker pool under sustained 429s" +- "Recommendation: Ship as-is because the strongest finding is a theoretical race that requires conditions we can't trigger in production" + +The reason MUST point to a specific finding (or no-fix rationale). Generic reasons like 'because it's safer' do not qualify.`, + presentationHeader: `ADVERSARIAL REVIEW (Devin sub-session) — Step ${stepNum}`, + }) + ` + +**FIXABLE findings** flow into the same Fix-First pipeline as the structured +review. **INVESTIGATE findings** are presented as informational. The +sub-session output is informational — it never blocks shipping.`; + } + return `## Step ${stepNum}: Adversarial review (always-on) Every diff gets adversarial review from both Claude and Codex. LOC is not a proxy for risk — a 5-line auth change can be critical. 
@@ -545,6 +632,56 @@ export function generateCodexPlanReview(ctx: TemplateContext): string { // Codex host: strip entirely — Codex should never invoke itself if (ctx.host === 'codex') return ''; + // Devin host: spawn a child session for the outside-voice plan review. + // Replaces both the `codex exec` path and the Claude Agent-tool subagent + // fallback with a single fresh-context Devin sub-session. + if (isDevinHost(ctx)) { + return devinPlanReviewBlock({ + heading: 'Outside Voice — Independent Plan Challenge (Devin sub-session, recommended)', + prompt: `You are a brutally honest technical reviewer examining a development plan that has already been through a multi-section review. + +Your job is NOT to repeat that review. Instead, find what it missed. Look for: +- Logical gaps and unstated assumptions that survived the review scrutiny +- Overcomplexity (is there a fundamentally simpler approach the review was too deep in the weeds to see?) +- Feasibility risks the review took for granted +- Missing dependencies or sequencing issues +- Strategic miscalibration (is this the right thing to build at all?) + +Be direct. Be terse. No compliments. Just the problems. + +THE PLAN: +<plan content> + +After listing the gaps and risks, end with one line in the canonical format: + Recommendation: <action> because <reason>`, + }) + ` + +**Cross-model tension handling:** After presenting the outside voice findings, +note any points where the outside voice disagrees with the review findings +from earlier sections. Flag these as: + +\`\`\` +CROSS-MODEL TENSION: + [Topic]: Review said X. Outside voice says Y. [Present both perspectives neutrally. + State what context you might be missing that would change the answer.] +\`\`\` + +**User Sovereignty:** Do NOT auto-incorporate outside voice recommendations +into the plan. Present each tension point to the user. The user decides. +Cross-model agreement is a strong signal — present it as such — but it is +NOT permission to act. 
You may state which argument you find more compelling, +but you MUST NOT apply the change without explicit user approval. + +For each substantive tension point, use AskUserQuestion: + +> "Cross-model disagreement on [topic]. The review found [X] but the outside voice argues [Y]. [One sentence on what context you might be missing.]" +> +> RECOMMENDATION: Choose [A or B] because [one-line reason explaining which argument is more compelling and why]. Completeness: A=X/10, B=Y/10. +> +> A) Apply the outside voice's correction +> B) Stay with the review's original finding`; + } + return `## Outside Voice — Independent Plan Challenge (optional, recommended) After all review sections are complete, offer an independent second opinion from a diff --git a/setup b/setup index 4c1763f9fd..00602e5f99 100755 --- a/setup +++ b/setup @@ -24,6 +24,8 @@ FACTORY_SKILLS="$HOME/.factory/skills" FACTORY_GSTACK="$FACTORY_SKILLS/gstack" OPENCODE_SKILLS="$HOME/.config/opencode/skills" OPENCODE_GSTACK="$OPENCODE_SKILLS/gstack" +DEVIN_SKILLS="$HOME/.config/devin/skills" +DEVIN_GSTACK="$DEVIN_SKILLS/gstack" IS_WINDOWS=0 case "$(uname -s)" in @@ -43,7 +45,7 @@ TEAM_MODE=0 NO_TEAM_MODE=0 while [ $# -gt 0 ]; do case "$1" in - --host) [ -z "$2" ] && echo "Missing value for --host (expected claude, codex, kiro, factory, opencode, openclaw, hermes, gbrain, or auto)" >&2 && exit 1; HOST="$2"; shift 2 ;; + --host) [ -z "$2" ] && echo "Missing value for --host (expected claude, codex, kiro, factory, opencode, devin, openclaw, hermes, gbrain, or auto)" >&2 && exit 1; HOST="$2"; shift 2 ;; --host=*) HOST="${1#--host=}"; shift ;; --local) LOCAL_INSTALL=1; shift ;; --prefix) SKILL_PREFIX=1; SKILL_PREFIX_FLAG=1; shift ;; @@ -56,7 +58,7 @@ while [ $# -gt 0 ]; do done case "$HOST" in - claude|codex|kiro|factory|opencode|auto) ;; + claude|codex|kiro|factory|opencode|devin|auto) ;; openclaw) echo "" echo "OpenClaw integration uses a different model — OpenClaw spawns Claude Code" @@ -91,7 +93,7 @@ case "$HOST" 
in echo "GBrain setup and brain skills ship from the GBrain repo." echo "" exit 0 ;; - *) echo "Unknown --host value: $HOST (expected claude, codex, kiro, factory, opencode, openclaw, hermes, gbrain, or auto)" >&2; exit 1 ;; + *) echo "Unknown --host value: $HOST (expected claude, codex, kiro, factory, opencode, devin, openclaw, hermes, gbrain, or auto)" >&2; exit 1 ;; esac # ─── Resolve skill prefix preference ───────────────────────── @@ -143,6 +145,19 @@ if [ "$LOCAL_INSTALL" -eq 1 ]; then echo "Error: --local is only supported for Claude Code (not Codex)." >&2 exit 1 fi + if [ "$HOST" = "devin" ]; then + # For Devin, --local means "commit skills to this repo so Devin (CLI + remote VM) + # discovers them on session start". Generate to .devin/skills/gstack-* in the + # current working directory; no global install. + log "Generating Devin skills into $(pwd)/.devin/skills/..." + ( + cd "$SOURCE_GSTACK_DIR" + bun install --frozen-lockfile 2>/dev/null || bun install + bun run gen:skill-docs --host devin + ) + log "Done. Commit .devin/skills/ to your repo so Devin discovers gstack skills on session start." 
+ exit 0 + fi INSTALL_SKILLS_DIR="$(pwd)/.claude/skills" mkdir -p "$INSTALL_SKILLS_DIR" HOST="claude" @@ -155,14 +170,16 @@ INSTALL_CODEX=0 INSTALL_KIRO=0 INSTALL_FACTORY=0 INSTALL_OPENCODE=0 +INSTALL_DEVIN=0 if [ "$HOST" = "auto" ]; then command -v claude >/dev/null 2>&1 && INSTALL_CLAUDE=1 command -v codex >/dev/null 2>&1 && INSTALL_CODEX=1 command -v kiro-cli >/dev/null 2>&1 && INSTALL_KIRO=1 command -v droid >/dev/null 2>&1 && INSTALL_FACTORY=1 command -v opencode >/dev/null 2>&1 && INSTALL_OPENCODE=1 + command -v devin >/dev/null 2>&1 && INSTALL_DEVIN=1 # If none found, default to claude - if [ "$INSTALL_CLAUDE" -eq 0 ] && [ "$INSTALL_CODEX" -eq 0 ] && [ "$INSTALL_KIRO" -eq 0 ] && [ "$INSTALL_FACTORY" -eq 0 ] && [ "$INSTALL_OPENCODE" -eq 0 ]; then + if [ "$INSTALL_CLAUDE" -eq 0 ] && [ "$INSTALL_CODEX" -eq 0 ] && [ "$INSTALL_KIRO" -eq 0 ] && [ "$INSTALL_FACTORY" -eq 0 ] && [ "$INSTALL_OPENCODE" -eq 0 ] && [ "$INSTALL_DEVIN" -eq 0 ]; then INSTALL_CLAUDE=1 fi elif [ "$HOST" = "claude" ]; then @@ -175,6 +192,8 @@ elif [ "$HOST" = "factory" ]; then INSTALL_FACTORY=1 elif [ "$HOST" = "opencode" ]; then INSTALL_OPENCODE=1 +elif [ "$HOST" = "devin" ]; then + INSTALL_DEVIN=1 fi migrate_direct_codex_install() { @@ -321,13 +340,35 @@ if [ "$INSTALL_OPENCODE" -eq 1 ] && [ "$NEEDS_BUILD" -eq 0 ]; then ) fi +# 1e. Generate .devin/ Devin skill docs +# Devin is a remote cloud agent — its sessions clone the user's repo into a fresh +# Ubuntu VM and discover SKILL.md files at session start. The generated skills +# must therefore land *in the repo* (committed to git), not in a global $HOME +# directory. See docs/DEVIN.md for full architecture. +if [ "$INSTALL_DEVIN" -eq 1 ] && [ "$NEEDS_BUILD" -eq 0 ]; then + log "Generating .devin/ skill docs (Devin)..." + ( + cd "$SOURCE_GSTACK_DIR" + bun install --frozen-lockfile 2>/dev/null || bun install + bun run gen:skill-docs --host devin + ) +fi + # 2. Ensure Playwright's Chromium is available +# +# Some host OS releases (e.g. 
Ubuntu 26.04 before its build is added to +# Playwright's host allowlist) fail the OS validation check inside +# `playwright install` even though the Chromium binary itself is +# forward-compatible and runs fine. The official escape hatch is +# PLAYWRIGHT_SKIP_VALIDATE_HOST_REQUIREMENTS=1 — it skips the +# host-requirements check while still pulling the same binary. if ! ensure_playwright_browser; then echo "Installing Playwright Chromium..." + PLAYWRIGHT_INSTALL_OK=1 ( cd "$SOURCE_GSTACK_DIR" - bunx playwright install chromium - ) + PLAYWRIGHT_SKIP_VALIDATE_HOST_REQUIREMENTS=1 bunx playwright install chromium + ) || PLAYWRIGHT_INSTALL_OK=0 if [ "$IS_WINDOWS" -eq 1 ]; then # On Windows, Node.js launches Chromium (not Bun — see oven-sh/bun#4253). @@ -355,10 +396,28 @@ if ! ensure_playwright_browser; then echo "gstack setup failed: Playwright Chromium could not be launched via Node.js" >&2 echo " This is a known issue with Bun on Windows (oven-sh/bun#4253)." >&2 echo " Ensure Node.js is installed and 'node -e \"require('playwright')\"' works." >&2 - else - echo "gstack setup failed: Playwright Chromium could not be launched" >&2 + exit 1 fi - exit 1 + + # Linux/macOS: degrade gracefully. Browser-dependent skills (/qa, /browse, + # /canary, /design-review, /setup-browser-cookies) will warn at runtime; the + # rest of gstack continues to work. This is especially important for the + # Devin host — we want CLI skill discovery to succeed even when the local + # Chromium build isn't available for the host OS yet. + echo "" >&2 + echo "warning: Playwright Chromium could not be installed or launched on this host." >&2 + echo " This is non-fatal. Skill installation will continue." >&2 + echo " Affected skills: /qa, /browse, /canary, /design-review, /setup-browser-cookies" >&2 + echo " These will still appear in the slash-command menu and explain workarounds at runtime." 
>&2 + if [ "$(uname -s)" = "Linux" ]; then + echo "" >&2 + echo " On Ubuntu 26.04 and other very-new releases, retry with:" >&2 + echo " cd $SOURCE_GSTACK_DIR && PLAYWRIGHT_SKIP_VALIDATE_HOST_REQUIREMENTS=1 bunx playwright install chromium" >&2 + echo " Or install system Chromium and let Playwright pick it up:" >&2 + echo " sudo snap install chromium # Ubuntu" >&2 + echo " sudo apt-get install -y chromium # Debian-based" >&2 + fi + echo "" >&2 fi # 3. Ensure ~/.gstack global state directory exists @@ -935,6 +994,44 @@ if [ "$INSTALL_OPENCODE" -eq 1 ]; then echo " opencode skills: $OPENCODE_SKILLS" fi +# 6d. Install for Devin (Cognition AI) +# Devin runs in two surfaces: the local Devin for Terminal CLI (skills loaded from +# ~/.config/devin/skills/ globally or .devin/skills/ per project) and remote/cloud +# VM sessions (also scan .devin/skills/ when the repo is cloned). The canonical +# install for cloud-only use is per-repo via `./setup --host devin --local`. The +# global install below is for users running Devin CLI locally on their machine. +if [ "$INSTALL_DEVIN" -eq 1 ]; then + mkdir -p "$DEVIN_SKILLS" + # Symlink the generated skills from the gstack source into ~/.config/devin/skills/ + DEVIN_SOURCE_DIR="$SOURCE_GSTACK_DIR/.devin/skills" + if [ ! -d "$DEVIN_SOURCE_DIR" ]; then + log " Generating .devin/ skill docs..." + ( cd "$SOURCE_GSTACK_DIR" && bun run gen:skill-docs --host devin ) + fi + if [ -d "$DEVIN_SOURCE_DIR" ]; then + _devin_linked=() + for skill_dir in "$DEVIN_SOURCE_DIR"/gstack*/; do + [ -f "$skill_dir/SKILL.md" ] || continue + _devin_skill_name="$(basename "$skill_dir")" + [ "$_devin_skill_name" = "gstack" ] && continue + _devin_target="$DEVIN_SKILLS/$_devin_skill_name" + if [ -L "$_devin_target" ] || [ ! 
-e "$_devin_target" ]; then + ln -snf "$skill_dir" "$_devin_target" + _devin_linked+=("$_devin_skill_name") + fi + done + if [ ${#_devin_linked[@]} -gt 0 ]; then + echo " linked skills: ${_devin_linked[*]}" + fi + else + echo " warning: .devin/skills/ generation failed — run 'bun run gen:skill-docs --host devin' manually" >&2 + fi + echo "gstack ready (devin)." + echo " browse: $BROWSE_BIN" + echo " devin skills (global): $DEVIN_SKILLS" + echo " source skills (per repo): commit \$REPO/.devin/skills/ via './setup --host devin --local' inside the project." +fi + # 7. Create .agents/ sidecar symlinks for the real Codex skill target. # The root Codex skill ends up pointing at $SOURCE_GSTACK_DIR/.agents/skills/gstack, # so the runtime assets must live there for both global and repo-local installs. diff --git a/test/gen-skill-docs.test.ts b/test/gen-skill-docs.test.ts index 4c20343581..68fe6332cc 100644 --- a/test/gen-skill-docs.test.ts +++ b/test/gen-skill-docs.test.ts @@ -85,7 +85,10 @@ const ALL_SKILLS = (() => { return skills; })(); -const CLAUDE_SKIPPED_SKILL_DIRS = new Set(['claude']); +// Devin-only skills (skipped from Claude / Codex / Factory / etc. via host +// `generation.skipSkills`). Mirror that here so the dynamic Claude test list +// matches what the generator actually emits. 
+const CLAUDE_SKIPPED_SKILL_DIRS = new Set(['claude', 'devin-setup']); const CLAUDE_GENERATED_SKILLS = ALL_SKILLS.filter(skill => !CLAUDE_SKIPPED_SKILL_DIRS.has(skill.dir)); describe('gen-skill-docs', () => { @@ -1601,6 +1604,7 @@ describe('Codex generation (--host codex)', () => { for (const entry of fs.readdirSync(ROOT, { withFileTypes: true })) { if (!entry.isDirectory() || entry.name.startsWith('.') || entry.name === 'node_modules') continue; if (entry.name === 'codex') continue; // /codex is excluded from Codex output + if (entry.name === 'devin-setup') continue; // /devin-setup is Devin-only if (!fs.existsSync(path.join(ROOT, entry.name, 'SKILL.md.tmpl'))) continue; const codexName = entry.name.startsWith('gstack-') ? entry.name : `gstack-${entry.name}`; if (isSymlinkLoop(codexName)) continue; @@ -1920,6 +1924,7 @@ describe('Factory generation (--host factory)', () => { for (const entry of fs.readdirSync(ROOT, { withFileTypes: true })) { if (!entry.isDirectory() || entry.name.startsWith('.') || entry.name === 'node_modules') continue; if (entry.name === 'codex') continue; + if (entry.name === 'devin-setup') continue; // Devin-only skill if (!fs.existsSync(path.join(ROOT, entry.name, 'SKILL.md.tmpl'))) continue; const factoryName = entry.name.startsWith('gstack-') ? 
entry.name : `gstack-${entry.name}`; if (isSymlinkLoop(factoryName)) continue; @@ -2243,16 +2248,17 @@ describe('setup script validation', () => { expect(fnBody).toContain('rm -f "$target"'); }); - test('setup supports --host auto|claude|codex|kiro|opencode', () => { + test('setup supports --host auto|claude|codex|kiro|opencode|devin', () => { expect(setupContent).toContain('--host'); - expect(setupContent).toContain('claude|codex|kiro|factory|opencode|auto'); + expect(setupContent).toContain('claude|codex|kiro|factory|opencode|devin|auto'); }); - test('auto mode detects claude, codex, kiro, and opencode binaries', () => { + test('auto mode detects claude, codex, kiro, opencode, and devin binaries', () => { expect(setupContent).toContain('command -v claude'); expect(setupContent).toContain('command -v codex'); expect(setupContent).toContain('command -v kiro-cli'); expect(setupContent).toContain('command -v opencode'); + expect(setupContent).toContain('command -v devin'); }); // T1: Sidecar skip guard — prevents .agents/skills/gstack from being linked as a skill diff --git a/test/host-config.test.ts b/test/host-config.test.ts index 5770570332..ab0d2cc88d 100644 --- a/test/host-config.test.ts +++ b/test/host-config.test.ts @@ -30,8 +30,8 @@ const ROOT = path.resolve(import.meta.dir, '..'); // ─── hosts/index.ts ───────────────────────────────────────── describe('hosts/index.ts', () => { - test('ALL_HOST_CONFIGS has 10 hosts', () => { - expect(ALL_HOST_CONFIGS.length).toBe(10); + test('ALL_HOST_CONFIGS has 11 hosts', () => { + expect(ALL_HOST_CONFIGS.length).toBe(11); }); test('ALL_HOST_NAMES matches config names', () => {