diff --git a/CLAUDE.md b/CLAUDE.md index f06ac32..8aaa8b8 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -44,7 +44,7 @@ When verifying a change: then `tail -f ~/.agentbox/logs/create.log` until you see the BEGIN/END markers for each step. If a step's END never arrives, you've found the hang — inspect that step rather than killing the whole command. -- **Test projects**: use the `examples/` directory mainly, or `../agentbox-test-repo` to test push/pull on a test repo setup on GitHub, and `../agentbox-test-repo-gh` for the same repo but with https origin using `gh` tool. +- **Test projects**: use the `examples/` directory mainly, or `../agentbox-test-repo` to test push/pull on a test repo setup on GitHub, and `../agentbox-test-repo-gh` for the same repo but with https origin using `gh` tool. Also `../express-server` can be used to test the setup wizard since it doesn't have an `agentbox.yaml` file. - **Use Agentbox inside Agentbox**: start a container with `agentbox claude --shared-docker-cache --carry-yes` to have a box ready with agentbox compiled and in the path and reuse docker cache for faster builds. For Images build use `docker build --network=host -t agentbox/box:dev -f apps/cli/runtime/docker/Dockerfile.box apps/cli/runtime/docker` instead of `agentbox prepare` because the box runs without `CAP_SYS_PTRACE`. 
## Conventions diff --git a/apps/cli/src/commands/_cloud-attach.ts b/apps/cli/src/commands/_cloud-attach.ts index 3b98762..f8d18ea 100644 --- a/apps/cli/src/commands/_cloud-attach.ts +++ b/apps/cli/src/commands/_cloud-attach.ts @@ -4,7 +4,7 @@ import { homedir } from 'node:os'; import { join } from 'node:path'; import { spinner } from '@clack/prompts'; import { DEFAULT_RELAY_PORT } from '@agentbox/sandbox-docker'; -import type { BoxRecord } from '@agentbox/core'; +import type { BoxRecord, Provider } from '@agentbox/core'; import type { AttachOpenIn } from '@agentbox/config'; import { providerForBox } from '../provider/registry.js'; import { runWrappedAttach } from '../wrapped-pty/index.js'; @@ -159,16 +159,7 @@ export async function cloudAgentAttach(args: CloudAgentAttachArgs): Promise 0) { - const pre = await provider.buildAttach(box, 'agent', { - sessionName: args.sessionName, - command, - detached: true, - }); - try { - await runDetached(pre.argv, pre.env); - } finally { - if (pre.cleanup) await pre.cleanup(); - } + await startDetachedSession(provider, box, args.sessionName, command); } let spec = await provider.buildAttach(box, 'agent', { @@ -259,6 +250,61 @@ export async function cloudAgentAttach(args: CloudAgentAttachArgs): Promise { + if (!provider.buildAttach) { + throw new Error(`provider '${provider.name}' does not support detached sessions`); + } + const spec = await provider.buildAttach(box, 'agent', { + sessionName, + command, + detached: true, + }); + try { + await runDetached(spec.argv, spec.env); + } finally { + if (spec.cleanup) await spec.cleanup(); + } +} + +/** + * Provision-time entry point for background (`-i`) cloud jobs: resolve the + * provider, ensure the box is running, then pre-start a detached agent tmux + * session seeded with `extraArgs` (the seed prompt as first positional + the + * user's post-`--` args). Mirrors the `probeState`/`start` guard in + * `cloudAgentAttach` so a box that came up paused still gets its session. 
A + * later `agentbox attach` finds the running session via + * `tmux has-session` and just attaches. + */ +export async function cloudAgentStartDetached(args: { + box: BoxRecord; + binary: string; + sessionName: string; + extraArgs?: string[]; +}): Promise { + const provider = await providerForBox(args.box); + let box = args.box; + const state = await provider.probeState(box); + if (state === 'missing') { + throw new Error(`cloud sandbox for ${box.name} is missing; was it destroyed?`); + } + if (state !== 'running') { + box = await provider.start(box); + } + const command = buildCloudAttachInnerCommand(args.binary, args.extraArgs); + await startDetachedSession(provider, box, args.sessionName, command); +} + /** * Run an attach-style argv non-interactively to completion (used for the * `detached` session pre-start). stdio is ignored — the remote command only diff --git a/apps/cli/src/commands/_run-queued-job.ts b/apps/cli/src/commands/_run-queued-job.ts index e24965a..f699f85 100644 --- a/apps/cli/src/commands/_run-queued-job.ts +++ b/apps/cli/src/commands/_run-queued-job.ts @@ -1,10 +1,15 @@ /** * Internal worker the relay's queue loop spawns as a detached child to run a - * queued `-i` job. Hidden from `--help`. Reads a queue manifest by id, runs - * the same `createBox` + `startXxxSession` codepath the foreground claude / - * codex / opencode commands run in non-`-i` mode, then exits when tmux is up. + * queued `-i` job. Hidden from `--help`. Reads a queue manifest by id, then + * runs the same create + session-start codepath the foreground claude / codex + * / opencode commands run in non-`-i` mode, then exits when tmux is up. * **Never** attaches — the in-box session keeps running for the user to * re-attach later. + * + * Docker bakes the seeded prompt straight into `tmux new-session` at create + * time (`runDockerJob`). 
Cloud providers (daytona/hetzner/vercel) create the + * box, then pre-start a detached tmux session seeded with the same prompt via + * `cloudAgentStartDetached` → `buildAttach({ detached: true })` (`runCloudJob`). */ import { Command } from 'commander'; @@ -30,6 +35,8 @@ import { resolveLimits } from '../limits.js'; import { openCommandLog } from '../lib/log-file.js'; import { buildPromptArgs } from '../lib/queue/build-prompt-args.js'; import { applyClaudeSkipPermissions, applyCodexSkipPermissions } from '../lib/skip-permissions.js'; +import { providerForCreate } from '../provider/registry.js'; +import { cloudAgentStartDetached } from './_cloud-attach.js'; export const runQueuedJobCommand = new Command('_run-queued-job') .description('internal: run a queued background agent job (do not invoke directly)') @@ -50,16 +57,18 @@ export const runQueuedJobCommand = new Command('_run-queued-job') // manually we still run, but the relay's accounting will be off — that // is the user's problem (and exactly why this command is hidden). - // Run the create + session path. Cloud paths are intentionally NOT - // supported here (the cloud agent attach starts the tmux session lazily - // on first attach; with no attach there's nowhere to seed the prompt). - // The submit-side already rejected cloud in that case. - // The worker records boxId on the outer `job` the instant the box is - // created (via onBoxCreated), so the catch block below preserves the - // box attribution even if the session start throws afterwards. - await runDockerJob(job, log, (boxId) => { + // Run the create + session path, routed by provider. The worker records + // boxId on the outer `job` the instant the box is created (via + // onBoxCreated), so the catch block below preserves the box attribution + // even if the session start throws afterwards. 
+ const onBoxCreated = (boxId: string): void => { if (job) job = { ...job, boxId }; - }); + }; + if ((job.providerName || 'docker') === 'docker') { + await runDockerJob(job, log, onBoxCreated); + } else { + await runCloudJob(job, log, onBoxCreated); + } const done: QueueJob = { ...job, @@ -104,9 +113,6 @@ async function runDockerJob( }); const projectRoot = (await findProjectRoot(opts.workspace)).root; const providerName = job.providerName || cfg.effective.box.provider || 'docker'; - if (providerName !== 'docker') { - throw new Error(`worker only supports docker provider (got "${providerName}")`); - } const providerDefault = resolveDefaultCheckpoint(cfg.effective, providerName); const checkpointRef = opts.snapshot && opts.snapshot.length > 0 @@ -217,6 +223,90 @@ async function runDockerJob( } } +/** + * Cloud (daytona/hetzner/vercel) variant of the queue worker. Mirrors the + * foreground cloud-create path (`cloudAgentCreate`): `provider.create` does the + * credential-volume seed, git-bundle workspace seed, and ctl daemon. We then + * pre-start a detached agent tmux session seeded with the same prompt+args the + * docker path bakes into `tmux new-session`. Carry / env-file import / explicit + * branch selection are omitted (the docker `-i` worker omits them too). + */ +async function runCloudJob( + job: QueueJob, + log: ReturnType, + onBoxCreated: (boxId: string) => void, +): Promise { + const opts = job.createOpts; + const cfg = await loadEffectiveConfig(opts.workspace, { + cliOverrides: buildOverridesFromJob(job), + }); + const projectRoot = (await findProjectRoot(opts.workspace)).root; + const providerName = job.providerName || cfg.effective.box.provider || 'docker'; + const provider = await providerForCreate({ flag: providerName, config: cfg.effective }); + + const providerDefault = resolveDefaultCheckpoint(cfg.effective, providerName); + const checkpointRef = + opts.snapshot && opts.snapshot.length > 0 + ? opts.snapshot + : providerDefault.length > 0 + ? 
providerDefault + : undefined; + + // browser.default = 'playwright' | 'both' implies installing playwright even + // if box.withPlaywright wasn't explicitly set (mirrors the foreground path). + const withPlaywright = + cfg.effective.box.withPlaywright || cfg.effective.browser.default !== 'agent-browser'; + + log.write(`creating cloud box (${providerName}) for agent=${job.agent}`); + const result = await provider.create({ + workspacePath: opts.workspace, + name: opts.name && opts.name.length > 0 ? opts.name : undefined, + checkpointRef, + image: cfg.effective.box.image, + withPlaywright, + withEnv: cfg.effective.box.withEnv, + vnc: { enabled: cfg.effective.box.vnc }, + limits: resolveLimits(cfg.effective.box, opts), + projectRoot, + onLog: (line) => log.write(line), + }); + log.write(`box created: ${result.record.id}`); + + // Record boxId before the session starts so a crash mid-launch is still + // attributable to a box and the working-agent gate can join it to its box. + onBoxCreated(result.record.id); + await writeJob({ ...job, boxId: result.record.id }); + + const promptedArgs = buildPromptArgs(job.agent, job.prompt, job.agentArgs); + + let binary: string; + let sessionName: string; + let extraArgs: string[]; + if (job.agent === 'claude-code') { + binary = 'claude'; + sessionName = cfg.effective.claude.sessionName; + extraArgs = applyClaudeSkipPermissions(promptedArgs, cfg.effective); + } else if (job.agent === 'codex') { + binary = 'codex'; + sessionName = cfg.effective.codex.sessionName; + extraArgs = applyCodexSkipPermissions(promptedArgs, cfg.effective); + } else if (job.agent === 'opencode') { + binary = 'opencode'; + sessionName = cfg.effective.opencode.sessionName; + extraArgs = promptedArgs; + } else { + throw new Error(`unknown agent kind: ${String(job.agent satisfies QueueAgentKind)}`); + } + + log.write(`starting detached ${job.agent} session`); + await cloudAgentStartDetached({ + box: result.record, + binary, + sessionName, + extraArgs, + }); +} + 
function buildOverridesFromJob(job: QueueJob): Partial { const opts = job.createOpts; const box: NonNullable = {}; diff --git a/apps/cli/src/commands/claude.ts b/apps/cli/src/commands/claude.ts index fdbe98f..b97df23 100644 --- a/apps/cli/src/commands/claude.ts +++ b/apps/cli/src/commands/claude.ts @@ -508,16 +508,11 @@ export const claudeCommand = new Command('claude') const isCloud = providerName !== 'docker'; // -i / --initial-prompt: background mode. Write a queue manifest and exit; - // the relay's queue loop spawns the worker as a slot frees. Docker-only - // for v1 — the cloud `cloudAgentCreate` path starts the tmux session - // lazily on first attach, so a "create but don't attach" cloud run has no - // chance to seed the prompt. + // the relay's queue loop spawns the worker as a slot frees. Works on every + // provider — the worker creates the box and pre-starts the seeded session + // (docker bakes the prompt into `tmux new-session`; cloud pre-starts a + // detached tmux session via `buildAttach({ detached: true })`). 
if (opts.initialPrompt && opts.initialPrompt.length > 0) { - if (isCloud) { - log.error('-i / --initial-prompt is currently docker-only (cloud sessions only start on attach).'); - cmdLog.close(); - process.exit(2); - } try { await assertAgentCredsAvailable({ agent: 'claude-code', diff --git a/apps/cli/src/commands/codex.ts b/apps/cli/src/commands/codex.ts index 77f2695..83dd4fa 100644 --- a/apps/cli/src/commands/codex.ts +++ b/apps/cli/src/commands/codex.ts @@ -454,11 +454,6 @@ export const codexCommand = new Command('codex') : undefined; if (opts.initialPrompt && opts.initialPrompt.length > 0) { - if (isCloud) { - log.error('-i / --initial-prompt is currently docker-only (cloud sessions only start on attach).'); - cmdLog.close(); - process.exit(2); - } try { await assertAgentCredsAvailable({ agent: 'codex', diff --git a/apps/cli/src/commands/opencode.ts b/apps/cli/src/commands/opencode.ts index 4d0ce54..adb11cd 100644 --- a/apps/cli/src/commands/opencode.ts +++ b/apps/cli/src/commands/opencode.ts @@ -430,11 +430,6 @@ export const opencodeCommand = new Command('opencode') : undefined; if (opts.initialPrompt && opts.initialPrompt.length > 0) { - if (isCloud) { - log.error('-i / --initial-prompt is currently docker-only (cloud sessions only start on attach).'); - cmdLog.close(); - process.exit(2); - } try { await assertAgentCredsAvailable({ agent: 'opencode', diff --git a/apps/cli/test/cloud-attach.test.ts b/apps/cli/test/cloud-attach.test.ts index 4b85e5c..a21ba03 100644 --- a/apps/cli/test/cloud-attach.test.ts +++ b/apps/cli/test/cloud-attach.test.ts @@ -1,5 +1,6 @@ import { describe, expect, it } from 'vitest'; import { buildCloudAttachInnerCommand } from '../src/commands/_cloud-attach.js'; +import { buildPromptArgs } from '../src/lib/queue/build-prompt-args.js'; /** * The launcher embeds args as base64. 
To verify the round-trip we extract the @@ -66,4 +67,15 @@ describe('buildCloudAttachInnerCommand', () => { expect(buildCloudAttachInnerCommand('opencode', ['-m', 'gpt-5'])).toContain('exec opencode'); expect(buildCloudAttachInnerCommand('codex', ['-m', 'gpt-5'])).toContain('exec codex'); }); + + // Contract the cloud `-i` queue worker (runCloudJob) depends on: it builds + // the launcher args via `buildPromptArgs(agent, prompt, userArgs)` and hands them to + // `cloudAgentStartDetached` → `buildCloudAttachInnerCommand`. The seed prompt + // must land as the first positional and post-`--` args (e.g. + // `--permission-mode=plan`) must be forwarded verbatim. + it('forwards a seeded prompt + custom args through the launcher in order', () => { + const args = buildPromptArgs('claude-code', 'fix the failing test', ['--permission-mode=plan']); + const cmd = buildCloudAttachInnerCommand('claude', args); + expect(decodeArgs(cmd)).toEqual(['fix the failing test', '--permission-mode=plan']); + }); }); diff --git a/apps/web/public/cover.jpg b/apps/web/public/cover.jpg index 5f7057f..4479e58 100644 Binary files a/apps/web/public/cover.jpg and b/apps/web/public/cover.jpg differ diff --git a/docs/cover.jpg b/docs/cover.jpg index 5f7057f..df19362 100644 Binary files a/docs/cover.jpg and b/docs/cover.jpg differ diff --git a/skills/agentbox/SKILL.md b/skills/agentbox/SKILL.md index 8c03ec9..942103f 100644 --- a/skills/agentbox/SKILL.md +++ b/skills/agentbox/SKILL.md @@ -11,7 +11,7 @@ If you find yourself *inside* a box (`/workspace` exists and `AGENTBOX_RELAY_URL ## What AgentBox is, in one paragraph -AgentBox spins up one isolated sandbox per agent run — a local Docker container (default), a Daytona cloud sandbox (`--provider daytona`), or a Hetzner VPS (`--provider hetzner`). Each box has its own `/workspace`, but the host's `.git/` is shared, so commits made inside the box land on the host immediately. 
The agent inside the box has **no host credentials** — `git push`, opening URLs in the host browser, capturing checkpoints, and all other host-side operations flow through a small host process called the **relay** that runs alongside the CLI. +AgentBox spins up one isolated sandbox per agent run — a local Docker container (default), a Hetzner VPS (`--provider hetzner`), a Vercel Sandbox (`--provider vercel`), or, with partial support, a Daytona cloud sandbox (`--provider daytona`). Each box has its own `/workspace`, but the host's `.git/` is shared, so commits made inside the box land on the host immediately. The agent inside the box has **no host credentials** — `git push`, opening URLs in the host browser, capturing checkpoints, and all other host-side operations flow through a small host process called the **relay** that runs alongside the CLI. ## The two starting commands @@ -26,7 +26,7 @@ agentbox create --provider hetzner # cloud VPS (requires `agentbox prepare -- agentbox create --attach # drop into a shell inside the box after create ``` -Useful flags: `-n ` (friendly box name), `--provider docker|daytona|hetzner`, `--attach`, `-w ` (workspace to mount; defaults to `cwd`), `--snapshot ` (start from a checkpoint). +Useful flags: `-n ` (friendly box name), `--provider docker|daytona|hetzner|vercel`, `--attach`, `-w ` (workspace to mount; defaults to `cwd`), `--snapshot ` (start from a checkpoint). Non-docker providers require a one-time `agentbox prepare --provider ` to bake the base image / snapshot. @@ -64,7 +64,7 @@ agentbox dashboard # TUI with status + leader-key actions agentbox claude attach # reattach to a specific box ``` -Caveats: `-i` is currently **docker-only** (cloud sessions only start on attach, so background-mode has no place to seed the prompt). The host must have valid Claude Code credentials. 
+`-i` works on every provider — pass `--provider daytona|hetzner|vercel` (or set `box.provider`) and the queued job creates a cloud box and pre-starts the seeded agent session detached, same as docker. The host must have valid agent credentials. Extra args after `--` are forwarded to the in-box agent (e.g. `agentbox claude -i "<prompt>" --provider vercel -- --permission-mode=plan`). ## Git through the host relay @@ -105,7 +105,7 @@ If a PR op appears to hang, tell the user to check the dashboard footer for the | `agentbox url [n\|name]` | Open the box's web app URL (`.localhost` via Portless) in the host browser. | | `agentbox screen [n\|name]` | Open the box's **own** Chromium via VNC — useful for OAuth flows the agent inside the box initiates. | | `agentbox code [n\|name]` | Open VS Code / Cursor pointed at the box. | -| `agentbox prepare --provider ` | One-time base image / snapshot build for `daytona` or `hetzner`. With no `--provider`, prints status across all providers. | +| `agentbox prepare --provider ` | One-time base image / snapshot build for `daytona`, `hetzner`, or `vercel`. With no `--provider`, prints status across all providers. | | `agentbox prune --provider ` | Clean up orphan boxes / images / snapshots for a provider (docker + daytona supported; hetzner pending). | Per-project numeric index (`1`, `2`, …) and friendly name (`review`, `smoke`) both work wherever `` is accepted. Index `1` is the first box created in the current workspace. @@ -114,7 +114,7 @@ Per-project numeric index (`1`, `2`, …) and friendly name (`review`, `smoke`) 1. **Never assume the host needs SSH keys forwarded into a box** — git is handled by the relay, by design. 2. **Use `-i` whenever the user asks for parallel agent work** rather than spawning multiple foreground sessions. Then point them at `agentbox dashboard` to watch progress. -3. **Pick the provider deliberately.** `docker` is the fast default. 
`--provider hetzner` gives a real VPS (heavier, isolated, requires `agentbox prepare --provider hetzner` once). `--provider daytona` is the managed cloud option. +3. **Pick the provider deliberately.** `docker` is the fast default. `--provider hetzner` gives a real VPS (heavier, isolated, requires `agentbox prepare --provider hetzner` once). `--provider vercel` is the managed cloud option. 4. **Cross-check before recommending a command.** If a flag isn't listed here, run `agentbox --help` (it's safe and read-only) before suggesting it to the user. 5. **`/agentbox-setup` is a different skill.** It runs *inside* a box to generate `/workspace/agentbox.yaml`. Don't conflate the two.