diff --git a/packages/core/fixtures/AGENTOS_SYSTEM_PROMPT.md b/crates/agentos-sidecar/src/AGENTOS_SYSTEM_PROMPT.md similarity index 100% rename from packages/core/fixtures/AGENTOS_SYSTEM_PROMPT.md rename to crates/agentos-sidecar/src/AGENTOS_SYSTEM_PROMPT.md diff --git a/crates/agentos-sidecar/src/acp_extension.rs b/crates/agentos-sidecar/src/acp_extension.rs index ce28abff7..5d97bf5da 100644 --- a/crates/agentos-sidecar/src/acp_extension.rs +++ b/crates/agentos-sidecar/src/acp_extension.rs @@ -68,10 +68,10 @@ const OPENCODE_DEFAULT_CONTEXT_PATHS: [&str; 11] = [ "OPENCODE.md", "OPENCODE.local.md", ]; -const AGENTOS_SYSTEM_PROMPT: &str = include_str!(concat!( - env!("CARGO_MANIFEST_DIR"), - "/../../packages/core/fixtures/AGENTOS_SYSTEM_PROMPT.md" -)); +// Embedded next to this source so `cargo publish` packages it (an out-of-crate +// `include_str!` path breaks the isolated package-verify build). The TypeScript +// side reads the same file from this location for its sanity check. +const AGENTOS_SYSTEM_PROMPT: &str = include_str!("AGENTOS_SYSTEM_PROMPT.md"); /// Hard ceiling on the `stdout_buffer` retained on an `AcpSessionRecord` between /// requests. The buffer only ever holds the partial trailing line not yet parsed /// into a complete JSON-RPC message, so this also bounds the per-session record diff --git a/packages/core/package.json b/packages/core/package.json index b2e7048c4..f9de2fb81 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -6,8 +6,7 @@ "main": "./dist/index.js", "types": "./dist/index.d.ts", "files": [ - "dist", - "fixtures" + "dist" ], "exports": { ".": { diff --git a/packages/core/src/agent-os.ts b/packages/core/src/agent-os.ts index 70e160248..e480585d8 100644 --- a/packages/core/src/agent-os.ts +++ b/packages/core/src/agent-os.ts @@ -5527,11 +5527,121 @@ interface AgentOsSidecarState { * are cheap incremental tenants of one process rather than one-process-each. 
*/ nativeProcess?: Promise; + /** + * The shared sidecar's child process + stdio, cached for synchronous + * ref/unref. Unref'd when no VM leases are active so a one-shot host process + * can exit after `dispose()`; re-ref'd while leases are live. + */ + sharedChild?: SidecarEventLoopHandle; + /** + * Number of live "holds" on the shared sidecar's event-loop reference. A hold + * is taken for the WHOLE create→use→dispose lifetime of every VM lease (not + * just while it sits in `activeLeases`), so a VM that is still mid-creation + * still counts. The child + stdio are ref'd while this is >0 and unref'd at 0. + * A counter (not a boolean) so concurrent create/dispose cannot clobber each + * other — Node ref/unref is not itself counted. + */ + eventLoopHolds?: number; } const sidecarStates = new WeakMap(); const sharedSidecars = new Map(); +interface RefCountableHandle { + ref?(): unknown; + unref?(): unknown; +} + +interface SidecarEventLoopHandle extends RefCountableHandle { + stdin?: RefCountableHandle | null; + stdout?: RefCountableHandle | null; + stderr?: RefCountableHandle | null; + kill?(signal?: string | number): unknown; +} + +let sidecarProcessExitHookInstalled = false; + +/** + * Install a one-time, synchronous `process.on("exit")` hook that SIGKILLs any + * pooled shared sidecar child. Once a one-shot host process is allowed to exit + * (its sidecar handles are unref'd at 0 leases), this reaps the sidecar + * immediately instead of waiting for its stdin-EOF grace window — no orphan, no + * delay. We deliberately do NOT install SIGINT/SIGTERM handlers: a library + * should not hijack the host's signal handling. SIGINT still reaches the sidecar + * via the process group, and SIGTERM-driven exit still closes its stdin. 
+ */ +function ensureSidecarProcessExitCleanup(): void { + if (sidecarProcessExitHookInstalled) return; + sidecarProcessExitHookInstalled = true; + process.on("exit", () => { + for (const sidecar of sharedSidecars.values()) { + try { + sidecarStates.get(sidecar)?.sharedChild?.kill?.("SIGKILL"); + } catch { + // best-effort reap; the process is exiting regardless + } + } + }); +} + +function sidecarChildHandle(client: unknown): SidecarEventLoopHandle | undefined { + // SidecarProcess -> StdioSidecarProtocolClient.child (the spawned ChildProcess). + const protocolClient = ( + client as { protocolClient?: { child?: SidecarEventLoopHandle } } | undefined + )?.protocolClient; + return protocolClient?.child ?? undefined; +} + +/** + * Apply the current hold state to the shared sidecar's child + stdio: ref them + * while ≥1 hold is live so in-flight VM work keeps the host process alive; unref + * them at 0 so a one-shot script exits on its own after `dispose()`. The sidecar + * process itself stays running (reusable) and self-exits on stdin EOF when the + * host finally goes away. Best-effort: never let ref/unref break VM lifecycle. + */ +function applySharedSidecarHold(state: AgentOsSidecarState): void { + const child = state.sharedChild; + if (!child) return; + const hold = (state.eventLoopHolds ?? 0) > 0; + for (const handle of [child, child.stdin, child.stdout, child.stderr]) { + if (!handle) continue; + try { + if (hold) handle.ref?.(); + else handle.unref?.(); + } catch { + // ref/unref is an optimization, not correctness-critical + } + } +} + +/** + * Take a hold for the entire create→use→dispose lifetime of one VM lease. Taken + * BEFORE VM creation starts (not when the lease lands in `activeLeases`) so a VM + * that is still mid-creation keeps the sidecar ref'd and a concurrent dispose + * cannot unref it out from under the in-flight create. 
+ */ +function acquireSharedSidecarHold(state: AgentOsSidecarState): void { + state.eventLoopHolds = (state.eventLoopHolds ?? 0) + 1; + if (state.eventLoopHolds === 1) applySharedSidecarHold(state); +} + +/** Release a hold taken by {@link acquireSharedSidecarHold}; unref at 0. */ +function releaseSharedSidecarHold(state: AgentOsSidecarState): void { + const current = state.eventLoopHolds ?? 0; + if (current <= 0) { + // The `holdReleased` guard makes each lease release exactly once, so this + // should be unreachable. Warn rather than silently floor, per the repo's + // no-silent-masking rule, so an accounting bug surfaces instead of hiding. + state.eventLoopHolds = 0; + console.warn( + "[agentos] shared sidecar event-loop hold released more than acquired", + ); + return; + } + state.eventLoopHolds = current - 1; + if (state.eventLoopHolds === 0) applySharedSidecarHold(state); +} + /** * Spawn-once accessor for a sidecar handle's shared native process. Concurrent * callers await the same promise, so one `AgentOsSidecar` maps to exactly one @@ -5542,6 +5652,7 @@ function ensureSharedSidecarNativeProcess( ): Promise { const state = getSidecarState(sidecar); if (!state.nativeProcess) { + ensureSidecarProcessExitCleanup(); state.nativeProcess = (async () => { const client = SidecarProcess.spawn({ cwd: REPO_ROOT, @@ -5549,8 +5660,39 @@ function ensureSharedSidecarNativeProcess( args: [], frameTimeoutMs: NATIVE_SIDECAR_FRAME_TIMEOUT_MS, }); - const session = await client.authenticateAndOpenSession(); - return { client, session }; + // Track the child immediately — BEFORE the handshake await — so a + // failed `authenticateAndOpenSession()` can still reap it (otherwise + // the spawned child is untracked, unreapable, and pins the loop). + state.sharedChild = sidecarChildHandle(client); + if (!state.sharedChild) { + // We reached into @secure-exec/core internals to get the child for + // idle-unref. 
If that shape ever changes this returns undefined and + // the optimization silently stops working (one-shot scripts would + // hang again). Make it loud rather than a silent regression. + console.warn( + "[agentos] could not resolve the shared sidecar child handle; " + + "standalone scripts may not exit cleanly after dispose(). " + + "This usually means @secure-exec/core internals changed.", + ); + } + // Apply the current hold state to the just-spawned child. + applySharedSidecarHold(state); + try { + const session = await client.authenticateAndOpenSession(); + return { client, session }; + } catch (error) { + // Spawn/handshake failed: reap the child, drop the cached handle, + // and CLEAR the rejected promise so the next create() retries + // instead of permanently wedging on a rejected `nativeProcess`. + try { + state.sharedChild?.kill?.("SIGKILL"); + } catch { + // already gone + } + state.sharedChild = undefined; + state.nativeProcess = undefined; + throw error; + } })(); } return state.nativeProcess; @@ -5565,6 +5707,14 @@ async function disposeSharedSidecarNativeProcess( return; } state.nativeProcess = undefined; + // The cached child is now dead; drop it (symmetric with the assignment in + // ensureSharedSidecarNativeProcess). We deliberately do NOT zero + // `eventLoopHolds` here: this runs only from `AgentOsSidecar.dispose()`, which + // has already set the handle to `disposing` (so no new lease can acquire) and + // drained `activeLeases`; the disposed handle's state is then abandoned. Force- + // zeroing a shared counter could clobber a hold on a freshly re-acquired + // process generation, so it is left to the balanced acquire/release pairs. 
+ state.sharedChild = undefined; try { const { client } = await pending; await client.dispose(); @@ -5700,6 +5850,18 @@ async function leaseAgentOsSidecarVm( }, }); + // Hold the shared sidecar's event-loop ref for this lease's WHOLE lifetime — + // taken now, before VM creation, so a concurrent dispose cannot unref the + // sidecar while this create is still in flight. Released exactly once on + // dispose or on a failed create. + acquireSharedSidecarHold(state); + let holdReleased = false; + const releaseHold = () => { + if (holdReleased) return; + holdReleased = true; + releaseSharedSidecarHold(state); + }; + let disposed = false; let leaseRecord: AgentOsSidecarLeaseRecord | undefined; @@ -5726,6 +5888,10 @@ async function leaseAgentOsSidecarVm( state.activeLeases.delete(leaseRecord!); state.description.activeVmCount = state.activeLeases.size; await client.dispose(); + // Release this lease's hold; the shared sidecar is unref'd only + // once the last hold (across all in-flight + active leases) drops, + // so a one-shot host process can then exit on its own. + releaseHold(); }, }; @@ -5737,6 +5903,7 @@ async function leaseAgentOsSidecarVm( return lease; } catch (error) { await client.dispose().catch(() => {}); + releaseHold(); throw error; } } diff --git a/packages/core/tests/agent-pkg-matrix.e2e.test.ts b/packages/core/tests/agent-pkg-matrix.e2e.test.ts new file mode 100644 index 000000000..6355008f4 --- /dev/null +++ b/packages/core/tests/agent-pkg-matrix.e2e.test.ts @@ -0,0 +1,196 @@ +import { execFileSync, spawnSync } from "node:child_process"; +import { cpSync, mkdtempSync, rmSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { join, resolve } from "node:path"; +import { afterAll, beforeAll, describe, expect, it } from "vitest"; + +/** + * AGENT × PACKAGE-MANAGER e2e MATRIX (real API, skipped by default). 
+ * + * For every package manager (npm/pnpm/yarn/bun) × every agent + * (pi/pi-cli/claude/opencode) this installs the PUBLISHED packages into an + * isolated temp project and asserts a real user can: install → create a session + * → prompt → stream tokens LIVE. It is the regression net for the exact issues + * that bit us shipping the preview: + * + * - retired/stale model ids (Anthropic 404 → empty turn), + * - opencode needing a config file (model + provider baseURL ending in /v1) + cwd, + * - permission keys being fs/network/childProcess/process/env (not filesystem/…), + * - gap-based streaming detection (opencode bursts chunks but still delivers live), + * - ACP bootstrap flakiness (retry before failing). + * + * SKIPPED BY DEFAULT: it does real network installs and hits a real LLM API. Run + * it deliberately: + * + * AGENTOS_MATRIX_E2E=1 ANTHROPIC_API_KEY=sk-... \ + * pnpm --dir packages/core exec vitest run tests/agent-pkg-matrix.e2e.test.ts + * + * Env knobs: + * AGENTOS_MATRIX_E2E=1 required to enable (also gated out in vitest.config.ts) + * ANTHROPIC_API_KEY required + * AGENTOS_MATRIX_CORE @rivet-dev/agentos-core version/tag (default "latest") + * AGENTOS_MATRIX_AGENTS @agentos-software/* version/tag (default "latest") + * AGENTOS_MATRIX_MODEL opencode model id (default a current Haiku) + * AGENTOS_MATRIX_PMS comma list to restrict package managers + * AGENTOS_MATRIX_AGENTS_LIST comma list to restrict agents + */ + +const ENABLED = process.env.AGENTOS_MATRIX_E2E === "1"; +const CORE_VERSION = process.env.AGENTOS_MATRIX_CORE || "latest"; +const AGENTS_VERSION = process.env.AGENTOS_MATRIX_AGENTS || "latest"; +const CELL = resolve(import.meta.dirname, "fixtures/agent-matrix-cell.mjs"); +const CELL_TIMEOUT_MS = 240_000; + +const AGENT_PKGS: Record = { + pi: "@agentos-software/pi", + "pi-cli": "@agentos-software/pi-cli", + claude: "@agentos-software/claude-code", + opencode: "@agentos-software/opencode", +}; + +function commandAvailable(cmd: string): boolean 
{ + try { + const r = spawnSync(cmd, ["--version"], { stdio: "ignore" }); + return r.status === 0; + } catch { + return false; + } +} + +function installArgs(pm: string, pkgs: string[]): Array<[string, string[]]> { + switch (pm) { + case "npm": + return [ + ["npm", ["init", "-y"]], + ["npm", ["install", "--no-audit", "--no-fund", ...pkgs]], + ]; + case "pnpm": + return [ + ["pnpm", ["init"]], + ["pnpm", ["add", ...pkgs]], + ]; + case "yarn": + return [ + ["yarn", ["init", "-y"]], + ["yarn", ["add", ...pkgs]], + ]; + case "bun": + return [ + ["bun", ["init", "-y"]], + ["bun", ["add", ...pkgs]], + ]; + default: + throw new Error(`unknown package manager ${pm}`); + } +} + +const ALL_PMS = (process.env.AGENTOS_MATRIX_PMS || "npm,pnpm,yarn,bun") + .split(",") + .map((s) => s.trim()) + .filter(Boolean); +const ALL_AGENTS = ( + process.env.AGENTOS_MATRIX_AGENTS_LIST || "pi,pi-cli,claude,opencode" +) + .split(",") + .map((s) => s.trim()) + .filter(Boolean); + +const availablePms = ALL_PMS.filter(commandAvailable); + +describe.skipIf(!ENABLED)("agent × package-manager e2e matrix (real API)", () => { + const tmpDirs: string[] = []; + + beforeAll(() => { + if (!process.env.ANTHROPIC_API_KEY) { + throw new Error( + "AGENTOS_MATRIX_E2E requires ANTHROPIC_API_KEY in the environment", + ); + } + const skipped = ALL_PMS.filter((p) => !availablePms.includes(p)); + if (skipped.length) { + // eslint-disable-next-line no-console + console.warn( + `[matrix] package managers not on PATH, skipping: ${skipped.join(", ")}`, + ); + } + // eslint-disable-next-line no-console + console.log( + `[matrix] core=${CORE_VERSION} agents=${AGENTS_VERSION} pms=[${availablePms.join(",")}] agents=[${ALL_AGENTS.join(",")}]`, + ); + }); + + afterAll(() => { + for (const d of tmpDirs) { + try { + rmSync(d, { recursive: true, force: true }); + } catch {} + } + }); + + for (const pm of availablePms) { + for (const agent of ALL_AGENTS) { + it( + `${pm} + ${agent}: install → session → live token streaming`, + 
// opencode's ACP bootstrap (and real LLM APIs) flake transiently; + // retry the whole cell so a flake doesn't red the gate. Persistent + // failures still fail after the retries. + { timeout: CELL_TIMEOUT_MS + 200_000, retry: 2 }, + async () => { + const dir = mkdtempSync(join(tmpdir(), `agentos-matrix-${pm}-${agent}-`)); + tmpDirs.push(dir); + // yarn 1.x global cache contends under repeated runs; isolate it. + const cacheDir = join(dir, ".pm-cache"); + const childEnv = { + ...process.env, + AGENT: agent, + YARN_CACHE_FOLDER: cacheDir, + npm_config_cache: cacheDir, + }; + + const pkgs = [ + `@rivet-dev/agentos-core@${CORE_VERSION}`, + `${AGENT_PKGS[agent]}@${AGENTS_VERSION}`, + ]; + for (const [cmd, args] of installArgs(pm, pkgs)) { + execFileSync(cmd, args, { + cwd: dir, + env: childEnv, + stdio: "pipe", + timeout: 180_000, + }); + } + + cpSync(CELL, join(dir, "agent-matrix-cell.mjs")); + + const run = spawnSync("node", ["agent-matrix-cell.mjs"], { + cwd: dir, + env: childEnv, + encoding: "utf8", + timeout: CELL_TIMEOUT_MS, + }); + + const line = (run.stdout || "") + .split("\n") + .find((l) => l.startsWith("E2E_RESULT_JSON:")); + if (!line) { + throw new Error( + `no E2E_RESULT_JSON from ${pm}/${agent}.\nstdout:\n${run.stdout}\nstderr:\n${(run.stderr || "").slice(-2000)}`, + ); + } + const result = JSON.parse(line.slice("E2E_RESULT_JSON:".length)); + + // eslint-disable-next-line no-console + console.log(`[matrix] ${pm}/${agent}:`, JSON.stringify(result.metrics)); + + expect(result.ok, `prompt produced output (err: ${result.error})`).toBe( + true, + ); + expect( + result.streaming, + `tokens streamed live (metrics: ${JSON.stringify(result.metrics)})`, + ).toBe(true); + }, + ); + } + } +}); diff --git a/packages/core/tests/fixtures/agent-matrix-cell.mjs b/packages/core/tests/fixtures/agent-matrix-cell.mjs new file mode 100644 index 000000000..efe474570 --- /dev/null +++ b/packages/core/tests/fixtures/agent-matrix-cell.mjs @@ -0,0 +1,168 @@ +// One matrix cell, 
run as a STANDALONE node process inside a freshly-installed +// temp project (so it exercises the published packages exactly as a user would). +// +// Resolves @rivet-dev/agentos-core + the agent's @agentos-software/* package from +// the temp project's own node_modules, opens a session, sends a prompt, and asserts +// that tokens stream LIVE mid-turn (the ACP streaming contract) — then prints a +// single `E2E_RESULT_JSON:{...}` line and exits 0 on PASS. +// +// Driven by env: AGENT (pi|pi-cli|claude|opencode), ANTHROPIC_API_KEY, +// AGENTOS_MATRIX_MODEL (opencode model id; must be a CURRENT id). + +const AGENT = process.env.AGENT || "pi"; +const ANTHROPIC_API_KEY = process.env.ANTHROPIC_API_KEY; +const ANTHROPIC_BASE_URL = + process.env.ANTHROPIC_BASE_URL || "https://api.anthropic.com"; +// OpenCode pins an explicit model; a retired id 404s and the turn ends empty, so +// this is intentionally configurable and defaults to a current model. +const OPENCODE_MODEL = + process.env.AGENTOS_MATRIX_MODEL || "anthropic/claude-haiku-4-5-20251001"; + +const PKG = { + pi: "@agentos-software/pi", + "pi-cli": "@agentos-software/pi-cli", + claude: "@agentos-software/claude-code", + opencode: "@agentos-software/opencode", +}[AGENT]; +if (!PKG) throw new Error(`unknown AGENT ${AGENT}`); + +const { AgentOs } = await import("@rivet-dev/agentos-core"); +const software = (await import(PKG)).default; + +const result = { agent: AGENT, pkg: PKG, ok: false, streaming: false, error: null }; + +let vm; +let sessionId; +try { + vm = await AgentOs.create({ + software: [software], + // Real LLM egress needs network; the secure baseline denies it by default. + // Keys are fs/network/childProcess/process/env (NOT filesystem/environment). 
+ permissions: { + fs: "allow", + network: "allow", + childProcess: "allow", + process: "allow", + env: "allow", + }, + }); + + const homeDir = "/home/agentos"; + const env = { HOME: homeDir }; + if (ANTHROPIC_API_KEY) { + env.ANTHROPIC_API_KEY = ANTHROPIC_API_KEY; + env.ANTHROPIC_BASE_URL = ANTHROPIC_BASE_URL; + } + + // OpenCode has no built-in default model/provider: write its config FIRST or the + // prompt resolves empty. The Anthropic baseURL MUST end in /v1 (else 404). + if (AGENT === "opencode") { + await vm.mkdir(`${homeDir}/.config/opencode`, { recursive: true }); + await vm.writeFile( + `${homeDir}/.config/opencode/opencode.json`, + JSON.stringify({ + $schema: "https://opencode.ai/config.json", + autoupdate: false, + share: "disabled", + snapshot: false, + model: OPENCODE_MODEL, + provider: { + anthropic: { options: { baseURL: `${ANTHROPIC_BASE_URL}/v1` } }, + }, + }), + ); + } + + // pi/pi-cli read provider config from ~/.pi/agent/models.json + if (AGENT === "pi" || AGENT === "pi-cli") { + await vm.mkdir(`${homeDir}/.pi/agent`, { recursive: true }); + await vm.writeFile( + `${homeDir}/.pi/agent/models.json`, + JSON.stringify({ + providers: { + anthropic: { baseUrl: ANTHROPIC_BASE_URL, apiKey: ANTHROPIC_API_KEY }, + }, + }), + ); + } + + // OpenCode (and others) need an existing cwd; the default /workspace may not exist. + const workspaceDir = `${homeDir}/workspace`; + await vm.mkdir(workspaceDir, { recursive: true }); + + // ACP bootstrap can flake; retry a couple times before declaring a failure. 
+ let created; + for (let attempt = 1; attempt <= 3; attempt++) { + try { + created = await vm.createSession(AGENT, { cwd: workspaceDir, env }); + break; + } catch (err) { + if (attempt === 3) throw err; + } + } + sessionId = created.sessionId; + + const events = []; + let promptStart = 0; + vm.onSessionEvent(sessionId, (event) => { + events.push({ + method: event.method, + kind: event.params?.update?.sessionUpdate, + t: performance.now() - promptStart, + }); + }); + + promptStart = performance.now(); + const { text, response } = await vm.prompt( + sessionId, + "Write a haiku about the ocean. Output only the haiku.", + ); + const resolvedAt = performance.now() - promptStart; + + const updates = events.filter((e) => e.method === "session/update"); + const chunks = updates.filter( + (e) => e.kind === "agent_message_chunk" || e.kind === "agent_thought_chunk", + ); + const firstChunk = chunks.length ? chunks[0].t : NaN; + const lastChunk = chunks.length ? chunks[chunks.length - 1].t : NaN; + const chunksBeforeResolve = chunks.filter((e) => e.t < resolvedAt - 50).length; + const span = lastChunk - firstChunk; + const gap = resolvedAt - firstChunk; // live-delivery signal (the ACP fix) + + // Streaming contract: >=2 text chunks delivered LIVE mid-turn, not batched at + // prompt resolution. The batching bug clusters EVERY chunk at the resolve + // instant (firstChunk == resolve, so gap ~= 0). Live delivery puts the first + // chunk meaningfully before resolve. `gap > 100` cleanly separates the two + // without false-failing agents (e.g. opencode) that emit a tight, short burst + // on a fast turn — those still arrive hundreds of ms before resolve. `span` + // is kept only as an informational metric, not a pass condition. 
+ const streaming = + chunks.length >= 2 && chunksBeforeResolve >= 2 && gap > 100; + + result.ok = !response?.error && (text || "").length > 0; + result.streaming = streaming; + result.metrics = { + resolvedAt: Math.round(resolvedAt), + totalUpdates: updates.length, + chunks: chunks.length, + chunksBeforeResolve, + firstChunkAt: Math.round(firstChunk), + lastChunkAt: Math.round(lastChunk), + spanMs: Math.round(span), + gapMs: Math.round(gap), + textLen: (text || "").length, + textSample: (text || "").slice(0, 80), + }; +} catch (err) { + result.error = String(err?.stack || err); +} finally { + try { + if (sessionId) vm?.closeSession(sessionId); + } catch {} + try { + await vm?.dispose(); + } catch {} +} + +console.log("E2E_RESULT_JSON:" + JSON.stringify(result)); +process.exit(result.ok && result.streaming ? 0 : 1); diff --git a/packages/core/tests/fixtures/shared-sidecar-clean-exit-script.mjs b/packages/core/tests/fixtures/shared-sidecar-clean-exit-script.mjs new file mode 100644 index 000000000..4790aba91 --- /dev/null +++ b/packages/core/tests/fixtures/shared-sidecar-clean-exit-script.mjs @@ -0,0 +1,20 @@ +// Standalone script (as a user would write): create a VM on the default +// (shared) sidecar, do one op, dispose, and DO NOT call process.exit(). +// +// A correct dispose() must let node exit on its own — the shared sidecar's +// child process + stdio handles must not keep the event loop alive after the +// last VM lease is released. Imports the built package entry, like a consumer. +import { resolve } from "node:path"; +import { pathToFileURL } from "node:url"; + +const entry = pathToFileURL( + resolve(import.meta.dirname, "../../dist/index.js"), +).href; +const { AgentOs } = await import(entry); + +const vm = await AgentOs.create(); +await vm.writeFile("/clean-exit.txt", "ok"); +await vm.dispose(); + +console.log("SCRIPT_DONE"); +// Intentionally NO process.exit(): the process must terminate on its own. 
diff --git a/packages/core/tests/os-instructions.test.ts b/packages/core/tests/os-instructions.test.ts index 4e7f105f7..e6303dc1e 100644 --- a/packages/core/tests/os-instructions.test.ts +++ b/packages/core/tests/os-instructions.test.ts @@ -5,7 +5,9 @@ import { AgentOs } from "../src/agent-os.js"; const OS_INSTRUCTIONS_FIXTURE = resolve( import.meta.dirname, - "../fixtures/AGENTOS_SYSTEM_PROMPT.md", + // The sidecar crate embeds this prompt; it lives next to the Rust source so + // `cargo publish` can package it. This test only sanity-checks its contents. + "../../../crates/agentos-sidecar/src/AGENTOS_SYSTEM_PROMPT.md", ); // ── base prompt fixture sanity ───────────────────────────────────────── diff --git a/packages/core/tests/shared-sidecar-clean-exit.test.ts b/packages/core/tests/shared-sidecar-clean-exit.test.ts new file mode 100644 index 000000000..b57426971 --- /dev/null +++ b/packages/core/tests/shared-sidecar-clean-exit.test.ts @@ -0,0 +1,57 @@ +import { spawnSync } from "node:child_process"; +import { existsSync } from "node:fs"; +import { resolve } from "node:path"; +import { describe, expect, it } from "vitest"; + +// The fixture imports the built package entry (dist), like a consumer would. +// `pnpm test` builds packages/core first; when running this file standalone +// without a build, skip with a clear reason instead of a confusing import error. +const DIST_ENTRY = resolve(import.meta.dirname, "../dist/index.js"); +const distMissing = !existsSync(DIST_ENTRY); +if (distMissing) { + // eslint-disable-next-line no-console + console.warn( + `[shared-sidecar-clean-exit] skipped: build packages/core first (missing ${DIST_ENTRY})`, + ); +} + +/** + * REGRESSION: a standalone script that creates a VM and calls `await + * vm.dispose()` must let the node process exit on its own. + * + * `AgentOs.create()` uses the process-global SHARED sidecar pool. 
`vm.dispose()` + * releases the VM lease, but the shared sidecar's child process + stdio sockets + * used to stay referenced, keeping the event loop alive forever — every + * one-shot quickstart script (hello-world, filesystem, cron, agent-session…) + * hung on exit and had to be SIGINT'd. The fix unrefs the shared sidecar's + * handles when no leases are active (re-refs on the next lease), so the loop can + * drain. This runs the script as a real subprocess and asserts it exits by + * itself, with no `process.exit()` escape hatch. + */ +describe("shared sidecar clean exit", () => { + it.skipIf(distMissing)("a standalone create()+dispose() script exits on its own", () => { + const script = resolve( + import.meta.dirname, + "fixtures/shared-sidecar-clean-exit-script.mjs", + ); + const result = spawnSync(process.execPath, [script], { + cwd: resolve(import.meta.dirname, ".."), + encoding: "utf8", + timeout: 60_000, + }); + + const diag = `exit=${result.status} signal=${result.signal}\nstdout: ${result.stdout ?? ""}\nstderr: ${(result.stderr ?? "").slice(-800)}`; + + // The script logic should complete regardless of the exit behavior. + expect(result.stdout ?? "", `script never finished its work.\n${diag}`).toContain( + "SCRIPT_DONE", + ); + // The real assertion: the process terminated on its own (was not killed + // by the spawn timeout). A hang leaves signal === "SIGTERM". + expect( + result.signal, + `process did not exit on its own within 60s — the shared sidecar kept the event loop alive.\n${diag}`, + ).toBeNull(); + expect(result.status, `non-zero exit.\n${diag}`).toBe(0); + }, 90_000); +}); diff --git a/packages/core/vitest.config.ts b/packages/core/vitest.config.ts index e407895ec..6e13417ef 100644 --- a/packages/core/vitest.config.ts +++ b/packages/core/vitest.config.ts @@ -61,7 +61,13 @@ const KNOWN_FAILING_E2E_FILES = [ "tests/codex-fullturn.test.ts", ]; +// Real-API, real-install matrix (agent × package manager). 
Hits a live LLM API +// and runs real npm/pnpm/yarn/bun installs, so it is excluded from BOTH the +// default run and the AGENTOS_E2E_FULL sweep. Enable only with AGENTOS_MATRIX_E2E=1. +const MATRIX_E2E_FILES = ["tests/agent-pkg-matrix.e2e.test.ts"]; + const runFullE2e = process.env.AGENTOS_E2E_FULL === "1"; +const runMatrixE2e = process.env.AGENTOS_MATRIX_E2E === "1"; export default defineConfig({ test: { @@ -76,6 +82,7 @@ export default defineConfig({ exclude: [ ...configDefaults.exclude, ...(runFullE2e ? [] : [...SLOW_E2E_FILES, ...KNOWN_FAILING_E2E_FILES]), + ...(runMatrixE2e ? [] : MATRIX_E2E_FILES), ], }, }); diff --git a/website/src/content/docs/docs/agents/opencode.mdx b/website/src/content/docs/docs/agents/opencode.mdx index 0de716e1c..7b2c8ce09 100644 --- a/website/src/content/docs/docs/agents/opencode.mdx +++ b/website/src/content/docs/docs/agents/opencode.mdx @@ -57,6 +57,36 @@ OpenCode auto-detects a provider when its key is present on the session's `env`, See [LLM Credentials](/docs/llm-credentials), and OpenCode's [providers docs](https://opencode.ai/docs/providers/) for the full list. +## Model configuration + +To pin a specific model — or point a provider at a custom endpoint — write an OpenCode config file into the VM before creating the session. OpenCode reads `~/.config/opencode/opencode.json` (the agent's `HOME` is `/home/agentos` by default). + + + +```ts +// Write the config before creating the session +await agent.mkdir("/home/agentos/.config/opencode", { recursive: true }); +await agent.writeFile( + "/home/agentos/.config/opencode/opencode.json", + JSON.stringify({ + $schema: "https://opencode.ai/config.json", + model: "anthropic/claude-haiku-4-5-20251001", // use a current model id + provider: { + // The Anthropic baseURL MUST include /v1, or requests 404. 
+ anthropic: { options: { baseURL: "https://api.anthropic.com/v1" } }, + }, + }), +); + +const session = await agent.createSession("opencode", { + env: { ANTHROPIC_API_KEY: process.env.ANTHROPIC_API_KEY! }, +}); +``` + ## Skills OpenCode discovers `SKILL.md` files from its skills directory. Write the skill into the VM before creating a session and OpenCode loads it automatically. diff --git a/website/src/content/docs/docs/sessions.mdx b/website/src/content/docs/docs/sessions.mdx index 12dc80854..0c060249a 100644 --- a/website/src/content/docs/docs/sessions.mdx +++ b/website/src/content/docs/docs/sessions.mdx @@ -52,6 +52,10 @@ The second argument to `createSession` accepts: - **`additionalInstructions`**: text appended to the agent's system prompt. - **`skipOsInstructions`**: skip the base OS instructions injection. Tool documentation is still included. +## Agent-specific configuration + +Most agents need only a provider API key in `env` (see [LLM Credentials](/docs/llm-credentials)), but some require extra setup before a session — for example, OpenCode needs a model + provider config file written into the VM. See the per-agent pages under [Agents](/docs/agents/pi) — e.g. [OpenCode](/docs/agents/opencode) — for the details. + ## Send a prompt