diff --git a/.changeset/desktop-local-fetch.md b/.changeset/desktop-local-fetch.md new file mode 100644 index 0000000000..f91d3f99ab --- /dev/null +++ b/.changeset/desktop-local-fetch.md @@ -0,0 +1,6 @@ +--- +'@electric-ax/agents-desktop': patch +'@electric-ax/agents-server-ui': patch +--- + +Route local desktop mutating agents-server requests through the Electron main process so CORS preflights cannot stall behind renderer connection limits. diff --git a/.changeset/harden-pull-wake-runner.md b/.changeset/harden-pull-wake-runner.md new file mode 100644 index 0000000000..b61498f2f5 --- /dev/null +++ b/.changeset/harden-pull-wake-runner.md @@ -0,0 +1,7 @@ +--- +'@electric-ax/agents-runtime': patch +'@electric-ax/agents-server': patch +'@electric-ax/agents': patch +--- + +Harden pull-wake runner lifecycle with a state machine, heartbeat-driven stream resets, and exponential reconnect backoff (1s-30s). Add granular `status` field to `PullWakeRunnerHealth` (`stopped | starting | connecting | streaming | reconnecting | stopping`). The `onError` callback is now reporting-only (`(Error) => void`) - it can no longer control runner lifecycle. `stop()` rethrows `drainWakes` errors so callers observe wake handler failures. Event-driven heartbeat throttling avoids stale diagnostics between fixed-interval heartbeats. Durable Streams clients now append stream and `__ds` subscription control paths to the configured backend URL prefix without inferring a `/v1/stream` layout, so pull-wake subscriptions work behind arbitrary DS backend prefixes. Remove the stale `StreamClient.getConsumerState()` helper for the old Durable Streams `/consumers` endpoint. diff --git a/.changeset/local-desktop-principal.md b/.changeset/local-desktop-principal.md new file mode 100644 index 0000000000..ac23cf9885 --- /dev/null +++ b/.changeset/local-desktop-principal.md @@ -0,0 +1,6 @@ +--- +'@electric-ax/agents-desktop': patch +'@electric-ax/agents-server-ui': patch +--- + +Default unauthenticated local desktop sessions to the `system:dev-local` principal and resolve optimistic send principals at mutation time so pending messages do not render as `unknown`. diff --git a/.changeset/pull-wake-health-diagnostics.md b/.changeset/pull-wake-health-diagnostics.md new file mode 100644 index 0000000000..c57ab9c354 --- /dev/null +++ b/.changeset/pull-wake-health-diagnostics.md @@ -0,0 +1,9 @@ +--- +'@electric-ax/agents-server': patch +'@electric-ax/agents-runtime': patch +'@electric-ax/agents-desktop': patch +'@electric-ax/agents': patch +'electric-ax': patch +--- + +Add pull-wake runner health check endpoint and rename `owner_user_id` to `owner_principal` across the runners system. The `GET /_electric/runners/:id/health` endpoint returns comprehensive diagnostics including runner state, client-reported stream/heartbeat/claim metrics, active claims, and dispatch stats with a derived health status (healthy/degraded/unhealthy). The `PullWakeRunner` now tracks internal diagnostics and reports them to the server via heartbeats, stored in a separate `runner_runtime_diagnostics` table so the main `runners` shape stays stable for normal UI sync. The `owner_user_id` → `owner_principal` rename stores canonical principal URLs instead of keys, with strict validation and canonicalization at route boundaries. The migration expires active runner claims and deletes existing runner rows as part of the principal rewrite. This is a breaking change with no backward compatibility — all callers must send principal URLs. diff --git a/.changeset/pull-wake-session-startup-ui.md b/.changeset/pull-wake-session-startup-ui.md new file mode 100644 index 0000000000..b9cafee602 --- /dev/null +++ b/.changeset/pull-wake-session-startup-ui.md @@ -0,0 +1,5 @@ +--- +'@electric-ax/agents-server-ui': patch +--- + +Send new-session initial messages through the spawn request so pull-wake sessions can start without waiting for the UI to preload the entity stream. diff --git a/docs/agents-development.md b/docs/agents-development.md deleted file mode 100644 index 70e3ca6bc6..0000000000 --- a/docs/agents-development.md +++ /dev/null @@ -1,205 +0,0 @@ -# Electric Agents — Development Guide - -## Package overview - -The agents subsystem lives in seven packages under `packages/`: - -| Package | Description | -| --------------------------------- | ------------------------------------------------------------------------------------- | -| `agents-runtime` | Core runtime — entity definitions, context, handler lifecycle | -| `agents-mcp` | MCP (Model Context Protocol) bridge library used by built-in agents | -| `agents-server` | Orchestration server — wake registry, scheduling, Electric + Postgres integration | -| `agents` | Built-in agents (Horton & Worker) with tools (bash, read, write, edit, fetch, search) | -| `agents-server-ui` | React dashboard for agent monitoring and interaction | -| `agents-desktop` | Electron wrapper around `agents-server-ui` for a native desktop experience | -| `agents-server-conformance-tests` | Conformance test suite for agents-server | - -## Prerequisites - -- **Docker Desktop** running (for Postgres + Electric) -- **Node.js** and **pnpm** (see `.tool-versions` for exact versions) -- **`.env` file** at the project root with at least `ANTHROPIC_API_KEY` (needed by built-in agents). Both entrypoints call `process.loadEnvFile()` on startup, loading from the current working directory — so always run entrypoints from the project root. - -## Quick start: `./scripts/dev.sh` - -For day-to-day development, use the bundled dev script: - -```sh -./scripts/dev.sh build # one-shot install + build of all required packages -./scripts/dev.sh start # docker + 5 dev processes; Ctrl-C to stop -./scripts/dev.sh start --detach # same, but exits after spawning (logs to .dev-logs/) -./scripts/dev.sh start --with-agents # also spawn built-in agents (Horton + Worker) -./scripts/dev.sh desktop # run the Electron desktop app in this terminal -./scripts/dev.sh stop # stop processes + docker compose down -./scripts/dev.sh teardown # stop + remove Postgres volume + .streams-data/ -./scripts/dev.sh status # show which services are running -``` - -`desktop` is a separate command because the Electron app is interactive — it opens a window. Run it in its own terminal after `start` has the rest of the stack up; Ctrl-C in that terminal closes the app without touching the backing services. - -`build` covers `typescript-client`, `agents-runtime`, `agents-mcp`, `agents-server`, and `agents`. Re-run it after any dep change before restarting — entrypoints do not auto-restart on `dist/` rebuilds. - -**Built-in agents (`packages/agents`)** register against `agents-server` at startup and will fail with `Stream not found` if they race ahead of it. Pass `--with-agents` to `start` to spawn them after `agents-server` binds `:4437`. Without the flag, run them manually in a separate terminal once `start` reports the server is up — Ctrl-C in that terminal stops only the built-in agents: - -```sh -ELECTRIC_AGENTS_SERVER_URL=http://localhost:4437 \ - node packages/agents/dist/entrypoint.js -``` - -The rest of this document describes the manual flow that the script automates. - -## Starting the dev environment - -All commands below assume you are in the project root. All `pnpm dev` commands use `tsdown --watch` (or Vite for the UI) — they do an initial build then watch for changes. The build order matters because packages import from each other's `dist/`. - -### Step 1 — Install dependencies and build workspace prerequisites - -In a fresh checkout or worktree, workspace packages have no `dist/` directories. The agent packages depend on `@electric-sql/client` (the typescript-client) and on `@electric-ax/agents-mcp` at runtime, so both must be built before starting any agent server. - -```sh -pnpm install -pnpm -C packages/typescript-client build -pnpm -C packages/agents-mcp build -``` - -### Step 2 — Start backing services (Postgres + Electric + Jaeger) - -```sh -docker compose -f packages/agents-server/docker-compose.dev.yml up -d -``` - -Services will be available at: - -- PostgreSQL: `localhost:5432` (electric_agents/electric_agents) -- Electric API: `http://localhost:3060` -- Jaeger UI: `http://localhost:16686` (tracing) - -### Step 3 — Build agents-runtime - -`agents-server` and `agents` both depend on `agents-runtime`, so it must be built first. - -```sh -pnpm -C packages/agents-runtime dev -# wait for "Build complete" before step 4 -``` - -### Step 4 — Build agents-server and agents - -These can be started in parallel once the runtime is built. - -```sh -# Terminal 2: -pnpm -C packages/agents-server dev - -# Terminal 3: -pnpm -C packages/agents dev -``` - -Wait for both "Build complete" messages before step 5. - -### Step 5 — Start the server processes - -Run entrypoints from the project root so they pick up the root `.env` file. - -```sh -# Terminal 4: agents-server -DATABASE_URL=postgresql://electric_agents:electric_agents@localhost:5432/electric_agents \ - ELECTRIC_AGENTS_ELECTRIC_URL=http://localhost:3060 \ - ELECTRIC_INSECURE=true \ - node packages/agents-server/dist/entrypoint.js -``` - -The agents-server will start on `http://localhost:4437` with an embedded durable streams server. - -```sh -# Terminal 5: built-in agents (Horton + Worker) -ELECTRIC_AGENTS_SERVER_URL=http://localhost:4437 \ - node packages/agents/dist/entrypoint.js -``` - -The built-in agents server starts on `http://localhost:4448` and auto-registers Horton and Worker entity types. - -### Step 6 — Start the agents UI dashboard - -```sh -pnpm -C packages/agents-server-ui dev -``` - -Vite dev server with HMR — changes appear instantly. - -## Environment variables reference - -### agents-server - -| Variable | Default | Description | -| ------------------------------------- | --------- | --------------------------------------------------- | -| `DATABASE_URL` | — | Postgres connection URL (required) | -| `ELECTRIC_AGENTS_ELECTRIC_URL` | — | Electric sync service URL | -| `ELECTRIC_AGENTS_HOST` | `0.0.0.0` | Bind address | -| `ELECTRIC_AGENTS_PORT` | `4437` | Server port | -| `ELECTRIC_AGENTS_BASE_URL` | — | Public webhook base URL | -| `ELECTRIC_AGENTS_STREAMS_DATA_DIR` | — | Local streams data directory | -| `ELECTRIC_AGENTS_DURABLE_STREAMS_URL` | — | External durable streams URL (omit to use embedded) | - -### agents (built-in) - -| Variable | Default | Description | -| ------------------------------ | ----------- | ---------------------------- | -| `ELECTRIC_AGENTS_SERVER_URL` | — | agents-server URL (required) | -| `ANTHROPIC_API_KEY` | — | Claude API key (required) | -| `ELECTRIC_AGENTS_BUILTIN_HOST` | `127.0.0.1` | Bind address | -| `ELECTRIC_AGENTS_BUILTIN_PORT` | `4448` | Server port | - -## Running tests - -```sh -# Runtime unit tests (no services needed) -cd packages/agents-runtime -pnpm test - -# Server tests (requires Postgres + Electric via docker-compose.dev.yml) -cd packages/agents-server -pnpm test - -# Built-in agents tests -cd packages/agents -pnpm test - -# All with coverage -pnpm coverage # in any agent package -``` - -## Iterating on agent packages - -All agent packages use `tsdown` for building. The `pnpm dev` command in each starts a watch-mode rebuild, so changes are picked up automatically. - -- **Runtime changes** (`agents-runtime`): Rebuild propagates to `agents-server` and `agents` since they depend on it via workspace links. -- **Server changes** (`agents-server`): Restart `node dist/entrypoint.js` after rebuild (watch mode rebuilds but does not restart the process). -- **Agent logic changes** (`agents`): Same — restart the entrypoint after rebuild. -- **UI changes** (`agents-server-ui`): Vite HMR — changes appear instantly. - -## Working with examples - -The `examples/deep-survey` example demonstrates a custom agent with its own entity types: - -```sh -cd examples/deep-survey -pnpm install -pnpm dev # starts both server (tsx watch) and UI (vite) in parallel -``` - -It requires the agents-server backing services (Postgres + Electric) to be running. - -## Local state - -- **Postgres** (docker volume) — entity types, entities, wake registrations, scheduling state. -- **Durable streams** — in-memory by default in dev. Data resets on server restart. Set `ELECTRIC_AGENTS_STREAMS_DATA_DIR` to persist streams to disk (uses lmdb + log files). - -To clear all state: stop the servers and run `docker compose down -v` to remove the Postgres volume. - -## Teardown - -```sh -docker compose -f packages/agents-server/docker-compose.dev.yml down # stop services -docker compose -f packages/agents-server/docker-compose.dev.yml down -v # stop + remove volumes -``` diff --git a/docs/agents-principals-implementation-plan.md b/docs/agents-principals-implementation-plan.md deleted file mode 100644 index 81bce2700a..0000000000 --- a/docs/agents-principals-implementation-plan.md +++ /dev/null @@ -1,789 +0,0 @@ -# Principals implementation plan - -Issue: - -## Goal - -Add **principals** as a first-class entity type so every action in the agents system traces to an owning identity. - -Principals are entity streams addressed as: - -```txt -/principal/user:kyle -/principal/agent:ci-bot -/principal/service:github -/principal/system:framework -/principal/system:dev-local -``` - -Inbound requests carry the principal in a trusted header set by edge/auth middleware: - -```txt -Electric-Principal: user:kyle -``` - -In local/dev mode, missing headers default to: - -```txt -system:dev-local -``` - -Because agents are pre-release, there is no backwards compatibility path for unauthenticated/no-principal requests. API routes should error if the request has no principal. - -## Request context - -Move request identity from user-centric naming to principal-centric naming: - -```ts -// before -authenticatedUser?: AuthenticatedRequestUser - -// after -principal: Principal -``` - -All routes, including internal routes, should include a principal. Use one principal-aware entry point into the verbs instead of parallel unauthenticated/internal code paths. - -There are no first-class "users" in the agents runtime; there are only principals, one kind of which may be `user`. - -## Creator field - -Use **`created_by`** for the immutable entity creator/owner field. It stores a principal entity URL, e.g. `/principal/user:kyle`. - -## Principal model - -Add a new module: - -```txt -packages/agents-server/src/principal.ts -``` - -Types: - -```ts -export type PrincipalKind = 'user' | 'agent' | 'service' | 'system' - -export interface Principal { - kind: PrincipalKind - id: string - key: string // `${kind}:${id}` - url: string // `/principal/${kind}:${id}` -} -``` - -Header constant: - -```ts -export const ELECTRIC_PRINCIPAL_HEADER = 'electric-principal' -``` - -Helpers: - -```ts -export function parsePrincipalKey(input: string): Principal -export function principalUrl(key: string): string -export function principalKeyFromUrl(url: string): string | null -export function getPrincipalFromRequest(request: Request): Principal | null -export function getDevPrincipal(): Principal -``` - -Validation rules: - -- Principal key is `{kind}:{id}`. -- Split on the first colon only. -- Additional colons are allowed in the id so principals can use ids from external systems. -- Kind is one of: - - `user` - - `agent` - - `service` - - `system` -- ID must be non-empty. -- ID must not contain `/`. - -Examples: - -```txt -user:kyle ✅ -agent:ci-bot ✅ -service:github ✅ -system:framework ✅ -system:dev-local ✅ -user:clerk:user_123 ✅ id contains additional colon -service:github:installation ✅ id contains additional colon -user:/kyle ❌ slash -admin:kyle ❌ unknown kind -``` - -## Request principal extraction - -Wire request extraction wherever the server builds `TenantContext`. - -Likely files to inspect/change: - -```txt -packages/agents-server/src/host.ts -packages/agents-server/src/routing/global-router.ts -packages/agents-server/src/entrypoint-lib.ts -packages/agents-server/src/dev-asserted-auth.ts -packages/agents-server/src/authenticated-user-format.ts -packages/agents-server/src/electric-agents-types.ts -``` - -The `authenticated-user-format` module may become obsolete or should be renamed/reworked as a principal formatter/parser. - -Desired behavior: - -```ts -const headerValue = request.headers.get('electric-principal') - -const principal = headerValue - ? parsePrincipalKey(headerValue) - : isDevOrInsecure - ? getDevPrincipal() // system:dev-local - : null -``` - -If no principal exists, return an auth/invalid-request error. - -As part of this, replace user-centric request auth names with principal-centric names: - -- `AuthenticatedRequestUser` → `AuthenticatedRequestPrincipal` or just `RequestPrincipal` -- `AuthenticateRequest` should return a `Principal`/principal assertion, not a user object -- `ctx.authenticatedUser` → `ctx.principal` -- fields such as `userId` should become principal fields, e.g. `principal.key`, `principal.url`, `principal.kind`, `principal.id` - -The agents server trusts this header. Auth middleware/proxy is responsible for setting it correctly. - -## Built-in `principal` entity type - -Principals must be normal entities, so ensure a built-in entity type named `principal` exists. - -Add to `PostgresRegistry`: - -```ts -async ensureEntityType(et: ElectricAgentsEntityType): Promise -``` - -Behavior: - -- Insert if missing. -- If present, return existing unchanged. -- Do not bump revisions on every server startup. - -Seed at server/runtime startup: - -```ts -await registry.ensureEntityType({ - name: 'principal', - description: 'built-in principal entity', - inbox_schemas: { - update_identity: principalUpdateIdentityMessageSchema, - }, - state_schemas: { - identity: principalIdentityStateSchema, - }, - revision: 1, - created_at: now, - updated_at: now, -}) -``` - -The `principal` entity type has one built-in state collection: - -- `identity` — trusted profile/identity information for the principal. - -The `principal` entity type has one built-in inbox message: - -- `update_identity` — request to create/update the `identity` state row. - -The `principal` entity type is immutable from user/API code. It is created and modified only by system code. - -## Principal identity state - -Add built-in schema definitions for principal identity. - -Identity state row: - -```ts -const principalIdentityStateSchema = { - type: 'object', - additionalProperties: false, - required: ['kind', 'id', 'key', 'url', 'updated_at'], - properties: { - kind: { enum: ['user', 'agent', 'service', 'system'] }, - id: { type: 'string' }, - key: { type: 'string' }, - url: { type: 'string' }, - display_name: { type: 'string' }, - email: { type: 'string' }, - avatar_url: { type: 'string' }, - auth_provider: { type: 'string' }, - auth_subject: { type: 'string' }, - claims: { - type: 'object', - additionalProperties: true, - }, - created_at: { type: 'string' }, - updated_at: { type: 'string' }, - }, -} -``` - -Identity state uses a single stable key: - -```txt -identity/self -``` - -Update message schema: - -```ts -const principalUpdateIdentityMessageSchema = { - type: 'object', - additionalProperties: false, - required: ['identity'], - properties: { - identity: principalIdentityStateSchema, - }, -} -``` - -The `update_identity` inbox message is how principal identity is created/updated. Anyone can target a principal entity with this message shape at the protocol/schema level, but authorization must restrict who is allowed to send it. - -In Electric Cloud, a built-in system entity should send `update_identity` when: - -- a user logs in via Google/SSO/etc. -- a CI bot principal is created -- a service integration principal is provisioned -- identity/profile data changes in the upstream auth system - -Non-system principals should not be allowed to send `update_identity` unless explicitly authorized by deployment-specific policy. - -## Persistence changes - -Add migration: - -```txt -packages/agents-server/drizzle/0006_principals.sql -``` - -SQL: - -```sql -ALTER TABLE entities - ADD COLUMN created_by text; - -CREATE INDEX idx_entities_created_by - ON entities (tenant_id, created_by); -``` - -Update Drizzle schema in: - -```txt -packages/agents-server/src/db/schema.ts -``` - -Add to `entities`: - -```ts -createdBy: text(`created_by`), -``` - -Update server types in: - -```txt -packages/agents-server/src/electric-agents-types.ts -``` - -Add to `ElectricAgentsEntity`: - -```ts -created_by?: string -``` - -Add to `PublicElectricAgentsEntity`: - -```ts -created_by?: string -``` - -Add to `TypedSpawnRequest`: - -```ts -created_by?: string -``` - -Update `toPublicEntity()` to include `created_by`. - -Update registry in: - -```txt -packages/agents-server/src/entity-registry.ts -``` - -- `createEntity()` writes `createdBy: entity.created_by ?? null` -- `rowToEntity()` reads `created_by` -- `listEntities()` accepts `created_by?: string` -- `listEntities()` filters on `entities.createdBy` - -Update route list filtering in: - -```txt -packages/agents-server/src/routing/entities-router.ts -``` - -Support: - -```txt -GET /_electric/entities?created_by=/principal/user:kyle -``` - -## Lazy principal materialization - -Add to `EntityManager`: - -```ts -async ensurePrincipal(principal: Principal): Promise -``` - -Behavior: - -1. Check `registry.getEntity(principal.url)`. -2. If found, return it. -3. If missing, create a `principal` entity at that URL. - -Principal spawn details: - -```ts -await this.spawn('principal', { - instance_id: principal.key, - args: { - kind: principal.kind, - id: principal.id, - key: principal.key, - }, - tags: { - principal_kind: principal.kind, - principal_id: principal.id, - }, - created_by: principal.url, -}) -``` - -On creation, also initialize `identity/self` with the built-in identity state: - -```ts -{ - kind: principal.kind, - id: principal.id, - key: principal.key, - url: principal.url, - created_at: now, - updated_at: now, -} -``` - -If trusted auth/profile claims are available during materialization, include the mapped fields in `identity/self`. - -Need to avoid recursive principal creation. Either: - -- Add an internal spawn option such as `{ skipPrincipalEnsure: true }`, or -- Implement `ensurePrincipal()` using a lower-level helper that creates the entity without trying to ensure `created_by` first. - -Recommended internal rule for `created_by`: - -```ts -const createdBy = req.created_by ?? parentEntity?.created_by -``` - -This means child/worker agents inherit the initiating principal from their parent unless explicitly overridden. - -For principal entities themselves, `created_by` can be their own URL: - -```txt -/principal/user:kyle created_by=/principal/user:kyle -/principal/system:dev-local created_by=/principal/system:dev-local -``` - -## Route behavior - -File: - -```txt -packages/agents-server/src/routing/entities-router.ts -``` - -### Principal route materialization - -Current `withExistingEntity()` returns 404 if the entity is missing. Adjust for principal URLs: - -```ts -if (!entity && request.params.type === 'principal') { - const principal = parsePrincipalKey(request.params.instanceId) - const materialized = await ctx.entityManager.ensurePrincipal(principal) - request.entityRoute = { entityUrl, entity: materialized } - return undefined -} -``` - -This enables: - -```txt -POST /_electric/entities/principal/user:bob/send -``` - -to create Bob's principal stream on first reference. - -### Spawn - -In `spawnEntity()`: - -1. Require `ctx.principal`. -2. Ensure the inbound principal exists. -3. Pass `created_by: ctx.principal.url` to `entityManager.spawn()`. -4. Use principal as the initial message sender. - -Pseudo-code: - -```ts -const principal = requirePrincipal(ctx) -await ctx.entityManager.ensurePrincipal(principal) - -const entity = await ctx.entityManager.spawn(request.params.type, { - instance_id: request.params.instanceId, - args: parsed.args, - tags: parsed.tags, - parent: parsed.parent, - dispatch_policy: dispatchPolicy, - initialMessage: undefined, - wake: parsed.wake, - created_by: principal.url, -}) - -if (parsed.initialMessage !== undefined) { - await ctx.entityManager.send(entity.url, { - from: principal.url, - payload: parsed.initialMessage, - }) -} -``` - -### Send - -In `sendEntity()`: - -1. Require `ctx.principal`. -2. Ensure the inbound principal exists. -3. Default `from` to `ctx.principal.url`. -4. Reject client-supplied `from` if present and not equal to `ctx.principal.url`. - -Recommended v1 security posture: - -- HTTP `send` should not allow arbitrary `from` spoofing. -- Use `ctx.principal.url` as sender. -- Internal APIs/tools should also pass through the same principal-aware verb entry point. - -So update route behavior to: - -```ts -await ctx.entityManager.send(entityUrl, { - from: principal.url, - payload: parsed.payload, - key: parsed.key, - type: parsed.type, -}) -``` - -Do not allow callers to assert arbitrary principals via body. The request/context principal is the sender. - -### Principal identity updates - -Principal entities accept an `update_identity` inbox message, but ordinary principals must not be able to send it by default. - -Route/send authorization should enforce: - -- `update_identity` to `/principal/*` is allowed from built-in system principals. -- `update_identity` from non-system principals is rejected unless deployment policy explicitly allows it. -- Other messages to `/principal/*` can continue through normal send authorization. - -This preserves the uniform send mechanism while letting Electric Cloud run a built-in system entity that creates/updates principals from trusted auth events. - -### Sharing/authz tags - -Sharing is app-specific and should not be first-class in this PR. Apps can build sharing systems with tags or entity state. - -A future PR may reserve protected tag namespaces such as `share:*`, `acl:*`, `authz:*`, or `system:*`, but this principals PR does not implement protected tag namespaces or tag authorization rules. - -### Schedule/future-send - -Future-send route currently accepts `from` in body. - -For the same anti-spoofing reason, schedule routes should ignore/reject body `from` and use `ctx.principal.url`: - -```ts -from: principal.url -``` - -## Inter-principal messaging - -Once principal entities are lazy-materialized, the existing send mechanism works: - -```http -POST /_electric/entities/principal/user:bob/send -Electric-Principal: user:kyle -Content-Type: application/json - -{ - "payload": { "text": "hello" } -} -``` - -Result: - -- `/principal/user:kyle` is ensured. -- `/principal/user:bob` is ensured. -- Bob's principal inbox receives a message with: - -```ts -from: '/principal/user:kyle' -``` - -## Handler/runtime context - -Issue requirement: - -> Authorization v1: Handler decides which tools/functions to expose based on principal context. - -Handlers need access to principal information. - -Likely files to inspect/change: - -```txt -packages/agents-runtime/src/create-handler.ts -packages/agents-runtime/src/setup-context.ts -packages/agents-runtime/src/types.ts -packages/agents-server/src/entity-manager.ts // enrichPayload() -``` - -`EntityManager.enrichPayload()` currently injects `entity` info into webhook payloads. Add: - -```ts -entity: { - ..., - createdBy: entity.created_by, -}, -principal: entity.created_by - ? { - url: entity.created_by, - key: principalKeyFromUrl(entity.created_by), - } - : undefined, -``` - -Then expose this through runtime handler context as: - -```ts -ctx.principal -ctx.entity.created_by -``` - -This enables handler-level authorization: - -```ts -const tools = ctx.principal?.kind === 'user' ? userTools : serviceTools - -await runAgent({ tools }) -``` - -## Authorization v1 - -Keep authorization flexible and handler-level. - -What this implementation should do now: - -- Identify the inbound principal. -- Persist owner/creator on spawned agents. -- Prevent routes from spoofing `from` in request bodies. -- Materialize principal streams lazily. -- Expose principal to handlers. - -What this implementation should **not** do yet: - -- Capability expressions in entity streams. -- Named capability sets. -- Delegation semantics. -- General cross-principal policy engine beyond the built-in `update_identity` restriction. -- First-class sharing system, protected tag namespaces, public sharing links, or signed URLs. -- Principal garbage collection. - -## Tests - -### Principal parser tests - -File: - -```txt -packages/agents-server/test/principal.test.ts -``` - -Cases: - -- `user:kyle` → `/principal/user:kyle` -- `agent:ci-bot` → `/principal/agent:ci-bot` -- `service:github` → `/principal/service:github` -- `system:framework` → `/principal/system:framework` -- `system:dev-local` → `/principal/system:dev-local` -- reject missing colon -- allow additional colons in the id, e.g. `user:clerk:user_123` -- reject slash -- reject empty id -- reject unknown kind - -### Spawn records owner principal - -- Send `PUT /_electric/entities//` with `Electric-Principal: user:kyle`. -- Assert spawned entity has: - -```ts -created_by: '/principal/user:kyle' -``` - -- Assert `/principal/user:kyle` exists. - -### Child spawn inherits `created_by` - -Using `EntityManager.spawn()` directly: - -1. Create parent with `created_by: '/principal/user:kyle'`. -2. Spawn child with `parent` and no explicit `created_by`. -3. Assert child has same `created_by`. - -### Send uses principal as `from` - -- Create an entity. -- `POST /send` with `Electric-Principal: user:kyle` and no body `from`. -- Read stream. -- Assert inbox event value has: - -```ts -from: '/principal/user:kyle' -``` - -### Public send does not allow spoofed `from` - -- `POST /send` with `Electric-Principal: user:kyle` and body `from: '/principal/user:alice'`. -- Assert the route rejects the request with 400/422. - -### Sending to unmaterialized principal creates it - -- `POST /_electric/entities/principal/user:bob/send` -- Header: `Electric-Principal: user:kyle` -- Assert: - - `/principal/user:bob` exists. - - `/principal/user:kyle` exists. - - Bob's inbox has `from: /principal/user:kyle`. - -### Principal identity is initialized - -- Materialize `/principal/user:kyle`. -- Assert its state contains `identity/self` with: - -```ts -{ - kind: 'user', - id: 'kyle', - key: 'user:kyle', - url: '/principal/user:kyle', -} -``` - -### System principal can update identity - -- Send `update_identity` to `/principal/user:kyle` from a built-in system principal. -- Assert `identity/self` is updated with trusted fields such as `email`, `display_name`, `auth_provider`, and `auth_subject`. - -### Non-system principal cannot update identity - -- Send `update_identity` to `/principal/user:kyle` from `/principal/user:alice`. -- Assert the route rejects the request with 401/403. - -### Missing principal in production fails - -- Simulate production/non-dev/non-insecure context. -- Spawn/send without `Electric-Principal`. -- Assert 401/400. - -### Dev fallback - -- Simulate dev/insecure context. -- Spawn/send without header. -- Assert: - -```ts -ctx.principal.url === '/principal/system:dev-local' -created_by === '/principal/system:dev-local' -``` - -### List by owner - -- Spawn two agents under `user:kyle` and one under `user:alice`. -- Request: - -```txt -GET /_electric/entities?created_by=/principal/user:kyle -``` - -- Assert only Kyle's entities are returned. - -## Implementation order - -### Phase 1 — Types and persistence - -1. Add `principal.ts` parser/helpers. -2. Add `created_by` migration. -3. Update Drizzle schema. -4. Update entity types/public types. -5. Update registry create/read/list. - -### Phase 2 — Built-in principal type - -6. Add `PostgresRegistry.ensureEntityType()`. -7. Add built-in `principal` identity state and `update_identity` inbox schemas. -8. Seed built-in `principal` entity type during server startup. - -### Phase 3 — Context and route behavior - -9. Replace user-centric request context with principal-centric context: - - `AuthenticatedRequestUser` → principal-oriented type - - `ctx.authenticatedUser` → `ctx.principal` - - update/rename `authenticated-user-format.ts` if still needed -10. Wire header extraction and dev fallback. -11. Require principal for all API routes. -12. Lazy-materialize `/principal/*` in `withExistingEntity()`. -13. Ensure inbound principal during spawn/send. -14. Persist `created_by` on spawn. -15. Use principal as `from` for send/initial messages/schedules. -16. Enforce the built-in `update_identity` send restriction for principal entities. - -### Phase 4 — Runtime handler context - -17. Include `createdBy`/principal in webhook enrichment. -18. Expose `ctx.principal` in runtime handler context. - -### Phase 5 — Tests/docs - -19. Add parser tests. -20. Add spawn/send/materialization/list tests. -21. Add identity initialization and `update_identity` authorization tests. -22. Update agents development docs with header/trust-boundary/dev fallback notes. - -## Implementation decisions - -1. All API routes require a principal. Missing principal is an error, except local/dev mode where the server supplies `system:dev-local`. -2. Internal routes/code paths should include a principal too. There should be one principal-aware entry point into verbs. -3. Request/body `from` must not spoof principals. Use `ctx.principal.url` as the sender; reject mismatches. -4. `created_by` is immutable. -5. The `principal` entity type is immutable from user/API code and may only be created/modified by system code. -6. Principal identity lives in built-in `identity/self` state. -7. Principal identity updates use the built-in `update_identity` inbox message and are restricted to built-in system principals unless deployment policy explicitly allows otherwise. -8. Sharing is app-specific and out of scope for this PR. Apps may use tags or entity state; protected tag namespaces can be added in a future PR. diff --git a/docs/agents-principals-implementation-plan.pdf b/docs/agents-principals-implementation-plan.pdf deleted file mode 100644 index 2b977d6425..0000000000 Binary files a/docs/agents-principals-implementation-plan.pdf and /dev/null differ diff --git a/packages/agents-desktop/README.md b/packages/agents-desktop/README.md index a89244ef62..5f2698e89c 100644 --- a/packages/agents-desktop/README.md +++ b/packages/agents-desktop/README.md @@ -11,19 +11,22 @@ Desktop app for Electric Agents, built with Electron. ### Running the dev server ```bash -ELECTRIC_DESKTOP_PRINCIPAL="system:dev-local" pnpm dev +pnpm dev ``` This starts both the UI dev server (with HMR) and the Electron main process. +For a local unauthenticated agents-server, desktop defaults the pull-wake +runner owner to the same `system:dev-local` principal that agents-server uses in +dev fallback mode. ### Environment variables -| Variable | Default | Description | -| -------------------------------------------- | ------------------ | --------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `ELECTRIC_DESKTOP_PRINCIPAL` | _(none)_ | Sets the `electric-principal` header on all requests to the agents-server. Use `system:dev-local` for local development without auth. | -| `ELECTRIC_DESKTOP_PULL_WAKE_OWNER_USER_ID` | `local-desktop` | Override the `owner_user_id` used when registering the pull-wake runner. When `ELECTRIC_DESKTOP_PRINCIPAL` is set, this is derived from it automatically. | -| `ELECTRIC_DESKTOP_PULL_WAKE_RUNNER_ID` | _(auto-generated)_ | Fixed runner ID for the pull-wake runner. | -| `ELECTRIC_DESKTOP_PULL_WAKE_REGISTER_RUNNER` | `true` | Set to `false` to skip runner registration (runner must already exist on the server). | +| Variable | Default | Description | +| -------------------------------------------- | ------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `ELECTRIC_DESKTOP_PRINCIPAL` | _(none)_ | Sets the `electric-principal` header on all requests to the agents-server. Usually unnecessary for local development because agents-server falls back to `system:dev-local`. | +| `ELECTRIC_DESKTOP_PULL_WAKE_OWNER_PRINCIPAL` | `/principal/system%3Adev-local` | Override the `owner_principal` used when registering the pull-wake runner. When `ELECTRIC_DESKTOP_PRINCIPAL` is set, this is derived from it automatically. | +| `ELECTRIC_DESKTOP_PULL_WAKE_RUNNER_ID` | _(auto-generated)_ | Fixed runner ID for the pull-wake runner. | +| `ELECTRIC_DESKTOP_PULL_WAKE_REGISTER_RUNNER` | `true` | Set to `false` to skip runner registration (runner must already exist on the server). | ### Settings diff --git a/packages/agents-desktop/src/main.ts b/packages/agents-desktop/src/main.ts index 801db292ac..4f886b867e 100644 --- a/packages/agents-desktop/src/main.ts +++ b/packages/agents-desktop/src/main.ts @@ -98,6 +98,21 @@ type DesktopState = { pullWakeRunnerId: string | null } +type DesktopServerFetchRequest = { + url: string + method: string + headers: Record + body: string | null +} + +type DesktopServerFetchResponse = { + url: string + status: number + statusText: string + headers: Record + body: string +} + type ServerConnectionState = { serverId: string status: ServerConnectionStatus @@ -204,7 +219,7 @@ const APP_ICON_FILE = process.platform === `darwin` ? `icon-mac.png` : `icon.png` const APP_ICON_PATH = path.join(RESOURCE_DIR, `assets`, APP_ICON_FILE) const APP_DISPLAY_NAME = `Electric Agents` -const MAX_CONNECTIONS_PER_HOST = `256` +const IGNORE_CONNECTION_LIMIT_DOMAINS = `localhost,127.0.0.1` const SETTINGS_VERSION = 2 const GLOBAL_API_KEYS_REF = `api-keys:global` const RECONNECT_BASE_MS = 1_000 @@ -229,11 +244,16 @@ if (DESKTOP_USER_DATA_DIR) { const MCP_OAUTH_REDIRECT_BASE = `http://127.0.0.1:53117` // Electric streams can hold many long-polling HTTP requests open to the same -// agents server. Raise Chromium's default per-host connection cap before -// Electron creates its network context so those streams do not queue behind it. +// local agents server. Electron supports bypassing Chromium's connection cap +// for a domain list; this must run before Electron creates its network context. app.commandLine.appendSwitch( - `max-connections-per-host`, - MAX_CONNECTIONS_PER_HOST + `ignore-connections-limit`, + IGNORE_CONNECTION_LIMIT_DOMAINS +) +console.info( + `[agents-desktop] ignore-connections-limit=${app.commandLine.getSwitchValue( + `ignore-connections-limit` + )}` ) /** @@ -253,10 +273,11 @@ const PULL_WAKE_REGISTER_RUNNER = : [`1`, `true`].includes( process.env.ELECTRIC_DESKTOP_PULL_WAKE_REGISTER_RUNNER.trim().toLowerCase() ) -const PULL_WAKE_OWNER_USER_ID = - process.env.ELECTRIC_DESKTOP_PULL_WAKE_OWNER_USER_ID?.trim() || - `local-desktop` -const DEV_PRINCIPAL = ((): string | null => { +const PULL_WAKE_OWNER_PRINCIPAL = + process.env.ELECTRIC_DESKTOP_PULL_WAKE_OWNER_PRINCIPAL?.trim() || + `/principal/system%3Adev-local` +const DEFAULT_LOCAL_DEV_PRINCIPAL = `system:dev-local` +const EXPLICIT_DEV_PRINCIPAL = ((): string | null => { const raw = process.env.ELECTRIC_DESKTOP_PRINCIPAL?.trim() || null if (!raw) return null const colon = raw.indexOf(`:`) @@ -271,6 +292,7 @@ const DEV_PRINCIPAL = ((): string | null => { return raw })() const ELECTRIC_PRINCIPAL_HEADER = `electric-principal` +const PRINCIPAL_KEY_PREFIXES = new Set([`user`, `agent`, `service`, `system`]) function mergeHeaders( ...sources: Array | undefined> @@ -291,15 +313,32 @@ function hasHeader( return headers ? new Headers(headers).has(name) : false } -function runnerOwnerUserIdFromHeaders( +function runnerOwnerPrincipalFromHeaders( headers: Record | undefined -): string { +): string | undefined { const normalized = new Headers(headers) - return ( - normalized.get(`authorization`)?.trim() || - normalized.get(ELECTRIC_PRINCIPAL_HEADER)?.trim() || - PULL_WAKE_OWNER_USER_ID - ) + const principalKey = normalized.get(ELECTRIC_PRINCIPAL_HEADER)?.trim() + if (principalKey) { + return principalKey.startsWith(`/principal/`) + ? principalKey + : `/principal/${encodeURIComponent(principalKey)}` + } + if (normalized.has(`authorization`)) return undefined + return PULL_WAKE_OWNER_PRINCIPAL +} + +function runnerOwnerPrincipalFromUserId( + userId: string | null | undefined +): string | undefined { + const trimmed = userId?.trim() + if (!trimmed) return undefined + if (trimmed.startsWith(`/principal/`)) return trimmed + const colon = trimmed.indexOf(`:`) + const principalKey = + colon > 0 && PRINCIPAL_KEY_PREFIXES.has(trimmed.slice(0, colon)) + ? trimmed + : `user:${trimmed}` + return `/principal/${encodeURIComponent(principalKey)}` } /** @@ -518,12 +557,40 @@ function findCloudServerForUrl(requestUrl: string): ServerConfig | null { return fallbackMatches.length === 1 ? fallbackMatches[0]! : null } +function findSavedServerForUrl(requestUrl: string): ServerConfig | null { + let parsed: URL + try { + parsed = new URL(requestUrl) + } catch { + return null + } + + for (const server of settings.servers) { + let base: URL + try { + base = new URL(server.url) + } catch { + continue + } + if (base.origin !== parsed.origin) continue + const basePath = base.pathname.replace(/\/+$/, ``) + if ( + basePath === `` || + parsed.pathname === basePath || + parsed.pathname.startsWith(`${basePath}/`) + ) { + return server + } + } + return null +} + /** - * Decorate outgoing requests bound for a saved cloud agent server - * with `Authorization: Bearer ` and - * `x-electric-service: ` headers. Two injection points, - * both reading from the same in-memory agents-token map - * (`SecretStore`-backed): + * Decorate outgoing requests bound for saved agent servers with the + * configured server headers. Cloud agent servers also receive + * `Authorization: Bearer ` and `x-electric-service: + * ` headers. Two injection points, both reading from the + * same in-memory agents-token map (`SecretStore`-backed): * * 1. Renderer fetches — Electron's * `session.webRequest.onBeforeSendHeaders` hook catches anything @@ -543,7 +610,10 @@ function findCloudServerForUrl(requestUrl: string): ServerConfig | null { */ function installCloudAuthHeaderInjection(): void { session.defaultSession.webRequest.onBeforeSendHeaders((details, callback) => { - const extra = buildCloudAuthHeaders(details.url) + const extra = mergeHeaders( + buildSavedServerHeaders(details.url) ?? undefined, + buildCloudAuthHeaders(details.url) ?? undefined + ) if (!extra) { callback({ requestHeaders: details.requestHeaders }) return @@ -556,6 +626,86 @@ function installCloudAuthHeaderInjection(): void { installCloudAuthUndiciInterceptor() } +function buildSavedServerHeaders(url: string): Record | null { + const server = findSavedServerForUrl(url) + if (!server) return null + return mergeHeaders(injectDevPrincipalHeaders(server).headers) ?? null +} + +function assertDesktopServerFetchAllowed( + request: unknown +): DesktopServerFetchRequest { + if (!request || typeof request !== `object`) { + throw new Error(`Invalid desktop server fetch request`) + } + const raw = request as Partial + if (typeof raw.url !== `string` || raw.url.trim().length === 0) { + throw new Error(`Invalid desktop server fetch URL`) + } + if (typeof raw.method !== `string` || raw.method.trim().length === 0) { + throw new Error(`Invalid desktop server fetch method`) + } + if (!raw.headers || typeof raw.headers !== `object`) { + throw new Error(`Invalid desktop server fetch headers`) + } + if (raw.body !== null && typeof raw.body !== `string`) { + throw new Error(`Invalid desktop server fetch body`) + } + + const url = raw.url.trim() + const method = raw.method.trim().toUpperCase() + if (![`POST`, `PUT`, `PATCH`, `DELETE`].includes(method)) { + throw new Error(`Desktop server fetch only supports mutating requests`) + } + const server = findSavedServerForUrl(url) + if (!server || server.source === `electric-cloud`) { + throw new Error( + `Desktop server fetch is only available for saved local servers` + ) + } + let parsed: URL + try { + parsed = new URL(url) + } catch { + throw new Error(`Invalid desktop server fetch URL`) + } + if ( + parsed.protocol !== `http:` || + !isLocalLoopbackHostname(parsed.hostname) + ) { + throw new Error(`Desktop server fetch only supports local HTTP servers`) + } + + return { + url, + method, + headers: normalizeHeaderRecord(raw.headers) ?? {}, + body: raw.body, + } +} + +async function desktopServerFetch( + request: unknown +): Promise { + const checked = assertDesktopServerFetchAllowed(request) + const headers = mergeHeaders( + buildSavedServerHeaders(checked.url) ?? undefined, + checked.headers + ) + const response = await fetch(checked.url, { + method: checked.method, + headers, + body: checked.body, + }) + return { + url: response.url, + status: response.status, + statusText: response.statusText, + headers: headersToRecord(response.headers), + body: await response.text(), + } +} + /** * Build the cloud-auth headers to inject on a request to `url`, or * `null` if the URL doesn't target a saved cloud agent server (or we @@ -782,6 +932,17 @@ function headersToRecord(headers: Headers): Record { return record } +function isLocalLoopbackHostname(hostname: string): boolean { + const normalized = hostname.toLowerCase() + return ( + normalized === `localhost` || + normalized === `127.0.0.1` || + normalized === `0.0.0.0` || + normalized === `[::1]` || + normalized === `::1` + ) +} + function normalizeServers( value: unknown, activeUrl?: string | null @@ -1198,10 +1359,17 @@ function localRuntimeStatusLabel(status: LocalRuntimeStatus): string { } function injectDevPrincipalHeaders(server: ServerConfig): ServerConfig { - if (!DEV_PRINCIPAL) return server + if (server.source === `electric-cloud`) return server + const principal = + EXPLICIT_DEV_PRINCIPAL ?? + (hasHeader(server.headers, ELECTRIC_PRINCIPAL_HEADER) || + hasHeader(server.headers, `authorization`) + ? null + : DEFAULT_LOCAL_DEV_PRINCIPAL) + if (!principal) return server return { ...server, - headers: { ...server.headers, [ELECTRIC_PRINCIPAL_HEADER]: DEV_PRINCIPAL }, + headers: { ...server.headers, [ELECTRIC_PRINCIPAL_HEADER]: principal }, } } @@ -1907,23 +2075,23 @@ async function startRuntime(serverId: string): Promise { const runtimeHeaders = mergeHeaders(serverWithPrincipal.headers) // For `electric-cloud` source servers, the cloud-agents-server // authenticates each request via `x-electric-asserted-user-id` - // headers (injected by the undici / webRequest hooks) and checks - // the pull-wake `owner_user_id` against that asserted user. So the - // runner must register with the cloud user's id, not the - // `local-desktop` fallback we use for unauthenticated local servers. + // headers (injected by the undici / webRequest hooks). Register the + // runner under that user principal instead of the dev-local + // fallback used for unauthenticated local servers. const cloudAuthUserId = activeServer.source === `electric-cloud` ? (cloudAuth?.getState().userId ?? null) : null - const runnerOwnerUserId = - cloudAuthUserId ?? runnerOwnerUserIdFromHeaders(runtimeHeaders) + const runnerOwnerPrincipal = + runnerOwnerPrincipalFromUserId(cloudAuthUserId) ?? + runnerOwnerPrincipalFromHeaders(runtimeHeaders) console.info( `[agents-desktop] Starting built-in agents runtime for server ${activeServer.url}` ) console.info(`[agents-desktop] Pull-wake runner id: ${runnerId}`) if (PULL_WAKE_REGISTER_RUNNER) { console.info( - `[agents-desktop] Pull-wake runner registration enabled; owner user id: ${runnerOwnerUserId}` + `[agents-desktop] Pull-wake runner registration enabled; owner principal: ${runnerOwnerPrincipal ?? `(derived from auth)`}` ) } else { console.info( @@ -1947,7 +2115,9 @@ async function startRuntime(serverId: string): Promise { pullWake: { runnerId, registerRunner: PULL_WAKE_REGISTER_RUNNER, - ownerUserId: PULL_WAKE_REGISTER_RUNNER ? runnerOwnerUserId : undefined, + ownerPrincipal: PULL_WAKE_REGISTER_RUNNER + ? runnerOwnerPrincipal + : undefined, label: `Electric Agents Desktop`, headers: runtimeHeaders, claimHeaders: runtimeHeaders, @@ -2317,6 +2487,9 @@ function registerIpcHandlers(): void { const win = BrowserWindow.fromWebContents(event.sender) return desktopStateForWindow(win) }) + ipcMain.handle(`desktop:server-fetch`, (_event, request: unknown) => + desktopServerFetch(request) + ) ipcMain.handle( `desktop:set-active-server`, async (_event, server: ServerConfig | null) => { diff --git a/packages/agents-desktop/src/preload.ts b/packages/agents-desktop/src/preload.ts index f017e823a4..5a4565a36c 100644 --- a/packages/agents-desktop/src/preload.ts +++ b/packages/agents-desktop/src/preload.ts @@ -80,6 +80,21 @@ type DesktopState = { pullWakeRunnerId: string | null } +type DesktopServerFetchRequest = { + url: string + method: string + headers: Record + body: string | null +} + +type DesktopServerFetchResponse = { + url: string + status: number + statusText: string + headers: Record + body: string +} + type ServerConnectionState = { serverId: string status: ServerConnectionStatus @@ -263,6 +278,10 @@ const api = { ipcRenderer.invoke(`desktop:save-servers`, servers), getDesktopState: (): Promise => ipcRenderer.invoke(`desktop:get-state`), + serverFetch: ( + request: DesktopServerFetchRequest + ): Promise => + ipcRenderer.invoke(`desktop:server-fetch`, request), setNativeAppearance: (appearance: DesktopAppearance): Promise => ipcRenderer.invoke(`desktop:set-native-appearance`, appearance), setActiveServer: (server: ServerConfig | null): Promise => diff --git a/packages/agents-runtime/src/create-handler.ts b/packages/agents-runtime/src/create-handler.ts index 4317ed8ccd..8128cb82bc 100644 --- a/packages/agents-runtime/src/create-handler.ts +++ b/packages/agents-runtime/src/create-handler.ts @@ -4,7 +4,7 @@ */ import { zodToJsonSchema } from 'zod-to-json-schema' -import { processWebhookWake } from './process-wake' +import { processWake } from './process-wake' import { getEntityType, listEntityTypes } from './define-entity' import { DEFAULT_OUTPUT_SCHEMAS } from './default-output-schemas' import { passthrough } from './entity-schema' @@ -118,13 +118,11 @@ export interface RuntimeRouter { options?: Pick ) => void - /** - * Dispatch an already-parsed webhook wake notification. - */ + /** Dispatch an already-parsed webhook wake notification. */ dispatchWebhookWake: (notification: WebhookNotification) => void /** - * Wait for all in-flight webhook wake handlers to settle. + * Wait for all in-flight wake handlers to settle. * Throws any wake errors instead of hiding them behind logs. */ drainWakes: () => Promise @@ -240,7 +238,7 @@ export function createRuntimeRouter( const wakeLabel = notification.entity?.url ?? notification.streamPath const controller = new AbortController() const wake: Promise = Promise.resolve( - processWebhookWake(notification, { + processWake(notification, { ...wakeConfig, ...options, shutdownSignal: controller.signal, diff --git a/packages/agents-runtime/src/index.ts b/packages/agents-runtime/src/index.ts index 14ed10070d..5668a1f3b2 100644 --- a/packages/agents-runtime/src/index.ts +++ b/packages/agents-runtime/src/index.ts @@ -205,7 +205,7 @@ export type { TaggedQuery, } from './observation-sources' -export { processWake, processWebhookWake } from './process-wake' +export { processWake } from './process-wake' export type { ProcessWakeConfig } from './types' export { DEFAULT_OUTPUT_SCHEMAS } from './default-output-schemas' @@ -239,6 +239,8 @@ export type { PullWakeEvent, PullWakeRunner, PullWakeRunnerConfig, + PullWakeRunnerHealth, + PullWakeRunnerStatus, PullWakeStreamResponse, } from './pull-wake-runner' diff --git a/packages/agents-runtime/src/process-wake.ts b/packages/agents-runtime/src/process-wake.ts index f74caeabb9..a8cac7eaaf 100644 --- a/packages/agents-runtime/src/process-wake.ts +++ b/packages/agents-runtime/src/process-wake.ts @@ -54,7 +54,7 @@ interface ClaimCallbackResponse { claimToken?: string token?: string writeToken?: string - error?: { code: string } + error?: { code?: string; message?: string } } interface RawClaimCallbackResponse extends Omit { @@ -146,6 +146,17 @@ function applyClaimTokenHeader( } } +function parseClaimCallbackResponseBody( + body: string +): RawClaimCallbackResponse { + if (!body) return {} + try { + return JSON.parse(body) as RawClaimCallbackResponse + } catch { + return { error: { message: `Non-JSON claim callback response` } } + } +} + function constructWakeEvent( notification: WebhookNotification, catchUpEvents?: Array @@ -297,7 +308,7 @@ function createInFlightTracker() { } } -export async function processWebhookWake( +export async function processWake( notification: WebhookNotification, config: ProcessWakeConfig ): Promise { @@ -327,6 +338,8 @@ export async function processWebhookWake( } const serverHeaders = await resolveHeadersProvider(config.claimHeaders) const debugWakeTypes = process.env.ELECTRIC_AGENTS_DEBUG_WAKE_TYPES === `1` + let claimCallbackStatus: number | null = null + let claimCallbackResponseBody = `` if (!typeName) { // Don't ack — let the server's own timeout reclaim the wake. @@ -814,7 +827,11 @@ export async function processWebhookWake( }) ) .then(async (response) => { - const rawClaimData = (await response.json()) as RawClaimCallbackResponse + claimCallbackStatus = response.status + claimCallbackResponseBody = await response.text() + const rawClaimData = parseClaimCallbackResponseBody( + claimCallbackResponseBody + ) claimData = { ...rawClaimData, ok: @@ -825,6 +842,13 @@ export async function processWebhookWake( if (claimData.claimToken) activeClaimToken = claimData.claimToken if (claimData.token) activeClaimToken = claimData.token claimMs = +(performance.now() - claimT0).toFixed(2) + if (!claimData.ok) { + const logClaimCallbackReturned = + response.status === 401 ? log.error : log.warn + logClaimCallbackReturned( + `claim callback returned status=${response.status} ok=false claimMs=${claimMs} hasWriteToken=${Boolean(claimData.writeToken)} errorCode=${claimData.error?.code ?? `(none)`} errorMessage=${claimData.error?.message ?? `(none)`} callback=${callback} responseBody=${claimCallbackResponseBody || `(empty)`}` + ) + } return claimData }) @@ -838,7 +862,14 @@ export async function processWebhookWake( lastCatchUpOffset = db.offset } - if (!claimed.ok) return null + if (!claimed.ok) { + const logClaimCallbackRejected = + claimCallbackStatus === 401 ? log.error : log.warn + logClaimCallbackRejected( + `claim callback rejected wake status=${claimCallbackStatus ?? `(unknown)`} errorCode=${claimed.error?.code ?? `(none)`} errorMessage=${claimed.error?.message ?? `(none)`} callback=${callback} responseBody=${claimCallbackResponseBody || `(empty)`}` + ) + return null + } claimedWake = true writeToken = claimed.writeToken ?? `` @@ -1820,8 +1851,6 @@ export async function processWebhookWake( return result } -export const processWake: typeof processWebhookWake = processWebhookWake - async function sendDone( callback: string, token: string, diff --git a/packages/agents-runtime/src/pull-wake-runner.ts b/packages/agents-runtime/src/pull-wake-runner.ts index a729787a2c..e78c774f53 100644 --- a/packages/agents-runtime/src/pull-wake-runner.ts +++ b/packages/agents-runtime/src/pull-wake-runner.ts @@ -26,10 +26,11 @@ export interface PullWakeRunnerConfig { claimTokenHeader?: ProcessWakeConfig[`claimTokenHeader`] wakeStreamPath?: string heartbeatIntervalMs?: number + eventHeartbeatThrottleMs?: number leaseMs?: number heartbeatPath?: string claimPath?: string - onError?: (error: Error) => boolean | void + onError?: (error: Error) => void streamFactory?: (opts: { url: string headers?: Record @@ -51,16 +52,85 @@ export interface PullWakeRunner { waitForStopped: () => Promise readonly running: boolean readonly offset: string | undefined + getHealth: () => PullWakeRunnerHealth } +export type PullWakeRunnerStatus = + | `stopped` + | `starting` + | `connecting` + | `streaming` + | `reconnecting` + | `stopping` + +export interface PullWakeRunnerHealth { + running: boolean + status: PullWakeRunnerStatus + offset: string | undefined + started_at: string | null + stream_connected: boolean + stream_connected_since: string | null + reconnect_count: number + last_error: string | null + last_error_at: string | null + last_heartbeat_at: string | null + last_heartbeat_ok: boolean + last_claim_at: string | null + last_claim_result: `claimed` | `no_work` | `error` | null + last_dispatch_at: string | null + events_received: number + claims_succeeded: number + claims_skipped: number + claims_failed: number +} + +type PullWakeRunnerState = + | `stopped` + | `starting` + | `running.connecting` + | `running.streaming` + | `running.reconnecting` + | `stopping` + +const INITIAL_RECONNECT_BACKOFF_MS = 1_000 +const MAX_RECONNECT_BACKOFF_MS = 30_000 +const CLAIM_ACTOR_STOP_GRACE_MS = 1_000 +const DEFAULT_EVENT_HEARTBEAT_THROTTLE_MS = 2_000 +const HEARTBEAT_FAILURE_STREAM_RESET_THRESHOLD = 2 + export function createPullWakeRunner( config: PullWakeRunnerConfig ): PullWakeRunner { + let state: PullWakeRunnerState = `stopped` let controller: AbortController | null = null let loop: Promise | null = null let response: PullWakeStreamResponse | null = null let heartbeatTimer: ReturnType | null = null - let currentOffset = config.offset + let eventHeartbeatTimer: ReturnType | null = null + let heartbeatInFlight: Promise | null = null + let heartbeatPending = false + let currentOffset = config.offset ?? `-1` + let startedAt: string | null = null + let streamConnected = false + let streamConnectedSince: string | null = null + let reconnectCount = 0 + let lastError: string | null = null + let lastErrorAt: string | null = null + let lastHeartbeatAt: string | null = null + let lastHeartbeatOk = false + let lastClaimAt: string | null = null + let lastClaimResult: PullWakeRunnerHealth[`last_claim_result`] = null + let lastDispatchAt: string | null = null + let eventsReceived = 0 + let claimsSucceeded = 0 + let claimsSkipped = 0 + let claimsFailed = 0 + let consecutiveHeartbeatFailures = 0 + let acceptingClaims = false + let nextReconnectBackoffMs = INITIAL_RECONNECT_BACKOFF_MS + let streamResetError: Error | null = null + let stopPromise: Promise | null = null + const claimActors = new Set>() const wakePath = config.wakeStreamPath ?? @@ -68,6 +138,10 @@ export function createPullWakeRunner( const wakeUrl = appendPathToUrl(config.baseUrl, wakePath) const heartbeatIntervalMs = config.heartbeatIntervalMs ?? DEFAULT_RUNNER_HEARTBEAT_INTERVAL_MS + const eventHeartbeatThrottleMs = Math.max( + 0, + config.eventHeartbeatThrottleMs ?? DEFAULT_EVENT_HEARTBEAT_THROTTLE_MS + ) const leaseMs = config.leaseMs ?? heartbeatIntervalMs * 3 const heartbeatPath = config.heartbeatPath ?? @@ -78,6 +152,45 @@ export function createPullWakeRunner( `/_electric/runners/${encodeURIComponent(config.runnerId)}/claim` const claimUrl = appendPathToUrl(config.baseUrl, claimPath) + const toStatus = (): PullWakeRunnerStatus => { + switch (state) { + case `stopped`: + return `stopped` + case `starting`: + return `starting` + case `running.connecting`: + return `connecting` + case `running.streaming`: + return `streaming` + case `running.reconnecting`: + return `reconnecting` + case `stopping`: + return `stopping` + } + } + + const buildDiagnostics = (): Omit< + PullWakeRunnerHealth, + `running` | `offset` + > => ({ + status: toStatus(), + started_at: startedAt, + stream_connected: streamConnected, + stream_connected_since: streamConnectedSince, + reconnect_count: reconnectCount, + last_error: lastError, + last_error_at: lastErrorAt, + last_heartbeat_at: lastHeartbeatAt, + last_heartbeat_ok: lastHeartbeatOk, + last_claim_at: lastClaimAt, + last_claim_result: lastClaimResult, + last_dispatch_at: lastDispatchAt, + events_received: eventsReceived, + claims_succeeded: claimsSucceeded, + claims_skipped: claimsSkipped, + claims_failed: claimsFailed, + }) + const resolveHeaders = async (): Promise> => { const init = typeof config.headers === `function` @@ -100,10 +213,49 @@ export function createPullWakeRunner( const reportError = (err: unknown): void => { const error = err instanceof Error ? err : new Error(String(err)) - if (config.onError?.(error) !== true) throw error + lastError = error.message + lastErrorAt = new Date().toISOString() + try { + config.onError?.(error) + } catch (reporterError) { + // onError is reporting-only; reporters must not control runner lifecycle. + console.error(`Pull-wake runner onError callback failed`, reporterError) + } } - const heartbeat = async (signal: AbortSignal): Promise => { + const requestStreamReconnect = (error: Error): void => { + if (!streamConnected || streamResetError) return + streamResetError = error + response?.cancel?.(error) + } + + const notifyHeartbeatChange = (): void => { + const signal = controller?.signal + if (!signal || signal.aborted || eventHeartbeatThrottleMs <= 0) return + if (eventHeartbeatTimer) return + eventHeartbeatTimer = setTimeout(() => { + eventHeartbeatTimer = null + requestHeartbeat(signal) + }, eventHeartbeatThrottleMs) + } + + const requestHeartbeat = (signal: AbortSignal): void => { + if (signal.aborted) return + heartbeatPending = true + if (heartbeatInFlight) return + heartbeatInFlight = flushHeartbeats(signal).finally(() => { + heartbeatInFlight = null + }) + } + + const flushHeartbeats = async (signal: AbortSignal): Promise => { + while (heartbeatPending && !signal.aborted) { + heartbeatPending = false + await sendHeartbeat(signal) + } + } + + const sendHeartbeat = async (signal: AbortSignal): Promise => { try { const headers = new Headers(await resolveHeaders()) headers.set(`content-type`, `application/json`) @@ -112,37 +264,54 @@ export function createPullWakeRunner( headers, body: JSON.stringify({ lease_ms: leaseMs, - ...(currentOffset !== undefined - ? { wake_stream_offset: currentOffset } - : {}), + wake_stream_offset: currentOffset, + diagnostics: buildDiagnostics(), }), signal, }) + lastHeartbeatAt = new Date().toISOString() if (!res.ok) { throw new Error( `Pull-wake runner heartbeat failed for ${config.runnerId}: ${res.status} ${await res.text()}` ) } + lastHeartbeatOk = true + consecutiveHeartbeatFailures = 0 } catch (err) { if (!signal.aborted) { - config.onError?.(err instanceof Error ? err : new Error(String(err))) + lastHeartbeatOk = false + consecutiveHeartbeatFailures++ + reportError(err) + if ( + consecutiveHeartbeatFailures >= + HEARTBEAT_FAILURE_STREAM_RESET_THRESHOLD + ) { + requestStreamReconnect( + err instanceof Error ? err : new Error(String(err)) + ) + } } } } const startHeartbeat = (signal: AbortSignal): void => { if (heartbeatIntervalMs <= 0) return - void heartbeat(signal) + requestHeartbeat(signal) heartbeatTimer = setInterval(() => { - void heartbeat(signal) + requestHeartbeat(signal) }, heartbeatIntervalMs) } const stopHeartbeat = (): void => { + heartbeatPending = false if (heartbeatTimer) { clearInterval(heartbeatTimer) heartbeatTimer = null } + if (eventHeartbeatTimer) { + clearTimeout(eventHeartbeatTimer) + eventHeartbeatTimer = null + } } const streamFactory = @@ -161,107 +330,317 @@ export function createPullWakeRunner( offset: opts.offset, signal: opts.signal, onError: (error) => { - config.onError?.(error) + reportError(error) return {} }, })) as PullWakeStreamResponse }) + const recordClaimSkipped = (): null => { + lastClaimResult = `no_work` + claimsSkipped++ + notifyHeartbeatChange() + return null + } + + const recordClaimError = (): void => { + lastClaimResult = `error` + claimsFailed++ + notifyHeartbeatChange() + } + const claimWake = async ( event: PullWakeEvent, signal: AbortSignal ): Promise => { - const headers = new Headers(await resolveHeaders()) - headers.set(`content-type`, `application/json`) - const response = await fetch(claimUrl, { - method: `POST`, - headers, - signal, - body: JSON.stringify(event), - }) - if (response.status === 204) return null - if (!response.ok) { - const text = await response.text() - if ( - response.status === 409 && - (text.includes(`ALREADY_CLAIMED`) || text.includes(`NO_PENDING_WORK`)) - ) { - return null + lastClaimAt = new Date().toISOString() + lastClaimResult = null + notifyHeartbeatChange() + let claimErrorRecorded = false + try { + const headers = new Headers(await resolveHeaders()) + headers.set(`content-type`, `application/json`) + const response = await fetch(claimUrl, { + method: `POST`, + headers, + signal, + body: JSON.stringify(event), + }) + if (response.status === 204) return recordClaimSkipped() + if (!response.ok) { + const text = await response.text() + if ( + response.status === 409 && + (text.includes(`ALREADY_CLAIMED`) || text.includes(`NO_PENDING_WORK`)) + ) { + return recordClaimSkipped() + } + recordClaimError() + claimErrorRecorded = true + throw new Error( + `Pull-wake claim failed for ${config.runnerId}: ${response.status} ${text}` + ) } - throw new Error( - `Pull-wake claim failed for ${config.runnerId}: ${response.status} ${text}` - ) - } - const notification = (await response.json()) as WakeNotification & { - done?: boolean + const notification = (await response.json()) as WakeNotification & { + done?: boolean + } + if (notification.done) return recordClaimSkipped() + lastClaimResult = `claimed` + claimsSucceeded++ + notifyHeartbeatChange() + return notification + } catch (err) { + if (signal.aborted) { + throw err + } + if (!claimErrorRecorded) { + recordClaimError() + } + throw err } - if (notification.done) return null - return notification } - const run = async (): Promise => { - const signal = controller!.signal + const isRunningState = (): boolean => + state === `starting` || state.startsWith(`running.`) + + const claimAndDispatch = async ( + event: PullWakeEvent, + signal: AbortSignal + ): Promise => { try { - response = await streamFactory({ - url: wakeUrl, - headers: await resolveHeaders(), - offset: currentOffset, - signal, + const notification = await claimWake(event, signal) + if (!notification) return + if (!acceptingClaims || signal.aborted) { + return + } + try { + config.runtime.dispatchWake(notification, { + claimHeaders: resolveClaimHeaders, + claimTokenHeader: config.claimTokenHeader, + }) + } catch (err) { + reportError(err) + notifyHeartbeatChange() + return + } + lastDispatchAt = new Date().toISOString() + notifyHeartbeatChange() + } catch (err) { + if (!signal.aborted) { + reportError(err) + } + } + } + + const spawnClaimActor = (event: PullWakeEvent, signal: AbortSignal): void => { + let actor: Promise + actor = claimAndDispatch(event, signal).finally(() => { + claimActors.delete(actor) + }) + claimActors.add(actor) + } + + const waitForClaimActors = async ( + timeoutMs = CLAIM_ACTOR_STOP_GRACE_MS + ): Promise => { + const deadline = Date.now() + timeoutMs + while (claimActors.size > 0) { + const remainingMs = deadline - Date.now() + if (remainingMs <= 0) return false + const result = await new Promise<`settled` | `timeout`>((resolve) => { + const timer = setTimeout(() => resolve(`timeout`), remainingMs) + void Promise.allSettled([...claimActors]).then(() => { + clearTimeout(timer) + resolve(`settled`) + }) }) + if (result === `timeout`) return false + } + return true + } + + const sleep = async (ms: number, signal: AbortSignal): Promise => { + if (ms <= 0 || signal.aborted) return + await new Promise((resolve) => { + const timer = setTimeout(resolve, ms) + signal.addEventListener( + `abort`, + () => { + clearTimeout(timer) + resolve() + }, + { once: true } + ) + }) + } + + const consumeWakeStream = async (signal: AbortSignal): Promise => { + streamResetError = null + response = await streamFactory({ + url: wakeUrl, + headers: await resolveHeaders(), + offset: currentOffset, + signal, + }) + state = `running.streaming` + streamConnected = true + streamConnectedSince = new Date().toISOString() + nextReconnectBackoffMs = INITIAL_RECONNECT_BACKOFF_MS + notifyHeartbeatChange() + + try { for await (const event of response.jsonStream()) { if (signal.aborted) break - if (event?.type !== `wake`) continue - const notification = await claimWake(event, signal) - if (notification) { - config.runtime.dispatchWake(notification, { - claimHeaders: resolveClaimHeaders, - claimTokenHeader: config.claimTokenHeader, - }) - await config.runtime.drainWakes() + if (event?.type === `wake`) { + eventsReceived++ + notifyHeartbeatChange() + if (acceptingClaims && !signal.aborted) spawnClaimActor(event, signal) + } + if ( + response.offset !== undefined && + response.offset !== currentOffset + ) { + currentOffset = response.offset + notifyHeartbeatChange() } - if (response.offset !== undefined) currentOffset = response.offset } await response.closed?.catch((err) => { if (!signal.aborted) throw err }) - } catch (err) { - if (!signal.aborted) { - reportError(err) + if (streamResetError && !signal.aborted) { + throw streamResetError } } finally { - stopHeartbeat() + streamConnected = false + streamConnectedSince = null + response = null + if (!signal.aborted) notifyHeartbeatChange() + } + } + + const run = async (): Promise => { + const signal = controller!.signal + acceptingClaims = true + try { + while (!signal.aborted) { + state = `running.connecting` + notifyHeartbeatChange() + try { + await consumeWakeStream(signal) + if (!signal.aborted) { + state = `running.reconnecting` + notifyHeartbeatChange() + const backoffMs = nextReconnectBackoffMs + nextReconnectBackoffMs = Math.min( + nextReconnectBackoffMs * 2, + MAX_RECONNECT_BACKOFF_MS + ) + await sleep(backoffMs, signal) + } + } catch (err) { + if (!signal.aborted) { + reconnectCount++ + reportError(err) + state = `running.reconnecting` + notifyHeartbeatChange() + const backoffMs = nextReconnectBackoffMs + nextReconnectBackoffMs = Math.min( + nextReconnectBackoffMs * 2, + MAX_RECONNECT_BACKOFF_MS + ) + await sleep(backoffMs, signal) + } + } + } + } finally { + acceptingClaims = false + streamConnected = false + streamConnectedSince = null response = null controller = null + if (state !== `stopping`) state = `stopped` + } + } + + const stopRunner = async (): Promise => { + if (state === `stopped`) return + state = `stopping` + acceptingClaims = false + controller?.abort() + stopHeartbeat() + response?.cancel?.(new Error(`pull wake runner stopped`)) + if (!(await waitForClaimActors())) { + claimActors.clear() } + config.runtime.abortWakes() + await loop?.catch((err) => { + if (!(err instanceof Error && err.name === `AbortError`)) throw err + }) + let drainError: unknown + try { + await config.runtime.drainWakes() + } catch (err) { + reportError(err) + drainError = err + } finally { + state = `stopped` + } + if (drainError) throw drainError } return { start() { - if (loop) return + if (loop || stopPromise) return + state = `starting` controller = new AbortController() + reconnectCount = 0 + lastError = null + lastErrorAt = null + lastHeartbeatAt = null + lastHeartbeatOk = false + lastClaimAt = null + lastClaimResult = null + lastDispatchAt = null + eventsReceived = 0 + claimsSucceeded = 0 + claimsSkipped = 0 + claimsFailed = 0 + consecutiveHeartbeatFailures = 0 + nextReconnectBackoffMs = INITIAL_RECONNECT_BACKOFF_MS + streamResetError = null + startedAt = new Date().toISOString() startHeartbeat(controller.signal) loop = run().finally(() => { loop = null + stopHeartbeat() }) }, async stop() { - controller?.abort() - stopHeartbeat() - response?.cancel?.(new Error(`pull wake runner stopped`)) - config.runtime.abortWakes() - await loop?.catch((err) => { - if (!(err instanceof Error && err.name === `AbortError`)) throw err + stopPromise ??= stopRunner().finally(() => { + stopPromise = null }) - await config.runtime.drainWakes() + await stopPromise }, async waitForStopped() { + if (stopPromise) { + await stopPromise + return + } await loop + if (stopPromise) await stopPromise }, get running() { - return loop !== null + return isRunningState() }, get offset() { return currentOffset }, + getHealth(): PullWakeRunnerHealth { + return { + running: isRunningState(), + offset: currentOffset, + ...buildDiagnostics(), + } + }, } } diff --git a/packages/agents-runtime/test/create-handler.test.ts b/packages/agents-runtime/test/create-handler.test.ts index a4dbba30cb..7907b2d522 100644 --- a/packages/agents-runtime/test/create-handler.test.ts +++ b/packages/agents-runtime/test/create-handler.test.ts @@ -11,13 +11,12 @@ import type { StandardSchemaV1, } from '@standard-schema/spec' -const { processWebhookWakeMock } = vi.hoisted(() => ({ - processWebhookWakeMock: vi.fn(), +const { processWakeMock } = vi.hoisted(() => ({ + processWakeMock: vi.fn(), })) vi.mock(`../src/process-wake`, () => ({ - processWebhookWake: processWebhookWakeMock, - processWake: processWebhookWakeMock, + processWake: processWakeMock, })) function makeStandardSchema( @@ -63,10 +62,14 @@ function makeResponse() { } } +function flushAsyncWork(): Promise { + return new Promise((resolve) => setTimeout(resolve, 0)) +} + describe(`createRuntimeHandler`, () => { beforeEach(() => { clearRegistry() - processWebhookWakeMock.mockReset() + processWakeMock.mockReset() }) afterEach(() => { @@ -77,7 +80,7 @@ describe(`createRuntimeHandler`, () => { defineEntity(`test-agent`, { handler: async () => {} }) let resolveWake!: () => void - processWebhookWakeMock.mockImplementation( + processWakeMock.mockImplementation( () => new Promise((resolve) => { resolveWake = resolve @@ -118,7 +121,7 @@ describe(`createRuntimeHandler`, () => { 'content-type': `application/json`, }) expect(res.end).toHaveBeenCalledWith(JSON.stringify({ ok: true })) - expect(processWebhookWakeMock).toHaveBeenCalledWith( + expect(processWakeMock).toHaveBeenCalledWith( notification, expect.objectContaining({ baseUrl: `http://localhost:3000`, @@ -135,7 +138,7 @@ describe(`createRuntimeHandler`, () => { defineEntity(`test-agent`, { handler: async () => {} }) let resolveWake!: () => void - processWebhookWakeMock.mockImplementation( + processWakeMock.mockImplementation( () => new Promise((resolve) => { resolveWake = resolve @@ -195,7 +198,7 @@ describe(`createRuntimeHandler`, () => { it(`records wake errors in debugState() until drained`, async () => { defineEntity(`test-agent`, { handler: async () => {} }) - processWebhookWakeMock.mockRejectedValueOnce(new Error(`wake failed`)) + processWakeMock.mockRejectedValueOnce(new Error(`wake failed`)) const handler = createRuntimeHandler({ baseUrl: `http://localhost:3000`, @@ -230,8 +233,7 @@ describe(`createRuntimeHandler`, () => { ) expect(response.status).toBe(200) - await Promise.resolve() - await Promise.resolve() + await flushAsyncWork() expect(handler.debugState()).toMatchObject({ pendingWakeCount: 0, @@ -260,7 +262,7 @@ describe(`createRuntimeHandler`, () => { await handler.onEnter(req, res) - expect(processWebhookWakeMock).not.toHaveBeenCalled() + expect(processWakeMock).not.toHaveBeenCalled() expect(res.writeHead).toHaveBeenCalledWith(400, { 'content-type': `application/json`, }) @@ -293,7 +295,7 @@ describe(`createRuntimeHandler`, () => { await handler.onEnter(req, res) - expect(processWebhookWakeMock).not.toHaveBeenCalled() + expect(processWakeMock).not.toHaveBeenCalled() expect(res.writeHead).toHaveBeenCalledWith(400, { 'content-type': `application/json`, }) @@ -314,7 +316,7 @@ describe(`createRuntimeHandler`, () => { ) expect(response).toBeNull() - expect(processWebhookWakeMock).not.toHaveBeenCalled() + expect(processWakeMock).not.toHaveBeenCalled() }) it(`returns 503 for unknown entity types`, async () => { @@ -354,7 +356,7 @@ describe(`createRuntimeHandler`, () => { await expect(response.json()).resolves.toMatchObject({ error: expect.stringContaining(`nonexistent-agent`), }) - expect(processWebhookWakeMock).not.toHaveBeenCalled() + expect(processWakeMock).not.toHaveBeenCalled() }) it(`routes matching fetch requests through handleRequest`, async () => { @@ -396,7 +398,7 @@ describe(`createRuntimeHandler`, () => { expect(response).toBeInstanceOf(Response) expect(response?.status).toBe(200) await expect(response?.json()).resolves.toEqual({ ok: true }) - expect(processWebhookWakeMock).toHaveBeenCalledWith( + expect(processWakeMock).toHaveBeenCalledWith( notification, expect.objectContaining({ baseUrl: `http://localhost:3000`, diff --git a/packages/agents-runtime/test/pull-wake-runner.test.ts b/packages/agents-runtime/test/pull-wake-runner.test.ts index e844fc6bed..bc382db673 100644 --- a/packages/agents-runtime/test/pull-wake-runner.test.ts +++ b/packages/agents-runtime/test/pull-wake-runner.test.ts @@ -15,42 +15,115 @@ vi.mock(`@durable-streams/client`, () => ({ DurableStream: durableStreamMocks.DurableStream, })) +function wakeEvent(id: string): PullWakeEvent { + return { + type: `wake`, + subscription_id: `runner:runner-1`, + stream: `chat/${id}/main`, + generation: 7, + ts: 123, + } +} + +function notification(id: string): WakeNotification { + return { + consumerId: `wake-${id}`, + epoch: 7, + wakeId: `wake-${id}`, + streamPath: `/chat/${id}/main`, + streams: [{ path: `/chat/${id}/main`, offset: `12` }], + callback: `http://server/_electric/callback-forward/wake-${id}`, + claimToken: `claim-token-${id}`, + entity: { + type: `chat`, + status: `idle`, + url: `/chat/${id}`, + streams: { main: `/chat/${id}/main`, error: `/chat/${id}/error` }, + }, + } +} + +function deferred(): { + promise: Promise + resolve: (value: T | PromiseLike) => void + reject: (reason?: unknown) => void +} { + let resolve!: (value: T | PromiseLike) => void + let reject!: (reason?: unknown) => void + const promise = new Promise((res, rej) => { + resolve = res + reject = rej + }) + return { promise, resolve, reject } +} + +async function waitFor( + assertion: () => void, + timeoutMs = 1_000 +): Promise { + const started = Date.now() + let lastError: unknown + while (Date.now() - started < timeoutMs) { + try { + assertion() + return + } catch (err) { + lastError = err + await new Promise((resolve) => setTimeout(resolve, 5)) + } + } + throw lastError +} + +function runtime() { + return { + dispatchWake: vi.fn(), + drainWakes: vi.fn(async () => undefined), + abortWakes: vi.fn(), + } +} + describe(`createPullWakeRunner`, () => { afterEach(() => { durableStreamMocks.DurableStream.mockClear() durableStreamMocks.stream.mockReset() + vi.useRealTimers() vi.unstubAllGlobals() }) + it(`starts from the beginning when no wake stream offset is committed`, async () => { + const streamFactory = vi.fn(async () => ({ + offset: `42`, + async *jsonStream() {}, + closed: Promise.resolve(), + })) + + const runner = createPullWakeRunner({ + baseUrl: `http://server`, + runnerId: `runner-1`, + runtime: runtime(), + heartbeatIntervalMs: 0, + streamFactory, + }) + + runner.start() + await waitFor(() => { + expect(streamFactory).toHaveBeenCalledWith( + expect.objectContaining({ offset: `-1` }) + ) + }) + + await runner.stop() + }) + it(`claims compact DS wake events before dispatching runtime wakes`, async () => { - const event: PullWakeEvent = { - type: `wake`, - subscription_id: `runner:runner-1`, - stream: `chat/one/main`, - generation: 7, - ts: 123, - } - const notification: WakeNotification = { - consumerId: `wake-1`, - epoch: 7, - wakeId: `wake-1`, - streamPath: `/chat/one/main`, - streams: [{ path: `/chat/one/main`, offset: `12` }], - callback: `http://server/_electric/callback-forward/wake-1`, - claimToken: `claim-token`, - entity: { - type: `chat`, - status: `idle`, - url: `/chat/one`, - streams: { main: `/chat/one/main`, error: `/chat/one/error` }, - }, - } + const event = wakeEvent(`one`) + const claimed = notification(`one`) const fetchMock = vi.fn(async (_input: RequestInfo | URL) => - Response.json(notification) + Response.json(claimed) ) vi.stubGlobal(`fetch`, fetchMock) - const dispatchWake = vi.fn() - const drainWakes = vi.fn(async () => undefined) + const testRuntime = runtime() const streamFactory = vi.fn(async () => ({ offset: `42`, async *jsonStream() { @@ -62,11 +135,7 @@ describe(`createPullWakeRunner`, () => { const runner = createPullWakeRunner({ baseUrl: `http://server`, runnerId: `runner-1`, - runtime: { - dispatchWake, - drainWakes, - abortWakes: vi.fn(), - }, + runtime: testRuntime, headers: { 'x-test-runner': `runner-1` }, claimHeaders: { authorization: `Bearer session-token` }, claimTokenHeader: `electric-claim-token`, @@ -75,7 +144,9 @@ describe(`createPullWakeRunner`, () => { }) runner.start() - await runner.waitForStopped() + await waitFor(() => { + expect(testRuntime.dispatchWake).toHaveBeenCalledTimes(1) + }) expect(streamFactory).toHaveBeenCalledWith( expect.objectContaining({ @@ -89,22 +160,19 @@ describe(`createPullWakeRunner`, () => { body: JSON.stringify(event), }) ) - expect(dispatchWake).toHaveBeenCalledWith(notification, { + expect(testRuntime.dispatchWake).toHaveBeenCalledWith(claimed, { claimHeaders: expect.any(Function), claimTokenHeader: `electric-claim-token`, }) - expect(drainWakes).toHaveBeenCalledTimes(1) + expect(testRuntime.drainWakes).not.toHaveBeenCalled() expect(runner.offset).toBe(`42`) + + await runner.stop() + expect(testRuntime.drainWakes).toHaveBeenCalledTimes(1) }) it(`skips stale wake events when claim returns no pending work`, async () => { - const event: PullWakeEvent = { - type: `wake`, - subscription_id: `runner:runner-1`, - stream: `chat/one/main`, - generation: 7, - ts: 123, - } + const event = wakeEvent(`one`) const fetchMock = vi.fn(async (_input: RequestInfo | URL) => Response.json( { @@ -117,8 +185,7 @@ describe(`createPullWakeRunner`, () => { ) ) vi.stubGlobal(`fetch`, fetchMock) - const dispatchWake = vi.fn() - const drainWakes = vi.fn(async () => undefined) + const testRuntime = runtime() const onError = vi.fn() const streamFactory = vi.fn(async () => ({ offset: `42`, @@ -131,11 +198,7 @@ describe(`createPullWakeRunner`, () => { const runner = createPullWakeRunner({ baseUrl: `http://server`, runnerId: `runner-1`, - runtime: { - dispatchWake, - drainWakes, - abortWakes: vi.fn(), - }, + runtime: testRuntime, headers: { 'x-test-runner': `runner-1` }, heartbeatIntervalMs: 0, streamFactory, @@ -143,29 +206,75 @@ describe(`createPullWakeRunner`, () => { }) runner.start() - await runner.waitForStopped() + await waitFor(() => { + expect(fetchMock).toHaveBeenCalledTimes(1) + }) - expect(fetchMock).toHaveBeenCalled() - expect(dispatchWake).not.toHaveBeenCalled() - expect(drainWakes).not.toHaveBeenCalled() + expect(testRuntime.dispatchWake).not.toHaveBeenCalled() + expect(testRuntime.drainWakes).not.toHaveBeenCalled() expect(onError).not.toHaveBeenCalled() expect(runner.offset).toBe(`42`) + + await runner.stop() + }) + + it(`exposes diagnostics via getHealth()`, async () => { + const event = wakeEvent(`one`) + const fetchMock = vi.fn(async (_input: RequestInfo | URL) => + Response.json(notification(`one`)) + ) + vi.stubGlobal(`fetch`, fetchMock) + const testRuntime = runtime() + const streamFactory = vi.fn(async () => ({ + offset: `42`, + async *jsonStream() { + yield event + }, + closed: Promise.resolve(), + })) + + const runner = createPullWakeRunner({ + baseUrl: `http://server`, + runnerId: `runner-1`, + runtime: testRuntime, + heartbeatIntervalMs: 0, + streamFactory, + }) + + const healthBefore = runner.getHealth() + expect(healthBefore.running).toBe(false) + expect(healthBefore.started_at).toBeNull() + expect(healthBefore.events_received).toBe(0) + + runner.start() + await waitFor(() => { + expect(testRuntime.dispatchWake).toHaveBeenCalledTimes(1) + }) + + const healthDuring = runner.getHealth() + expect(healthDuring.running).toBe(true) + expect(healthDuring.started_at).not.toBeNull() + expect(healthDuring.events_received).toBe(1) + expect(healthDuring.claims_succeeded).toBe(1) + expect(healthDuring.last_claim_result).toBe(`claimed`) + expect(healthDuring.last_dispatch_at).not.toBeNull() + expect(healthDuring.offset).toBe(`42`) + + await runner.stop() + expect(runner.getHealth().running).toBe(false) }) it(`preserves base URL query parameters on stream, claim, and heartbeat requests`, async () => { - const fetchMock = vi.fn(async (_input: RequestInfo | URL) => { + const fetchMock = vi.fn(async (input: RequestInfo | URL) => { + const url = String(input) + if (url.includes(`/heartbeat`)) return Response.json({}) return new Response(null, { status: 204 }) }) vi.stubGlobal(`fetch`, fetchMock) const streamFactory = vi.fn(async () => ({ offset: `42`, async *jsonStream() { - yield { - type: `wake`, - subscription_id: `runner:runner-1`, - stream: `chat/one/main`, - generation: 7, - } satisfies PullWakeEvent + yield wakeEvent(`one`) }, closed: Promise.resolve(), })) @@ -173,31 +282,244 @@ describe(`createPullWakeRunner`, () => { const runner = createPullWakeRunner({ baseUrl: `http://server/root?secret=s1`, runnerId: `runner-1`, - runtime: { - dispatchWake: vi.fn(), - drainWakes: vi.fn(), - abortWakes: vi.fn(), - }, - heartbeatIntervalMs: 1, + runtime: runtime(), + heartbeatIntervalMs: 5, streamFactory, }) runner.start() - await runner.waitForStopped() + await waitFor(() => { + expect(streamFactory).toHaveBeenCalledWith( + expect.objectContaining({ + url: `http://server/root/runners/runner-1/wake?secret=s1`, + }) + ) + expect(fetchMock).toHaveBeenCalledWith( + `http://server/root/_electric/runners/runner-1/heartbeat?secret=s1`, + expect.any(Object) + ) + expect(fetchMock).toHaveBeenCalledWith( + `http://server/root/_electric/runners/runner-1/claim?secret=s1`, + expect.objectContaining({ method: `POST` }) + ) + }) - expect(streamFactory).toHaveBeenCalledWith( - expect.objectContaining({ - url: `http://server/root/runners/runner-1/wake?secret=s1`, - }) + await runner.stop() + }) + + it(`sends a throttled heartbeat when runner diagnostics change`, async () => { + const heartbeatBodies: Array> = [] + const fetchMock = vi.fn( + async (input: RequestInfo | URL, init?: RequestInit) => { + if (String(input).includes(`/heartbeat`)) { + heartbeatBodies.push(JSON.parse(String(init?.body))) + return Response.json({}) + } + return Response.json(notification(`one`)) + } ) - expect(fetchMock).toHaveBeenCalledWith( - `http://server/root/_electric/runners/runner-1/heartbeat?secret=s1`, - expect.any(Object) + vi.stubGlobal(`fetch`, fetchMock) + const testRuntime = runtime() + const streamFactory = vi.fn(async () => ({ + offset: `42`, + async *jsonStream() { + yield wakeEvent(`one`) + }, + closed: Promise.resolve(), + })) + + const runner = createPullWakeRunner({ + baseUrl: `http://server`, + runnerId: `runner-1`, + runtime: testRuntime, + heartbeatIntervalMs: 60_000, + eventHeartbeatThrottleMs: 20, + streamFactory, + }) + + runner.start() + await waitFor(() => { + expect(testRuntime.dispatchWake).toHaveBeenCalledTimes(1) + }) + await waitFor(() => { + expect(heartbeatBodies.length).toBe(2) + }) + + const diagnostics = heartbeatBodies[1]!.diagnostics as Record< + string, + unknown + > + expect(diagnostics.events_received).toBe(1) + expect(diagnostics.claims_succeeded).toBe(1) + expect(heartbeatBodies[1]!.wake_stream_offset).toBe(`42`) + + await runner.stop() + }) + + it(`does not schedule event heartbeats for unchanged stream offsets`, async () => { + const heartbeatBodies: Array> = [] + const yieldEvent = deferred() + const streamClosed = deferred() + const fetchMock = vi.fn( + async (_input: RequestInfo | URL, init?: RequestInit) => { + heartbeatBodies.push(JSON.parse(String(init?.body))) + return Response.json({}) + } ) - expect(fetchMock).toHaveBeenCalledWith( - `http://server/root/_electric/runners/runner-1/claim?secret=s1`, - expect.objectContaining({ method: `POST` }) + vi.stubGlobal(`fetch`, fetchMock) + const streamFactory = vi.fn(async () => ({ + offset: `42`, + async *jsonStream() { + await yieldEvent.promise + yield { type: `noop` } as unknown as PullWakeEvent + await streamClosed.promise + }, + cancel: () => streamClosed.resolve(), + closed: streamClosed.promise, + })) + + const runner = createPullWakeRunner({ + baseUrl: `http://server`, + runnerId: `runner-1`, + runtime: runtime(), + offset: `42`, + heartbeatIntervalMs: 60_000, + eventHeartbeatThrottleMs: 5, + streamFactory, + }) + + runner.start() + await waitFor(() => { + expect(heartbeatBodies.length).toBe(2) + }) + + yieldEvent.resolve() + await new Promise((resolve) => setTimeout(resolve, 20)) + + expect(heartbeatBodies.length).toBe(2) + await runner.stop() + }) + + it(`coalesces event heartbeats while a heartbeat is in flight`, async () => { + const firstHeartbeat = deferred() + const yieldWake = deferred() + const streamClosed = deferred() + let heartbeatCalls = 0 + const fetchMock = vi.fn( + async (input: RequestInfo | URL, init?: RequestInit) => { + if (String(input).includes(`/heartbeat`)) { + heartbeatCalls++ + JSON.parse(String(init?.body)) + return heartbeatCalls === 1 + ? firstHeartbeat.promise + : Response.json({}) + } + return Response.json(notification(`one`)) + } ) + vi.stubGlobal(`fetch`, fetchMock) + const testRuntime = runtime() + const streamFactory = vi.fn(async () => ({ + offset: `42`, + async *jsonStream() { + await yieldWake.promise + yield wakeEvent(`one`) + await streamClosed.promise + }, + cancel: () => streamClosed.resolve(), + closed: streamClosed.promise, + })) + + const runner = createPullWakeRunner({ + baseUrl: `http://server`, + runnerId: `runner-1`, + runtime: testRuntime, + heartbeatIntervalMs: 0, + eventHeartbeatThrottleMs: 1, + streamFactory, + }) + + runner.start() + await waitFor(() => { + expect(streamFactory).toHaveBeenCalledTimes(1) + expect(heartbeatCalls).toBe(1) + }) + + yieldWake.resolve() + await waitFor(() => { + expect(testRuntime.dispatchWake).toHaveBeenCalledTimes(1) + }) + await new Promise((resolve) => setTimeout(resolve, 10)) + expect(heartbeatCalls).toBe(1) + + firstHeartbeat.resolve(Response.json({})) + await waitFor(() => { + expect(heartbeatCalls).toBe(2) + }) + + await runner.stop() + }) + + it(`resets heartbeat failure counters across restarts`, async () => { + const heartbeatFailures = [deferred(), deferred()] + const streamClosed = [deferred(), deferred()] + const cancel = [ + vi.fn(() => streamClosed[0]!.resolve()), + vi.fn(() => streamClosed[1]!.resolve()), + ] + let heartbeatCalls = 0 + const fetchMock = vi.fn(async (input: RequestInfo | URL) => { + if (String(input).includes(`/heartbeat`)) { + const failure = heartbeatFailures[heartbeatCalls++] + if (failure) return failure.promise + return Response.json({}) + } + return new Response(null, { status: 204 }) + }) + vi.stubGlobal(`fetch`, fetchMock) + const streamFactory = vi.fn(async () => { + const index = streamFactory.mock.calls.length - 1 + return { + async *jsonStream() { + await streamClosed[index]!.promise + }, + cancel: cancel[index], + closed: streamClosed[index]!.promise, + } + }) + + const runner = createPullWakeRunner({ + baseUrl: `http://server`, + runnerId: `runner-1`, + runtime: runtime(), + heartbeatIntervalMs: 60_000, + eventHeartbeatThrottleMs: 0, + streamFactory, + }) + + runner.start() + await waitFor(() => { + expect(streamFactory).toHaveBeenCalledTimes(1) + expect(heartbeatCalls).toBe(1) + }) + heartbeatFailures[0]!.resolve(new Response(`failed`, { status: 500 })) + await waitFor(() => { + expect(runner.getHealth().last_heartbeat_ok).toBe(false) + }) + await runner.stop() + + runner.start() + await waitFor(() => { + expect(streamFactory).toHaveBeenCalledTimes(2) + expect(heartbeatCalls).toBe(2) + }) + heartbeatFailures[1]!.resolve(new Response(`failed`, { status: 500 })) + await waitFor(() => { + expect(runner.getHealth().last_heartbeat_ok).toBe(false) + }) + + expect(cancel[1]).not.toHaveBeenCalled() + await runner.stop() }) it(`resolves async headers before opening the durable stream`, async () => { @@ -212,11 +534,7 @@ describe(`createPullWakeRunner`, () => { const runner = createPullWakeRunner({ baseUrl: `http://server`, runnerId: `runner-1`, - runtime: { - dispatchWake: vi.fn(), - drainWakes: vi.fn(), - abortWakes: vi.fn(), - }, + runtime: runtime(), headers: async () => ({ Authorization: `Bearer tenant-token`, 'X-Tenant': `tenant-a`, @@ -225,7 +543,10 @@ describe(`createPullWakeRunner`, () => { }) runner.start() - await runner.waitForStopped() + await waitFor(() => { + expect(durableStreamMocks.DurableStream).toHaveBeenCalledTimes(1) + }) + await runner.stop() expect(durableStreamMocks.DurableStream).toHaveBeenCalledWith( expect.objectContaining({ @@ -244,4 +565,448 @@ describe(`createPullWakeRunner`, () => { ) expect(fetchMock).not.toHaveBeenCalled() }) + + it(`continues reading and claiming while runtime wakes are pending`, async () => { + const events = [wakeEvent(`one`), wakeEvent(`two`)] + const fetchMock = vi.fn( + async (_input: RequestInfo | URL, init?: RequestInit) => + Response.json( + notification(JSON.parse(String(init?.body)).stream.split(`/`)[1]) + ) + ) + vi.stubGlobal(`fetch`, fetchMock) + const testRuntime = runtime() + const streamFactory = vi.fn(async () => ({ + offset: `84`, + async *jsonStream() { + yield events[0]! + yield events[1]! + }, + closed: Promise.resolve(), + })) + + const runner = createPullWakeRunner({ + baseUrl: `http://server`, + runnerId: `runner-1`, + runtime: testRuntime, + heartbeatIntervalMs: 0, + streamFactory, + }) + + runner.start() + await waitFor(() => { + expect(testRuntime.dispatchWake).toHaveBeenCalledTimes(2) + }) + + expect(fetchMock).toHaveBeenCalledTimes(2) + expect(testRuntime.drainWakes).not.toHaveBeenCalled() + await runner.stop() + }) + + it(`skips dispatch from a claim actor after shutdown begins`, async () => { + const claimResponse = deferred() + const fetchMock = vi.fn(async () => claimResponse.promise) + vi.stubGlobal(`fetch`, fetchMock) + const calls: Array = [] + const testRuntime = { + dispatchWake: vi.fn(() => calls.push(`dispatch`)), + abortWakes: vi.fn(() => calls.push(`abort`)), + drainWakes: vi.fn(async () => { + calls.push(`drain`) + }), + } + const streamFactory = vi.fn(async () => ({ + offset: `42`, + async *jsonStream() { + yield wakeEvent(`one`) + }, + closed: Promise.resolve(), + })) + + const runner = createPullWakeRunner({ + baseUrl: `http://server`, + runnerId: `runner-1`, + runtime: testRuntime, + heartbeatIntervalMs: 0, + streamFactory, + }) + + runner.start() + await waitFor(() => { + expect(fetchMock).toHaveBeenCalledTimes(1) + }) + const stopped = runner.stop() + claimResponse.resolve(Response.json(notification(`one`))) + await stopped + + expect(testRuntime.dispatchWake).not.toHaveBeenCalled() + expect(calls).toEqual([`abort`, `drain`]) + expect(runner.getHealth().claims_succeeded).toBe(1) + expect(runner.getHealth().claims_skipped).toBe(0) + }) + + it(`keeps heartbeating degraded diagnostics while reconnecting`, async () => { + const heartbeatBodies: Array> = [] + const fetchMock = vi.fn( + async (input: RequestInfo | URL, init?: RequestInit) => { + if (String(input).includes(`/heartbeat`)) { + heartbeatBodies.push(JSON.parse(String(init?.body))) + return Response.json({}) + } + return new Response(null, { status: 204 }) + } + ) + vi.stubGlobal(`fetch`, fetchMock) + const onError = vi.fn() + const streamFactory = vi.fn(async () => { + throw new Error(`stream failed`) + }) + + const runner = createPullWakeRunner({ + baseUrl: `http://server`, + runnerId: `runner-1`, + runtime: runtime(), + heartbeatIntervalMs: 5, + streamFactory, + onError, + }) + + runner.start() + await waitFor(() => { + expect(onError).toHaveBeenCalledWith(expect.any(Error)) + expect( + heartbeatBodies.some((body) => { + const diagnostics = body.diagnostics as + | Record + | undefined + return ( + diagnostics?.stream_connected === false && + diagnostics?.reconnect_count === 1 + ) + }) + ).toBe(true) + }) + + await runner.stop() + }) + + it(`forces the stream to reconnect after repeated heartbeat failures`, async () => { + vi.useFakeTimers() + const firstStreamOpened = deferred() + const secondStreamOpened = deferred() + const firstStreamClosed = deferred() + const secondStreamClosed = deferred() + const firstCancel = vi.fn(() => firstStreamClosed.resolve()) + const streamFactory = vi.fn(async () => { + if (streamFactory.mock.calls.length === 1) { + firstStreamOpened.resolve() + return { + async *jsonStream() { + await firstStreamClosed.promise + }, + cancel: firstCancel, + closed: firstStreamClosed.promise, + } + } + secondStreamOpened.resolve() + return { + async *jsonStream() { + await secondStreamClosed.promise + }, + cancel: () => secondStreamClosed.resolve(), + closed: secondStreamClosed.promise, + } + }) + let heartbeatCalls = 0 + const fetchMock = vi.fn(async () => { + heartbeatCalls++ + if (heartbeatCalls <= 2) { + throw new Error(`connect ECONNREFUSED 127.0.0.1:4437`) + } + return Response.json({}) + }) + vi.stubGlobal(`fetch`, fetchMock) + + const runner = createPullWakeRunner({ + baseUrl: `http://server`, + runnerId: `runner-1`, + runtime: runtime(), + heartbeatIntervalMs: 10, + eventHeartbeatThrottleMs: 0, + streamFactory, + }) + + runner.start() + await firstStreamOpened.promise + await vi.advanceTimersByTimeAsync(20) + + expect(firstCancel).toHaveBeenCalledWith(expect.any(Error)) + + await vi.advanceTimersByTimeAsync(1_000) + await secondStreamOpened.promise + + expect(streamFactory).toHaveBeenCalledTimes(2) + expect(runner.getHealth().reconnect_count).toBe(1) + + await runner.stop() + }) + + it(`marks heartbeat unhealthy before reporting heartbeat errors`, async () => { + const observedHeartbeatOk: Array = [] + let runner: ReturnType + const fetchMock = vi.fn(async (input: RequestInfo | URL) => { + if (!String(input).includes(`/heartbeat`)) { + return new Response(null, { status: 204 }) + } + return fetchMock.mock.calls.length === 1 + ? Response.json({}) + : new Response(`heartbeat failed`, { status: 500 }) + }) + vi.stubGlobal(`fetch`, fetchMock) + const streamFactory = vi.fn(async () => ({ + async *jsonStream() {}, + closed: Promise.resolve(), + })) + + runner = createPullWakeRunner({ + baseUrl: `http://server`, + runnerId: `runner-1`, + runtime: runtime(), + heartbeatIntervalMs: 5, + streamFactory, + onError: () => { + observedHeartbeatOk.push(runner.getHealth().last_heartbeat_ok) + }, + }) + + runner.start() + await waitFor(() => { + expect(observedHeartbeatOk).toContain(false) + }) + + await runner.stop() + }) + + it(`keeps onError reporting-only when the reporter throws`, async () => { + durableStreamMocks.stream.mockImplementationOnce( + async (opts: { onError: (error: Error) => unknown }) => { + opts.onError(new Error(`durable stream failed`)) + return { + async *jsonStream() {}, + closed: Promise.resolve(), + } + } + ) + const fetchMock = vi.fn(async (input: RequestInfo | URL) => { + if (String(input).includes(`/heartbeat`)) { + return new Response(`heartbeat failed`, { status: 500 }) + } + return new Response(null, { status: 204 }) + }) + vi.stubGlobal(`fetch`, fetchMock) + const onError = vi.fn(() => { + throw new Error(`reporter failed`) + }) + const consoleError = vi + .spyOn(console, `error`) + .mockImplementation(() => undefined) + + const runner = createPullWakeRunner({ + baseUrl: `http://server`, + runnerId: `runner-1`, + runtime: runtime(), + heartbeatIntervalMs: 5, + onError, + }) + + runner.start() + await waitFor(() => { + expect(onError).toHaveBeenCalledWith(expect.any(Error)) + expect(runner.running).toBe(true) + }) + + expect(runner.getHealth().last_error).toMatch(/failed/) + expect(consoleError).toHaveBeenCalledWith( + `Pull-wake runner onError callback failed`, + expect.any(Error) + ) + await expect(runner.stop()).resolves.toBeUndefined() + consoleError.mockRestore() + }) + + it(`does not let a stuck claim actor block stop or a later restart`, async () => { + vi.useFakeTimers() + const claimStarted = deferred() + const secondClaimStarted = deferred() + const fetchMock = vi + .fn() + .mockImplementationOnce(async () => { + claimStarted.resolve() + return new Promise(() => {}) + }) + .mockImplementationOnce(async () => { + secondClaimStarted.resolve() + return new Promise(() => {}) + }) + vi.stubGlobal(`fetch`, fetchMock) + const calls: Array = [] + const testRuntime = { + dispatchWake: vi.fn(() => calls.push(`dispatch`)), + abortWakes: vi.fn(() => calls.push(`abort`)), + drainWakes: vi.fn(async () => { + calls.push(`drain`) + }), + } + const streamFactory = vi.fn(async () => ({ + offset: `42`, + async *jsonStream() { + yield wakeEvent(`one`) + }, + closed: Promise.resolve(), + })) + + const runner = createPullWakeRunner({ + baseUrl: `http://server`, + runnerId: `runner-1`, + runtime: testRuntime, + heartbeatIntervalMs: 0, + streamFactory, + }) + + runner.start() + await claimStarted.promise + const stopped = runner.stop() + await vi.advanceTimersByTimeAsync(1_000) + await stopped + + expect(testRuntime.dispatchWake).not.toHaveBeenCalled() + expect(calls).toEqual([`abort`, `drain`]) + + runner.start() + await secondClaimStarted.promise + const secondStop = runner.stop() + await vi.advanceTimersByTimeAsync(1_000) + await secondStop + expect(fetchMock).toHaveBeenCalledTimes(2) + }) + + it(`throws drain errors after recording them and marking the runner stopped`, async () => { + const drainError = new Error(`drain failed`) + const onError = vi.fn() + const testRuntime = { + dispatchWake: vi.fn(), + abortWakes: vi.fn(), + drainWakes: vi.fn(async () => { + throw drainError + }), + } + const streamFactory = vi.fn(async () => ({ + async *jsonStream() {}, + closed: Promise.resolve(), + })) + const runner = createPullWakeRunner({ + baseUrl: `http://server`, + runnerId: `runner-1`, + runtime: testRuntime, + heartbeatIntervalMs: 0, + streamFactory, + onError, + }) + + runner.start() + await waitFor(() => { + expect(streamFactory).toHaveBeenCalledTimes(1) + }) + await expect(runner.stop()).rejects.toThrow(`drain failed`) + + expect(runner.running).toBe(false) + expect(onError).toHaveBeenCalledWith(drainError) + expect(runner.getHealth().last_error).toBe(`drain failed`) + }) + + it(`shares one shutdown sequence across concurrent stop calls`, async () => { + const streamClosed = deferred() + const drainStarted = deferred() + const drainReleased = deferred() + const testRuntime = { + dispatchWake: vi.fn(), + abortWakes: vi.fn(), + drainWakes: vi.fn(async () => { + drainStarted.resolve() + await drainReleased.promise + }), + } + const streamFactory = vi.fn(async () => ({ + async *jsonStream() { + await streamClosed.promise + }, + cancel: () => streamClosed.resolve(), + closed: streamClosed.promise, + })) + const runner = createPullWakeRunner({ + baseUrl: `http://server`, + runnerId: `runner-1`, + runtime: testRuntime, + heartbeatIntervalMs: 0, + streamFactory, + }) + + runner.start() + await waitFor(() => { + expect(streamFactory).toHaveBeenCalledTimes(1) + }) + + const firstStop = runner.stop() + const secondStop = runner.stop() + let waitForStoppedResolved = false + const stopped = runner.waitForStopped().then(() => { + waitForStoppedResolved = true + }) + await drainStarted.promise + + expect(testRuntime.abortWakes).toHaveBeenCalledTimes(1) + expect(testRuntime.drainWakes).toHaveBeenCalledTimes(1) + expect(waitForStoppedResolved).toBe(false) + + runner.start() + expect(streamFactory).toHaveBeenCalledTimes(1) + + drainReleased.resolve() + await Promise.all([firstStop, secondStop, stopped]) + + expect(testRuntime.abortWakes).toHaveBeenCalledTimes(1) + expect(testRuntime.drainWakes).toHaveBeenCalledTimes(1) + }) + + it(`uses exponential reconnect backoff between failed connection attempts`, async () => { + vi.useFakeTimers() + const attempts = [deferred(), deferred(), deferred()] + const streamFactory = vi.fn(async () => { + attempts[streamFactory.mock.calls.length - 1]?.resolve() + throw new Error(`stream failed`) + }) + const runner = createPullWakeRunner({ + baseUrl: `http://server`, + runnerId: `runner-1`, + runtime: runtime(), + heartbeatIntervalMs: 0, + streamFactory, + }) + + runner.start() + await attempts[0]!.promise + await vi.advanceTimersByTimeAsync(999) + expect(streamFactory).toHaveBeenCalledTimes(1) + await vi.advanceTimersByTimeAsync(1) + await attempts[1]!.promise + expect(streamFactory).toHaveBeenCalledTimes(2) + + await vi.advanceTimersByTimeAsync(1_999) + expect(streamFactory).toHaveBeenCalledTimes(2) + await vi.advanceTimersByTimeAsync(1) + await attempts[2]!.promise + expect(streamFactory).toHaveBeenCalledTimes(3) + + await runner.stop() + }) }) diff --git a/packages/agents-runtime/test/runtime-dsl.ts b/packages/agents-runtime/test/runtime-dsl.ts index c2086ffa71..3836a2395d 100644 --- a/packages/agents-runtime/test/runtime-dsl.ts +++ b/packages/agents-runtime/test/runtime-dsl.ts @@ -260,7 +260,7 @@ async function startServers(registry: EntityRegistry): Promise { await timeStep(`DurableStreamTestServer.start`, () => dsServer.start()) const electricAgentsServer = new ElectricAgentsServer({ - durableStreamsUrl: dsServer.url, + durableStreamsUrl: `${dsServer.url}/v1/stream`, port: 0, postgresUrl: TEST_POSTGRES_URL, electricUrl: TEST_ELECTRIC_URL, diff --git a/packages/agents-server-ui/src/components/SettingsMenu.tsx b/packages/agents-server-ui/src/components/SettingsMenu.tsx index 98c8971eac..ff516b52b2 100644 --- a/packages/agents-server-ui/src/components/SettingsMenu.tsx +++ b/packages/agents-server-ui/src/components/SettingsMenu.tsx @@ -171,13 +171,13 @@ export function SettingsMenu(): React.ReactElement { truncate className={styles.runtimeUrl} > - {runtimeUrl} + Pull-wake ) : ( {localRuntimeDisabled ? `Disabled for this server` - : `No runtime URL yet`} + : `Runtime not started`} )} {runtimeError && ( diff --git a/packages/agents-server-ui/src/components/settings/SettingsScreen.tsx b/packages/agents-server-ui/src/components/settings/SettingsScreen.tsx index 269ea82d9d..4e13d47e30 100644 --- a/packages/agents-server-ui/src/components/settings/SettingsScreen.tsx +++ b/packages/agents-server-ui/src/components/settings/SettingsScreen.tsx @@ -58,7 +58,7 @@ export function SettingsScreen({ )} -
+

{title}

{children} diff --git a/packages/agents-server-ui/src/components/settings/SettingsSidebar.tsx b/packages/agents-server-ui/src/components/settings/SettingsSidebar.tsx index 7dd2845694..e5ffb331af 100644 --- a/packages/agents-server-ui/src/components/settings/SettingsSidebar.tsx +++ b/packages/agents-server-ui/src/components/settings/SettingsSidebar.tsx @@ -2,6 +2,7 @@ import { useCallback } from 'react' import { useNavigate } from '@tanstack/react-router' import { ArrowLeft, + Brain, KeyRound, Palette, Plug, @@ -20,6 +21,7 @@ export type SettingsCategoryId = | `servers` | `credentials` | `appearance` + | `local-runtime` | `mcp-servers` interface CategoryDef { @@ -94,6 +96,12 @@ export function SettingsSidebar({ icon: , visible: true, }, + { + id: `local-runtime`, + label: `Local Runtime`, + icon: , + visible: isDesktop, + }, { id: `mcp-servers`, label: `MCP Servers`, diff --git a/packages/agents-server-ui/src/components/settings/pages/LocalRuntimePage.tsx b/packages/agents-server-ui/src/components/settings/pages/LocalRuntimePage.tsx index d74dc03c07..5d78849ead 100644 --- a/packages/agents-server-ui/src/components/settings/pages/LocalRuntimePage.tsx +++ b/packages/agents-server-ui/src/components/settings/pages/LocalRuntimePage.tsx @@ -1,10 +1,19 @@ -import { useEffect, useState } from 'react' +import { useEffect, useMemo, useState } from 'react' +import { eq, useLiveQuery } from '@tanstack/react-db' +import { appendPathToUrl } from '@electric-ax/agents-runtime/client' import { Play, RefreshCw, Square } from 'lucide-react' import { loadDesktopState, onDesktopStateChanged, type DesktopState, } from '../../../lib/server-connection' +import { + createRunnerRuntimeDiagnosticsCollection, + useElectricAgents, + type ElectricRunner, + type ElectricRunnerRuntimeDiagnostics, +} from '../../../lib/ElectricAgentsProvider' +import { formatRelativeTime } from '../../../lib/formatTime' import { Badge, Button, Icon, Stack, Text } from '../../../ui' import { SettingsRow, SettingsScreen, SettingsSection } from '../SettingsScreen' @@ -18,6 +27,103 @@ const STATUS_TONES: Record< error: { label: `Error`, tone: `danger` }, } +const RUNNER_HEALTH_TONES: Record< + `healthy` | `degraded` | `unhealthy` | `unknown`, + { label: string; tone: `success` | `warning` | `danger` | `neutral` } +> = { + healthy: { label: `Healthy`, tone: `success` }, + degraded: { label: `Degraded`, tone: `warning` }, + unhealthy: { label: `Unhealthy`, tone: `danger` }, + unknown: { label: `Unknown`, tone: `neutral` }, +} + +function parseTime(value: string | null | undefined): number | null { + if (!value) return null + const parsed = Date.parse(value) + return Number.isFinite(parsed) ? parsed : null +} + +function runnerHealth( + runner: ElectricRunner | null, + runtimeDiagnostics: ElectricRunnerRuntimeDiagnostics | null, + now: number = Date.now() +): { status: keyof typeof RUNNER_HEALTH_TONES; issues: Array } { + if (!runner) return { status: `unknown`, issues: [`Runner not synced`] } + const issues: Array = [] + let status: keyof typeof RUNNER_HEALTH_TONES = `healthy` + const escalate = (floor: `degraded` | `unhealthy`) => { + if (floor === `unhealthy`) status = `unhealthy` + else if (status === `healthy`) status = `degraded` + } + + if (runner.admin_status === `disabled`) { + escalate(`unhealthy`) + issues.push(`Disabled`) + } + + const leaseExpiresAt = parseTime( + runtimeDiagnostics?.liveness_lease_expires_at ?? + runner.liveness_lease_expires_at + ) + if (leaseExpiresAt === null) { + escalate(`degraded`) + issues.push(`No heartbeat`) + } else if (leaseExpiresAt <= now) { + escalate(`unhealthy`) + issues.push(`Lease expired`) + } + + const diagnostics = runtimeDiagnostics?.diagnostics ?? runner.diagnostics + if (!diagnostics) { + if (runtimeDiagnostics?.last_seen_at ?? runner.last_seen_at) { + escalate(`degraded`) + issues.push(`No diagnostics`) + } + } else { + if (diagnostics.stream_connected === false) { + escalate(`degraded`) + issues.push(`Stream disconnected`) + } + if (diagnostics.last_heartbeat_ok === false) { + escalate(`degraded`) + issues.push(`Heartbeat failed`) + } + if ((diagnostics.reconnect_count ?? 0) > 5) { + escalate(`degraded`) + issues.push(`${diagnostics.reconnect_count} reconnects`) + } + } + + return { status, issues } +} + +function timeLabel(value: string | null | undefined): string { + const ts = parseTime(value) + return ts === null ? `-` : formatRelativeTime(ts) +} + +function countLabel(value: number | undefined): string { + return String(value ?? 0) +} + +type RunnerDiagnostics = NonNullable + +function runtimeConnectionLabel(value: string | null | undefined): string { + if (!value) return `-` + return `Pull-wake` +} + +function runnerHealthEndpoint( + baseUrl: string | null | undefined, + runnerId: string | null | undefined +): string | null { + if (!baseUrl || !runnerId) return null + return appendPathToUrl( + baseUrl, + `/_electric/runners/${encodeURIComponent(runnerId)}/health` + ) +} + /** * Settings → Local Runtime. Shows the lifecycle state of the bundled * Horton runtime managed by the Electron main process and exposes @@ -31,6 +137,54 @@ const STATUS_TONES: Record< export function LocalRuntimePage(): React.ReactElement { const isDesktop = typeof window !== `undefined` && Boolean(window.electronAPI) const [state, setState] = useState(null) + const [now, setNow] = useState(() => Date.now()) + const { runnersCollection } = useElectricAgents() + const runnerId = state?.pullWakeRunnerId ?? null + const { data: runnerRows = [] } = useLiveQuery( + (query) => { + if (!runnersCollection || !runnerId) return undefined + return query + .from({ runner: runnersCollection }) + .where(({ runner }) => eq(runner.id, runnerId)) + }, + [runnersCollection, runnerId] + ) + const runner = runnerRows[0] ?? null + const healthEndpoint = runnerHealthEndpoint( + state?.activeServer?.url, + runnerId + ) + const diagnosticsCollection = useMemo(() => { + if (!state?.activeServer?.url || !runnerId) return null + return createRunnerRuntimeDiagnosticsCollection( + state.activeServer.url, + runnerId + ) + }, [state?.activeServer?.url, runnerId]) + const { data: runtimeDiagnosticsRows = [] } = useLiveQuery( + (query) => { + if (!diagnosticsCollection) return undefined + return query.from({ diagnostics: diagnosticsCollection }) + }, + [diagnosticsCollection] + ) + const runnerTelemetry = runtimeDiagnosticsRows[0] ?? null + const health = runnerHealth(runner, runnerTelemetry, now) + const healthTone = RUNNER_HEALTH_TONES[health.status] + const diagnostics: RunnerDiagnostics | null = + runnerTelemetry?.diagnostics ?? runner?.diagnostics ?? null + + useEffect(() => { + if (!isDesktop) return + const interval = window.setInterval(() => setNow(Date.now()), 5000) + return () => window.clearInterval(interval) + }, [isDesktop]) + + useEffect(() => { + return () => { + diagnosticsCollection?.cleanup() + } + }, [diagnosticsCollection]) useEffect(() => { if (!isDesktop) return @@ -82,11 +236,11 @@ export function LocalRuntimePage(): React.ReactElement { control={{statusInfo.label}} /> - {state?.runtimeUrl ?? `—`} + {runtimeConnectionLabel(state?.runtimeUrl)} } /> @@ -102,6 +256,96 @@ export function LocalRuntimePage(): React.ReactElement { )} + + + {runnerId ?? `-`} + + } + /> + 0 ? health.issues.join(`, `) : `No issues` + } + control={{healthTone.label}} + /> + + {healthEndpoint ?? `-`} + + } + /> + + {diagnostics?.stream_connected === false + ? `Disconnected` + : diagnostics?.stream_connected === true + ? `Connected` + : `Unknown`} + + } + /> + + {timeLabel(runnerTelemetry?.last_seen_at ?? runner?.last_seen_at)} + + } + /> + + {countLabel(diagnostics?.events_received)} events + + } + /> + + {diagnostics?.last_claim_result ?? `none`} + + } + /> + {diagnostics?.last_error && ( + + {diagnostics.last_error} + + } + /> + )} + + {runtimeUrl && ( - Runtime: {runtimeUrl} + Runtime: Pull-wake )} {isDesktop ? ( diff --git a/packages/agents-server-ui/src/components/views/NewSessionView.tsx b/packages/agents-server-ui/src/components/views/NewSessionView.tsx index 716a11d2f8..dfbc5931f5 100644 --- a/packages/agents-server-ui/src/components/views/NewSessionView.tsx +++ b/packages/agents-server-ui/src/components/views/NewSessionView.tsx @@ -11,11 +11,8 @@ import { useLiveQuery } from '@tanstack/react-db' import { eq, not } from '@tanstack/db' import { nanoid } from 'nanoid' import { useElectricAgents } from '../../lib/ElectricAgentsProvider' -import { useServerConnection } from '../../hooks/useServerConnection' import { useWorkspace } from '../../hooks/useWorkspace' import { useRecentWorkingDirectories } from '../../hooks/useRecentWorkingDirectories' -import { connectEntityStream } from '../../lib/entity-connection' -import { createSendMessageAction } from '../../lib/sendMessage' import { Icon, Select, Stack, Text } from '../../ui' import { SchemaForm, hasSchemaProperties, isObjectSchema } from '../SchemaForm' import { WorkingDirectoryPicker } from '../WorkingDirectoryPicker' @@ -96,7 +93,6 @@ export function NewSessionView({ setToolbarTitle, }: StandaloneViewProps): React.ReactElement { const { entityTypesCollection, spawnEntity } = useElectricAgents() - const { activeServer } = useServerConnection() const { helpers } = useWorkspace() const [selected, setSelected] = useState(null) const [error, setError] = useState(null) @@ -132,18 +128,10 @@ export function NewSessionView({ [entityTypes] ) - const baseUrl = activeServer?.url ?? null - /** - * Spawn an entity, optionally followed by a `/send` of an initial - * user message. We prefer this two-step over `initialMessage` on - * spawn so the message goes through the same path as the regular - * MessageInput (which is the proven path that wakes horton). - * - * On success we *replace this tile* with the freshly-created entity. - * That keeps the workspace layout intact (other tiles around us - * stay in place) and feels like opening a file in VS Code's - * "untitled" tab — the placeholder turns into the new content. + * Spawn an entity and let the server enqueue any initial user message. + * The server links dispatch before writing that message, avoiding a + * client-side stream preload on the critical path to the first wake. */ const doSpawn = useCallback( async ( @@ -167,6 +155,7 @@ export function NewSessionView({ }, } : {}), + ...(initialUserText ? { initialMessage: initialUserText } : {}), }) const entityUrl = `/${typeName}/${name}` try { @@ -174,29 +163,13 @@ export function NewSessionView({ helpers.openEntity(entityUrl, { target: { tileId, position: `replace` }, }) - if (initialUserText && baseUrl) { - const connection = await connectEntityStream({ baseUrl, entityUrl }) - try { - const sendInitialMessage = createSendMessageAction({ - db: connection.db, - baseUrl, - entityUrl, - }) - await sendInitialMessage({ - text: initialUserText, - mode: `immediate`, - }).isPersisted.promise - } finally { - connection.close() - } - } } catch (err) { setError( `Could not start session: ${err instanceof Error ? err.message : String(err)}.` ) } }, - [helpers, spawnEntity, baseUrl, tileId] + [helpers, spawnEntity, tileId] ) const handleSelectType = useCallback( diff --git a/packages/agents-server-ui/src/hooks/useServerConnection.tsx b/packages/agents-server-ui/src/hooks/useServerConnection.tsx index ff09ab5c61..3d8db47b40 100644 --- a/packages/agents-server-ui/src/hooks/useServerConnection.tsx +++ b/packages/agents-server-ui/src/hooks/useServerConnection.tsx @@ -138,16 +138,13 @@ export function ServerConnectionProvider({ : isDesktop ? [] : [currentServer()] - const active = desktopState?.selectedServerId - ? (next.find( - (server) => server.id === desktopState.selectedServerId - ) ?? null) - : desktopState?.activeServer && - next.some( - (server) => server.url === desktopState.activeServer?.url - ) - ? desktopState.activeServer - : (next[0] ?? null) + const active = + desktopState?.activeServer ?? + (desktopState?.selectedServerId + ? (next.find( + (server) => server.id === desktopState.selectedServerId + ) ?? null) + : (next[0] ?? null)) registerActiveBaseUrl(active?.url ?? null) registerActiveServerHeaders(active) setServers(next) @@ -180,8 +177,8 @@ export function ServerConnectionProvider({ const nextServers = state.servers ?? servers setServers(nextServers) const active = - nextServers.find((server) => server.id === state.selectedServerId) ?? state.activeServer ?? + nextServers.find((server) => server.id === state.selectedServerId) ?? null registerActiveBaseUrl(active?.url ?? null) registerActiveServerHeaders(active) diff --git a/packages/agents-server-ui/src/lib/ElectricAgentsProvider.tsx b/packages/agents-server-ui/src/lib/ElectricAgentsProvider.tsx index f59993ebb0..d2a2bc59f3 100644 --- a/packages/agents-server-ui/src/lib/ElectricAgentsProvider.tsx +++ b/packages/agents-server-ui/src/lib/ElectricAgentsProvider.tsx @@ -44,8 +44,58 @@ const entityTypeSchema = z.object({ updated_at: z.string(), }) +const runnerDiagnosticsSchema = z.object({ + started_at: z.string().nullable().optional(), + stream_connected: z.boolean().optional(), + stream_connected_since: z.string().nullable().optional(), + reconnect_count: z.number().optional(), + last_error: z.string().nullable().optional(), + last_error_at: z.string().nullable().optional(), + last_heartbeat_at: z.string().nullable().optional(), + last_heartbeat_ok: z.boolean().optional(), + last_claim_at: z.string().nullable().optional(), + last_claim_result: z + .enum([`claimed`, `no_work`, `error`]) + .nullable() + .optional(), + last_dispatch_at: z.string().nullable().optional(), + events_received: z.number().optional(), + claims_succeeded: z.number().optional(), + claims_skipped: z.number().optional(), + claims_failed: z.number().optional(), +}) + +const runnerSchema = z.object({ + id: z.string(), + owner_principal: z.string(), + label: z.string(), + kind: z.string(), + admin_status: z.enum([`enabled`, `disabled`]), + wake_stream: z.string(), + wake_stream_offset: z.string().nullable().optional(), + last_seen_at: z.string().nullable().optional(), + liveness_lease_expires_at: z.string().nullable().optional(), + diagnostics: runnerDiagnosticsSchema.nullable().optional(), + created_at: z.string(), + updated_at: z.string(), +}) + +const runnerRuntimeDiagnosticsSchema = z.object({ + runner_id: z.string(), + owner_principal: z.string(), + wake_stream_offset: z.string().nullable().optional(), + last_seen_at: z.string(), + liveness_lease_expires_at: z.string(), + diagnostics: runnerDiagnosticsSchema.nullable().optional(), + updated_at: z.string(), +}) + export type ElectricEntity = z.infer export type ElectricEntityType = z.infer +export type ElectricRunner = z.infer +export type ElectricRunnerRuntimeDiagnostics = z.infer< + typeof runnerRuntimeDiagnosticsSchema +> // --- Collection factories --- @@ -97,12 +147,51 @@ function createEntityTypesCollection(baseUrl: string) { ) } +function createRunnersCollection(baseUrl: string) { + return createCollection( + electricCollectionOptions({ + id: `runners`, + schema: runnerSchema, + shapeOptions: { + url: appendPathToUrl(baseUrl, `/_electric/electric/v1/shape`), + params: { table: `runners` }, + fetchClient: serverFetch, + }, + getKey: (item) => item.id, + }) + ) +} + +export function createRunnerRuntimeDiagnosticsCollection( + baseUrl: string, + runnerId: string +) { + return createCollection( + electricCollectionOptions({ + id: `runner-runtime-diagnostics:${baseUrl}:${runnerId}`, + schema: runnerRuntimeDiagnosticsSchema, + shapeOptions: { + url: appendPathToUrl(baseUrl, `/_electric/electric/v1/shape`), + params: { + table: `runner_runtime_diagnostics`, + where: `runner_id = $1`, + params: { '1': runnerId }, + }, + fetchClient: serverFetch, + }, + getKey: (item) => item.runner_id, + }) + ) +} + type EntitiesCollection = ReturnType type EntityTypesCollection = ReturnType +type RunnersCollection = ReturnType type AppCollections = { entities: EntitiesCollection entityTypes: EntityTypesCollection + runners: RunnersCollection } const appCollectionsCache = new Map() @@ -113,6 +202,7 @@ function getOrCreateAppCollections(baseUrl: string): AppCollections { const collections = { entities: createEntitiesCollection(baseUrl), entityTypes: createEntityTypesCollection(baseUrl), + runners: createRunnersCollection(baseUrl), } appCollectionsCache.set(baseUrl, collections) return collections @@ -123,6 +213,7 @@ function cleanupAppCollections(baseUrl: string): void { if (!collections) return collections.entities.cleanup() collections.entityTypes.cleanup() + collections.runners.cleanup() appCollectionsCache.delete(baseUrl) } @@ -139,6 +230,7 @@ export async function preloadAppCollections( await Promise.all([ collections.entities.preload(), collections.entityTypes.preload(), + collections.runners.preload(), ]) return collections } @@ -280,6 +372,7 @@ function createForkEntity(baseUrl: string) { interface ElectricAgentsState { entitiesCollection: EntitiesCollection | null entityTypesCollection: EntityTypesCollection | null + runnersCollection: RunnersCollection | null spawnEntity: ReturnType | null killEntity: ReturnType | null forkEntity: ReturnType | null @@ -288,6 +381,7 @@ interface ElectricAgentsState { const ElectricAgentsContext = createContext({ entitiesCollection: null, entityTypesCollection: null, + runnersCollection: null, spawnEntity: null, killEntity: null, forkEntity: null, @@ -316,16 +410,19 @@ export function ElectricAgentsProvider({ return { entitiesCollection: null, entityTypesCollection: null, + runnersCollection: null, spawnEntity: null, killEntity: null, forkEntity: null, } } - const { entities, entityTypes } = getOrCreateAppCollections(baseUrl) + const { entities, entityTypes, runners } = + getOrCreateAppCollections(baseUrl) return { entitiesCollection: entities, entityTypesCollection: entityTypes, + runnersCollection: runners, spawnEntity: createSpawnAction(baseUrl, entities), killEntity: createKillAction(baseUrl, entities), forkEntity: createForkEntity(baseUrl), diff --git a/packages/agents-server-ui/src/lib/auth-fetch.test.ts b/packages/agents-server-ui/src/lib/auth-fetch.test.ts index a3b21b23d2..c661687add 100644 --- a/packages/agents-server-ui/src/lib/auth-fetch.test.ts +++ b/packages/agents-server-ui/src/lib/auth-fetch.test.ts @@ -1,5 +1,10 @@ import { afterEach, describe, expect, it, vi } from 'vitest' -import { registerActiveServerHeaders, serverFetch } from './auth-fetch' +import { + getActivePrincipal, + getConfiguredActivePrincipal, + registerActiveServerHeaders, + serverFetch, +} from './auth-fetch' describe(`server fetch helpers`, () => { afterEach(() => { @@ -81,4 +86,141 @@ describe(`server fetch helpers`, () => { const headers = new Headers(fetchMock.mock.calls[0][1]?.headers) expect(headers.has(`authorization`)).toBe(false) }) + + it(`leaves configured headers to desktop injection inside Electron`, async () => { + ;(globalThis as { window?: unknown }).window = { + electronAPI: {}, + } + registerActiveServerHeaders({ + name: `Local`, + url: `http://localhost:4437`, + headers: { 'electric-principal': `system:dev-local` }, + }) + + const fetchMock = vi + .spyOn(globalThis, `fetch`) + .mockResolvedValue(new Response(`ok`)) + + await serverFetch( + `http://localhost:4437/_electric/entities/horton/a/send`, + { + method: `POST`, + headers: { 'content-type': `text/plain` }, + } + ) + + expect(fetchMock.mock.calls[0][0]).toBe( + `http://localhost:4437/_electric/entities/horton/a/send` + ) + const headers = new Headers(fetchMock.mock.calls[0][1]?.headers) + expect(headers.get(`content-type`)).toBe(`text/plain`) + expect(headers.has(`electric-principal`)).toBe(false) + }) + + it(`routes local mutating requests through the desktop server fetch transport`, async () => { + const desktopFetch = vi.fn().mockResolvedValue({ + url: `http://127.0.0.1:4437/_electric/entities/horton/a`, + status: 204, + statusText: `No Content`, + headers: {}, + body: ``, + }) + ;(globalThis as { window?: unknown }).window = { + electronAPI: { serverFetch: desktopFetch }, + } + registerActiveServerHeaders({ + name: `Local`, + url: `http://127.0.0.1:4437`, + headers: { 'electric-principal': `system:dev-local` }, + }) + + const fetchMock = vi.spyOn(globalThis, `fetch`) + + const response = await serverFetch( + `http://127.0.0.1:4437/_electric/entities/horton/a`, + { + method: `PUT`, + headers: { 'content-type': `application/json` }, + body: JSON.stringify({}), + } + ) + + expect(response.status).toBe(204) + expect(fetchMock).not.toHaveBeenCalled() + expect(desktopFetch).toHaveBeenCalledWith({ + url: `http://127.0.0.1:4437/_electric/entities/horton/a`, + method: `PUT`, + headers: { 'content-type': `application/json` }, + body: `{}`, + }) + }) + + it(`keeps local GET requests in the browser in Electron`, async () => { + const desktopFetch = vi.fn() + ;(globalThis as { window?: unknown }).window = { + electronAPI: { serverFetch: desktopFetch }, + } + registerActiveServerHeaders({ + name: `Local`, + url: `http://127.0.0.1:4437`, + headers: { 'electric-principal': `system:dev-local` }, + }) + + const fetchMock = vi + .spyOn(globalThis, `fetch`) + .mockResolvedValue(new Response(`ok`)) + + await serverFetch(`http://127.0.0.1:4437/_electric/shape`) + + expect(desktopFetch).not.toHaveBeenCalled() + expect(fetchMock).toHaveBeenCalledOnce() + }) + + it(`keeps non-local mutating requests in the browser in Electron`, async () => { + const desktopFetch = vi.fn() + ;(globalThis as { window?: unknown }).window = { + electronAPI: { serverFetch: desktopFetch }, + } + registerActiveServerHeaders({ + name: `Cloud`, + url: `https://agents.example.test`, + headers: { Authorization: `Bearer tenant-token` }, + }) + + const fetchMock = vi + .spyOn(globalThis, `fetch`) + .mockResolvedValue(new Response(`ok`)) + + await serverFetch( + `https://agents.example.test/_electric/entities/horton/a`, + { + method: `PUT`, + body: JSON.stringify({}), + } + ) + + expect(desktopFetch).not.toHaveBeenCalled() + expect(fetchMock).toHaveBeenCalledOnce() + }) + + it(`returns the active principal as a canonical principal URL`, () => { + registerActiveServerHeaders({ + name: `Tenant`, + url: `https://agents.example.test`, + headers: { 'electric-principal': `system:dev-local` }, + }) + + expect(getActivePrincipal()).toBe(`/principal/system%3Adev-local`) + expect(getConfiguredActivePrincipal()).toBe(`/principal/system%3Adev-local`) + }) + + it(`uses the local dev principal when no active principal is configured`, () => { + registerActiveServerHeaders({ + name: `Local`, + url: `http://127.0.0.1:4437`, + }) + + expect(getConfiguredActivePrincipal()).toBe(null) + expect(getActivePrincipal()).toBe(`/principal/system%3Adev-local`) + }) }) diff --git a/packages/agents-server-ui/src/lib/auth-fetch.ts b/packages/agents-server-ui/src/lib/auth-fetch.ts index 77674bcc78..07f70a6d7a 100644 --- a/packages/agents-server-ui/src/lib/auth-fetch.ts +++ b/packages/agents-server-ui/src/lib/auth-fetch.ts @@ -1,3 +1,8 @@ +import type { + DesktopServerFetchRequest, + DesktopServerFetchResponse, +} from './server-connection' + type ServerHeaderConfig = { name?: string url: string @@ -9,8 +14,19 @@ type ActiveServerHeaders = { headers: Record } +const DEFAULT_ACTIVE_PRINCIPAL = `system:dev-local` +const DESKTOP_SERVER_FETCH_METHODS = new Set([`POST`, `PUT`, `PATCH`, `DELETE`]) +const NULL_BODY_STATUSES = new Set([204, 205, 304]) + let activeServerHeaders: ActiveServerHeaders | null = null +function principalUrl(principal: string): string { + const trimmed = principal.trim() + return trimmed.startsWith(`/principal/`) + ? trimmed + : `/principal/${encodeURIComponent(trimmed)}` +} + function normalizeHeaders( headers: Record | undefined ): Record { @@ -69,6 +85,85 @@ function matchesActiveServer(input: RequestInfo | URL): boolean { ) } +function isLocalHttpUrl(url: URL): boolean { + if (url.protocol !== `http:`) return false + const hostname = url.hostname.toLowerCase() + return ( + hostname === `localhost` || + hostname === `127.0.0.1` || + hostname === `0.0.0.0` || + hostname === `[::1]` || + hostname === `::1` + ) +} + +function activeServerIsLocal(): boolean { + if (!activeServerHeaders) return false + try { + return isLocalHttpUrl(new URL(activeServerHeaders.baseUrl)) + } catch { + return false + } +} + +function requestMethod(input: RequestInfo | URL, init: RequestInit): string { + return ( + init.method ?? + (input instanceof Request ? input.method : undefined) ?? + `GET` + ).toUpperCase() +} + +function desktopServerFetchApi(): + | (( + request: DesktopServerFetchRequest + ) => Promise) + | undefined { + if (typeof window === `undefined`) return undefined + return window.electronAPI?.serverFetch +} + +function shouldUseDesktopServerFetch( + input: RequestInfo | URL, + init: RequestInit +): boolean { + const method = requestMethod(input, init) + return ( + DESKTOP_SERVER_FETCH_METHODS.has(method) && + activeServerIsLocal() && + matchesActiveServer(input) && + Boolean(desktopServerFetchApi()) + ) +} + +async function desktopFetchBody( + input: RequestInfo | URL, + init: RequestInit +): Promise { + if (init.body === undefined || init.body === null) { + if (input instanceof Request) { + if (input.bodyUsed) return undefined + return await input.clone().text() + } + return null + } + if (typeof init.body === `string`) return init.body + if (init.body instanceof URLSearchParams) return init.body.toString() + if (init.body instanceof Blob) return await init.body.text() + return undefined +} + +function responseFromDesktopFetch( + response: DesktopServerFetchResponse +): Response { + const body = NULL_BODY_STATUSES.has(response.status) ? null : response.body + return new Response(body, { + status: response.status, + statusText: response.statusText, + headers: response.headers, + }) +} + export function registerActiveServerHeaders( server: ServerHeaderConfig | null ): void { @@ -85,24 +180,56 @@ export function getConfiguredServerHeaders( return matchesActiveServer(input) ? (activeServerHeaders?.headers ?? {}) : {} } +export function getConfiguredActivePrincipal(): string | null { + const principal = activeServerHeaders?.headers[`electric-principal`] + return principal ? principalUrl(principal) : null +} + export function getActivePrincipal(): string { - return activeServerHeaders?.headers[`electric-principal`] ?? `unknown` + return ( + getConfiguredActivePrincipal() ?? principalUrl(DEFAULT_ACTIVE_PRINCIPAL) + ) +} + +function hasDesktopHeaderInjection(): boolean { + return ( + typeof window !== `undefined` && + Boolean((window as { electronAPI?: unknown }).electronAPI) + ) } export async function serverFetch( input: RequestInfo | URL, init: RequestInit = {} ): Promise { + const method = requestMethod(input, init) const headers = new Headers( input instanceof Request ? input.headers : undefined ) new Headers(init.headers).forEach((value, key) => { headers.set(key, value) }) - for (const [key, value] of Object.entries( - getConfiguredServerHeaders(input) - )) { - if (!headers.has(key)) headers.set(key, value) + if (!hasDesktopHeaderInjection()) { + for (const [key, value] of Object.entries( + getConfiguredServerHeaders(input) + )) { + if (!headers.has(key)) headers.set(key, value) + } + } + if (shouldUseDesktopServerFetch(input, init)) { + const api = desktopServerFetchApi() + const url = urlFromInput(input) + const body = await desktopFetchBody(input, init) + if (api && url && body !== undefined) { + return responseFromDesktopFetch( + await api({ + url: url.toString(), + method, + headers: Object.fromEntries(headers.entries()), + body, + }) + ) + } } return fetch(input, { ...init, headers }) } diff --git a/packages/agents-server-ui/src/lib/sendMessage.ts b/packages/agents-server-ui/src/lib/sendMessage.ts index 018d7152fe..a4553e4396 100644 --- a/packages/agents-server-ui/src/lib/sendMessage.ts +++ b/packages/agents-server-ui/src/lib/sendMessage.ts @@ -2,6 +2,7 @@ import { createOptimisticAction } from '@tanstack/db' import { generateKeyBetween } from 'fractional-indexing' import { getActivePrincipal, + getConfiguredActivePrincipal, getConfiguredServerHeaders, serverFetch, } from './auth-fetch' @@ -180,7 +181,7 @@ export function createSendMessageAction({ db, baseUrl, entityUrl, - from = getActivePrincipal(), + from, onOptimisticMessage, }: { db: EntityStreamDBWithActions @@ -191,11 +192,12 @@ export function createSendMessageAction({ }) { const action = createOptimisticAction({ onMutate: ({ text, mode, key, seq, position }) => { + const sender = from ?? getActivePrincipal() const now = new Date().toISOString() const message: OptimisticInboxMessage = { key, _seq: seq, - from, + from: sender, payload: { text }, timestamp: now, mode, @@ -211,7 +213,10 @@ export function createSendMessageAction({ }, mutationFn: async ({ text, key, mode, position }) => { const url = entityApiUrl(baseUrl, entityUrl, `/send`) - const sender = await resolveSenderPrincipalUrl(url, from) + const sender = await resolveSenderPrincipalUrl( + url, + from ?? getConfiguredActivePrincipal() ?? `` + ) const res = await serverFetch(url, { method: `POST`, headers: { 'content-type': `application/json` }, diff --git a/packages/agents-server-ui/src/lib/server-connection.ts b/packages/agents-server-ui/src/lib/server-connection.ts index 1c99ac7836..0e2ec05e8d 100644 --- a/packages/agents-server-ui/src/lib/server-connection.ts +++ b/packages/agents-server-ui/src/lib/server-connection.ts @@ -41,6 +41,21 @@ export interface DesktopState { pullWakeRunnerId: string | null } +export interface DesktopServerFetchRequest { + url: string + method: string + headers: Record + body: string | null +} + +export interface DesktopServerFetchResponse { + url: string + status: number + statusText: string + headers: Record + body: string +} + export interface ServerConnectionState { serverId: string status: ServerConnectionStatus @@ -227,6 +242,9 @@ declare global { getServers: () => Promise> saveServers: (servers: Array) => Promise getDesktopState?: () => Promise + serverFetch?: ( + request: DesktopServerFetchRequest + ) => Promise setNativeAppearance?: (appearance: DesktopAppearance) => Promise setActiveServer?: (server: ServerConfig | null) => Promise setSelectedServer?: (serverId: string | null) => Promise diff --git a/packages/agents-server-ui/src/main.tsx b/packages/agents-server-ui/src/main.tsx index dc494a5c75..4f966fe34f 100644 --- a/packages/agents-server-ui/src/main.tsx +++ b/packages/agents-server-ui/src/main.tsx @@ -23,16 +23,40 @@ import { App } from './App' // ngrok's free tier intercepts browser requests with an HTML warning page // (status 200, no CORS header) — every fetch to an ngrok host fails CORS -// as a result. Setting `ngrok-skip-browser-warning` on every outbound -// request makes ngrok pass through to the upstream. No effect on requests -// to non-ngrok hosts. Covers the durable-streams client's internal fetches -// too, since it calls through the global fetch. +// as a result. Set `ngrok-skip-browser-warning` only for ngrok hosts: +// adding a custom header to local sends forces CORS preflights. +function isNgrokHost(input: RequestInfo | URL): boolean { + try { + const url = + input instanceof Request + ? new URL(input.url, window.location.href) + : new URL(input, window.location.href) + return ( + url.hostname === `ngrok-free.app` || + url.hostname.endsWith(`.ngrok-free.app`) || + url.hostname === `ngrok.app` || + url.hostname.endsWith(`.ngrok.app`) || + url.hostname === `ngrok.dev` || + url.hostname.endsWith(`.ngrok.dev`) || + url.hostname === `ngrok.io` || + url.hostname.endsWith(`.ngrok.io`) || + url.hostname === `ngrok-free.dev` || + url.hostname.endsWith(`.ngrok-free.dev`) + ) + } catch { + return false + } +} + const originalFetch = window.fetch.bind(window) window.fetch = ( input: RequestInfo | URL, init?: RequestInit ): Promise => { - const headers = new Headers(init?.headers ?? {}) + if (!isNgrokHost(input)) return originalFetch(input, init) + const headers = new Headers( + init?.headers ?? (input instanceof Request ? input.headers : undefined) + ) if (!headers.has(`ngrok-skip-browser-warning`)) { headers.set(`ngrok-skip-browser-warning`, `true`) } diff --git a/packages/agents-server-ui/src/router.tsx b/packages/agents-server-ui/src/router.tsx index 4844872622..8030d23fb4 100644 --- a/packages/agents-server-ui/src/router.tsx +++ b/packages/agents-server-ui/src/router.tsx @@ -48,6 +48,7 @@ import { AppearancePage } from './components/settings/pages/AppearancePage' import { CredentialsPage } from './components/settings/pages/CredentialsPage' import { ServersPage } from './components/settings/pages/ServersPage' import { McpServersPage } from './components/settings/pages/McpServersPage' +import { LocalRuntimePage } from './components/settings/pages/LocalRuntimePage' import styles from './router.module.css' const SETTINGS_CATEGORY_IDS: ReadonlyArray = [ @@ -56,6 +57,7 @@ const SETTINGS_CATEGORY_IDS: ReadonlyArray = [ `servers`, `credentials`, `appearance`, + `local-runtime`, `mcp-servers`, ] @@ -430,6 +432,8 @@ function SettingsCategoryPage(): React.ReactElement { return case `credentials`: return + case `local-runtime`: + return case `mcp-servers`: return case `general`: diff --git a/packages/agents-server/drizzle/0007_runner_diagnostics_and_principal.sql b/packages/agents-server/drizzle/0007_runner_diagnostics_and_principal.sql new file mode 100644 index 0000000000..b2573b9709 --- /dev/null +++ b/packages/agents-server/drizzle/0007_runner_diagnostics_and_principal.sql @@ -0,0 +1,22 @@ +UPDATE consumer_claims +SET status = 'expired', updated_at = NOW() +WHERE status = 'active' AND runner_id IS NOT NULL; +--> statement-breakpoint +UPDATE entity_dispatch_state +SET active_runner_id = NULL, + active_consumer_id = NULL, + active_epoch = NULL, + active_claimed_at = NULL, + active_lease_expires_at = NULL, + updated_at = NOW() +WHERE active_runner_id IS NOT NULL; +--> statement-breakpoint +DELETE FROM runners; +--> statement-breakpoint +ALTER TABLE runners RENAME COLUMN owner_user_id TO owner_principal; +--> statement-breakpoint +DROP INDEX IF EXISTS idx_runners_owner_user_id; +--> statement-breakpoint +CREATE INDEX idx_runners_owner_principal ON runners (tenant_id, owner_principal); +--> statement-breakpoint +ALTER TABLE runners ADD COLUMN diagnostics jsonb; diff --git a/packages/agents-server/drizzle/0008_runner_runtime_diagnostics.sql b/packages/agents-server/drizzle/0008_runner_runtime_diagnostics.sql new file mode 100644 index 0000000000..4ab187475b --- /dev/null +++ b/packages/agents-server/drizzle/0008_runner_runtime_diagnostics.sql @@ -0,0 +1,50 @@ +CREATE TABLE runner_runtime_diagnostics ( + tenant_id text NOT NULL DEFAULT 'default', + runner_id text NOT NULL, + owner_principal text NOT NULL, + wake_stream_offset text, + last_seen_at timestamp with time zone NOT NULL, + liveness_lease_expires_at timestamp with time zone NOT NULL, + diagnostics jsonb, + updated_at timestamp with time zone NOT NULL DEFAULT NOW(), + PRIMARY KEY (tenant_id, runner_id) +); +--> statement-breakpoint +CREATE INDEX idx_runner_runtime_diagnostics_owner + ON runner_runtime_diagnostics (tenant_id, owner_principal); +--> statement-breakpoint +CREATE INDEX idx_runner_runtime_diagnostics_liveness + ON runner_runtime_diagnostics (tenant_id, liveness_lease_expires_at); +--> statement-breakpoint +INSERT INTO runner_runtime_diagnostics ( + tenant_id, + runner_id, + owner_principal, + wake_stream_offset, + last_seen_at, + liveness_lease_expires_at, + diagnostics, + updated_at +) +SELECT + tenant_id, + id, + owner_principal, + wake_stream_offset, + COALESCE(last_seen_at, updated_at), + COALESCE(liveness_lease_expires_at, updated_at), + diagnostics, + updated_at +FROM runners +WHERE last_seen_at IS NOT NULL + OR liveness_lease_expires_at IS NOT NULL + OR wake_stream_offset IS NOT NULL + OR diagnostics IS NOT NULL; +--> statement-breakpoint +ALTER TABLE runners DROP COLUMN diagnostics; +--> statement-breakpoint +ALTER TABLE runners DROP COLUMN wake_stream_offset; +--> statement-breakpoint +ALTER TABLE runners DROP COLUMN last_seen_at; +--> statement-breakpoint +ALTER TABLE runners DROP COLUMN liveness_lease_expires_at; diff --git a/packages/agents-server/drizzle/meta/_journal.json b/packages/agents-server/drizzle/meta/_journal.json index 8331830f37..4574ed99be 100644 --- a/packages/agents-server/drizzle/meta/_journal.json +++ b/packages/agents-server/drizzle/meta/_journal.json @@ -50,6 +50,20 @@ "when": 1776268800000, "tag": "0006_principals", "breakpoints": true + }, + { + "idx": 7, + "version": "7", + "when": 1778899200000, + "tag": "0007_runner_diagnostics_and_principal", + "breakpoints": true + }, + { + "idx": 8, + "version": "7", + "when": 1778976000000, + "tag": "0008_runner_runtime_diagnostics", + "breakpoints": true } ] } diff --git a/packages/agents-server/src/db/schema.ts b/packages/agents-server/src/db/schema.ts index a866efa288..15a1d234bc 100644 --- a/packages/agents-server/src/db/schema.ts +++ b/packages/agents-server/src/db/schema.ts @@ -106,16 +106,11 @@ export const runners = pgTable( { tenantId: text(`tenant_id`).notNull().default(`default`), id: text(`id`).notNull(), - ownerUserId: text(`owner_user_id`).notNull(), + ownerPrincipal: text(`owner_principal`).notNull(), label: text(`label`).notNull(), kind: text(`kind`).notNull().default(`local`), adminStatus: text(`admin_status`).notNull().default(`enabled`), wakeStream: text(`wake_stream`).notNull(), - wakeStreamOffset: text(`wake_stream_offset`), - lastSeenAt: timestamp(`last_seen_at`, { withTimezone: true }), - livenessLeaseExpiresAt: timestamp(`liveness_lease_expires_at`, { - withTimezone: true, - }), createdAt: timestamp(`created_at`, { withTimezone: true }) .notNull() .defaultNow(), @@ -126,12 +121,11 @@ export const runners = pgTable( (table) => [ primaryKey({ columns: [table.tenantId, table.id] }), unique(`uq_runners_wake_stream`).on(table.tenantId, table.wakeStream), - index(`idx_runners_owner_user_id`).on(table.tenantId, table.ownerUserId), - index(`idx_runners_admin_status`).on(table.tenantId, table.adminStatus), - index(`idx_runners_liveness_lease_expires_at`).on( + index(`idx_runners_owner_principal`).on( table.tenantId, - table.livenessLeaseExpiresAt + table.ownerPrincipal ), + index(`idx_runners_admin_status`).on(table.tenantId, table.adminStatus), check( `chk_runners_kind`, sql`${table.kind} IN ('local', 'cloud-worker', 'sandbox', 'ci', 'server')` @@ -143,6 +137,35 @@ export const runners = pgTable( ] ) +export const runnerRuntimeDiagnostics = pgTable( + `runner_runtime_diagnostics`, + { + tenantId: text(`tenant_id`).notNull().default(`default`), + runnerId: text(`runner_id`).notNull(), + ownerPrincipal: text(`owner_principal`).notNull(), + wakeStreamOffset: text(`wake_stream_offset`), + lastSeenAt: timestamp(`last_seen_at`, { withTimezone: true }).notNull(), + livenessLeaseExpiresAt: timestamp(`liveness_lease_expires_at`, { + withTimezone: true, + }).notNull(), + diagnostics: jsonb(`diagnostics`), + updatedAt: timestamp(`updated_at`, { withTimezone: true }) + .notNull() + .defaultNow(), + }, + (table) => [ + primaryKey({ columns: [table.tenantId, table.runnerId] }), + index(`idx_runner_runtime_diagnostics_owner`).on( + table.tenantId, + table.ownerPrincipal + ), + index(`idx_runner_runtime_diagnostics_liveness`).on( + table.tenantId, + table.livenessLeaseExpiresAt + ), + ] +) + export const entityDispatchState = pgTable( `entity_dispatch_state`, { diff --git a/packages/agents-server/src/electric-agents-types.ts b/packages/agents-server/src/electric-agents-types.ts index e32d771a85..26482350ed 100644 --- a/packages/agents-server/src/electric-agents-types.ts +++ b/packages/agents-server/src/electric-agents-types.ts @@ -2,7 +2,10 @@ * Types for the Electric Agents entity runtime. */ -import type { WebhookNotification } from '@electric-ax/agents-runtime' +import type { + PullWakeRunnerHealth, + WebhookNotification, +} from '@electric-ax/agents-runtime' import type { Principal } from './principal.js' type WakeNotification = WebhookNotification @@ -105,7 +108,7 @@ export interface RunnerActiveClaim { export interface ElectricAgentsRunner { id: string - owner_user_id: string + owner_principal: string label: string kind: RunnerKind admin_status: RunnerAdminStatus @@ -115,13 +118,14 @@ export interface ElectricAgentsRunner { active_claims?: Array wake_stream: string wake_stream_offset?: string + diagnostics?: Record created_at: string updated_at: string } export interface RegisterRunnerRequest { id: string - owner_user_id: string + owner_principal: string label: string kind?: RunnerKind admin_status?: RunnerAdminStatus @@ -133,6 +137,48 @@ export interface RunnerHeartbeatRequest { wake_stream_offset?: string wakeStreamOffset?: string liveness_lease_expires_at?: string + diagnostics?: Record +} + +export type RunnerHealthStatus = `healthy` | `degraded` | `unhealthy` +export type RunnerClientDiagnostics = Partial< + Omit +> + +export interface RunnerHealthResponse { + runner: { + id: string + admin_status: RunnerAdminStatus + liveness_status: RunnerLiveness | `expired` + lease_expires_at: string | null + lease_remaining_ms: number | null + wake_stream: string + wake_stream_offset: string | null + last_seen_at: string | null + created_at: string + } + client: RunnerClientDiagnostics | null + claims: { + active_count: number + active: Array<{ + consumer_id: string + epoch: number + entity_url: string + stream_path: string + claimed_at: string + last_heartbeat_at: string | null + lease_expires_at: string | null + }> + } + dispatch: { + entities_with_active_claim: number + entities_with_outstanding_wake: number + entities_with_pending_work: number + } + health: { + status: RunnerHealthStatus + issues: Array + } } export interface EntityDispatchState { diff --git a/packages/agents-server/src/entity-registry.ts b/packages/agents-server/src/entity-registry.ts index 46ccc2ccdf..a0c5b23426 100644 --- a/packages/agents-server/src/entity-registry.ts +++ b/packages/agents-server/src/entity-registry.ts @@ -7,6 +7,7 @@ import { entityDispatchState, entityManifestSources, entityTypes, + runnerRuntimeDiagnostics, runners, tagStreamOutbox, } from './db/schema.js' @@ -73,7 +74,7 @@ export interface TagStreamOutboxRow { export interface RegisterRunnerInput { id: string - ownerUserId: string + ownerPrincipal: string label: string kind?: RunnerKind adminStatus?: RunnerAdminStatus @@ -82,10 +83,22 @@ export interface RegisterRunnerInput { export interface HeartbeatRunnerInput { runnerId: string + ownerPrincipal: string heartbeatAt?: Date livenessLeaseExpiresAt?: Date leaseMs?: number wakeStreamOffset?: string + diagnostics?: Record +} + +export interface RunnerRuntimeDiagnostics { + runner_id: string + owner_principal: string + wake_stream_offset?: string + last_seen_at: string + liveness_lease_expires_at: string + diagnostics?: Record + updated_at: string } export interface MaterializeActiveClaimInput { @@ -140,7 +153,7 @@ export class PostgresRegistry { .values({ tenantId: this.tenantId, id: input.id, - ownerUserId: input.ownerUserId, + ownerPrincipal: input.ownerPrincipal, label: input.label, kind: input.kind ?? `local`, adminStatus: input.adminStatus ?? `enabled`, @@ -150,7 +163,7 @@ export class PostgresRegistry { .onConflictDoUpdate({ target: [runners.tenantId, runners.id], set: { - ownerUserId: input.ownerUserId, + ownerPrincipal: input.ownerPrincipal, label: input.label, kind: input.kind ?? `local`, adminStatus: input.adminStatus ?? `enabled`, @@ -176,11 +189,11 @@ export class PostgresRegistry { } async listRunners(filter?: { - ownerUserId?: string + ownerPrincipal?: string }): Promise> { const conditions = [eq(runners.tenantId, this.tenantId)] - if (filter?.ownerUserId) { - conditions.push(eq(runners.ownerUserId, filter.ownerUserId)) + if (filter?.ownerPrincipal) { + conditions.push(eq(runners.ownerPrincipal, filter.ownerPrincipal)) } const rows = await this.db .select() @@ -198,22 +211,66 @@ export class PostgresRegistry { input.livenessLeaseExpiresAt ?? new Date(now.getTime() + (input.leaseMs ?? DEFAULT_RUNNER_LEASE_MS)) - const rows = await this.db - .update(runners) - .set({ + await this.db + .insert(runnerRuntimeDiagnostics) + .values({ + tenantId: this.tenantId, + runnerId: input.runnerId, + ownerPrincipal: input.ownerPrincipal, lastSeenAt: now, livenessLeaseExpiresAt: leaseExpiresAt, - ...(input.wakeStreamOffset !== undefined - ? { wakeStreamOffset: input.wakeStreamOffset } - : {}), + wakeStreamOffset: input.wakeStreamOffset, + diagnostics: input.diagnostics, updatedAt: now, }) + .onConflictDoUpdate({ + target: [ + runnerRuntimeDiagnostics.tenantId, + runnerRuntimeDiagnostics.runnerId, + ], + set: { + lastSeenAt: now, + ownerPrincipal: input.ownerPrincipal, + livenessLeaseExpiresAt: leaseExpiresAt, + ...(input.wakeStreamOffset !== undefined + ? { wakeStreamOffset: input.wakeStreamOffset } + : {}), + ...(input.diagnostics !== undefined + ? { diagnostics: input.diagnostics } + : {}), + updatedAt: now, + }, + }) + + const runner = await this.getRunner(input.runnerId) + if (!runner) return null + return { + ...runner, + last_seen_at: now.toISOString(), + liveness_lease_expires_at: leaseExpiresAt.toISOString(), + ...(input.wakeStreamOffset !== undefined + ? { wake_stream_offset: input.wakeStreamOffset } + : {}), + ...(input.diagnostics !== undefined + ? { diagnostics: input.diagnostics } + : {}), + } + } + + async getRunnerDiagnostics( + runnerId: string + ): Promise { + const rows = await this.db + .select() + .from(runnerRuntimeDiagnostics) .where( - and(eq(runners.tenantId, this.tenantId), eq(runners.id, input.runnerId)) + and( + eq(runnerRuntimeDiagnostics.tenantId, this.tenantId), + eq(runnerRuntimeDiagnostics.runnerId, runnerId) + ) ) - .returning() - - return rows[0] ? this.rowToRunner(rows[0]) : null + .limit(1) + return rows[0] ? this.rowToRunnerRuntimeDiagnostics(rows[0]) : null } async setRunnerAdminStatus( @@ -366,6 +423,54 @@ export class PostgresRegistry { return claim } + async getActiveClaimsForRunner( + runnerId: string + ): Promise> { + const rows = await this.db + .select() + .from(consumerClaims) + .where( + and( + eq(consumerClaims.tenantId, this.tenantId), + eq(consumerClaims.runnerId, runnerId), + eq(consumerClaims.status, `active`) + ) + ) + return rows.map((row) => this.rowToConsumerClaim(row)) + } + + async getDispatchStatsForRunner(runnerId: string): Promise<{ + entities_with_active_claim: number + entities_with_outstanding_wake: number + entities_with_pending_work: number + }> { + const rows = await this.db + .select() + .from(entityDispatchState) + .where( + and( + eq(entityDispatchState.tenantId, this.tenantId), + eq(entityDispatchState.activeRunnerId, runnerId) + ) + ) + + let activeClaim = 0 + let outstandingWake = 0 + let pendingWork = 0 + for (const row of rows) { + if (row.activeConsumerId) activeClaim++ + if (row.outstandingWakeId && !row.activeConsumerId) outstandingWake++ + const pending = row.pendingSourceStreams as Array | null + if (pending && pending.length > 0) pendingWork++ + } + + return { + entities_with_active_claim: activeClaim, + entities_with_outstanding_wake: outstandingWake, + entities_with_pending_work: pendingWork, + } + } + private entityTypeWhere(name: string) { return and( eq(entityTypes.tenantId, this.tenantId), @@ -1146,27 +1251,32 @@ export class PostgresRegistry { } private rowToRunner(row: typeof runners.$inferSelect): ElectricAgentsRunner { - const now = Date.now() - const livenessExpiry = row.livenessLeaseExpiresAt?.getTime() return { id: row.id, - owner_user_id: row.ownerUserId, + owner_principal: row.ownerPrincipal, label: row.label, kind: assertRunnerKind(row.kind), admin_status: assertRunnerAdminStatus(row.adminStatus), - liveness: - livenessExpiry !== undefined && livenessExpiry > now - ? `online` - : `offline`, - last_seen_at: row.lastSeenAt?.toISOString(), - liveness_lease_expires_at: row.livenessLeaseExpiresAt?.toISOString(), wake_stream: row.wakeStream, - wake_stream_offset: row.wakeStreamOffset ?? undefined, created_at: row.createdAt.toISOString(), updated_at: row.updatedAt.toISOString(), } } + private rowToRunnerRuntimeDiagnostics( + row: typeof runnerRuntimeDiagnostics.$inferSelect + ): RunnerRuntimeDiagnostics { + return { + runner_id: row.runnerId, + owner_principal: row.ownerPrincipal, + wake_stream_offset: row.wakeStreamOffset ?? undefined, + last_seen_at: row.lastSeenAt.toISOString(), + liveness_lease_expires_at: row.livenessLeaseExpiresAt.toISOString(), + diagnostics: (row.diagnostics as Record) ?? undefined, + updated_at: row.updatedAt.toISOString(), + } + } + private rowToConsumerClaim( row: typeof consumerClaims.$inferSelect ): ConsumerClaim { diff --git a/packages/agents-server/src/host.ts b/packages/agents-server/src/host.ts index b7e569aeb5..de556f8bdd 100644 --- a/packages/agents-server/src/host.ts +++ b/packages/agents-server/src/host.ts @@ -3,7 +3,7 @@ import { PostgresRegistry } from './entity-registry.js' import { EntityProjector } from './entity-projector.js' import { ElectricAgentsTenantRuntime } from './runtime.js' import { PostgresSchedulerClient, Scheduler } from './scheduler.js' -import { StreamClient, durableStreamsServiceUrl } from './stream-client.js' +import { StreamClient } from './stream-client.js' import { TagStreamOutboxDrainer } from './tag-stream-outbox-drainer.js' import { DEFAULT_TENANT_ID, UnregisteredTenantError } from './tenant.js' import { WakeRegistry } from './wake-registry.js' @@ -313,10 +313,9 @@ export class AgentsHost { private createStreamClient(config: AgentsHostTenantConfig): StreamClient { if (config.streamClient) return config.streamClient if (config.durableStreamsUrl) { - return new StreamClient( - durableStreamsServiceUrl(config.durableStreamsUrl, config.serviceId), - { bearer: config.durableStreamsBearer } - ) + return new StreamClient(config.durableStreamsUrl, { + bearer: config.durableStreamsBearer, + }) } throw new Error( `AgentsHost tenant "${config.serviceId}" must provide a streamClient or durableStreamsUrl` diff --git a/packages/agents-server/src/principal.ts b/packages/agents-server/src/principal.ts index 767be23828..98ed98361e 100644 --- a/packages/agents-server/src/principal.ts +++ b/packages/agents-server/src/principal.ts @@ -20,7 +20,7 @@ const PRINCIPAL_KINDS = new Set([ export function parsePrincipalKey(input: string): Principal { const colon = input.indexOf(`:`) - if (colon <= 0) throw new Error(`Invalid principal key`) + if (colon <= 0) throw new Error(`Invalid principal identifier`) const kind = input.slice(0, colon) as PrincipalKind const id = input.slice(colon + 1) if (!PRINCIPAL_KINDS.has(kind)) throw new Error(`Invalid principal kind`) @@ -33,24 +33,38 @@ export function principalUrl(key: string): string { return parsePrincipalKey(key).url } -export function principalKeyFromUrl(url: string): string | null { +export function parsePrincipalUrl(url: string): Principal | null { if (!url.startsWith(`/principal/`)) return null const segment = url.slice(`/principal/`.length) if (!segment || segment.includes(`/`)) return null try { - const key = decodeURIComponent(segment) // Principal URLs produced by parsePrincipalKey/principalUrl are canonical // encoded single path segments, but accept legacy unencoded single-segment // URLs here so callers can canonicalize them via parsePrincipalKey(key).url. - return parsePrincipalKey(key).key + return parsePrincipalKey(decodeURIComponent(segment)) } catch { return null } } +export function parsePrincipalInput(input: string): Principal | null { + const urlPrincipal = parsePrincipalUrl(input) + if (urlPrincipal) return urlPrincipal + try { + return parsePrincipalKey(input) + } catch { + return null + } +} + +export function isPrincipalUrl(url: string): boolean { + return parsePrincipalUrl(url) !== null +} + export function getPrincipalFromRequest(request: Request): Principal | null { const value = request.headers.get(ELECTRIC_PRINCIPAL_HEADER) - return value ? parsePrincipalKey(value) : null + if (!value) return null + return parsePrincipalInput(value) } export function getDevPrincipal(): Principal { @@ -66,9 +80,8 @@ const BUILT_IN_SYSTEM_PRINCIPAL_IDS = new Set([ export function isBuiltInSystemPrincipalUrl(url: string | undefined): boolean { if (!url?.startsWith(`/principal/`)) return false try { - const key = principalKeyFromUrl(url) - if (!key) return false - const principal = parsePrincipalKey(key) + const principal = parsePrincipalUrl(url) + if (!principal) return false return ( principal.kind === `system` && BUILT_IN_SYSTEM_PRINCIPAL_IDS.has(principal.id) @@ -84,9 +97,8 @@ export function principalFromCreatedBy( | { url: string; key?: string | null; kind?: string; id?: string } | undefined { if (!createdBy) return undefined - const key = principalKeyFromUrl(createdBy) - if (!key) return { url: createdBy, key: null } - const principal = parsePrincipalKey(key) + const principal = parsePrincipalUrl(createdBy) + if (!principal) return { url: createdBy, key: null } return { url: principal.url, key: principal.key, diff --git a/packages/agents-server/src/routing/context.ts b/packages/agents-server/src/routing/context.ts index 2a0b1971f9..5b5b6b948a 100644 --- a/packages/agents-server/src/routing/context.ts +++ b/packages/agents-server/src/routing/context.ts @@ -19,7 +19,7 @@ export interface TenantContext { principal: Principal publicUrl: string localUrl?: string - /** Resolved Durable Streams root URL for this tenant. */ + /** Durable Streams backend URL prefix. Stream and control paths are appended as-is. */ durableStreamsUrl: string durableStreamsBearer?: DurableStreamsBearerProvider durableStreamsRouting?: DurableStreamsRoutingAdapter diff --git a/packages/agents-server/src/routing/dispatch-policy.ts b/packages/agents-server/src/routing/dispatch-policy.ts index afbe38273f..c3c0985ae8 100644 --- a/packages/agents-server/src/routing/dispatch-policy.ts +++ b/packages/agents-server/src/routing/dispatch-policy.ts @@ -8,6 +8,7 @@ import { ErrCodeUnauthorized, } from '../electric-agents-types.js' import { runnerWakeStream } from '../entity-registry.js' +import { DurableStreamsSubscriptionError } from '../stream-client.js' import { rewriteLoopbackWebhookUrl } from '../utils/webhook-url.js' import { serverLog } from '../utils/log.js' import type { @@ -16,6 +17,9 @@ import type { ElectricAgentsEntity, } from '../electric-agents-types.js' import type { TenantContext } from './context.js' +import type { SubscriptionCreateInput } from '../stream-client.js' + +const linkedDispatchSubscriptions = new WeakMap>() export function subscriptionIdForDispatchTarget( target: DispatchTarget @@ -31,7 +35,7 @@ function subscriptionIdForEntityDispatchTarget( entityUrl: string ): string { const base = subscriptionIdForDispatchTarget(target) - if (!target.subscription_id) return base + if (!target.subscription_id && target.type !== `runner`) return base const digest = createHash(`sha256`).update(entityUrl).digest(`hex`) return `${base}:${digest.slice(0, 16)}` } @@ -109,12 +113,92 @@ function sameDispatchDestination( return false } +function subscriptionHasStream( + ctx: TenantContext, + existing: { streams?: Array }, + streamPath: string +): boolean { + const normalizedStream = streamPath.replace(/^\/+/, ``) + const backendStream = `${ctx.service}/${normalizedStream}` + return ( + existing.streams?.some((stream) => { + const path = typeof stream === `string` ? stream : stream.path + if (!path) return false + const normalized = path.replace(/^\/+/, ``) + return normalized === normalizedStream || normalized === backendStream + }) ?? false + ) +} + +function dispatchLinkCacheKey( + ctx: TenantContext, + subscriptionId: string, + streamPath: string +): string { + return `${ctx.service}:${subscriptionId}:${streamPath}` +} + +function getDispatchLinkCache(ctx: TenantContext): Set { + let cache = linkedDispatchSubscriptions.get(ctx.streamClient) + if (!cache) { + cache = new Set() + linkedDispatchSubscriptions.set(ctx.streamClient, cache) + } + return cache +} + +function isSubscriptionAlreadyExistsError(err: unknown): boolean { + if (!(err instanceof DurableStreamsSubscriptionError)) return false + if (err.status === 409) return true + return ( + err.code === `SUBSCRIPTION_ALREADY_EXISTS` || + err.code === `ALREADY_EXISTS` || + /already exists/i.test(err.errorMessage ?? err.body ?? err.message) + ) +} + +async function ensureSubscriptionIncludesStream( + ctx: TenantContext, + subscriptionId: string, + streamPath: string, + input: SubscriptionCreateInput, + existing: { streams?: Array } | null +): Promise { + if (!existing) { + try { + await ctx.streamClient.putSubscription(subscriptionId, input) + return + } catch (err) { + if (!isSubscriptionAlreadyExistsError(err)) throw err + existing = await ctx.streamClient.getSubscription(subscriptionId) + if (!existing) { + serverLog.warn( + `[dispatch-policy] subscription create raced with existing subscription but it could not be read`, + { subscriptionId, stream: streamPath } + ) + return + } + } + } + + if (!subscriptionHasStream(ctx, existing, streamPath)) { + await ctx.streamClient.addSubscriptionStreams(subscriptionId, [streamPath]) + } +} + export async function assertDispatchPolicyAllowed( ctx: TenantContext, policy: DispatchPolicy | undefined ): Promise { const target = policy?.targets[0] if (!target || target.type !== `runner`) return + if (!ctx.principal) { + throw new ElectricAgentsError( + ErrCodeUnauthorized, + `Runner dispatch requires an authenticated owner`, + 401 + ) + } const runner = await ctx.entityManager.registry.getRunner(target.runnerId) if (!runner) { @@ -124,7 +208,7 @@ export async function assertDispatchPolicyAllowed( 404 ) } - if (ctx.principal && runner.owner_user_id !== ctx.principal.key) { + if (runner.owner_principal !== ctx.principal.url) { throw new ElectricAgentsError( ErrCodeUnauthorized, `Runner dispatch requires the authenticated owner`, @@ -143,7 +227,19 @@ export async function linkEntityDispatchSubscription( ) const target = dispatchPolicy?.targets[0] if (!target) return - await linkStreamToTargetSubscription(ctx, target, entity) + const subscriptionId = subscriptionIdForEntityDispatchTarget( + target, + entity.url + ) + const cacheKey = dispatchLinkCacheKey( + ctx, + subscriptionId, + entity.streams.main + ) + const cache = getDispatchLinkCache(ctx) + if (cache.has(cacheKey)) return + await linkStreamToTargetSubscription(ctx, target, entity, subscriptionId) + cache.add(cacheKey) } export async function unlinkEntityDispatchSubscription( @@ -160,6 +256,9 @@ export async function unlinkEntityDispatchSubscription( target, entity.url ) + getDispatchLinkCache(ctx).delete( + dispatchLinkCacheKey(ctx, subscriptionId, entity.streams.main) + ) await ctx.streamClient .removeSubscriptionStream(subscriptionId, entity.streams.main) .catch((err) => { @@ -174,13 +273,13 @@ export async function unlinkEntityDispatchSubscription( async function linkStreamToTargetSubscription( ctx: TenantContext, target: DispatchTarget, - entity: ElectricAgentsEntity + entity: ElectricAgentsEntity, + subscriptionId: string ): Promise { const streamPath = entity.streams.main - const subscriptionId = subscriptionIdForEntityDispatchTarget( - target, - entity.url - ) + await ctx.streamClient.ensure(streamPath, { + contentType: `application/json`, + }) const existing = await ctx.streamClient.getSubscription(subscriptionId) if (target.type === `runner`) { @@ -196,16 +295,18 @@ async function linkStreamToTargetSubscription( await ctx.streamClient.ensure(wakeStream, { contentType: `application/json`, }) - if (!existing) { - await ctx.streamClient.putSubscription(subscriptionId, { + await ensureSubscriptionIncludesStream( + ctx, + subscriptionId, + streamPath, + { type: `pull-wake`, streams: [streamPath], wake_stream: wakeStream, description: `Electric Agents runner ${target.runnerId}`, - }) - return - } - await ctx.streamClient.addSubscriptionStreams(subscriptionId, [streamPath]) + }, + existing + ) return } @@ -221,16 +322,18 @@ async function linkStreamToTargetSubscription( ctx.publicUrl, `/_electric/webhook-forward/${encodeURIComponent(subscriptionId)}` ) - if (!existing) { - await ctx.streamClient.putSubscription(subscriptionId, { + await ensureSubscriptionIncludesStream( + ctx, + subscriptionId, + streamPath, + { type: `webhook`, streams: [streamPath], webhook: { url: forwardUrl }, description: `Electric Agents webhook ${subscriptionId}`, - }) - } else { - await ctx.streamClient.addSubscriptionStreams(subscriptionId, [streamPath]) - } + }, + existing + ) await ctx.pgDb .insert(subscriptionWebhooks) .values({ diff --git a/packages/agents-server/src/routing/durable-streams-routing-adapter.ts b/packages/agents-server/src/routing/durable-streams-routing-adapter.ts index d557b2e6e1..38d258a339 100644 --- a/packages/agents-server/src/routing/durable-streams-routing-adapter.ts +++ b/packages/agents-server/src/routing/durable-streams-routing-adapter.ts @@ -12,12 +12,11 @@ export interface DurableStreamsRoutingAdapter { } function appendSearch(target: URL, source: URL): URL { - target.search = source.search - return target -} - -function removeServiceQuery(target: URL): URL { - target.searchParams.delete(`service`) + source.searchParams.forEach((value, key) => { + if (key !== `service`) { + target.searchParams.append(key, value) + } + }) return target } @@ -32,7 +31,7 @@ function appendRequestPathToStreamRoot(input: DurableStreamsRoutingInput): URL { target.pathname = path ? `${withoutTrailingSlash(target.pathname)}/${path}` : withoutTrailingSlash(target.pathname) - return removeServiceQuery(appendSearch(target, incomingUrl)) + return appendSearch(target, incomingUrl) } export const streamRootDurableStreamsRoutingAdapter: DurableStreamsRoutingAdapter = diff --git a/packages/agents-server/src/routing/electric-proxy-router.ts b/packages/agents-server/src/routing/electric-proxy-router.ts index 0e658f4d8c..5142d1fc39 100644 --- a/packages/agents-server/src/routing/electric-proxy-router.ts +++ b/packages/agents-server/src/routing/electric-proxy-router.ts @@ -38,6 +38,7 @@ async function proxyElectric( electricUrl: ctx.electricUrl, electricSecret: ctx.electricSecret, tenantId: ctx.service, + principalUrl: ctx.principal.url, }) const headers = new Headers(request.headers) headers.delete(`host`) diff --git a/packages/agents-server/src/routing/entities-router.ts b/packages/agents-server/src/routing/entities-router.ts index 3e05f559c7..b6c8ef3141 100644 --- a/packages/agents-server/src/routing/entities-router.ts +++ b/packages/agents-server/src/routing/entities-router.ts @@ -530,10 +530,10 @@ async function sendEntity( await ctx.entityManager.ensurePrincipal(principal) const { entityUrl, entity } = requireExistingEntityRoute(request) - if (!entity.dispatch_policy) { - const updatedEntity = await backfillEntityDispatchPolicy(ctx, entity) - await linkEntityDispatchSubscription(ctx, updatedEntity) - } + const dispatchEntity = entity.dispatch_policy + ? entity + : await backfillEntityDispatchPolicy(ctx, entity) + await linkEntityDispatchSubscription(ctx, dispatchEntity) if (parsed.afterMs && parsed.afterMs > 0) { await ctx.entityManager.enqueueDelayedSend( diff --git a/packages/agents-server/src/routing/hooks.ts b/packages/agents-server/src/routing/hooks.ts index 9c6e22dcb0..fa25c5ec18 100644 --- a/packages/agents-server/src/routing/hooks.ts +++ b/packages/agents-server/src/routing/hooks.ts @@ -1,6 +1,7 @@ import { SpanKind, SpanStatusCode } from '@opentelemetry/api' import { apiError } from '../electric-agents-http.js' import { ElectricAgentsError } from '../entity-manager.js' +import { ELECTRIC_PRINCIPAL_HEADER } from '../principal.js' import { ATTR, extractTraceContext, tracer } from '../tracing.js' import { serverLog } from '../utils/log.js' import type { Span } from '@opentelemetry/api' @@ -80,7 +81,13 @@ export function applyCors( ) headers.set( `access-control-allow-headers`, - `content-type, authorization, electric-claim-token, ngrok-skip-browser-warning` + [ + `content-type`, + `authorization`, + `electric-claim-token`, + ELECTRIC_PRINCIPAL_HEADER, + `ngrok-skip-browser-warning`, + ].join(`, `) ) headers.set(`access-control-expose-headers`, `*`) return new Response(response.body, { diff --git a/packages/agents-server/src/routing/runners-router.ts b/packages/agents-server/src/routing/runners-router.ts index 3f5d83e293..4a5b74a97d 100644 --- a/packages/agents-server/src/routing/runners-router.ts +++ b/packages/agents-server/src/routing/runners-router.ts @@ -9,11 +9,12 @@ import { ErrCodeNotFound, ErrCodeNotRunning, ErrCodeUnauthorized, + type RunnerHealthResponse, } from '../electric-agents-types.js' import { routeBody, withSchema } from './schema.js' import { subscriptionIdForDispatchTarget } from './dispatch-policy.js' import { withLeadingSlash } from './tenant-stream-paths.js' -import { principalFromCreatedBy } from '../principal.js' +import { parsePrincipalUrl, principalFromCreatedBy } from '../principal.js' import type { JsonRouteRequest } from './schema.js' import type { RouterType } from 'itty-router' import type { TenantContext } from './context.js' @@ -35,7 +36,7 @@ export type RunnersRoutes = RouterType< const registerRunnerBodySchema = Type.Object({ id: Type.String(), - owner_user_id: Type.Optional(Type.String()), + owner_principal: Type.Optional(Type.String()), label: Type.String(), kind: Type.Optional( Type.Union([ @@ -57,6 +58,7 @@ const heartbeatBodySchema = Type.Object({ wake_stream_offset: Type.Optional(Type.String()), wakeStreamOffset: Type.Optional(Type.String()), liveness_lease_expires_at: Type.Optional(Type.String()), + diagnostics: Type.Optional(Type.Record(Type.String(), Type.Unknown())), }) const claimBodySchema = Type.Object( @@ -72,6 +74,35 @@ const claimBodySchema = Type.Object( type RegisterRunnerBody = Static type HeartbeatBody = Static type ClaimBody = Static +type RunnerClientDiagnostics = NonNullable + +const runnerClientStatuses = new Set([ + `stopped`, + `starting`, + `connecting`, + `streaming`, + `reconnecting`, + `stopping`, +]) +const runnerLastClaimResults = new Set< + NonNullable +>([`claimed`, `no_work`, `error`]) +const runnerStringOrNullDiagnostics = [ + `started_at`, + `stream_connected_since`, + `last_error`, + `last_error_at`, + `last_heartbeat_at`, + `last_claim_at`, + `last_dispatch_at`, +] as const +const runnerNumberDiagnostics = [ + `reconnect_count`, + `events_received`, + `claims_succeeded`, + `claims_skipped`, + `claims_failed`, +] as const export const runnersRouter: RunnersRoutes = Router< RunnersRouteRequest, @@ -83,6 +114,7 @@ export const runnersRouter: RunnersRoutes = Router< runnersRouter.post(`/`, withSchema(registerRunnerBodySchema), registerRunner) runnersRouter.get(`/`, listRunners) +runnersRouter.get(`/:id/health`, runnerHealth) runnersRouter.get(`/:id`, getRunner) runnersRouter.post(`/:id/heartbeat`, withSchema(heartbeatBodySchema), heartbeat) runnersRouter.post(`/:id/enable`, setEnabled) @@ -100,31 +132,104 @@ function firstQueryValue( return Array.isArray(value) ? value[0] : value } +function requireAuthenticatedPrincipal( + ctx: TenantContext +): NonNullable { + if (ctx.principal) return ctx.principal + throw new ElectricAgentsError( + ErrCodeUnauthorized, + `Runner route requires an authenticated principal`, + 401 + ) +} + +function canonicalOwnerPrincipal(input: string): string | null { + return parsePrincipalUrl(input)?.url ?? null +} + +function sanitizeRunnerDiagnostics( + diagnostics: Record | null | undefined +): RunnerClientDiagnostics | undefined { + if (!diagnostics) return undefined + const sanitized: Record = {} + + if ( + typeof diagnostics.status === `string` && + runnerClientStatuses.has( + diagnostics.status as RunnerClientDiagnostics[`status`] + ) + ) { + sanitized.status = diagnostics.status + } + if (typeof diagnostics.stream_connected === `boolean`) { + sanitized.stream_connected = diagnostics.stream_connected + } + if (typeof diagnostics.last_heartbeat_ok === `boolean`) { + sanitized.last_heartbeat_ok = diagnostics.last_heartbeat_ok + } + if ( + diagnostics.last_claim_result === null || + (typeof diagnostics.last_claim_result === `string` && + runnerLastClaimResults.has( + diagnostics.last_claim_result as NonNullable< + RunnerClientDiagnostics[`last_claim_result`] + > + )) + ) { + sanitized.last_claim_result = diagnostics.last_claim_result + } + + for (const key of runnerStringOrNullDiagnostics) { + const value = diagnostics[key] + if (typeof value === `string` || value === null) { + sanitized[key] = value + } + } + for (const key of runnerNumberDiagnostics) { + const value = diagnostics[key] + if (typeof value === `number` && Number.isFinite(value) && value >= 0) { + sanitized[key] = value + } + } + + return Object.keys(sanitized).length > 0 + ? (sanitized as RunnerClientDiagnostics) + : undefined +} + async function registerRunner( request: RunnersRouteRequest, ctx: TenantContext ): Promise { const parsed = routeBody(request) - const ownerUserId = parsed.owner_user_id ?? ctx.principal?.key - if (!ownerUserId) { + const principal = requireAuthenticatedPrincipal(ctx) + const ownerPrincipal = parsed.owner_principal ?? principal.url + if (!ownerPrincipal) { throw new ElectricAgentsError( ErrCodeInvalidRequest, - `owner_user_id is required when no authenticated user is present`, + `owner_principal is required when no authenticated principal is present`, 400 ) } - - if (ctx.principal && ownerUserId !== ctx.principal.key) { + const canonicalOwner = canonicalOwnerPrincipal(ownerPrincipal) + if (!canonicalOwner) { + throw new ElectricAgentsError( + ErrCodeInvalidRequest, + `owner_principal must be a valid principal URL (e.g. /principal/user%3Aalice), got: ${ownerPrincipal}`, + 400 + ) + } + if (canonicalOwner !== principal.url) { throw new ElectricAgentsError( ErrCodeUnauthorized, - `owner_user_id must match the authenticated user`, + `owner_principal must match the authenticated principal`, 403 ) } const runner = await ctx.entityManager.registry.createRunner({ id: parsed.id, - ownerUserId, + ownerPrincipal: canonicalOwner, label: parsed.label, kind: parsed.kind, adminStatus: parsed.admin_status, @@ -140,16 +245,27 @@ async function listRunners( request: RunnersRouteRequest, ctx: TenantContext ): Promise { - const requestedOwner = firstQueryValue(request.query.owner_user_id) - if (ctx.principal && requestedOwner && requestedOwner !== ctx.principal.key) { + const principal = requireAuthenticatedPrincipal(ctx) + const requestedOwner = firstQueryValue(request.query.owner_principal) + const canonicalRequestedOwner = requestedOwner + ? canonicalOwnerPrincipal(requestedOwner) + : undefined + if (requestedOwner && !canonicalRequestedOwner) { + throw new ElectricAgentsError( + ErrCodeInvalidRequest, + `owner_principal must be a valid principal URL (e.g. /principal/user%3Aalice), got: ${requestedOwner}`, + 400 + ) + } + if (canonicalRequestedOwner && canonicalRequestedOwner !== principal.url) { throw new ElectricAgentsError( ErrCodeUnauthorized, - `owner_user_id must match the authenticated user`, + `owner_principal must match the authenticated principal`, 403 ) } const runners = await ctx.entityManager.registry.listRunners({ - ownerUserId: ctx.principal?.key ?? requestedOwner, + ownerPrincipal: principal.url, }) return json(runners) } @@ -159,7 +275,7 @@ async function getRunner( ctx: TenantContext ): Promise { const runner = await requireRunner(ctx, routeParam(request, `id`)) - assertRunnerOwnerIfAuthenticated(ctx, runner.owner_user_id) + assertRunnerOwnerIfAuthenticated(ctx, runner.owner_principal) return json(runner) } @@ -168,16 +284,19 @@ async function heartbeat( ctx: TenantContext ): Promise { const runnerId = routeParam(request, `id`) + requireAuthenticatedPrincipal(ctx) const existing = await requireRunner(ctx, runnerId) - assertRunnerOwnerIfAuthenticated(ctx, existing.owner_user_id) + assertRunnerOwnerIfAuthenticated(ctx, existing.owner_principal) const parsed = routeBody(request) const runner = await ctx.entityManager.registry.heartbeatRunner({ runnerId, + ownerPrincipal: existing.owner_principal, leaseMs: parsed.lease_ms, wakeStreamOffset: parsed.wake_stream_offset ?? parsed.wakeStreamOffset, livenessLeaseExpiresAt: parsed.liveness_lease_expires_at ? new Date(parsed.liveness_lease_expires_at) : undefined, + diagnostics: sanitizeRunnerDiagnostics(parsed.diagnostics), }) if (!runner) { throw new ElectricAgentsError(ErrCodeNotFound, `Runner not found`, 404) @@ -205,8 +324,9 @@ async function setRunnerStatus( adminStatus: `enabled` | `disabled` ): Promise { const runnerId = routeParam(request, `id`) + requireAuthenticatedPrincipal(ctx) const existing = await requireRunner(ctx, runnerId) - assertRunnerOwnerIfAuthenticated(ctx, existing.owner_user_id) + assertRunnerOwnerIfAuthenticated(ctx, existing.owner_principal) const runner = await ctx.entityManager.registry.setRunnerAdminStatus( runnerId, adminStatus @@ -222,8 +342,9 @@ async function claimWake( ctx: TenantContext ): Promise { const runnerId = routeParam(request, `id`) + const principal = requireAuthenticatedPrincipal(ctx) const runner = await requireRunner(ctx, runnerId) - if (ctx.principal && runner.owner_user_id !== ctx.principal.key) { + if (runner.owner_principal !== principal.url) { throw new ElectricAgentsError( ErrCodeUnauthorized, `Runner claim requires the authenticated owner`, @@ -297,10 +418,10 @@ async function requireRunner(ctx: TenantContext, runnerId: string) { function assertRunnerOwnerIfAuthenticated( ctx: TenantContext, - ownerUserId: string + ownerPrincipal: string ): void { - if (!ctx.principal) return - if (ownerUserId === ctx.principal.key) return + requireAuthenticatedPrincipal(ctx) + if (ownerPrincipal === ctx.principal.url) return throw new ElectricAgentsError( ErrCodeUnauthorized, `Runner access requires the authenticated owner`, @@ -308,6 +429,122 @@ function assertRunnerOwnerIfAuthenticated( ) } +async function runnerHealth( + request: RunnersRouteRequest, + ctx: TenantContext +): Promise { + const runnerId = routeParam(request, `id`) + const runner = await requireRunner(ctx, runnerId) + assertRunnerOwnerIfAuthenticated(ctx, runner.owner_principal) + const runtimeDiagnostics = + await ctx.entityManager.registry.getRunnerDiagnostics(runnerId) + + const now = Date.now() + const parsedLeaseExpiresAt = runtimeDiagnostics?.liveness_lease_expires_at + ? new Date(runtimeDiagnostics.liveness_lease_expires_at).getTime() + : null + const leaseExpiresAt = + parsedLeaseExpiresAt !== null && Number.isFinite(parsedLeaseExpiresAt) + ? parsedLeaseExpiresAt + : null + + let livenessStatus: `online` | `offline` | `expired` + if (runner.admin_status === `disabled`) { + livenessStatus = `offline` + } else if (leaseExpiresAt !== null && leaseExpiresAt > now) { + livenessStatus = `online` + } else if (leaseExpiresAt !== null) { + livenessStatus = `expired` + } else { + livenessStatus = `offline` + } + + const [activeClaims, dispatchStats] = await Promise.all([ + ctx.entityManager.registry.getActiveClaimsForRunner(runnerId), + ctx.entityManager.registry.getDispatchStatsForRunner(runnerId), + ]) + + const clientDiagnostics = + sanitizeRunnerDiagnostics(runtimeDiagnostics?.diagnostics) ?? null + const issues: Array = [] + let healthStatus: `healthy` | `degraded` | `unhealthy` = `healthy` + + const escalate = (floor: `degraded` | `unhealthy`): void => { + if (floor === `unhealthy`) healthStatus = `unhealthy` + else if (healthStatus === `healthy`) healthStatus = `degraded` + } + + if (runner.admin_status === `disabled`) { + escalate(`unhealthy`) + issues.push(`Runner is disabled`) + } + if (livenessStatus === `expired`) { + escalate(`unhealthy`) + const ago = leaseExpiresAt ? Math.round((now - leaseExpiresAt) / 1000) : 0 + issues.push(`Heartbeat lease expired ${ago}s ago`) + } + if (livenessStatus === `offline` && runner.admin_status === `enabled`) { + escalate(`degraded`) + issues.push(`Runner has never sent a heartbeat`) + } + if (clientDiagnostics) { + if (clientDiagnostics.stream_connected === false) { + escalate(`degraded`) + issues.push(`Client reports stream disconnected`) + } + if (clientDiagnostics.last_heartbeat_ok === false) { + escalate(`degraded`) + issues.push(`Client reports last heartbeat failed`) + } + if ( + typeof clientDiagnostics.reconnect_count === `number` && + clientDiagnostics.reconnect_count > 5 + ) { + escalate(`degraded`) + issues.push( + `Client has reconnected ${clientDiagnostics.reconnect_count} times` + ) + } + } else if (runtimeDiagnostics?.last_seen_at) { + escalate(`degraded`) + issues.push(`No client diagnostics available`) + } + + const body: RunnerHealthResponse = { + runner: { + id: runner.id, + admin_status: runner.admin_status, + liveness_status: livenessStatus, + lease_expires_at: + leaseExpiresAt !== null + ? (runtimeDiagnostics?.liveness_lease_expires_at ?? null) + : null, + lease_remaining_ms: + leaseExpiresAt !== null ? Math.max(0, leaseExpiresAt - now) : null, + wake_stream: runner.wake_stream, + wake_stream_offset: runtimeDiagnostics?.wake_stream_offset ?? null, + last_seen_at: runtimeDiagnostics?.last_seen_at ?? null, + created_at: runner.created_at, + }, + client: clientDiagnostics, + claims: { + active_count: activeClaims.length, + active: activeClaims.map((c) => ({ + consumer_id: c.consumer_id, + epoch: c.epoch, + entity_url: c.entity_url, + stream_path: c.stream_path, + claimed_at: c.claimed_at, + last_heartbeat_at: c.last_heartbeat_at ?? null, + lease_expires_at: c.lease_expires_at ?? null, + })), + }, + dispatch: dispatchStats, + health: { status: healthStatus, issues }, + } + return json(body) +} + async function notificationFromClaim( ctx: TenantContext, input: { diff --git a/packages/agents-server/src/runtime.ts b/packages/agents-server/src/runtime.ts index 1715aa3aa1..29038e5b6c 100644 --- a/packages/agents-server/src/runtime.ts +++ b/packages/agents-server/src/runtime.ts @@ -11,7 +11,7 @@ import { import { SchemaValidator } from './electric-agents/schema-validator.js' import { serverLog } from './utils/log.js' import { isPermanentElectricAgentsError } from './scheduler.js' -import { StreamClient, durableStreamsServiceUrl } from './stream-client.js' +import { StreamClient } from './stream-client.js' import { DEFAULT_TENANT_ID } from './tenant.js' import type { DrizzleDB } from './db/index.js' import type { EntityBridgeCoordinator } from './entity-bridge-manager.js' @@ -63,12 +63,9 @@ export class ElectricAgentsTenantRuntime { if (options.streamClient) { this.streamClient = options.streamClient } else if (options.durableStreamsUrl) { - this.streamClient = new StreamClient( - durableStreamsServiceUrl(options.durableStreamsUrl, this.serviceId, { - scope: `stream-root`, - }), - { bearer: options.durableStreamsBearer } - ) + this.streamClient = new StreamClient(options.durableStreamsUrl, { + bearer: options.durableStreamsBearer, + }) } else { throw new Error(`Either durableStreamsUrl or streamClient is required`) } diff --git a/packages/agents-server/src/server.ts b/packages/agents-server/src/server.ts index 43714c342e..dfd9da2ba4 100644 --- a/packages/agents-server/src/server.ts +++ b/packages/agents-server/src/server.ts @@ -9,7 +9,7 @@ import { import { createDb, runMigrations } from './db/index.js' import { ossServerRouter } from './routing/oss-server-router.js' import { startStandaloneAgentsRuntime } from './standalone-runtime.js' -import { StreamClient, durableStreamsServiceUrl } from './stream-client.js' +import { StreamClient } from './stream-client.js' import { DEFAULT_TENANT_ID } from './tenant.js' import { getDevPrincipal, getPrincipalFromRequest } from './principal.js' import { apiError } from './electric-agents-http.js' @@ -120,6 +120,16 @@ function createMockAgentBootstrap(options: { return { runtime, registry } } +function durableStreamTestServerBackendUrl(origin: string): string { + // DurableStreamTestServer.start() returns the HTTP origin, while the + // reference server's stream backend is mounted under /v1/stream. + // User-provided durableStreamsUrl values are already backend prefixes and + // are passed through unchanged. + const url = new URL(origin) + url.pathname = `${url.pathname.replace(/\/+$/, ``)}/v1/stream` + return url.toString().replace(/\/+$/, ``) +} + export class ElectricAgentsServer { private server?: Server private electricAgentsManager?: StartedStandaloneAgentsRuntime[`manager`] @@ -143,12 +153,9 @@ export class ElectricAgentsServer { } this.options = options this.streamClient = options.durableStreamsUrl - ? new StreamClient( - durableStreamsServiceUrl(options.durableStreamsUrl, this.tenantId, { - scope: `stream-root`, - }), - { bearer: options.durableStreamsBearer } - ) + ? new StreamClient(options.durableStreamsUrl, { + bearer: options.durableStreamsBearer, + }) : null! } @@ -185,13 +192,11 @@ export class ElectricAgentsServer { serverLog.info( `[agent-server] durable streams server started at ${streamsUrl}` ) - this.options.durableStreamsUrl = streamsUrl - this.streamClient = new StreamClient( - durableStreamsServiceUrl(streamsUrl, this.tenantId, { - scope: `stream-root`, - }), - { bearer: this.options.durableStreamsBearer } - ) + this.options.durableStreamsUrl = + durableStreamTestServerBackendUrl(streamsUrl) + this.streamClient = new StreamClient(this.options.durableStreamsUrl, { + bearer: this.options.durableStreamsBearer, + }) } this.streamsAgent = new Agent({ @@ -404,7 +409,7 @@ export class ElectricAgentsServer { principal, publicUrl: this.publicUrl, localUrl: this._url, - durableStreamsUrl: this.streamClient.baseUrl, + durableStreamsUrl: this.options.durableStreamsUrl!, durableStreamsBearer: this.options.durableStreamsBearer, durableStreamsRouting: this.options.durableStreamsRouting, durableStreamsDispatcher: this.streamsAgent, diff --git a/packages/agents-server/src/standalone-runtime.ts b/packages/agents-server/src/standalone-runtime.ts index 3e97160b79..5f94c5b856 100644 --- a/packages/agents-server/src/standalone-runtime.ts +++ b/packages/agents-server/src/standalone-runtime.ts @@ -4,7 +4,7 @@ import { EntityBridgeManager } from './entity-bridge-manager.js' import { serverLog } from './utils/log.js' import { ElectricAgentsTenantRuntime } from './runtime.js' import { Scheduler } from './scheduler.js' -import { StreamClient, durableStreamsServiceUrl } from './stream-client.js' +import { StreamClient } from './stream-client.js' import { TagStreamOutboxDrainer } from './tag-stream-outbox-drainer.js' import { DEFAULT_TENANT_ID } from './tenant.js' import { WakeRegistry } from './wake-registry.js' @@ -57,12 +57,9 @@ export async function startStandaloneAgentsRuntime( const streamClient = options.streamClient ?? (options.durableStreamsUrl - ? new StreamClient( - durableStreamsServiceUrl(options.durableStreamsUrl, serviceId, { - scope: `stream-root`, - }), - { bearer: options.durableStreamsBearer } - ) + ? new StreamClient(options.durableStreamsUrl, { + bearer: options.durableStreamsBearer, + }) : undefined) if (!streamClient) { throw new Error(`Either durableStreamsUrl or streamClient is required`) diff --git a/packages/agents-server/src/stream-client.ts b/packages/agents-server/src/stream-client.ts index 482bbcefa6..3d5b511212 100644 --- a/packages/agents-server/src/stream-client.ts +++ b/packages/agents-server/src/stream-client.ts @@ -14,8 +14,6 @@ export interface StreamClientOptions { bearer?: DurableStreamsBearerProvider } -type DurableStreamsUrlScope = `service` | `stream-root` - export interface StreamAppendResult { offset: string } @@ -34,15 +32,6 @@ export interface WaitForMessagesResult { timedOut: boolean } -export interface ConsumerStateResponse { - state: string - wake_id?: string | null - webhook?: { - wake_id?: string | null - subscription_id?: string - } -} - export interface SubscriptionStreamInfo { path: string tail_offset?: string @@ -131,6 +120,16 @@ export async function applyDurableStreamsBearer( } } +function appendPathToBaseUrl(baseUrl: string, path: string): string { + const url = new URL(baseUrl) + const basePath = url.pathname.replace(/\/+$/, ``) + const childPath = path.replace(/^\/+/, ``) + url.pathname = childPath + ? `${basePath === `/` ? `` : basePath}/${childPath}` + : basePath || `/` + return url.toString().replace(/\/+$/, ``) +} + function durableStreamsBearerHeaders( bearer: DurableStreamsBearerProvider | undefined ): HeadersRecord | undefined { @@ -141,33 +140,6 @@ function durableStreamsBearerHeaders( } } -export function durableStreamsServiceUrl( - baseUrl: string, - serviceId: string, - options: { scope?: DurableStreamsUrlScope } = {} -): string { - const url = new URL(baseUrl) - if (/\/v1\/streams\/[^/]+\/?$/.test(url.pathname)) { - return baseUrl.replace(/\/+$/, ``) - } - if (/\/v1\/stream\/[^/]+\/?$/.test(url.pathname)) { - return baseUrl.replace(/\/+$/, ``) - } - const scope = options.scope ?? `service` - const encodedServiceId = encodeURIComponent(serviceId) - const path = url.pathname.replace(/\/+$/, ``) || `/` - if (path.endsWith(`/v1/streams`)) { - url.pathname = `${path}/${encodedServiceId}` - } else if (path.endsWith(`/v1/stream`)) { - url.pathname = scope === `service` ? `${path}/${encodedServiceId}` : path - } else if (scope === `stream-root`) { - url.pathname = `${path === `/` ? `` : path}/v1/stream` - } else { - url.pathname = `${path === `/` ? `` : path}/v1/stream/${encodedServiceId}` - } - return url.toString().replace(/\/+$/, ``) -} - function isNotFoundError(err: unknown): boolean { return ( (err instanceof DurableStreamError && err.code === ErrCodeNotFound) || @@ -201,7 +173,7 @@ export class StreamClient { ) {} private streamUrl(path: string): string { - return `${this.baseUrl}${path}` + return appendPathToBaseUrl(this.baseUrl, path) } private streamHeaders(): HeadersRecord | undefined { @@ -228,9 +200,10 @@ export class StreamClient { } private subscriptionUrl(subscriptionId: string): string { - const url = new URL(this.baseUrl) - url.pathname = `${url.pathname.replace(/\/+$/, ``)}/__ds/subscriptions/${encodeURIComponent(subscriptionId)}` - return url.toString() + return appendPathToBaseUrl( + this.baseUrl, + `/__ds/subscriptions/${encodeURIComponent(subscriptionId)}` + ) } private subscriptionChildUrl( @@ -270,7 +243,7 @@ export class StreamClient { }) const headers: Record = { 'content-type': `application/json`, - 'Stream-Forked-From': sourcePath, + 'Stream-Forked-From': new URL(this.streamUrl(sourcePath)).pathname, } injectTraceHeaders(headers) @@ -815,20 +788,4 @@ export class StreamClient { JSON.parse(text) as SubscriptionResponse ) } - - async getConsumerState( - consumerId: string - ): Promise { - const res = await fetch( - `${this.baseUrl}/consumers/${encodeURIComponent(consumerId)}`, - { method: `GET`, headers: await this.requestHeaders() } - ) - if (res.status === 404) return null - if (!res.ok) { - throw new Error( - `Consumer query failed: ${res.status} ${await res.text()}` - ) - } - return res.json() as Promise - } } diff --git a/packages/agents-server/src/utils/server-utils.ts b/packages/agents-server/src/utils/server-utils.ts index 8be189bd1e..d5df79927e 100644 --- a/packages/agents-server/src/utils/server-utils.ts +++ b/packages/agents-server/src/utils/server-utils.ts @@ -95,6 +95,7 @@ export function buildElectricProxyTarget(options: { electricUrl: string electricSecret?: string tenantId: string + principalUrl?: string }): URL { const targetPath = options.incomingUrl.pathname.replace( `/_electric/electric`, @@ -130,9 +131,19 @@ export function buildElectricProxyTarget(options: { } else if (table === `runners`) { target.searchParams.set( `columns`, - `"tenant_id","id","owner_user_id","label","kind","admin_status","wake_stream","wake_stream_offset","last_seen_at","liveness_lease_expires_at","created_at","updated_at"` + `"tenant_id","id","owner_principal","label","kind","admin_status","wake_stream","created_at","updated_at"` ) - applyTenantShapeWhere(target, options.tenantId) + applyTenantShapeWhere(target, options.tenantId, [ + `owner_principal = ${sqlStringLiteral(options.principalUrl ?? ``)}`, + ]) + } else if (table === `runner_runtime_diagnostics`) { + target.searchParams.set( + `columns`, + `"tenant_id","runner_id","owner_principal","wake_stream_offset","last_seen_at","liveness_lease_expires_at","diagnostics","updated_at"` + ) + applyTenantShapeWhere(target, options.tenantId, [ + `owner_principal = ${sqlStringLiteral(options.principalUrl ?? ``)}`, + ]) } else if (table === `entity_dispatch_state`) { target.searchParams.set( `columns`, @@ -232,8 +243,15 @@ export function decodeJsonObject( return null } -function applyTenantShapeWhere(target: URL, tenantId: string): void { - const tenantWhere = `tenant_id = ${sqlStringLiteral(tenantId)}` +function applyTenantShapeWhere( + target: URL, + tenantId: string, + extraConditions: Array = [] +): void { + const tenantWhere = [ + `tenant_id = ${sqlStringLiteral(tenantId)}`, + ...extraConditions, + ].join(` AND `) const existingWhere = target.searchParams.get(`where`) target.searchParams.set( `where`, diff --git a/packages/agents-server/test/conformance.test.ts b/packages/agents-server/test/conformance.test.ts index 868ed4b475..2c4f879fde 100644 --- a/packages/agents-server/test/conformance.test.ts +++ b/packages/agents-server/test/conformance.test.ts @@ -15,6 +15,7 @@ import { TEST_POSTGRES_URL, resetElectricAgentsTestBackend, } from './test-backend' +import { durableStreamTestServerUrl } from './test-utils' const CLI_BIN = path.resolve( __dirname, @@ -39,7 +40,7 @@ describe(`Electric Agents Entity Runtime`, () => { await Promise.all([resetElectricAgentsTestBackend(), dsServer.start()]) electricAgentsServer = new ElectricAgentsServer({ - durableStreamsUrl: dsServer.url, + durableStreamsUrl: durableStreamTestServerUrl(dsServer.url), port: 0, postgresUrl: TEST_POSTGRES_URL, electricUrl: TEST_ELECTRIC_URL, @@ -68,7 +69,7 @@ describeCli(`Electric Agents CLI`, () => { await Promise.all([resetElectricAgentsTestBackend(), dsServer.start()]) electricAgentsServer = new ElectricAgentsServer({ - durableStreamsUrl: dsServer.url, + durableStreamsUrl: durableStreamTestServerUrl(dsServer.url), port: 0, postgresUrl: TEST_POSTGRES_URL, electricUrl: TEST_ELECTRIC_URL, @@ -97,7 +98,7 @@ describe(`Electric Agents Mock Agent`, () => { await Promise.all([resetElectricAgentsTestBackend(), dsServer.start()]) electricAgentsServer = new ElectricAgentsServer({ - durableStreamsUrl: dsServer.url, + durableStreamsUrl: durableStreamTestServerUrl(dsServer.url), port: 0, mockStreamFn: MOCK_STREAM_FN, postgresUrl: TEST_POSTGRES_URL, @@ -127,7 +128,7 @@ describeCli(`Electric Agents CLI with Mock Agent`, () => { await Promise.all([resetElectricAgentsTestBackend(), dsServer.start()]) electricAgentsServer = new ElectricAgentsServer({ - durableStreamsUrl: dsServer.url, + durableStreamsUrl: durableStreamTestServerUrl(dsServer.url), port: 0, mockStreamFn: MOCK_STREAM_FN, postgresUrl: TEST_POSTGRES_URL, diff --git a/packages/agents-server/test/dispatch-policy-routing.test.ts b/packages/agents-server/test/dispatch-policy-routing.test.ts index 8f0ce0a529..3352e85fbc 100644 --- a/packages/agents-server/test/dispatch-policy-routing.test.ts +++ b/packages/agents-server/test/dispatch-policy-routing.test.ts @@ -1,5 +1,6 @@ import { describe, expect, it, vi } from 'vitest' import { globalRouter } from '../src/routing/global-router' +import { DurableStreamsSubscriptionError } from '../src/stream-client' import type { TenantContext } from '../src/routing/context' import type { DispatchPolicy, @@ -15,14 +16,15 @@ function request(method: string, path: string, body?: unknown): Request { }) } -function entity(dispatchPolicy?: DispatchPolicy): ElectricAgentsEntity & { - txid: number -} { +function entity( + dispatchPolicy?: DispatchPolicy, + id = `one` +): ElectricAgentsEntity & { txid: number } { return { - url: `/chat/one`, + url: `/chat/${id}`, type: `chat`, status: `idle`, - streams: { main: `/chat/one/main`, error: `/chat/one/error` }, + streams: { main: `/chat/${id}/main`, error: `/chat/${id}/error` }, subscription_id: `chat-handler`, dispatch_policy: dispatchPolicy, write_token: `write-token`, @@ -45,7 +47,7 @@ function buildContext(overrides: Partial = {}): TenantContext { kind: `user`, id: `owner@example.com`, key: `user:owner@example.com`, - url: `/principal/user:owner@example.com`, + url: `/principal/user%3Aowner%40example.com`, }, publicUrl: `http://server`, durableStreamsUrl: `http://durable.local`, @@ -68,7 +70,7 @@ function buildContext(overrides: Partial = {}): TenantContext { ), getRunner: vi.fn(async () => ({ id: `runner-1`, - owner_user_id: `user:owner@example.com`, + owner_principal: `/principal/user%3Aowner%40example.com`, label: `Local runner`, kind: `local`, admin_status: `enabled`, @@ -78,12 +80,15 @@ function buildContext(overrides: Partial = {}): TenantContext { })), }, ensurePrincipal: vi.fn(async () => undefined), - spawn: vi.fn(async (_type, req) => entity(req.dispatch_policy)), + spawn: vi.fn(async (_type, req) => + entity(req.dispatch_policy, req.instance_id ?? `one`) + ), } as any, streamClient: { getSubscription: vi.fn(async () => null), putSubscription: vi.fn(async () => ({})), addSubscriptionStreams: vi.fn(async () => ({})), + removeSubscriptionStream: vi.fn(async () => ({})), ensure: vi.fn(async () => undefined), } as any, runtime: undefined as any, @@ -113,7 +118,7 @@ describe(`dispatch policy routing`, () => { { contentType: `application/json` } ) expect(ctx.streamClient.putSubscription).toHaveBeenCalledWith( - `runner:runner-1`, + expect.stringMatching(/^runner:runner-1:/), expect.objectContaining({ type: `pull-wake`, streams: [`/chat/one/main`], @@ -122,6 +127,36 @@ describe(`dispatch policy routing`, () => { ) }) + it(`uses separate pull-wake subscriptions for separate runner-targeted entities`, async () => { + const dispatchPolicy: DispatchPolicy = { + targets: [{ type: `runner`, runnerId: `runner-1` }], + } + const ctx = buildContext() + + const first = await globalRouter.fetch( + request(`PUT`, `/_electric/entities/chat/one`, { + dispatch_policy: dispatchPolicy, + }), + ctx + ) + const second = await globalRouter.fetch( + request(`PUT`, `/_electric/entities/chat/two`, { + dispatch_policy: dispatchPolicy, + }), + ctx + ) + + expect(first.status).toBe(201) + expect(second.status).toBe(201) + const subscriptionIds = vi + .mocked(ctx.streamClient.putSubscription) + .mock.calls.map(([subscriptionId]) => subscriptionId) + expect(subscriptionIds).toHaveLength(2) + expect(subscriptionIds[0]).toMatch(/^runner:runner-1:/) + expect(subscriptionIds[1]).toMatch(/^runner:runner-1:/) + expect(subscriptionIds[0]).not.toBe(subscriptionIds[1]) + }) + it(`creates webhook subscriptions and stores the original target`, async () => { const dispatchPolicy: DispatchPolicy = { targets: [{ type: `webhook`, url: `http://runtime.local/wake` }], @@ -175,11 +210,11 @@ describe(`dispatch policy routing`, () => { }) ) expect(ctx.entityManager.send).toHaveBeenCalledWith(`/chat/one`, { - from: `/principal/user:owner@example.com`, + from: `/principal/user%3Aowner%40example.com`, payload: `hello`, }) expect(ctx.streamClient.putSubscription).toHaveBeenCalledWith( - `runner:runner-1`, + expect.stringMatching(/^runner:runner-1:/), expect.objectContaining({ type: `pull-wake`, streams: [`/chat/one/main`], @@ -187,10 +222,8 @@ describe(`dispatch policy routing`, () => { }) ) expect( - (ctx.entityManager.send as any).mock.invocationCallOrder[0] - ).toBeGreaterThan( (ctx.streamClient.putSubscription as any).mock.invocationCallOrder[0] - ) + ).toBeLessThan((ctx.entityManager.send as any).mock.invocationCallOrder[0]) }) it(`links legacy entities through the type default before sending`, async () => { @@ -220,7 +253,7 @@ describe(`dispatch policy routing`, () => { expect(response.status).toBe(204) expect(ctx.streamClient.putSubscription).toHaveBeenCalledWith( - `runner:runner-1`, + expect.stringMatching(/^runner:runner-1:/), expect.objectContaining({ type: `pull-wake`, streams: [`/chat/one/main`], @@ -235,4 +268,123 @@ describe(`dispatch policy routing`, () => { expect.objectContaining({ payload: `hello` }) ) }) + + it(`recreates missing runner dispatch subscriptions before sending`, async () => { + const dispatchPolicy: DispatchPolicy = { + targets: [{ type: `runner`, runnerId: `runner-1` }], + } + const ctx = buildContext() + ;(ctx.entityManager.registry.getEntity as any).mockResolvedValue( + entity(dispatchPolicy) + ) + ctx.entityManager.send = vi.fn(async () => undefined) + + const response = await globalRouter.fetch( + request(`POST`, `/_electric/entities/chat/one/send`, { + payload: `hello`, + }), + ctx + ) + + expect(response.status).toBe(204) + expect(ctx.streamClient.getSubscription).toHaveBeenCalledWith( + expect.stringMatching(/^runner:runner-1:/) + ) + expect(ctx.streamClient.putSubscription).toHaveBeenCalledWith( + expect.stringMatching(/^runner:runner-1:/), + expect.objectContaining({ + type: `pull-wake`, + streams: [`/chat/one/main`], + wake_stream: `/runners/runner-1/wake`, + }) + ) + expect(ctx.streamClient.addSubscriptionStreams).not.toHaveBeenCalled() + expect(ctx.entityManager.send).toHaveBeenCalledWith( + `/chat/one`, + expect.objectContaining({ payload: `hello` }) + ) + }) + + it(`does not re-add already linked runner streams before sending`, async () => { + const dispatchPolicy: DispatchPolicy = { + targets: [{ type: `runner`, runnerId: `runner-1` }], + } + const ctx = buildContext() + ;(ctx.entityManager.registry.getEntity as any).mockResolvedValue( + entity(dispatchPolicy) + ) + ;(ctx.streamClient.getSubscription as any).mockResolvedValue({ + streams: [{ path: `tenant-test/chat/one/main` }], + }) + ctx.entityManager.send = vi.fn(async () => undefined) + + const response = await globalRouter.fetch( + request(`POST`, `/_electric/entities/chat/one/send`, { + payload: `hello`, + }), + ctx + ) + + expect(response.status).toBe(204) + expect(ctx.streamClient.getSubscription).toHaveBeenCalledWith( + expect.stringMatching(/^runner:runner-1:/) + ) + expect(ctx.streamClient.putSubscription).not.toHaveBeenCalled() + expect(ctx.streamClient.addSubscriptionStreams).not.toHaveBeenCalled() + expect(ctx.streamClient.removeSubscriptionStream).not.toHaveBeenCalled() + expect(ctx.entityManager.send).toHaveBeenCalledWith( + `/chat/one`, + expect.objectContaining({ payload: `hello` }) + ) + + const second = await globalRouter.fetch( + request(`POST`, `/_electric/entities/chat/one/send`, { + payload: `again`, + }), + ctx + ) + + expect(second.status).toBe(204) + expect(ctx.streamClient.getSubscription).toHaveBeenCalledTimes(1) + expect(ctx.streamClient.ensure).toHaveBeenCalledTimes(2) + expect(ctx.entityManager.send).toHaveBeenCalledWith( + `/chat/one`, + expect.objectContaining({ payload: `again` }) + ) + }) + + it(`treats runner subscription create conflicts as an idempotent spawn link`, async () => { + const dispatchPolicy: DispatchPolicy = { + targets: [{ type: `runner`, runnerId: `runner-1` }], + } + const ctx = buildContext() + ;(ctx.streamClient.getSubscription as any) + .mockResolvedValueOnce(null) + .mockResolvedValueOnce({ + streams: [{ path: `tenant-test/chat/one/main` }], + }) + ;(ctx.streamClient.putSubscription as any).mockRejectedValueOnce( + new DurableStreamsSubscriptionError( + `Subscription creation failed`, + 409, + JSON.stringify({ + error: { + code: `SUBSCRIPTION_ALREADY_EXISTS`, + message: `Subscription already exists`, + }, + }) + ) + ) + ctx.entityManager.send = vi.fn(async () => undefined) + + const response = await globalRouter.fetch( + request(`PUT`, `/_electric/entities/chat/one`, { + dispatch_policy: dispatchPolicy, + }), + ctx + ) + + expect(response.status).toBe(201) + expect(ctx.streamClient.addSubscriptionStreams).not.toHaveBeenCalled() + }) }) diff --git a/packages/agents-server/test/electric-agents-routes.test.ts b/packages/agents-server/test/electric-agents-routes.test.ts index 9d5afd46ae..8153b578f4 100644 --- a/packages/agents-server/test/electric-agents-routes.test.ts +++ b/packages/agents-server/test/electric-agents-routes.test.ts @@ -267,7 +267,7 @@ describe(`ElectricAgentsRoutes shared-state streams`, () => { createRequest(`PUT`, `/_electric/shared-state/board-1`), { service: `test`, - durableStreamsUrl: `http://durable.local/v1/stream/test`, + durableStreamsUrl: `http://durable.local/custom/ds-prefix`, isShuttingDown: () => false, } as unknown as TenantContext ) @@ -276,7 +276,7 @@ describe(`ElectricAgentsRoutes shared-state streams`, () => { expect(fetchSpy).toHaveBeenCalledOnce() const [url, init] = fetchSpy.mock.calls[0]! expect(String(url)).toBe( - `http://durable.local/v1/stream/test/_electric/shared-state/board-1` + `http://durable.local/custom/ds-prefix/_electric/shared-state/board-1` ) expect(init).toMatchObject({ method: `PUT` }) } finally { @@ -294,7 +294,7 @@ describe(`ElectricAgentsRoutes shared-state streams`, () => { createRequest(`GET`, `/__ds/subscriptions/sub-1`), { service: `test`, - durableStreamsUrl: `http://durable.local/v1/stream/test`, + durableStreamsUrl: `http://durable.local/custom/ds-prefix`, isShuttingDown: () => false, } as unknown as TenantContext ) @@ -303,7 +303,7 @@ describe(`ElectricAgentsRoutes shared-state streams`, () => { expect(fetchSpy).toHaveBeenCalledOnce() const [url, init] = fetchSpy.mock.calls[0]! expect(String(url)).toBe( - `http://durable.local/v1/stream/test/__ds/subscriptions/sub-1` + `http://durable.local/custom/ds-prefix/__ds/subscriptions/sub-1` ) expect(init).toMatchObject({ method: `GET` }) } finally { diff --git a/packages/agents-server/test/entity-lifecycle.test.ts b/packages/agents-server/test/entity-lifecycle.test.ts index f4ff58bc27..c637ad64af 100644 --- a/packages/agents-server/test/entity-lifecycle.test.ts +++ b/packages/agents-server/test/entity-lifecycle.test.ts @@ -6,6 +6,7 @@ import { TEST_POSTGRES_URL, resetElectricAgentsTestBackend, } from './test-backend' +import { durableStreamTestServerUrl } from './test-utils' describe(`entity lifecycle`, () => { let dsServer: DurableStreamTestServer | null = null @@ -21,7 +22,7 @@ describe(`entity lifecycle`, () => { await Promise.all([resetElectricAgentsTestBackend(), dsServer.start()]) electricAgentsServer = new ElectricAgentsServer({ - durableStreamsUrl: dsServer.url, + durableStreamsUrl: durableStreamTestServerUrl(dsServer.url), port: 0, postgresUrl: TEST_POSTGRES_URL, electricUrl: TEST_ELECTRIC_URL, diff --git a/packages/agents-server/test/horton-pull-wake-e2e.test.ts b/packages/agents-server/test/horton-pull-wake-e2e.test.ts index b631d57b3e..c7bc259d92 100644 --- a/packages/agents-server/test/horton-pull-wake-e2e.test.ts +++ b/packages/agents-server/test/horton-pull-wake-e2e.test.ts @@ -1,9 +1,14 @@ +import { createHash } from 'node:crypto' import { afterAll, beforeAll, describe, expect, it, vi } from 'vitest' import { DurableStreamTestServer } from '@durable-streams/server' import { BuiltinAgentsServer } from '../../agents/src/server' import { ElectricAgentsServer } from '../src/server' import { parsePrincipalKey } from '../src/principal' -import { readStreamEvents, waitFor } from './test-utils' +import { + durableStreamTestServerUrl, + readStreamEvents, + waitFor, +} from './test-utils' import { TEST_POSTGRES_URL, resetElectricAgentsTestBackend, @@ -64,6 +69,152 @@ function eventType(event: any): unknown { return event.type ?? event.value?.type ?? event.value?.value?.type } +function runnerEntitySubscriptionId( + runnerId: string, + entityUrl: string +): string { + const digest = createHash(`sha256`).update(entityUrl).digest(`hex`) + return `runner:${runnerId}:${digest.slice(0, 16)}` +} + +function subscriptionUrl( + streamBaseUrl: string, + subscriptionId: string +): string { + const url = new URL(streamBaseUrl) + const match = /^(.*)\/v1\/stream\/([^/]+)\/?$/.exec(url.pathname) + if (match) { + const [, prefix = ``, serviceId] = match + url.pathname = `${prefix}/v1/stream/__ds/subscriptions/${encodeURIComponent(subscriptionId)}` + url.searchParams.set(`service`, decodeURIComponent(serviceId!)) + return url.toString() + } + + url.pathname = `${url.pathname.replace(/\/+$/, ``)}/__ds/subscriptions/${encodeURIComponent(subscriptionId)}` + return url.toString() +} + +function truncateDiagnostic(value: string, max = 4_000): string { + return value.length > max ? `${value.slice(0, max)}...` : value +} + +async function responseDiagnostic( + label: string, + input: RequestInfo | URL, + init?: RequestInit +): Promise { + try { + const res = await fetch(input, init) + const body = truncateDiagnostic(await res.text()) + return `${label}: ${res.status} ${res.statusText}\n${body}` + } catch (err) { + return `${label}: fetch failed\n${err instanceof Error ? err.stack : String(err)}` + } +} + +async function expectNoContentWithDiagnostics( + res: Response, + opts: { + phase: string + baseUrl: string + streamBaseUrl: string + entityApiUrl: string + entityUrl: string + runnerId: string + authHeaders: Record + } +): Promise { + if (res.status === 204) return + + const body = truncateDiagnostic(await res.text()) + const subscriptionId = runnerEntitySubscriptionId( + opts.runnerId, + opts.entityUrl + ) + const diagnostics = await Promise.all([ + responseDiagnostic(`entity`, opts.entityApiUrl, { + headers: opts.authHeaders, + }), + responseDiagnostic( + `runner`, + `${opts.baseUrl}/_electric/runners/${opts.runnerId}`, + { + headers: opts.authHeaders, + } + ), + responseDiagnostic( + `runner health`, + `${opts.baseUrl}/_electric/runners/${opts.runnerId}/health`, + { headers: opts.authHeaders } + ), + responseDiagnostic( + `subscription ${subscriptionId}`, + subscriptionUrl(opts.streamBaseUrl, subscriptionId) + ), + ]) + + throw new Error( + [ + `${opts.phase} returned ${res.status} ${res.statusText}; expected 204`, + `response body:\n${body}`, + ...diagnostics, + ].join(`\n\n`) + ) +} + +async function waitForMockCallWithDiagnostics( + predicate: () => boolean, + opts: { + phase: string + baseUrl: string + streamBaseUrl: string + entityApiUrl: string + entityUrl: string + entityStream: string + runnerId: string + authHeaders: Record + } +): Promise { + try { + await waitFor(async () => predicate(), 20_000, 50) + } catch (err) { + const subscriptionId = runnerEntitySubscriptionId( + opts.runnerId, + opts.entityUrl + ) + const diagnostics = await Promise.all([ + responseDiagnostic(`entity`, opts.entityApiUrl, { + headers: opts.authHeaders, + }), + responseDiagnostic( + `runner health`, + `${opts.baseUrl}/_electric/runners/${opts.runnerId}/health`, + { headers: opts.authHeaders } + ), + responseDiagnostic( + `subscription ${subscriptionId}`, + subscriptionUrl(opts.streamBaseUrl, subscriptionId) + ), + responseDiagnostic( + `runner wake stream`, + `${opts.streamBaseUrl}/runners/${opts.runnerId}/wake?offset=-1&live=false` + ), + responseDiagnostic( + `entity main stream`, + `${opts.streamBaseUrl}${opts.entityStream}?offset=-1&live=false` + ), + ]) + + throw new Error( + [ + `${opts.phase} did not reach Horton within 20000ms`, + err instanceof Error ? err.message : String(err), + ...diagnostics, + ].join(`\n\n`) + ) + } +} + function assertCompleteResponses( events: Array, responseText: string, @@ -113,7 +264,7 @@ describe(`pull-wake Horton e2e with mocked LLM`, () => { }) await Promise.all([resetElectricAgentsTestBackend(), dsServer.start()]) electricAgentsServer = new ElectricAgentsServer({ - durableStreamsUrl: dsServer.url, + durableStreamsUrl: durableStreamTestServerUrl(dsServer.url), port: 0, postgresUrl: TEST_POSTGRES_URL, electricUrl: undefined, @@ -130,7 +281,7 @@ describe(`pull-wake Horton e2e with mocked LLM`, () => { pullWake: { runnerId, registerRunner: true, - ownerUserId: testPrincipal.key, + ownerPrincipal: testPrincipal.url, headers: authHeaders, claimHeaders: authHeaders, claimTokenHeader: `electric-claim-token`, @@ -149,6 +300,7 @@ describe(`pull-wake Horton e2e with mocked LLM`, () => { it(`dispatches explicit runner-policy wakes and Horton writes mocked responses`, async () => { const id = `pull-wake-horton-${Date.now()}` + const entityUrl = `/horton/${id}` const entityApiUrl = `${baseUrl}/_electric/entities/horton/${id}` const dispatch_policy = { targets: [{ type: `runner`, runnerId }] } @@ -180,9 +332,29 @@ describe(`pull-wake Horton e2e with mocked LLM`, () => { payload: `Please answer via pull-wake.`, }), }) - expect(sendRes.status).toBe(204) + await expectNoContentWithDiagnostics(sendRes, { + phase: `initial send`, + baseUrl, + streamBaseUrl, + entityApiUrl, + entityUrl, + runnerId, + authHeaders, + }) - await waitFor(async () => mockStreamFn.mock.calls.length > 0, 20_000, 50) + await waitForMockCallWithDiagnostics( + () => mockStreamFn.mock.calls.length > 0, + { + phase: `initial send`, + baseUrl, + streamBaseUrl, + entityApiUrl, + entityUrl, + entityStream: spawned.streams.main, + runnerId, + authHeaders, + } + ) await waitFor(async () => { const events = await readStreamEvents(streamBaseUrl, spawned.streams.main) @@ -203,12 +375,28 @@ describe(`pull-wake Horton e2e with mocked LLM`, () => { payload: `Please answer via pull-wake again after idle.`, }), }) - expect(secondSendRes.status).toBe(204) + await expectNoContentWithDiagnostics(secondSendRes, { + phase: `second send`, + baseUrl, + streamBaseUrl, + entityApiUrl, + entityUrl, + runnerId, + authHeaders, + }) - await waitFor( - async () => mockStreamFn.mock.calls.length > firstCallCount, - 20_000, - 50 + await waitForMockCallWithDiagnostics( + () => mockStreamFn.mock.calls.length > firstCallCount, + { + phase: `second send`, + baseUrl, + streamBaseUrl, + entityApiUrl, + entityUrl, + entityStream: spawned.streams.main, + runnerId, + authHeaders, + } ) await waitFor(async () => { diff --git a/packages/agents-server/test/horton-spawn-worker.test.ts b/packages/agents-server/test/horton-spawn-worker.test.ts index 630b33d58d..cba4d8d173 100644 --- a/packages/agents-server/test/horton-spawn-worker.test.ts +++ b/packages/agents-server/test/horton-spawn-worker.test.ts @@ -2,7 +2,7 @@ import { afterAll, beforeAll, describe, expect, it } from 'vitest' import { DurableStreamTestServer } from '@durable-streams/server' import { BuiltinAgentsServer } from '../../agents/src/server' import { ElectricAgentsServer } from '../src/server' -import { waitForStreamEvents } from './test-utils' +import { durableStreamTestServerUrl, waitForStreamEvents } from './test-utils' import { TEST_ELECTRIC_URL, TEST_POSTGRES_URL, @@ -25,7 +25,7 @@ describe.skipIf(!process.env.ANTHROPIC_API_KEY)( }) await Promise.all([resetElectricAgentsTestBackend(), dsServer.start()]) electricAgentsServer = new ElectricAgentsServer({ - durableStreamsUrl: dsServer.url, + durableStreamsUrl: durableStreamTestServerUrl(dsServer.url), port: 0, postgresUrl: TEST_POSTGRES_URL, electricUrl: TEST_ELECTRIC_URL, @@ -36,7 +36,7 @@ describe.skipIf(!process.env.ANTHROPIC_API_KEY)( pullWake: { runnerId: `horton-spawn-worker-test`, registerRunner: true, - ownerUserId: `test-user`, + ownerPrincipal: `/principal/system%3Atest-user`, }, }) await builtinAgentsServer.start() @@ -79,7 +79,7 @@ describe.skipIf(!process.env.ANTHROPIC_API_KEY)( expect(sendRes.status).toBe(204) const events = await waitForStreamEvents( - dsServer.url, + durableStreamTestServerUrl(dsServer.url), horton.streams.main, (currentEvents) => currentEvents.some((event) => { diff --git a/packages/agents-server/test/horton-title-generation.test.ts b/packages/agents-server/test/horton-title-generation.test.ts index 00a8811f92..19454171df 100644 --- a/packages/agents-server/test/horton-title-generation.test.ts +++ b/packages/agents-server/test/horton-title-generation.test.ts @@ -2,7 +2,7 @@ import { afterAll, beforeAll, describe, expect, it } from 'vitest' import { DurableStreamTestServer } from '@durable-streams/server' import { BuiltinAgentsServer } from '../../agents/src/server' import { ElectricAgentsServer } from '../src/server' -import { waitFor } from './test-utils' +import { durableStreamTestServerUrl, waitFor } from './test-utils' import { TEST_ELECTRIC_URL, TEST_POSTGRES_URL, @@ -25,7 +25,7 @@ describe.skipIf(!process.env.ANTHROPIC_API_KEY)( }) await Promise.all([resetElectricAgentsTestBackend(), dsServer.start()]) electricAgentsServer = new ElectricAgentsServer({ - durableStreamsUrl: dsServer.url, + durableStreamsUrl: durableStreamTestServerUrl(dsServer.url), port: 0, postgresUrl: TEST_POSTGRES_URL, electricUrl: TEST_ELECTRIC_URL, @@ -36,7 +36,7 @@ describe.skipIf(!process.env.ANTHROPIC_API_KEY)( pullWake: { runnerId: `horton-title-generation-test`, registerRunner: true, - ownerUserId: `test-user`, + ownerPrincipal: `/principal/system%3Atest-user`, }, }) await builtinAgentsServer.start() diff --git a/packages/agents-server/test/principal.test.ts b/packages/agents-server/test/principal.test.ts index e5075e059a..ddb8687701 100644 --- a/packages/agents-server/test/principal.test.ts +++ b/packages/agents-server/test/principal.test.ts @@ -1,7 +1,9 @@ import { describe, expect, it } from 'vitest' import { + getPrincipalFromRequest, + parsePrincipalInput, + parsePrincipalUrl, parsePrincipalKey, - principalKeyFromUrl, principalUrl, } from '../src/principal.js' @@ -19,7 +21,7 @@ describe(`principal parser`, () => { const url = `/principal/${encodeURIComponent(key)}` expect(principal.url).toBe(url) expect(principalUrl(key)).toBe(url) - expect(principalKeyFromUrl(url)).toBe(key) + expect(parsePrincipalUrl(url)?.key).toBe(key) }) } @@ -32,15 +34,40 @@ describe(`principal parser`, () => { it(`encodes URL-unsafe principal ids canonically`, () => { const principal = parsePrincipalKey(`user:alice@example.com`) expect(principal.url).toBe(`/principal/user%3Aalice%40example.com`) - expect(principalKeyFromUrl(principal.url)).toBe(`user:alice@example.com`) - expect(principalKeyFromUrl(`/principal/user:alice@example.com`)).toBe( + expect(parsePrincipalUrl(principal.url)?.key).toBe(`user:alice@example.com`) + expect(parsePrincipalUrl(`/principal/user:alice@example.com`)?.key).toBe( `user:alice@example.com` ) }) + it(`parses principal keys and URLs through one canonical input parser`, () => { + expect(parsePrincipalInput(`user:alice@example.com`)?.url).toBe( + `/principal/user%3Aalice%40example.com` + ) + expect(parsePrincipalInput(`/principal/user:alice@example.com`)?.url).toBe( + `/principal/user%3Aalice%40example.com` + ) + }) + it(`rejects invalid keys`, () => { for (const key of [`userkyle`, `user:`, `user:/kyle`, `admin:kyle`]) { expect(() => parsePrincipalKey(key)).toThrow() } }) + + it(`ignores malformed principal request headers`, () => { + const request = new Request(`http://server`, { + headers: { 'electric-principal': `not-a-principal` }, + }) + + expect(getPrincipalFromRequest(request)).toBeNull() + }) + + it(`accepts canonical principal URLs in request headers`, () => { + const request = new Request(`http://server`, { + headers: { 'electric-principal': `/principal/user%3Akyle` }, + }) + + expect(getPrincipalFromRequest(request)?.key).toBe(`user:kyle`) + }) }) diff --git a/packages/agents-server/test/pull-wake-subscription-stack.test.ts b/packages/agents-server/test/pull-wake-subscription-stack.test.ts new file mode 100644 index 0000000000..241cde77db --- /dev/null +++ b/packages/agents-server/test/pull-wake-subscription-stack.test.ts @@ -0,0 +1,162 @@ +import { createServer } from 'node:http' +import { createServerAdapter } from '@whatwg-node/server' +import { stream } from '@durable-streams/client' +import { DurableStreamTestServer } from '@durable-streams/server' +import { afterEach, describe, expect, it } from 'vitest' +import { globalRouter } from '../src/routing/global-router' +import { StreamClient } from '../src/stream-client' +import { durableStreamTestServerUrl } from './test-utils' +import type { Server } from 'node:http' +import type { TenantContext } from '../src/routing/context' + +async function closeServer(server: Server): Promise { + await new Promise((resolve, reject) => { + server.close((err) => { + if (err) reject(err) + else resolve() + }) + }) +} + +async function readJsonStream( + baseUrl: string, + path: string +): Promise> { + const res = await stream({ + url: `${baseUrl}${path}`, + offset: `-1`, + live: false, + }) + return await res.json() +} + +describe(`pull-wake subscription stack`, () => { + let dsServer: DurableStreamTestServer | undefined + let proxyServer: Server | undefined + + afterEach(async () => { + await Promise.allSettled([ + proxyServer ? closeServer(proxyServer) : undefined, + dsServer?.stop(), + ]) + proxyServer = undefined + dsServer = undefined + }) + + it(`emits and claims runner wakes through Durable Streams subscriptions`, async () => { + dsServer = new DurableStreamTestServer({ + port: 0, + longPollTimeout: 100, + webhooks: true, + }) + await dsServer.start() + const streamBaseUrl = durableStreamTestServerUrl(dsServer.url) + const client = new StreamClient(streamBaseUrl) + + await client.ensure(`/runners/runner-1/wake`, { + contentType: `application/json`, + }) + await client.ensure(`/horton/one/main`, { + contentType: `application/json`, + }) + await client.putSubscription(`runner:runner-1:one`, { + type: `pull-wake`, + streams: [`/horton/one/main`], + wake_stream: `/runners/runner-1/wake`, + }) + + await client.append( + `/horton/one/main`, + JSON.stringify({ type: `message`, value: `hello` }) + ) + + const wakes = await readJsonStream>( + streamBaseUrl, + `/runners/runner-1/wake` + ) + expect(wakes).toEqual([ + expect.objectContaining({ + type: `wake`, + subscription_id: `runner:runner-1:one`, + stream: `horton/one/main`, + generation: 1, + }), + ]) + + await expect( + client.claimSubscription(`runner:runner-1:one`, `worker-1`) + ).resolves.toMatchObject({ + wake_id: expect.any(String), + generation: 1, + token: expect.any(String), + streams: [ + expect.objectContaining({ + path: `horton/one/main`, + has_pending: true, + }), + ], + }) + }) + + it(`proxies pre-existing runner wake events to pull-wake runners`, async () => { + dsServer = new DurableStreamTestServer({ + port: 0, + longPollTimeout: 100, + webhooks: true, + }) + await dsServer.start() + const streamBaseUrl = durableStreamTestServerUrl(dsServer.url) + const client = new StreamClient(streamBaseUrl) + await client.ensure(`/runners/runner-1/wake`, { + contentType: `application/json`, + }) + await client.append( + `/runners/runner-1/wake`, + JSON.stringify({ + type: `wake`, + subscription_id: `runner:runner-1:one`, + stream: `horton/one/main`, + generation: 1, + }) + ) + + const ctx = { + service: `default`, + principal: { + kind: `user`, + id: `owner@example.com`, + key: `user:owner@example.com`, + url: `/principal/user%3Aowner%40example.com`, + }, + publicUrl: `http://agents.local`, + durableStreamsUrl: streamBaseUrl, + entityBridgeManager: { + beginClientRead: async () => null, + touchByStreamPath: async () => undefined, + }, + isShuttingDown: () => false, + } as unknown as TenantContext + const adapter = createServerAdapter((request) => + globalRouter.fetch(request as any, ctx) + ) + proxyServer = createServer(adapter) + await new Promise((resolve) => + proxyServer!.listen(0, `127.0.0.1`, resolve) + ) + const address = proxyServer.address() + if (!address || typeof address === `string`) { + throw new Error(`Expected TCP test server address`) + } + + const wakes = await readJsonStream>( + `http://127.0.0.1:${address.port}`, + `/runners/runner-1/wake` + ) + expect(wakes).toEqual([ + expect.objectContaining({ + type: `wake`, + subscription_id: `runner:runner-1:one`, + }), + ]) + }) +}) diff --git a/packages/agents-server/test/routing-hooks.test.ts b/packages/agents-server/test/routing-hooks.test.ts index 1eda54afdf..90cb4aac29 100644 --- a/packages/agents-server/test/routing-hooks.test.ts +++ b/packages/agents-server/test/routing-hooks.test.ts @@ -41,6 +41,9 @@ describe(`routing/hooks`, () => { expect(wrapped?.headers.get(`access-control-allow-methods`)).toContain( `GET` ) + expect(wrapped?.headers.get(`access-control-allow-headers`)).toContain( + `electric-principal` + ) }) it(`errorMapper converts ElectricAgentsError to API error JSON`, async () => { diff --git a/packages/agents-server/test/runners-router.test.ts b/packages/agents-server/test/runners-router.test.ts index 78101b5fea..0ddba15ade 100644 --- a/packages/agents-server/test/runners-router.test.ts +++ b/packages/agents-server/test/runners-router.test.ts @@ -15,11 +15,11 @@ function request(method: string, path: string, body?: unknown): Request { function runner(overrides: Record = {}) { return { id: `runner-1`, - owner_user_id: `user:owner@example.com`, + owner_principal: `/principal/user%3Aowner%40example.com`, label: `Local runner`, - kind: `local`, - admin_status: `enabled`, - liveness: `offline`, + kind: `local` as const, + admin_status: `enabled` as const, + liveness: `offline` as const, wake_stream: `/runners/runner-1/wake`, created_at: new Date(0).toISOString(), updated_at: new Date(0).toISOString(), @@ -32,7 +32,7 @@ function buildContext(overrides: Partial = {}): TenantContext { createRunner: vi.fn(async (input) => runner({ id: input.id, - owner_user_id: input.ownerUserId, + owner_principal: input.ownerPrincipal, label: input.label, wake_stream: input.wakeStream ?? `/runners/${input.id}/wake`, }) @@ -48,6 +48,13 @@ function buildContext(overrides: Partial = {}): TenantContext { getEntityByStream: vi.fn(), materializeActiveClaim: vi.fn(), updateStatus: vi.fn(), + getActiveClaimsForRunner: vi.fn(async () => []), + getRunnerDiagnostics: vi.fn(async () => null), + getDispatchStatsForRunner: vi.fn(async () => ({ + entities_with_active_claim: 0, + entities_with_outstanding_wake: 0, + entities_with_pending_work: 0, + })), } const insertChain = { values: vi.fn(() => ({ @@ -60,7 +67,7 @@ function buildContext(overrides: Partial = {}): TenantContext { kind: `user`, id: `owner@example.com`, key: `user:owner@example.com`, - url: `/principal/user:owner@example.com`, + url: `/principal/user%3Aowner%40example.com`, }, publicUrl: `http://server`, durableStreamsUrl: `http://durable.local`, @@ -86,7 +93,7 @@ describe(`runner routes`, () => { const response = await globalRouter.fetch( request(`POST`, `/_electric/runners`, { id: `runner-1`, - owner_user_id: `other@example.com`, + owner_principal: `/principal/user%3Aother%40example.com`, label: `Local runner`, }), buildContext({ @@ -94,7 +101,7 @@ describe(`runner routes`, () => { kind: `user`, id: `owner@example.com`, key: `user:owner@example.com`, - url: `/principal/user:owner@example.com`, + url: `/principal/user%3Aowner%40example.com`, }, }) ) @@ -108,14 +115,14 @@ describe(`runner routes`, () => { kind: `user`, id: `owner@example.com`, key: `user:owner@example.com`, - url: `/principal/user:owner@example.com`, + url: `/principal/user%3Aowner%40example.com`, }, }) const response = await globalRouter.fetch( request(`POST`, `/_electric/runners`, { id: `runner-1`, - owner_user_id: `user:owner@example.com`, + owner_principal: `/principal/user%3Aowner%40example.com`, label: `Local runner`, }), ctx @@ -125,7 +132,7 @@ describe(`runner routes`, () => { expect(ctx.entityManager.registry.createRunner).toHaveBeenCalledWith( expect.objectContaining({ id: `runner-1`, - ownerUserId: `user:owner@example.com`, + ownerPrincipal: `/principal/user%3Aowner%40example.com`, }) ) expect(ctx.streamClient.ensure).toHaveBeenCalledWith( @@ -134,13 +141,33 @@ describe(`runner routes`, () => { ) }) + it(`canonicalizes legacy owner_principal URLs on registration`, async () => { + const ctx = buildContext() + + const response = await globalRouter.fetch( + request(`POST`, `/_electric/runners`, { + id: `runner-1`, + owner_principal: `/principal/user:owner@example.com`, + label: `Local runner`, + }), + ctx + ) + + expect(response.status).toBe(201) + expect(ctx.entityManager.registry.createRunner).toHaveBeenCalledWith( + expect.objectContaining({ + ownerPrincipal: `/principal/user%3Aowner%40example.com`, + }) + ) + }) + it(`infers runner owner from the authenticated user when omitted`, async () => { const ctx = buildContext({ principal: { kind: `user`, id: `owner@example.com`, key: `user:owner@example.com`, - url: `/principal/user:owner@example.com`, + url: `/principal/user%3Aowner%40example.com`, }, }) @@ -155,13 +182,190 @@ describe(`runner routes`, () => { expect(response.status).toBe(201) expect(ctx.entityManager.registry.createRunner).toHaveBeenCalledWith( expect.objectContaining({ - ownerUserId: `user:owner@example.com`, + ownerPrincipal: `/principal/user%3Aowner%40example.com`, }) ) }) - it(`allows unauthenticated runner claims when no server auth is configured`, async () => { + it(`canonicalizes legacy owner_principal URLs when listing runners`, async () => { const ctx = buildContext() + + const response = await globalRouter.fetch( + request( + `GET`, + `/_electric/runners?owner_principal=${encodeURIComponent(`/principal/user:owner@example.com`)}` + ), + ctx + ) + + expect(response.status).toBe(200) + expect(ctx.entityManager.registry.listRunners).toHaveBeenCalledWith({ + ownerPrincipal: `/principal/user%3Aowner%40example.com`, + }) + }) + + it(`rejects unauthenticated runner listing`, async () => { + const ctx = buildContext({ principal: undefined as any }) + + const response = await globalRouter.fetch( + request(`GET`, `/_electric/runners`), + ctx + ) + + expect(response.status).toBe(401) + expect(ctx.entityManager.registry.listRunners).not.toHaveBeenCalled() + }) + + it(`returns runner health with diagnostics and claim state`, async () => { + const ctx = buildContext() + vi.mocked(ctx.entityManager.registry.getRunner).mockResolvedValue( + runner({ + admin_status: `enabled`, + }) + ) + vi.mocked( + ctx.entityManager.registry.getRunnerDiagnostics + ).mockResolvedValue({ + runner_id: `runner-1`, + owner_principal: `/principal/user%3Aowner%40example.com`, + liveness_lease_expires_at: new Date(Date.now() + 30_000).toISOString(), + last_seen_at: new Date().toISOString(), + diagnostics: { + stream_connected: true, + reconnect_count: 0, + last_heartbeat_ok: true, + }, + updated_at: new Date().toISOString(), + }) + + const response = await globalRouter.fetch( + request(`GET`, `/_electric/runners/runner-1/health`), + ctx + ) + + expect(response.status).toBe(200) + const body = (await response.json()) as Record + expect(body.runner).toMatchObject({ + id: `runner-1`, + liveness_status: `online`, + }) + expect(body.client).toMatchObject({ stream_connected: true }) + expect(body.claims).toMatchObject({ active_count: 0 }) + expect(body.health).toMatchObject({ status: `healthy`, issues: [] }) + }) + + it(`sanitizes heartbeat diagnostics before storing them`, async () => { + const ctx = buildContext() + + const response = await globalRouter.fetch( + request(`POST`, `/_electric/runners/runner-1/heartbeat`, { + lease_ms: 30_000, + wake_stream_offset: `123`, + diagnostics: { + status: `streaming`, + stream_connected: `yes`, + stream_connected_since: null, + reconnect_count: `2`, + last_heartbeat_ok: false, + last_claim_result: `invalid`, + last_error: `heartbeat failed`, + claims_failed: 1, + events_received: -1, + extra: { noisy: true }, + }, + }), + ctx + ) + + expect(response.status).toBe(200) + const heartbeatInput = vi.mocked(ctx.entityManager.registry.heartbeatRunner) + .mock.calls[0]![0] + expect(heartbeatInput).toMatchObject({ + runnerId: `runner-1`, + wakeStreamOffset: `123`, + diagnostics: { + status: `streaming`, + stream_connected_since: null, + last_heartbeat_ok: false, + last_error: `heartbeat failed`, + claims_failed: 1, + }, + }) + expect(heartbeatInput.diagnostics).not.toHaveProperty(`stream_connected`) + expect(heartbeatInput.diagnostics).not.toHaveProperty(`reconnect_count`) + expect(heartbeatInput.diagnostics).not.toHaveProperty(`last_claim_result`) + expect(heartbeatInput.diagnostics).not.toHaveProperty(`events_received`) + expect(heartbeatInput.diagnostics).not.toHaveProperty(`extra`) + }) + + it(`sanitizes stored runner diagnostics before returning health`, async () => { + const ctx = buildContext() + vi.mocked(ctx.entityManager.registry.getRunner).mockResolvedValue( + runner({ + admin_status: `enabled`, + }) + ) + vi.mocked( + ctx.entityManager.registry.getRunnerDiagnostics + ).mockResolvedValue({ + runner_id: `runner-1`, + owner_principal: `/principal/user%3Aowner%40example.com`, + liveness_lease_expires_at: new Date(Date.now() + 30_000).toISOString(), + last_seen_at: new Date().toISOString(), + diagnostics: { + stream_connected: `yes`, + reconnect_count: 6, + last_heartbeat_ok: false, + last_error: 500, + }, + updated_at: new Date().toISOString(), + }) + + const response = await globalRouter.fetch( + request(`GET`, `/_electric/runners/runner-1/health`), + ctx + ) + + expect(response.status).toBe(200) + const body = (await response.json()) as Record + expect(body.client).toEqual({ + reconnect_count: 6, + last_heartbeat_ok: false, + }) + expect(body.health.issues).toContain(`Client reports last heartbeat failed`) + expect(body.health.issues).toContain(`Client has reconnected 6 times`) + }) + + it(`returns unhealthy when runner lease is expired`, async () => { + const ctx = buildContext() + vi.mocked(ctx.entityManager.registry.getRunner).mockResolvedValue( + runner({ + admin_status: `enabled`, + }) + ) + vi.mocked( + ctx.entityManager.registry.getRunnerDiagnostics + ).mockResolvedValue({ + runner_id: `runner-1`, + owner_principal: `/principal/user%3Aowner%40example.com`, + liveness_lease_expires_at: new Date(Date.now() - 10_000).toISOString(), + last_seen_at: new Date(Date.now() - 15_000).toISOString(), + updated_at: new Date().toISOString(), + }) + + const response = await globalRouter.fetch( + request(`GET`, `/_electric/runners/runner-1/health`), + ctx + ) + + expect(response.status).toBe(200) + const body = (await response.json()) as Record + expect(body.health.status).toBe(`unhealthy`) + expect(body.health.issues.length).toBeGreaterThan(0) + }) + + it(`rejects unauthenticated runner claims`, async () => { + const ctx = buildContext({ principal: undefined as any }) const response = await globalRouter.fetch( request(`POST`, `/_electric/runners/runner-1/claim`, { subscription_id: `runner:runner-1`, @@ -171,8 +375,23 @@ describe(`runner routes`, () => { ctx ) - expect(response.status).toBe(204) - expect(ctx.streamClient.claimSubscription).toHaveBeenCalled() + expect(response.status).toBe(401) + expect(ctx.streamClient.claimSubscription).not.toHaveBeenCalled() + }) + + it(`rejects unauthenticated runner registration for an explicit owner`, async () => { + const ctx = buildContext({ principal: undefined as any }) + const response = await globalRouter.fetch( + request(`POST`, `/_electric/runners`, { + id: `runner-1`, + owner_principal: `/principal/user%3Aowner%40example.com`, + label: `Local runner`, + }), + ctx + ) + + expect(response.status).toBe(401) + expect(ctx.entityManager.registry.createRunner).not.toHaveBeenCalled() }) it(`returns DS claim conflicts as 409 responses`, async () => { @@ -181,7 +400,7 @@ describe(`runner routes`, () => { kind: `user`, id: `owner@example.com`, key: `user:owner@example.com`, - url: `/principal/user:owner@example.com`, + url: `/principal/user%3Aowner%40example.com`, }, }) vi.mocked(ctx.streamClient.claimSubscription).mockRejectedValue( @@ -224,7 +443,7 @@ describe(`runner routes`, () => { kind: `user`, id: `owner@example.com`, key: `user:owner@example.com`, - url: `/principal/user:owner@example.com`, + url: `/principal/user%3Aowner%40example.com`, }, }) vi.mocked(ctx.streamClient.claimSubscription).mockResolvedValue({ @@ -274,13 +493,119 @@ describe(`runner routes`, () => { ) }) + it(`rejects invalid owner_principal with 400`, async () => { + const response = await globalRouter.fetch( + request(`POST`, `/_electric/runners`, { + id: `runner-1`, + owner_principal: `/principal/not-a-valid-key`, + label: `Local runner`, + }), + buildContext({ + principal: { + kind: `user`, + id: `owner@example.com`, + key: `user:owner@example.com`, + url: `/principal/user%3Aowner%40example.com`, + }, + }) + ) + + expect(response.status).toBe(400) + }) + + it(`returns unhealthy when runner is disabled`, async () => { + const ctx = buildContext() + vi.mocked(ctx.entityManager.registry.getRunner).mockResolvedValue( + runner({ + admin_status: `disabled`, + }) + ) + vi.mocked( + ctx.entityManager.registry.getRunnerDiagnostics + ).mockResolvedValue({ + runner_id: `runner-1`, + owner_principal: `/principal/user%3Aowner%40example.com`, + liveness_lease_expires_at: new Date(Date.now() + 30_000).toISOString(), + last_seen_at: new Date().toISOString(), + updated_at: new Date().toISOString(), + }) + + const response = await globalRouter.fetch( + request(`GET`, `/_electric/runners/runner-1/health`), + ctx + ) + + expect(response.status).toBe(200) + const body = (await response.json()) as Record + expect(body.health.status).toBe(`unhealthy`) + expect(body.health.issues).toContain(`Runner is disabled`) + expect(body.runner.liveness_status).toBe(`offline`) + }) + + it(`returns degraded when stream is disconnected`, async () => { + const ctx = buildContext() + vi.mocked(ctx.entityManager.registry.getRunner).mockResolvedValue( + runner({ + admin_status: `enabled`, + }) + ) + vi.mocked( + ctx.entityManager.registry.getRunnerDiagnostics + ).mockResolvedValue({ + runner_id: `runner-1`, + owner_principal: `/principal/user%3Aowner%40example.com`, + liveness_lease_expires_at: new Date(Date.now() + 30_000).toISOString(), + last_seen_at: new Date().toISOString(), + diagnostics: { + stream_connected: false, + reconnect_count: 2, + last_heartbeat_ok: true, + }, + updated_at: new Date().toISOString(), + }) + + const response = await globalRouter.fetch( + request(`GET`, `/_electric/runners/runner-1/health`), + ctx + ) + + expect(response.status).toBe(200) + const body = (await response.json()) as Record + expect(body.health.status).toBe(`degraded`) + expect(body.health.issues).toContain(`Client reports stream disconnected`) + }) + + it(`ignores invalid runner lease timestamps in health output`, async () => { + const ctx = buildContext() + vi.mocked( + ctx.entityManager.registry.getRunnerDiagnostics + ).mockResolvedValue({ + runner_id: `runner-1`, + owner_principal: `/principal/user%3Aowner%40example.com`, + liveness_lease_expires_at: `not-a-date`, + last_seen_at: new Date().toISOString(), + updated_at: new Date().toISOString(), + }) + + const response = await globalRouter.fetch( + request(`GET`, `/_electric/runners/runner-1/health`), + ctx + ) + + expect(response.status).toBe(200) + const body = (await response.json()) as Record + expect(body.runner.lease_expires_at).toBeNull() + expect(body.runner.lease_remaining_ms).toBeNull() + expect(body.runner.liveness_status).toBe(`offline`) + }) + it(`uses the pending stream from multi-stream claim responses`, async () => { const ctx = buildContext({ principal: { kind: `user`, id: `owner@example.com`, key: `user:owner@example.com`, - url: `/principal/user:owner@example.com`, + url: `/principal/user%3Aowner%40example.com`, }, }) vi.mocked(ctx.streamClient.claimSubscription).mockResolvedValue({ diff --git a/packages/agents-server/test/scheduler-integration.test.ts b/packages/agents-server/test/scheduler-integration.test.ts index e4971d7f8b..af5ba79fbb 100644 --- a/packages/agents-server/test/scheduler-integration.test.ts +++ b/packages/agents-server/test/scheduler-integration.test.ts @@ -2,7 +2,11 @@ import { afterAll, beforeAll, describe, expect, it } from 'vitest' import { getCronStreamPath } from '@electric-ax/agents-runtime' import { DurableStreamTestServer } from '@durable-streams/server' import { ElectricAgentsServer } from '../src/server' -import { readStreamEvents, waitFor } from './test-utils' +import { + durableStreamTestServerUrl, + readStreamEvents, + waitFor, +} from './test-utils' import { TEST_ELECTRIC_URL, TEST_POSTGRES_URL, @@ -17,7 +21,7 @@ describe(`Scheduler Integration`, () => { async function startElectricAgentsServer(): Promise { electricAgentsServer = new ElectricAgentsServer({ - durableStreamsUrl: dsServer.url, + durableStreamsUrl: durableStreamTestServerUrl(dsServer.url), port: 0, postgresUrl: TEST_POSTGRES_URL, electricUrl: TEST_ELECTRIC_URL, diff --git a/packages/agents-server/test/server-claim-write-token.test.ts b/packages/agents-server/test/server-claim-write-token.test.ts index f2bb3d868d..87d3b9f200 100644 --- a/packages/agents-server/test/server-claim-write-token.test.ts +++ b/packages/agents-server/test/server-claim-write-token.test.ts @@ -8,6 +8,7 @@ import { TEST_POSTGRES_URL, resetElectricAgentsTestBackend, } from './test-backend' +import { durableStreamTestServerUrl } from './test-utils' import type { Server } from 'node:http' describe(`Claim-scoped write tokens`, () => { @@ -19,7 +20,7 @@ describe(`Claim-scoped write tokens`, () => { async function startElectricAgentsServer(): Promise { electricAgentsServer = new ElectricAgentsServer({ - durableStreamsUrl: dsServer.url, + durableStreamsUrl: durableStreamTestServerUrl(dsServer.url), port: 0, postgresUrl: TEST_POSTGRES_URL, electricUrl: TEST_ELECTRIC_URL, diff --git a/packages/agents-server/test/server-start.test.ts b/packages/agents-server/test/server-start.test.ts index 4bc6feb898..85d5b8d5cb 100644 --- a/packages/agents-server/test/server-start.test.ts +++ b/packages/agents-server/test/server-start.test.ts @@ -179,8 +179,6 @@ vi.mock(`drizzle-orm`, () => ({ })) vi.mock(`../src/stream-client`, () => ({ - durableStreamsServiceUrl: (baseUrl: string, serviceId: string) => - `${baseUrl.replace(/\/+$/, ``)}/v1/stream/${encodeURIComponent(serviceId)}`, StreamClient: class MockStreamClient { exists(): Promise { return streamExistsMock() @@ -193,10 +191,6 @@ vi.mock(`../src/stream-client`, () => ({ readJson(): Promise>> { return streamReadJsonMock() } - - getConsumerState(): Promise { - return Promise.resolve(null) - } }, })) diff --git a/packages/agents-server/test/server-utils.test.ts b/packages/agents-server/test/server-utils.test.ts new file mode 100644 index 0000000000..b81883a2ce --- /dev/null +++ b/packages/agents-server/test/server-utils.test.ts @@ -0,0 +1,49 @@ +import { describe, expect, it } from 'vitest' +import { buildElectricProxyTarget } from '../src/utils/server-utils' + +function shapeTarget(query: string): URL { + return buildElectricProxyTarget({ + incomingUrl: new URL(`http://server/_electric/electric/v1/shape?${query}`), + electricUrl: `http://electric.local`, + tenantId: `tenant-test`, + principalUrl: `/principal/user%3Aowner%40example.com`, + }) +} + +describe(`server utils`, () => { + it(`owner-scopes runner shapes to the authenticated principal`, () => { + const target = shapeTarget(`table=runners`) + + expect(target.pathname).toBe(`/v1/shape`) + expect(target.searchParams.get(`table`)).toBe(`runners`) + const columns = target.searchParams.get(`columns`) + expect(columns).toContain(`"owner_principal"`) + expect(columns).not.toContain(`"diagnostics"`) + expect(columns).not.toContain(`"last_seen_at"`) + expect(target.searchParams.get(`where`)).toBe( + `tenant_id = 'tenant-test' AND owner_principal = '/principal/user%3Aowner%40example.com'` + ) + }) + + it(`combines runner owner scoping with Electric protocol where clauses`, () => { + const target = shapeTarget( + `table=runners&where=${encodeURIComponent(`kind = 'local'`)}` + ) + + expect(target.searchParams.get(`where`)).toBe( + `tenant_id = 'tenant-test' AND owner_principal = '/principal/user%3Aowner%40example.com' AND (kind = 'local')` + ) + }) + + it(`owner-scopes runner runtime diagnostics shapes and preserves runner filters`, () => { + const target = shapeTarget( + `table=runner_runtime_diagnostics&where=${encodeURIComponent(`runner_id = 'runner-1'`)}` + ) + + expect(target.searchParams.get(`table`)).toBe(`runner_runtime_diagnostics`) + expect(target.searchParams.get(`columns`)).toContain(`"diagnostics"`) + expect(target.searchParams.get(`where`)).toBe( + `tenant_id = 'tenant-test' AND owner_principal = '/principal/user%3Aowner%40example.com' AND (runner_id = 'runner-1')` + ) + }) +}) diff --git a/packages/agents-server/test/stream-client-fork.test.ts b/packages/agents-server/test/stream-client-fork.test.ts index 9622d7e6cc..d50acac5a1 100644 --- a/packages/agents-server/test/stream-client-fork.test.ts +++ b/packages/agents-server/test/stream-client-fork.test.ts @@ -2,6 +2,7 @@ import { afterAll, beforeAll, describe, expect, it } from 'vitest' import { DurableStreamTestServer } from '@durable-streams/server' import { StreamClient } from '../src/stream-client' +import { durableStreamTestServerUrl } from './test-utils' describe(`StreamClient.fork`, () => { let dsServer: DurableStreamTestServer | null = null @@ -14,7 +15,7 @@ describe(`StreamClient.fork`, () => { webhooks: true, }) const baseUrl = await dsServer.start() - client = new StreamClient(baseUrl) + client = new StreamClient(durableStreamTestServerUrl(baseUrl)) }) afterAll(async () => { diff --git a/packages/agents-server/test/stream-client.test.ts b/packages/agents-server/test/stream-client.test.ts index 5378b82666..6daa4aa173 100644 --- a/packages/agents-server/test/stream-client.test.ts +++ b/packages/agents-server/test/stream-client.test.ts @@ -1,6 +1,6 @@ import { beforeEach, describe, expect, it, vi } from 'vitest' -import { StreamClient, durableStreamsServiceUrl } from '../src/stream-client' +import { StreamClient } from '../src/stream-client' const { appendMock, @@ -97,13 +97,15 @@ describe(`StreamClient`, () => { await expect(client.exists(`/_cron/test`)).rejects.toBe(error) }) - it(`createSubscription uses the reserved __ds subscription contract`, async () => { + it(`createSubscription appends reserved __ds control paths to the opaque backend URL`, async () => { const fetchMock = vi.spyOn(globalThis, `fetch`).mockResolvedValueOnce( new Response(JSON.stringify({ subscription_id: `sub-1` }), { headers: { 'content-type': `application/json` }, }) ) - const client = new StreamClient(`http://127.0.0.1:4545/v1/stream/tenant-a`) + const client = new StreamClient( + `http://127.0.0.1:4545/custom/ds-prefix?tenant=tenant-a` + ) try { await client.createSubscription( @@ -114,7 +116,7 @@ describe(`StreamClient`, () => { ) expect(fetchMock).toHaveBeenCalledWith( - `http://127.0.0.1:4545/v1/stream/tenant-a/__ds/subscriptions/sub-1`, + `http://127.0.0.1:4545/custom/ds-prefix/__ds/subscriptions/sub-1?tenant=tenant-a`, expect.objectContaining({ method: `PUT` }) ) const [, init] = fetchMock.mock.calls[0]! @@ -245,29 +247,3 @@ describe(`StreamClient`, () => { } }) }) - -describe(`durableStreamsServiceUrl`, () => { - it(`derives a single-tenant stream root from a bare server origin`, () => { - expect( - durableStreamsServiceUrl(`http://127.0.0.1:4545`, `tenant-a`, { - scope: `stream-root`, - }) - ).toBe(`http://127.0.0.1:4545/v1/stream`) - }) - - it(`derives a service-scoped stream root for host tenant registrations`, () => { - expect(durableStreamsServiceUrl(`http://127.0.0.1:4545`, `tenant-a`)).toBe( - `http://127.0.0.1:4545/v1/stream/tenant-a` - ) - }) - - it(`preserves explicitly scoped stream roots`, () => { - expect( - durableStreamsServiceUrl( - `https://streams.test/v1/streams/tenant-a`, - `tenant-a`, - { scope: `stream-root` } - ) - ).toBe(`https://streams.test/v1/streams/tenant-a`) - }) -}) diff --git a/packages/agents-server/test/test-utils.ts b/packages/agents-server/test/test-utils.ts index 5cbd7a4368..12fdb01a5f 100644 --- a/packages/agents-server/test/test-utils.ts +++ b/packages/agents-server/test/test-utils.ts @@ -2,6 +2,12 @@ import { stream } from '@durable-streams/client' const debugTestTiming = process.env.ELECTRIC_AGENTS_DEBUG_TEST_TIMING === `1` +export function durableStreamTestServerUrl(origin: string): string { + const url = new URL(origin) + url.pathname = `${url.pathname.replace(/\/+$/, ``)}/v1/stream` + return url.toString().replace(/\/+$/, ``) +} + export async function timeStep( label: string, fn: () => Promise diff --git a/packages/agents-server/test/wake-registry.test.ts b/packages/agents-server/test/wake-registry.test.ts index 43ced41a2c..f7e05fefbd 100644 --- a/packages/agents-server/test/wake-registry.test.ts +++ b/packages/agents-server/test/wake-registry.test.ts @@ -16,7 +16,11 @@ import { afterAll, beforeAll, describe, expect, it, vi } from 'vitest' import { EntityManager } from '../src/entity-manager' import { ElectricAgentsServer } from '../src/server' import { WakeRegistry } from '../src/wake-registry' -import { timeStep, waitForStreamEvents } from './test-utils' +import { + durableStreamTestServerUrl, + timeStep, + waitForStreamEvents, +} from './test-utils' import { TEST_ELECTRIC_URL, TEST_POSTGRES_URL, @@ -761,8 +765,6 @@ describe(`Wake Registry Integration`, () => { let baseUrl: string let receiver: Server let receiverUrl: string - let wakeCount = 0 - let wakeResolvers: Array<() => void> = [] function getElectricAgentsManager(): EntityManager { return (electricAgentsServer as any).electricAgentsManager as EntityManager @@ -774,12 +776,8 @@ describe(`Wake Registry Integration`, () => { const chunks: Array = [] req.on(`data`, (c: Buffer) => chunks.push(c)) req.on(`end`, () => { - wakeCount++ res.writeHead(200, { 'content-type': `application/json` }) res.end(JSON.stringify({ done: true })) - const resolvers = wakeResolvers - wakeResolvers = [] - for (const resolve of resolvers) resolve() }) }) @@ -801,7 +799,7 @@ describe(`Wake Registry Integration`, () => { receiverUrl = `http://127.0.0.1:${addr.port}` electricAgentsServer = new ElectricAgentsServer({ - durableStreamsUrl: dsServer.url, + durableStreamsUrl: durableStreamTestServerUrl(dsServer.url), port: 0, postgresUrl: TEST_POSTGRES_URL, electricUrl: TEST_ELECTRIC_URL, @@ -823,36 +821,6 @@ describe(`Wake Registry Integration`, () => { }) }, 120_000) - function waitForWakes( - targetCount: number, - timeoutMs = 10_000 - ): Promise { - return new Promise((resolve, reject) => { - if (wakeCount >= targetCount) { - resolve() - return - } - const timeout = setTimeout( - () => - reject( - new Error( - `Timed out waiting for ${targetCount} wakes (got ${wakeCount})` - ) - ), - timeoutMs - ) - const check = (): void => { - if (wakeCount >= targetCount) { - clearTimeout(timeout) - resolve() - } else { - wakeResolvers.push(check) - } - } - wakeResolvers.push(check) - }) - } - async function waitForWakeEvents( streamPath: string, count: number, @@ -898,7 +866,6 @@ describe(`Wake Registry Integration`, () => { } it(`spawn with wake registers condition and delivers wake on child run completion`, async () => { - const startCount = wakeCount const ts = Date.now() const typeName = `wakerf${ts}` @@ -931,16 +898,6 @@ describe(`Wake Registry Integration`, () => { streams: { main: string } } - // Send a message to trigger the initial webhook wake for parent - await fetch(`${baseUrl}/_electric/entities${parent.url}/send`, { - method: `POST`, - headers: { 'content-type': `application/json` }, - body: JSON.stringify({ payload: `init` }), - }) - - // Wait for the parent's webhook - await waitForWakes(startCount + 1) - // Spawn child entity const childRes = await fetch( `${baseUrl}/_electric/entities/${typeName}/child`, @@ -1451,15 +1408,6 @@ describe(`Wake Registry Integration`, () => { oneShot: false, }) - // Send a message to watcher to trigger initial webhook (transition consumer to idle) - await fetch(`${baseUrl}/_electric/entities${watcher.url}/send`, { - method: `POST`, - headers: { 'content-type': `application/json` }, - body: JSON.stringify({ payload: `init` }), - }) - const afterSendTarget = wakeCount + 1 - await waitForWakes(afterSendTarget) - // Trigger wake evaluation directly on the manager await manager.evaluateWakes(source.url, { type: `texts`, @@ -1528,15 +1476,6 @@ describe(`Wake Registry Integration`, () => { oneShot: false, }) - // Trigger initial webhook for subscriber - await fetch(`${baseUrl}/_electric/entities${subscriber.url}/send`, { - method: `POST`, - headers: { 'content-type': `application/json` }, - body: JSON.stringify({ payload: `init` }), - }) - const afterSendTarget = wakeCount + 1 - await waitForWakes(afterSendTarget) - // Trigger wake evaluation directly await manager.evaluateWakes(observed.url, { type: `run`, diff --git a/packages/agents/src/bootstrap.ts b/packages/agents/src/bootstrap.ts index 382866aacb..9ff077aadc 100644 --- a/packages/agents/src/bootstrap.ts +++ b/packages/agents/src/bootstrap.ts @@ -143,7 +143,7 @@ export async function createBuiltinAgentHandler( subscriptionPathForType: (name) => `/${name}/*/main`, defaultDispatchPolicyForType, serverHeaders, - idleTimeout: 5_000, + idleTimeout: 5 * 60_000, createElectricTools, publicUrl, name: runtimeName ?? `builtin-agents`, diff --git a/packages/agents/src/server.ts b/packages/agents/src/server.ts index e24eb0c0d6..71b3aaaf77 100644 --- a/packages/agents/src/server.ts +++ b/packages/agents/src/server.ts @@ -33,20 +33,6 @@ import type { import type { ChangeEvent } from '@durable-streams/state' import type { StreamFn } from '@mariozechner/pi-agent-core' -const PRINCIPAL_KEY_PREFIXES = new Set([`user`, `agent`, `service`, `system`]) - -function normalizeOwnerUserId( - ownerUserId: string | undefined -): string | undefined { - const trimmed = ownerUserId?.trim() - if (!trimmed) return undefined - const colon = trimmed.indexOf(`:`) - if (colon > 0 && PRINCIPAL_KEY_PREFIXES.has(trimmed.slice(0, colon))) { - return trimmed - } - return `user:${trimmed}` -} - export interface BuiltinAgentsServerOptions { agentServerUrl: string workingDirectory?: string @@ -54,13 +40,14 @@ export interface BuiltinAgentsServerOptions { /** Pull-wake runner configuration for built-in agents. */ pullWake: { runnerId: string - ownerUserId?: string + ownerPrincipal?: string label?: string registerRunner?: boolean headers?: PullWakeRunnerConfig[`headers`] claimHeaders?: PullWakeRunnerConfig[`claimHeaders`] claimTokenHeader?: PullWakeRunnerConfig[`claimTokenHeader`] heartbeatIntervalMs?: PullWakeRunnerConfig[`heartbeatIntervalMs`] + eventHeartbeatThrottleMs?: PullWakeRunnerConfig[`eventHeartbeatThrottleMs`] leaseMs?: PullWakeRunnerConfig[`leaseMs`] } /** Invoked when an `authorizationCode` server needs user consent. */ @@ -331,11 +318,11 @@ export class BuiltinAgentsServer { claimHeaders: pullWake.claimHeaders, claimTokenHeader: pullWake.claimTokenHeader, heartbeatIntervalMs: pullWake.heartbeatIntervalMs, + eventHeartbeatThrottleMs: pullWake.eventHeartbeatThrottleMs, leaseMs: pullWake.leaseMs, offset: registeredRunner?.wake_stream_offset, onError: (error) => { serverLog.error(`[builtin-agents] pull-wake runner failed`, error) - return true }, }) this.pullWakeRunner.start() @@ -413,20 +400,18 @@ export class BuiltinAgentsServer { : pullWake.headers ) headers.set(`content-type`, `application/json`) - const ownerUserId = normalizeOwnerUserId(pullWake.ownerUserId) - const body: Record = { - id: pullWake.runnerId, - label: pullWake.label ?? `Built-in agents`, - kind: `local`, - admin_status: `enabled`, - } - if (ownerUserId) body.owner_user_id = ownerUserId const response = await fetch( appendPathToUrl(this.options.agentServerUrl, `/_electric/runners`), { method: `POST`, headers, - body: JSON.stringify(body), + body: JSON.stringify({ + id: pullWake.runnerId, + owner_principal: pullWake.ownerPrincipal, + label: pullWake.label ?? `Built-in agents`, + kind: `local`, + admin_status: `enabled`, + }), } ) if (!response.ok) { diff --git a/packages/electric-ax/src/start.ts b/packages/electric-ax/src/start.ts index 997a1f9180..0895a1d5ac 100644 --- a/packages/electric-ax/src/start.ts +++ b/packages/electric-ax/src/start.ts @@ -19,7 +19,8 @@ export { readDotEnvFile, resolveAnthropicApiKey } from './env.js' const DEFAULT_ELECTRIC_AGENTS_PORT = 4437 const DEFAULT_COMPOSE_PROJECT_NAME = `electric-agents` const DEFAULT_PULL_WAKE_RUNNER_ID = `builtin-agents` -const DEFAULT_PULL_WAKE_OWNER_ID = `builtin-agents` +const DEFAULT_PULL_WAKE_OWNER_PRINCIPAL = `/principal/system%3Abuiltin-agents` +const PRINCIPAL_URL_PREFIX = `/principal/` const DOCKER_COMPOSE_FILE = fileURLToPath( new URL(`../docker-compose.full.yml`, import.meta.url) ) @@ -114,6 +115,12 @@ function runnerIdFromIdentity(identity: string | undefined): string { return slug ? `builtin-${slug}` : DEFAULT_PULL_WAKE_RUNNER_ID } +function principalUrlFromConfig(value: string): string { + return value.startsWith(PRINCIPAL_URL_PREFIX) + ? value + : `${PRINCIPAL_URL_PREFIX}${encodeURIComponent(value)}` +} + export function resolvePullWakeRunnerId( env: NodeJS.ProcessEnv = process.env, fileEnv: Record = readDotEnvFile() @@ -129,15 +136,15 @@ export function resolvePullWakeRunnerId( ) } -export function resolvePullWakeOwnerId( +export function resolvePullWakeOwnerPrincipal( env: NodeJS.ProcessEnv = process.env, fileEnv: Record = readDotEnvFile() ): string { - return ( - readConfigValue(env, fileEnv, [`ELECTRIC_AGENTS_PRINCIPAL`]) ?? - readConfigValue(env, fileEnv, [`ELECTRIC_AGENTS_IDENTITY`]) ?? - DEFAULT_PULL_WAKE_OWNER_ID - ) + const principal = readConfigValue(env, fileEnv, [`ELECTRIC_AGENTS_PRINCIPAL`]) + if (principal) return principalUrlFromConfig(principal) + const identity = readConfigValue(env, fileEnv, [`ELECTRIC_AGENTS_IDENTITY`]) + if (identity) return principalUrlFromConfig(identity) + return DEFAULT_PULL_WAKE_OWNER_PRINCIPAL } function parseAdditionalServerHeaders( @@ -388,7 +395,7 @@ export async function startBuiltinAgentsServer( const fileEnv = readDotEnvFile(cwd) const anthropicApiKey = resolveAnthropicApiKey(options, env, fileEnv) const runnerId = resolvePullWakeRunnerId(env, fileEnv) - const ownerUserId = resolvePullWakeOwnerId(env, fileEnv) + const ownerPrincipal = resolvePullWakeOwnerPrincipal(env, fileEnv) const serverHeaders = mergeHeaders(resolveServerHeaders(env, fileEnv)) const agentServerUrl = params.agentServerUrl ?? @@ -404,7 +411,7 @@ export async function startBuiltinAgentsServer( loadProjectMcpConfig: true, pullWake: { runnerId, - ownerUserId, + ownerPrincipal, registerRunner: true, headers: serverHeaders, claimHeaders: serverHeaders, diff --git a/packages/electric-ax/test/start.test.ts b/packages/electric-ax/test/start.test.ts index 0995f15aae..e551cea34f 100644 --- a/packages/electric-ax/test/start.test.ts +++ b/packages/electric-ax/test/start.test.ts @@ -6,7 +6,7 @@ import { resolveAnthropicApiKey, resolveComposeProjectName, resolveElectricAgentsPort, - resolvePullWakeOwnerId, + resolvePullWakeOwnerPrincipal, resolvePullWakeRunnerId, waitForElectricAgentsServer, } from '../src/start' @@ -100,27 +100,32 @@ describe(`resolvePullWakeRunnerId`, () => { }) }) -describe(`resolvePullWakeOwnerId`, () => { +describe(`resolvePullWakeOwnerPrincipal`, () => { it(`prefers the configured agents principal`, () => { expect( - resolvePullWakeOwnerId( + resolvePullWakeOwnerPrincipal( { ELECTRIC_AGENTS_PRINCIPAL: `service:svc-test`, ELECTRIC_AGENTS_IDENTITY: `a@example.com`, }, {} ) - ).toBe(`service:svc-test`) + ).toBe(`/principal/service%3Asvc-test`) }) it(`uses the agents identity when present`, () => { expect( - resolvePullWakeOwnerId({ ELECTRIC_AGENTS_IDENTITY: `a@example.com` }, {}) - ).toBe(`a@example.com`) + resolvePullWakeOwnerPrincipal( + { ELECTRIC_AGENTS_IDENTITY: `user:a@example.com` }, + {} + ) + ).toBe(`/principal/user%3Aa%40example.com`) }) it(`falls back to the local builtin owner`, () => { - expect(resolvePullWakeOwnerId({}, {})).toBe(`builtin-agents`) + expect(resolvePullWakeOwnerPrincipal({}, {})).toBe( + `/principal/system%3Abuiltin-agents` + ) }) })