diff --git a/.env.example b/.env.example index d5d0a59..47e1d6c 100644 --- a/.env.example +++ b/.env.example @@ -14,23 +14,34 @@ DB_POOL_MAX=20 # Min 2 in production DB_POOL_IDLE_TIMEOUT=30000 DB_POOL_CONNECTION_TIMEOUT=5000 -# ── Runtime (gRPC) ─────────────────────────────── +# ── Runtime (gRPC) — observer-only control-plane identity ──── +# The control-plane has ONE least-privilege runtime identity. Agents authenticate +# to the runtime directly (RFC-MACP-0004 §4); the control-plane never calls Send. +# Its token in MACP_AUTH_TOKENS_JSON must have `can_start_sessions: false`. RUNTIME_KIND=rust RUNTIME_ADDRESS=127.0.0.1:50051 RUNTIME_TLS=false # Must be true in production (or set RUNTIME_ALLOW_INSECURE=true) RUNTIME_ALLOW_INSECURE=true # Defaults to false in production -RUNTIME_BEARER_TOKEN= # Required in production when RUNTIME_USE_DEV_HEADER is enabled -RUNTIME_USE_DEV_HEADER=true # Defaults to false in production +RUNTIME_BEARER_TOKEN= # Control-plane's own observer Bearer token +RUNTIME_USE_DEV_HEADER=true # Defaults to false in production (dev-only) RUNTIME_DEV_AGENT_ID=control-plane RUNTIME_REQUEST_TIMEOUT_MS=30000 # Warn if < 5000 +# ── Session polling (observer mode) ────────────── +# Control-plane polls GetSession(sessionId) until the initiator agent opens +# the session, then subscribes read-only via StreamSession. +SESSION_POLL_BASE_MS=100 +SESSION_POLL_MAX_MS=1000 +SESSION_POLL_TIMEOUT_MS=60000 + +# ── UI-initiated cancel (Option A) ──────────────── +# HTTP timeout for proxying a UI cancel to the initiator agent's cancelCallback. 
+CANCEL_CALLBACK_TIMEOUT_MS=5000 + # ── Circuit Breaker ────────────────────────────── RUNTIME_CIRCUIT_BREAKER_THRESHOLD=5 RUNTIME_CIRCUIT_BREAKER_RESET_MS=30000 -# ── Kickoff Retry ──────────────────────────────── -KICKOFF_MAX_RETRIES=3 - # ── Stream Consumer ───────────────────────────── STREAM_IDLE_TIMEOUT_MS=120000 STREAM_MAX_RETRIES=5 diff --git a/README.md b/README.md index cade339..c20dc4b 100644 --- a/README.md +++ b/README.md @@ -1,124 +1,92 @@ # MACP Control Plane (NestJS) -A scenario-agnostic control plane for the MACP runtime. +A scenario-agnostic, **observer-only** control plane for the MACP runtime. This service is the backend that a Next.js UI talks to for run lifecycle, live stream projection, replay, traces, metrics, and artifacts. -## Boundary +## Role -The control plane intentionally does **not** own scenario definitions. +The control plane is an observer. **It never calls `Send`** on the runtime. -- **UI**: browse scenarios, launch runs, render graphs and traces -- **Scenario Registry**: scenario packs, templates, validation, scenario-to-execution request creation -- **Control Plane**: run lifecycle, runtime execution, session streaming, event normalization, replay, traces, artifacts -- **Runtime**: actual MACP orchestration and mode semantics +- **UI**: browse runs, launch, render graphs and traces. +- **Scenario layer** (e.g. `examples-service`): compile scenarios → produce a generic `RunDescriptor` for this service + per-agent bootstrap for the initiator + participant agents. +- **Agents**: authenticate to the runtime directly with their own Bearer tokens and emit their own envelopes (SessionStart, kickoff, Proposal / Evaluation / Vote / etc.) via `macp-sdk-python` or `macp-sdk-typescript`. +- **Control plane**: allocates `sessionId`, polls `GetSession(sessionId)` until the initiator agent opens it, then subscribes read-only to `StreamSession(sessionId)`. Projects canonical events for the UI. 
+- **Runtime**: authoritative orchestrator of MACP envelopes and modes. -## Why this repo is generic +## Invariants (see `../ui-console/plans/direct-agent-auth.md` §Invariants) -This service only accepts a fully resolved `ExecutionRequest`. -It does not accept `scenarioId`, interpret fraud/business meaning, or infer domain semantics. - -It only knows: - -- how to validate execution-plan structure -- how to start a runtime session -- how to stream runtime events -- how to normalize them for the UI -- how to persist projections and replay data - -## Important contract additions - -To make the request truly runtime-safe, this implementation adds three fields beyond the original sketch: - -1. `session.initiatorParticipantId` - - MACP session start needs a sender identity. - - If omitted, the control plane falls back to the first kickoff sender, then the requester, then the first participant. - -2. `kickoff[].messageType` - - The runtime envelope needs an exact MACP `message_type`. - - A generic `kind = "request"` is not enough to build a runtime envelope. - -3. `payloadEnvelope` / `contextEnvelope` - - The runtime uses raw `bytes` payloads. - - JSON is supported for convenience, but this repo also supports: - - `json` - - `text` - - `base64` - - `proto` (fully-qualified protobuf type name + value) - -These additions keep the API scenario-agnostic while making it executable against the runtime. - -## Runtime integration notes - -This repo vendors the runtime protobuf files under `proto/` and uses `@grpc/grpc-js` + `@grpc/proto-loader` at runtime. - -The current runtime protobuf surface supports: - -- `Initialize` -- `Send` -- `StreamSession` -- `GetSession` -- `CancelSession` -- `GetManifest` -- `ListModes` -- `ListRoots` - -### StreamSession assumption - -The uploaded runtime currently disables `StreamSession`, but your target design says it will be added. 
- This repo therefore introduces one explicit runtime-facing assumption: - - the control plane opens `StreamSession` - the first outbound streaming frame is a subscription envelope - the subscription `messageType` defaults to `SessionWatch` - the payload defaults to `{ "sessionId": "..." }` - That behavior is isolated in `RustRuntimeProvider` so you can update it the moment the runtime finalizes the stream-subscription contract. +1. The control-plane runtime identity is least-privilege: `can_start_sessions: false` in the runtime's `MACP_AUTH_TOKENS_JSON`. +2. The control-plane never calls `Send` — enforced by an invariant lint test (`src/runtime/observer-invariant.spec.ts`). +3. `POST /runs` accepts only a scenario-agnostic `RunDescriptor`. Fields like `kickoff[]`, `participants[].role`, `policyHints`, `commitments[]`, `initiatorParticipantId` are rejected (`forbidNonWhitelisted: true`). +4. `sessionId` ownership: allocated by the control-plane (UUID v4) at `POST /runs` — or validated there when the caller supplies `session.sessionId` — and returned to the caller, which distributes it to agents via bootstrap. +5. Cancellation authority stays with the initiator agent unless the scenario's policy explicitly delegates to the control-plane (see `metadata.cancellationDelegated`). 
## Endpoints ### Runs - -- `POST /runs` -- `GET /runs/:id` -- `GET /runs/:id/state` -- `GET /runs/:id/events` -- `GET /runs/:id/stream` (SSE) -- `POST /runs/:id/cancel` -- `POST /runs/:id/messages` (send a session-bound MACP message into an active run) -- `POST /runs/:id/replay` -- `GET /runs/:id/replay/stream` (SSE) -- `GET /runs/:id/replay/state` +- `POST /runs` — accepts a `RunDescriptor`; returns `{runId, sessionId, status, traceId}` +- `GET /runs/:id` — run record +- `GET /runs/:id/state` — projected UI state +- `GET /runs/:id/events` — canonical events +- `GET /runs/:id/stream` — SSE of live events +- `POST /runs/:id/cancel` — UI cancel (Option A: proxies to initiator agent's cancelCallback; Option B: calls runtime.CancelSession when policy-delegated) +- `POST /runs/validate` — preflight validation +- `POST /runs/:id/clone` — clone with optional tag overrides (session context overrides rejected) +- `POST /runs/:id/replay` — replay descriptor + +### Removed (direct-agent-auth CP-5/6/7) +These endpoints return **410 Gone**. 
Agents emit envelopes via the SDKs directly: +- ~~`POST /runs/:id/messages`~~ +- ~~`POST /runs/:id/signal`~~ +- ~~`POST /runs/:id/context`~~ ### Runtime discovery - -- `GET /runtime/manifest` -- `GET /runtime/modes` -- `GET /runtime/roots` -- `GET /runtime/health` +- `GET /runtime/manifest`, `/runtime/modes`, `/runtime/roots`, `/runtime/health` +- `GET /runtime/policies`, `POST /runtime/policies`, `DELETE /runtime/policies/:id` ### Observability +- `GET /runs/:id/traces`, `/runs/:id/artifacts`, `/runs/:id/metrics` +- `GET /dashboard/overview`, `/dashboard/agents/metrics` +- `GET /healthz`, `/readyz`, `/metrics`, `/docs` (dev only) -- `GET /runs/:id/traces` -- `GET /runs/:id/artifacts` -- `GET /runs/:id/metrics` - -### Ops +## Request shape -- `GET /healthz` -- `GET /readyz` -- `GET /docs` - -## Database tables +```json +{ + "mode": "live", + "runtime": { "kind": "rust" }, + "session": { + "sessionId": "optional — UUID v4/v7 or base64url 22+", + "modeName": "macp.mode.decision.v1", + "modeVersion": "1.0.0", + "configurationVersion": "config.default", + "policyVersion": "policy.default", + "ttlMs": 600000, + "participants": [ + { "id": "fraud-agent" }, + { "id": "risk-agent" }, + { "id": "growth-agent" } + ], + "metadata": { + "source": "examples-service", + "sourceRef": "fraud/high-value-new-device@1.0.0", + "environment": "production", + "cancelCallback": { + "url": "http://initiator.internal/agent/cancel", + "bearer": "opt-in-shared-secret" + } + } + }, + "execution": { + "idempotencyKey": "fraud-high-value-new-device-demo-1", + "tags": ["demo", "fraud"], + "requester": { "actorId": "coordinator", "actorType": "service" } + } +} +``` -- `runs` -- `runtime_sessions` -- `run_events_raw` -- `run_events_canonical` -- `run_projections` -- `run_artifacts` -- `run_metrics` +Response: `{ "runId": "", "sessionId": "", "status": "queued", "traceId": "..." 
}` ## Local development @@ -129,9 +97,7 @@ npm run drizzle:migrate npm run start:dev ``` -Make sure the runtime is running and accessible at `RUNTIME_ADDRESS`. - -For local development against the current reference runtime profile: +Make sure the runtime is running at `RUNTIME_ADDRESS`. For dev auth against the reference runtime profile: ```bash export MACP_ALLOW_INSECURE=1 @@ -139,7 +105,7 @@ export MACP_ALLOW_DEV_SENDER_HEADER=1 cargo run ``` -Then set: +Then: ```bash RUNTIME_ALLOW_INSECURE=true @@ -147,73 +113,47 @@ RUNTIME_USE_DEV_HEADER=true RUNTIME_DEV_AGENT_ID=control-plane ``` -## Example execution request +## Production runtime auth + +Add one entry to the runtime's `MACP_AUTH_TOKENS_JSON` for the control-plane. It is a **read-only observer** and must not have session-start authority: ```json { - "mode": "live", - "runtime": { "kind": "rust", "version": "v1" }, - "session": { - "modeName": "macp.mode.decision.v1", - "modeVersion": "1.0.0", - "configurationVersion": "config.default", - "policyVersion": "policy.default", - "ttlMs": 600000, - "initiatorParticipantId": "coordinator", - "participants": [ - { "id": "fraud-agent", "role": "fraud" }, - { "id": "risk-agent", "role": "risk" }, - { "id": "growth-agent", "role": "growth" }, - { "id": "coordinator", "role": "coordinator" } - ], - "context": { - "transactionAmount": 1800, - "deviceTrustScore": 0.14 - }, - "metadata": { - "source": "scenario-registry", - "sourceRef": "fraud/high-value-new-device@1.0.0", - "intent": "evaluate transaction" - } - }, - "kickoff": [ - { - "from": "coordinator", - "to": ["fraud-agent", "risk-agent", "growth-agent"], - "kind": "proposal", - "messageType": "Proposal", - "payloadEnvelope": { - "encoding": "proto", - "proto": { - "typeName": "macp.modes.decision.v1.ProposalPayload", - "value": { - "proposal_id": "p1", - "option": "step_up_verification", - "rationale": "new device + elevated amount" - } - } - } - } - ], - "execution": { - "idempotencyKey": 
"fraud-high-value-new-device-demo-1", - "tags": ["demo", "fraud"], - "requester": { - "actorId": "coordinator", - "actorType": "service" - } - } + "token": "obs-control-plane-token", + "sender": "control-plane", + "can_start_sessions": false } ``` +If your deployment makes the control-plane the policy admin (optional), set `can_manage_mode_registry: true`. + +Then in the control-plane environment: +```bash +RUNTIME_BEARER_TOKEN=obs-control-plane-token +``` + +Each agent additionally gets its own entry (with `can_start_sessions: true` for the initiator). Per-agent tokens are **not** shared with the control-plane — the scenario layer distributes them to agents via bootstrap. See `../ui-console/plans/direct-agent-auth.md` for the full onboarding flow. + +## Migration from pre-2026-04 control-plane + +If you're upgrading from a control-plane version that had `POST /runs/:id/{messages,signal,context}`, those endpoints now return **410 Gone**. Agents must migrate to `macp-sdk-python` or `macp-sdk-typescript` and authenticate directly to the runtime. `RUNTIME_AGENT_TOKENS_JSON` is removed; its entries move to the runtime's `MACP_AUTH_TOKENS_JSON` (one per agent) and to the scenario layer's per-agent bootstrap. 
+ +## Database tables + +- `runs` (with `runtime_session_id` populated at creation) +- `runtime_sessions` +- `run_events_raw`, `run_events_canonical` +- `run_projections`, `run_artifacts`, `run_metrics` +- `run_outbound_messages`, `audit_log`, `webhooks`, `webhook_deliveries` + ## Repo layout ```text src/ controllers/ # NestJS controllers - runs/ # run manager, executor, stream consumer - runtime/ # runtime provider registry + Rust provider + proto codec - events/ # canonical event normalizer + live SSE hub + runs/ # run manager, observer executor, stream consumer + runtime/ # observer-only runtime provider, proto decoder, credential resolver + events/ # canonical event normalizer + SSE hub projection/ # UI read models replay/ # deterministic replay endpoints metrics/ # metrics aggregation @@ -222,5 +162,5 @@ src/ db/ # Drizzle schema + database service telemetry/ # OpenTelemetry bootstrap and manual spans dto/ # request/response schemas for OpenAPI - contracts/ # TypeScript interfaces + contracts/ # TypeScript interfaces (RunDescriptor, RuntimeProvider, ...) ``` diff --git a/docs/API.md b/docs/API.md index 924ecb5..37e83bc 100644 --- a/docs/API.md +++ b/docs/API.md @@ -15,12 +15,30 @@ curl -H 'Authorization: Bearer ' http://localhost:3001/runs Rate limit: 100 requests per 60 seconds per client. Payload limit: 1MB. +### Upstream runtime auth (observer identity) + +The control-plane has **exactly one** runtime identity — its own least-privilege +Bearer token. It never calls `Send`; agents authenticate to the runtime directly +(RFC-MACP-0004 §4). Its entry in the runtime's `MACP_AUTH_TOKENS_JSON` must have +`can_start_sessions: false`. + +| Env var | Purpose | +| --- | --- | +| `RUNTIME_BEARER_TOKEN` | Control-plane's own observer Bearer token. Used for every runtime call (`Initialize`, `GetSession`, `StreamSession`, `ListPolicies`, etc.). | +| `RUNTIME_USE_DEV_HEADER` | Local dev fallback — sends `x-macp-agent-id: ` when no Bearer token is configured. 
Requires `MACP_ALLOW_DEV_SENDER_HEADER=1` on the runtime. | + +Per-agent tokens are **not** held by the control-plane. They live in the scenario +layer (examples-service) and flow to agents via their bootstrap. + --- ## Runs ### `POST /runs` -Create and launch a runtime execution run. + +Create and launch a runtime execution run. Accepts only a **scenario-agnostic `RunDescriptor`**. +Scenario-specific fields (`kickoff[]`, `participants[].role`, `commitments[]`, +`policyHints`, `initiatorParticipantId`) are rejected with 400. ```bash curl -X POST http://localhost:3001/runs \ @@ -35,17 +53,13 @@ curl -X POST http://localhost:3001/runs \ "configurationVersion": "config.default", "ttlMs": 60000, "participants": [ - { "id": "agent-1", "role": "proposer" }, - { "id": "agent-2", "role": "evaluator" } - ] + { "id": "agent-1" }, + { "id": "agent-2" } + ], + "metadata": { + "cancelCallback": { "url": "http://initiator/agent/cancel", "bearer": "opt-shared-secret" } + } }, - "kickoff": [{ - "from": "agent-1", - "to": ["agent-2"], - "kind": "proposal", - "messageType": "Proposal", - "payload": { "proposalId": "p-1", "option": "Deploy" } - }], "execution": { "idempotencyKey": "unique-key", "tags": ["production"], @@ -54,7 +68,15 @@ curl -X POST http://localhost:3001/runs \ }' ``` -**Response (202):** `{ "runId": "uuid", "status": "queued", "traceId": "..." }` +**Response (202):** `{ "runId": "<uuid>", "sessionId": "<session-id>", "status": "queued", "traceId": "..." }` + +The caller distributes `sessionId` to every agent via bootstrap. The initiator agent +uses its own Bearer token to call `SessionStart(sessionId)` on the runtime. The +control-plane's async observer loop polls `GetSession(sessionId)` until `OPEN`, +then subscribes read-only. + +If the caller provides `session.sessionId`, the control-plane validates it (must +be UUID v4/v7 or base64url 22+ chars) and echoes it back in the response. ### `POST /runs/validate` Preflight validation without creating a run. 
@@ -100,13 +122,13 @@ Fetch the projected run state for UI rendering. Returns: **Decision projection enrichments (§2.1 – §2.3):** -- `decision.current.prompt` — scenario-supplied decision prompt, sourced from `ExecutionRequest.session.metadata.decisionPrompt` at run-creation time. Use this instead of reading prompt text from `reasons[]`. +- `decision.current.prompt` — populated from the initiator's `Proposal` envelope when the runtime includes a `prompt` / `rationale` field. The control-plane no longer reads scenario-specific fields from the request body. - `decision.current.proposals[]` — per-contributor breakdown built from `proposal.created` and `proposal.updated` events. Each entry: `{ participantId, action, confidence?, reasons[], ts, vote?: 'allow'|'deny', messageType? }`. Capped at 50 most-recent. - `decision.current.resolvedAt` / `resolvedBy` — populated from the `decision.finalized` event's `ts` and `sender`. **Policy projection enrichments (§2.4 – §2.5):** -- `policy.expectedCommitments[]` — seeded from `ExecutionRequest.session.commitments` at `session.bound` time. Each entry: `{ id, title?, description?, requiredRoles?, policyRef? }`. +- `policy.expectedCommitments[]` — populated from runtime `PolicyResolved` events when the runtime attaches commitment expectations. The control-plane no longer seeds this from the request body. - `policy.voteTally[]` — derived from vote-bearing `proposal.updated` events (Vote / Approve / Reject / Accept / Evaluation). Each entry: `{ commitmentId (≈ proposalId until finalized), allow, deny, threshold, quorum: { required, cast } }`. `required` is the count of `role === 'voter'` participants (fallback: total participants); `threshold` is the simple majority `ceil(required/2)`. - `policy.quorumStatus` — `pending` until a `policy.commitment.evaluated` with `decision === 'allow'` arrives (→ `reached`). On run terminal (`failed` / `cancelled`) with no `allow` evaluation, flips to `failed`. 
@@ -166,6 +188,11 @@ The following list endpoints return `{ data, total, limit, offset | nextCursor } ### `POST /runs/:id/cancel` Cancel a running session. Body: `{ "reason": "optional" }` +Two flows, selected via run metadata: + +- **Option A (default)** — control-plane HTTP-POSTs to the initiator agent's `cancelCallback` URL (recorded in the run's `metadata.cancelCallback`). The agent then calls `runtime.CancelSession` with its own identity. Fails with 400 if no callback is registered. +- **Option B (policy-delegated)** — when the run's `metadata.cancellationDelegated` is `true`, the control-plane calls `runtime.CancelSession` directly using its own observer identity. Requires the scenario's policy to grant cancel authority to the control-plane. + ### `POST /runs/:id/clone` Clone a run with optional overrides. Body: `{ "tags": [...], "context": {...} }` @@ -180,55 +207,24 @@ Rebuild the projection from canonical events. --- -## Messages & Signals - -### `POST /runs/:id/messages` -Send a session-bound MACP message into an active run. +## Messages & Signals — emission is NOT via the control-plane -```bash -curl -X POST http://localhost:3001/runs/{id}/messages \ - -H 'Content-Type: application/json' \ - -d '{ - "from": "evaluator", - "to": ["proposer"], - "messageType": "Evaluation", - "payload": { "recommendation": "APPROVE", "confidence": 0.95 } - }' -``` +Agents emit envelopes directly against the runtime via `macp-sdk-python` or +`macp-sdk-typescript`. The control-plane observes them via `StreamSession` and +exposes read-only views. 
-For proto-encoded payloads (required by real runtime): -```json -{ - "from": "evaluator", - "to": ["proposer"], - "messageType": "Evaluation", - "payloadEnvelope": { - "encoding": "proto", - "proto": { - "typeName": "macp.modes.decision.v1.EvaluationPayload", - "value": { "proposalId": "p-1", "recommendation": "APPROVE", "confidence": 0.95 } - } - } -} -``` +### Removed endpoints (return 410 Gone) -### `POST /runs/:id/signal` -Send a signal (ambient plane, non-binding). Signals use empty `sessionId` and `modeName`. +| Endpoint | Migration | +| --- | --- | +| `POST /runs/:id/messages` | `from macp_sdk import DecisionSession; DecisionSession(client, session_id=…).evaluate(...)` | +| `POST /runs/:id/signal` | `session.signal(...)` via the SDK (or build an `Envelope` with `messageType='Signal'` and unary-`Send` it) | +| `POST /runs/:id/context` | Construct an envelope with `messageType='ContextUpdate'` via `macp_sdk.build_envelope()` | -```json -{ - "from": "evaluator", - "to": ["proposer"], - "messageType": "Signal", - "payload": { "signalType": "progress", "data": "Analyzing...", "confidence": 0.5 } -} -``` +Each response: `{ "statusCode": 410, "errorCode": "ENDPOINT_REMOVED", "message": "…" }`. ### `GET /runs/:id/messages` -List outbound messages for a run. - -### `POST /runs/:id/context` -Update session context during execution. +List outbound messages captured from the runtime stream. 
--- diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 7d32cbc..ee57651 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -40,25 +40,30 @@ MACP distinguishes between two communication planes: └─────────────────────────────────────┘ └───────────────────────────────────┘ ``` -## Request Flow +## Request Flow (observer mode — direct-agent-auth 2026-04-15) ``` -POST /runs +POST /runs (RunDescriptor — scenario-agnostic; see CP-1) → RunsController.createRun() → RunExecutorService.launch() - → RunManagerService.createRun() [status: queued] + → resolveSessionId() [UUID v4 allocated or validated] + → RunManagerService.createRun(request, sessionId) [status: queued] + → return { runId, sessionId, status, traceId } [synchronous 202] → async execute(): → markStarted() [status: starting] → provider.initialize() [gRPC — mode validation] - → provider.openSession() [gRPC — bidirectional stream] - → await sessionAck [runtime confirms session] + → pollForOpenSession(sessionId) [GetSession backoff 100ms→1s] + ↑ waits for initiator agent to emit SessionStart directly → bindSession() [status: binding_session] - → send kickoff messages [through stream handle] - → handle.closeWrite() [half-close write side] + → provider.subscribeSession() [gRPC — read-only StreamSession] → markRunning() [status: running] → StreamConsumerService.start() [begins event consumption] ``` +The control-plane **never** calls `Send` — agents drive the session via their own gRPC +connection with their own Bearer tokens (RFC-MACP-0004 §4). The read-only observer stream +filters envelopes by `sessionId` and never writes a frame. 
+ ## Event Pipeline ``` @@ -72,30 +77,13 @@ Runtime gRPC stream → StreamHubService.publishEvent (SSE → live UI subscribers) ``` -## Message Flow (POST /runs/:id/messages) - -``` -POST /runs/:id/messages - → RunsController.sendMessage() - → RunExecutorService.sendMessage() - → validate run state (binding_session | running) - → resolve modeName from RuntimeSession - → encode payload (JSON or proto via ProtoRegistryService) - → provider.send() [gRPC unary] - → emit message.sent canonical event - → return { messageId, ack } -``` +## Message / Signal / Context — removed (direct-agent-auth CP-5/6/7) -## Signal Flow (POST /runs/:id/signal) - -``` -POST /runs/:id/signal - → RunsController.sendSignal() - → RunExecutorService.sendSignal() - → validate run state (running) - → provider.send() with empty sessionId + modeName [ambient plane] - → emit message.sent canonical event (subject.kind = 'signal') -``` +The `POST /runs/:id/{messages,signal,context}` endpoints were removed 2026-04-15 and now +return `410 Gone` with `errorCode: ENDPOINT_REMOVED`. Agents emit envelopes directly +against the runtime using `macp-sdk-python` / `macp-sdk-typescript`. The control-plane +observes those envelopes through its read-only `subscribeSession` stream and normalizes +them into canonical events via the pipeline above. ## Layer Map @@ -153,9 +141,9 @@ All modes terminate with `Commitment` (from `macp.v1.CommitmentPayload`). ## Key Design Decisions -1. **Scenario-agnostic**: Accepts only fully resolved `ExecutionRequest` — no scenario resolution. +1. **Scenario-agnostic**: Accepts only a generic `RunDescriptor` — scenario-specific fields (`kickoff[]`, `participants[].role`, `policyHints`, `commitments[]`, `initiatorParticipantId`) are rejected with 400 via `forbidNonWhitelisted: true`. 2. **Three-layer event pipeline**: Raw → canonical → projections. Raw preserves original data; canonical provides normalized, typed view. -3. 
**Bidirectional streaming**: `openSession()` returns a `RuntimeSessionHandle` with send/events/closeWrite/abort. +3. **Observer-only streaming**: `subscribeSession({runId, sessionId})` returns a read-only `RuntimeSessionHandle` — `events` async iterable + `abort()`. No `send()`. 4. **Transactional event persistence**: Sequence allocation + persistence in single DB transaction. 5. **Snake_case → camelCase normalization**: ProtoRegistryService converts Python/JSON snake_case to protobufjs camelCase. 6. **Proto-encoded payloads**: Real runtime requires proto encoding; control plane supports JSON fallback for testing. diff --git a/docs/INTEGRATION.md b/docs/INTEGRATION.md index 26e4ae2..7367c54 100644 --- a/docs/INTEGRATION.md +++ b/docs/INTEGRATION.md @@ -6,73 +6,47 @@ 2. Register it as a NestJS provider in `app.module.ts` 3. Add it to `RuntimeProviderRegistry` so it can be looked up by `kind` -Key methods to implement: -- `initialize()` — protocol version negotiation -- `openSession()` — bidirectional session stream (preferred) -- `send()` — unary message send with ack -- `getSession()` / `cancelSession()` — session management -- `getManifest()` / `listModes()` — metadata - -## Sending Messages from External Agents - -Agents communicate with active runs via HTTP: - -```bash -# Send an Evaluation (JSON payload — for mock runtime) -curl -X POST http://localhost:3001/runs/{runId}/messages \ - -H 'Authorization: Bearer ' \ - -H 'Content-Type: application/json' \ - -d '{ - "from": "evaluator", - "to": ["proposer"], - "messageType": "Evaluation", - "payload": { "recommendation": "APPROVE", "confidence": 0.95 } - }' - -# Send an Evaluation (proto-encoded — required by real Rust runtime) -curl -X POST http://localhost:3001/runs/{runId}/messages \ - -H 'Authorization: Bearer ' \ - -H 'Content-Type: application/json' \ - -d '{ - "from": "evaluator", - "to": ["proposer"], - "messageType": "Evaluation", - "payloadEnvelope": { - "encoding": "proto", - "proto": { - "typeName": 
"macp.modes.decision.v1.EvaluationPayload", - "value": { - "proposalId": "p-1", - "recommendation": "APPROVE", - "confidence": 0.95, - "reason": "Meets all criteria" - } - } - } - }' +Key methods to implement (observer-only surface, post direct-agent-auth): +- `initialize()` — protocol version negotiation. +- `subscribeSession({runId, runtimeSessionId})` — read-only `StreamSession` observer; returns `{events, abort}`. **Never writes envelopes.** +- `getSession()` — poll for session state (used by the observer's `pollForOpenSession` loop). +- `cancelSession()` — only called when `run.metadata.cancellationDelegated === true` (Option B in direct-agent-auth §Cancellation design). +- `getManifest()` / `listModes()` / `listRoots()` / `health()` — metadata. +- `registerPolicy()` / `unregisterPolicy()` / `getPolicy()` / `listPolicies()` — governance (RFC-MACP-0012). + +## Agents emit envelopes directly + +Agents authenticate to the runtime with their own Bearer tokens (RFC-MACP-0004 §4) and emit envelopes via `macp-sdk-python` / `macp-sdk-typescript`: + +```python +# Python example (direct-agent-auth) +from macp_sdk import MacpClient, AuthConfig, DecisionSession, new_session_id + +auth = AuthConfig.for_bearer(os.environ["MACP_BEARER_TOKEN"], expected_sender="evaluator") +client = MacpClient(target="runtime.internal:50051", secure=True, auth=auth) +await client.initialize() +session = DecisionSession(client, session_id=bootstrap.run.sessionId, auth=auth) +stream = session.open_stream() +await session.evaluate(proposal_id="prop-1", recommendation="APPROVE", confidence=0.95) ``` -## Sending Signals (Ambient Plane) - -Signals are non-binding, non-session-bound messages for observability: - -```bash -curl -X POST http://localhost:3001/runs/{runId}/signal \ - -H 'Authorization: Bearer ' \ - -H 'Content-Type: application/json' \ - -d '{ - "from": "evaluator", - "to": ["proposer"], - "messageType": "Signal", - "payload": { - "signalType": "progress", - "data": "Starting analysis", 
- "confidence": 0.0 - } - }' +```typescript +// TypeScript example +import { MacpClient, Auth, DecisionSession } from 'macp-sdk-typescript'; + +const client = new MacpClient({ + address: 'runtime.internal:50051', + secure: true, + auth: Auth.bearer(process.env.MACP_BEARER_TOKEN!, { expectedSender: 'evaluator' }), +}); +await client.initialize(); +const session = new DecisionSession(client, { sessionId: bootstrap.run.sessionId }); +const stream = session.openStream(); +await session.evaluate({ proposalId: 'prop-1', recommendation: 'APPROVE', confidence: 0.95 }); ``` -Signals use empty `sessionId` and `modeName` at the gRPC level. They do NOT enter session history but appear as `signal.emitted` canonical events when echoed by the runtime. +The control-plane's old HTTP escalation endpoints (`POST /runs/:id/{messages,signal,context}`) +now return **410 Gone**. See `../../ui-console/plans/direct-agent-auth.md` for the full migration story. ## Consuming SSE Streams @@ -114,7 +88,8 @@ Replay modes: `timed` (proportional timing), `step` (all at once), `instant` (no 1. Add proto definitions under `proto/macp/modes/{mode}/v1/` 2. Update `MESSAGE_TYPE_MAP` in `src/runtime/proto-registry.service.ts` 3. Update `deriveEventType()` in `src/events/event-normalizer.service.ts` for new message types -4. Add mode to `src/runtime/mock-runtime.provider.ts` supported modes list +4. Add mode to `test/helpers/scripted-mock-runtime.provider.ts` supported modes list (integration tests) +5. Add a projection reducer branch in `src/projection/projection.service.ts` — the `projection-coverage.spec.ts` invariant will fail CI otherwise ## Webhooks @@ -147,7 +122,7 @@ INTEGRATION_RUNTIME=remote RUNTIME_ADDRESS=127.0.0.1:50051 npm run test:integrat ./scripts/run-e2e.sh decision ``` -See `test/` for TypeScript integration tests and `test-agents/` for Python agent harnesses. +See `test/integration/` for TypeScript integration tests. 
Python agent harnesses now live in the `examples-service` repo (not `test-agents/`). ## Environment Variables diff --git a/package.json b/package.json index 1416fd7..01f90fa 100644 --- a/package.json +++ b/package.json @@ -9,7 +9,7 @@ "start": "node dist/main.js", "start:dev": "nest start --watch", "start:debug": "nest start --debug --watch", - "lint": "eslint \"src/**/*.ts\"", + "lint": "eslint \"src/**/*.ts\" --max-warnings=0", "lint:fix": "eslint \"src/**/*.ts\" --fix", "format": "prettier --write \"src/**/*.ts\"", "test": "jest", diff --git a/scripts/run-e2e.sh b/scripts/run-e2e.sh deleted file mode 100755 index 6e6c7a4..0000000 --- a/scripts/run-e2e.sh +++ /dev/null @@ -1,76 +0,0 @@ -#!/usr/bin/env bash -# Run MACP Control Plane E2E tests with Python agents (LangChain + CrewAI). -# -# Usage: -# ./scripts/run-e2e.sh # Run all agent E2E tests -# ./scripts/run-e2e.sh decision # Run only decision flow tests -# ./scripts/run-e2e.sh task # Run only task flow tests -# ./scripts/run-e2e.sh crewai # Run only CrewAI tests -# ./scripts/run-e2e.sh mixed # Run mixed-framework test -# ./scripts/run-e2e.sh integration # Run TypeScript integration tests -# -# Prerequisites: -# 1. Copy .env.example to .env and configure (DATABASE_URL, etc.) -# 2. pip install -r test-agents/requirements.txt -# 3. Docker running (for test database): docker compose -f docker-compose.test.yml up -d - -set -euo pipefail -cd "$(dirname "$0")/.." - -# Load .env if present (without overriding existing env vars) -if [ -f .env ]; then - set -a - # shellcheck disable=SC1091 - source .env - set +a - echo "Loaded .env" -fi - -case "${1:-all}" in - integration) - echo "Running TypeScript integration tests (mock runtime)..." - echo "═══════════════════════════════════════════════════════" - npm run test:integration - ;; - - decision) - echo "Running LangChain Decision Mode E2E tests..." 
- echo "═══════════════════════════════════════════════════════" - cd test-agents - python -m pytest scenarios/test_decision_flow.py -v --tb=short - ;; - - task) - echo "Running LangChain Task Mode E2E tests..." - echo "═══════════════════════════════════════════════════════" - cd test-agents - python -m pytest scenarios/test_task_flow.py -v --tb=short - ;; - - crewai) - echo "Running CrewAI E2E tests..." - echo "═══════════════════════════════════════════════════════" - cd test-agents - python -m pytest scenarios/test_crewai_decision.py scenarios/test_crewai_task.py -v --tb=short - ;; - - mixed) - echo "Running Mixed Agent (LangChain + CrewAI) E2E tests..." - echo "═══════════════════════════════════════════════════════" - cd test-agents - python -m pytest scenarios/test_mixed_agents.py -v --tb=short - ;; - - all) - echo "Running ALL agent E2E tests..." - echo "═══════════════════════════════════════════════════════" - cd test-agents - python -m pytest scenarios/ -v --tb=short - ;; - - *) - echo "Unknown option: $1" - echo "Usage: $0 [integration|decision|task|crewai|mixed|all]" - exit 1 - ;; -esac diff --git a/src/app.module.ts b/src/app.module.ts index f0732f3..2c15ab0 100644 --- a/src/app.module.ts +++ b/src/app.module.ts @@ -60,10 +60,14 @@ import { WebhookService } from './webhooks/webhook.service'; ConfigModule, DatabaseModule, AuthModule, - ThrottlerModule.forRoot([{ - ttl: Number(process.env.THROTTLE_TTL_MS ?? 60000), - limit: Number(process.env.THROTTLE_LIMIT ?? 
100) - }]) + ThrottlerModule.forRootAsync({ + imports: [ConfigModule], + inject: [AppConfigService], + useFactory: (config: AppConfigService) => [{ + ttl: config.throttleTtlMs, + limit: config.throttleLimit, + }], + }) ], controllers: [RunsController, RunInsightsController, RuntimeController, ObservabilityController, HealthController, MetricsController, AdminController, AuditController, WebhookController, DashboardController, EventsController], providers: [ diff --git a/src/audit/audit.service.spec.ts b/src/audit/audit.service.spec.ts index d92c19e..015a853 100644 --- a/src/audit/audit.service.spec.ts +++ b/src/audit/audit.service.spec.ts @@ -100,7 +100,6 @@ describe('AuditService', () => { // Second select call (count) — needs its own chain const countWhere = jest.fn().mockResolvedValue([{ count: 42 }]); const countFrom = jest.fn().mockReturnValue({ where: countWhere }); - const countSelect = jest.fn().mockReturnValue({ from: countFrom }); // Promise.all calls select twice: once for data, once for count mockSelect diff --git a/src/config/app-config.service.spec.ts b/src/config/app-config.service.spec.ts index ef3292e..2b88338 100644 --- a/src/config/app-config.service.spec.ts +++ b/src/config/app-config.service.spec.ts @@ -274,4 +274,32 @@ describe('AppConfigService', () => { expect(() => config.onModuleInit()).not.toThrow(); }); }); + + describe('session polling config (observer mode, CP-4)', () => { + it('defaults sessionPollBaseMs to 100', () => { + const config = new AppConfigService(); + expect(config.sessionPollBaseMs).toBe(100); + }); + + it('defaults sessionPollMaxMs to 1000', () => { + const config = new AppConfigService(); + expect(config.sessionPollMaxMs).toBe(1000); + }); + + it('defaults sessionPollTimeoutMs to 60000', () => { + const config = new AppConfigService(); + expect(config.sessionPollTimeoutMs).toBe(60000); + }); + + it('honours SESSION_POLL_TIMEOUT_MS override', () => { + process.env.SESSION_POLL_TIMEOUT_MS = '10000'; + const config = new 
AppConfigService(); + expect(config.sessionPollTimeoutMs).toBe(10000); + }); + + it('defaults cancelCallbackTimeoutMs to 5000', () => { + const config = new AppConfigService(); + expect(config.cancelCallbackTimeoutMs).toBe(5000); + }); + }); }); diff --git a/src/config/app-config.service.ts b/src/config/app-config.service.ts index b896e06..1d2de7d 100644 --- a/src/config/app-config.service.ts +++ b/src/config/app-config.service.ts @@ -49,23 +49,33 @@ export class AppConfigService implements OnModuleInit { readonly runtimeAddress = process.env.RUNTIME_ADDRESS ?? '127.0.0.1:50051'; readonly runtimeTls = readBoolean('RUNTIME_TLS', false); readonly runtimeAllowInsecure = readBoolean('RUNTIME_ALLOW_INSECURE', process.env.NODE_ENV === 'development'); + /** + * Control-plane's own single Bearer token. The control-plane has exactly one + * runtime identity, least-privilege (`can_start_sessions: false`). Per-agent + * tokens were removed with direct-agent-auth Phase 4 (CP-9) — agents authenticate + * themselves now. + */ readonly runtimeBearerToken = process.env.RUNTIME_BEARER_TOKEN ?? ''; readonly runtimeUseDevHeader = readBoolean('RUNTIME_USE_DEV_HEADER', process.env.NODE_ENV === 'development'); readonly runtimeRequestTimeoutMs = readNumber('RUNTIME_REQUEST_TIMEOUT_MS', 30000); readonly runtimeDevAgentId = process.env.RUNTIME_DEV_AGENT_ID ?? 'control-plane'; - /** @deprecated SessionWatch is no longer part of the base protocol. Kept for backward compat. */ - readonly runtimeStreamSubscriptionMessageType = - process.env.RUNTIME_STREAM_SUBSCRIPTION_MESSAGE_TYPE ?? 'SessionWatch'; - /** @deprecated SessionWatch is no longer part of the base protocol. Kept for backward compat. */ - readonly runtimeStreamSubscriberId = - process.env.RUNTIME_STREAM_SUBSCRIBER_ID ?? this.runtimeDevAgentId; + /** + * Observer-mode poll cadence. Control-plane polls GetSession after POST /runs + * until the initiator agent opens the session. See direct-agent-auth §End-to-end target flow. 
+ */ + readonly sessionPollBaseMs = readNumber('SESSION_POLL_BASE_MS', 100); + readonly sessionPollMaxMs = readNumber('SESSION_POLL_MAX_MS', 1000); + readonly sessionPollTimeoutMs = readNumber('SESSION_POLL_TIMEOUT_MS', 60000); + /** HTTP timeout for proxying UI cancel to the initiator agent's callback (Option A). */ + readonly cancelCallbackTimeoutMs = readNumber('CANCEL_CALLBACK_TIMEOUT_MS', 5000); // Circuit breaker readonly runtimeCircuitBreakerThreshold = readNumber('RUNTIME_CIRCUIT_BREAKER_THRESHOLD', 5); readonly runtimeCircuitBreakerResetMs = readNumber('RUNTIME_CIRCUIT_BREAKER_RESET_MS', 30000); - // Kickoff retry - readonly kickoffMaxRetries = readNumber('KICKOFF_MAX_RETRIES', 3); + // Throttler (NestJS @nestjs/throttler) + readonly throttleTtlMs = readNumber('THROTTLE_TTL_MS', 60000); + readonly throttleLimit = readNumber('THROTTLE_LIMIT', 100); readonly streamIdleTimeoutMs = readNumber('STREAM_IDLE_TIMEOUT_MS', 120000); readonly streamMaxRetries = readNumber('STREAM_MAX_RETRIES', 5); @@ -111,6 +121,14 @@ export class AppConfigService implements OnModuleInit { ); } + // Warn (don't fail) when the control-plane has no runtime identity configured — + // observer calls (GetSession, StreamSession, ListPolicies) will be rejected as unauthenticated. + if (!this.runtimeBearerToken) { + this.logger.warn( + 'Production start: RUNTIME_BEARER_TOKEN is not set. Runtime calls will be rejected as unauthenticated.' 
+ ); + } + // 1.2: Fail-fast if TLS is off and insecure is not explicitly allowed if (!this.runtimeTls && !this.runtimeAllowInsecure) { throw new Error( diff --git a/src/contracts/control-plane.ts b/src/contracts/control-plane.ts index b5d4bda..0ca0a8c 100644 --- a/src/contracts/control-plane.ts +++ b/src/contracts/control-plane.ts @@ -1,41 +1,21 @@ -export type ExecutionMode = 'live' | 'replay' | 'sandbox'; - -export type PayloadEncoding = 'json' | 'text' | 'base64' | 'proto'; - -export interface ProtoPayload { - typeName: string; - value: Record; -} - -export interface PayloadEnvelopeInput { - encoding: PayloadEncoding; - mediaType?: string; - json?: Record; - text?: string; - base64?: string; - proto?: ProtoPayload; -} - -export interface RootRef { - uri: string; - name?: string; -} +export type ExecutionMode = 'live' | 'sandbox'; export interface ParticipantRef { + /** Bare sender string — must match the agent's identity in the runtime. */ id: string; - role?: string; - transportIdentity?: string; - metadata?: Record; } -export interface KickoffMessage { - from: string; - to?: string[]; - kind: 'request' | 'broadcast' | 'proposal' | 'context'; - messageType: string; - payload?: Record; - payloadEnvelope?: PayloadEnvelopeInput; - metadata?: Record; +/** + * Populated on the `policy.expectedCommitments[]` projection when (future) runtime + * `PolicyResolved` events supply commitment expectations. Not part of the inbound + * HTTP contract — the control-plane does not accept commitments from callers. 
+ */ +export interface ExpectedCommitment { + id: string; + title?: string; + description?: string; + requiredRoles?: string[]; + policyRef?: string; } export interface ExecutionRequester { @@ -43,44 +23,52 @@ export interface ExecutionRequester { actorType?: 'user' | 'service' | 'system'; } -export interface RunMessageInput { - from: string; - to?: string[]; - messageType: string; - payload?: Record; - payloadEnvelope?: PayloadEnvelopeInput; - metadata?: Record; -} - -export interface ExpectedCommitment { - id: string; - title?: string; - description?: string; - requiredRoles?: string[]; - policyRef?: string; +/** + * Scenario-agnostic cancellation callback (Option A, direct-agent-auth plan §Cancellation design). + * Points to a per-initiator HTTP endpoint the control-plane POSTs when the UI cancels a run. + * `bearer` is optional and opaque; the initiator validates it. + */ +export interface CancelCallback { + url: string; + bearer?: string; } -export interface ExecutionRequest { +/** + * Scenario-agnostic run descriptor — the control-plane accepts only these fields. + * + * No scenario-specific fields cross this boundary: no kickoff templates, no policy hints, + * no participant roles, no initiator designation. Agents authenticate to the runtime + * directly; the control-plane is an observer. See direct-agent-auth.md §Generic contracts. + */ +export interface RunDescriptor { mode: ExecutionMode; runtime: { kind: string; version?: string; }; session: { + /** + * Optional caller-allocated session id. Must satisfy runtime validator + * (UUID v4/v7 or base64url 22+ chars). When omitted, control-plane allocates a UUID v4. + */ + sessionId?: string; modeName: string; modeVersion: string; configurationVersion: string; + /** Opaque; control-plane never interprets it. */ policyVersion?: string; ttlMs: number; - initiatorParticipantId?: string; + /** Bare sender strings; for audit / projection only. 
*/ participants: ParticipantRef[]; - roots?: RootRef[]; - context?: Record; - contextEnvelope?: PayloadEnvelopeInput; + /** + * Opaque metadata bag. Reserved keys: + * - `source`, `sourceRef` — scenario provenance tags (for filtering) + * - `environment`, `scenarioRef` — filter facets + * - `cancelCallback` — CancelCallback (Option A) + * - `cancellationDelegated` — boolean (Option B — control-plane may cancel directly) + */ metadata?: Record; - commitments?: ExpectedCommitment[]; }; - kickoff?: KickoffMessage[]; execution?: { idempotencyKey?: string; tags?: string[]; @@ -88,6 +76,7 @@ export interface ExecutionRequest { }; } + export type RunStatus = | 'queued' | 'starting' diff --git a/src/contracts/runtime.ts b/src/contracts/runtime.ts index ee97399..7fd0a75 100644 --- a/src/contracts/runtime.ts +++ b/src/contracts/runtime.ts @@ -1,7 +1,6 @@ import { CanonicalEvent, - ExecutionRequest, - ParticipantRef, + RunDescriptor, SessionState } from './control-plane'; @@ -44,6 +43,11 @@ export interface RawRuntimeEvent { receivedAt: string; envelope?: RuntimeEnvelope; sessionSnapshot?: RuntimeSessionSnapshot; + /** + * Retained on the raw-event discriminator for the normalizer's shape-union, but + * the control-plane observer no longer produces `send-ack` events — all outbound + * envelopes are emitted by agents directly against the runtime. See direct-agent-auth.md §Invariants. + */ ack?: RuntimeAck; streamStatus?: { status: 'opened' | 'reconnecting' | 'closed'; @@ -76,41 +80,17 @@ export interface RuntimeInitializeResult { instructions?: string; } -export interface RuntimeStartSessionRequest { - runId: string; - execution: ExecutionRequest; -} - -export interface RuntimeStartSessionResult { +/** + * Result shape returned once the observer has confirmed the session is OPEN. + * `initiator` is learned from the runtime's session metadata (GetSession response), + * never chosen by the control-plane. 
+ */ +export interface RuntimeSessionOpenResult { runtimeSessionId: string; initiator: string; ack: RuntimeAck; } -export interface RuntimeSendRequest { - runId: string; - runtimeSessionId: string; - modeName: string; - from: string; - to: string[]; - messageType: string; - payload: Buffer; - payloadDescriptor?: Record; - metadata?: Record; -} - -export interface RuntimeSendResult { - ack: RuntimeAck; - envelope: RuntimeEnvelope; -} - -export interface RuntimeStreamSessionRequest { - runId: string; - runtimeSessionId: string; - modeName: string; - subscriberId: string; -} - export interface RuntimeGetSessionRequest { runId: string; runtimeSessionId: string; @@ -176,24 +156,24 @@ export interface RuntimeCallOptions { deadline?: Date; } -/** Request to open a unified bidirectional session stream */ -export interface RuntimeOpenSessionRequest { +/** Request to subscribe to an existing session's event stream (read-only). */ +export interface RuntimeSubscribeSessionRequest { runId: string; - execution: ExecutionRequest; + runtimeSessionId: string; } -/** Handle to an open bidirectional StreamSession */ +/** + * Handle to an observer-only StreamSession. + * + * **Invariant (direct-agent-auth §Invariants #5):** the control-plane NEVER writes envelopes + * on this stream. There is intentionally no `send()` on this handle. Agents authenticate + * to the runtime directly with their own Bearer tokens and emit their own envelopes. + */ export interface RuntimeSessionHandle { - /** Send an envelope through the open stream */ - send(envelope: RuntimeEnvelope): void; - /** Async iterable of raw events from the stream */ + /** Async iterable of raw events from the stream. */ events: AsyncIterable; - /** Close the write side (after all kickoff messages sent) */ - closeWrite(): void; - /** Abort the stream immediately */ + /** Abort the stream immediately. 
*/ abort(): void; - /** The ack derived from the SessionStart echo (resolved after first response) */ - sessionAck: Promise; } /** Stored runtime capabilities from Initialize response */ @@ -207,21 +187,31 @@ export interface RuntimeCapabilities { policyRegistry?: { registerPolicy?: boolean; listPolicies?: boolean; listChanged?: boolean }; } +/** + * Observer-only runtime provider surface. + * + * The control-plane does not call `Send` for any reason — agents authenticate directly to + * the runtime (RFC-MACP-0004 §4). The provider's job is to initialize, observe, inspect, + * and (conditionally) cancel sessions. See direct-agent-auth.md §Invariants. + */ export interface RuntimeProvider { readonly kind: string; initialize(req: RuntimeInitializeRequest, opts?: RuntimeCallOptions): Promise; - /** Open a unified bidirectional session — replaces startSession() + streamSession() */ - openSession(req: RuntimeOpenSessionRequest): RuntimeSessionHandle; + /** Attach a read-only StreamSession observer. Never writes. */ + subscribeSession(req: RuntimeSubscribeSessionRequest): RuntimeSessionHandle; - /** @deprecated Use openSession() for new session creation. Kept for backward compat. */ - startSession(req: RuntimeStartSessionRequest, opts?: RuntimeCallOptions): Promise; - send(req: RuntimeSendRequest): Promise; - /** @deprecated Use openSession().events for streaming. Kept for reconnection fallback. */ - streamSession(req: RuntimeStreamSessionRequest): AsyncIterable; getSession(req: RuntimeGetSessionRequest): Promise; + + /** + * Policy-delegated cancellation (direct-agent-auth §Cancellation design — Option B). + * Only called when the run's metadata records `cancellationDelegated: true`. Default + * cancellation flow (Option A) proxies through the initiator agent's callback and + * does not invoke this method. 
+ */ cancelSession(req: RuntimeCancelSessionRequest): Promise; + getManifest(): Promise; listModes(): Promise; listRoots(): Promise; @@ -273,18 +263,18 @@ export interface RuntimeListPoliciesRequest { // ── Credential types ──────────────────────────────────────────────── +/** + * Single-bearer credential resolver (CP-9). The control-plane has exactly one + * runtime identity — its own least-privilege Bearer token with `can_start_sessions: false`. + * Per-sender token maps were removed in direct-agent-auth Phase 4. + */ export interface RuntimeCredentialResolver { - resolve(req: { - runtimeKind: string; - requester?: { actorId?: string; actorType?: string }; - participant?: ParticipantRef; - fallbackSender?: string; - }): Promise; + resolve(req: { runtimeKind: string }): Promise; } export interface NormalizeContext { knownParticipants: Set; - execution: ExecutionRequest; + execution: RunDescriptor; runtimeSessionId: string; } diff --git a/src/controllers/observability.controller.ts b/src/controllers/observability.controller.ts index bb2c86e..d39e054 100644 --- a/src/controllers/observability.controller.ts +++ b/src/controllers/observability.controller.ts @@ -48,7 +48,7 @@ export class ObservabilityController { @ApiCreatedResponse({ description: 'Artifact created.' 
}) async createArtifact( @Param('id', new ParseUUIDPipe()) id: string, - @Body(new ValidationPipe({ transform: true, whitelist: true })) body: CreateArtifactDto + @Body(new ValidationPipe({ transform: true, whitelist: true, forbidNonWhitelisted: true })) body: CreateArtifactDto ) { await this.runManager.getRun(id); const artifact = await this.artifactService.register({ diff --git a/src/controllers/run-insights.controller.ts b/src/controllers/run-insights.controller.ts index 4d6368e..9fb0546 100644 --- a/src/controllers/run-insights.controller.ts +++ b/src/controllers/run-insights.controller.ts @@ -73,7 +73,7 @@ export class RunInsightsController { @ApiBody({ type: CompareRunsDto }) @ApiOkResponse({ type: RunComparisonResultDto }) async compareRuns( - @Body(new ValidationPipe({ transform: true, whitelist: true })) body: CompareRunsDto + @Body(new ValidationPipe({ transform: true, whitelist: true, forbidNonWhitelisted: true })) body: CompareRunsDto ) { return this.insightsService.compareRuns(body.leftRunId, body.rightRunId); } diff --git a/src/controllers/runs.controller.spec.ts b/src/controllers/runs.controller.spec.ts index 0049a2b..5daef29 100644 --- a/src/controllers/runs.controller.spec.ts +++ b/src/controllers/runs.controller.spec.ts @@ -1,3 +1,4 @@ +import { BadRequestException, HttpException } from '@nestjs/common'; import { RunsController } from './runs.controller'; import { RunExecutorService } from '../runs/run-executor.service'; import { RunManagerService } from '../runs/run-manager.service'; @@ -9,14 +10,12 @@ import { InstrumentationService } from '../telemetry/instrumentation.service'; import { ProjectionService } from '../projection/projection.service'; import { OutboundMessageRepository } from '../storage/outbound-message.repository'; -describe('RunsController', () => { +describe('RunsController (observer mode)', () => { let controller: RunsController; let mockRunExecutor: { launch: jest.Mock; cancel: jest.Mock; - sendMessage: jest.Mock; - 
sendSignal: jest.Mock; - updateContext: jest.Mock; + clone: jest.Mock; }; let mockRunManager: { listRuns: jest.Mock; @@ -42,9 +41,7 @@ describe('RunsController', () => { mockRunExecutor = { launch: jest.fn(), cancel: jest.fn(), - sendMessage: jest.fn(), - sendSignal: jest.fn(), - updateContext: jest.fn(), + clone: jest.fn(), }; mockRunManager = { listRuns: jest.fn(), @@ -58,15 +55,9 @@ describe('RunsController', () => { }; mockReplayService = {}; mockStreamHub = {}; - mockConfig = { - streamSseHeartbeatMs: 15000, - }; - mockProjectionService = { - rebuild: jest.fn(), - }; - mockOutboundMessageRepository = { - listByRunId: jest.fn(), - }; + mockConfig = { streamSseHeartbeatMs: 15000 }; + mockProjectionService = { rebuild: jest.fn() }; + mockOutboundMessageRepository = { listByRunId: jest.fn() }; controller = new RunsController( mockRunExecutor as unknown as RunExecutorService, @@ -81,9 +72,6 @@ describe('RunsController', () => { ); }); - // =========================================================================== - // listRuns - // =========================================================================== describe('listRuns', () => { it('delegates to runManager.listRuns with query params', async () => { const runs = [{ id: 'r1', status: 'running' }]; @@ -100,42 +88,22 @@ describe('RunsController', () => { const result = await controller.listRuns(query as any); expect(result).toEqual(runs); - expect(mockRunManager.listRuns).toHaveBeenCalledWith({ - status: 'running', - tags: ['demo'], - createdAfter: undefined, - createdBefore: undefined, - limit: 10, - offset: 0, - sortBy: 'createdAt', - sortOrder: 'desc', - includeArchived: undefined, - }); + expect(mockRunManager.listRuns).toHaveBeenCalled(); }); it('applies default limit, offset, sortBy, sortOrder when not provided', async () => { mockRunManager.listRuns.mockResolvedValue([]); - await controller.listRuns({} as any); - expect(mockRunManager.listRuns).toHaveBeenCalledWith( - expect.objectContaining({ - limit: 
50, - offset: 0, - sortBy: 'createdAt', - sortOrder: 'desc', - }), + expect.objectContaining({ limit: 50, offset: 0, sortBy: 'createdAt', sortOrder: 'desc' }), ); }); }); - // =========================================================================== - // createRun - // =========================================================================== describe('createRun', () => { - it('calls runExecutor.launch and returns correct shape', async () => { - const fakeRun = { id: 'run-123', status: 'queued', traceId: 'trace-abc' }; - mockRunExecutor.launch.mockResolvedValue(fakeRun); + it('launches and returns {runId, sessionId, status, traceId}', async () => { + const run = { id: 'run-123', status: 'queued', traceId: 'trace-abc' }; + mockRunExecutor.launch.mockResolvedValue({ run, sessionId: 'sess-alloc-1' }); const body = { mode: 'live', @@ -153,184 +121,107 @@ describe('RunsController', () => { expect(mockRunExecutor.launch).toHaveBeenCalledWith(body); expect(result).toEqual({ runId: 'run-123', + sessionId: 'sess-alloc-1', status: 'queued', traceId: 'trace-abc', }); }); - - it('returns traceId as undefined when run has no traceId', async () => { - const fakeRun = { id: 'run-456', status: 'queued', traceId: null }; - mockRunExecutor.launch.mockResolvedValue(fakeRun); - - const result = await controller.createRun({ mode: 'sandbox' } as any); - - expect(result.traceId).toBeUndefined(); - }); }); - // =========================================================================== - // getRun - // =========================================================================== - describe('getRun', () => { - it('delegates to runManager.getRun', async () => { - const fakeRun = { id: 'run-789', status: 'completed' }; - mockRunManager.getRun.mockResolvedValue(fakeRun); - - const result = await controller.getRun('run-789'); - - expect(mockRunManager.getRun).toHaveBeenCalledWith('run-789'); - expect(result).toEqual(fakeRun); + describe('getRun / getRunState / getRunEvents', () => { + 
it('getRun delegates to runManager.getRun', async () => { + mockRunManager.getRun.mockResolvedValue({ id: 'run-1', status: 'completed' }); + await controller.getRun('run-1'); + expect(mockRunManager.getRun).toHaveBeenCalledWith('run-1'); }); - }); - - // =========================================================================== - // getRunState - // =========================================================================== - describe('getRunState', () => { - it('delegates to runManager.getState', async () => { - const fakeState = { - run: { runId: 'run-1', status: 'running' }, - participants: [], - graph: { nodes: [], edges: [] }, - timeline: { latestSeq: 5 }, - }; - mockRunManager.getState.mockResolvedValue(fakeState); - - const result = await controller.getRunState('run-1'); + it('getRunState delegates to runManager.getState', async () => { + mockRunManager.getState.mockResolvedValue({ run: { runId: 'run-1' } }); + await controller.getRunState('run-1'); expect(mockRunManager.getState).toHaveBeenCalledWith('run-1'); - expect(result).toEqual(fakeState); }); - }); - - // =========================================================================== - // getRunEvents - // =========================================================================== - describe('getRunEvents', () => { - it('delegates to eventRepository.listCanonicalByRun', async () => { - const events = [{ id: 'e1', seq: 1, type: 'message.received' }]; - mockEventRepository.listCanonicalByRun.mockResolvedValue(events); - - const query = { afterSeq: 5, limit: 100 }; - const result = await controller.getRunEvents('run-1', query as any); - expect(mockEventRepository.listCanonicalByRun).toHaveBeenCalledWith('run-1', 5, 100); - expect(result).toEqual(events); - }); - - it('applies default afterSeq=0 and limit=200 when not provided', async () => { + it('getRunEvents legacy fast-path uses listCanonicalByRun', async () => { mockEventRepository.listCanonicalByRun.mockResolvedValue([]); - - await 
controller.getRunEvents('run-1', {} as any); - - expect(mockEventRepository.listCanonicalByRun).toHaveBeenCalledWith('run-1', 0, 200); + await controller.getRunEvents('run-1', { afterSeq: 5, limit: 100 } as any); + expect(mockEventRepository.listCanonicalByRun).toHaveBeenCalledWith('run-1', 5, 100); }); }); - // =========================================================================== - // cancelRun - // =========================================================================== describe('cancelRun', () => { - it('delegates to runExecutor.cancel with reason', async () => { - const cancelledRun = { id: 'run-1', status: 'cancelled' }; - mockRunExecutor.cancel.mockResolvedValue(cancelledRun); - - const result = await controller.cancelRun('run-1', { reason: 'user requested' }); - - expect(mockRunExecutor.cancel).toHaveBeenCalledWith('run-1', 'user requested'); - expect(result).toEqual(cancelledRun); - }); - - it('passes undefined reason when body has no reason', async () => { + it('delegates to runExecutor.cancel', async () => { mockRunExecutor.cancel.mockResolvedValue({ id: 'run-1', status: 'cancelled' }); - - await controller.cancelRun('run-1', {}); - - expect(mockRunExecutor.cancel).toHaveBeenCalledWith('run-1', undefined); + await controller.cancelRun('run-1', { reason: 'user requested' }); + expect(mockRunExecutor.cancel).toHaveBeenCalledWith('run-1', 'user requested'); }); }); // =========================================================================== - // sendMessage + // Removed envelope-emission endpoints (direct-agent-auth CP-5/6/7) // =========================================================================== - describe('sendMessage', () => { - it('delegates to runExecutor.sendMessage', async () => { - const sendResult = { messageId: 'msg-1', ack: { ok: true } }; - mockRunExecutor.sendMessage.mockResolvedValue(sendResult); + describe('removed endpoints — return 410 Gone', () => { + it('POST /runs/:id/messages returns 410', () => { + expect(() => 
controller.sendMessage('run-1' as any)).toThrow(HttpException); + try { + controller.sendMessage('run-1' as any); + } catch (err) { + expect((err as HttpException).getStatus()).toBe(410); + } + }); - const body = { - from: 'agent-1', - to: ['agent-2'], - messageType: 'Evaluation', - payloadEnvelope: { - encoding: 'proto', - proto: { typeName: 'macp.modes.decision.v1.EvaluationPayload', value: { proposal_id: 'p1' } } - } - }; - const result = await controller.sendMessage('run-1', body as any); + it('POST /runs/:id/signal returns 410', () => { + try { + controller.sendSignal('run-1' as any); + fail('should have thrown'); + } catch (err) { + expect(err).toBeInstanceOf(HttpException); + expect((err as HttpException).getStatus()).toBe(410); + } + }); - expect(mockRunExecutor.sendMessage).toHaveBeenCalledWith('run-1', body); - expect(result).toEqual(sendResult); + it('POST /runs/:id/context returns 410', () => { + try { + controller.updateContext('run-1' as any); + fail('should have thrown'); + } catch (err) { + expect(err).toBeInstanceOf(HttpException); + expect((err as HttpException).getStatus()).toBe(410); + } }); }); - // =========================================================================== - // sendSignal - // =========================================================================== - describe('sendSignal', () => { - it('delegates to runExecutor.sendSignal', async () => { - const signalResult = { messageId: 'msg-1', ack: { ok: true } }; - mockRunExecutor.sendSignal.mockResolvedValue(signalResult); - - const body = { - from: 'agent-1', - to: ['agent-2'], - messageType: 'Signal', - payload: { data: 'test' }, - signalType: 'alert', - }; - const result = await controller.sendSignal('run-1', body as any); - - expect(mockRunExecutor.sendSignal).toHaveBeenCalledWith('run-1', body); - expect(result).toEqual(signalResult); + describe('cloneRun', () => { + it('rejects context override (scenario-specific, not accepted by control-plane)', async () => { + await expect( + 
controller.cloneRun('run-1', { context: { some: 'thing' } } as any), + ).rejects.toThrow(BadRequestException); }); - }); - // =========================================================================== - // updateContext - // =========================================================================== - describe('updateContext', () => { - it('delegates to runExecutor.updateContext', async () => { - const contextResult = { messageId: 'msg-ctx', ack: { ok: true } }; - mockRunExecutor.updateContext.mockResolvedValue(contextResult); + it('delegates to runExecutor.clone and returns {runId, sessionId, status, traceId}', async () => { + const run = { id: 'run-2', status: 'queued', traceId: 't2' }; + mockRunExecutor.clone.mockResolvedValue({ run, sessionId: 'sess-2' }); - const body = { from: 'agent-1', context: { key: 'value' } }; - const result = await controller.updateContext('run-1', body as any); + const result = await controller.cloneRun('run-1', { tags: ['clone'] } as any); - expect(mockRunExecutor.updateContext).toHaveBeenCalledWith('run-1', body); - expect(result).toEqual(contextResult); + expect(mockRunExecutor.clone).toHaveBeenCalledWith('run-1', { tags: ['clone'] }); + expect(result).toEqual({ + runId: 'run-2', + sessionId: 'sess-2', + status: 'queued', + traceId: 't2', + }); }); }); - // =========================================================================== - // rebuildProjection - // =========================================================================== describe('rebuildProjection', () => { it('fetches events and delegates to projectionService.rebuild', async () => { - const fakeRun = { id: 'run-1', status: 'completed' }; - mockRunManager.getRun.mockResolvedValue(fakeRun); - const events = [{ id: 'e1', seq: 1, type: 'run.created' }]; - mockEventRepository.listCanonicalByRun.mockResolvedValue(events); - const projection = { run: { runId: 'run-1', status: 'completed' } }; - mockProjectionService.rebuild.mockResolvedValue(projection); + 
mockRunManager.getRun.mockResolvedValue({ id: 'run-1', status: 'completed' }); + mockEventRepository.listCanonicalByRun.mockResolvedValue([{ id: 'e1', seq: 1, type: 'run.created' }]); + mockProjectionService.rebuild.mockResolvedValue({ run: { runId: 'run-1' } }); - const result = await controller.rebuildProjection('run-1'); + await controller.rebuildProjection('run-1'); - expect(mockRunManager.getRun).toHaveBeenCalledWith('run-1'); - expect(mockEventRepository.listCanonicalByRun).toHaveBeenCalledWith('run-1', 0, 100000); - expect(mockProjectionService.rebuild).toHaveBeenCalledWith('run-1', events); - expect(result).toEqual(projection); + expect(mockProjectionService.rebuild).toHaveBeenCalledWith('run-1', expect.any(Array)); }); }); - }); diff --git a/src/controllers/runs.controller.ts b/src/controllers/runs.controller.ts index e294db4..108802d 100644 --- a/src/controllers/runs.controller.ts +++ b/src/controllers/runs.controller.ts @@ -6,13 +6,15 @@ import { Get, Headers, HttpCode, + HttpException, + HttpStatus, MessageEvent, Param, ParseUUIDPipe, Post, Query, Sse, - ValidationPipe + ValidationPipe, } from '@nestjs/common'; import { ApiAcceptedResponse, @@ -20,27 +22,24 @@ import { ApiOkResponse, ApiOperation, ApiQuery, - ApiTags + ApiTags, } from '@nestjs/swagger'; import { map, Observable } from 'rxjs'; import { CanonicalEvent, ReplayRequest, RunStatus } from '../contracts/control-plane'; import { AppConfigService } from '../config/app-config.service'; -import { ExecutionRequestDto } from '../dto/execution-request.dto'; +import { RunDescriptorDto } from '../dto/run-descriptor.dto'; import { ListEventsQueryDto } from '../dto/list-events-query.dto'; import { ListRunsQueryDto } from '../dto/list-runs-query.dto'; import { ReplayRequestDto } from '../dto/replay-request.dto'; import { CloneRunDto } from '../dto/clone-run.dto'; -import { SendRunMessageDto } from '../dto/send-run-message.dto'; -import { SendSignalDto } from '../dto/send-signal.dto'; import { 
StreamRunQueryDto } from '../dto/stream-run-query.dto'; -import { UpdateContextDto } from '../dto/update-context.dto'; import { ProjectionService } from '../projection/projection.service'; import { OutboundMessageRepository } from '../storage/outbound-message.repository'; import { CanonicalEventDto, CreateRunResponseDto, ReplayDescriptorDto, - RunStateResponseDto + RunStateResponseDto, } from '../dto/run-responses.dto'; import { StreamHubService, StreamHubMessage } from '../events/stream-hub.service'; import { InstrumentationService } from '../telemetry/instrumentation.service'; @@ -49,6 +48,27 @@ import { EventRepository } from '../storage/event.repository'; import { RunExecutorService } from '../runs/run-executor.service'; import { RunManagerService } from '../runs/run-manager.service'; +/** + * RFC-MACP-0001 §5.3 forbids HTTP bypass of MACP. Control-plane removed all + * envelope-emission endpoints (direct-agent-auth CP-5/6/7). Agents use the + * macp-sdk-python / macp-sdk-typescript clients to emit envelopes directly. + */ +const MIGRATION_URL = 'https://github.com/multiagentcoordinationprotocol/docs/blob/main/ONBOARDING_AN_AGENT.md'; + +function gone(endpoint: string): never { + throw new HttpException( + { + statusCode: HttpStatus.GONE, + errorCode: 'ENDPOINT_REMOVED', + message: `${endpoint} has been removed. Agents authenticate to the runtime directly via macp-sdk-python / macp-sdk-typescript. 
See ${MIGRATION_URL}`, + }, + HttpStatus.GONE, + { + cause: new Error(`${endpoint} removed (direct-agent-auth)`), + }, + ); +} + @ApiTags('runs') @Controller('runs') export class RunsController { @@ -61,14 +81,15 @@ export class RunsController { private readonly config: AppConfigService, private readonly projectionService: ProjectionService, private readonly outboundMessageRepository: OutboundMessageRepository, - private readonly instrumentation: InstrumentationService + private readonly instrumentation: InstrumentationService, ) {} @Post('validate') - @ApiOperation({ summary: 'Preflight validation of an execution request without creating a run.' }) - @ApiBody({ type: ExecutionRequestDto }) + @ApiOperation({ summary: 'Preflight validation of a RunDescriptor without creating a run.' }) + @ApiBody({ type: RunDescriptorDto }) async validateRequest( - @Body(new ValidationPipe({ transform: true, whitelist: true })) body: ExecutionRequestDto + @Body(new ValidationPipe({ transform: true, whitelist: true, forbidNonWhitelisted: true })) + body: RunDescriptorDto, ) { return this.runExecutor.validate(body); } @@ -76,7 +97,7 @@ export class RunsController { @Get() @ApiOperation({ summary: 'List runs with optional filtering and pagination.' }) async listRuns( - @Query(new ValidationPipe({ transform: true, whitelist: true })) query: ListRunsQueryDto + @Query(new ValidationPipe({ transform: true, whitelist: true })) query: ListRunsQueryDto, ) { return this.runManager.listRuns({ status: query.status, @@ -91,20 +112,27 @@ export class RunsController { includeArchived: query.includeArchived, environment: query.environment, scenarioRef: query.scenarioRef, - search: query.search + search: query.search, }); } @Post() - @ApiOperation({ summary: 'Create and launch a runtime execution run.' }) + @ApiOperation({ + summary: + 'Create and launch a runtime execution run. 
Returns {runId, sessionId} — caller distributes sessionId to agents via bootstrap.', + }) @ApiAcceptedResponse({ type: CreateRunResponseDto }) - @ApiBody({ type: ExecutionRequestDto }) - async createRun(@Body(new ValidationPipe({ transform: true, whitelist: true })) body: ExecutionRequestDto) { - const run = await this.runExecutor.launch(body); + @ApiBody({ type: RunDescriptorDto }) + async createRun( + @Body(new ValidationPipe({ transform: true, whitelist: true, forbidNonWhitelisted: true })) + body: RunDescriptorDto, + ) { + const { run, sessionId } = await this.runExecutor.launch(body); return { runId: run.id, + sessionId, status: run.status as RunStatus, - traceId: run.traceId ?? undefined + traceId: run.traceId ?? undefined, } satisfies CreateRunResponseDto; } @@ -122,7 +150,7 @@ export class RunsController { } @Get(':id/events') - @ApiOperation({ summary: 'List canonical events for a run with optional time-range and type filters (§4.2).' }) + @ApiOperation({ summary: 'List canonical events for a run with optional time-range and type filters.' }) @ApiQuery({ name: 'afterSeq', required: false }) @ApiQuery({ name: 'limit', required: false }) @ApiQuery({ name: 'afterTs', required: false }) @@ -131,9 +159,8 @@ export class RunsController { @ApiOkResponse({ type: [CanonicalEventDto] }) async getRunEvents( @Param('id', new ParseUUIDPipe()) id: string, - @Query(new ValidationPipe({ transform: true, whitelist: true })) query: ListEventsQueryDto + @Query(new ValidationPipe({ transform: true, whitelist: true })) query: ListEventsQueryDto, ) { - // Fast path: no time/type filter — keep legacy array response for backward compat. if (!query.afterTs && !query.beforeTs && !query.type) { return this.eventRepository.listCanonicalByRun(id, query.afterSeq ?? 0, query.limit ?? 
200); } @@ -155,7 +182,7 @@ export class RunsController { streamRun( @Param('id', new ParseUUIDPipe()) id: string, @Query(new ValidationPipe({ transform: true, whitelist: true })) query: StreamRunQueryDto, - @Headers('last-event-id') lastEventId?: string + @Headers('last-event-id') lastEventId?: string, ): Observable<MessageEvent> { const afterSeq = query.afterSeq ?? (lastEventId ? Number(lastEventId) : 0); const includeSnapshot = query.includeSnapshot !== false; @@ -167,7 +194,6 @@ export class RunsController { let backfillDone = false; let highSeq = afterSeq; - // 1. Subscribe to live hub immediately, buffer during backfill const liveSub = this.streamHub.stream(id).subscribe({ next: (msg) => { if (!backfillDone) { @@ -180,14 +206,13 @@ export class RunsController { subscriber.next({ type: msg.event, data: msg.data, - ...(seq !== undefined ? { id: String(seq) } : {}) + ...(seq !== undefined ? { id: String(seq) } : {}), } as MessageEvent); }, complete: () => subscriber.complete(), - error: (err) => subscriber.error(err) + error: (err) => subscriber.error(err), }); - // 2. Heartbeat const heartbeatTimer = setInterval(() => { subscriber.next({ type: 'heartbeat', data: { ts: new Date().toISOString() } } as MessageEvent); }, heartbeatMs); @@ -195,16 +220,13 @@ export class RunsController { heartbeatTimer.unref(); } - // 3. 
Backfill + drain buffer const runBackfill = async () => { try { - // Emit snapshot if requested if (includeSnapshot) { const state = await this.runManager.getState(id); subscriber.next({ type: 'snapshot', data: state } as MessageEvent); } - // Backfill missed canonical events in batches if (afterSeq > 0) { let cursor = afterSeq; const batchSize = 500; @@ -216,7 +238,7 @@ export class RunsController { subscriber.next({ type: 'canonical_event', data: event, - id: String(event.seq) + id: String(event.seq), } as MessageEvent); } if (events.length < batchSize) break; @@ -224,7 +246,6 @@ export class RunsController { } } - // Drain buffer, deduplicating by seq backfillDone = true; for (const msg of buffer) { const seq = (msg.data as CanonicalEvent)?.seq; @@ -233,7 +254,7 @@ export class RunsController { subscriber.next({ type: msg.event, data: msg.data, - ...(seq !== undefined ? { id: String(seq) } : {}) + ...(seq !== undefined ? { id: String(seq) } : {}), } as MessageEvent); } buffer.length = 0; @@ -253,10 +274,14 @@ export class RunsController { } @Post(':id/cancel') - @ApiOperation({ summary: 'Cancel a running session in the runtime.' }) + @ApiOperation({ + summary: + 'Cancel a running session. Default: proxies to the initiator agent\'s cancelCallback (Option A). 
' + + 'Policy-delegated fallback (metadata.cancellationDelegated=true) calls runtime.CancelSession (Option B).', + }) async cancelRun( @Param('id', new ParseUUIDPipe()) id: string, - @Body(new ValidationPipe({ transform: true, whitelist: true })) body: { reason?: string } + @Body(new ValidationPipe({ transform: true, whitelist: true })) body: { reason?: string }, ) { return this.runExecutor.cancel(id, body?.reason); } @@ -266,13 +291,13 @@ export class RunsController { @ApiAcceptedResponse({ type: ReplayDescriptorDto }) async createReplay( @Param('id', new ParseUUIDPipe()) id: string, - @Body(new ValidationPipe({ transform: true, whitelist: true })) body: ReplayRequestDto + @Body(new ValidationPipe({ transform: true, whitelist: true, forbidNonWhitelisted: true })) body: ReplayRequestDto, ) { const replay: ReplayRequest = { mode: body.mode ?? 'timed', speed: body.speed ?? 1, fromSeq: body.fromSeq, - toSeq: body.toSeq + toSeq: body.toSeq, }; return this.replayService.describe(id, replay); } @@ -281,14 +306,14 @@ export class RunsController { @ApiOperation({ summary: 'Replay a run using persisted canonical events.' }) streamReplay( @Param('id', new ParseUUIDPipe()) id: string, - @Query(new ValidationPipe({ transform: true, whitelist: true })) query: ReplayRequestDto + @Query(new ValidationPipe({ transform: true, whitelist: true })) query: ReplayRequestDto, ): Observable<MessageEvent> { return this.replayService .stream(id, { mode: query.mode ?? 'timed', speed: query.speed ?? 1, fromSeq: query.fromSeq, - toSeq: query.toSeq + toSeq: query.toSeq, }) .pipe(map((item) => ({ type: item.type, data: item.data }) as MessageEvent)); } @@ -297,48 +322,62 @@ export class RunsController { @ApiOperation({ summary: 'Project run state at a specific event sequence for scrubber/replay UIs.' }) async getReplayState( @Param('id', new ParseUUIDPipe()) id: string, - @Query('seq') seq?: string + @Query('seq') seq?: string, ) { return this.replayService.stateAt(id, seq ? 
Number(seq) : undefined); } + // ── REMOVED: envelope-emission endpoints (direct-agent-auth CP-5/6/7) ───── + // These endpoints violated the invariant that the control-plane must NEVER call Send. + // They return 410 Gone with an ENDPOINT_REMOVED error body whose message links to the agent migration docs. + @Post(':id/messages') - @ApiOperation({ summary: 'Send a session-bound MACP message to a running session.' }) - @ApiBody({ type: SendRunMessageDto }) - async sendMessage( - @Param('id', new ParseUUIDPipe()) id: string, - @Body(new ValidationPipe({ transform: true, whitelist: true })) body: SendRunMessageDto - ) { - return this.runExecutor.sendMessage(id, body); + @ApiOperation({ + summary: 'REMOVED. Agents emit session-bound messages via the macp-sdk directly.', + deprecated: true, + }) + sendMessage(@Param('id', new ParseUUIDPipe()) _id: string): never { + gone('POST /runs/:id/messages'); } @Post(':id/signal') - @ApiOperation({ summary: 'Send a signal to a running session.' }) - @ApiBody({ type: SendSignalDto }) - async sendSignal( - @Param('id', new ParseUUIDPipe()) id: string, - @Body(new ValidationPipe({ transform: true, whitelist: true })) body: SendSignalDto - ) { - // Runtime requires non-empty signal_type when payload is present - if (body.payload && Object.keys(body.payload).length > 0 && !body.signalType) { - throw new BadRequestException('signalType is required when payload is non-empty'); - } - this.instrumentation.signalsTotal.inc({ signal_type: body.signalType ?? body.messageType ?? 'unknown' }); - return this.runExecutor.sendSignal(id, body); + @ApiOperation({ + summary: 'REMOVED. Agents emit signals via the macp-sdk directly.', + deprecated: true, + }) + sendSignal(@Param('id', new ParseUUIDPipe()) _id: string): never { + gone('POST /runs/:id/signal'); + } + + @Post(':id/context') + @ApiOperation({ + summary: 'REMOVED. 
Agents emit ContextUpdate envelopes via the macp-sdk directly.', + deprecated: true, + }) + updateContext(@Param('id', new ParseUUIDPipe()) _id: string): never { + gone('POST /runs/:id/context'); } @Post(':id/clone') - @ApiOperation({ summary: 'Clone a run with optional overrides.' }) + @ApiOperation({ summary: 'Clone a run with optional tag overrides (produces a fresh sessionId).' }) @ApiBody({ type: CloneRunDto }) async cloneRun( @Param('id', new ParseUUIDPipe()) id: string, - @Body(new ValidationPipe({ transform: true, whitelist: true })) body: CloneRunDto + @Body(new ValidationPipe({ transform: true, whitelist: true, forbidNonWhitelisted: true })) body: CloneRunDto, ) { - const run = await this.runExecutor.clone(id, body); + if (body.context && Object.keys(body.context).length > 0) { + throw new BadRequestException( + 'context overrides are no longer accepted — session context is opaque to the control-plane ' + + '(direct-agent-auth §Invariants). Pass any scenario-specific overrides via the caller\'s ' + + 'scenario compiler and submit a fresh POST /runs.', + ); + } + const { run, sessionId } = await this.runExecutor.clone(id, { tags: body.tags }); return { runId: run.id, + sessionId, status: run.status, - traceId: run.traceId ?? undefined + traceId: run.traceId ?? undefined, }; } @@ -350,21 +389,11 @@ export class RunsController { } @Get(':id/messages') - @ApiOperation({ summary: 'List outbound messages for a run.' }) + @ApiOperation({ summary: 'List outbound messages captured from the runtime stream for a run.' }) async getRunMessages(@Param('id', new ParseUUIDPipe()) id: string) { return this.outboundMessageRepository.listByRunId(id); } - @Post(':id/context') - @ApiOperation({ summary: 'Update context during a running session.' 
}) - @ApiBody({ type: UpdateContextDto }) - async updateContext( - @Param('id', new ParseUUIDPipe()) id: string, - @Body(new ValidationPipe({ transform: true, whitelist: true })) body: UpdateContextDto - ) { - return this.runExecutor.updateContext(id, body); - } - @Post(':id/projection/rebuild') @ApiOperation({ summary: 'Rebuild the projection from persisted canonical events.' }) async rebuildProjection(@Param('id', new ParseUUIDPipe()) id: string) { @@ -378,5 +407,4 @@ export class RunsController { async archiveRun(@Param('id', new ParseUUIDPipe()) id: string) { return this.runManager.archiveRun(id); } - } diff --git a/src/controllers/webhook.controller.ts b/src/controllers/webhook.controller.ts index 08662c9..5c9aa95 100644 --- a/src/controllers/webhook.controller.ts +++ b/src/controllers/webhook.controller.ts @@ -24,7 +24,7 @@ export class WebhookController { @ApiOperation({ summary: 'Register a new webhook subscription.' }) @ApiBody({ type: CreateWebhookDto }) async createWebhook( - @Body(new ValidationPipe({ transform: true, whitelist: true })) body: CreateWebhookDto + @Body(new ValidationPipe({ transform: true, whitelist: true, forbidNonWhitelisted: true })) body: CreateWebhookDto ) { return this.webhookService.register({ url: body.url, @@ -44,7 +44,7 @@ export class WebhookController { @ApiBody({ type: UpdateWebhookDto }) async updateWebhook( @Param('id', new ParseUUIDPipe()) id: string, - @Body(new ValidationPipe({ transform: true, whitelist: true })) body: UpdateWebhookDto + @Body(new ValidationPipe({ transform: true, whitelist: true, forbidNonWhitelisted: true })) body: UpdateWebhookDto ) { return this.webhookService.update(id, body); } diff --git a/src/dto/execution-request.dto.ts b/src/dto/execution-request.dto.ts deleted file mode 100644 index 9f04c4d..0000000 --- a/src/dto/execution-request.dto.ts +++ /dev/null @@ -1,262 +0,0 @@ -import { ApiProperty, ApiPropertyOptional } from '@nestjs/swagger'; -import { Type } from 'class-transformer'; -import { - 
ArrayMaxSize, - ArrayMinSize, - IsArray, - IsIn, - IsInt, - IsNotEmpty, - IsObject, - IsOptional, - IsPositive, - IsString, - ValidateNested -} from 'class-validator'; -import { ExecutionRequest } from '../contracts/control-plane'; - -export class RootRefDto { - @ApiProperty() - @IsString() - @IsNotEmpty() - uri!: string; - - @ApiPropertyOptional() - @IsOptional() - @IsString() - name?: string; -} - -export class ParticipantRefDto { - @ApiProperty() - @IsString() - @IsNotEmpty() - id!: string; - - @ApiPropertyOptional() - @IsOptional() - @IsString() - role?: string; - - @ApiPropertyOptional() - @IsOptional() - @IsString() - transportIdentity?: string; - - @ApiPropertyOptional({ type: 'object', additionalProperties: true }) - @IsOptional() - @IsObject() - metadata?: Record; -} - -export class ProtoPayloadDto { - @ApiProperty({ description: 'Fully qualified protobuf message name.' }) - @IsString() - @IsNotEmpty() - typeName!: string; - - @ApiProperty({ type: 'object', additionalProperties: true }) - @IsObject() - value!: Record; -} - -export class PayloadEnvelopeDto { - @ApiProperty({ enum: ['json', 'text', 'base64', 'proto'] }) - @IsIn(['json', 'text', 'base64', 'proto']) - encoding!: 'json' | 'text' | 'base64' | 'proto'; - - @ApiPropertyOptional() - @IsOptional() - @IsString() - mediaType?: string; - - @ApiPropertyOptional({ type: 'object', additionalProperties: true }) - @IsOptional() - @IsObject() - json?: Record; - - @ApiPropertyOptional() - @IsOptional() - @IsString() - text?: string; - - @ApiPropertyOptional({ description: 'Opaque payload already base64-encoded.' 
}) - @IsOptional() - @IsString() - base64?: string; - - @ApiPropertyOptional({ type: () => ProtoPayloadDto }) - @IsOptional() - @ValidateNested() - @Type(() => ProtoPayloadDto) - proto?: ProtoPayloadDto; -} - -export class KickoffMessageDto { - @ApiProperty() - @IsString() - from!: string; - - @ApiProperty({ type: [String] }) - @IsArray() - @ArrayMinSize(1) - @IsString({ each: true }) - to!: string[]; - - @ApiProperty({ enum: ['request', 'broadcast', 'proposal', 'context'] }) - @IsIn(['request', 'broadcast', 'proposal', 'context']) - kind!: 'request' | 'broadcast' | 'proposal' | 'context'; - - @ApiProperty({ description: 'Exact runtime MACP message type to send.' }) - @IsString() - @IsNotEmpty() - messageType!: string; - - @ApiPropertyOptional({ type: 'object', additionalProperties: true }) - @IsOptional() - @IsObject() - payload?: Record; - - @ApiPropertyOptional({ type: () => PayloadEnvelopeDto }) - @IsOptional() - @ValidateNested() - @Type(() => PayloadEnvelopeDto) - payloadEnvelope?: PayloadEnvelopeDto; - - @ApiPropertyOptional({ type: 'object', additionalProperties: true }) - @IsOptional() - @IsObject() - metadata?: Record; -} - -export class ExecutionRequesterDto { - @ApiPropertyOptional() - @IsOptional() - @IsString() - actorId?: string; - - @ApiPropertyOptional({ enum: ['user', 'service', 'system'] }) - @IsOptional() - @IsIn(['user', 'service', 'system']) - actorType?: 'user' | 'service' | 'system'; -} - -export class SessionDescriptorDto { - @ApiProperty({ example: 'macp.mode.decision.v1' }) - @IsString() - @IsNotEmpty() - modeName!: string; - - @ApiProperty({ example: '1.0.0' }) - @IsString() - modeVersion!: string; - - @ApiProperty({ example: 'config.default' }) - @IsString() - configurationVersion!: string; - - @ApiPropertyOptional({ example: 'policy.default' }) - @IsOptional() - @IsString() - policyVersion?: string; - - @ApiProperty({ minimum: 1 }) - @IsInt() - @IsPositive() - ttlMs!: number; - - @ApiPropertyOptional({ description: 'Sender used for 
SessionStart if provided.' }) - @IsOptional() - @IsString() - initiatorParticipantId?: string; - - @ApiProperty({ type: () => [ParticipantRefDto] }) - @IsArray() - @ArrayMinSize(1) - @ArrayMaxSize(1000, { message: 'Maximum 1000 participants per session' }) - @ValidateNested({ each: true }) - @Type(() => ParticipantRefDto) - participants!: ParticipantRefDto[]; - - @ApiPropertyOptional({ type: () => [RootRefDto] }) - @IsOptional() - @IsArray() - @ValidateNested({ each: true }) - @Type(() => RootRefDto) - roots?: RootRefDto[]; - - @ApiPropertyOptional({ description: 'Convenience JSON context; will be JSON-encoded to bytes.' }) - @IsOptional() - @IsObject() - context?: Record; - - @ApiPropertyOptional({ type: () => PayloadEnvelopeDto, description: 'Binary/protobuf context override.' }) - @IsOptional() - @ValidateNested() - @Type(() => PayloadEnvelopeDto) - contextEnvelope?: PayloadEnvelopeDto; - - @ApiPropertyOptional({ type: 'object', additionalProperties: true }) - @IsOptional() - @IsObject() - metadata?: Record; -} - -export class ExecutionConfigDto { - @ApiPropertyOptional() - @IsOptional() - @IsString() - idempotencyKey?: string; - - @ApiPropertyOptional({ type: [String] }) - @IsOptional() - @IsArray() - @IsString({ each: true }) - tags?: string[]; - - @ApiPropertyOptional({ type: () => ExecutionRequesterDto }) - @IsOptional() - @ValidateNested() - @Type(() => ExecutionRequesterDto) - requester?: ExecutionRequesterDto; -} - -export class RuntimeSelectionDto { - @ApiProperty({ example: 'rust' }) - @IsString() - kind!: string; - - @ApiPropertyOptional({ example: 'v1' }) - @IsOptional() - @IsString() - version?: string; -} - -export class ExecutionRequestDto implements ExecutionRequest { - @ApiProperty({ enum: ['live', 'replay', 'sandbox'] }) - @IsIn(['live', 'replay', 'sandbox']) - mode!: 'live' | 'replay' | 'sandbox'; - - @ApiProperty({ type: () => RuntimeSelectionDto }) - @ValidateNested() - @Type(() => RuntimeSelectionDto) - runtime!: RuntimeSelectionDto; - - 
@ApiProperty({ type: () => SessionDescriptorDto }) - @ValidateNested() - @Type(() => SessionDescriptorDto) - session!: SessionDescriptorDto; - - @ApiPropertyOptional({ type: () => [KickoffMessageDto] }) - @IsOptional() - @IsArray() - @ValidateNested({ each: true }) - @Type(() => KickoffMessageDto) - kickoff?: KickoffMessageDto[]; - - @ApiPropertyOptional({ type: () => ExecutionConfigDto }) - @IsOptional() - @ValidateNested() - @Type(() => ExecutionConfigDto) - execution?: ExecutionConfigDto; -} diff --git a/src/dto/run-descriptor.dto.ts b/src/dto/run-descriptor.dto.ts new file mode 100644 index 0000000..9a38820 --- /dev/null +++ b/src/dto/run-descriptor.dto.ts @@ -0,0 +1,148 @@ +import { ApiProperty, ApiPropertyOptional } from '@nestjs/swagger'; +import { Type } from 'class-transformer'; +import { + ArrayMaxSize, + ArrayMinSize, + IsArray, + IsIn, + IsInt, + IsNotEmpty, + IsObject, + IsOptional, + IsPositive, + IsString, + ValidateNested +} from 'class-validator'; +import { RunDescriptor } from '../contracts/control-plane'; + +/** + * RunDescriptor DTO — scenario-agnostic. Rejects scenario-specific keys + * via `forbidNonWhitelisted: true` at the controller level. + * + * See direct-agent-auth.md §Generic contracts for the contract invariants: + * no kickoff[], no policyHints, no participants[].role, no commitments[], + * no initiatorParticipantId. + */ + +export class ParticipantRefDto { + @ApiProperty({ description: 'Bare sender string — must match the agent identity in the runtime.' }) + @IsString() + @IsNotEmpty() + id!: string; +} + +export class ExecutionRequesterDto { + @ApiPropertyOptional() + @IsOptional() + @IsString() + actorId?: string; + + @ApiPropertyOptional({ enum: ['user', 'service', 'system'] }) + @IsOptional() + @IsIn(['user', 'service', 'system']) + actorType?: 'user' | 'service' | 'system'; +} + +export class SessionDescriptorDto { + @ApiPropertyOptional({ + description: + 'Caller-allocated session id. 
Must satisfy runtime validator (UUID v4/v7 or base64url 22+ chars). If omitted, control-plane allocates a UUID v4.' + }) + @IsOptional() + @IsString() + sessionId?: string; + + @ApiProperty({ example: 'macp.mode.decision.v1' }) + @IsString() + @IsNotEmpty() + modeName!: string; + + @ApiProperty({ example: '1.0.0' }) + @IsString() + modeVersion!: string; + + @ApiProperty({ example: 'config.default' }) + @IsString() + configurationVersion!: string; + + @ApiPropertyOptional({ example: 'policy.default' }) + @IsOptional() + @IsString() + policyVersion?: string; + + @ApiProperty({ minimum: 1 }) + @IsInt() + @IsPositive() + ttlMs!: number; + + @ApiProperty({ type: () => [ParticipantRefDto] }) + @IsArray() + @ArrayMinSize(1) + @ArrayMaxSize(1000, { message: 'Maximum 1000 participants per session' }) + @ValidateNested({ each: true }) + @Type(() => ParticipantRefDto) + participants!: ParticipantRefDto[]; + + @ApiPropertyOptional({ + type: 'object', + additionalProperties: true, + description: + 'Opaque metadata. Reserved keys: source, sourceRef, environment, scenarioRef, cancelCallback, cancellationDelegated.' 
+ }) + @IsOptional() + @IsObject() + metadata?: Record; +} + +export class ExecutionConfigDto { + @ApiPropertyOptional() + @IsOptional() + @IsString() + idempotencyKey?: string; + + @ApiPropertyOptional({ type: [String] }) + @IsOptional() + @IsArray() + @IsString({ each: true }) + tags?: string[]; + + @ApiPropertyOptional({ type: () => ExecutionRequesterDto }) + @IsOptional() + @ValidateNested() + @Type(() => ExecutionRequesterDto) + requester?: ExecutionRequesterDto; +} + +export class RuntimeSelectionDto { + @ApiProperty({ example: 'rust' }) + @IsString() + kind!: string; + + @ApiPropertyOptional({ example: 'v1' }) + @IsOptional() + @IsString() + version?: string; +} + +export class RunDescriptorDto implements RunDescriptor { + @ApiProperty({ enum: ['live', 'sandbox'] }) + @IsIn(['live', 'sandbox']) + mode!: 'live' | 'sandbox'; + + @ApiProperty({ type: () => RuntimeSelectionDto }) + @ValidateNested() + @Type(() => RuntimeSelectionDto) + runtime!: RuntimeSelectionDto; + + @ApiProperty({ type: () => SessionDescriptorDto }) + @ValidateNested() + @Type(() => SessionDescriptorDto) + session!: SessionDescriptorDto; + + @ApiPropertyOptional({ type: () => ExecutionConfigDto }) + @IsOptional() + @ValidateNested() + @Type(() => ExecutionConfigDto) + execution?: ExecutionConfigDto; +} + diff --git a/src/dto/run-responses.dto.ts b/src/dto/run-responses.dto.ts index 13ab0df..51777dc 100644 --- a/src/dto/run-responses.dto.ts +++ b/src/dto/run-responses.dto.ts @@ -5,6 +5,9 @@ export class CreateRunResponseDto { @ApiProperty() runId!: string; + @ApiProperty({ description: 'Control-plane-allocated (or echoed back) session id the initiator agent must open.' 
}) + sessionId!: string; + @ApiProperty({ enum: ['queued', 'starting', 'binding_session', 'running', 'completed', 'failed', 'cancelled'] }) status!: RunStatus; diff --git a/src/dto/send-run-message.dto.ts b/src/dto/send-run-message.dto.ts deleted file mode 100644 index b05417c..0000000 --- a/src/dto/send-run-message.dto.ts +++ /dev/null @@ -1,38 +0,0 @@ -import { ApiProperty, ApiPropertyOptional } from '@nestjs/swagger'; -import { Type } from 'class-transformer'; -import { IsArray, IsNotEmpty, IsObject, IsOptional, IsString, ValidateNested } from 'class-validator'; -import { PayloadEnvelopeDto } from './execution-request.dto'; - -export class SendRunMessageDto { - @ApiProperty({ description: 'Sender participant ID.' }) - @IsString() - @IsNotEmpty() - from!: string; - - @ApiPropertyOptional({ type: [String], description: 'Recipient participant IDs. Omit for broadcast.' }) - @IsOptional() - @IsArray() - @IsString({ each: true }) - to?: string[]; - - @ApiProperty({ description: 'Exact MACP message type (e.g. "Evaluation", "Vote", "TaskRequest").' }) - @IsString() - @IsNotEmpty() - messageType!: string; - - @ApiPropertyOptional({ type: 'object', additionalProperties: true, description: 'Convenience JSON payload.' }) - @IsOptional() - @IsObject() - payload?: Record; - - @ApiPropertyOptional({ type: () => PayloadEnvelopeDto, description: 'Binary/proto payload override.' }) - @IsOptional() - @ValidateNested() - @Type(() => PayloadEnvelopeDto) - payloadEnvelope?: PayloadEnvelopeDto; - - @ApiPropertyOptional({ type: 'object', additionalProperties: true, description: 'Optional message metadata.' 
}) - @IsOptional() - @IsObject() - metadata?: Record; -} diff --git a/src/dto/send-signal.dto.ts b/src/dto/send-signal.dto.ts deleted file mode 100644 index 2801427..0000000 --- a/src/dto/send-signal.dto.ts +++ /dev/null @@ -1,34 +0,0 @@ -import { ApiProperty, ApiPropertyOptional } from '@nestjs/swagger'; -import { IsArray, IsNotEmpty, IsObject, IsOptional, IsString } from 'class-validator'; - -export class SendSignalDto { - @ApiProperty({ description: 'Sender participant ID' }) - @IsString() - @IsNotEmpty() - from!: string; - - @ApiProperty({ description: 'Target participant IDs', type: [String] }) - @IsArray() - @IsString({ each: true }) - to!: string[]; - - @ApiProperty({ description: 'Signal message type (e.g., "Signal")' }) - @IsString() - @IsNotEmpty() - messageType!: string; - - @ApiPropertyOptional({ description: 'Signal payload' }) - @IsOptional() - @IsObject() - payload?: Record; - - @ApiPropertyOptional({ description: 'Signal type classification (e.g., anomaly, alert)' }) - @IsOptional() - @IsString() - signalType?: string; - - @ApiPropertyOptional({ description: 'Signal severity (e.g., low, medium, high, critical)' }) - @IsOptional() - @IsString() - severity?: string; -} diff --git a/src/dto/update-context.dto.ts b/src/dto/update-context.dto.ts deleted file mode 100644 index 68529cb..0000000 --- a/src/dto/update-context.dto.ts +++ /dev/null @@ -1,13 +0,0 @@ -import { ApiProperty } from '@nestjs/swagger'; -import { IsNotEmpty, IsObject, IsString } from 'class-validator'; - -export class UpdateContextDto { - @ApiProperty({ description: 'Sender participant ID' }) - @IsString() - @IsNotEmpty() - from!: string; - - @ApiProperty({ description: 'Context payload to send to the runtime session' }) - @IsObject() - context!: Record; -} diff --git a/src/events/event-normalizer.service.spec.ts b/src/events/event-normalizer.service.spec.ts index 30320f7..d8527d5 100644 --- a/src/events/event-normalizer.service.spec.ts +++ b/src/events/event-normalizer.service.spec.ts 
@@ -2,7 +2,7 @@ import { EventNormalizerService } from './event-normalizer.service'; import { ProtoRegistryService } from '../runtime/proto-registry.service'; import { InstrumentationService } from '../telemetry/instrumentation.service'; import { RawRuntimeEvent, NormalizeContext } from '../contracts/runtime'; -import { ExecutionRequest } from '../contracts/control-plane'; +import { RunDescriptor } from '../contracts/control-plane'; function makeContext(overrides?: Partial): NormalizeContext { return { @@ -18,7 +18,7 @@ function makeContext(overrides?: Partial): NormalizeContext { ttlMs: 30000, participants: [{ id: 'agent-a' }, { id: 'agent-b' }], }, - } as ExecutionRequest, + } as RunDescriptor, ...overrides, }; } diff --git a/src/projection/projection-coverage.spec.ts b/src/projection/projection-coverage.spec.ts new file mode 100644 index 0000000..8a22e9a --- /dev/null +++ b/src/projection/projection-coverage.spec.ts @@ -0,0 +1,49 @@ +import { readFileSync } from 'node:fs'; +import { join } from 'node:path'; +import { CANONICAL_EVENT_TYPES } from '../contracts/control-plane'; + +/** + * Invariant (Q1-4 / plans/quality-cleanup.md): + * + * Every `CanonicalEventType` declared in `CANONICAL_EVENT_TYPES` must have a + * reducer branch in `ProjectionService.applyEvents`. If a new event type is + * added to the union without a matching `case '...'` branch, this test fails — + * preventing silently-dropped events in the UI projection. + * + * Scope choice: we parse the source file textually (`case 'foo'` literals) + * rather than injecting a spy, because the reducer is a large switch and a + * textual check catches additions without needing to wire every event type + * through a full projection state to observe its effect. 
+ */ +describe('Projection coverage invariant — every canonical event has a reducer', () => { + const projectionSource = readFileSync( + join(__dirname, 'projection.service.ts'), + 'utf8', + ); + + // A type is "covered" if the literal `case 'foo':` appears in projection.service.ts + // OR if it's a documented intentional no-op. + const EXEMPT_TYPES = new Set([ + // Reserved type per RFC; not emitted by the default normalizer and not shown + // in the UI projection (decision.finalized is the authoritative surface). + 'decision.proposed', + // No dedicated projection branch — the UI renders this only via timeline + // and graph which are populated uniformly for all envelope events. + 'session.stream.opened', + // Tool events aren't reduced into the current RunStateProjection surface; + // they appear in the raw canonical events list only. + 'tool.called', + 'tool.completed', + // policy.denied is visible via the policy projection via commitment evaluations + // and the event list; no dedicated reducer branch required today. 
+ 'policy.denied', + ]); + + for (const eventType of CANONICAL_EVENT_TYPES) { + it(`'${eventType}' has a reducer branch (or is exempt)`, () => { + if (EXEMPT_TYPES.has(eventType)) return; // documented exemption + const needle = `case '${eventType}'`; + expect(projectionSource).toContain(needle); + }); + } +}); diff --git a/src/retention/data-retention.service.spec.ts b/src/retention/data-retention.service.spec.ts index bfc2001..4eeefca 100644 --- a/src/retention/data-retention.service.spec.ts +++ b/src/retention/data-retention.service.spec.ts @@ -142,9 +142,7 @@ describe('DataRetentionService', () => { mockDb.select.mockReturnValue(makeSelectChain([])); mockDb.delete.mockReturnValue(makeDeleteChain(0)); - const before = Date.now(); await service.runRetention(); - const after = Date.now(); // Verify the advisory lock was used (proving the method ran) expect(mockDatabase.tryAdvisoryLock).toHaveBeenCalled(); diff --git a/src/retention/data-retention.service.ts b/src/retention/data-retention.service.ts index 0892e44..2730645 100644 --- a/src/retention/data-retention.service.ts +++ b/src/retention/data-retention.service.ts @@ -1,5 +1,5 @@ import { Injectable, Logger, OnModuleDestroy, OnModuleInit } from '@nestjs/common'; -import { and, inArray, lt, sql } from 'drizzle-orm'; +import { and, inArray, lt } from 'drizzle-orm'; import { AppConfigService } from '../config/app-config.service'; import { DatabaseService } from '../db/database.service'; import { auditLog, runs, webhookDeliveries } from '../db/schema'; diff --git a/src/runs/run-executor.service.spec.ts b/src/runs/run-executor.service.spec.ts index 1f6a83e..060b6c3 100644 --- a/src/runs/run-executor.service.spec.ts +++ b/src/runs/run-executor.service.spec.ts @@ -4,7 +4,6 @@ import { RunManagerService } from './run-manager.service'; import { RunRepository } from '../storage/run.repository'; import { RuntimeSessionRepository } from '../storage/runtime-session.repository'; import { RuntimeProviderRegistry } from 
'../runtime/runtime-provider.registry'; -import { ProtoRegistryService } from '../runtime/proto-registry.service'; import { TraceService } from '../telemetry/trace.service'; import { RunEventService } from '../events/run-event.service'; import { ArtifactService } from '../artifacts/artifact.service'; @@ -14,17 +13,14 @@ import { AppConfigService } from '../config/app-config.service'; import { InstrumentationService } from '../telemetry/instrumentation.service'; import { AppException } from '../errors/app-exception'; import { ErrorCode } from '../errors/error-codes'; -import { ExecutionRequest, Run } from '../contracts/control-plane'; -import { - RuntimeProvider, - RuntimeSessionHandle, -} from '../contracts/runtime'; +import { RunDescriptor, Run } from '../contracts/control-plane'; +import { RuntimeSessionHandle } from '../contracts/runtime'; // --------------------------------------------------------------------------- // Helpers // --------------------------------------------------------------------------- -function makeExecutionRequest(overrides: Partial = {}): ExecutionRequest { +function makeRunDescriptor(overrides: Partial = {}): RunDescriptor { return { mode: 'live', runtime: { kind: 'rust', version: '0.3.0' }, @@ -33,17 +29,8 @@ function makeExecutionRequest(overrides: Partial = {}): Execut modeVersion: '1.0', configurationVersion: '1.0', ttlMs: 60000, - participants: [{ id: 'agent-1', role: 'proposer' }], + participants: [{ id: 'agent-1' }, { id: 'agent-2' }], }, - kickoff: [ - { - from: 'agent-1', - to: ['agent-2'], - kind: 'request', - messageType: 'Proposal', - payload: { text: 'hello' }, - }, - ], ...overrides, }; } @@ -55,16 +42,23 @@ function makeRun(overrides: Partial = {}): Run { runtimeKind: 'rust', runtimeSessionId: 'sess-1', createdAt: new Date().toISOString(), - metadata: { executionRequest: makeExecutionRequest() }, + metadata: { executionRequest: makeRunDescriptor() }, ...overrides, }; } +function makeReadOnlyHandle(): RuntimeSessionHandle { 
+ return { + events: (async function* () {})(), + abort: jest.fn(), + }; +} + // --------------------------------------------------------------------------- // Test suite // --------------------------------------------------------------------------- -describe('RunExecutorService', () => { +describe('RunExecutorService (observer mode, direct-agent-auth)', () => { let service: RunExecutorService; let mockRunManager: { @@ -76,18 +70,12 @@ describe('RunExecutorService', () => { getRun: jest.Mock; bindSession: jest.Mock; }; - let mockRunRepository: Record; let mockRuntimeSessionRepository: { findByRunId: jest.Mock; }; let mockRuntimeRegistry: { get: jest.Mock; }; - let mockProtoRegistry: { - encodePayloadEnvelope: jest.Mock; - getKnownTypeName: jest.Mock; - encodeMessage: jest.Mock; - }; let mockTraceService: { withSpan: jest.Mock; withRunSpan: jest.Mock; @@ -107,16 +95,16 @@ describe('RunExecutorService', () => { }; let mockConfig: { runtimeRequestTimeoutMs: number; - kickoffMaxRetries: number; clientVersion: string; + sessionPollBaseMs: number; + sessionPollMaxMs: number; + sessionPollTimeoutMs: number; + cancelCallbackTimeoutMs: number; }; let mockProvider: { kind: string; initialize: jest.Mock; - openSession: jest.Mock; - startSession: jest.Mock; - send: jest.Mock; - streamSession: jest.Mock; + subscribeSession: jest.Mock; getSession: jest.Mock; cancelSession: jest.Mock; getManifest: jest.Mock; @@ -139,34 +127,16 @@ describe('RunExecutorService', () => { capabilities: {}, instructions: undefined, }), - openSession: jest.fn(), - startSession: jest.fn(), - // Default send ack — tests that care override via mockResolvedValue. - // Kickoff now flows through unary send, so a sensible default is required - // for the launch-flow tests to reach the stream-consumer start step. 
- send: jest.fn().mockResolvedValue({ - ack: { - ok: true, - duplicate: false, - messageId: 'msg-default', - sessionId: 'sess-default', - acceptedAtUnixMs: Date.now(), - sessionState: 'SESSION_STATE_OPEN' as const, - }, - envelope: { - macpVersion: '1.0', - mode: 'decision', - messageType: 'Proposal', - messageId: 'msg-default', - sessionId: 'sess-default', - sender: 'agent-1', - timestampUnixMs: Date.now(), - payload: Buffer.from(''), - }, + subscribeSession: jest.fn().mockReturnValue(makeReadOnlyHandle()), + getSession: jest.fn().mockResolvedValue({ + sessionId: 'sess-1', + mode: 'decision', + state: 'SESSION_STATE_OPEN', + initiator: 'agent-1', + }), + cancelSession: jest.fn().mockResolvedValue({ + ack: { ok: true, sessionState: 'SESSION_STATE_RESOLVED' }, }), - streamSession: jest.fn(), - getSession: jest.fn(), - cancelSession: jest.fn(), getManifest: jest.fn(), listModes: jest.fn(), listRoots: jest.fn(), @@ -187,8 +157,6 @@ describe('RunExecutorService', () => { bindSession: jest.fn().mockResolvedValue(makeRun({ status: 'binding_session' })), }; - mockRunRepository = {}; - mockRuntimeSessionRepository = { findByRunId: jest.fn().mockResolvedValue({ modeName: 'decision', @@ -200,12 +168,6 @@ describe('RunExecutorService', () => { get: jest.fn().mockReturnValue(mockProvider), }; - mockProtoRegistry = { - encodePayloadEnvelope: jest.fn().mockReturnValue(Buffer.from('encoded')), - getKnownTypeName: jest.fn().mockReturnValue(undefined), - encodeMessage: jest.fn().mockReturnValue(Buffer.from('encoded')), - }; - mockTraceService = { withSpan: jest.fn().mockImplementation((_name, _attrs, fn) => fn()), withRunSpan: jest.fn().mockImplementation((_runId, _name, _attrs, fn) => fn()), @@ -232,16 +194,18 @@ describe('RunExecutorService', () => { mockConfig = { runtimeRequestTimeoutMs: 30000, - kickoffMaxRetries: 3, clientVersion: '0.3.0', + sessionPollBaseMs: 10, + sessionPollMaxMs: 50, + sessionPollTimeoutMs: 1000, + cancelCallbackTimeoutMs: 5000, }; service = new 
RunExecutorService( mockRunManager as unknown as RunManagerService, - mockRunRepository as unknown as RunRepository, + {} as unknown as RunRepository, mockRuntimeSessionRepository as unknown as RuntimeSessionRepository, mockRuntimeRegistry as unknown as RuntimeProviderRegistry, - mockProtoRegistry as unknown as ProtoRegistryService, mockTraceService as unknown as TraceService, mockEventService as unknown as RunEventService, mockArtifactService as unknown as ArtifactService, @@ -257,7 +221,7 @@ describe('RunExecutorService', () => { // ========================================================================= describe('validate', () => { it('returns error when participants are missing', async () => { - const request = makeExecutionRequest({ + const request = makeRunDescriptor({ session: { modeName: 'decision', modeVersion: '1.0', @@ -276,7 +240,7 @@ describe('RunExecutorService', () => { }); it('returns error when modeName is missing', async () => { - const request = makeExecutionRequest({ + const request = makeRunDescriptor({ session: { modeName: '', modeVersion: '1.0', @@ -300,9 +264,7 @@ describe('RunExecutorService', () => { capabilities: {}, }); - const request = makeExecutionRequest(); - - const result = await service.validate(request); + const result = await service.validate(makeRunDescriptor()); expect(result.valid).toBe(false); expect(result.errors).toEqual( @@ -313,693 +275,204 @@ describe('RunExecutorService', () => { }); it('returns valid result when mode is in supported list', async () => { - const request = makeExecutionRequest(); - - const result = await service.validate(request); - + const result = await service.validate(makeRunDescriptor()); expect(result.valid).toBe(true); - expect(result.errors).toHaveLength(0); expect(result.runtime.reachable).toBe(true); - expect(result.runtime.supportedModes).toContain('decision'); }); - it('does not error when supportedModes is empty (accept-all)', async () => { - mockProvider.initialize.mockResolvedValue({ - 
selectedProtocolVersion: '1.0', - runtimeInfo: { name: 'rust-runtime' }, - supportedModes: [], - capabilities: {}, - }); - - const request = makeExecutionRequest(); - const result = await service.validate(request); - - expect(result.valid).toBe(true); - expect(result.errors).toHaveLength(0); + it('returns error when provided sessionId is not a valid UUID or base64url', async () => { + const result = await service.validate( + makeRunDescriptor({ + session: { + modeName: 'decision', + modeVersion: '1.0', + configurationVersion: '1.0', + ttlMs: 60000, + participants: [{ id: 'agent-1' }], + sessionId: 'bad-id', + }, + }), + ); + expect(result.valid).toBe(false); + expect(result.errors).toContain( + 'session.sessionId must be a UUID v4/v7 or base64url 22+ chars', + ); }); it('returns warning (not error) when runtime is unreachable', async () => { mockProvider.initialize.mockRejectedValue(new Error('UNAVAILABLE: connect failed')); - const request = makeExecutionRequest(); - const result = await service.validate(request); + const result = await service.validate(makeRunDescriptor()); - // Missing runtime should be a warning, not an error expect(result.valid).toBe(true); expect(result.warnings).toEqual( - expect.arrayContaining([ - expect.stringContaining('Runtime not reachable'), - ]), + expect.arrayContaining([expect.stringContaining('Runtime not reachable')]), ); expect(result.runtime.reachable).toBe(false); }); - - it('returns error for kickoff message missing messageType', async () => { - const request = makeExecutionRequest({ - kickoff: [ - { - from: 'agent-1', - to: ['agent-2'], - kind: 'request', - messageType: '', - payload: {}, - }, - ], - }); - - const result = await service.validate(request); - - expect(result.valid).toBe(false); - expect(result.errors).toContain('kickoff message is missing messageType'); - }); - - it('returns error for kickoff message missing from', async () => { - const request = makeExecutionRequest({ - kickoff: [ - { - from: '', - to: 
['agent-2'], - kind: 'request', - messageType: 'Proposal', - payload: {}, - }, - ], - }); - - const result = await service.validate(request); - - expect(result.valid).toBe(false); - expect(result.errors).toContain('kickoff message is missing from'); - }); }); // ========================================================================= // launch() // ========================================================================= describe('launch', () => { - it('rejects replay mode', async () => { - const request = makeExecutionRequest({ mode: 'replay' }); + it('allocates a sessionId when one is not provided and passes it to createRun', async () => { + const expectedRun = makeRun({ status: 'queued' }); + mockRunManager.createRun.mockResolvedValue(expectedRun); - await expect(service.launch(request)).rejects.toThrow(BadRequestException); - await expect(service.launch(request)).rejects.toThrow( - /Use \/runs\/:id\/replay for replay mode/, + const result = await service.launch(makeRunDescriptor()); + + expect(result.run).toEqual(expectedRun); + expect(result.sessionId).toMatch(/^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-/); + expect(mockRunManager.createRun).toHaveBeenCalledWith( + expect.objectContaining({ + session: expect.objectContaining({ sessionId: result.sessionId }), + }), + result.sessionId, ); }); - it('creates run and returns it for live mode', async () => { + it('uses caller-provided sessionId when valid', async () => { + const sessionId = '123e4567-e89b-42d3-a456-426614174000'; const expectedRun = makeRun({ status: 'queued' }); mockRunManager.createRun.mockResolvedValue(expectedRun); - // Set up openSession to return a handle so execute() doesn't blow up - const handle: RuntimeSessionHandle = { - send: jest.fn(), - events: (async function* () {})(), - closeWrite: jest.fn(), - abort: jest.fn(), - sessionAck: Promise.resolve({ - runtimeSessionId: 'sess-1', - initiator: 'agent-1', - ack: { - ok: true, - duplicate: false, - messageId: 'msg-1', - sessionId: 'sess-1', - 
acceptedAtUnixMs: Date.now(), - sessionState: 'SESSION_STATE_OPEN' as const, + const result = await service.launch( + makeRunDescriptor({ + session: { + modeName: 'decision', + modeVersion: '1.0', + configurationVersion: '1.0', + ttlMs: 60000, + participants: [{ id: 'agent-1' }], + sessionId, }, }), - }; - mockProvider.openSession.mockReturnValue(handle); + ); - const request = makeExecutionRequest(); - const result = await service.launch(request); + expect(result.sessionId).toBe(sessionId); + }); - expect(result).toEqual(expectedRun); - expect(mockRunManager.createRun).toHaveBeenCalledWith(request); + it('rejects invalid sessionId', async () => { + await expect( + service.launch( + makeRunDescriptor({ + session: { + modeName: 'decision', + modeVersion: '1.0', + configurationVersion: '1.0', + ttlMs: 60000, + participants: [{ id: 'agent-1' }], + sessionId: 'too-short', + }, + }), + ), + ).rejects.toThrow(BadRequestException); }); - it('creates run and returns it for sandbox mode', async () => { + it('never calls provider.subscribeSession synchronously from launch', async () => { const expectedRun = makeRun({ status: 'queued' }); mockRunManager.createRun.mockResolvedValue(expectedRun); - const handle: RuntimeSessionHandle = { - send: jest.fn(), - events: (async function* () {})(), - closeWrite: jest.fn(), - abort: jest.fn(), - sessionAck: Promise.resolve({ - runtimeSessionId: 'sess-1', - initiator: 'agent-1', - ack: { - ok: true, - duplicate: false, - messageId: 'msg-1', - sessionId: 'sess-1', - acceptedAtUnixMs: Date.now(), - sessionState: 'SESSION_STATE_OPEN' as const, - }, - }), - }; - mockProvider.openSession.mockReturnValue(handle); + await service.launch(makeRunDescriptor()); - const request = makeExecutionRequest({ mode: 'sandbox' }); - const result = await service.launch(request); - - expect(result).toEqual(expectedRun); + // subscribeSession happens async in execute() — we only verify launch returns quickly. 
+ expect(mockRunManager.createRun).toHaveBeenCalled(); }); }); // ========================================================================= - // cancel() + // cancel() — Option A (callback) and Option B (delegated) // ========================================================================= describe('cancel', () => { - it('cancels session and marks run cancelled', async () => { - const run = makeRun({ status: 'running', runtimeSessionId: 'sess-1' }); - mockRunManager.getRun.mockResolvedValue(run); - mockProvider.cancelSession.mockResolvedValue({ - ack: { ok: true, sessionState: 'SESSION_STATE_RESOLVED' }, - }); - const cancelledRun = makeRun({ status: 'cancelled' }); - mockRunManager.markCancelled.mockResolvedValue(cancelledRun); - - const result = await service.cancel('run-1', 'user requested'); + it('Option A: POSTs to initiator agent cancelCallback when metadata.cancelCallback is set', async () => { + const fetchSpy = jest + .spyOn(globalThis, 'fetch') + .mockResolvedValueOnce({ ok: true, status: 200 } as unknown as Response); - expect(mockProvider.cancelSession).toHaveBeenCalledWith({ - runId: 'run-1', + const run = makeRun({ + status: 'running', runtimeSessionId: 'sess-1', - reason: 'user requested', - requesterId: 'agent-1', + metadata: { + executionRequest: makeRunDescriptor(), + cancelCallback: { url: 'http://agent/cancel', bearer: 'tok' }, + }, }); - expect(mockRunManager.markCancelled).toHaveBeenCalledWith('run-1'); - expect(mockStreamConsumer.stop).toHaveBeenCalledWith('run-1'); - expect(mockStreamHub.complete).toHaveBeenCalledWith('run-1'); - expect(result).toEqual(cancelledRun); - }); - - it('throws BadRequestException when run has no runtime session', async () => { - const run = makeRun({ runtimeSessionId: undefined }); - mockRunManager.getRun.mockResolvedValue(run); - - await expect(service.cancel('run-1')).rejects.toThrow(BadRequestException); - await expect(service.cancel('run-1')).rejects.toThrow( - /run has no bound runtime session/, - ); - }); - 
- it('uses undefined requesterId when session has no initiator', async () => { - const run = makeRun({ status: 'running', runtimeSessionId: 'sess-1' }); mockRunManager.getRun.mockResolvedValue(run); - mockRuntimeSessionRepository.findByRunId.mockResolvedValue({ - modeName: 'decision', - initiatorParticipantId: null, - }); - mockProvider.cancelSession.mockResolvedValue({ - ack: { ok: true, sessionState: 'SESSION_STATE_RESOLVED' }, - }); - mockRunManager.markCancelled.mockResolvedValue(makeRun({ status: 'cancelled' })); - await service.cancel('run-1'); + await service.cancel('run-1', 'user requested'); - expect(mockProvider.cancelSession).toHaveBeenCalledWith( - expect.objectContaining({ requesterId: undefined }), - ); - }); - }); - - // ========================================================================= - // sendMessage() - // ========================================================================= - describe('sendMessage', () => { - const baseSendResult = { - ack: { - ok: true, - duplicate: false, - messageId: 'msg-1', - sessionId: 'sess-1', - acceptedAtUnixMs: Date.now(), - sessionState: 'SESSION_STATE_OPEN' as const, - }, - envelope: { - macpVersion: '1.0', - mode: 'decision', - messageType: 'Proposal', - messageId: 'msg-1', - sessionId: 'sess-1', - sender: 'agent-1', - timestampUnixMs: Date.now(), - payload: Buffer.alloc(0), - }, - }; - - it('sends a message and returns messageId + ack', async () => { - mockProvider.send.mockResolvedValue(baseSendResult); - - const result = await service.sendMessage('run-1', { - from: 'agent-1', - to: ['agent-2'], - messageType: 'Proposal', - payload: { action: 'approve' }, - }); - - expect(result.messageId).toBe('msg-1'); - expect(result.ack.ok).toBe(true); - expect(mockProvider.send).toHaveBeenCalledWith( + expect(fetchSpy).toHaveBeenCalledWith( + 'http://agent/cancel', expect.objectContaining({ - runId: 'run-1', - runtimeSessionId: 'sess-1', - modeName: 'decision', - from: 'agent-1', - to: ['agent-2'], - messageType: 
'Proposal', - }), - ); - expect(mockEventService.emitControlPlaneEvents).toHaveBeenCalled(); - }); - - it('throws BadRequestException when run is not ready', async () => { - mockRunManager.getRun.mockResolvedValue( - makeRun({ status: 'queued', runtimeSessionId: undefined }), - ); - - await expect( - service.sendMessage('run-1', { - from: 'agent-1', - messageType: 'Proposal', - }), - ).rejects.toThrow(BadRequestException); - }); - - it('throws BadRequestException when run status is not binding_session or running', async () => { - mockRunManager.getRun.mockResolvedValue( - makeRun({ status: 'completed', runtimeSessionId: 'sess-1' }), - ); - - await expect( - service.sendMessage('run-1', { - from: 'agent-1', - messageType: 'Proposal', - }), - ).rejects.toThrow(/run is not ready to accept session-bound messages/); - }); - - it('throws BadRequestException when run has no mode name', async () => { - mockRunManager.getRun.mockResolvedValue( - makeRun({ status: 'running', runtimeSessionId: 'sess-1' }), - ); - mockRuntimeSessionRepository.findByRunId.mockResolvedValue({ - modeName: null, - initiatorParticipantId: null, - }); - // Also ensure executionRequest has no modeName - mockRunManager.getRun.mockResolvedValue( - makeRun({ - status: 'running', - runtimeSessionId: 'sess-1', - metadata: { executionRequest: undefined }, + method: 'POST', + headers: expect.objectContaining({ + 'content-type': 'application/json', + authorization: 'Bearer tok', + }), + body: expect.stringContaining('"runId":"run-1"'), }), ); - - await expect( - service.sendMessage('run-1', { - from: 'agent-1', - messageType: 'Proposal', - }), - ).rejects.toThrow(/run does not have a bound mode name/); - }); - - it('throws POLICY_DENIED AppException for policy error', async () => { - mockProvider.send.mockResolvedValue({ - ...baseSendResult, - ack: { - ...baseSendResult.ack, - ok: false, - error: { code: 'POLICY_DENIED', message: 'commitment violates rule X' }, - }, - }); - - await expect( - 
service.sendMessage('run-1', { - from: 'agent-1', - messageType: 'Commitment', - }), - ).rejects.toThrow(AppException); - - try { - await service.sendMessage('run-1', { - from: 'agent-1', - messageType: 'Commitment', - }); - } catch (error) { - expect(error).toBeInstanceOf(AppException); - expect((error as AppException).errorCode).toBe(ErrorCode.POLICY_DENIED); - expect((error as AppException).getStatus()).toBe(403); - } - }); - - it('throws UNKNOWN_POLICY_VERSION AppException', async () => { - mockProvider.send.mockResolvedValue({ - ...baseSendResult, - ack: { - ...baseSendResult.ack, - ok: false, - error: { code: 'UNKNOWN_POLICY_VERSION', message: 'policy v99 not found' }, - }, - }); - - try { - await service.sendMessage('run-1', { - from: 'agent-1', - messageType: 'Commitment', - }); - fail('should have thrown'); - } catch (error) { - expect(error).toBeInstanceOf(AppException); - expect((error as AppException).errorCode).toBe(ErrorCode.UNKNOWN_POLICY_VERSION); - expect((error as AppException).getStatus()).toBe(400); - } - }); - - it('throws INVALID_POLICY_DEFINITION AppException', async () => { - mockProvider.send.mockResolvedValue({ - ...baseSendResult, - ack: { - ...baseSendResult.ack, - ok: false, - error: { code: 'INVALID_POLICY_DEFINITION', message: 'malformed rules' }, - }, - }); - - try { - await service.sendMessage('run-1', { - from: 'agent-1', - messageType: 'Commitment', - }); - fail('should have thrown'); - } catch (error) { - expect(error).toBeInstanceOf(AppException); - expect((error as AppException).errorCode).toBe(ErrorCode.INVALID_POLICY_DEFINITION); - expect((error as AppException).getStatus()).toBe(400); - } - }); - - it('throws SESSION_ALREADY_EXISTS AppException', async () => { - mockProvider.send.mockResolvedValue({ - ...baseSendResult, - ack: { - ...baseSendResult.ack, - ok: false, - error: { code: 'SESSION_ALREADY_EXISTS', message: 'duplicate session' }, - }, - }); - - try { - await service.sendMessage('run-1', { - from: 'agent-1', - 
messageType: 'Proposal', - }); - fail('should have thrown'); - } catch (error) { - expect(error).toBeInstanceOf(AppException); - expect((error as AppException).errorCode).toBe(ErrorCode.SESSION_ALREADY_EXISTS); - expect((error as AppException).getStatus()).toBe(409); - } - }); - - it('throws MESSAGE_SEND_FAILED for INVALID_SESSION_ID with 400 status', async () => { - mockProvider.send.mockResolvedValue({ - ...baseSendResult, - ack: { - ...baseSendResult.ack, - ok: false, - error: { code: 'INVALID_SESSION_ID', message: 'bad session id' }, - }, - }); - - try { - await service.sendMessage('run-1', { - from: 'agent-1', - messageType: 'Proposal', - }); - fail('should have thrown'); - } catch (error) { - expect(error).toBeInstanceOf(AppException); - expect((error as AppException).errorCode).toBe(ErrorCode.MESSAGE_SEND_FAILED); - expect((error as AppException).getStatus()).toBe(400); - } + expect(mockProvider.cancelSession).not.toHaveBeenCalled(); + expect(mockRunManager.markCancelled).toHaveBeenCalledWith('run-1'); + fetchSpy.mockRestore(); }); - it('throws MESSAGE_SEND_FAILED with 502 for unknown error codes', async () => { - mockProvider.send.mockResolvedValue({ - ...baseSendResult, - ack: { - ...baseSendResult.ack, - ok: false, - error: { code: 'SOME_OTHER_ERROR', message: 'unexpected' }, + it('Option B: calls provider.cancelSession when metadata.cancellationDelegated is true', async () => { + const run = makeRun({ + status: 'running', + runtimeSessionId: 'sess-1', + metadata: { + executionRequest: makeRunDescriptor(), + cancellationDelegated: true, }, }); + mockRunManager.getRun.mockResolvedValue(run); - try { - await service.sendMessage('run-1', { - from: 'agent-1', - messageType: 'Proposal', - }); - fail('should have thrown'); - } catch (error) { - expect(error).toBeInstanceOf(AppException); - expect((error as AppException).errorCode).toBe(ErrorCode.MESSAGE_SEND_FAILED); - expect((error as AppException).getStatus()).toBe(502); - } - }); - - it('uses payloadEnvelope 
when provided', async () => { - mockProvider.send.mockResolvedValue(baseSendResult); - - await service.sendMessage('run-1', { - from: 'agent-1', - messageType: 'Proposal', - payloadEnvelope: { encoding: 'proto', proto: { typeName: 'Foo', value: {} } }, - }); - - expect(mockProtoRegistry.encodePayloadEnvelope).toHaveBeenCalledWith({ - encoding: 'proto', - proto: { typeName: 'Foo', value: {} }, - }); - expect(mockProvider.send).toHaveBeenCalledWith( - expect.objectContaining({ - payload: Buffer.from('encoded'), - }), - ); - }); - - it('accepts messages when run status is binding_session', async () => { - mockRunManager.getRun.mockResolvedValue( - makeRun({ status: 'binding_session', runtimeSessionId: 'sess-1' }), - ); - mockProvider.send.mockResolvedValue(baseSendResult); - - const result = await service.sendMessage('run-1', { - from: 'agent-1', - messageType: 'Proposal', - }); - - expect(result.messageId).toBe('msg-1'); - }); - }); - - // ========================================================================= - // sendSignal() - // ========================================================================= - describe('sendSignal', () => { - const baseSignalResult = { - ack: { - ok: true, - duplicate: false, - messageId: 'sig-1', - sessionId: '', - acceptedAtUnixMs: Date.now(), - sessionState: 'SESSION_STATE_OPEN' as const, - }, - envelope: { - macpVersion: '1.0', - mode: '', - messageType: 'Signal', - messageId: 'sig-1', - sessionId: '', - sender: 'agent-1', - timestampUnixMs: Date.now(), - payload: Buffer.alloc(0), - }, - }; - - it('sends a signal and returns messageId + ack', async () => { - mockProvider.send.mockResolvedValue(baseSignalResult); + await service.cancel('run-1', 'policy-delegated'); - const result = await service.sendSignal('run-1', { - from: 'agent-1', - to: ['agent-2'], - messageType: 'HeartbeatSignal', - payload: { status: 'alive' }, + expect(mockProvider.cancelSession).toHaveBeenCalledWith({ + runId: 'run-1', + runtimeSessionId: 'sess-1', + 
reason: 'policy-delegated', }); - - expect(result.messageId).toBe('sig-1'); - expect(result.ack.ok).toBe(true); - expect(mockProvider.send).toHaveBeenCalledWith( - expect.objectContaining({ - runtimeSessionId: '', - modeName: '', - messageType: 'Signal', - from: 'agent-1', - to: ['agent-2'], - }), - ); - expect(mockEventService.emitControlPlaneEvents).toHaveBeenCalledWith( - 'run-1', - expect.arrayContaining([ - expect.objectContaining({ - type: 'message.sent', - subject: { kind: 'signal', id: 'sig-1' }, - }), - ]), - ); - }); - - it('throws BadRequestException when run is not in running state', async () => { - mockRunManager.getRun.mockResolvedValue( - makeRun({ status: 'binding_session', runtimeSessionId: 'sess-1' }), - ); - - await expect( - service.sendSignal('run-1', { - from: 'agent-1', - to: [], - messageType: 'Signal', - }), - ).rejects.toThrow(BadRequestException); - await expect( - service.sendSignal('run-1', { - from: 'agent-1', - to: [], - messageType: 'Signal', - }), - ).rejects.toThrow(/run is not in running state/); - }); - - it('throws BadRequestException when run has no session', async () => { - mockRunManager.getRun.mockResolvedValue( - makeRun({ status: 'running', runtimeSessionId: undefined }), - ); - - await expect( - service.sendSignal('run-1', { - from: 'agent-1', - to: [], - messageType: 'Signal', - }), - ).rejects.toThrow(/run is not in running state/); + expect(mockRunManager.markCancelled).toHaveBeenCalledWith('run-1'); }); - it('throws SIGNAL_DISPATCH_FAILED when runtime rejects signal', async () => { - mockProvider.send.mockResolvedValue({ - ...baseSignalResult, - ack: { - ...baseSignalResult.ack, - ok: false, - error: { code: 'INVALID_PAYLOAD', message: 'bad signal payload' }, - }, + it('rejects when neither cancelCallback nor delegation is configured', async () => { + const run = makeRun({ + status: 'running', + runtimeSessionId: 'sess-1', + metadata: { executionRequest: makeRunDescriptor() }, }); + 
mockRunManager.getRun.mockResolvedValue(run); - try { - await service.sendSignal('run-1', { - from: 'agent-1', - to: [], - messageType: 'Signal', - payload: { broken: true }, - }); - fail('should have thrown'); - } catch (error) { - expect(error).toBeInstanceOf(AppException); - expect((error as AppException).errorCode).toBe(ErrorCode.SIGNAL_DISPATCH_FAILED); - expect((error as AppException).getStatus()).toBe(502); - } + await expect(service.cancel('run-1')).rejects.toThrow(BadRequestException); + expect(mockRunManager.markCancelled).not.toHaveBeenCalled(); }); - }); - - // ========================================================================= - // updateContext() - // ========================================================================= - describe('updateContext', () => { - const baseContextResult = { - ack: { - ok: true, - duplicate: false, - messageId: 'ctx-1', - sessionId: '', - acceptedAtUnixMs: Date.now(), - sessionState: 'SESSION_STATE_OPEN' as const, - }, - envelope: { - macpVersion: '1.0', - mode: '', - messageType: 'ContextUpdate', - messageId: 'ctx-1', - sessionId: '', - sender: 'agent-1', - timestampUnixMs: Date.now(), - payload: Buffer.alloc(0), - }, - }; - - it('sends a context update and returns messageId + ack', async () => { - mockProvider.send.mockResolvedValue(baseContextResult); - - const result = await service.updateContext('run-1', { - from: 'agent-1', - context: { key: 'value' }, - }); - expect(result.messageId).toBe('ctx-1'); - expect(result.ack.ok).toBe(true); - expect(mockProvider.send).toHaveBeenCalledWith( - expect.objectContaining({ - messageType: 'ContextUpdate', - from: 'agent-1', - }), - ); + it('throws when run has no runtime session', async () => { + mockRunManager.getRun.mockResolvedValue(makeRun({ runtimeSessionId: undefined })); + await expect(service.cancel('run-1')).rejects.toThrow(/no bound runtime session/); }); - it('throws BadRequestException when run is not running', async () => { - 
mockRunManager.getRun.mockResolvedValue( - makeRun({ status: 'completed', runtimeSessionId: 'sess-1' }), - ); + it('surfaces 502 when cancelCallback returns non-2xx', async () => { + const fetchSpy = jest + .spyOn(globalThis, 'fetch') + .mockResolvedValueOnce({ ok: false, status: 500 } as unknown as Response); - await expect( - service.updateContext('run-1', { - from: 'agent-1', - context: { key: 'value' }, - }), - ).rejects.toThrow(/run is not in running state/); - }); - - it('throws CONTEXT_UPDATE_FAILED when runtime rejects context update', async () => { - mockProvider.send.mockResolvedValue({ - ...baseContextResult, - ack: { - ...baseContextResult.ack, - ok: false, - error: { code: 'INVALID_PAYLOAD', message: 'bad context' }, + const run = makeRun({ + status: 'running', + runtimeSessionId: 'sess-1', + metadata: { + executionRequest: makeRunDescriptor(), + cancelCallback: { url: 'http://agent/cancel' }, }, }); + mockRunManager.getRun.mockResolvedValue(run); - try { - await service.updateContext('run-1', { - from: 'agent-1', - context: { key: 'value' }, - }); - fail('should have thrown'); - } catch (error) { - expect(error).toBeInstanceOf(AppException); - expect((error as AppException).errorCode).toBe(ErrorCode.CONTEXT_UPDATE_FAILED); - } + await expect(service.cancel('run-1')).rejects.toThrow(AppException); + fetchSpy.mockRestore(); }); }); @@ -1007,134 +480,58 @@ describe('RunExecutorService', () => { // clone() // ========================================================================= describe('clone', () => { - it('throws BadRequestException when run has no execution request in metadata', async () => { - mockRunManager.getRun.mockResolvedValue( - makeRun({ metadata: {} }), - ); - + it('throws when run has no execution request in metadata', async () => { + mockRunManager.getRun.mockResolvedValue(makeRun({ metadata: {} })); await expect(service.clone('run-1')).rejects.toThrow(BadRequestException); - await expect(service.clone('run-1')).rejects.toThrow( - /run 
does not have an execution request in metadata/, - ); - }); - - it('clones run with tag overrides', async () => { - const originalRequest = makeExecutionRequest(); - mockRunManager.getRun.mockResolvedValue( - makeRun({ metadata: { executionRequest: originalRequest } }), - ); - const clonedRun = makeRun({ id: 'run-2', status: 'queued' }); - mockRunManager.createRun.mockResolvedValue(clonedRun); - - // Set up openSession for the background execute() - const handle: RuntimeSessionHandle = { - send: jest.fn(), - events: (async function* () {})(), - closeWrite: jest.fn(), - abort: jest.fn(), - sessionAck: Promise.resolve({ - runtimeSessionId: 'sess-2', - initiator: 'agent-1', - ack: { - ok: true, - duplicate: false, - messageId: 'msg-1', - sessionId: 'sess-2', - acceptedAtUnixMs: Date.now(), - sessionState: 'SESSION_STATE_OPEN' as const, - }, - }), - }; - mockProvider.openSession.mockReturnValue(handle); - - const result = await service.clone('run-1', { tags: ['cloned', 'test'] }); - - expect(result).toEqual(clonedRun); - expect(mockRunManager.createRun).toHaveBeenCalledWith( - expect.objectContaining({ - execution: expect.objectContaining({ tags: ['cloned', 'test'] }), - }), - ); }); - it('clones run with context overrides', async () => { - const originalRequest = makeExecutionRequest(); + it('clones with tag overrides and allocates a fresh sessionId', async () => { + const originalRequest = makeRunDescriptor(); mockRunManager.getRun.mockResolvedValue( makeRun({ metadata: { executionRequest: originalRequest } }), ); - const clonedRun = makeRun({ id: 'run-2', status: 'queued' }); - mockRunManager.createRun.mockResolvedValue(clonedRun); - - const handle: RuntimeSessionHandle = { - send: jest.fn(), - events: (async function* () {})(), - closeWrite: jest.fn(), - abort: jest.fn(), - sessionAck: Promise.resolve({ - runtimeSessionId: 'sess-2', - initiator: 'agent-1', - ack: { - ok: true, - duplicate: false, - messageId: 'msg-1', - sessionId: 'sess-2', - acceptedAtUnixMs: 
Date.now(), - sessionState: 'SESSION_STATE_OPEN' as const, - }, - }), - }; - mockProvider.openSession.mockReturnValue(handle); + mockRunManager.createRun.mockResolvedValue(makeRun({ id: 'run-2', status: 'queued' })); - const result = await service.clone('run-1', { - context: { newKey: 'newValue' }, - }); + const result = await service.clone('run-1', { tags: ['cloned'] }); - expect(result).toEqual(clonedRun); + expect(result.sessionId).toMatch(/^[0-9a-f]{8}-[0-9a-f]{4}-4/); expect(mockRunManager.createRun).toHaveBeenCalledWith( expect.objectContaining({ - session: expect.objectContaining({ context: { newKey: 'newValue' } }), + execution: expect.objectContaining({ tags: ['cloned'] }), + session: expect.objectContaining({ sessionId: result.sessionId }), }), + result.sessionId, ); }); - it('clears idempotency key on clone', async () => { - const originalRequest = makeExecutionRequest({ - execution: { idempotencyKey: 'original-key', tags: ['original'] }, + it('clears idempotency key and the original sessionId on clone', async () => { + const originalRequest = makeRunDescriptor({ + execution: { idempotencyKey: 'original-key' }, + session: { + modeName: 'decision', + modeVersion: '1.0', + configurationVersion: '1.0', + ttlMs: 60000, + participants: [{ id: 'agent-1' }], + sessionId: 'original-session-id-that-would-be-valid-base64url', + }, }); mockRunManager.getRun.mockResolvedValue( makeRun({ metadata: { executionRequest: originalRequest } }), ); mockRunManager.createRun.mockResolvedValue(makeRun({ id: 'run-2' })); - const handle: RuntimeSessionHandle = { - send: jest.fn(), - events: (async function* () {})(), - closeWrite: jest.fn(), - abort: jest.fn(), - sessionAck: Promise.resolve({ - runtimeSessionId: 'sess-2', - initiator: 'agent-1', - ack: { - ok: true, - duplicate: false, - messageId: 'msg-1', - sessionId: 'sess-2', - acceptedAtUnixMs: Date.now(), - sessionState: 'SESSION_STATE_OPEN' as const, - }, - }), - }; - mockProvider.openSession.mockReturnValue(handle); + 
const result = await service.clone('run-1'); - await service.clone('run-1'); - - const createRunArg = mockRunManager.createRun.mock.calls[0][0] as ExecutionRequest; - expect(createRunArg.execution?.idempotencyKey).toBeUndefined(); + const cloneArg = mockRunManager.createRun.mock.calls[0][0] as RunDescriptor; + expect(cloneArg.execution?.idempotencyKey).toBeUndefined(); + expect(cloneArg.session.sessionId).toBe(result.sessionId); + expect(cloneArg.session.sessionId).not.toBe('original-session-id-that-would-be-valid-base64url'); }); }); // ========================================================================= - // execute() (tested indirectly via launch) + // execute() observer flow (tested indirectly via launch) // ========================================================================= describe('execute (via launch)', () => { it('marks run failed when runtime mode is not supported', async () => { @@ -1144,188 +541,100 @@ describe('RunExecutorService', () => { supportedModes: ['task'], capabilities: {}, }); - - const request = makeExecutionRequest(); // modeName = 'decision' mockRunManager.createRun.mockResolvedValue(makeRun({ id: 'run-x' })); - await service.launch(request); - - // Give the async execute() time to complete - await new Promise((resolve) => setTimeout(resolve, 50)); + await service.launch(makeRunDescriptor()); + await new Promise((r) => setTimeout(r, 50)); expect(mockRunManager.markFailed).toHaveBeenCalledWith( 'run-x', - expect.any(AppException), + expect.objectContaining({ errorCode: ErrorCode.MODE_NOT_SUPPORTED }), ); }); - it('marks run failed with UNKNOWN_POLICY_VERSION on policy error', async () => { - mockProvider.openSession.mockReturnValue({ - send: jest.fn(), - events: (async function* () {})(), - closeWrite: jest.fn(), - abort: jest.fn(), - sessionAck: Promise.reject(new Error('UNKNOWN_POLICY_VERSION: v99 not found')), - }); - - const request = makeExecutionRequest(); - mockRunManager.createRun.mockResolvedValue(makeRun({ id: 'run-p' 
})); + it('polls GetSession until SESSION_STATE_OPEN, then subscribes and starts the stream consumer', async () => { + const handle = makeReadOnlyHandle(); + mockProvider.subscribeSession.mockReturnValue(handle); - await service.launch(request); - await new Promise((resolve) => setTimeout(resolve, 50)); - - expect(mockRunManager.markFailed).toHaveBeenCalledWith( - 'run-p', - expect.objectContaining({ - errorCode: ErrorCode.UNKNOWN_POLICY_VERSION, - }), - ); - }); - - it('marks run failed with POLICY_DENIED on policy denied error', async () => { - mockProvider.openSession.mockReturnValue({ - send: jest.fn(), - events: (async function* () {})(), - closeWrite: jest.fn(), - abort: jest.fn(), - sessionAck: Promise.reject(new Error('POLICY_DENIED: rule X violated')), - }); + // First two polls return UNSPECIFIED, third returns OPEN + mockProvider.getSession + .mockResolvedValueOnce({ sessionId: 'sess-ok', state: 'SESSION_STATE_UNSPECIFIED', mode: 'decision' }) + .mockResolvedValueOnce({ sessionId: 'sess-ok', state: 'SESSION_STATE_UNSPECIFIED', mode: 'decision' }) + .mockResolvedValueOnce({ + sessionId: 'sess-ok', + state: 'SESSION_STATE_OPEN', + mode: 'decision', + initiator: 'agent-1', + }); - const request = makeExecutionRequest(); - mockRunManager.createRun.mockResolvedValue(makeRun({ id: 'run-pd' })); + mockRunManager.createRun.mockResolvedValue(makeRun({ id: 'run-ok' })); - await service.launch(request); - await new Promise((resolve) => setTimeout(resolve, 50)); + await service.launch(makeRunDescriptor()); + await new Promise((r) => setTimeout(r, 400)); - expect(mockRunManager.markFailed).toHaveBeenCalledWith( - 'run-pd', + expect(mockProvider.getSession).toHaveBeenCalled(); + expect(mockRunManager.bindSession).toHaveBeenCalled(); + expect(mockProvider.subscribeSession).toHaveBeenCalledWith( + expect.objectContaining({ runId: 'run-ok' }), + ); + expect(mockStreamConsumer.start).toHaveBeenCalledWith( expect.objectContaining({ - errorCode: ErrorCode.POLICY_DENIED, + 
runId: 'run-ok', + sessionHandle: handle, + subscriberId: 'agent-1', }), ); }); - it('marks run failed with SESSION_ALREADY_EXISTS on duplicate session', async () => { - mockProvider.openSession.mockReturnValue({ - send: jest.fn(), - events: (async function* () {})(), - closeWrite: jest.fn(), - abort: jest.fn(), - sessionAck: Promise.reject(new Error('SESSION_ALREADY_EXISTS: duplicate')), + it('marks run failed with SESSION_EXPIRED if the session expires before an agent opens it', async () => { + mockProvider.getSession.mockResolvedValue({ + sessionId: 'sess-expired', + state: 'SESSION_STATE_EXPIRED', + mode: 'decision', }); + mockRunManager.createRun.mockResolvedValue(makeRun({ id: 'run-exp' })); - const request = makeExecutionRequest(); - mockRunManager.createRun.mockResolvedValue(makeRun({ id: 'run-dup' })); - - await service.launch(request); - await new Promise((resolve) => setTimeout(resolve, 50)); + await service.launch(makeRunDescriptor()); + await new Promise((r) => setTimeout(r, 100)); expect(mockRunManager.markFailed).toHaveBeenCalledWith( - 'run-dup', - expect.objectContaining({ - errorCode: ErrorCode.SESSION_ALREADY_EXISTS, - }), + 'run-exp', + expect.objectContaining({ errorCode: ErrorCode.SESSION_EXPIRED }), ); }); - it('marks run failed with INVALID_POLICY_DEFINITION on invalid policy', async () => { - mockProvider.openSession.mockReturnValue({ - send: jest.fn(), - events: (async function* () {})(), - closeWrite: jest.fn(), - abort: jest.fn(), - sessionAck: Promise.reject(new Error('INVALID_POLICY_DEFINITION: bad rules')), + it('marks run failed with RUNTIME_TIMEOUT when GetSession never returns OPEN before timeout', async () => { + mockProvider.getSession.mockResolvedValue({ + sessionId: 'sess-stuck', + state: 'SESSION_STATE_UNSPECIFIED', + mode: 'decision', }); + mockRunManager.createRun.mockResolvedValue(makeRun({ id: 'run-timeout' })); - const request = makeExecutionRequest(); - mockRunManager.createRun.mockResolvedValue(makeRun({ id: 'run-ipd' 
})); - - await service.launch(request); - await new Promise((resolve) => setTimeout(resolve, 50)); + await service.launch(makeRunDescriptor()); + await new Promise((r) => setTimeout(r, 1200)); expect(mockRunManager.markFailed).toHaveBeenCalledWith( - 'run-ipd', - expect.objectContaining({ - errorCode: ErrorCode.INVALID_POLICY_DEFINITION, - }), + 'run-timeout', + expect.objectContaining({ errorCode: ErrorCode.RUNTIME_TIMEOUT }), ); }); + }); - it('starts stream consumer after successful session open', async () => { - const handle: RuntimeSessionHandle = { - send: jest.fn(), - events: (async function* () {})(), - closeWrite: jest.fn(), - abort: jest.fn(), - sessionAck: Promise.resolve({ - runtimeSessionId: 'sess-ok', - initiator: 'agent-1', - ack: { - ok: true, - duplicate: false, - messageId: 'msg-1', - sessionId: 'sess-ok', - acceptedAtUnixMs: Date.now(), - sessionState: 'SESSION_STATE_OPEN' as const, - }, - }), - }; - mockProvider.openSession.mockReturnValue(handle); - mockRunManager.markRunning.mockResolvedValue(makeRun({ traceId: undefined })); - - const request = makeExecutionRequest(); - mockRunManager.createRun.mockResolvedValue(makeRun({ id: 'run-ok' })); - - await service.launch(request); - await new Promise((resolve) => setTimeout(resolve, 100)); - - expect(mockRunManager.markStarted).toHaveBeenCalledWith('run-ok', request); - expect(mockRunManager.bindSession).toHaveBeenCalled(); - expect(mockStreamConsumer.start).toHaveBeenCalledWith( - expect.objectContaining({ - runId: 'run-ok', - runtimeSessionId: 'sess-ok', - sessionHandle: handle, - }), - ); + // ========================================================================= + // Invariant — observer never calls provider.send (CP-3) + // ========================================================================= + describe('invariant: observer never writes envelopes', () => { + it('the provider mock has no send method on RunExecutorService dependencies', () => { + expect((mockProvider as unknown as { send?: 
unknown }).send).toBeUndefined(); }); - it('registers trace artifact when run has traceId', async () => { - const handle: RuntimeSessionHandle = { - send: jest.fn(), - events: (async function* () {})(), - closeWrite: jest.fn(), - abort: jest.fn(), - sessionAck: Promise.resolve({ - runtimeSessionId: 'sess-tr', - initiator: 'agent-1', - ack: { - ok: true, - duplicate: false, - messageId: 'msg-1', - sessionId: 'sess-tr', - acceptedAtUnixMs: Date.now(), - sessionState: 'SESSION_STATE_OPEN' as const, - }, - }), - }; - mockProvider.openSession.mockReturnValue(handle); - mockRunManager.markRunning.mockResolvedValue(makeRun({ traceId: 'trace-123' })); - - const request = makeExecutionRequest(); - mockRunManager.createRun.mockResolvedValue(makeRun({ id: 'run-tr' })); - - await service.launch(request); - await new Promise((resolve) => setTimeout(resolve, 100)); - - expect(mockArtifactService.register).toHaveBeenCalledWith( - expect.objectContaining({ - runId: 'run-tr', - kind: 'trace', - label: 'Root run trace', - inline: { traceId: 'trace-123' }, - }), - ); + it('RunExecutorService does not expose sendMessage / sendSignal / updateContext methods', () => { + const executor = service as unknown as Record; + expect(executor.sendMessage).toBeUndefined(); + expect(executor.sendSignal).toBeUndefined(); + expect(executor.updateContext).toBeUndefined(); + expect(executor.retryKickoff).toBeUndefined(); }); }); }); diff --git a/src/runs/run-executor.service.ts b/src/runs/run-executor.service.ts index 8d3174e..8471295 100644 --- a/src/runs/run-executor.service.ts +++ b/src/runs/run-executor.service.ts @@ -1,13 +1,12 @@ import { BadRequestException, Injectable, Logger } from '@nestjs/common'; import { randomUUID } from 'node:crypto'; -import { ExecutionRequest, RunMessageInput } from '../contracts/control-plane'; +import { RunDescriptor } from '../contracts/control-plane'; import { ArtifactService } from '../artifacts/artifact.service'; import { AppConfigService } from 
'../config/app-config.service'; import { RunEventService } from '../events/run-event.service'; import { StreamHubService } from '../events/stream-hub.service'; import { AppException } from '../errors/app-exception'; import { ErrorCode } from '../errors/error-codes'; -import { ProtoRegistryService } from '../runtime/proto-registry.service'; import { RuntimeProviderRegistry } from '../runtime/runtime-provider.registry'; import { InstrumentationService } from '../telemetry/instrumentation.service'; import { TraceService } from '../telemetry/trace.service'; @@ -16,6 +15,32 @@ import { RuntimeSessionRepository } from '../storage/runtime-session.repository' import { RunManagerService } from './run-manager.service'; import { StreamConsumerService } from './stream-consumer.service'; +/** + * Validates a sessionId against the runtime's session validator (UUID v4/v7 or base64url 22+). + * Mirrors `runtime/src/session.rs:146-177`. + */ +function isValidSessionId(candidate: string): boolean { + // UUID pattern; the version digit accepts 1-7 (a superset of v4/v7). + const uuid = /^[0-9a-f]{8}-[0-9a-f]{4}-[1-7][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i; + if (uuid.test(candidate)) return true; + // base64url, 22+ chars. + const base64url = /^[A-Za-z0-9_-]{22,}$/; + return base64url.test(candidate); +} + +/** + * Observer-mode RunExecutor (direct-agent-auth CP-4). + * + * Flow: + * 1. `launch(descriptor)` — creates the run record, pre-allocates sessionId if omitted, + * returns `{run, sessionId}` immediately. + * 2. Async `execute()` — initializes the runtime, polls `GetSession(sessionId)` until the + * initiator agent opens it, then subscribes to a read-only `StreamSession` and passes + * the handle to `StreamConsumerService`. + * + * **No `Send` call anywhere.** Agents emit their own envelopes. The control-plane never + * forges SessionStart, kickoff, messages, signals, or context updates. 
+ */ @Injectable() export class RunExecutorService { private readonly logger = new Logger(RunExecutorService.name); @@ -25,7 +50,6 @@ export class RunExecutorService { private readonly runRepository: RunRepository, private readonly runtimeSessionRepository: RuntimeSessionRepository, private readonly runtimeRegistry: RuntimeProviderRegistry, - private readonly protoRegistry: ProtoRegistryService, private readonly traceService: TraceService, private readonly eventService: RunEventService, private readonly artifactService: ArtifactService, @@ -35,7 +59,7 @@ export class RunExecutorService { private readonly instrumentation: InstrumentationService ) {} - async validate(request: ExecutionRequest) { + async validate(request: RunDescriptor) { const errors: string[] = []; const warnings: string[] = []; @@ -52,15 +76,8 @@ export class RunExecutorService { errors.push('session.modeName is required'); } - if (request.kickoff) { - for (const msg of request.kickoff) { - if (!msg.messageType) { - errors.push('kickoff message is missing messageType'); - } - if (!msg.from) { - errors.push('kickoff message is missing from'); - } - } + if (request.session.sessionId && !isValidSessionId(request.session.sessionId)) { + errors.push('session.sessionId must be a UUID v4/v7 or base64url 22+ chars'); } let runtimeInfo: { reachable: boolean; supportedModes: string[]; capabilities?: unknown } = { @@ -101,285 +118,149 @@ export class RunExecutorService { }; } - async launch(request: ExecutionRequest) { - if (request.mode === 'replay') { - throw new BadRequestException('Use /runs/:id/replay for replay mode. POST /runs launches live or sandbox executions.'); + /** + * Allocate a sessionId if the caller didn't provide one, validating when they did. + * The sessionId is returned to the caller so they can propagate it to agents via bootstrap. 
+ */ + private resolveSessionId(request: RunDescriptor): string { + if (request.session.sessionId) { + if (!isValidSessionId(request.session.sessionId)) { + throw new BadRequestException( + 'session.sessionId must be a UUID v4/v7 or base64url 22+ chars', + ); + } + return request.session.sessionId; } + return randomUUID(); + } - const run = await this.runManager.createRun(request); - void this.execute(run.id, request); - return run; + async launch(request: RunDescriptor): Promise<{ run: Awaited>; sessionId: string }> { + const sessionId = this.resolveSessionId(request); + const requestWithSessionId: RunDescriptor = { + ...request, + session: { ...request.session, sessionId } + }; + const run = await this.runManager.createRun(requestWithSessionId, sessionId); + void this.execute(run.id, requestWithSessionId, sessionId); + return { run, sessionId }; } + /** + * UI-initiated cancel. + * + * Option A (default): proxy to the initiator agent's `cancelCallback` over HTTP. + * Option B (scenario opt-in, `metadata.cancellationDelegated: true`): call + * `provider.cancelSession()` directly with the control-plane's own identity. + * + * See direct-agent-auth.md §Cancellation design. + */ async cancel(runId: string, reason?: string) { const run = await this.runManager.getRun(runId); if (!run.runtimeSessionId) { throw new BadRequestException('run has no bound runtime session'); } - const provider = this.runtimeRegistry.get(run.runtimeKind); - const session = await this.runtimeSessionRepository.findByRunId(runId); - const requesterId = session?.initiatorParticipantId ?? undefined; - try { - await provider.cancelSession({ - runId, - runtimeSessionId: run.runtimeSessionId, - reason, - requesterId - }); - } catch (cancelError) { - // Session may have already expired on the runtime — proceed with local cancellation - this.logger.warn( - `cancelSession failed for run ${runId} (proceeding with local cancel): ${cancelError instanceof Error ? 
cancelError.message : String(cancelError)}` + + const metadata = (run.metadata ?? {}) as Record; + const delegated = Boolean(metadata.cancellationDelegated); + const cancelCallback = metadata.cancelCallback as { url?: string; bearer?: string } | undefined; + + if (delegated) { + // Option B: scenario policy delegates cancellation authority to the control-plane. + const provider = this.runtimeRegistry.get(run.runtimeKind); + try { + await provider.cancelSession({ + runId, + runtimeSessionId: run.runtimeSessionId, + reason, + }); + } catch (cancelError) { + this.logger.warn( + `cancelSession failed for run ${runId} (proceeding with local cancel): ${cancelError instanceof Error ? cancelError.message : String(cancelError)}`, + ); + } + } else if (cancelCallback?.url) { + // Option A: proxy UI cancel to the initiator agent's local callback. + await this.invokeCancelCallback(runId, cancelCallback, reason); + } else { + // No callback registered and no policy delegation — fail closed. + throw new BadRequestException( + 'run has no cancelCallback in metadata and no policy delegation — cannot cancel from control-plane', ); } + const cancelled = await this.runManager.markCancelled(runId); await this.streamConsumer.stop(runId); this.streamHub.complete(runId); return cancelled; } - async sendMessage(runId: string, params: RunMessageInput) { - return this.traceService.withRunSpan( - runId, - 'runtime.send_message', - { 'macp.message_type': params.messageType, 'macp.sender': params.from }, - () => this.sendMessageInner(runId, params) - ); - } - - private async sendMessageInner(runId: string, params: RunMessageInput) { - const run = await this.runManager.getRun(runId); - if (!run.runtimeSessionId || !['binding_session', 'running'].includes(run.status)) { - throw new BadRequestException('run is not ready to accept session-bound messages'); - } - - const executionRequest = run.metadata?.executionRequest as ExecutionRequest | undefined; - const runtimeSession = await 
this.runtimeSessionRepository.findByRunId(runId); - const modeName = runtimeSession?.modeName ?? executionRequest?.session?.modeName; - if (!modeName) { - throw new BadRequestException('run does not have a bound mode name'); - } - - const provider = this.runtimeRegistry.get(run.runtimeKind); - let payload: Buffer; - if (params.payloadEnvelope) { - payload = this.protoRegistry.encodePayloadEnvelope(params.payloadEnvelope); - } else { - // Try to auto-encode plain JSON payload as proto using the known type for this mode+messageType - const knownType = this.protoRegistry.getKnownTypeName(modeName, params.messageType); - if (knownType && params.payload) { - payload = this.protoRegistry.encodeMessage(knownType, params.payload as Record); - } else { - payload = Buffer.from(JSON.stringify(params.payload ?? {}), 'utf8'); - } - } - - this.logger.log( - `sendMessage debug: type=${params.messageType} mode=${modeName} payloadLen=${payload.length} payloadHex=${payload.toString('hex').slice(0, 80)} sessionId=${run.runtimeSessionId}` - ); - - const sendResult = await provider.send({ - runId, - runtimeSessionId: run.runtimeSessionId, - modeName, - from: params.from, - to: params.to ?? [], - messageType: params.messageType, - payload, - payloadDescriptor: (params.payloadEnvelope as unknown as Record) ?? params.payload ?? 
{}, - metadata: params.metadata - }); - - if (!sendResult.ack.ok && sendResult.ack.error) { - const errorCode = sendResult.ack.error.code; - - // Map runtime policy errors to specific error codes - if (errorCode === 'POLICY_DENIED') { - throw new AppException( - ErrorCode.POLICY_DENIED, - `Policy denied commitment: ${sendResult.ack.error.message}`, - 403 - ); - } - if (errorCode === 'UNKNOWN_POLICY_VERSION') { - throw new AppException( - ErrorCode.UNKNOWN_POLICY_VERSION, - `Unknown policy version: ${sendResult.ack.error.message}`, - 400 - ); - } - if (errorCode === 'INVALID_POLICY_DEFINITION') { - throw new AppException( - ErrorCode.INVALID_POLICY_DEFINITION, - `Invalid policy definition: ${sendResult.ack.error.message}`, - 400 - ); - } - if (errorCode === 'SESSION_ALREADY_EXISTS') { + private async invokeCancelCallback( + runId: string, + callback: { url?: string; bearer?: string }, + reason?: string, + ): Promise { + if (!callback.url) return; + const controller = new AbortController(); + const timer = setTimeout(() => controller.abort(), this.config.cancelCallbackTimeoutMs); + try { + const headers: Record = { 'content-type': 'application/json' }; + if (callback.bearer) headers.authorization = `Bearer ${callback.bearer}`; + const body = JSON.stringify({ runId, reason: reason ?? null }); + const res = await fetch(callback.url, { + method: 'POST', + headers, + body, + signal: controller.signal, + }); + if (!res.ok) { throw new AppException( - ErrorCode.SESSION_ALREADY_EXISTS, - `Session already exists: ${sendResult.ack.error.message}`, - 409 + ErrorCode.INTERNAL_ERROR, + `cancel callback ${callback.url} returned ${res.status}`, + 502, ); } - - throw new AppException( - ErrorCode.MESSAGE_SEND_FAILED, - `Runtime rejected message: [${errorCode}] ${sendResult.ack.error.message}`, - errorCode === 'INVALID_SESSION_ID' ? 
400 : 502 - ); - } - - await this.eventService.emitControlPlaneEvents(runId, [ - { - ts: new Date().toISOString(), - type: 'message.sent', - source: { kind: 'control-plane', name: 'run-executor' }, - subject: { kind: 'message', id: sendResult.envelope.messageId }, - data: { - sessionId: run.runtimeSessionId, - sender: params.from, - to: params.to ?? [], - messageType: params.messageType, - ack: sendResult.ack, - payloadDescriptor: (params.payloadEnvelope as unknown as Record) ?? params.payload ?? {}, - metadata: params.metadata ?? {} - } - } - ]); - - this.instrumentation.outboundMessagesTotal.inc({ category: 'message', status: 'sent' }); - return { messageId: sendResult.envelope.messageId, ack: sendResult.ack }; - } - - async sendSignal(runId: string, params: { - from: string; - to: string[]; - messageType: string; - payload?: Record; - }) { - const run = await this.runManager.getRun(runId); - if (!run.runtimeSessionId || run.status !== 'running') { - throw new BadRequestException('run is not in running state'); - } - const provider = this.runtimeRegistry.get(run.runtimeKind); - - // Runtime requires empty session_id and mode for Signal messages - const sendResult = await provider.send({ - runId, - runtimeSessionId: '', - modeName: '', - from: params.from, - to: params.to, - messageType: 'Signal', - payload: Buffer.from(JSON.stringify(params.payload ?? 
{}), 'utf8'), - payloadDescriptor: params.payload - }); - - // Check ack for errors - if (!sendResult.ack.ok && sendResult.ack.error) { - throw new AppException( - ErrorCode.SIGNAL_DISPATCH_FAILED, - `Runtime rejected signal: [${sendResult.ack.error.code}] ${sendResult.ack.error.message}`, - 502 - ); - } - - await this.eventService.emitControlPlaneEvents(runId, [ - { - ts: new Date().toISOString(), - type: 'message.sent', - source: { kind: 'control-plane', name: 'run-executor' }, - subject: { kind: 'signal', id: sendResult.envelope.messageId }, - data: { - sessionId: run.runtimeSessionId, - sender: params.from, - to: params.to, - messageType: params.messageType, - ack: sendResult.ack, - payloadDescriptor: params.payload ?? {} - } - } - ]); - - return { messageId: sendResult.envelope.messageId, ack: sendResult.ack }; - } - - async updateContext(runId: string, dto: { from: string; context: Record }) { - const run = await this.runManager.getRun(runId); - if (!run.runtimeSessionId || run.status !== 'running') { - throw new BadRequestException('run is not in running state'); - } - const provider = this.runtimeRegistry.get(run.runtimeKind); - - const sendResult = await provider.send({ - runId, - runtimeSessionId: '', - modeName: '', - from: dto.from, - to: [], - messageType: 'ContextUpdate', - payload: Buffer.from(JSON.stringify(dto.context), 'utf8'), - payloadDescriptor: dto.context - }); - - if (!sendResult.ack.ok && sendResult.ack.error) { + } catch (error) { + if (error instanceof AppException) throw error; throw new AppException( - ErrorCode.CONTEXT_UPDATE_FAILED, - `Runtime rejected context update: [${sendResult.ack.error.code}] ${sendResult.ack.error.message}`, - 502 + ErrorCode.INTERNAL_ERROR, + `cancel callback ${callback.url} failed: ${error instanceof Error ? 
error.message : String(error)}`, + 502, ); + } finally { + clearTimeout(timer); } - - await this.eventService.emitControlPlaneEvents(runId, [ - { - ts: new Date().toISOString(), - type: 'message.sent', - source: { kind: 'control-plane', name: 'run-executor' }, - subject: { kind: 'message', id: sendResult.envelope.messageId }, - data: { - sessionId: run.runtimeSessionId, - sender: dto.from, - to: [], - messageType: 'ContextUpdate', - ack: sendResult.ack, - payloadDescriptor: dto.context - } - } - ]); - - return { messageId: sendResult.envelope.messageId, ack: sendResult.ack }; } - async clone(runId: string, overrides?: { tags?: string[]; context?: Record }) { + async clone(runId: string, overrides?: { tags?: string[] }): Promise<{ run: Awaited>; sessionId: string }> { const run = await this.runManager.getRun(runId); - const executionRequest = run.metadata?.executionRequest as ExecutionRequest | undefined; + const executionRequest = run.metadata?.executionRequest as RunDescriptor | undefined; if (!executionRequest) { throw new BadRequestException('run does not have an execution request in metadata'); } - const cloned = { ...executionRequest }; + const cloned: RunDescriptor = { ...executionRequest }; if (overrides?.tags) { cloned.execution = { ...cloned.execution, tags: overrides.tags }; } - if (overrides?.context) { - cloned.session = { ...cloned.session, context: overrides.context }; - } - // Clear idempotency key so clone creates a new run + // Always clear idempotency key + sessionId so clone creates a new run+session. if (cloned.execution) { delete (cloned.execution as unknown as Record).idempotencyKey; } + cloned.session = { ...cloned.session, sessionId: undefined }; return this.launch(cloned); } - private async execute(runId: string, request: ExecutionRequest): Promise { + /** + * Observer execute loop. Runs async after POST /runs returns. + * Never writes envelopes; polls GetSession, then subscribes read-only. 
+ */ + private async execute(runId: string, request: RunDescriptor, sessionId: string): Promise { const provider = this.runtimeRegistry.get(request.runtime.kind); const deadlineMs = this.config.runtimeRequestTimeoutMs; try { await this.runManager.markStarted(runId, request); - // Mode validation via Initialize const initResult = await this.traceService.withSpan( 'runtime.initialize', { @@ -410,108 +291,37 @@ export class RunExecutorService { ); } - // Open unified bidirectional session stream - const handle = provider.openSession({ runId, execution: request }); + // Poll GetSession until the initiator agent opens it, or timeout. + const snapshot = await this.traceService.withSpan( + 'runtime.await_session_open', + { run_id: runId, runtime_kind: request.runtime.kind, session_id: sessionId }, + async () => this.pollForOpenSession(provider, runId, sessionId) + ); - // Wait for SessionStart confirmation - const session = await this.traceService.withSpan( - 'runtime.open_session', + await this.runManager.bindSession( + runId, + request, { - run_id: runId, - runtime_kind: request.runtime.kind, - mode_name: request.session.modeName + runtimeSessionId: sessionId, + initiator: snapshot.initiator ?? '', + ack: { sessionState: snapshot.state }, }, - async () => handle.sessionAck + initResult.capabilities as unknown as Record, ); - await this.runManager.bindSession(runId, request, session, initResult.capabilities as unknown as Record); - - // Send kickoff messages via unary Send RPC (more reliable than bidi stream) - for (const message of request.kickoff ?? []) { - try { - const payload = message.payloadEnvelope - ? this.protoRegistry.encodePayloadEnvelope(message.payloadEnvelope) - : Buffer.from(JSON.stringify(message.payload ?? {}), 'utf8'); - - const sendResult = await this.retryKickoff(async () => { - return provider.send({ - runId, - runtimeSessionId: session.runtimeSessionId, - modeName: request.session.modeName, - from: message.from, - to: message.to ?? 
[], - messageType: message.messageType, - payload, - payloadDescriptor: (message.payloadEnvelope as unknown as Record) ?? message.payload ?? {}, - metadata: { kickoff: true } - }); - }); - - const kickoffMessageId = sendResult.envelope?.messageId ?? randomUUID(); - - await this.eventService.emitControlPlaneEvents(runId, [ - { - ts: new Date().toISOString(), - type: 'message.sent', - source: { kind: 'control-plane', name: 'run-executor' }, - subject: { kind: 'message', id: kickoffMessageId }, - data: { - sessionId: session.runtimeSessionId, - sender: message.from, - to: message.to, - messageType: message.messageType, - kind: message.kind, - ack: { - ok: sendResult.ack.ok, - duplicate: sendResult.ack.duplicate, - messageId: kickoffMessageId, - sessionId: session.runtimeSessionId, - acceptedAtUnixMs: sendResult.ack.acceptedAtUnixMs ?? Date.now(), - sessionState: sendResult.ack.sessionState ?? 'SESSION_STATE_OPEN' - }, - payloadDescriptor: (message.payloadEnvelope as unknown as Record) ?? message.payload ?? {} - } - } - ]); - } catch (kickoffError) { - this.logger.error( - `kickoff message failed for run ${runId}, messageType=${message.messageType}: ${kickoffError instanceof Error ? kickoffError.message : String(kickoffError)}` - ); - await this.eventService.emitControlPlaneEvents(runId, [ - { - ts: new Date().toISOString(), - type: 'message.send_failed', - source: { kind: 'control-plane', name: 'run-executor' }, - subject: { kind: 'message', id: message.messageType }, - data: { - sessionId: session.runtimeSessionId, - sender: message.from, - to: message.to, - messageType: message.messageType, - error: kickoffError instanceof Error ? kickoffError.message : String(kickoffError) - } - } - ]); - handle.abort(); - await this.runManager.markFailed(runId, kickoffError); - return; - } - } + // Subscribe read-only — never writes. 
+ const handle = provider.subscribeSession({ runId, runtimeSessionId: sessionId }); - // Half-close the write side — kickoff phase done - handle.closeWrite(); + const run = await this.runManager.markRunning(runId, sessionId); + const subscriberId = snapshot.initiator ?? ''; - const run = await this.runManager.markRunning(runId, session.runtimeSessionId); - const subscriberId = session.initiator; - - // Pass the session handle to the stream consumer await this.streamConsumer.start({ runId, execution: request, runtimeKind: request.runtime.kind, - runtimeSessionId: session.runtimeSessionId, + runtimeSessionId: sessionId, subscriberId, - sessionHandle: handle + sessionHandle: handle, }); if (run.traceId) { @@ -537,66 +347,91 @@ export class RunExecutorService { } ]); } + this.instrumentation.outboundMessagesTotal.inc({ category: 'observer', status: 'subscribed' }); } catch (error) { - // Surface policy-specific errors with appropriate error codes + await this.handleExecuteError(runId, error); + } + } + + private async pollForOpenSession( + provider: ReturnType, + runId: string, + sessionId: string, + ) { + const startedAt = Date.now(); + const base = this.config.sessionPollBaseMs; + const max = this.config.sessionPollMaxMs; + const totalTimeout = this.config.sessionPollTimeoutMs; + let attempt = 0; + + while (Date.now() - startedAt < totalTimeout) { try { - if (error instanceof Error) { - const msg = error.message ?? 
''; - if (msg.includes('UNKNOWN_POLICY_VERSION')) { - await this.runManager.markFailed( - runId, - new AppException(ErrorCode.UNKNOWN_POLICY_VERSION, `Unknown policy version: ${msg}`, 400) - ); - return; - } - if (msg.includes('POLICY_DENIED')) { - await this.runManager.markFailed( - runId, - new AppException(ErrorCode.POLICY_DENIED, `Policy denied: ${msg}`, 403) - ); - return; - } - if (msg.includes('INVALID_POLICY_DEFINITION')) { - await this.runManager.markFailed( - runId, - new AppException(ErrorCode.INVALID_POLICY_DEFINITION, `Invalid policy definition: ${msg}`, 400) - ); - return; - } - if (msg.includes('SESSION_ALREADY_EXISTS') || msg.includes('SessionAlreadyExists')) { - await this.runManager.markFailed( - runId, - new AppException(ErrorCode.SESSION_ALREADY_EXISTS, `Session already exists: ${msg}`, 409) - ); - return; - } + const snapshot = await provider.getSession({ runId, runtimeSessionId: sessionId }); + if (snapshot.state === 'SESSION_STATE_OPEN') return snapshot; + if (snapshot.state === 'SESSION_STATE_EXPIRED') { + throw new AppException( + ErrorCode.SESSION_EXPIRED, + `session ${sessionId} expired before any agent opened it`, + 400, + ); } - await this.runManager.markFailed(runId, error); - } catch (markFailedError) { - this.logger.error( - `failed to mark run ${runId} as failed (run may have been deleted): ${markFailedError instanceof Error ? markFailedError.message : String(markFailedError)}` + } catch (pollError) { + if (pollError instanceof AppException) throw pollError; + // getSession failing with NotFound is normal while the agent hasn't called SessionStart yet. + this.logger.debug( + `getSession(${sessionId}) attempt ${attempt + 1}: ${pollError instanceof Error ? 
pollError.message : String(pollError)}`, ); } + attempt += 1; + const delay = Math.min(base * 2 ** (attempt - 1), max); + await new Promise((resolve) => setTimeout(resolve, delay)); } - } - private async retryKickoff(fn: () => Promise): Promise { - const maxRetries = this.config.kickoffMaxRetries; - let lastError: unknown; + throw new AppException( + ErrorCode.RUNTIME_TIMEOUT, + `timed out after ${totalTimeout}ms waiting for initiator agent to open session ${sessionId}`, + 504, + ); + } - for (let attempt = 0; attempt <= maxRetries; attempt++) { - try { - return await fn(); - } catch (error) { - lastError = error; - if (attempt < maxRetries) { - const backoffMs = Math.min(250 * 2 ** attempt, 5000); - const jitter = Math.random() * backoffMs * 0.2; - this.logger.warn(`kickoff attempt ${attempt + 1} failed, retrying in ${Math.round(backoffMs + jitter)}ms`); - await new Promise((resolve) => setTimeout(resolve, backoffMs + jitter)); + private async handleExecuteError(runId: string, error: unknown): Promise { + try { + if (error instanceof Error) { + const msg = error.message ?? 
''; + if (msg.includes('UNKNOWN_POLICY_VERSION')) { + await this.runManager.markFailed( + runId, + new AppException(ErrorCode.UNKNOWN_POLICY_VERSION, `Unknown policy version: ${msg}`, 400), + ); + return; + } + if (msg.includes('POLICY_DENIED')) { + await this.runManager.markFailed( + runId, + new AppException(ErrorCode.POLICY_DENIED, `Policy denied: ${msg}`, 403), + ); + return; + } + if (msg.includes('INVALID_POLICY_DEFINITION')) { + await this.runManager.markFailed( + runId, + new AppException(ErrorCode.INVALID_POLICY_DEFINITION, `Invalid policy definition: ${msg}`, 400), + ); + return; + } + if (msg.includes('SESSION_ALREADY_EXISTS') || msg.includes('SessionAlreadyExists')) { + await this.runManager.markFailed( + runId, + new AppException(ErrorCode.SESSION_ALREADY_EXISTS, `Session already exists: ${msg}`, 409), + ); + return; } } + await this.runManager.markFailed(runId, error); + } catch (markFailedError) { + this.logger.error( + `failed to mark run ${runId} as failed (run may have been deleted): ${markFailedError instanceof Error ? 
markFailedError.message : String(markFailedError)}`, + ); } - throw lastError; } } diff --git a/src/runs/run-manager.service.spec.ts b/src/runs/run-manager.service.spec.ts index c9b5bfe..7ff8ef5 100644 --- a/src/runs/run-manager.service.spec.ts +++ b/src/runs/run-manager.service.spec.ts @@ -11,9 +11,11 @@ import { TraceService } from '../telemetry/trace.service'; import { WebhookService } from '../webhooks/webhook.service'; import { MetricsService } from '../metrics/metrics.service'; import { EventRepository } from '../storage/event.repository'; -import { ExecutionRequest, RunStateProjection } from '../contracts/control-plane'; +import { RunDescriptor, RunStateProjection } from '../contracts/control-plane'; -function makeExecutionRequest(overrides?: Partial): ExecutionRequest { +const FIXED_SESSION_ID = '123e4567-e89b-42d3-a456-426614174000'; + +function makeExecutionRequest(overrides?: Partial): RunDescriptor { return { mode: 'live', runtime: { kind: 'rust', version: '0.1.0' }, @@ -22,10 +24,7 @@ function makeExecutionRequest(overrides?: Partial): ExecutionR modeVersion: '1.0.0', configurationVersion: '1.0.0', ttlMs: 30000, - participants: [ - { id: 'agent-a', role: 'proposer' }, - { id: 'agent-b', role: 'evaluator' }, - ], + participants: [{ id: 'agent-a' }, { id: 'agent-b' }], }, ...overrides, }; @@ -163,7 +162,7 @@ describe('RunManagerService', () => { execution: { idempotencyKey: 'key-123' }, }); - const result = await service.createRun(request); + const result = await service.createRun(request, FIXED_SESSION_ID); expect(result).toBe(existing); expect(runRepository.findByIdempotencyKey).toHaveBeenCalledWith('key-123'); @@ -176,7 +175,7 @@ describe('RunManagerService', () => { runRepository.create.mockResolvedValue(created as any); const request = makeExecutionRequest(); - const result = await service.createRun(request); + const result = await service.createRun(request, FIXED_SESSION_ID); expect(result).toEqual(created); 
expect(runRepository.findByIdempotencyKey).not.toHaveBeenCalled(); @@ -198,7 +197,7 @@ describe('RunManagerService', () => { const request = makeExecutionRequest({ execution: { idempotencyKey: 'new-key' }, }); - const result = await service.createRun(request); + const result = await service.createRun(request, FIXED_SESSION_ID); expect(result).toEqual(created); expect(runRepository.findByIdempotencyKey).toHaveBeenCalledWith('new-key'); diff --git a/src/runs/run-manager.service.ts b/src/runs/run-manager.service.ts index 60949c1..75dafde 100644 --- a/src/runs/run-manager.service.ts +++ b/src/runs/run-manager.service.ts @@ -1,6 +1,6 @@ import { BadRequestException, Injectable, Logger, NotFoundException } from '@nestjs/common'; import { randomUUID } from 'node:crypto'; -import { ExecutionRequest, RunStateProjection } from '../contracts/control-plane'; +import { RunDescriptor, RunStateProjection } from '../contracts/control-plane'; import { AuditService } from '../audit/audit.service'; import { InstrumentationService } from '../telemetry/instrumentation.service'; import { TraceService } from '../telemetry/trace.service'; @@ -29,7 +29,7 @@ export class RunManagerService { private readonly instrumentation: InstrumentationService ) {} - async createRun(request: ExecutionRequest) { + async createRun(request: RunDescriptor, sessionId: string) { const idempotencyKey = request.execution?.idempotencyKey; if (idempotencyKey) { const existing = await this.runRepository.findByIdempotencyKey(idempotencyKey); @@ -40,10 +40,10 @@ export class RunManagerService { const traceId = this.traceService.startRunTrace(runId, { runtime_kind: request.runtime.kind, mode_name: request.session.modeName, - execution_mode: request.mode + execution_mode: request.mode, }); - // Phase 3.6: Auto-tag sandbox runs + // Auto-tag sandbox runs const tags = [...(request.execution?.tags ?? 
[])]; if (request.mode === 'sandbox' && !tags.includes('sandbox')) { tags.push('sandbox'); @@ -56,21 +56,26 @@ export class RunManagerService { mode: request.mode, runtimeKind: request.runtime.kind, runtimeVersion: request.runtime.version, + runtimeSessionId: sessionId, idempotencyKey, tags, sourceKind: request.session.metadata?.source as string | undefined, sourceRef: request.session.metadata?.sourceRef as string | undefined, metadata: { executionRequest: request, - requester: request.execution?.requester + requester: request.execution?.requester, + // Forward opaque UI-cancel hooks (Option A) and policy-delegation flag (Option B) + // — see direct-agent-auth §Cancellation design. + ...(request.session.metadata?.cancelCallback + ? { cancelCallback: request.session.metadata.cancelCallback } + : {}), + ...(request.session.metadata?.cancellationDelegated + ? { cancellationDelegated: request.session.metadata.cancellationDelegated } + : {}), }, - traceId + traceId, }); - const decisionPrompt = - (request.session.metadata?.decisionPrompt as string | undefined) ?? - (request.session.metadata?.decision_prompt as string | undefined); - await this.runEventService.emitControlPlaneEvents(record.id, [ { ts: new Date().toISOString(), @@ -83,16 +88,16 @@ export class RunManagerService { modeName: request.session.modeName, runtimeKind: request.runtime.kind, runtimeVersion: request.runtime.version, + sessionId, traceId, - ...(decisionPrompt ? 
{ decisionPrompt } : {}) - } - } + }, + }, ]); return record; } - async markStarted(runId: string, request: ExecutionRequest) { + async markStarted(runId: string, request: RunDescriptor) { this.instrumentation.runStateTotal.inc({ status: 'starting' }); const run = await this.runRepository.markStarted(runId); await this.runEventService.emitControlPlaneEvents(runId, [ @@ -107,18 +112,22 @@ export class RunManagerService { startedAt: run.startedAt, modeName: request.session.modeName, runtimeKind: request.runtime.kind, - traceId: run.traceId - } - } + traceId: run.traceId, + }, + }, ]); return run; } + /** + * Record that the initiator agent has opened the runtime session. + * Called from the observer path once `GetSession` reports OPEN state. + */ async bindSession( runId: string, - request: ExecutionRequest, + request: RunDescriptor, session: { runtimeSessionId: string; initiator: string; ack: { sessionState: string } }, - capabilities?: Record + capabilities?: Record, ) { const run = await this.runRepository.markBindingSession(runId, session.runtimeSessionId); await this.runtimeSessionRepository.upsert({ @@ -129,14 +138,13 @@ export class RunManagerService { modeVersion: request.session.modeVersion, configurationVersion: request.session.configurationVersion, policyVersion: request.session.policyVersion || 'policy.default', - initiatorParticipantId: session.initiator, + initiatorParticipantId: session.initiator || null, sessionState: session.ack.sessionState, lastSeenAt: new Date().toISOString(), capabilities: (capabilities ?? {}) as Record, metadata: { participants: request.session.participants, - roots: request.session.roots ?? 
[] - } + }, }); const participantEvents = request.session.participants.map((participant) => ({ @@ -146,14 +154,10 @@ export class RunManagerService { subject: { kind: 'participant' as const, id: participant.id }, data: { participantId: participant.id, - role: participant.role, - transportIdentity: participant.transportIdentity, - status: 'idle' - } + status: 'idle', + }, })); - const expectedCommitments = request.session.commitments; - await this.runEventService.emitControlPlaneEvents(runId, [ { ts: new Date().toISOString(), @@ -169,10 +173,9 @@ export class RunManagerService { configurationVersion: request.session.configurationVersion, policyVersion: request.session.policyVersion || 'policy.default', participants: request.session.participants.map((item) => item.id), - ...(expectedCommitments && expectedCommitments.length > 0 ? { expectedCommitments } : {}) - } + }, }, - ...participantEvents + ...participantEvents, ]); return run; @@ -190,15 +193,15 @@ export class RunManagerService { trace: run.traceId ? 
{ traceId: run.traceId } : undefined, data: { sessionId: runtimeSessionId, - state: 'SESSION_STATE_OPEN' - } - } + state: 'SESSION_STATE_OPEN', + }, + }, ]); void this.webhookService.fireEvent({ event: 'run.started', runId, status: 'running', - timestamp: new Date().toISOString() + timestamp: new Date().toISOString(), }); return run; } @@ -221,15 +224,15 @@ export class RunManagerService { status: 'completed', endedAt: run.endedAt, runtimeSessionId: run.runtimeSessionId, - traceId: run.traceId - } - } + traceId: run.traceId, + }, + }, ]); void this.webhookService.fireEvent({ event: 'run.completed', runId, status: 'completed', - timestamp: new Date().toISOString() + timestamp: new Date().toISOString(), }); void this.enrichRunMetadata(runId, run); return run; @@ -253,15 +256,15 @@ export class RunManagerService { status: 'cancelled', endedAt: run.endedAt, runtimeSessionId: run.runtimeSessionId, - traceId: run.traceId - } - } + traceId: run.traceId, + }, + }, ]); void this.webhookService.fireEvent({ event: 'run.cancelled', runId, status: 'cancelled', - timestamp: new Date().toISOString() + timestamp: new Date().toISOString(), }); return run; } @@ -286,16 +289,16 @@ export class RunManagerService { endedAt: run.endedAt, runtimeSessionId: run.runtimeSessionId, traceId: run.traceId, - error: message - } - } + error: message, + }, + }, ]); void this.webhookService.fireEvent({ event: 'run.failed', runId, status: 'failed', timestamp: new Date().toISOString(), - data: { error: message } + data: { error: message }, }); void this.enrichRunMetadata(runId, run); return run; @@ -345,7 +348,7 @@ export class RunManagerService { actorType: 'system', action: 'run.deleted', resource: 'run', - resourceId: runId + resourceId: runId, }); await this.runRepository.delete(runId); } @@ -357,7 +360,7 @@ export class RunManagerService { actorType: 'system', action: 'run.archived', resource: 'run', - resourceId: runId + resourceId: runId, }); return this.runRepository.archive(runId); } @@ 
-375,12 +378,12 @@ export class RunManagerService { private async enrichRunMetadata( runId: string, - run: { startedAt?: string | null; endedAt?: string | null; metadata?: Record | null; status?: string } + run: { startedAt?: string | null; endedAt?: string | null; metadata?: Record | null; status?: string }, ) { try { const [metrics, events] = await Promise.all([ this.metricsService.get(runId), - this.eventRepository.listCanonicalByRun(runId, 0, 2000) + this.eventRepository.listCanonicalByRun(runId, 0, 2000), ]); const decisionEvent = [...events].reverse().find((e) => e.type === 'decision.finalized'); @@ -392,14 +395,13 @@ export class RunManagerService { ? new Date(run.endedAt).getTime() - new Date(run.startedAt).getTime() : metrics?.durationMs ?? undefined; - // Feed latency histogram (§5.4). Only observe when we have a real duration. if (durationMs !== undefined && durationMs >= 0) { const modeName = (run.metadata?.executionRequest as { session?: { modeName?: string } } | undefined)?.session?.modeName ?? 'unknown'; this.instrumentation.runDuration.observe( { terminal_status: run.status ?? 'unknown', mode_name: modeName }, - durationMs / 1000 + durationMs / 1000, ); } @@ -413,7 +415,7 @@ export class RunManagerService { if (Object.keys(enrichment).length > 0) { await this.runRepository.update(runId, { - metadata: { ...(run.metadata ?? {}), ...enrichment } + metadata: { ...(run.metadata ?? 
{}), ...enrichment }, }); } } catch (err) { diff --git a/src/runs/run-recovery.service.ts b/src/runs/run-recovery.service.ts index fa1edbd..b0c314f 100644 --- a/src/runs/run-recovery.service.ts +++ b/src/runs/run-recovery.service.ts @@ -1,5 +1,5 @@ import { Injectable, Logger, OnApplicationBootstrap } from '@nestjs/common'; -import { ExecutionRequest } from '../contracts/control-plane'; +import { RunDescriptor } from '../contracts/control-plane'; import { AppConfigService } from '../config/app-config.service'; import { DatabaseService } from '../db/database.service'; import { RunEventService } from '../events/run-event.service'; @@ -86,7 +86,7 @@ export class RunRecoveryService implements OnApplicationBootstrap { lastEventSeq: number; metadata: Record; }): Promise { - const executionRequest = run.metadata?.executionRequest as ExecutionRequest | undefined; + const executionRequest = run.metadata?.executionRequest as RunDescriptor | undefined; if (!executionRequest) { throw new Error('missing executionRequest in run metadata'); } @@ -97,11 +97,10 @@ export class RunRecoveryService implements OnApplicationBootstrap { throw new Error('no runtime session ID available for recovery'); } - const subscriberId = - session?.initiatorParticipantId ?? - executionRequest.session.initiatorParticipantId ?? - executionRequest.session.participants[0]?.id ?? - 'control-plane'; + // Observer mode: the initiator sender is whatever the runtime's session metadata said + // at bind time (stored in runtime_sessions.initiator_participant_id). The control-plane + // no longer chooses an initiator from the descriptor. + const subscriberId = session?.initiatorParticipantId ?? 
'control-plane'; // Promote binding_session → running if needed if (run.status === 'binding_session') { diff --git a/src/runs/stream-consumer.service.ts b/src/runs/stream-consumer.service.ts index 114a167..dbbf01e 100644 --- a/src/runs/stream-consumer.service.ts +++ b/src/runs/stream-consumer.service.ts @@ -1,5 +1,5 @@ import { Injectable, Logger, OnModuleDestroy } from '@nestjs/common'; -import { ExecutionRequest } from '../contracts/control-plane'; +import { RunDescriptor } from '../contracts/control-plane'; import { RawRuntimeEvent, RuntimeSessionHandle } from '../contracts/runtime'; import { AppConfigService } from '../config/app-config.service'; import { EventNormalizerService } from '../events/event-normalizer.service'; @@ -45,7 +45,7 @@ export class StreamConsumerService implements OnModuleDestroy { async start(params: { runId: string; - execution: ExecutionRequest; + execution: RunDescriptor; runtimeKind: string; runtimeSessionId: string; subscriberId: string; @@ -118,7 +118,7 @@ export class StreamConsumerService implements OnModuleDestroy { marker: ActiveStream, params: { runId: string; - execution: ExecutionRequest; + execution: RunDescriptor; runtimeKind: string; runtimeSessionId: string; subscriberId: string; @@ -128,7 +128,9 @@ export class StreamConsumerService implements OnModuleDestroy { ): Promise { const provider = this.runtimeRegistry.get(params.runtimeKind); const context = { - knownParticipants: new Set(params.execution.session.participants.map((item) => item.id)), + knownParticipants: new Set( + params.execution.session.participants.map((item) => item.id), + ), execution: params.execution, runtimeSessionId: params.runtimeSessionId }; diff --git a/src/runtime/grpc-helpers.spec.ts b/src/runtime/grpc-helpers.spec.ts new file mode 100644 index 0000000..7360c20 --- /dev/null +++ b/src/runtime/grpc-helpers.spec.ts @@ -0,0 +1,134 @@ +import * as grpc from '@grpc/grpc-js'; +import { + buildMetadata, + fromAck, + fromEnvelope, + fromSessionMetadata, + 
getClientMethod, +} from './grpc-helpers'; + +describe('gRPC helpers (Q3-1)', () => { + describe('fromEnvelope', () => { + it('unwraps a gRPC envelope into the internal shape with numeric timestamp', () => { + const raw = { + macpVersion: '1.0', + mode: 'macp.mode.decision.v1', + messageType: 'Proposal', + messageId: 'msg-1', + sessionId: 'sess-1', + sender: 'agent-a', + timestampUnixMs: '1700000000000', // Long serialized as string (longs: String) + payload: Buffer.from('hello'), + }; + const result = fromEnvelope(raw); + expect(result.timestampUnixMs).toBe(1700000000000); + expect(result.payload.toString()).toBe('hello'); + expect(result.messageId).toBe('msg-1'); + }); + + it('coerces non-buffer payload to Buffer', () => { + const result = fromEnvelope({ + macpVersion: '1.0', mode: '', messageType: '', messageId: '', + sessionId: '', sender: '', payload: 'hello', + }); + expect(Buffer.isBuffer(result.payload)).toBe(true); + }); + + it('defaults timestampUnixMs to now when missing', () => { + const before = Date.now(); + const result = fromEnvelope({ + macpVersion: '1.0', mode: '', messageType: '', messageId: '', + sessionId: '', sender: '', + }); + expect(result.timestampUnixMs).toBeGreaterThanOrEqual(before); + }); + }); + + describe('fromAck', () => { + it('maps a successful ack with default session state', () => { + const ack = fromAck({ ok: true, sessionId: 's', messageId: 'm' }); + expect(ack.ok).toBe(true); + expect(ack.sessionState).toBe('SESSION_STATE_UNSPECIFIED'); + expect(ack.error).toBeUndefined(); + }); + + it('parses structured reasons from error.details JSON', () => { + const detailsBytes = Buffer.from(JSON.stringify({ reasons: ['rule-x', 'rule-y'] })); + const ack = fromAck({ + ok: false, + error: { code: 'POLICY_DENIED', message: 'no', details: detailsBytes }, + }); + expect(ack.error?.reasons).toEqual(['rule-x', 'rule-y']); + expect(ack.error?.code).toBe('POLICY_DENIED'); + expect(ack.error?.detailsBase64).toBe(detailsBytes.toString('base64')); 
+ }); + + it('falls back to trailing macp-error-details-bin metadata for reasons', () => { + const meta = new grpc.Metadata(); + meta.add('macp-error-details-bin', Buffer.from(JSON.stringify({ reasons: ['meta-rule'] }))); + const ack = fromAck({ ok: false, error: { code: 'POLICY_DENIED', message: '' } }, meta); + expect(ack.error?.reasons).toEqual(['meta-rule']); + }); + + it('tolerates malformed details JSON by leaving reasons undefined', () => { + const ack = fromAck({ + ok: false, + error: { code: 'POLICY_DENIED', message: '', details: Buffer.from('{not json') }, + }); + expect(ack.error?.reasons).toBeUndefined(); + }); + }); + + describe('fromSessionMetadata', () => { + it('maps fields including optional initiator', () => { + const snap = fromSessionMetadata({ + sessionId: 's', + mode: 'decision', + state: 'SESSION_STATE_OPEN', + initiator: 'agent-1', + startedAtUnixMs: '123', + }); + expect(snap.sessionId).toBe('s'); + expect(snap.state).toBe('SESSION_STATE_OPEN'); + expect(snap.initiator).toBe('agent-1'); + expect(snap.startedAtUnixMs).toBe(123); + }); + + it('defaults state to UNSPECIFIED when missing', () => { + const snap = fromSessionMetadata({}); + expect(snap.state).toBe('SESSION_STATE_UNSPECIFIED'); + }); + }); + + describe('buildMetadata', () => { + it('sets only truthy credential keys', () => { + const meta = buildMetadata({ authorization: 'Bearer abc', 'x-empty': '' }); + expect(meta.get('authorization')).toEqual(['Bearer abc']); + expect(meta.get('x-empty')).toEqual([]); + }); + + it('always tries to inject trace context (traceparent absent is OK)', () => { + // No active OTel span means inject is a no-op. We just verify buildMetadata + // returns a real grpc.Metadata and doesn't throw. 
+ expect(() => buildMetadata({})).not.toThrow(); + }); + }); + + describe('getClientMethod', () => { + it('resolves PascalCase method names', () => { + const fn = () => 'ok'; + const client = { Initialize: fn }; + expect(getClientMethod(client, 'Initialize')).toBe(fn); + }); + + it('falls back to lowerCamelCase', () => { + const fn = () => 'ok'; + const client = { initialize: fn }; + expect(getClientMethod(client, 'Initialize')).toBe(fn); + }); + + it('throws on unknown method', () => { + expect(() => getClientMethod({}, 'Nope')).toThrow(/not available on client/); + }); + }); +}); diff --git a/src/runtime/grpc-helpers.ts b/src/runtime/grpc-helpers.ts new file mode 100644 index 0000000..8ab2c94 --- /dev/null +++ b/src/runtime/grpc-helpers.ts @@ -0,0 +1,119 @@ +/* eslint-disable @typescript-eslint/no-explicit-any -- gRPC dynamic proto loading returns untyped objects */ +import * as grpc from '@grpc/grpc-js'; +import { context, propagation } from '@opentelemetry/api'; +import { RuntimeAck, RuntimeEnvelope, RuntimeSessionSnapshot } from '../contracts/runtime'; + +/** + * Marshalling helpers extracted from `rust-runtime.provider.ts` (Q3-1). + * + * Intentionally pure functions — no DI, no state. Keeping them here makes the + * provider class itself focused on the RPC entry points and the subscribe-session + * event loop. + */ + +/** gRPC envelope → internal `RuntimeEnvelope`. */ +export function fromEnvelope(envelope: any): RuntimeEnvelope { + return { + macpVersion: envelope.macpVersion, + mode: envelope.mode, + messageType: envelope.messageType, + messageId: envelope.messageId, + sessionId: envelope.sessionId, + sender: envelope.sender, + timestampUnixMs: Number(envelope.timestampUnixMs ?? Date.now()), + payload: Buffer.isBuffer(envelope.payload) ? envelope.payload : Buffer.from(envelope.payload ?? ''), + }; +} + +/** gRPC ack → internal `RuntimeAck`. Extracts structured `reasons` from details bytes or trailing metadata. 
*/ +export function fromAck(ack: any, trailingMetadata?: grpc.Metadata): RuntimeAck { + let reasons: string[] | undefined; + + if (ack?.error?.details) { + try { + const parsed = JSON.parse(Buffer.from(ack.error.details).toString('utf-8')); + if (Array.isArray(parsed.reasons)) reasons = parsed.reasons; + } catch { /* ignore parse errors */ } + } + + if (!reasons && trailingMetadata) { + const detailsBin = trailingMetadata.get('macp-error-details-bin'); + if (detailsBin && detailsBin.length > 0) { + try { + const parsed = JSON.parse(Buffer.from(detailsBin[0] as Buffer).toString('utf-8')); + if (Array.isArray(parsed.reasons)) reasons = parsed.reasons; + } catch { /* ignore parse errors */ } + } + } + + return { + ok: Boolean(ack?.ok), + duplicate: Boolean(ack?.duplicate), + messageId: ack?.messageId ?? '', + sessionId: ack?.sessionId ?? '', + acceptedAtUnixMs: Number(ack?.acceptedAtUnixMs ?? Date.now()), + sessionState: (ack?.sessionState ?? 'SESSION_STATE_UNSPECIFIED') as RuntimeAck['sessionState'], + error: ack?.error + ? { + code: ack.error.code, + message: ack.error.message, + sessionId: ack.error.sessionId, + messageId: ack.error.messageId, + detailsBase64: ack.error.details ? Buffer.from(ack.error.details).toString('base64') : undefined, + details: ack.error.details ? Buffer.from(ack.error.details) : undefined, + reasons, + } + : undefined, + }; +} + +/** gRPC session metadata → internal `RuntimeSessionSnapshot`. */ +export function fromSessionMetadata(metadata: any): RuntimeSessionSnapshot { + return { + sessionId: metadata?.sessionId ?? '', + mode: metadata?.mode ?? '', + state: metadata?.state ?? 'SESSION_STATE_UNSPECIFIED', + startedAtUnixMs: metadata?.startedAtUnixMs ? Number(metadata.startedAtUnixMs) : undefined, + expiresAtUnixMs: metadata?.expiresAtUnixMs ? 
Number(metadata.expiresAtUnixMs) : undefined, + modeVersion: metadata?.modeVersion, + configurationVersion: metadata?.configurationVersion, + policyVersion: metadata?.policyVersion, + initiator: metadata?.initiator ?? undefined, + }; +} + +/** + * Build gRPC metadata from a credential map, injecting W3C trace context so + * runtime-side spans become children of the active control-plane span. + */ +export function buildMetadata(metadataInput: Record): grpc.Metadata { + const metadata = new grpc.Metadata(); + for (const [key, value] of Object.entries(metadataInput)) { + if (value) metadata.set(key, value); + } + injectTraceContext(metadata); + return metadata; +} + +/** W3C trace context propagation via OTel API — traceparent + optional tracestate. */ +export function injectTraceContext(metadata: grpc.Metadata): void { + const carrier: Record = {}; + propagation.inject(context.active(), carrier); + for (const [key, value] of Object.entries(carrier)) { + if (value) metadata.set(key, value); + } +} + +/** + * Resolve a client method by either PascalCase (the RPC name) or lowerCamelCase + * (what some @grpc/grpc-js versions expose on dynamically-loaded clients). 
+ */ +// eslint-disable-next-line @typescript-eslint/no-unsafe-function-type +export function getClientMethod(client: any, method: string): Function { + const direct = client[method]; + if (typeof direct === 'function') return direct; + const lowerCamel = method.charAt(0).toLowerCase() + method.slice(1); + const fallback = client[lowerCamel]; + if (typeof fallback === 'function') return fallback; + throw new Error(`runtime gRPC method '${method}' is not available on client`); +} diff --git a/src/runtime/mock-runtime.provider.ts b/src/runtime/mock-runtime.provider.ts deleted file mode 100644 index a77b773..0000000 --- a/src/runtime/mock-runtime.provider.ts +++ /dev/null @@ -1,232 +0,0 @@ -import { Injectable, Logger } from '@nestjs/common'; -import { randomUUID } from 'node:crypto'; -import { - RawRuntimeEvent, - RuntimeAck, - RuntimeCancelResult, - RuntimeCancelSessionRequest, - RuntimeGetSessionRequest, - RuntimeHealth, - RuntimeInitializeRequest, - RuntimeInitializeResult, - RuntimeManifestResult, - RuntimeModeDescriptor, - RuntimeOpenSessionRequest, - RuntimeProvider, - RuntimeRootDescriptor, - RuntimeSendRequest, - RuntimeSendResult, - RuntimeSessionHandle, - RuntimeSessionSnapshot, - RuntimeStartSessionRequest, - RuntimeStartSessionResult, - RuntimeStreamSessionRequest, - RuntimeRegisterPolicyRequest, - RuntimeRegisterPolicyResult, - RuntimeUnregisterPolicyRequest, - RuntimeUnregisterPolicyResult, - RuntimeGetPolicyRequest, - RuntimeListPoliciesRequest, - RuntimePolicyDescriptor -} from '../contracts/runtime'; - -@Injectable() -export class MockRuntimeProvider implements RuntimeProvider { - readonly kind = 'mock'; - private readonly logger = new Logger(MockRuntimeProvider.name); - - async initialize(_req: RuntimeInitializeRequest): Promise { - return { - selectedProtocolVersion: '1.0', - runtimeInfo: { name: 'mock-runtime', version: '0.0.1' }, - supportedModes: ['mock-decision', 'mock-task'] - }; - } - - openSession(req: RuntimeOpenSessionRequest): 
RuntimeSessionHandle { - const sessionId = randomUUID(); - const initiator = req.execution.session.participants[0]?.id ?? 'mock-initiator'; - const ack = this.makeAck(sessionId); - - const events: AsyncIterable = { - [Symbol.asyncIterator]() { - let done = false; - return { - async next(): Promise> { - if (done) return { done: true, value: undefined }; - done = true; - return { - done: false, - value: { - kind: 'stream-status', - receivedAt: new Date().toISOString(), - streamStatus: { status: 'opened' } - } - }; - }, - async return(): Promise> { - done = true; - return { done: true, value: undefined }; - } - }; - } - }; - - return { - send: () => { /* no-op */ }, - events, - closeWrite: () => { /* no-op */ }, - abort: () => { /* no-op */ }, - sessionAck: Promise.resolve({ - runtimeSessionId: sessionId, - initiator, - ack - }) - }; - } - - async startSession(req: RuntimeStartSessionRequest): Promise { - const sessionId = randomUUID(); - return { - runtimeSessionId: sessionId, - initiator: req.execution.session.participants[0]?.id ?? 
'mock-initiator', - ack: this.makeAck(sessionId) - }; - } - - async send(req: RuntimeSendRequest): Promise { - const messageId = randomUUID(); - return { - ack: this.makeAck(req.runtimeSessionId, messageId), - envelope: { - macpVersion: '1.0', - mode: req.modeName, - messageType: req.messageType, - messageId, - sessionId: req.runtimeSessionId, - sender: req.from, - timestampUnixMs: Date.now(), - payload: req.payload - } - }; - } - - async *streamSession(_req: RuntimeStreamSessionRequest): AsyncIterable { - yield { - kind: 'stream-status', - receivedAt: new Date().toISOString(), - streamStatus: { status: 'opened' } - }; - // Mock stream ends immediately - } - - async getSession(req: RuntimeGetSessionRequest): Promise { - return { - sessionId: req.runtimeSessionId, - mode: 'mock-decision', - state: 'SESSION_STATE_OPEN' - }; - } - - async cancelSession(_req: RuntimeCancelSessionRequest): Promise { - return { ack: this.makeAck(_req.runtimeSessionId) }; - } - - async getManifest(): Promise { - return { - agentId: 'mock-runtime', - title: 'Mock Runtime', - description: 'A mock runtime for testing', - supportedModes: ['mock-decision', 'mock-task'], - metadata: {} - }; - } - - async listModes(): Promise { - return [ - { - mode: 'mock-decision', - modeVersion: '1.0', - title: 'Mock Decision', - messageTypes: ['Proposal', 'Vote', 'Commitment'], - terminalMessageTypes: ['Commitment'] - } - ]; - } - - async listRoots(): Promise { - return []; - } - - async health(): Promise { - return { ok: true, runtimeKind: this.kind, detail: 'mock runtime always healthy' }; - } - - // ── Governance policy lifecycle (RFC-MACP-0012) ────────────────── - - private policies: Map = new Map([ - ['policy.default', { - policyId: 'policy.default', - mode: '*', - description: 'Default policy — no additional governance constraints', - rules: Buffer.from(JSON.stringify({ - voting: { algorithm: 'none', quorum: { type: 'count', value: 0 } }, - objection_handling: { block_severity_vetoes: false, 
veto_threshold: 1 }, - evaluation: { required_before_voting: false, minimum_confidence: 0.0 }, - commitment: { authority: 'initiator_only', designated_roles: [], require_vote_quorum: false } - })), - schemaVersion: 1, - registeredAtUnixMs: Date.now() - }] - ]); - - async registerPolicy(req: RuntimeRegisterPolicyRequest): Promise { - const d = req.descriptor; - if (d.policyId === 'policy.default') { - return { ok: false, error: 'cannot register reserved policy.default' }; - } - if (this.policies.has(d.policyId)) { - return { ok: false, error: `policy ${d.policyId} already registered` }; - } - this.policies.set(d.policyId, { ...d, registeredAtUnixMs: Date.now() }); - return { ok: true }; - } - - async unregisterPolicy(req: RuntimeUnregisterPolicyRequest): Promise { - if (req.policyId === 'policy.default') { - return { ok: false, error: 'cannot unregister reserved policy.default' }; - } - if (!this.policies.has(req.policyId)) { - return { ok: false, error: `policy ${req.policyId} not found` }; - } - this.policies.delete(req.policyId); - return { ok: true }; - } - - async getPolicy(req: RuntimeGetPolicyRequest): Promise { - const policy = this.policies.get(req.policyId); - if (!policy) { - throw new Error(`policy ${req.policyId} not found`); - } - return policy; - } - - async listPolicies(req?: RuntimeListPoliciesRequest): Promise { - const all = Array.from(this.policies.values()); - if (req?.mode) { - return all.filter((p) => p.mode === req.mode || p.mode === '*'); - } - return all; - } - - private makeAck(sessionId: string, messageId?: string): RuntimeAck { - return { - ok: true, - duplicate: false, - messageId: messageId ?? 
randomUUID(), - sessionId, - acceptedAtUnixMs: Date.now(), - sessionState: 'SESSION_STATE_OPEN' - }; - } -} diff --git a/src/runtime/observer-invariant.spec.ts b/src/runtime/observer-invariant.spec.ts new file mode 100644 index 0000000..f650647 --- /dev/null +++ b/src/runtime/observer-invariant.spec.ts @@ -0,0 +1,92 @@ +import { readdirSync, readFileSync, statSync } from 'node:fs'; +import { join } from 'node:path'; + +/** + * Invariant lint (direct-agent-auth §Verification). + * + * The control-plane's observer role forbids calling `provider.send(` anywhere in `src/`. + * Agents authenticate to the runtime directly (RFC-MACP-0004 §4). If a future change + * reintroduces an envelope-forging path, this test fails CI. + */ + +const SRC_DIR = join(__dirname, '..', '..', 'src'); +const FORBIDDEN_PATTERNS: Array<{ pattern: RegExp; message: string }> = [ + { + pattern: /provider\.send\s*\(/, + message: + 'provider.send() is forbidden — the control-plane must never emit envelopes. ' + + 'Agents speak for themselves via macp-sdk-* (direct-agent-auth §Invariants #5).', + }, + { + pattern: /openSession\s*\(/, + message: + 'openSession() is forbidden — it forges SessionStart on the agent\'s behalf. ' + + 'Use provider.subscribeSession() for read-only observation (CP-3).', + }, + { + pattern: /chooseInitiator\s*\(/, + message: + 'chooseInitiator() is forbidden — the control-plane must not pick an initiator. 
' + + 'The initiator is whichever agent calls SessionStart; learned via GetSession (CP-3).', + }, + { + pattern: /retryKickoff\s*\(/, + message: + 'retryKickoff() is forbidden — kickoff messages are emitted by the initiator agent ' + + 'via its SDK, not by the control-plane (CP-4).', + }, +]; + +function walk(dir: string): string[] { + const files: string[] = []; + for (const entry of readdirSync(dir)) { + const full = join(dir, entry); + const stat = statSync(full); + if (stat.isDirectory()) { + files.push(...walk(full)); + } else if (full.endsWith('.ts') && !full.endsWith('.spec.ts')) { + files.push(full); + } + } + return files; +} + +describe('Observer invariant — no envelope-forging paths in src/', () => { + const tsFiles = walk(SRC_DIR); + + for (const { pattern, message } of FORBIDDEN_PATTERNS) { + it(`forbids ${pattern.source}`, () => { + const violations: Array<{ file: string; line: number; text: string }> = []; + for (const file of tsFiles) { + const content = readFileSync(file, 'utf8'); + let inBlockComment = false; + content.split('\n').forEach((line, idx) => { + const trimmed = line.trim(); + // Track multi-line /* ... */ comments across lines. + if (inBlockComment) { + if (trimmed.includes('*/')) inBlockComment = false; + return; + } + if (trimmed.startsWith('/*') && !trimmed.includes('*/')) { + inBlockComment = true; + return; + } + // Skip pure-comment lines (// or * inside a block). + if (/^\s*(?:\/\/|\*)/.test(line)) return; + // Strip trailing line comments from mixed lines. 
+ const codeOnly = line.replace(/\/\/.*$/, '').replace(/\/\*.*?\*\//g, ''); + if (pattern.test(codeOnly)) { + violations.push({ file, line: idx + 1, text: line.trim() }); + } + }); + } + + if (violations.length > 0) { + const msg = violations + .map((v) => ` ${v.file}:${v.line}: ${v.text}`) + .join('\n'); + throw new Error(`${message}\n\nFound ${violations.length} violation(s):\n${msg}`); + } + }); + } +}); diff --git a/src/runtime/proto-registry.service.spec.ts b/src/runtime/proto-registry.service.spec.ts index 3fb4bd3..19d286e 100644 --- a/src/runtime/proto-registry.service.spec.ts +++ b/src/runtime/proto-registry.service.spec.ts @@ -68,110 +68,6 @@ describe('ProtoRegistryService', () => { }); }); - // ========================================================================= - // encodePayloadEnvelope - // ========================================================================= - describe('encodePayloadEnvelope', () => { - it('encodes json to a UTF-8 JSON buffer', () => { - const result = service.encodePayloadEnvelope({ - encoding: 'json', - json: { hello: 'world' } - }); - - expect(result).toBeInstanceOf(Buffer); - expect(JSON.parse(result.toString('utf8'))).toEqual({ hello: 'world' }); - }); - - it('encodes json with empty object when json is undefined', () => { - const result = service.encodePayloadEnvelope({ encoding: 'json' }); - - expect(JSON.parse(result.toString('utf8'))).toEqual({}); - }); - - it('encodes text to a UTF-8 buffer', () => { - const result = service.encodePayloadEnvelope({ - encoding: 'text', - text: 'hello world' - }); - - expect(result).toBeInstanceOf(Buffer); - expect(result.toString('utf8')).toBe('hello world'); - }); - - it('encodes text with empty string when text is undefined', () => { - const result = service.encodePayloadEnvelope({ encoding: 'text' }); - - expect(result.toString('utf8')).toBe(''); - }); - - it('decodes base64 to a buffer', () => { - const original = 'binary data here'; - const b64 = 
Buffer.from(original).toString('base64'); - - const result = service.encodePayloadEnvelope({ - encoding: 'base64', - base64: b64 - }); - - expect(result).toBeInstanceOf(Buffer); - expect(result.toString('utf8')).toBe(original); - }); - - it('encodes proto using lookupType and encode', () => { - const result = service.encodePayloadEnvelope({ - encoding: 'proto', - proto: { - typeName: 'macp.v1.SessionStartPayload', - value: { sessionId: 'sess-1' } - } - }); - - expect(mockLookupType).toHaveBeenCalledWith('macp.v1.SessionStartPayload'); - expect(mockTypeInstance.fromObject).toHaveBeenCalledWith({ sessionId: 'sess-1' }); - expect(result).toBeInstanceOf(Buffer); - }); - - it('normalizes snake_case proto fields before encoding', () => { - service.encodePayloadEnvelope({ - encoding: 'proto', - proto: { - typeName: 'macp.v1.SessionStartPayload', - value: { - mode_version: '1.0.0', - configuration_version: 'cfg-1', - policy_version: 'policy-1', - ttl_ms: 60000, - roots: [{ website_url: 'https://example.test' }] - } - } - }); - - expect(mockTypeInstance.fromObject).toHaveBeenCalledWith({ - modeVersion: '1.0.0', - configurationVersion: 'cfg-1', - policyVersion: 'policy-1', - ttlMs: 60000, - roots: [{ websiteUrl: 'https://example.test' }] - }); - }); - - it('throws when proto input is missing proto value', () => { - expect(() => - service.encodePayloadEnvelope({ - encoding: 'proto' - } as any) - ).toThrow('proto payload envelope requires proto value'); - }); - - it('throws on unsupported encoding', () => { - expect(() => - service.encodePayloadEnvelope({ - encoding: 'xml' as any - }) - ).toThrow('unsupported payload encoding xml'); - }); - }); - // ========================================================================= // getKnownTypeName // ========================================================================= @@ -286,29 +182,4 @@ describe('ProtoRegistryService', () => { }); }); - // ========================================================================= - // 
encodeSessionContext - // ========================================================================= - describe('encodeSessionContext', () => { - it('uses encodePayloadEnvelope when contextEnvelope is provided', () => { - const result = service.encodeSessionContext(undefined, { - encoding: 'json', - json: { ctx: true } - }); - - expect(JSON.parse(result.toString('utf8'))).toEqual({ ctx: true }); - }); - - it('encodes context as JSON when no envelope', () => { - const result = service.encodeSessionContext({ foo: 'bar' }); - - expect(JSON.parse(result.toString('utf8'))).toEqual({ foo: 'bar' }); - }); - - it('returns empty buffer when both are undefined', () => { - const result = service.encodeSessionContext(undefined, undefined); - - expect(result.length).toBe(0); - }); - }); }); diff --git a/src/runtime/proto-registry.service.ts b/src/runtime/proto-registry.service.ts index b9f0297..c31ae3c 100644 --- a/src/runtime/proto-registry.service.ts +++ b/src/runtime/proto-registry.service.ts @@ -1,7 +1,6 @@ import { Injectable, Logger, OnModuleInit } from '@nestjs/common'; import * as path from 'node:path'; import * as protobuf from 'protobufjs'; -import { PayloadEnvelopeInput } from '../contracts/control-plane'; const MESSAGE_TYPE_MAP: Record> = { '__core__': { @@ -94,56 +93,6 @@ export class ProtoRegistryService implements OnModuleInit { } } - encodeSessionContext(context?: Record, contextEnvelope?: PayloadEnvelopeInput): Buffer { - if (contextEnvelope) return this.encodePayloadEnvelope(contextEnvelope); - if (!context) return Buffer.alloc(0); - return Buffer.from(JSON.stringify(context), 'utf8'); - } - - encodePayloadEnvelope(input: PayloadEnvelopeInput): Buffer { - switch (input.encoding) { - case 'json': - return Buffer.from(JSON.stringify(input.json ?? {}), 'utf8'); - case 'text': - return Buffer.from(input.text ?? '', 'utf8'); - case 'base64': - return Buffer.from(input.base64 ?? 
'', 'base64'); - case 'proto': { - if (!input.proto) throw new Error('proto payload envelope requires proto value'); - return this.encodeMessage(input.proto.typeName, input.proto.value); - } - default: - throw new Error(`unsupported payload encoding ${(input as PayloadEnvelopeInput).encoding}`); - } - } - - encodeMessage(typeName: string, value: Record): Buffer { - const type = this.lookupType(typeName); - const normalized = this.normalizeProtoValue(value) as Record; - const message = type.fromObject(normalized); - return Buffer.from(type.encode(message).finish()); - } - - private normalizeProtoValue(value: unknown): unknown { - if (Array.isArray(value)) { - return value.map((item) => this.normalizeProtoValue(item)); - } - - if (!value || typeof value !== 'object' || Buffer.isBuffer(value)) { - return value; - } - - const normalized: Record = {}; - for (const [key, entry] of Object.entries(value as Record)) { - const normalizedEntry = this.normalizeProtoValue(entry); - const normalizedKey = key.includes('_') - ? key.replace(/_([a-z])/g, (_, char: string) => char.toUpperCase()) - : key; - normalized[normalizedKey] = normalizedEntry; - } - return normalized; - } - decodeKnown(modeName: string, messageType: string, payload: Buffer): Record | undefined { const typeName = MESSAGE_TYPE_MAP[modeName]?.[messageType] ?? MESSAGE_TYPE_MAP.__core__[messageType]; @@ -154,7 +103,14 @@ export class ProtoRegistryService implements OnModuleInit { if (typeName === '__json__') { return this.tryDecodeUtf8(payload); } - return this.decodeMessage(typeName, payload); + // Try proto decode first (real Rust runtime). If the bytes aren't valid proto + // (e.g. mock runtime sends JSON), fall back to UTF-8/JSON parsing rather than + // throwing — the normalizer must be resilient to either wire format. 
+ try { + return this.decodeMessage(typeName, payload); + } catch { + return this.tryDecodeUtf8(payload); + } } decodeMessage(typeName: string, payload: Buffer): Record { diff --git a/src/runtime/runtime-credential-resolver.service.spec.ts b/src/runtime/runtime-credential-resolver.service.spec.ts index 4570d1f..5b21718 100644 --- a/src/runtime/runtime-credential-resolver.service.spec.ts +++ b/src/runtime/runtime-credential-resolver.service.spec.ts @@ -1,179 +1,80 @@ import { RuntimeCredentialResolverService } from './runtime-credential-resolver.service'; import { AppConfigService } from '../config/app-config.service'; -describe('RuntimeCredentialResolverService', () => { - let service: RuntimeCredentialResolverService; - let mockConfig: Partial; - - function buildConfig(overrides: Partial = {}): AppConfigService { - return { +describe('RuntimeCredentialResolverService (single-bearer, CP-9)', () => { + function makeService(config: Partial): RuntimeCredentialResolverService { + const merged = { runtimeDevAgentId: 'control-plane', runtimeBearerToken: '', runtimeUseDevHeader: false, - ...overrides, + ...config, } as AppConfigService; + return new RuntimeCredentialResolverService(merged); } - beforeEach(() => { - mockConfig = buildConfig(); - service = new RuntimeCredentialResolverService(mockConfig as AppConfigService); - }); - - // =========================================================================== - // Sender resolution priority - // =========================================================================== - describe('sender resolution', () => { - it('uses participant.id as highest priority (ignores transportIdentity for sender consistency)', async () => { - // participant.id must equal the sender string used in startSession so that - // session.initiator_sender matches later Commitment sender checks. - // transportIdentity is accepted in the input for symmetry with the - // runtime contract but is intentionally NOT used as the sender id. 
- const result = await service.resolve({ - runtimeKind: 'rust', - requester: { actorId: 'actor-1' }, - participant: { id: 'part-1', transportIdentity: 'transport-id' }, - fallbackSender: 'fallback', - }); - - expect(result.sender).toBe('part-1'); - }); - - it('uses participant.id when only it is provided', async () => { - const result = await service.resolve({ - runtimeKind: 'rust', - requester: { actorId: 'actor-1' }, - participant: { id: 'part-1' }, - fallbackSender: 'fallback', - }); - - expect(result.sender).toBe('part-1'); - }); - - it('falls back to requester.actorId when no participant', async () => { - const result = await service.resolve({ - runtimeKind: 'rust', - requester: { actorId: 'actor-1' }, - fallbackSender: 'fallback', - }); - - expect(result.sender).toBe('actor-1'); - }); - - it('falls back to fallbackSender when no participant or requester actorId', async () => { - const result = await service.resolve({ - runtimeKind: 'rust', - requester: {}, - fallbackSender: 'fallback', - }); - - expect(result.sender).toBe('fallback'); - }); - - it('falls back to config.runtimeDevAgentId as last resort', async () => { - service = new RuntimeCredentialResolverService( - buildConfig({ runtimeDevAgentId: 'dev-agent-99' }) as AppConfigService, - ); - - const result = await service.resolve({ - runtimeKind: 'rust', - }); - - expect(result.sender).toBe('dev-agent-99'); + describe('sender identity', () => { + it('always returns the control-plane dev agent id as sender', async () => { + const service = makeService({ runtimeDevAgentId: 'my-control-plane' }); + const result = await service.resolve({ runtimeKind: 'rust' }); + expect(result.sender).toBe('my-control-plane'); }); - it('uses config.runtimeDevAgentId when all optional fields are undefined', async () => { - const result = await service.resolve({ - runtimeKind: 'rust', - requester: undefined, - participant: undefined, - fallbackSender: undefined, - }); - + it('defaults sender to "control-plane" when no dev agent 
id is configured', async () => { + const service = makeService({}); + const result = await service.resolve({ runtimeKind: 'rust' }); expect(result.sender).toBe('control-plane'); }); }); - // =========================================================================== - // Metadata — bearer token - // =========================================================================== - describe('bearer token in metadata', () => { - it('sets authorization header when runtimeBearerToken is configured', async () => { - service = new RuntimeCredentialResolverService( - buildConfig({ runtimeBearerToken: 'my-secret-token' }) as AppConfigService, - ); - + describe('bearer token', () => { + it('attaches the configured bearer token as Authorization', async () => { + const service = makeService({ runtimeBearerToken: 'obs-token' }); const result = await service.resolve({ runtimeKind: 'rust' }); - - expect(result.metadata.authorization).toBe('Bearer my-secret-token'); + expect(result.metadata.authorization).toBe('Bearer obs-token'); }); - it('does not set x-macp-agent-id when bearer token is present (even if useDevHeader is true)', async () => { - service = new RuntimeCredentialResolverService( - buildConfig({ - runtimeBearerToken: 'token', - runtimeUseDevHeader: true, - }) as AppConfigService, - ); - + it('does not attach an x-macp-agent-id header when a bearer token is present', async () => { + const service = makeService({ + runtimeBearerToken: 'obs-token', + runtimeUseDevHeader: true, + }); const result = await service.resolve({ runtimeKind: 'rust' }); - - expect(result.metadata.authorization).toBe('Bearer token'); expect(result.metadata['x-macp-agent-id']).toBeUndefined(); }); }); - // =========================================================================== - // Metadata — dev header - // =========================================================================== - describe('dev header in metadata', () => { - it('sets x-macp-agent-id when useDevHeader is true and no bearer 
token', async () => { - service = new RuntimeCredentialResolverService( - buildConfig({ - runtimeBearerToken: '', - runtimeUseDevHeader: true, - runtimeDevAgentId: 'dev-agent', - }) as AppConfigService, - ); - + describe('dev header fallback', () => { + it('falls back to x-macp-agent-id when no bearer token and dev header is enabled', async () => { + const service = makeService({ + runtimeBearerToken: '', + runtimeUseDevHeader: true, + runtimeDevAgentId: 'control-plane', + }); const result = await service.resolve({ runtimeKind: 'rust' }); - - expect(result.metadata['x-macp-agent-id']).toBe('dev-agent'); + expect(result.metadata['x-macp-agent-id']).toBe('control-plane'); expect(result.metadata.authorization).toBeUndefined(); }); - - it('uses the resolved sender for x-macp-agent-id (not the config value)', async () => { - service = new RuntimeCredentialResolverService( - buildConfig({ - runtimeBearerToken: '', - runtimeUseDevHeader: true, - runtimeDevAgentId: 'default-agent', - }) as AppConfigService, - ); - - const result = await service.resolve({ - runtimeKind: 'rust', - participant: { id: 'agent-42' }, - }); - - expect(result.metadata['x-macp-agent-id']).toBe('agent-42'); - }); }); - // =========================================================================== - // No auth headers - // =========================================================================== - describe('no auth headers', () => { - it('returns empty metadata when no bearer token and useDevHeader is false', async () => { - service = new RuntimeCredentialResolverService( - buildConfig({ - runtimeBearerToken: '', - runtimeUseDevHeader: false, - }) as AppConfigService, - ); - + describe('no credentials configured', () => { + it('returns empty metadata when neither bearer token nor dev header is enabled', async () => { + const service = makeService({}); const result = await service.resolve({ runtimeKind: 'rust' }); - expect(result.metadata).toEqual({}); }); }); + + describe('invariant — no per-sender 
overrides (direct-agent-auth §Invariants)', () => { + it('ignores any extra fields in the request (participant, requester, fallbackSender)', async () => { + const service = makeService({ runtimeBearerToken: 'obs-token' }); + const result = await service.resolve({ + runtimeKind: 'rust', + // Cast — these fields are no longer accepted, but the resolver must tolerate them + // during the deprecation window without routing on them. + ...( { participant: { id: 'risk-agent' }, requester: { actorId: 'user-1' } } as unknown as Record), + } as { runtimeKind: string }); + expect(result.metadata.authorization).toBe('Bearer obs-token'); + expect(result.sender).toBe('control-plane'); + }); + }); }); diff --git a/src/runtime/runtime-credential-resolver.service.ts b/src/runtime/runtime-credential-resolver.service.ts index ea3c54d..72d6e23 100644 --- a/src/runtime/runtime-credential-resolver.service.ts +++ b/src/runtime/runtime-credential-resolver.service.ts @@ -2,29 +2,28 @@ import { Injectable } from '@nestjs/common'; import { AppConfigService } from '../config/app-config.service'; import { RuntimeCredentialResolver, RuntimeCredentials } from '../contracts/runtime'; +/** + * Single-bearer credential resolver (CP-9, direct-agent-auth.md). + * + * The control-plane has one runtime identity — its own least-privilege Bearer + * token with `can_start_sessions: false`. All observer calls (GetSession, + * StreamSession, ListPolicies, CancelSession) use this identity. + * + * Per-agent token maps were removed because agents now authenticate to the + * runtime directly (RFC-MACP-0004 §4). The control-plane never forges envelopes + * on behalf of agents. 
+ */ @Injectable() export class RuntimeCredentialResolverService implements RuntimeCredentialResolver { constructor(private readonly config: AppConfigService) {} - async resolve(req: { - runtimeKind: string; - requester?: { actorId?: string; actorType?: string }; - participant?: { id: string; transportIdentity?: string }; - fallbackSender?: string; - }): Promise { - // Use participant ID as the sender — must be consistent between startSession and send - // so that session.initiator_sender matches later Commitment sender checks. - const sender = - req.participant?.id ?? - req.requester?.actorId ?? - req.fallbackSender ?? - this.config.runtimeDevAgentId; - + async resolve(_req: { runtimeKind: string }): Promise { + const sender = this.config.runtimeDevAgentId; const metadata: Record = {}; + if (this.config.runtimeBearerToken) { metadata.authorization = `Bearer ${this.config.runtimeBearerToken}`; - } - if (!metadata.authorization && this.config.runtimeUseDevHeader) { + } else if (this.config.runtimeUseDevHeader) { metadata['x-macp-agent-id'] = sender; } diff --git a/src/runtime/rust-runtime.provider.ts b/src/runtime/rust-runtime.provider.ts index fa5cfc2..e830aee 100644 --- a/src/runtime/rust-runtime.provider.ts +++ b/src/runtime/rust-runtime.provider.ts @@ -2,33 +2,23 @@ import { Injectable, Logger, OnModuleInit } from '@nestjs/common'; import * as grpc from '@grpc/grpc-js'; import * as protoLoader from '@grpc/proto-loader'; -import { context, propagation } from '@opentelemetry/api'; -import { randomUUID } from 'node:crypto'; import * as path from 'node:path'; import { AppConfigService } from '../config/app-config.service'; -import { ExecutionRequest, ParticipantRef } from '../contracts/control-plane'; import { RawRuntimeEvent, - RuntimeAck, RuntimeCancelResult, RuntimeCancelSessionRequest, - RuntimeEnvelope, RuntimeGetSessionRequest, RuntimeHealth, RuntimeInitializeRequest, RuntimeInitializeResult, RuntimeManifestResult, RuntimeModeDescriptor, - 
RuntimeOpenSessionRequest, RuntimeProvider, RuntimeRootDescriptor, - RuntimeSendRequest, - RuntimeSendResult, RuntimeSessionHandle, RuntimeSessionSnapshot, - RuntimeStartSessionRequest, - RuntimeStartSessionResult, - RuntimeStreamSessionRequest, + RuntimeSubscribeSessionRequest, RuntimeRegisterPolicyRequest, RuntimeRegisterPolicyResult, RuntimeUnregisterPolicyRequest, @@ -37,17 +27,34 @@ import { RuntimeListPoliciesRequest, RuntimePolicyDescriptor } from '../contracts/runtime'; -import { AppException } from '../errors/app-exception'; -import { ErrorCode } from '../errors/error-codes'; import { InstrumentationService } from '../telemetry/instrumentation.service'; import { CircuitBreaker } from './circuit-breaker'; -import { ProtoRegistryService } from './proto-registry.service'; +import { + buildMetadata, + fromAck, + fromEnvelope, + fromSessionMetadata, + getClientMethod, +} from './grpc-helpers'; import { RuntimeCredentialResolverService } from './runtime-credential-resolver.service'; export interface GrpcCallOptions { deadline?: Date; } +/** + * Observer-only Rust runtime provider. + * + * **Invariants (direct-agent-auth.md §Invariants):** + * - Never calls `Send`. Agents emit their own envelopes directly against the runtime. + * - Never allocates a sessionId. The control-plane allocates at POST /runs; the initiator + * agent calls SessionStart with its own Bearer token. + * - `subscribeSession()` attaches a read-only bidi `StreamSession` — the control-plane + * only reads; it does not write the first frame (no SessionStart, no SessionWatch). + * + * The previously-shipped `openSession()` / `startSession()` / `send()` / `chooseInitiator()` + * paths were deleted in CP-3 because they violated §2, §3, and §5 of the plan's invariants. 
+ */ @Injectable() export class RustRuntimeProvider implements RuntimeProvider, OnModuleInit { readonly kind = 'rust'; @@ -61,7 +68,6 @@ export class RustRuntimeProvider implements RuntimeProvider, OnModuleInit { constructor( private readonly config: AppConfigService, private readonly credentialResolver: RuntimeCredentialResolverService, - private readonly protoRegistry: ProtoRegistryService, private readonly instrumentation: InstrumentationService ) {} @@ -122,7 +128,7 @@ export class RustRuntimeProvider implements RuntimeProvider, OnModuleInit { name: req.clientName, title: req.clientName, version: req.clientVersion, - description: 'MACP Control Plane', + description: 'MACP Control Plane (observer)', websiteUrl: '' }, capabilities: { @@ -160,100 +166,8 @@ export class RustRuntimeProvider implements RuntimeProvider, OnModuleInit { }; } - async startSession(req: RuntimeStartSessionRequest, opts?: GrpcCallOptions): Promise { - // Create a fresh gRPC channel per session to avoid stale connection issues - this.client = this.createClient(); - const initiator = this.chooseInitiator(req.execution); - const participant = this.findParticipant(req.execution, initiator); - const creds = await this.credentialResolver.resolve({ - runtimeKind: this.kind, - requester: req.execution.execution?.requester, - participant, - fallbackSender: initiator - }); - - const runtimeSessionId = randomUUID(); - const payload = this.protoRegistry.encodeMessage('macp.v1.SessionStartPayload', { - intent: req.execution.session.metadata?.intent ?? '', - participants: req.execution.session.participants.map((item) => item.id), - mode_version: req.execution.session.modeVersion, - configuration_version: req.execution.session.configurationVersion, - policy_version: req.execution.session.policyVersion ?? 
'', - ttl_ms: req.execution.session.ttlMs, - context: this.protoRegistry.encodeSessionContext( - req.execution.session.context, - req.execution.session.contextEnvelope - ), - roots: (req.execution.session.roots ?? []).map((root) => ({ uri: root.uri, name: root.name ?? '' })) - }); - - const envelope = this.buildEnvelope({ - mode: req.execution.session.modeName, - messageType: 'SessionStart', - messageId: randomUUID(), - sessionId: runtimeSessionId, - sender: creds.sender, - payload - }); - - const response = await this.unary( - 'Send', - { envelope: this.toGrpcEnvelope(envelope) }, - this.buildMetadata(creds.metadata), - opts - ); - - const ack = this.fromAck(response.ack); - if (!ack.ok && ack.error) { - if (ack.error.code === 'INVALID_SESSION_ID') { - throw new AppException( - ErrorCode.INVALID_SESSION_ID, - `Runtime rejected SessionStart: [${ack.error.code}] ${ack.error.message}`, - 400 - ); - } - throw new AppException( - ErrorCode.RUNTIME_UNAVAILABLE, - `Runtime rejected SessionStart: [${ack.error.code}] ${ack.error.message}`, - 502 - ); - } - return { - runtimeSessionId: ack.sessionId || runtimeSessionId, - initiator: creds.sender, - ack - }; - } - - openSession(req: RuntimeOpenSessionRequest): RuntimeSessionHandle { - const initiator = this.chooseInitiator(req.execution); - const participant = this.findParticipant(req.execution, initiator); - const runtimeSessionId = randomUUID(); - - const payload = this.protoRegistry.encodeMessage('macp.v1.SessionStartPayload', { - intent: req.execution.session.metadata?.intent ?? '', - participants: req.execution.session.participants.map((item) => item.id), - mode_version: req.execution.session.modeVersion, - configuration_version: req.execution.session.configurationVersion, - policy_version: req.execution.session.policyVersion ?? 
'', - ttl_ms: req.execution.session.ttlMs, - context: this.protoRegistry.encodeSessionContext( - req.execution.session.context, - req.execution.session.contextEnvelope - ), - roots: (req.execution.session.roots ?? []).map((root) => ({ uri: root.uri, name: root.name ?? '' })) - }); - - const sessionStartEnvelope = this.buildEnvelope({ - mode: req.execution.session.modeName, - messageType: 'SessionStart', - messageId: randomUUID(), - sessionId: runtimeSessionId, - sender: '', - payload - }); - - // Event-driven async queue for the bidirectional stream + subscribeSession(req: RuntimeSubscribeSessionRequest): RuntimeSessionHandle { + // Event-driven async queue for the read-only stream const buffer: RawRuntimeEvent[] = []; let resolveWait: (() => void) | null = null; let ended = false; @@ -277,37 +191,18 @@ export class RustRuntimeProvider implements RuntimeProvider, OnModuleInit { } }); - // Session ack promise — resolved when we receive the SessionStart echo - let resolveSessionAck: (result: RuntimeStartSessionResult) => void; - let rejectSessionAck: (err: Error) => void; - const sessionAck = new Promise((resolve, reject) => { - resolveSessionAck = resolve; - rejectSessionAck = reject; - }); - - let sessionAckSettled = false; - - // Launch the bidirectional stream asynchronously const launch = async () => { try { - const creds = await this.credentialResolver.resolve({ - runtimeKind: this.kind, - requester: req.execution.execution?.requester, - participant, - fallbackSender: initiator - }); - - const metadata = this.buildMetadata(creds.metadata); - const streamMethod = this.getClientMethod('StreamSession'); + const creds = await this.credentialResolver.resolve({ runtimeKind: this.kind }); + const metadata = buildMetadata(creds.metadata); + const streamMethod = getClientMethod(this.client, 'StreamSession'); grpcCall = streamMethod.call(this.client, metadata); grpcCall.on('data', (chunk: any) => { const receivedAt = new Date().toISOString(); - // StreamSessionResponse 
oneof: response.envelope | response.error const responseBody = chunk.response ?? chunk; if (responseBody.error) { - // Inline MACPError — non-terminal, stream stays open const inlineError = responseBody.error; buffer.push({ kind: 'stream-inline-error', @@ -326,69 +221,39 @@ export class RustRuntimeProvider implements RuntimeProvider, OnModuleInit { const rawEnvelope = responseBody.envelope ?? chunk.envelope; if (!rawEnvelope) return; - const envelope = this.fromEnvelope(rawEnvelope); - const event: RawRuntimeEvent = { - kind: 'stream-envelope', - receivedAt, - envelope - }; + // Filter to the session we're observing. Runtime may broadcast across sessions + // on a shared stream; we only care about `req.runtimeSessionId`. + const envelope = fromEnvelope(rawEnvelope); + if (envelope.sessionId && envelope.sessionId !== req.runtimeSessionId) return; - // First envelope back is the SessionStart echo — resolve the ack - if (!sessionAckSettled && envelope.messageType === 'SessionStart') { - sessionAckSettled = true; - resolveSessionAck({ - runtimeSessionId: envelope.sessionId || runtimeSessionId, - initiator: creds.sender, - ack: { - ok: true, - duplicate: false, - messageId: envelope.messageId, - sessionId: envelope.sessionId || runtimeSessionId, - acceptedAtUnixMs: envelope.timestampUnixMs, - sessionState: 'SESSION_STATE_OPEN' - } - }); - } - - buffer.push(event); + buffer.push({ kind: 'stream-envelope', receivedAt, envelope }); notify(); }); grpcCall.on('error', (error: Error) => { streamFailure = error; ended = true; - if (!sessionAckSettled) { - sessionAckSettled = true; - rejectSessionAck(error); - } notify(); }); grpcCall.on('end', () => { ended = true; - if (!sessionAckSettled) { - sessionAckSettled = true; - rejectSessionAck(new Error('stream ended before SessionStart ack')); - } notify(); }); - // Write the SessionStart envelope as the first frame - grpcCall.write({ envelope: this.toGrpcEnvelope(sessionStartEnvelope) }); + // Observer stream: end the write side 
immediately — we only read. + // This tells the runtime the client is a passive subscriber. + try { grpcCall.end(); } catch { /* some gRPC impls no-op on empty streams */ } } catch (error) { + streamFailure = error instanceof Error ? error : new Error(String(error)); ended = true; - if (!sessionAckSettled) { - sessionAckSettled = true; - rejectSessionAck(error instanceof Error ? error : new Error(String(error))); - } notify(); } }; void launch(); - // Build the async iterable for events const events: AsyncIterable = { [Symbol.asyncIterator]() { let started = false; @@ -427,119 +292,36 @@ export class RustRuntimeProvider implements RuntimeProvider, OnModuleInit { } }; - const handle: RuntimeSessionHandle = { - send: (envelope: RuntimeEnvelope) => { - if (grpcCall && !ended) { - grpcCall.write({ envelope: this.toGrpcEnvelope(envelope) }); - } - }, + return { events, - closeWrite: () => { - if (grpcCall && !ended) { - grpcCall.end(); - } - }, abort: () => { ended = true; if (grpcCall) { try { grpcCall.cancel(); } catch { /* ignore */ } } notify(); - }, - sessionAck - }; - - return handle; - } - - async send(req: RuntimeSendRequest): Promise { - const participant = { id: req.from } as ParticipantRef; - const creds = await this.credentialResolver.resolve({ - runtimeKind: this.kind, - participant, - fallbackSender: req.from - }); - - this.logger.debug( - `send() resolved creds: sender=${creds.sender}, metadataKeys=${Object.keys(creds.metadata).join(',')}` - ); - - const envelope = this.buildEnvelope({ - mode: req.modeName, - messageType: req.messageType, - messageId: randomUUID(), - sessionId: req.runtimeSessionId, - sender: creds.sender, - payload: req.payload - }); - - let response: any; - try { - response = await this.unary( - 'Send', - { envelope: this.toGrpcEnvelope(envelope) }, - this.buildMetadata(creds.metadata) - ); - } catch (error) { - const grpcError = error as { code?: number; details?: string; metadata?: { toJSON?: () => unknown } }; - this.logger.error( - 
`send() gRPC error: code=${grpcError.code}, details=${grpcError.details}` - ); - throw error; - } - - const ack = this.fromAck(response.ack); - if (!ack.ok && ack.error) { - if (ack.error.code === 'INVALID_SESSION_ID') { - throw new AppException( - ErrorCode.INVALID_SESSION_ID, - `Runtime rejected message: [${ack.error.code}] ${ack.error.message}`, - 400 - ); } - throw new AppException( - ErrorCode.RUNTIME_UNAVAILABLE, - `Runtime rejected message: [${ack.error.code}] ${ack.error.message}`, - 502 - ); - } - return { ack, envelope }; - } - - async *streamSession(_req: RuntimeStreamSessionRequest): AsyncIterable { - // SessionWatch / passive attach is no longer part of the base protocol. - // Reconnection now uses getSession() polling in StreamConsumerService. - throw new AppException( - ErrorCode.INTERNAL_ERROR, - 'streamSession() is deprecated — reconnection uses getSession() polling', - 500 - ); + }; } async getSession(req: RuntimeGetSessionRequest): Promise { - const creds = await this.credentialResolver.resolve({ - runtimeKind: this.kind, - fallbackSender: req.requesterId ?? this.config.runtimeDevAgentId - }); + const creds = await this.credentialResolver.resolve({ runtimeKind: this.kind }); const response = await this.unary( 'GetSession', { sessionId: req.runtimeSessionId }, - this.buildMetadata(creds.metadata) + buildMetadata(creds.metadata) ); - return this.fromSessionMetadata(response.metadata); + return fromSessionMetadata(response.metadata); } async cancelSession(req: RuntimeCancelSessionRequest): Promise { - const creds = await this.credentialResolver.resolve({ - runtimeKind: this.kind, - fallbackSender: req.requesterId ?? this.config.runtimeDevAgentId - }); + const creds = await this.credentialResolver.resolve({ runtimeKind: this.kind }); const response = await this.unary( 'CancelSession', { sessionId: req.runtimeSessionId, reason: req.reason ?? 
'cancelled by control plane' }, - this.buildMetadata(creds.metadata) + buildMetadata(creds.metadata) ); - return { ack: this.fromAck(response.ack) }; + return { ack: fromAck(response.ack) }; } async getManifest(): Promise { @@ -646,7 +428,7 @@ export class RustRuntimeProvider implements RuntimeProvider, OnModuleInit { const start = Date.now(); try { const result = await this.circuitBreaker.execute(() => { - const clientMethod = this.getClientMethod(method); + const clientMethod = getClientMethod(this.client, method); const deadline = opts?.deadline ?? new Date(Date.now() + this.config.runtimeRequestTimeoutMs); return new Promise((resolve, reject) => { const callback = (error: grpc.ServiceError | null, response: any) => { @@ -674,166 +456,4 @@ export class RustRuntimeProvider implements RuntimeProvider, OnModuleInit { } } - // eslint-disable-next-line @typescript-eslint/no-unsafe-function-type - private getClientMethod(method: string): Function { - const direct = this.client[method]; - if (typeof direct === 'function') return direct; - const lowerCamel = method.charAt(0).toLowerCase() + method.slice(1); - const fallback = this.client[lowerCamel]; - if (typeof fallback === 'function') return fallback; - throw new Error(`runtime gRPC method '${method}' is not available on client`); - } - - private chooseInitiator(execution: ExecutionRequest): string { - const explicit = execution.session.initiatorParticipantId; - if (explicit) return explicit; - const kickoffSender = execution.kickoff?.[0]?.from; - if (kickoffSender) return kickoffSender; - const requester = execution.execution?.requester?.actorId; - if (requester) return requester; - const first = execution.session.participants[0]; - return first.transportIdentity ?? 
first.id; - } - - private findParticipant(execution: ExecutionRequest, sender: string): ParticipantRef | undefined { - return execution.session.participants.find( - (participant) => participant.id === sender || participant.transportIdentity === sender - ); - } - - private buildEnvelope(input: { - mode: string; - messageType: string; - messageId: string; - sessionId: string; - sender: string; - payload: Buffer; - }) { - return { - macpVersion: '1.0', - mode: input.mode, - messageType: input.messageType, - messageId: input.messageId, - sessionId: input.sessionId, - sender: input.sender, - timestampUnixMs: Date.now(), - payload: input.payload - }; - } - - private toGrpcEnvelope(envelope: { - macpVersion: string; - mode: string; - messageType: string; - messageId: string; - sessionId: string; - sender: string; - timestampUnixMs: number; - payload: Buffer; - }) { - return { - macpVersion: envelope.macpVersion, - mode: envelope.mode, - messageType: envelope.messageType, - messageId: envelope.messageId, - sessionId: envelope.sessionId, - sender: envelope.sender, - timestampUnixMs: String(envelope.timestampUnixMs), - payload: envelope.payload - }; - } - - private fromEnvelope(envelope: any) { - return { - macpVersion: envelope.macpVersion, - mode: envelope.mode, - messageType: envelope.messageType, - messageId: envelope.messageId, - sessionId: envelope.sessionId, - sender: envelope.sender, - timestampUnixMs: Number(envelope.timestampUnixMs ?? Date.now()), - payload: Buffer.isBuffer(envelope.payload) - ? envelope.payload - : Buffer.from(envelope.payload ?? 
'') - }; - } - - private fromAck(ack: any, trailingMetadata?: grpc.Metadata): RuntimeAck { - let reasons: string[] | undefined; - - // Parse structured reasons from error details bytes - if (ack?.error?.details) { - try { - const parsed = JSON.parse(Buffer.from(ack.error.details).toString('utf-8')); - if (Array.isArray(parsed.reasons)) reasons = parsed.reasons; - } catch { /* ignore parse errors */ } - } - - // Also check gRPC trailing metadata for POLICY_DENIED binary details - if (!reasons && trailingMetadata) { - const detailsBin = trailingMetadata.get('macp-error-details-bin'); - if (detailsBin && detailsBin.length > 0) { - try { - const parsed = JSON.parse(Buffer.from(detailsBin[0] as Buffer).toString('utf-8')); - if (Array.isArray(parsed.reasons)) reasons = parsed.reasons; - } catch { /* ignore parse errors */ } - } - } - - return { - ok: Boolean(ack?.ok), - duplicate: Boolean(ack?.duplicate), - messageId: ack?.messageId ?? '', - sessionId: ack?.sessionId ?? '', - acceptedAtUnixMs: Number(ack?.acceptedAtUnixMs ?? Date.now()), - sessionState: (ack?.sessionState ?? 'SESSION_STATE_UNSPECIFIED') as RuntimeAck['sessionState'], - error: ack?.error - ? { - code: ack.error.code, - message: ack.error.message, - sessionId: ack.error.sessionId, - messageId: ack.error.messageId, - detailsBase64: ack.error.details - ? Buffer.from(ack.error.details).toString('base64') - : undefined, - details: ack.error.details ? Buffer.from(ack.error.details) : undefined, - reasons - } - : undefined - }; - } - - private fromSessionMetadata(metadata: any): RuntimeSessionSnapshot { - return { - sessionId: metadata?.sessionId ?? '', - mode: metadata?.mode ?? '', - state: metadata?.state ?? 'SESSION_STATE_UNSPECIFIED', - startedAtUnixMs: metadata?.startedAtUnixMs ? Number(metadata.startedAtUnixMs) : undefined, - expiresAtUnixMs: metadata?.expiresAtUnixMs ? 
Number(metadata.expiresAtUnixMs) : undefined, - modeVersion: metadata?.modeVersion, - configurationVersion: metadata?.configurationVersion, - policyVersion: metadata?.policyVersion, - initiator: metadata?.initiator ?? undefined - }; - } - - private buildMetadata(metadataInput: Record): grpc.Metadata { - const metadata = new grpc.Metadata(); - for (const [key, value] of Object.entries(metadataInput)) { - if (value) metadata.set(key, value); - } - // Propagate W3C trace context so runtime-side spans become children of the - // control-plane span. OTel's propagator serializes the active span into - // `traceparent` (+ optional `tracestate`) headers. - injectTraceContext(metadata); - return metadata; - } -} - -function injectTraceContext(metadata: grpc.Metadata): void { - const carrier: Record = {}; - propagation.inject(context.active(), carrier); - for (const [key, value] of Object.entries(carrier)) { - if (value) metadata.set(key, value); - } } diff --git a/src/storage/run.repository.ts b/src/storage/run.repository.ts index 7aa4d1c..acde807 100644 --- a/src/storage/run.repository.ts +++ b/src/storage/run.repository.ts @@ -21,6 +21,8 @@ export interface NewRunRecord { mode: string; runtimeKind: string; runtimeVersion?: string; + /** Pre-allocated sessionId — stored on the run record at creation so it's returned by POST /runs. */ + runtimeSessionId?: string; idempotencyKey?: string; tags?: string[]; sourceKind?: string; @@ -42,6 +44,7 @@ export class RunRepository { mode: input.mode, runtimeKind: input.runtimeKind, runtimeVersion: input.runtimeVersion, + runtimeSessionId: input.runtimeSessionId, idempotencyKey: input.idempotencyKey, tags: input.tags ?? 
[], sourceKind: input.sourceKind, diff --git a/src/telemetry/instrumentation.service.spec.ts b/src/telemetry/instrumentation.service.spec.ts index c48ce5e..29c6b34 100644 --- a/src/telemetry/instrumentation.service.spec.ts +++ b/src/telemetry/instrumentation.service.spec.ts @@ -75,67 +75,98 @@ describe('InstrumentationService', () => { }); // =========================================================================== - // Metric instances are defined + // Metric instances expose the methods expected for their type and, where exercised, + // accept an observation / increment (or gauge inc/dec) without throwing. // =========================================================================== describe('metric instances', () => { - it('should define httpRequestDuration histogram', () => { - expect(service.httpRequestDuration).toBeDefined(); + /** Helper — assert the metric has the expected methods for its type. */ + function assertMetricShape(metric: unknown, type: 'counter' | 'histogram' | 'gauge') { + expect(metric).toBeDefined(); + const m = metric as Record; + if (type === 'counter') { + expect(typeof m.inc).toBe('function'); + } else if (type === 'histogram') { + expect(typeof m.observe).toBe('function'); + } else if (type === 'gauge') { + expect(typeof m.inc).toBe('function'); + expect(typeof m.dec).toBe('function'); + expect(typeof m.set).toBe('function'); + } + } + + it('httpRequestDuration is a histogram and observes', () => { + assertMetricShape(service.httpRequestDuration, 'histogram'); + expect(() => + service.httpRequestDuration.observe({ method: 'GET', status_code: '200' }, 0.1), + ).not.toThrow(); }); - it('should define httpRequestsTotal counter', () => { - expect(service.httpRequestsTotal).toBeDefined(); + it('httpRequestsTotal is a counter and increments', () => { + assertMetricShape(service.httpRequestsTotal, 'counter'); + expect(() => service.httpRequestsTotal.inc({ method: 'GET', status_code: '200' })).not.toThrow(); }); - it('should define activeSseConnections gauge', () => { -
expect(service.activeSseConnections).toBeDefined(); + it('activeSseConnections is a gauge and inc/dec', () => { + assertMetricShape(service.activeSseConnections, 'gauge'); + expect(() => service.activeSseConnections.inc()).not.toThrow(); + expect(() => service.activeSseConnections.dec()).not.toThrow(); }); - it('should define activeStreams gauge', () => { - expect(service.activeStreams).toBeDefined(); + it('activeStreams is a gauge', () => { + assertMetricShape(service.activeStreams, 'gauge'); }); - it('should define runStateTotal counter', () => { - expect(service.runStateTotal).toBeDefined(); + it('runStateTotal is a counter with status label', () => { + assertMetricShape(service.runStateTotal, 'counter'); + expect(() => service.runStateTotal.inc({ status: 'queued' })).not.toThrow(); }); - it('should define grpcCallDuration histogram', () => { - expect(service.grpcCallDuration).toBeDefined(); + it('grpcCallDuration is a histogram with method + status labels', () => { + assertMetricShape(service.grpcCallDuration, 'histogram'); + expect(() => + service.grpcCallDuration.observe({ method: 'Initialize', status: 'ok' }, 0.05), + ).not.toThrow(); }); - it('should define circuitBreakerState gauge', () => { - expect(service.circuitBreakerState).toBeDefined(); + it('circuitBreakerState is a gauge', () => { + assertMetricShape(service.circuitBreakerState, 'gauge'); }); - it('should define circuitBreakerFailuresTotal counter', () => { - expect(service.circuitBreakerFailuresTotal).toBeDefined(); + it('circuitBreakerFailuresTotal is a counter', () => { + assertMetricShape(service.circuitBreakerFailuresTotal, 'counter'); }); - it('should define circuitBreakerSuccessTotal counter', () => { - expect(service.circuitBreakerSuccessTotal).toBeDefined(); + it('circuitBreakerSuccessTotal is a counter', () => { + assertMetricShape(service.circuitBreakerSuccessTotal, 'counter'); }); - it('should define outboundMessagesTotal counter', () => { - 
expect(service.outboundMessagesTotal).toBeDefined(); + it('outboundMessagesTotal is a counter with category + status labels', () => { + assertMetricShape(service.outboundMessagesTotal, 'counter'); + expect(() => + service.outboundMessagesTotal.inc({ category: 'observer', status: 'subscribed' }), + ).not.toThrow(); }); - it('should define inboundMessagesTotal counter', () => { - expect(service.inboundMessagesTotal).toBeDefined(); + it('inboundMessagesTotal is a counter', () => { + assertMetricShape(service.inboundMessagesTotal, 'counter'); }); - it('should define signalsTotal counter', () => { - expect(service.signalsTotal).toBeDefined(); + it('signalsTotal is a counter with signal_type label', () => { + assertMetricShape(service.signalsTotal, 'counter'); + expect(() => service.signalsTotal.inc({ signal_type: 'progress' })).not.toThrow(); }); - it('should define streamReconnectsTotal counter', () => { - expect(service.streamReconnectsTotal).toBeDefined(); + it('streamReconnectsTotal is a counter', () => { + assertMetricShape(service.streamReconnectsTotal, 'counter'); }); - it('should define recoveryTotal counter', () => { - expect(service.recoveryTotal).toBeDefined(); + it('recoveryTotal is a counter with status label', () => { + assertMetricShape(service.recoveryTotal, 'counter'); + expect(() => service.recoveryTotal.inc({ status: 'success' })).not.toThrow(); }); - it('should define webhookDeliveriesTotal counter', () => { - expect(service.webhookDeliveriesTotal).toBeDefined(); + it('webhookDeliveriesTotal is a counter with event + status labels', () => { + assertMetricShape(service.webhookDeliveriesTotal, 'counter'); }); }); diff --git a/test-agents/agents/__init__.py b/test-agents/agents/__init__.py deleted file mode 100644 index f7080eb..0000000 --- a/test-agents/agents/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""MACP Test Agents — LangChain and CrewAI agent implementations.""" diff --git a/test-agents/agents/crewai_decision_agent.py 
b/test-agents/agents/crewai_decision_agent.py deleted file mode 100644 index b4faa14..0000000 --- a/test-agents/agents/crewai_decision_agent.py +++ /dev/null @@ -1,68 +0,0 @@ -"""CrewAI agents for MACP Decision Mode integration testing. - -Each participant is a CrewAI Agent with MACP tools. -Deterministic — uses rule-based logic, not LLM calls. -""" - -from __future__ import annotations - -from typing import Any - -from ..harness.base_agent import AgentRule, BaseAgent -from ..harness.macp_client import MACPClient - - -class CrewAIDecisionEvaluator(BaseAgent): - """CrewAI-based evaluator agent for Decision Mode. - - Wraps the MACP interaction in CrewAI's Agent/Task/Crew paradigm. - For deterministic testing, bypasses the LLM and uses rules directly. - """ - - def __init__(self, client: MACPClient): - super().__init__(client, participant_id="evaluator", role="evaluator") - - def get_rules(self) -> list[AgentRule]: - return [ - AgentRule( - event_type="message.received", - message_type="Proposal", - response_message_type="Evaluation", - response_payload={ - "recommendation": "APPROVE", - "rationale": "CrewAI evaluator approves this proposal", - "confidence": 0.95, - }, - response_to=["proposer"], - delay_ms=150, - ), - ] - - -class CrewAIDecisionVoter(BaseAgent): - """CrewAI-based voter agent for Decision Mode.""" - - def __init__(self, client: MACPClient): - super().__init__(client, participant_id="voter", role="voter") - - def get_rules(self) -> list[AgentRule]: - return [ - AgentRule( - event_type="message.received", - message_type="Evaluation", - response_message_type="Vote", - response_payload={ - "vote": "approve", - "rationale": "CrewAI voter concurs with evaluation", - }, - response_to=["proposer"], - delay_ms=150, - ), - ] - - -def create_crewai_decision_agents( - client: MACPClient, -) -> tuple[CrewAIDecisionEvaluator, CrewAIDecisionVoter]: - """Factory for creating CrewAI decision agents.""" - return CrewAIDecisionEvaluator(client), 
CrewAIDecisionVoter(client) diff --git a/test-agents/agents/crewai_task_agent.py b/test-agents/agents/crewai_task_agent.py deleted file mode 100644 index aebae5f..0000000 --- a/test-agents/agents/crewai_task_agent.py +++ /dev/null @@ -1,70 +0,0 @@ -"""CrewAI agent for MACP Task Mode integration testing. - -Worker agent using CrewAI's Agent paradigm. -Deterministic — uses rule-based logic, not LLM calls. -""" - -from __future__ import annotations - -from typing import Any - -from ..harness.base_agent import AgentRule, BaseAgent -from ..harness.macp_client import MACPClient - - -class CrewAITaskWorker(BaseAgent): - """CrewAI-based worker agent for Task Mode. - - Follows a scripted flow: - 1. Receives TaskRequest -> sends TaskAccept - 2. Sends TaskUpdate with progress - 3. Sends TaskComplete with output - """ - - def __init__(self, client: MACPClient): - super().__init__(client, participant_id="worker", role="worker") - - def get_rules(self) -> list[AgentRule]: - return [ - AgentRule( - event_type="message.received", - message_type="TaskRequest", - response_message_type="TaskAccept", - response_payload=lambda ev: { - "taskId": ev.get("data", {}).get("taskId", "task-1"), - }, - response_to=["requester"], - delay_ms=100, - ), - AgentRule( - event_type="message.sent", - message_type="TaskAccept", - response_message_type="TaskUpdate", - response_payload={ - "taskId": "task-1", - "progress": 0.75, - "message": "CrewAI worker processing...", - }, - response_to=["requester"], - delay_ms=200, - ), - AgentRule( - event_type="message.sent", - message_type="TaskUpdate", - response_message_type="TaskComplete", - response_payload={ - "taskId": "task-1", - "output": { - "result": "success", - "processedBy": "crewai-worker", - }, - }, - response_to=["requester"], - delay_ms=200, - ), - ] - - -def create_crewai_task_worker(client: MACPClient) -> CrewAITaskWorker: - """Factory for creating a CrewAI task worker agent.""" - return CrewAITaskWorker(client) diff --git 
a/test-agents/agents/langchain_decision_agent.py b/test-agents/agents/langchain_decision_agent.py deleted file mode 100644 index 3203cb0..0000000 --- a/test-agents/agents/langchain_decision_agent.py +++ /dev/null @@ -1,108 +0,0 @@ -"""LangChain agent for MACP Decision Mode integration testing. - -Uses LangChain tools to interact with the MACP Control Plane. -Deterministic — uses rule-based logic, not LLM calls. -""" - -from __future__ import annotations - -from typing import Any - -from langchain_core.tools import BaseTool - -from ..harness.base_agent import AgentRule, BaseAgent -from ..harness.macp_client import MACPClient - - -class MACPSendMessageTool(BaseTool): - """LangChain tool for sending MACP messages.""" - - name: str = "send_macp_message" - description: str = "Send an MACP protocol message to the coordination session" - client: Any = None # MACPClient - run_id: str = "" - participant_id: str = "" - - def _run( - self, - message_type: str, - payload: str = "{}", - to: str = "", - ) -> str: - """Send a message via the control plane API.""" - import json - - payload_dict = json.loads(payload) if isinstance(payload, str) else payload - to_list = [t.strip() for t in to.split(",") if t.strip()] if to else None - - result = self.client.send_message( - run_id=self.run_id, - from_participant=self.participant_id, - message_type=message_type, - payload=payload_dict, - to=to_list, - ) - return json.dumps(result) - - -class LangChainDecisionEvaluator(BaseAgent): - """LangChain-based evaluator agent for Decision Mode. - - Listens for Proposal events, sends Evaluation with APPROVE. 
- """ - - def __init__(self, client: MACPClient): - super().__init__(client, participant_id="evaluator", role="evaluator") - self.send_tool = MACPSendMessageTool( - client=client, participant_id="evaluator" - ) - - def get_rules(self) -> list[AgentRule]: - return [ - AgentRule( - event_type="message.received", - message_type="Proposal", - response_message_type="Evaluation", - response_payload={ - "recommendation": "APPROVE", - "rationale": "LangChain agent approves this proposal", - }, - response_to=["proposer"], - delay_ms=100, - ), - ] - - -class LangChainDecisionVoter(BaseAgent): - """LangChain-based voter agent for Decision Mode. - - Listens for Evaluation events, sends Vote with approve. - """ - - def __init__(self, client: MACPClient): - super().__init__(client, participant_id="voter", role="voter") - self.send_tool = MACPSendMessageTool( - client=client, participant_id="voter" - ) - - def get_rules(self) -> list[AgentRule]: - return [ - AgentRule( - event_type="message.received", - message_type="Evaluation", - response_message_type="Vote", - response_payload={ - "vote": "approve", - "rationale": "LangChain voter approves", - }, - response_to=["proposer"], - delay_ms=100, - ), - ] - - -def create_langchain_decision_agents( - client: MACPClient, -) -> tuple[LangChainDecisionEvaluator, LangChainDecisionVoter]: - """Factory for creating a pair of LangChain decision agents.""" - return LangChainDecisionEvaluator(client), LangChainDecisionVoter(client) diff --git a/test-agents/agents/langchain_task_agent.py b/test-agents/agents/langchain_task_agent.py deleted file mode 100644 index 76516b5..0000000 --- a/test-agents/agents/langchain_task_agent.py +++ /dev/null @@ -1,74 +0,0 @@ -"""LangChain agent for MACP Task Mode integration testing. - -Worker agent: listens for TaskRequest, accepts, sends progress, completes. -Deterministic — uses rule-based logic, not LLM calls. 
-""" - -from __future__ import annotations - -from typing import Any - -from ..harness.base_agent import AgentRule, BaseAgent -from ..harness.macp_client import MACPClient - - -class LangChainTaskWorker(BaseAgent): - """LangChain-based worker agent for Task Mode. - - Follows a scripted flow: - 1. Receives TaskRequest -> sends TaskAccept - 2. After accepting -> sends TaskUpdate with progress - 3. After update -> sends TaskComplete - """ - - def __init__(self, client: MACPClient): - super().__init__(client, participant_id="worker", role="worker") - self._accepted = False - - def get_rules(self) -> list[AgentRule]: - return [ - AgentRule( - event_type="message.received", - message_type="TaskRequest", - response_message_type="TaskAccept", - response_payload=lambda ev: { - "taskId": ev.get("data", {}).get("taskId", "task-1"), - "acceptedAt": "2026-01-01T00:00:00Z", - }, - response_to=["requester"], - delay_ms=100, - ), - # After seeing our own TaskAccept echoed back, send update - AgentRule( - event_type="message.sent", - message_type="TaskAccept", - response_message_type="TaskUpdate", - response_payload={ - "taskId": "task-1", - "progress": 0.5, - "message": "LangChain worker processing...", - }, - response_to=["requester"], - delay_ms=200, - ), - # After TaskUpdate echoed, send complete - AgentRule( - event_type="message.sent", - message_type="TaskUpdate", - response_message_type="TaskComplete", - response_payload={ - "taskId": "task-1", - "output": { - "result": "success", - "processedBy": "langchain-worker", - }, - }, - response_to=["requester"], - delay_ms=200, - ), - ] - - -def create_langchain_task_worker(client: MACPClient) -> LangChainTaskWorker: - """Factory for creating a LangChain task worker agent.""" - return LangChainTaskWorker(client) diff --git a/test-agents/harness/__init__.py b/test-agents/harness/__init__.py deleted file mode 100644 index 21664ef..0000000 --- a/test-agents/harness/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -"""MACP Test Agent Harness 
— shared utilities for integration testing.""" - -from .macp_client import MACPClient -from .sse_listener import SSEListener -from .base_agent import BaseAgent - -__all__ = ["MACPClient", "SSEListener", "BaseAgent"] diff --git a/test-agents/harness/base_agent.py b/test-agents/harness/base_agent.py deleted file mode 100644 index 3298646..0000000 --- a/test-agents/harness/base_agent.py +++ /dev/null @@ -1,167 +0,0 @@ -"""Abstract base class for rule-based test agents.""" - -from __future__ import annotations - -import time -from abc import ABC, abstractmethod -from dataclasses import dataclass, field -from typing import Any, Callable - -from .macp_client import MACPClient -from .sse_listener import SSEListener - - -@dataclass -class AgentRule: - """A rule that maps incoming events to outgoing messages.""" - - # Match conditions - event_type: str | None = None - message_type: str | None = None - from_participant: str | None = None - - # Response action - response_message_type: str = "" - response_payload: dict[str, Any] | Callable[[dict[str, Any]], dict[str, Any]] = field( - default_factory=dict - ) - response_to: list[str] | None = None - delay_ms: int = 0 - - -class BaseAgent(ABC): - """Abstract base agent for MACP integration testing. - - Subclasses implement get_rules() to define behavior. - The agent loop: listen to SSE events, match rules, send messages. - All deterministic — no LLM calls. - """ - - def __init__( - self, - client: MACPClient, - participant_id: str, - role: str = "agent", - ): - self.client = client - self.participant_id = participant_id - self.role = role - self.actions_performed: list[dict[str, Any]] = [] - self._running = False - - @abstractmethod - def get_rules(self) -> list[AgentRule]: - """Return the list of rules this agent follows.""" - ... - - def run(self, run_id: str, timeout_s: float = 30.0) -> None: - """Execute the agent loop for the given run. - - Subscribes to SSE events and sends messages based on matching rules. 
- Stops when the run reaches a terminal state or timeout expires. - """ - self._running = True - rules = self.get_rules() - - listener = SSEListener( - base_url=self.client.base_url, - api_key=self.client.api_key, - ) - listener.connect(run_id, include_snapshot=True) - - processed_count = 0 - deadline = time.time() + timeout_s - - try: - while self._running and time.time() < deadline: - # Process new events - new_events = listener.events[processed_count:] - processed_count = len(listener.events) - - for event in new_events: - # Check for terminal state - if event.type == "snapshot" and isinstance(event.data, dict): - run_data = event.data.get("run", {}) - status = run_data.get("status", "") - if status in ("completed", "failed", "cancelled"): - self._running = False - return - - # Match canonical events against rules - if event.type == "canonical_event" and isinstance(event.data, dict): - self._process_event(run_id, event.data, rules) - - time.sleep(0.1) - finally: - listener.close() - - def stop(self) -> None: - """Stop the agent loop.""" - self._running = False - - def _process_event( - self, - run_id: str, - canonical: dict[str, Any], - rules: list[AgentRule], - ) -> None: - """Check event against rules and send responses.""" - event_type = canonical.get("type", "") - data = canonical.get("data", {}) - - for rule in rules: - if not self._matches(event_type, data, rule): - continue - - if rule.delay_ms > 0: - time.sleep(rule.delay_ms / 1000.0) - - payload = ( - rule.response_payload(canonical) - if callable(rule.response_payload) - else rule.response_payload - ) - - try: - result = self.client.send_message( - run_id=run_id, - from_participant=self.participant_id, - message_type=rule.response_message_type, - payload=payload, - to=rule.response_to, - ) - self.actions_performed.append( - { - "rule": rule.response_message_type, - "event": canonical, - "result": result, - } - ) - except Exception as e: - if self._running: - print( - f"Agent {self.participant_id} 
failed to send " - f"{rule.response_message_type}: {e}" - ) - - # Only fire first matching rule - break - - def _matches( - self, - event_type: str, - data: dict[str, Any], - rule: AgentRule, - ) -> bool: - """Check if an event matches a rule's conditions.""" - if rule.event_type and event_type != rule.event_type: - return False - if rule.message_type: - msg_type = data.get("messageType", "") - if msg_type != rule.message_type: - return False - if rule.from_participant: - sender = data.get("sender", "") or data.get("from", "") - if sender != rule.from_participant: - return False - return True diff --git a/test-agents/harness/macp_client.py b/test-agents/harness/macp_client.py deleted file mode 100644 index c735822..0000000 --- a/test-agents/harness/macp_client.py +++ /dev/null @@ -1,152 +0,0 @@ -"""HTTP client for the MACP Control Plane REST API.""" - -from __future__ import annotations - -import os -from typing import Any - -import httpx - - -class MACPClient: - """Synchronous HTTP client for the MACP Control Plane.""" - - def __init__( - self, - base_url: str | None = None, - api_key: str | None = None, - timeout: float = 30.0, - ): - self.base_url = ( - base_url or os.environ.get("CONTROL_PLANE_URL", "http://localhost:3001") - ) - self.api_key = api_key or os.environ.get("CONTROL_PLANE_API_KEY", "") - self._client = httpx.Client( - base_url=self.base_url, - timeout=timeout, - headers=self._build_headers(), - ) - - def _build_headers(self) -> dict[str, str]: - headers = {"Content-Type": "application/json"} - if self.api_key: - headers["Authorization"] = f"Bearer {self.api_key}" - return headers - - # ── Run Lifecycle ───────────────────────────────────────────── - - def create_run(self, request: dict[str, Any]) -> dict[str, Any]: - """POST /runs — create and launch a new execution.""" - resp = self._client.post("/runs", json=request) - resp.raise_for_status() - return resp.json() - - def get_run(self, run_id: str) -> dict[str, Any]: - """GET /runs/:id — fetch run 
record.""" - resp = self._client.get(f"/runs/{run_id}") - resp.raise_for_status() - return resp.json() - - def get_state(self, run_id: str) -> dict[str, Any]: - """GET /runs/:id/state — projected run state.""" - resp = self._client.get(f"/runs/{run_id}/state") - resp.raise_for_status() - return resp.json() - - def list_runs(self, **params: Any) -> dict[str, Any]: - """GET /runs — list runs with filtering.""" - resp = self._client.get("/runs", params=params) - resp.raise_for_status() - return resp.json() - - def cancel_run(self, run_id: str, reason: str | None = None) -> dict[str, Any]: - """POST /runs/:id/cancel — cancel running session.""" - body = {"reason": reason} if reason else {} - resp = self._client.post(f"/runs/{run_id}/cancel", json=body) - resp.raise_for_status() - return resp.json() - - # ── Messaging ───────────────────────────────────────────────── - - def send_message( - self, - run_id: str, - from_participant: str, - message_type: str, - payload: dict[str, Any] | None = None, - to: list[str] | None = None, - metadata: dict[str, Any] | None = None, - ) -> dict[str, Any]: - """POST /runs/:id/messages — send a session-bound MACP message.""" - body: dict[str, Any] = { - "from": from_participant, - "messageType": message_type, - } - if payload is not None: - body["payload"] = payload - if to is not None: - body["to"] = to - if metadata is not None: - body["metadata"] = metadata - - resp = self._client.post(f"/runs/{run_id}/messages", json=body) - resp.raise_for_status() - return resp.json() - - def send_signal( - self, - run_id: str, - from_participant: str, - message_type: str, - payload: dict[str, Any] | None = None, - ) -> dict[str, Any]: - """POST /runs/:id/signal — send a signal.""" - body: dict[str, Any] = { - "from": from_participant, - "messageType": message_type, - } - if payload is not None: - body["payload"] = payload - - resp = self._client.post(f"/runs/{run_id}/signal", json=body) - resp.raise_for_status() - return resp.json() - - # ── 
Events ──────────────────────────────────────────────────── - - def list_events( - self, run_id: str, after_seq: int | None = None - ) -> list[dict[str, Any]]: - """GET /runs/:id/events — list canonical events.""" - params = {} - if after_seq is not None: - params["afterSeq"] = after_seq - resp = self._client.get(f"/runs/{run_id}/events", params=params) - resp.raise_for_status() - return resp.json() - - # ── Validation ──────────────────────────────────────────────── - - def validate_run(self, request: dict[str, Any]) -> dict[str, Any]: - """POST /runs/validate — preflight validation.""" - resp = self._client.post("/runs/validate", json=request) - resp.raise_for_status() - return resp.json() - - # ── Health ──────────────────────────────────────────────────── - - def healthz(self) -> dict[str, Any]: - """GET /healthz — liveness probe.""" - resp = self._client.get("/healthz") - resp.raise_for_status() - return resp.json() - - def close(self) -> None: - """Close the HTTP client.""" - self._client.close() - - def __enter__(self) -> "MACPClient": - return self - - def __exit__(self, *args: Any) -> None: - self.close() diff --git a/test-agents/harness/sse_listener.py b/test-agents/harness/sse_listener.py deleted file mode 100644 index 22362f1..0000000 --- a/test-agents/harness/sse_listener.py +++ /dev/null @@ -1,168 +0,0 @@ -"""SSE stream consumer for MACP Control Plane events.""" - -from __future__ import annotations - -import json -import os -import threading -import time -from dataclasses import dataclass, field -from queue import Empty, Queue -from typing import Any - -import httpx -from httpx_sse import connect_sse - - -@dataclass -class SSEEvent: - """Parsed SSE event.""" - - type: str - data: Any - id: str | None = None - raw: str = "" - - -class SSEListener: - """Background SSE stream consumer. - - Connects to GET /runs/:id/stream and pushes events to a queue. - Runs in a background thread for non-blocking operation. 
- """ - - def __init__( - self, - base_url: str | None = None, - api_key: str | None = None, - ): - self.base_url = ( - base_url or os.environ.get("CONTROL_PLANE_URL", "http://localhost:3001") - ) - self.api_key = api_key or os.environ.get("CONTROL_PLANE_API_KEY", "") - self.events: list[SSEEvent] = [] - self.queue: Queue[SSEEvent] = Queue() - self._thread: threading.Thread | None = None - self._stop_event = threading.Event() - - def connect( - self, - run_id: str, - include_snapshot: bool = True, - after_seq: int | None = None, - ) -> None: - """Start listening to SSE events in a background thread.""" - self._stop_event.clear() - self._thread = threading.Thread( - target=self._listen, - args=(run_id, include_snapshot, after_seq), - daemon=True, - ) - self._thread.start() - - def _listen( - self, - run_id: str, - include_snapshot: bool, - after_seq: int | None, - ) -> None: - """Background thread: connect to SSE and consume events.""" - headers = {"Accept": "text/event-stream"} - if self.api_key: - headers["Authorization"] = f"Bearer {self.api_key}" - - params: dict[str, Any] = {"includeSnapshot": str(include_snapshot).lower()} - if after_seq is not None: - params["afterSeq"] = after_seq - - try: - with httpx.Client( - base_url=self.base_url, timeout=60.0, headers=headers - ) as client: - with connect_sse( - client, "GET", f"/runs/{run_id}/stream", params=params - ) as sse: - for event in sse.iter_sse(): - if self._stop_event.is_set(): - break - - parsed_data: Any - try: - parsed_data = json.loads(event.data) - except (json.JSONDecodeError, TypeError): - parsed_data = event.data - - sse_event = SSEEvent( - type=event.event or "message", - data=parsed_data, - id=event.id, - raw=event.data, - ) - self.events.append(sse_event) - self.queue.put(sse_event) - except Exception as e: - # Push error as a special event - error_event = SSEEvent( - type="error", - data={"error": str(e)}, - ) - self.events.append(error_event) - self.queue.put(error_event) - - def 
wait_for_event( - self, event_type: str, timeout_s: float = 30.0 - ) -> SSEEvent | None: - """Wait for an event of the given type.""" - # Check already-received events - for ev in self.events: - if ev.type == event_type: - return ev - - # Wait for new events - deadline = time.time() + timeout_s - while time.time() < deadline: - try: - event = self.queue.get(timeout=0.5) - if event.type == event_type: - return event - except Empty: - continue - return None - - def wait_for_status( - self, target_status: str, timeout_s: float = 30.0 - ) -> SSEEvent | None: - """Wait for a snapshot event showing the target run status.""" - deadline = time.time() + timeout_s - while time.time() < deadline: - for ev in self.events: - if ev.type == "snapshot" and isinstance(ev.data, dict): - run = ev.data.get("run", {}) - if run.get("status") == target_status: - return ev - - try: - event = self.queue.get(timeout=0.5) - if event.type == "snapshot" and isinstance(event.data, dict): - run = event.data.get("run", {}) - if run.get("status") == target_status: - return event - except Empty: - continue - return None - - def get_events_by_type(self, event_type: str) -> list[SSEEvent]: - """Return all events of the given type.""" - return [ev for ev in self.events if ev.type == event_type] - - def close(self) -> None: - """Stop the background listener.""" - self._stop_event.set() - if self._thread and self._thread.is_alive(): - self._thread.join(timeout=5.0) - - def __enter__(self) -> "SSEListener": - return self - - def __exit__(self, *args: Any) -> None: - self.close() diff --git a/test-agents/requirements.txt b/test-agents/requirements.txt deleted file mode 100644 index d2335eb..0000000 --- a/test-agents/requirements.txt +++ /dev/null @@ -1,10 +0,0 @@ -langchain>=0.3.0 -langchain-core>=0.3.0 -langchain-community>=0.3.0 -crewai>=0.80.0 -httpx>=0.27.0 -httpx-sse>=0.4.0 -sseclient-py>=1.8.0 -pytest>=8.0.0 -pytest-asyncio>=0.24.0 -pytest-json-report>=1.5.0 diff --git 
a/test-agents/scenarios/__init__.py b/test-agents/scenarios/__init__.py deleted file mode 100644 index d689dfc..0000000 --- a/test-agents/scenarios/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""MACP Integration Test Scenarios.""" diff --git a/test-agents/scenarios/conftest.py b/test-agents/scenarios/conftest.py deleted file mode 100644 index cdbb1ba..0000000 --- a/test-agents/scenarios/conftest.py +++ /dev/null @@ -1,112 +0,0 @@ -"""Pytest fixtures for MACP agent integration tests.""" - -from __future__ import annotations - -import os -import time - -import pytest - -from ..harness.macp_client import MACPClient - - -@pytest.fixture(scope="session") -def control_plane_url() -> str: - """URL of the running MACP control plane.""" - return os.environ.get("CONTROL_PLANE_URL", "http://localhost:3001") - - -@pytest.fixture(scope="session") -def api_key() -> str: - """API key for authenticating with the control plane.""" - return os.environ.get("CONTROL_PLANE_API_KEY", "test-key-integration") - - -@pytest.fixture(scope="session") -def macp_client(control_plane_url: str, api_key: str) -> MACPClient: - """Shared MACP HTTP client.""" - client = MACPClient(base_url=control_plane_url, api_key=api_key) - - # Wait for the control plane to be healthy - retries = 30 - while retries > 0: - try: - result = client.healthz() - if result.get("status") == "ok": - break - except Exception: - pass - retries -= 1 - time.sleep(1) - - if retries == 0: - pytest.skip("Control plane not reachable") - - yield client - client.close() - - -def make_decision_request( - runtime_kind: str = "scripted-mock", -) -> dict: - """Create a Decision Mode execution request.""" - return { - "mode": "sandbox", - "runtime": {"kind": runtime_kind}, - "session": { - "modeName": "macp.mode.decision.v1", - "modeVersion": "1.0.0", - "configurationVersion": "1.0.0", - "ttlMs": 60000, - "participants": [ - {"id": "proposer", "role": "proposer"}, - {"id": "evaluator", "role": "evaluator"}, - {"id": "voter", "role": 
"voter"}, - ], - }, - "kickoff": [ - { - "from": "proposer", - "to": ["evaluator", "voter"], - "kind": "proposal", - "messageType": "Proposal", - "payload": { - "proposalId": "prop-1", - "option": "Deploy feature X", - "rationale": "Agent test proposal", - }, - } - ], - "execution": {"tags": ["agent-e2e-test", "decision-mode"]}, - } - - -def make_task_request(runtime_kind: str = "scripted-mock") -> dict: - """Create a Task Mode execution request.""" - return { - "mode": "sandbox", - "runtime": {"kind": runtime_kind}, - "session": { - "modeName": "macp.mode.task.v1", - "modeVersion": "1.0.0", - "configurationVersion": "1.0.0", - "ttlMs": 60000, - "participants": [ - {"id": "requester", "role": "requester"}, - {"id": "worker", "role": "worker"}, - ], - }, - "kickoff": [ - { - "from": "requester", - "to": ["worker"], - "kind": "request", - "messageType": "TaskRequest", - "payload": { - "taskId": "task-1", - "description": "Process agent test data", - }, - } - ], - "execution": {"tags": ["agent-e2e-test", "task-mode"]}, - } diff --git a/test-agents/scenarios/test_crewai_decision.py b/test-agents/scenarios/test_crewai_decision.py deleted file mode 100644 index e1093d6..0000000 --- a/test-agents/scenarios/test_crewai_decision.py +++ /dev/null @@ -1,76 +0,0 @@ -"""E2E test: CrewAI agents in Decision Mode. - -Tests the full decision flow with CrewAI agents: -1. Create a decision-mode run -2. CrewAI evaluator sends Evaluation -3. CrewAI voter sends Vote -4. 
Session resolves -""" - -from __future__ import annotations - -import threading -import time - -import pytest - -from ..agents.crewai_decision_agent import create_crewai_decision_agents -from ..harness.macp_client import MACPClient -from .conftest import make_decision_request - - -class TestCrewAIDecisionFlow: - """Test CrewAI agents participating in a Decision Mode session.""" - - def test_crewai_decision_happy_path(self, macp_client: MACPClient) -> None: - """Full decision flow with CrewAI evaluator and voter.""" - result = macp_client.create_run(make_decision_request()) - run_id = result["runId"] - assert run_id - - time.sleep(1.0) - - evaluator, voter = create_crewai_decision_agents(macp_client) - - threads = [ - threading.Thread(target=evaluator.run, args=(run_id, 20.0)), - threading.Thread(target=voter.run, args=(run_id, 20.0)), - ] - for t in threads: - t.start() - for t in threads: - t.join(timeout=25.0) - - evaluator.stop() - voter.stop() - - # Verify - state = macp_client.get_state(run_id) - assert state["run"]["runId"] == run_id - - events = macp_client.list_events(run_id) - assert len(events) > 0 - - def test_crewai_agents_track_actions(self, macp_client: MACPClient) -> None: - """Verify that CrewAI agents record their performed actions.""" - result = macp_client.create_run(make_decision_request()) - run_id = result["runId"] - time.sleep(1.0) - - evaluator, voter = create_crewai_decision_agents(macp_client) - - threads = [ - threading.Thread(target=evaluator.run, args=(run_id, 15.0)), - threading.Thread(target=voter.run, args=(run_id, 15.0)), - ] - for t in threads: - t.start() - for t in threads: - t.join(timeout=20.0) - - evaluator.stop() - voter.stop() - - # Actions list should be available (may be empty if events didn't match) - assert isinstance(evaluator.actions_performed, list) - assert isinstance(voter.actions_performed, list) diff --git a/test-agents/scenarios/test_crewai_task.py b/test-agents/scenarios/test_crewai_task.py deleted file mode 
100644 index 1cad478..0000000 --- a/test-agents/scenarios/test_crewai_task.py +++ /dev/null @@ -1,41 +0,0 @@ -"""E2E test: CrewAI agent in Task Mode. - -Tests the full task flow with a CrewAI worker agent. -""" - -from __future__ import annotations - -import threading -import time - -import pytest - -from ..agents.crewai_task_agent import create_crewai_task_worker -from ..harness.macp_client import MACPClient -from .conftest import make_task_request - - -class TestCrewAITaskFlow: - """Test CrewAI agent participating in a Task Mode session.""" - - def test_crewai_task_happy_path(self, macp_client: MACPClient) -> None: - """Full task flow with CrewAI worker.""" - result = macp_client.create_run(make_task_request()) - run_id = result["runId"] - assert run_id - - time.sleep(1.0) - - worker = create_crewai_task_worker(macp_client) - - thread = threading.Thread(target=worker.run, args=(run_id, 20.0)) - thread.start() - thread.join(timeout=25.0) - worker.stop() - - # Verify - state = macp_client.get_state(run_id) - assert state["run"]["runId"] == run_id - - events = macp_client.list_events(run_id) - assert len(events) > 0 diff --git a/test-agents/scenarios/test_decision_flow.py b/test-agents/scenarios/test_decision_flow.py deleted file mode 100644 index eb34194..0000000 --- a/test-agents/scenarios/test_decision_flow.py +++ /dev/null @@ -1,94 +0,0 @@ -"""E2E test: LangChain agents in Decision Mode. - -Tests the full decision flow: -1. Create a decision-mode run -2. LangChain evaluator agent listens for Proposal, sends Evaluation -3. LangChain voter agent listens for Evaluation, sends Vote -4. 
Runtime emits Commitment, session resolves -""" - -from __future__ import annotations - -import threading -import time - -import pytest - -from ..agents.langchain_decision_agent import create_langchain_decision_agents -from ..harness.macp_client import MACPClient -from .conftest import make_decision_request - - -class TestLangChainDecisionFlow: - """Test LangChain agents participating in a Decision Mode session.""" - - def test_decision_happy_path(self, macp_client: MACPClient) -> None: - """Full decision flow with LangChain evaluator and voter.""" - # Create the run - result = macp_client.create_run(make_decision_request()) - run_id = result["runId"] - assert run_id - - # Wait for run to start - time.sleep(1.0) - - # Create agents - evaluator, voter = create_langchain_decision_agents(macp_client) - - # Run agents in background threads - threads = [ - threading.Thread(target=evaluator.run, args=(run_id, 20.0)), - threading.Thread(target=voter.run, args=(run_id, 20.0)), - ] - for t in threads: - t.start() - - # Wait for agents to finish - for t in threads: - t.join(timeout=25.0) - - # Stop any still-running agents - evaluator.stop() - voter.stop() - - # Verify the run state - state = macp_client.get_state(run_id) - assert state["run"]["runId"] == run_id - - # Verify events were created - events = macp_client.list_events(run_id) - assert len(events) > 0 - - # Verify agents performed actions - # (may be empty if run completed before agents could act) - total_actions = len(evaluator.actions_performed) + len( - voter.actions_performed - ) - # At minimum, run lifecycle events should exist - run_events = [e for e in events if e.get("type", "").startswith("run.")] - assert len(run_events) >= 1 - - def test_decision_run_creates_successfully( - self, macp_client: MACPClient - ) -> None: - """Verify that a decision-mode run can be created.""" - result = macp_client.create_run(make_decision_request()) - assert "runId" in result - assert result["status"] == "queued" - - # 
Verify run is fetchable - run = macp_client.get_run(result["runId"]) - assert run["id"] == result["runId"] - - def test_decision_run_has_correct_participants( - self, macp_client: MACPClient - ) -> None: - """Verify projected state includes declared participants.""" - result = macp_client.create_run(make_decision_request()) - run_id = result["runId"] - time.sleep(1.0) - - state = macp_client.get_state(run_id) - participants = state.get("participants", []) - # Participants should be populated after session binding - assert isinstance(participants, list) diff --git a/test-agents/scenarios/test_mixed_agents.py b/test-agents/scenarios/test_mixed_agents.py deleted file mode 100644 index c689db6..0000000 --- a/test-agents/scenarios/test_mixed_agents.py +++ /dev/null @@ -1,72 +0,0 @@ -"""E2E test: Mixed LangChain + CrewAI agents in same session. - -Tests that agents from different frameworks can participate in the -same MACP coordination session. -""" - -from __future__ import annotations - -import threading -import time - -import pytest - -from ..agents.langchain_decision_agent import LangChainDecisionEvaluator -from ..agents.crewai_decision_agent import CrewAIDecisionVoter -from ..harness.macp_client import MACPClient -from .conftest import make_decision_request - - -class TestMixedAgentFlow: - """Test LangChain + CrewAI agents in the same session.""" - - def test_mixed_framework_decision(self, macp_client: MACPClient) -> None: - """LangChain evaluator + CrewAI voter in a Decision Mode session.""" - result = macp_client.create_run(make_decision_request()) - run_id = result["runId"] - assert run_id - - time.sleep(1.0) - - # LangChain evaluator - evaluator = LangChainDecisionEvaluator(macp_client) - # CrewAI voter - voter = CrewAIDecisionVoter(macp_client) - - threads = [ - threading.Thread(target=evaluator.run, args=(run_id, 20.0)), - threading.Thread(target=voter.run, args=(run_id, 20.0)), - ] - for t in threads: - t.start() - for t in threads: - t.join(timeout=25.0) - 
- evaluator.stop() - voter.stop() - - # Verify the run exists and has events - state = macp_client.get_state(run_id) - assert state["run"]["runId"] == run_id - - events = macp_client.list_events(run_id) - assert len(events) > 0 - - # Both agents should have been able to participate - assert isinstance(evaluator.actions_performed, list) - assert isinstance(voter.actions_performed, list) - - def test_mixed_agents_see_same_events(self, macp_client: MACPClient) -> None: - """Both framework agents see the same event stream.""" - result = macp_client.create_run(make_decision_request()) - run_id = result["runId"] - time.sleep(1.0) - - # Verify events are consistent - events = macp_client.list_events(run_id) - - # Each event should have required fields - for event in events: - assert "id" in event - assert "type" in event - assert "seq" in event diff --git a/test-agents/scenarios/test_task_flow.py b/test-agents/scenarios/test_task_flow.py deleted file mode 100644 index 840dd15..0000000 --- a/test-agents/scenarios/test_task_flow.py +++ /dev/null @@ -1,58 +0,0 @@ -"""E2E test: LangChain agent in Task Mode. - -Tests the full task flow: -1. Create a task-mode run -2. LangChain worker agent accepts, sends progress, completes -3. 
Runtime emits Commitment, session resolves -""" - -from __future__ import annotations - -import threading -import time - -import pytest - -from ..agents.langchain_task_agent import create_langchain_task_worker -from ..harness.macp_client import MACPClient -from .conftest import make_task_request - - -class TestLangChainTaskFlow: - """Test LangChain agent participating in a Task Mode session.""" - - def test_task_happy_path(self, macp_client: MACPClient) -> None: - """Full task flow with LangChain worker.""" - result = macp_client.create_run(make_task_request()) - run_id = result["runId"] - assert run_id - - time.sleep(1.0) - - # Create worker agent - worker = create_langchain_task_worker(macp_client) - - # Run agent - thread = threading.Thread(target=worker.run, args=(run_id, 20.0)) - thread.start() - thread.join(timeout=25.0) - worker.stop() - - # Verify events - events = macp_client.list_events(run_id) - assert len(events) > 0 - - # Verify run state - state = macp_client.get_state(run_id) - assert state["run"]["runId"] == run_id - - def test_task_run_creates_with_two_participants( - self, macp_client: MACPClient - ) -> None: - """Verify task-mode run has requester and worker.""" - result = macp_client.create_run(make_task_request()) - run_id = result["runId"] - time.sleep(1.0) - - state = macp_client.get_state(run_id) - assert isinstance(state.get("participants", []), list) diff --git a/test/fixtures/decision-mode.ts b/test/fixtures/decision-mode.ts index 3bbc678..9d17368 100644 --- a/test/fixtures/decision-mode.ts +++ b/test/fixtures/decision-mode.ts @@ -1,15 +1,18 @@ -import { ExecutionRequest } from '../../src/contracts/control-plane'; +import { RunDescriptor } from '../../src/contracts/control-plane'; import { - makeStreamOpened, makeStreamEnvelope, RuntimeScript, - ScriptedEvent } from '../helpers/scripted-mock-runtime.provider'; import { testRuntimeKind } from '../helpers/runtime-kind'; -export function decisionModeRequest( - overrides?: Partial -): 
ExecutionRequest { +/** + * Observer-mode fixtures (direct-agent-auth CP-3). + * + * No `kickoff[]` — agents emit the Proposal/Evaluation/Vote/Commitment sequence + * directly against the runtime. Tests verify the control-plane's observer sees them + * and projects them correctly. + */ +export function decisionModeRequest(overrides?: Partial): RunDescriptor { return { mode: 'sandbox', runtime: { kind: testRuntimeKind() }, @@ -19,149 +22,123 @@ export function decisionModeRequest( configurationVersion: '1.0.0', policyVersion: 'policy.default', ttlMs: 60000, - participants: [ - { id: 'proposer', role: 'proposer' }, - { id: 'evaluator', role: 'evaluator' }, - { id: 'voter', role: 'voter' } - ] + participants: [{ id: 'proposer' }, { id: 'evaluator' }, { id: 'voter' }], }, - kickoff: [ - { - from: 'proposer', - to: ['evaluator', 'voter'], - kind: 'proposal', - messageType: 'Proposal', - payload: { - proposalId: 'prop-1', - option: 'Deploy feature X', - rationale: 'Integration test proposal' - } - } - ], execution: { - tags: ['integration-test', 'decision-mode'] + tags: ['integration-test', 'decision-mode'], }, - ...overrides + ...overrides, }; } -/** - * Happy path: Proposal -> Evaluation -> Vote -> Commitment -> resolved - * The runtime echoes messages as stream-envelope events and triggers - * Commitment after receiving a Vote. - */ +/** Happy path: Proposal → Evaluation → Vote → Commitment (outcome_positive: true). 
*/ export function decisionHappyScript(): RuntimeScript { return { supportedModes: ['macp.mode.decision.v1'], + initiator: 'proposer', events: [ - // Stream opens immediately - { event: makeStreamOpened() }, - // After evaluator sends Evaluation, echo it back { - trigger: { afterMessageType: 'Evaluation', fromParticipant: 'evaluator' }, - event: makeStreamEnvelope( - 'macp.mode.decision.v1', - 'Evaluation', - 'evaluator', - { recommendation: 'APPROVE', rationale: 'Looks good' } - ) + delayMs: 10, + event: makeStreamEnvelope('macp.mode.decision.v1', 'Proposal', 'proposer', { + proposalId: 'prop-1', + option: 'Deploy feature X', + rationale: 'Integration test proposal', + }), }, - // After voter sends Vote, echo it and then emit Commitment { - trigger: { afterMessageType: 'Vote', fromParticipant: 'voter' }, - event: makeStreamEnvelope( - 'macp.mode.decision.v1', - 'Vote', - 'voter', - { vote: 'approve', rationale: 'Approved' } - ) + delayMs: 10, + event: makeStreamEnvelope('macp.mode.decision.v1', 'Evaluation', 'evaluator', { + recommendation: 'APPROVE', + rationale: 'Looks good', + }), }, { - trigger: { afterMessageType: 'Vote' }, - delayMs: 50, - event: makeStreamEnvelope( - 'macp.mode.decision.v1', - 'Commitment', - 'system', - { - proposalId: 'prop-1', - outcome: 'approved', - finalized: true, - outcome_positive: true, - rationale: 'Consensus reached' - } - ) - } - ] + delayMs: 10, + event: makeStreamEnvelope('macp.mode.decision.v1', 'Vote', 'voter', { + vote: 'approve', + rationale: 'Approved', + }), + }, + { + delayMs: 10, + event: makeStreamEnvelope('macp.mode.decision.v1', 'Commitment', 'proposer', { + proposalId: 'prop-1', + outcome: 'approved', + finalized: true, + outcome_positive: true, + rationale: 'Consensus reached', + }), + }, + ], }; } -/** - * Objection flow: Proposal -> Objection -> revised Proposal -> Evaluation -> Vote -> Commitment - */ +/** Objection flow: Proposal → Objection → revised Proposal → Evaluation → Vote → Commitment. 
*/ export function decisionObjectionScript(): RuntimeScript { return { supportedModes: ['macp.mode.decision.v1'], + initiator: 'proposer', events: [ - { event: makeStreamOpened() }, { - trigger: { afterMessageType: 'Objection', fromParticipant: 'evaluator' }, - event: makeStreamEnvelope( - 'macp.mode.decision.v1', - 'Objection', - 'evaluator', - { severity: 'high', reason: 'Needs revision' } - ) + delayMs: 10, + event: makeStreamEnvelope('macp.mode.decision.v1', 'Proposal', 'proposer', { + proposalId: 'prop-2', + option: 'Deploy feature Y', + rationale: 'Initial', + }), }, { - trigger: { afterMessageType: 'Evaluation', fromParticipant: 'evaluator' }, - event: makeStreamEnvelope( - 'macp.mode.decision.v1', - 'Evaluation', - 'evaluator', - { recommendation: 'APPROVE', rationale: 'Revised version approved' } - ) + delayMs: 10, + event: makeStreamEnvelope('macp.mode.decision.v1', 'Objection', 'evaluator', { + severity: 'high', + reason: 'Needs revision', + }), }, { - trigger: { afterMessageType: 'Vote', fromParticipant: 'voter' }, - event: makeStreamEnvelope( - 'macp.mode.decision.v1', - 'Vote', - 'voter', - { vote: 'approve' } - ) + delayMs: 10, + event: makeStreamEnvelope('macp.mode.decision.v1', 'Evaluation', 'evaluator', { + recommendation: 'APPROVE', + rationale: 'Revised version approved', + }), }, { - trigger: { afterMessageType: 'Vote' }, - delayMs: 50, - event: makeStreamEnvelope( - 'macp.mode.decision.v1', - 'Commitment', - 'system', - { proposalId: 'prop-2', outcome: 'approved', finalized: true, outcome_positive: true } - ) - } - ] + delayMs: 10, + event: makeStreamEnvelope('macp.mode.decision.v1', 'Vote', 'voter', { vote: 'approve' }), + }, + { + delayMs: 10, + event: makeStreamEnvelope('macp.mode.decision.v1', 'Commitment', 'proposer', { + proposalId: 'prop-2', + outcome: 'approved', + finalized: true, + outcome_positive: true, + }), + }, + ], }; } -/** - * Rejection: Proposal -> Vote (reject) -> session does not resolve (no Commitment) - */ +/** 
Rejection: Proposal → Vote(reject) — no Commitment emitted. */ export function decisionRejectionScript(): RuntimeScript { return { supportedModes: ['macp.mode.decision.v1'], + initiator: 'proposer', events: [ - { event: makeStreamOpened() }, { - trigger: { afterMessageType: 'Vote', fromParticipant: 'voter' }, - event: makeStreamEnvelope( - 'macp.mode.decision.v1', - 'Vote', - 'voter', - { vote: 'reject', rationale: 'Insufficient evidence' } - ) - } - ] + delayMs: 10, + event: makeStreamEnvelope('macp.mode.decision.v1', 'Proposal', 'proposer', { + proposalId: 'prop-3', + option: 'Deploy feature Z', + rationale: 'Speculative', + }), + }, + { + delayMs: 10, + event: makeStreamEnvelope('macp.mode.decision.v1', 'Vote', 'voter', { + vote: 'reject', + rationale: 'Insufficient evidence', + }), + }, + ], }; } diff --git a/test/fixtures/handoff-mode.ts b/test/fixtures/handoff-mode.ts index 9fc9f1a..519a16b 100644 --- a/test/fixtures/handoff-mode.ts +++ b/test/fixtures/handoff-mode.ts @@ -1,14 +1,11 @@ -import { ExecutionRequest } from '../../src/contracts/control-plane'; +import { RunDescriptor } from '../../src/contracts/control-plane'; import { - makeStreamOpened, makeStreamEnvelope, - RuntimeScript + RuntimeScript, } from '../helpers/scripted-mock-runtime.provider'; import { testRuntimeKind } from '../helpers/runtime-kind'; -export function handoffModeRequest( - overrides?: Partial -): ExecutionRequest { +export function handoffModeRequest(overrides?: Partial): RunDescriptor { return { mode: 'sandbox', runtime: { kind: testRuntimeKind() }, @@ -18,92 +15,67 @@ export function handoffModeRequest( configurationVersion: '1.0.0', policyVersion: 'policy.default', ttlMs: 60000, - participants: [ - { id: 'source', role: 'source_agent' }, - { id: 'target', role: 'target_agent' } - ] + participants: [{ id: 'source' }, { id: 'target' }], }, - kickoff: [ - { - from: 'source', - to: ['target'], - kind: 'request', - messageType: 'HandoffOffer', - payload: { - reason: 'Specialized 
knowledge required', - contextSummary: 'User needs help with billing' - } - } - ], - execution: { - tags: ['integration-test', 'handoff-mode'] - }, - ...overrides + execution: { tags: ['integration-test', 'handoff-mode'] }, + ...overrides, }; } -/** Successful handoff: HandoffOffer -> HandoffContext -> HandoffAccept -> Commitment */ export function handoffAcceptScript(): RuntimeScript { return { supportedModes: ['macp.mode.handoff.v1'], + initiator: 'source', events: [ - { event: makeStreamOpened() }, { - trigger: { - afterMessageType: 'HandoffContext', - fromParticipant: 'source' - }, - event: makeStreamEnvelope( - 'macp.mode.handoff.v1', - 'HandoffContext', - 'source', - { conversationHistory: ['msg1', 'msg2'], metadata: { topic: 'billing' } } - ) + delayMs: 5, + event: makeStreamEnvelope('macp.mode.handoff.v1', 'HandoffOffer', 'source', { + reason: 'Specialized knowledge required', + contextSummary: 'User needs help with billing', + }), + }, + { + delayMs: 5, + event: makeStreamEnvelope('macp.mode.handoff.v1', 'HandoffContext', 'source', { + conversationHistory: ['msg1', 'msg2'], + metadata: { topic: 'billing' }, + }), }, { - trigger: { - afterMessageType: 'HandoffAccept', - fromParticipant: 'target' - }, - event: makeStreamEnvelope( - 'macp.mode.handoff.v1', - 'HandoffAccept', - 'target', - { acceptedAt: new Date().toISOString() } - ) + delayMs: 5, + event: makeStreamEnvelope('macp.mode.handoff.v1', 'HandoffAccept', 'target', { + acceptedAt: new Date().toISOString(), + }), }, { - trigger: { afterMessageType: 'HandoffAccept' }, - delayMs: 50, - event: makeStreamEnvelope( - 'macp.mode.handoff.v1', - 'Commitment', - 'system', - { outcome: 'handoff_completed', finalized: true, outcome_positive: true } - ) - } - ] + delayMs: 5, + event: makeStreamEnvelope('macp.mode.handoff.v1', 'Commitment', 'source', { + outcome: 'handoff_completed', + finalized: true, + outcome_positive: true, + }), + }, + ], }; } -/** Declined handoff: HandoffOffer -> HandoffDecline */ export 
function handoffDeclineScript(): RuntimeScript { return { supportedModes: ['macp.mode.handoff.v1'], + initiator: 'source', events: [ - { event: makeStreamOpened() }, { - trigger: { - afterMessageType: 'HandoffDecline', - fromParticipant: 'target' - }, - event: makeStreamEnvelope( - 'macp.mode.handoff.v1', - 'HandoffDecline', - 'target', - { reason: 'Not available' } - ) - } - ] + delayMs: 5, + event: makeStreamEnvelope('macp.mode.handoff.v1', 'HandoffOffer', 'source', { + reason: 'Specialized', + }), + }, + { + delayMs: 5, + event: makeStreamEnvelope('macp.mode.handoff.v1', 'HandoffDecline', 'target', { + reason: 'Not available', + }), + }, + ], }; } diff --git a/test/fixtures/proposal-mode.ts b/test/fixtures/proposal-mode.ts index 134a243..f423737 100644 --- a/test/fixtures/proposal-mode.ts +++ b/test/fixtures/proposal-mode.ts @@ -1,14 +1,11 @@ -import { ExecutionRequest } from '../../src/contracts/control-plane'; +import { RunDescriptor } from '../../src/contracts/control-plane'; import { - makeStreamOpened, makeStreamEnvelope, - RuntimeScript + RuntimeScript, } from '../helpers/scripted-mock-runtime.provider'; import { testRuntimeKind } from '../helpers/runtime-kind'; -export function proposalModeRequest( - overrides?: Partial -): ExecutionRequest { +export function proposalModeRequest(overrides?: Partial): RunDescriptor { return { mode: 'sandbox', runtime: { kind: testRuntimeKind() }, @@ -18,117 +15,105 @@ export function proposalModeRequest( configurationVersion: '1.0.0', policyVersion: 'policy.default', ttlMs: 60000, - participants: [ - { id: 'author', role: 'author' }, - { id: 'reviewer', role: 'reviewer' } - ] + participants: [{ id: 'author' }, { id: 'reviewer' }], }, - kickoff: [ - { - from: 'author', - to: ['reviewer'], - kind: 'proposal', - messageType: 'Proposal', - payload: { - proposalId: 'prop-1', - title: 'Integration test proposal', - body: 'This is a test proposal for integration testing' - } - } - ], - execution: { - tags: ['integration-test', 
'proposal-mode'] - }, - ...overrides + execution: { tags: ['integration-test', 'proposal-mode'] }, + ...overrides, }; } -/** Accept flow: Proposal -> Accept -> Commitment */ export function proposalAcceptScript(): RuntimeScript { return { supportedModes: ['macp.mode.proposal.v1'], + initiator: 'author', events: [ - { event: makeStreamOpened() }, { - trigger: { afterMessageType: 'Accept', fromParticipant: 'reviewer' }, - event: makeStreamEnvelope( - 'macp.mode.proposal.v1', - 'Accept', - 'reviewer', - { proposalId: 'prop-1', comment: 'LGTM' } - ) + delayMs: 5, + event: makeStreamEnvelope('macp.mode.proposal.v1', 'Proposal', 'author', { + proposalId: 'prop-1', + title: 'Integration test proposal', + body: 'This is a test proposal', + }), + }, + { + delayMs: 5, + event: makeStreamEnvelope('macp.mode.proposal.v1', 'Accept', 'reviewer', { + proposalId: 'prop-1', + comment: 'LGTM', + }), }, { - trigger: { afterMessageType: 'Accept' }, - delayMs: 50, - event: makeStreamEnvelope( - 'macp.mode.proposal.v1', - 'Commitment', - 'system', - { proposalId: 'prop-1', outcome: 'accepted', finalized: true, outcome_positive: true } - ) - } - ] + delayMs: 5, + event: makeStreamEnvelope('macp.mode.proposal.v1', 'Commitment', 'author', { + proposalId: 'prop-1', + outcome: 'accepted', + finalized: true, + outcome_positive: true, + }), + }, + ], }; } -/** Counter-proposal flow: Proposal -> CounterProposal -> Accept -> Commitment */ export function proposalCounterScript(): RuntimeScript { return { supportedModes: ['macp.mode.proposal.v1'], + initiator: 'author', events: [ - { event: makeStreamOpened() }, { - trigger: { afterMessageType: 'CounterProposal', fromParticipant: 'reviewer' }, - event: makeStreamEnvelope( - 'macp.mode.proposal.v1', - 'CounterProposal', - 'reviewer', - { - proposalId: 'prop-2', - supersedesProposalId: 'prop-1', - title: 'Better approach' - } - ) + delayMs: 5, + event: makeStreamEnvelope('macp.mode.proposal.v1', 'Proposal', 'author', { + proposalId: 'prop-1', + 
title: 'Initial', + }), + }, + { + delayMs: 5, + event: makeStreamEnvelope('macp.mode.proposal.v1', 'CounterProposal', 'reviewer', { + proposalId: 'prop-2', + supersedesProposalId: 'prop-1', + title: 'Better approach', + }), }, { - trigger: { afterMessageType: 'Accept', fromParticipant: 'author' }, - event: makeStreamEnvelope( - 'macp.mode.proposal.v1', - 'Accept', - 'author', - { proposalId: 'prop-2' } - ) + delayMs: 5, + event: makeStreamEnvelope('macp.mode.proposal.v1', 'Accept', 'author', { + proposalId: 'prop-2', + }), }, { - trigger: { afterMessageType: 'Accept' }, - delayMs: 50, - event: makeStreamEnvelope( - 'macp.mode.proposal.v1', - 'Commitment', - 'system', - { proposalId: 'prop-2', outcome: 'accepted', finalized: true, outcome_positive: true } - ) - } - ] + delayMs: 5, + event: makeStreamEnvelope('macp.mode.proposal.v1', 'Commitment', 'author', { + proposalId: 'prop-2', + outcome: 'accepted', + finalized: true, + outcome_positive: true, + }), + }, + ], }; } -/** Rejection flow: Proposal -> Reject */ export function proposalRejectScript(): RuntimeScript { return { supportedModes: ['macp.mode.proposal.v1'], + initiator: 'author', events: [ - { event: makeStreamOpened() }, { - trigger: { afterMessageType: 'Reject', fromParticipant: 'reviewer' }, - event: makeStreamEnvelope( - 'macp.mode.proposal.v1', - 'Reject', - 'reviewer', - { proposalId: 'prop-1', reason: 'Out of scope', terminal: true } - ) - } - ] + delayMs: 5, + event: makeStreamEnvelope('macp.mode.proposal.v1', 'Proposal', 'author', { + proposalId: 'prop-1', + title: 'Rejected proposal', + }), + }, + { + delayMs: 5, + event: makeStreamEnvelope('macp.mode.proposal.v1', 'Reject', 'reviewer', { + proposalId: 'prop-1', + reason: 'Out of scope', + terminal: true, + }), + }, + ], }; } diff --git a/test/fixtures/quorum-mode.ts b/test/fixtures/quorum-mode.ts index 85ea633..3e93e4f 100644 --- a/test/fixtures/quorum-mode.ts +++ b/test/fixtures/quorum-mode.ts @@ -1,14 +1,11 @@ -import { ExecutionRequest } 
from '../../src/contracts/control-plane'; +import { RunDescriptor } from '../../src/contracts/control-plane'; import { - makeStreamOpened, makeStreamEnvelope, - RuntimeScript + RuntimeScript, } from '../helpers/scripted-mock-runtime.provider'; import { testRuntimeKind } from '../helpers/runtime-kind'; -export function quorumModeRequest( - overrides?: Partial -): ExecutionRequest { +export function quorumModeRequest(overrides?: Partial): RunDescriptor { return { mode: 'sandbox', runtime: { kind: testRuntimeKind() }, @@ -19,163 +16,132 @@ export function quorumModeRequest( policyVersion: 'policy.default', ttlMs: 60000, participants: [ - { id: 'initiator', role: 'initiator' }, - { id: 'voter_a', role: 'voter' }, - { id: 'voter_b', role: 'voter' }, - { id: 'voter_c', role: 'voter' } - ] + { id: 'initiator' }, + { id: 'voter_a' }, + { id: 'voter_b' }, + { id: 'voter_c' }, + ], }, - kickoff: [ - { - from: 'initiator', - to: ['voter_a', 'voter_b', 'voter_c'], - kind: 'request', - messageType: 'ApprovalRequest', - payload: { - requestId: 'approval-1', - subject: 'Release v2.0', - requiredApprovals: 2, - description: 'Approve release of version 2.0' - } - } - ], - execution: { - tags: ['integration-test', 'quorum-mode'] - }, - ...overrides + execution: { tags: ['integration-test', 'quorum-mode'] }, + ...overrides, }; } -/** Quorum reached: 2 of 3 approve -> Commitment */ export function quorumReachedScript(): RuntimeScript { return { supportedModes: ['macp.mode.quorum.v1'], + initiator: 'initiator', events: [ - { event: makeStreamOpened() }, { - trigger: { afterMessageType: 'Approve', fromParticipant: 'voter_a' }, - event: makeStreamEnvelope( - 'macp.mode.quorum.v1', - 'Approve', - 'voter_a', - { requestId: 'approval-1', comment: 'Ship it' } - ) + delayMs: 5, + event: makeStreamEnvelope('macp.mode.quorum.v1', 'ApprovalRequest', 'initiator', { + requestId: 'approval-1', + subject: 'Release v2.0', + requiredApprovals: 2, + }), }, { - trigger: { afterMessageType: 'Approve', 
fromParticipant: 'voter_b' }, - event: makeStreamEnvelope( - 'macp.mode.quorum.v1', - 'Approve', - 'voter_b', - { requestId: 'approval-1' } - ) + delayMs: 5, + event: makeStreamEnvelope('macp.mode.quorum.v1', 'Approve', 'voter_a', { + requestId: 'approval-1', + comment: 'Ship it', + }), + }, + { + delayMs: 5, + event: makeStreamEnvelope('macp.mode.quorum.v1', 'Approve', 'voter_b', { + requestId: 'approval-1', + }), }, { - trigger: { afterMessageType: 'Approve' }, - delayMs: 50, - event: makeStreamEnvelope( - 'macp.mode.quorum.v1', - 'Commitment', - 'system', - { - requestId: 'approval-1', - outcome: 'approved', - approvalCount: 2, - finalized: true, - outcome_positive: true - } - ) - } - ] + delayMs: 5, + event: makeStreamEnvelope('macp.mode.quorum.v1', 'Commitment', 'initiator', { + requestId: 'approval-1', + outcome: 'approved', + approvalCount: 2, + finalized: true, + outcome_positive: true, + }), + }, + ], }; } -/** Quorum not reached: 2 of 3 reject */ export function quorumRejectedScript(): RuntimeScript { return { supportedModes: ['macp.mode.quorum.v1'], + initiator: 'initiator', events: [ - { event: makeStreamOpened() }, { - trigger: { afterMessageType: 'Approve', fromParticipant: 'voter_a' }, - event: makeStreamEnvelope( - 'macp.mode.quorum.v1', - 'Approve', - 'voter_a', - { requestId: 'approval-1' } - ) + delayMs: 5, + event: makeStreamEnvelope('macp.mode.quorum.v1', 'ApprovalRequest', 'initiator', { + requestId: 'approval-1', + }), }, { - trigger: { afterMessageType: 'Reject', fromParticipant: 'voter_b' }, - event: makeStreamEnvelope( - 'macp.mode.quorum.v1', - 'Reject', - 'voter_b', - { requestId: 'approval-1', reason: 'Not ready' } - ) + delayMs: 5, + event: makeStreamEnvelope('macp.mode.quorum.v1', 'Approve', 'voter_a', { + requestId: 'approval-1', + }), + }, + { + delayMs: 5, + event: makeStreamEnvelope('macp.mode.quorum.v1', 'Reject', 'voter_b', { + requestId: 'approval-1', + reason: 'Not ready', + }), }, { - trigger: { afterMessageType: 'Reject', 
fromParticipant: 'voter_c' }, - event: makeStreamEnvelope( - 'macp.mode.quorum.v1', - 'Reject', - 'voter_c', - { requestId: 'approval-1', reason: 'Missing tests' } - ) - } - ] + delayMs: 5, + event: makeStreamEnvelope('macp.mode.quorum.v1', 'Reject', 'voter_c', { + requestId: 'approval-1', + reason: 'Missing tests', + }), + }, + ], }; } -/** Abstention: Approve + Abstain + Approve -> quorum met */ export function quorumAbstentionScript(): RuntimeScript { return { supportedModes: ['macp.mode.quorum.v1'], + initiator: 'initiator', events: [ - { event: makeStreamOpened() }, { - trigger: { afterMessageType: 'Approve', fromParticipant: 'voter_a' }, - event: makeStreamEnvelope( - 'macp.mode.quorum.v1', - 'Approve', - 'voter_a', - { requestId: 'approval-1' } - ) + delayMs: 5, + event: makeStreamEnvelope('macp.mode.quorum.v1', 'ApprovalRequest', 'initiator', { + requestId: 'approval-1', + }), + }, + { + delayMs: 5, + event: makeStreamEnvelope('macp.mode.quorum.v1', 'Approve', 'voter_a', { + requestId: 'approval-1', + }), }, { - trigger: { afterMessageType: 'Abstain', fromParticipant: 'voter_b' }, - event: makeStreamEnvelope( - 'macp.mode.quorum.v1', - 'Abstain', - 'voter_b', - { requestId: 'approval-1', reason: 'No opinion' } - ) + delayMs: 5, + event: makeStreamEnvelope('macp.mode.quorum.v1', 'Abstain', 'voter_b', { + requestId: 'approval-1', + reason: 'No opinion', + }), }, { - trigger: { afterMessageType: 'Approve', fromParticipant: 'voter_c' }, - event: makeStreamEnvelope( - 'macp.mode.quorum.v1', - 'Approve', - 'voter_c', - { requestId: 'approval-1' } - ) + delayMs: 5, + event: makeStreamEnvelope('macp.mode.quorum.v1', 'Approve', 'voter_c', { + requestId: 'approval-1', + }), }, { - trigger: { afterMessageType: 'Approve' }, - delayMs: 50, - event: makeStreamEnvelope( - 'macp.mode.quorum.v1', - 'Commitment', - 'system', - { - requestId: 'approval-1', - outcome: 'approved', - approvalCount: 2, - finalized: true, - outcome_positive: true - } - ) - } - ] + delayMs: 5, + 
event: makeStreamEnvelope('macp.mode.quorum.v1', 'Commitment', 'initiator', { + requestId: 'approval-1', + outcome: 'approved', + approvalCount: 2, + finalized: true, + outcome_positive: true, + }), + }, + ], }; } diff --git a/test/fixtures/task-mode.ts b/test/fixtures/task-mode.ts index 646a18f..8cdc53a 100644 --- a/test/fixtures/task-mode.ts +++ b/test/fixtures/task-mode.ts @@ -1,14 +1,11 @@ -import { ExecutionRequest } from '../../src/contracts/control-plane'; +import { RunDescriptor } from '../../src/contracts/control-plane'; import { - makeStreamOpened, makeStreamEnvelope, - RuntimeScript + RuntimeScript, } from '../helpers/scripted-mock-runtime.provider'; import { testRuntimeKind } from '../helpers/runtime-kind'; -export function taskModeRequest( - overrides?: Partial -): ExecutionRequest { +export function taskModeRequest(overrides?: Partial): RunDescriptor { return { mode: 'sandbox', runtime: { kind: testRuntimeKind() }, @@ -18,121 +15,111 @@ export function taskModeRequest( configurationVersion: '1.0.0', policyVersion: 'policy.default', ttlMs: 60000, - participants: [ - { id: 'requester', role: 'requester' }, - { id: 'worker', role: 'worker' } - ] + participants: [{ id: 'requester' }, { id: 'worker' }], }, - kickoff: [ - { - from: 'requester', - to: ['worker'], - kind: 'request', - messageType: 'TaskRequest', - payload: { - taskId: 'task-1', - description: 'Process integration test data', - priority: 'normal' - } - } - ], - execution: { - tags: ['integration-test', 'task-mode'] - }, - ...overrides + execution: { tags: ['integration-test', 'task-mode'] }, + ...overrides, }; } -/** - * Happy path: TaskRequest -> TaskAccept -> TaskUpdate (50%) -> TaskComplete -> resolved - */ +/** Happy path: TaskRequest → TaskAccept → TaskUpdate → TaskComplete → Commitment. 
*/ export function taskHappyScript(): RuntimeScript { return { supportedModes: ['macp.mode.task.v1'], + initiator: 'requester', events: [ - { event: makeStreamOpened() }, { - trigger: { afterMessageType: 'TaskAccept', fromParticipant: 'worker' }, + delayMs: 5, + event: makeStreamEnvelope('macp.mode.task.v1', 'TaskRequest', 'requester', { + taskId: 'task-1', + description: 'Process integration test data', + priority: 'normal', + }), + }, + { + delayMs: 5, event: makeStreamEnvelope('macp.mode.task.v1', 'TaskAccept', 'worker', { taskId: 'task-1', - acceptedAt: new Date().toISOString() - }) + }), }, { - trigger: { afterMessageType: 'TaskUpdate', fromParticipant: 'worker' }, + delayMs: 5, event: makeStreamEnvelope('macp.mode.task.v1', 'TaskUpdate', 'worker', { taskId: 'task-1', progress: 0.5, - message: 'Processing...' - }) + message: 'Processing...', + }), }, { - trigger: { afterMessageType: 'TaskComplete', fromParticipant: 'worker' }, - event: makeStreamEnvelope( - 'macp.mode.task.v1', - 'TaskComplete', - 'worker', - { - taskId: 'task-1', - output: { result: 'success', itemsProcessed: 42 } - } - ) + delayMs: 5, + event: makeStreamEnvelope('macp.mode.task.v1', 'TaskComplete', 'worker', { + taskId: 'task-1', + output: { result: 'success', itemsProcessed: 42 }, + }), }, { - trigger: { afterMessageType: 'TaskComplete' }, - delayMs: 50, - event: makeStreamEnvelope('macp.mode.task.v1', 'Commitment', 'system', { + delayMs: 5, + event: makeStreamEnvelope('macp.mode.task.v1', 'Commitment', 'requester', { taskId: 'task-1', outcome: 'completed', finalized: true, - outcome_positive: true - }) - } - ] + outcome_positive: true, + }), + }, + ], }; } -/** - * Task rejection: TaskRequest -> TaskReject -> no completion - */ +/** Rejection: TaskRequest → TaskReject. 
*/ export function taskRejectionScript(): RuntimeScript { return { supportedModes: ['macp.mode.task.v1'], + initiator: 'requester', events: [ - { event: makeStreamOpened() }, { - trigger: { afterMessageType: 'TaskReject', fromParticipant: 'worker' }, + delayMs: 5, + event: makeStreamEnvelope('macp.mode.task.v1', 'TaskRequest', 'requester', { + taskId: 'task-1', + description: 'Will be rejected', + }), + }, + { + delayMs: 5, event: makeStreamEnvelope('macp.mode.task.v1', 'TaskReject', 'worker', { taskId: 'task-1', - reason: 'capacity' - }) - } - ] + reason: 'capacity', + }), + }, + ], }; } -/** - * Task failure: TaskRequest -> TaskAccept -> TaskFail - */ +/** Failure: TaskRequest → TaskAccept → TaskFail. */ export function taskFailureScript(): RuntimeScript { return { supportedModes: ['macp.mode.task.v1'], + initiator: 'requester', events: [ - { event: makeStreamOpened() }, { - trigger: { afterMessageType: 'TaskAccept', fromParticipant: 'worker' }, + delayMs: 5, + event: makeStreamEnvelope('macp.mode.task.v1', 'TaskRequest', 'requester', { + taskId: 'task-1', + }), + }, + { + delayMs: 5, event: makeStreamEnvelope('macp.mode.task.v1', 'TaskAccept', 'worker', { - taskId: 'task-1' - }) + taskId: 'task-1', + }), }, { - trigger: { afterMessageType: 'TaskFail', fromParticipant: 'worker' }, + delayMs: 5, event: makeStreamEnvelope('macp.mode.task.v1', 'TaskFail', 'worker', { taskId: 'task-1', error: 'Processing failed', - retryable: true - }) - } - ] + retryable: true, + }), + }, + ], }; } diff --git a/test/helpers/scripted-mock-runtime.provider.ts b/test/helpers/scripted-mock-runtime.provider.ts index e6c8311..fb9e6eb 100644 --- a/test/helpers/scripted-mock-runtime.provider.ts +++ b/test/helpers/scripted-mock-runtime.provider.ts @@ -13,115 +13,91 @@ import { RuntimeListPoliciesRequest, RuntimeManifestResult, RuntimeModeDescriptor, - RuntimeOpenSessionRequest, RuntimePolicyDescriptor, RuntimeProvider, RuntimeRegisterPolicyRequest, RuntimeRegisterPolicyResult, 
RuntimeRootDescriptor, - RuntimeSendRequest, - RuntimeSendResult, RuntimeSessionHandle, RuntimeSessionSnapshot, - RuntimeStartSessionRequest, - RuntimeStartSessionResult, - RuntimeStreamSessionRequest, + RuntimeSubscribeSessionRequest, RuntimeUnregisterPolicyRequest, - RuntimeUnregisterPolicyResult + RuntimeUnregisterPolicyResult, } from '../../src/contracts/runtime'; export interface ScriptedEvent { - /** Delay before emitting this event (ms) */ + /** Delay before emitting this event (ms). */ delayMs?: number; - /** Emit only after a message of this type is received via send() */ - trigger?: { afterMessageType: string; fromParticipant?: string }; - /** The raw event to emit */ + /** The raw event to emit. */ event: RawRuntimeEvent; } export interface RuntimeScript { supportedModes: string[]; + /** Events the observer will see, in order. */ events: ScriptedEvent[]; - /** Called on every send() — can return additional events to emit */ - onSend?: (req: RuntimeSendRequest) => RawRuntimeEvent[] | undefined; -} - -interface SentMessage { - req: RuntimeSendRequest; - at: string; + /** Optional initiator identity returned from GetSession — defaults to 'mock-initiator'. */ + initiator?: string; + /** + * How long GetSession reports a non-OPEN state before flipping to OPEN. + * Simulates the initiator agent opening the session. Default: 0ms (immediate). + */ + sessionOpenAfterMs?: number; } /** - * Enhanced mock runtime that follows a scripted sequence of events. - * Used for integration testing where we need deterministic, multi-step - * coordination flows without a real gRPC runtime. + * Scripted observer-mode mock runtime (direct-agent-auth CP-3). + * + * Simulates a runtime that agents have already connected to: `GetSession` flips from + * `UNSPECIFIED` → `OPEN` after `sessionOpenAfterMs`, then the scripted event sequence + * is streamed from `subscribeSession().events`. 
+ * + * No `send()` / `openSession()` / `startSession()` methods — agents drive those directly + * against the runtime; the control-plane observer never writes envelopes. */ export class ScriptedMockRuntimeProvider implements RuntimeProvider { readonly kind = 'scripted-mock'; - readonly sentMessages: SentMessage[] = []; private script: RuntimeScript; private sessionState: SessionState = 'SESSION_STATE_OPEN'; - private pendingTriggerEvents: ScriptedEvent[] = []; - private eventEmitter: ((event: RawRuntimeEvent) => void) | null = null; + private sessionOpenAt: number = 0; private policies = new Map(); constructor(script: RuntimeScript) { this.script = script; + this.sessionOpenAt = Date.now() + (script.sessionOpenAfterMs ?? 0); } - /** Stub for HealthController readyz — mock is always CLOSED */ + /** Stub for HealthController readyz. */ getCircuitBreakerState(): string { return 'CLOSED'; } - /** Stub for HealthController */ resetCircuitBreaker(): void {} - /** Replace the script (useful for per-test configuration) */ setScript(script: RuntimeScript): void { this.script = script; - this.sentMessages.length = 0; this.sessionState = 'SESSION_STATE_OPEN'; - this.pendingTriggerEvents = []; + this.sessionOpenAt = Date.now() + (script.sessionOpenAfterMs ?? 0); } - async initialize( - _req: RuntimeInitializeRequest - ): Promise { + async initialize(_req: RuntimeInitializeRequest): Promise { return { selectedProtocolVersion: '1.0', runtimeInfo: { name: 'scripted-mock', version: '0.0.1' }, - supportedModes: this.script.supportedModes + supportedModes: this.script.supportedModes, }; } - openSession(req: RuntimeOpenSessionRequest): RuntimeSessionHandle { - const sessionId = randomUUID(); - const initiator = - req.execution.session.participants[0]?.id ?? 
'mock-initiator'; - const ack = this.makeAck(sessionId); - - // Separate trigger-based events from immediate/delayed events - const immediateEvents: ScriptedEvent[] = []; - this.pendingTriggerEvents = []; - for (const se of this.script.events) { - if (se.trigger) { - this.pendingTriggerEvents.push(se); - } else { - immediateEvents.push(se); - } - } - - const self = this; + subscribeSession(req: RuntimeSubscribeSessionRequest): RuntimeSessionHandle { type ResolverFn = (value: IteratorResult) => void; const state = { resolveNextEvent: null as ResolverFn | null, eventQueue: [] as RawRuntimeEvent[], - streamDone: false + streamDone: false, }; - self.eventEmitter = (event: RawRuntimeEvent) => { + const emit = (event: RawRuntimeEvent) => { if (state.resolveNextEvent) { const resolve = state.resolveNextEvent; state.resolveNextEvent = null; @@ -131,24 +107,33 @@ export class ScriptedMockRuntimeProvider implements RuntimeProvider { } }; - // Schedule immediate events + // Schedule all scripted events unconditionally — observer mode. (async () => { - for (const se of immediateEvents) { + for (const se of this.script.events) { if (se.delayMs) { await new Promise((r) => setTimeout(r, se.delayMs)); } - self.eventEmitter?.(se.event); - } - // If no trigger events remain, close after a short delay - if (self.pendingTriggerEvents.length === 0) { - await new Promise((r) => setTimeout(r, 50)); - state.streamDone = true; - const pending = state.resolveNextEvent; - state.resolveNextEvent = null; - if (pending) { - pending({ done: true, value: undefined }); + // Re-stamp session id on each envelope so it's routed to this subscriber's sessionId. + if (se.event.kind === 'stream-envelope' && se.event.envelope) { + emit({ + ...se.event, + envelope: { ...se.event.envelope, sessionId: req.runtimeSessionId }, + }); + } else { + emit(se.event); } } + // Close the stream shortly after the last event, simulating SESSION_STATE_RESOLVED. 
+ await new Promise((r) => setTimeout(r, 50)); + emit({ + kind: 'session-snapshot', + receivedAt: new Date().toISOString(), + sessionSnapshot: { sessionId: req.runtimeSessionId, mode: '', state: 'SESSION_STATE_RESOLVED' }, + }); + state.streamDone = true; + const pending = state.resolveNextEvent; + state.resolveNextEvent = null; + if (pending) pending({ done: true, value: undefined }); })(); const events: AsyncIterable = { @@ -156,10 +141,7 @@ export class ScriptedMockRuntimeProvider implements RuntimeProvider { return { next(): Promise> { if (state.eventQueue.length > 0) { - return Promise.resolve({ - done: false, - value: state.eventQueue.shift()! - }); + return Promise.resolve({ done: false, value: state.eventQueue.shift()! }); } if (state.streamDone) { return Promise.resolve({ done: true, value: undefined }); @@ -171,136 +153,33 @@ export class ScriptedMockRuntimeProvider implements RuntimeProvider { return(): Promise> { state.streamDone = true; return Promise.resolve({ done: true, value: undefined }); - } + }, }; - } + }, }; - const handle: RuntimeSessionHandle = { - send: () => { - /* Messages from control plane (kickoff) go through here */ - }, + return { events, - closeWrite: () => { - /* Half-close write side */ - }, abort: () => { state.streamDone = true; const pending = state.resolveNextEvent; state.resolveNextEvent = null; - if (pending) { - pending({ done: true, value: undefined }); - } + if (pending) pending({ done: true, value: undefined }); }, - sessionAck: Promise.resolve({ - runtimeSessionId: sessionId, - initiator, - ack - }) - }; - - return handle; - } - - async send(req: RuntimeSendRequest): Promise { - const messageId = randomUUID(); - this.sentMessages.push({ req, at: new Date().toISOString() }); - - // Check for triggered events - const toFire: ScriptedEvent[] = []; - this.pendingTriggerEvents = this.pendingTriggerEvents.filter((se) => { - const trigger = se.trigger!; - const typeMatch = trigger.afterMessageType === req.messageType; - const 
participantMatch = - !trigger.fromParticipant || trigger.fromParticipant === req.from; - if (typeMatch && participantMatch) { - toFire.push(se); - return false; // Remove from pending - } - return true; - }); - - // Fire triggered events - for (const se of toFire) { - if (se.delayMs) { - setTimeout(() => this.eventEmitter?.(se.event), se.delayMs); - } else { - // Small delay to ensure ordering - setTimeout(() => this.eventEmitter?.(se.event), 10); - } - } - - // Call onSend hook - if (this.script.onSend) { - const extraEvents = this.script.onSend(req); - if (extraEvents) { - for (const event of extraEvents) { - setTimeout(() => this.eventEmitter?.(event), 20); - } - } - } - - // If no more pending triggers and this is a session-bound message, - // schedule stream end. Skip for signals (empty sessionId). - if ( - this.pendingTriggerEvents.length === 0 && - req.runtimeSessionId !== '' - ) { - setTimeout(() => { - this.eventEmitter?.(makeSessionResolved(req.runtimeSessionId)); - }, 100); - } - - return { - ack: this.makeAck(req.runtimeSessionId, messageId), - envelope: { - macpVersion: '1.0', - mode: req.modeName, - messageType: req.messageType, - messageId, - sessionId: req.runtimeSessionId, - sender: req.from, - timestampUnixMs: Date.now(), - payload: req.payload - } }; } - async startSession( - req: RuntimeStartSessionRequest - ): Promise { - const sessionId = randomUUID(); - return { - runtimeSessionId: sessionId, - initiator: - req.execution.session.participants[0]?.id ?? 
'mock-initiator', - ack: this.makeAck(sessionId) - }; - } - - async *streamSession( - _req: RuntimeStreamSessionRequest - ): AsyncIterable { - yield { - kind: 'stream-status', - receivedAt: new Date().toISOString(), - streamStatus: { status: 'opened' } - }; - } - - async getSession( - req: RuntimeGetSessionRequest - ): Promise { + async getSession(req: RuntimeGetSessionRequest): Promise { + const isOpen = Date.now() >= this.sessionOpenAt && this.sessionState !== 'SESSION_STATE_UNSPECIFIED'; return { sessionId: req.runtimeSessionId, mode: this.script.supportedModes[0] ?? 'scripted-mock', - state: this.sessionState + state: isOpen ? this.sessionState : 'SESSION_STATE_UNSPECIFIED', + initiator: this.script.initiator ?? 'mock-initiator', }; } - async cancelSession( - req: RuntimeCancelSessionRequest - ): Promise { + async cancelSession(req: RuntimeCancelSessionRequest): Promise { this.sessionState = 'SESSION_STATE_RESOLVED'; return { ack: this.makeAck(req.runtimeSessionId) }; } @@ -309,9 +188,9 @@ export class ScriptedMockRuntimeProvider implements RuntimeProvider { return { agentId: 'scripted-mock', title: 'Scripted Mock Runtime', - description: 'Integration test runtime with scripted event sequences', + description: 'Observer-mode mock with scripted event sequences', supportedModes: this.script.supportedModes, - metadata: {} + metadata: {}, }; } @@ -321,31 +200,13 @@ export class ScriptedMockRuntimeProvider implements RuntimeProvider { modeVersion: '1.0', title: `Scripted ${mode}`, messageTypes: [ - 'Proposal', - 'Evaluation', - 'Objection', - 'Vote', - 'Commitment', - 'TaskRequest', - 'TaskAccept', - 'TaskReject', - 'TaskUpdate', - 'TaskComplete', - 'TaskFail', - 'CounterProposal', - 'Accept', - 'Reject', - 'Withdraw', - 'HandoffOffer', - 'HandoffContext', - 'HandoffAccept', - 'HandoffDecline', - 'ApprovalRequest', - 'Approve', - 'Abstain', - 'Signal' + 'Proposal', 'Evaluation', 'Objection', 'Vote', 'Commitment', + 'TaskRequest', 'TaskAccept', 'TaskReject', 
'TaskUpdate', 'TaskComplete', 'TaskFail', + 'CounterProposal', 'Accept', 'Reject', 'Withdraw', + 'HandoffOffer', 'HandoffContext', 'HandoffAccept', 'HandoffDecline', + 'ApprovalRequest', 'Approve', 'Abstain', 'Signal', ], - terminalMessageTypes: ['Commitment'] + terminalMessageTypes: ['Commitment'], })); } @@ -354,18 +215,11 @@ export class ScriptedMockRuntimeProvider implements RuntimeProvider { } async health(): Promise { - return { - ok: true, - runtimeKind: this.kind, - detail: 'scripted mock runtime always healthy' - }; + return { ok: true, runtimeKind: this.kind, detail: 'scripted mock runtime always healthy' }; } async registerPolicy(req: RuntimeRegisterPolicyRequest): Promise { - this.policies.set(req.descriptor.policyId, { - ...req.descriptor, - registeredAtUnixMs: Date.now() - }); + this.policies.set(req.descriptor.policyId, { ...req.descriptor, registeredAtUnixMs: Date.now() }); return { ok: true }; } @@ -397,7 +251,7 @@ export class ScriptedMockRuntimeProvider implements RuntimeProvider { messageId: messageId ?? 
randomUUID(), sessionId, acceptedAtUnixMs: Date.now(), - sessionState: 'SESSION_STATE_OPEN' + sessionState: 'SESSION_STATE_OPEN', }; } } @@ -408,19 +262,19 @@ export function makeStreamOpened(): RawRuntimeEvent { return { kind: 'stream-status', receivedAt: new Date().toISOString(), - streamStatus: { status: 'opened' } + streamStatus: { status: 'opened' }, }; } export function makeSessionSnapshot( sessionId: string, mode: string, - state: SessionState = 'SESSION_STATE_OPEN' + state: SessionState = 'SESSION_STATE_OPEN', ): RawRuntimeEvent { return { kind: 'session-snapshot', receivedAt: new Date().toISOString(), - sessionSnapshot: { sessionId, mode, state } + sessionSnapshot: { sessionId, mode, state }, }; } @@ -428,11 +282,7 @@ export function makeSessionResolved(sessionId: string): RawRuntimeEvent { return { kind: 'session-snapshot', receivedAt: new Date().toISOString(), - sessionSnapshot: { - sessionId, - mode: '', - state: 'SESSION_STATE_RESOLVED' - } + sessionSnapshot: { sessionId, mode: '', state: 'SESSION_STATE_RESOLVED' }, }; } @@ -441,7 +291,7 @@ export function makeStreamEnvelope( messageType: string, sender: string, payload: Record, - sessionId?: string + sessionId?: string, ): RawRuntimeEvent { return { kind: 'stream-envelope', @@ -454,15 +304,12 @@ export function makeStreamEnvelope( sessionId: sessionId ?? randomUUID(), sender, timestampUnixMs: Date.now(), - payload: Buffer.from(JSON.stringify(payload)) - } + payload: Buffer.from(JSON.stringify(payload)), + }, }; } -export function makeSendAck( - sessionId: string, - messageId?: string -): RawRuntimeEvent { +export function makeSendAck(sessionId: string, messageId?: string): RawRuntimeEvent { return { kind: 'send-ack', receivedAt: new Date().toISOString(), @@ -472,7 +319,7 @@ export function makeSendAck( messageId: messageId ?? 
randomUUID(), sessionId, acceptedAtUnixMs: Date.now(), - sessionState: 'SESSION_STATE_OPEN' - } + sessionState: 'SESSION_STATE_OPEN', + }, }; } diff --git a/test/helpers/test-client.ts b/test/helpers/test-client.ts index b5129d4..e42d3a8 100644 --- a/test/helpers/test-client.ts +++ b/test/helpers/test-client.ts @@ -22,6 +22,7 @@ export class TestClient { async createRun(body: Record | object): Promise<{ runId: string; + sessionId: string; status: string; traceId?: string; }> { @@ -51,28 +52,9 @@ export class TestClient { }); } - // ── Messaging ────────────────────────────────────────────────── - - async sendMessage( - runId: string, - body: Record | object - ): Promise<{ messageId: string; ack: Record | object }> { - return this.request('POST', `/runs/${runId}/messages`, { body }); - } - - async sendSignal( - runId: string, - body: Record | object - ): Promise | object> { - return this.request('POST', `/runs/${runId}/signal`, { body }); - } - - async updateContext( - runId: string, - body: Record | object - ): Promise | object> { - return this.request('POST', `/runs/${runId}/context`, { body }); - } + // ── Removed endpoints (direct-agent-auth CP-5/6/7) ───────────── + // sendMessage / sendSignal / updateContext are deleted. Agents emit envelopes + // directly via macp-sdk-python / macp-sdk-typescript. // ── Events ───────────────────────────────────────────────────── diff --git a/test/helpers/ts-agent.ts b/test/helpers/ts-agent.ts deleted file mode 100644 index 6499934..0000000 --- a/test/helpers/ts-agent.ts +++ /dev/null @@ -1,204 +0,0 @@ -import { TestClient } from './test-client'; -import { TestSSEClient, SSEEvent } from './sse-client'; - -export interface AgentRule { - when: { - eventType?: string; - messageType?: string; - fromParticipant?: string; - }; - then: { - delayMs?: number; - messageType: string; - payload: - | Record - | ((event: Record) => Record); - to?: string[]; - }; -} - -/** - * Rule-based TypeScript agent for integration testing. 
- * Subscribes to SSE events and sends messages based on matching rules. - * Deterministic — no LLM calls. - */ -export class TypeScriptAgent { - private sseClient: TestSSEClient | null = null; - private running = false; - readonly actionsPerformed: Array<{ - rule: AgentRule; - event: Record; - response: Record; - }> = []; - - constructor( - private readonly client: TestClient, - private readonly baseUrl: string, - private readonly participantId: string, - private readonly rules: AgentRule[], - private readonly apiKey?: string - ) {} - - /** - * Start the agent loop: subscribe to SSE, match rules, send messages. - * Returns when the stream ends or the run reaches a terminal state. - */ - async run(runId: string, timeoutMs = 30000): Promise { - this.running = true; - this.sseClient = new TestSSEClient(this.baseUrl, this.apiKey); - this.sseClient.connect(runId, { includeSnapshot: true }); - - const startTime = Date.now(); - - return new Promise((resolve, reject) => { - const timer = setTimeout(() => { - this.running = false; - this.sseClient?.close(); - reject( - new Error( - `Agent ${this.participantId} timed out after ${timeoutMs}ms` - ) - ); - }, timeoutMs); - - const processEvents = async () => { - let lastProcessed = 0; - - while (this.running) { - // Check for new events - const events = this.sseClient!.events.slice(lastProcessed); - lastProcessed = this.sseClient!.events.length; - - for (const event of events) { - // Check for terminal state in snapshot - if (event.type === 'snapshot') { - const snapshot = event.data as Record; - const run = snapshot.run as Record | undefined; - if (run) { - const status = run.status as string; - if (['completed', 'failed', 'cancelled'].includes(status)) { - this.running = false; - clearTimeout(timer); - resolve(); - return; - } - } - } - - if (event.type === 'canonical_event') { - await this.processEvent(runId, event); - } - } - - // Check if stream closed - if (this.sseClient!.events.length > 0) { - const lastEvent = 
this.sseClient!.events[this.sseClient!.events.length - 1]; - if (lastEvent.type === 'snapshot') { - const snap = lastEvent.data as Record; - const run = snap.run as Record | undefined; - if ( - run && - ['completed', 'failed', 'cancelled'].includes( - run.status as string - ) - ) { - this.running = false; - clearTimeout(timer); - resolve(); - return; - } - } - } - - await sleep(100); - - if (Date.now() - startTime > timeoutMs) break; - } - - clearTimeout(timer); - resolve(); - }; - - processEvents().catch((err) => { - clearTimeout(timer); - reject(err); - }); - }); - } - - stop(): void { - this.running = false; - this.sseClient?.close(); - } - - private async processEvent(runId: string, sseEvent: SSEEvent): Promise { - const canonical = sseEvent.data as Record; - const eventType = canonical.type as string; - const data = canonical.data as Record | undefined; - - for (const rule of this.rules) { - if (!this.matches(eventType, data, rule.when)) continue; - - if (rule.then.delayMs) { - await sleep(rule.then.delayMs); - } - - const payload = - typeof rule.then.payload === 'function' - ? rule.then.payload(canonical) - : rule.then.payload; - - try { - await this.client.sendMessage(runId, { - from: this.participantId, - to: rule.then.to, - messageType: rule.then.messageType, - payload - }); - - this.actionsPerformed.push({ - rule, - event: canonical, - response: payload - }); - } catch (err) { - // Message send can fail if run already completed - if (this.running) { - console.warn( - `Agent ${this.participantId} failed to send ${rule.then.messageType}:`, - err instanceof Error ? 
err.message : err - ); - } - } - - // Only fire the first matching rule per event - break; - } - } - - private matches( - eventType: string, - data: Record | undefined, - when: AgentRule['when'] - ): boolean { - if (when.eventType && eventType !== when.eventType) return false; - - if (when.messageType && data) { - const msgType = data.messageType as string | undefined; - if (msgType !== when.messageType) return false; - } - - if (when.fromParticipant && data) { - const sender = data.sender as string | undefined; - const from = data.from as string | undefined; - if (sender !== when.fromParticipant && from !== when.fromParticipant) - return false; - } - - return true; - } -} - -function sleep(ms: number): Promise { - return new Promise((r) => setTimeout(r, ms)); -} diff --git a/test/helpers/wait-for.ts b/test/helpers/wait-for.ts new file mode 100644 index 0000000..3e24bff --- /dev/null +++ b/test/helpers/wait-for.ts @@ -0,0 +1,29 @@ +/** + * Polling helper for integration tests. + * + * Runs `predicate` until it returns a truthy value or `timeoutMs` elapses. + * Returns the resolved value so callers can chain assertions without re-fetching. + * + * This replaces hardcoded `sleep(1500)` waits in integration tests: instead of + * waiting for a fixed duration, we wait for a specific condition — which is both + * faster in the happy case and more reliable under load. + */ +export async function waitFor( + predicate: () => Promise | T | null | undefined, + opts: { timeoutMs?: number; intervalMs?: number; label?: string } = {}, +): Promise { + const timeoutMs = opts.timeoutMs ?? 5000; + const intervalMs = opts.intervalMs ?? 50; + const deadline = Date.now() + timeoutMs; + + let last: T | null | undefined = undefined; + while (Date.now() < deadline) { + last = await predicate(); + if (last) return last; + await new Promise((r) => setTimeout(r, intervalMs)); + } + + throw new Error( + `waitFor timed out after ${timeoutMs}ms${opts.label ? 
` (${opts.label})` : ''}${last === undefined ? '' : ` — last value: ${JSON.stringify(last)}`}`, + ); +} diff --git a/test/integration/concurrency.integration.spec.ts b/test/integration/concurrency.integration.spec.ts index 25982d8..6edf99f 100644 --- a/test/integration/concurrency.integration.spec.ts +++ b/test/integration/concurrency.integration.spec.ts @@ -1,66 +1,12 @@ import { createTestApp, TestAppContext } from '../helpers/test-app'; -import { - decisionModeRequest as decisionModeRequestBase, - decisionHappyScript -} from '../fixtures/decision-mode'; +import { decisionModeRequest, decisionHappyScript } from '../fixtures/decision-mode'; +import { waitFor } from '../helpers/wait-for'; -const isRealRuntime = - process.env.INTEGRATION_RUNTIME === 'docker' || - process.env.INTEGRATION_RUNTIME === 'remote'; - -/** Returns the execution request, adjusting for the active runtime mode */ -function decisionModeRequest(overrides?: Record) { - const base = decisionModeRequestBase(overrides as any); - if (isRealRuntime) { - base.runtime = { kind: 'rust' }; - // Real runtime requires proto-encoded kickoff payloads - if (base.kickoff) { - for (const k of base.kickoff) { - if (k.payload && !k.payloadEnvelope) { - k.payloadEnvelope = { - encoding: 'proto' as const, - proto: { - typeName: 'macp.modes.decision.v1.ProposalPayload', - value: k.payload - } - }; - delete k.payload; - } - } - } - } - return base; -} - -/** - * Build a message body with proto encoding when running against the real runtime. 
- */ -function msg( - from: string, - messageType: string, - protoTypeName: string, - payload: Record, - to?: string[] -): Record { - const base: Record = { from, messageType }; - if (to) base.to = to; - - if (isRealRuntime) { - base.payloadEnvelope = { - encoding: 'proto', - proto: { typeName: protoTypeName, value: payload } - }; - } else { - base.payload = payload; - } - return base; -} - -describe('Concurrency (integration)', () => { +describe('Concurrency (integration, observer mode)', () => { let ctx: TestAppContext; beforeAll(async () => { - ctx = await createTestApp(isRealRuntime ? undefined : decisionHappyScript()); + ctx = await createTestApp(decisionHappyScript()); }); afterAll(async () => { @@ -71,78 +17,41 @@ describe('Concurrency (integration)', () => { await ctx.cleanup(); }); - it('creates multiple runs simultaneously', async () => { + it('creates multiple runs simultaneously with unique sessionIds', async () => { const count = 5; - const promises = Array.from({ length: count }, () => - ctx.client.createRun(decisionModeRequest()) + const results = await Promise.all( + Array.from({ length: count }, () => ctx.client.createRun(decisionModeRequest())), ); - const results = await Promise.all(promises); - - // All should succeed expect(results.length).toBe(count); - // All should have unique IDs - const ids = results.map((r) => r.runId); - const uniqueIds = new Set(ids); - expect(uniqueIds.size).toBe(count); + const runIds = new Set(results.map((r) => r.runId)); + const sessionIds = new Set(results.map((r) => r.sessionId)); + expect(runIds.size).toBe(count); + expect(sessionIds.size).toBe(count); - // All should have valid UUIDs - for (const result of results) { - expect(result.runId).toMatch( - /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/ - ); - expect(result.status).toBe('queued'); + for (const r of results) { + expect(r.runId).toMatch(/^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/); + 
expect(r.status).toBe('queued'); } }); - it('concurrent messages to same run are all accepted', async () => { + it('fetching the same run concurrently does not cause issues', async () => { const { runId } = await ctx.client.createRun(decisionModeRequest()); - // Wait for run to reach running state before sending messages - for (let i = 0; i < 10; i++) { - await sleep(300); - const run = await ctx.client.getRun(runId) as any; - if (['running', 'completed'].includes(run.status)) break; - } - // Send multiple messages concurrently - const promises = Array.from({ length: 3 }, (_, i) => - ctx.client.sendMessage( - runId, - msg('evaluator', 'Evaluation', 'macp.modes.decision.v1.EvaluationPayload', { - proposalId: 'prop-1', - recommendation: 'APPROVE', - confidence: 0.9, - reason: `Concurrent evaluation ${i}` - }, ['proposer']) - ) + await waitFor( + async () => { + const r = (await ctx.client.getRun(runId)) as any; + return r.id === runId ? r : null; + }, + { timeoutMs: 3000, label: 'run visible' }, ); - const results = await Promise.allSettled(promises); - - // At least some should succeed (run may not be in correct state for all) - const fulfilled = results.filter((r) => r.status === 'fulfilled'); - expect(fulfilled.length).toBeGreaterThan(0); - }); - - it('fetching runs concurrently does not cause issues', async () => { - const { runId } = await ctx.client.createRun(decisionModeRequest()); - await sleep(300); - - // Fetch the same run concurrently - const promises = Array.from({ length: 10 }, () => - ctx.client.getRun(runId) + const results = await Promise.all( + Array.from({ length: 10 }, () => ctx.client.getRun(runId)), ); - - const results = await Promise.all(promises); - - // All should return the same run - for (const result of results) { - expect((result as any).id).toBe(runId); + for (const r of results) { + expect((r as any).id).toBe(runId); } }); }); - -function sleep(ms: number): Promise { - return new Promise((r) => setTimeout(r, ms)); -} diff --git 
a/test/integration/dashboard.integration.spec.ts b/test/integration/dashboard.integration.spec.ts index d6c9dfa..de0ddab 100644 --- a/test/integration/dashboard.integration.spec.ts +++ b/test/integration/dashboard.integration.spec.ts @@ -1,6 +1,7 @@ import { createTestApp, TestAppContext } from '../helpers/test-app'; import { decisionHappyScript } from '../fixtures/decision-mode'; import { testRuntimeKind } from '../helpers/runtime-kind'; +import { waitFor } from '../helpers/wait-for'; describe('Dashboard Overview (integration)', () => { let ctx: TestAppContext; @@ -76,13 +77,17 @@ describe('Dashboard Overview (integration)', () => { modeVersion: '1.0.0', configurationVersion: '1.0.0', ttlMs: 60000, - participants: [{ id: 'alice', role: 'proposer' }] + participants: [{ id: 'alice' }] } }); - await sleep(500); - - const result = await ctx.client.request('GET', '/dashboard/overview'); + const result = await waitFor( + async () => { + const r = (await ctx.client.request('GET', '/dashboard/overview')) as any; + return r.kpis.totalRuns >= 1 ? r : null; + }, + { timeoutMs: 3000, label: 'dashboard KPIs' }, + ); expect(result.kpis.totalRuns).toBeGreaterThanOrEqual(1); }); @@ -122,15 +127,19 @@ describe('Dashboard Agent Metrics (integration)', () => { configurationVersion: '1.0.0', ttlMs: 60000, participants: [ - { id: 'agent-a', role: 'proposer' }, - { id: 'agent-b', role: 'evaluator' } + { id: 'agent-a' }, + { id: 'agent-b' }, ] } }); - await sleep(1000); - - const result = await ctx.client.request('GET', '/dashboard/agents/metrics'); + const result = await waitFor( + async () => { + const r = (await ctx.client.request('GET', '/dashboard/agents/metrics')) as any[]; + return Array.isArray(r) ? 
r : null; + }, + { timeoutMs: 3000, label: 'agent metrics' }, + ); expect(Array.isArray(result)).toBe(true); if (result.length > 0) { @@ -165,16 +174,19 @@ describe('Run Listing Filters (integration)', () => { modeVersion: '1.0.0', configurationVersion: '1.0.0', ttlMs: 60000, - participants: [{ id: 'alice', role: 'proposer' }], + participants: [{ id: 'alice' }], metadata: { environment: 'staging' } }, execution: { tags: ['env-test'] } }); - await sleep(500); - - // Filter by environment — should not error - const result = await ctx.client.listRuns({ environment: 'staging' }) as any; + const result = await waitFor( + async () => { + const r = (await ctx.client.listRuns({ environment: 'staging' })) as any; + return r.data ? r : null; + }, + { timeoutMs: 3000, label: 'environment filter' }, + ); expect(result).toHaveProperty('data'); }); @@ -191,6 +203,3 @@ describe('Run Listing Filters (integration)', () => { }); }); -function sleep(ms: number): Promise { - return new Promise((r) => setTimeout(r, ms)); -} diff --git a/test/integration/decision-mode.integration.spec.ts b/test/integration/decision-mode.integration.spec.ts deleted file mode 100644 index 59707ea..0000000 --- a/test/integration/decision-mode.integration.spec.ts +++ /dev/null @@ -1,413 +0,0 @@ -import { createTestApp, TestAppContext } from '../helpers/test-app'; -import { - decisionModeRequest as decisionModeRequestBase, - decisionHappyScript -} from '../fixtures/decision-mode'; -import { - makeStreamEnvelope, - RuntimeScript -} from '../helpers/scripted-mock-runtime.provider'; - -/** Returns the execution request, adjusting for the active runtime mode */ -function decisionModeRequest(overrides?: Record) { - const base = decisionModeRequestBase(overrides as any); - if (isRealRuntime) { - base.runtime = { kind: 'rust' }; - // Real runtime requires proto-encoded kickoff payloads - if (base.kickoff) { - for (const k of base.kickoff) { - if (k.payload && !k.payloadEnvelope) { - k.payloadEnvelope = { - 
encoding: 'proto' as const, - proto: { - typeName: 'macp.modes.decision.v1.ProposalPayload', - value: k.payload - } - }; - delete k.payload; - } - } - } - } - return base; -} - -const isRealRuntime = - process.env.INTEGRATION_RUNTIME === 'docker' || - process.env.INTEGRATION_RUNTIME === 'remote'; - -/** - * Build a message body with proto encoding when running against the real runtime. - * The real Rust runtime requires proto-encoded payloads, while the mock accepts JSON. - */ -function msg( - from: string, - messageType: string, - protoTypeName: string, - payload: Record, - to?: string[] -): Record { - const base: Record = { from, messageType }; - if (to) base.to = to; - - if (isRealRuntime) { - // Real runtime: proto-encoded via payloadEnvelope - base.payloadEnvelope = { - encoding: 'proto', - proto: { typeName: protoTypeName, value: payload } - }; - } else { - // Mock runtime: plain JSON - base.payload = payload; - } - return base; -} - -describe('Decision Mode (integration)', () => { - let ctx: TestAppContext; - - beforeAll(async () => { - ctx = await createTestApp(isRealRuntime ? 
undefined : decisionHappyScript()); - }); - - afterAll(async () => { - if (ctx) await ctx.app.close(); - }); - - it('creates a decision mode run and reaches running state', async () => { - const { runId } = await ctx.client.createRun(decisionModeRequest() as any); - expect(runId).toBeDefined(); - - await sleep(1000); - - const run = (await ctx.client.getRun(runId)) as any; - expect(['binding_session', 'running', 'completed']).toContain(run.status); - }); - - it('sends Evaluation and Vote — run progresses', async () => { - const { runId } = await ctx.client.createRun(decisionModeRequest() as any); - await sleep(1000); - - // Evaluator sends Evaluation (proto-encoded for real runtime) - const evalResult = await ctx.client.sendMessage( - runId, - msg('evaluator', 'Evaluation', 'macp.modes.decision.v1.EvaluationPayload', { - proposalId: 'prop-1', - recommendation: 'APPROVE', - confidence: 0.95, - reason: 'Approved for testing' - }, ['proposer']) - ); - // Debug: log the full response if ack is missing - if (!evalResult.ack) { - console.log('evalResult:', JSON.stringify(evalResult, null, 2)); - } - expect(evalResult.ack).toBeDefined(); - - await sleep(500); - - // Voter sends Vote (proto-encoded for real runtime) - const voteResult = await ctx.client.sendMessage( - runId, - msg('voter', 'Vote', 'macp.modes.decision.v1.VotePayload', { - proposalId: 'prop-1', - vote: 'approve', - reason: 'Looks good' - }, ['proposer']) - ); - expect(voteResult.ack).toBeDefined(); - - await sleep(1500); - - const run = (await ctx.client.getRun(runId)) as any; - expect(['running', 'completed']).toContain(run.status); - }); - - it('projected state includes participants, graph, decision, and timeline', async () => { - const { runId } = await ctx.client.createRun(decisionModeRequest() as any); - await sleep(1000); - - const state = (await ctx.client.getState(runId)) as any; - expect(state.run).toBeDefined(); - expect(state.run.runId).toBe(runId); - 
expect(state.run.modeName).toBe('macp.mode.decision.v1'); - expect(state.participants).toBeDefined(); - expect(state.graph).toBeDefined(); - expect(state.graph).toHaveProperty('nodes'); - expect(state.graph).toHaveProperty('edges'); - expect(state.decision).toBeDefined(); - expect(state.timeline).toBeDefined(); - expect(state.timeline.totalEvents).toBeGreaterThanOrEqual(1); - }); - - it('canonical events are persisted with monotonic sequence numbers', async () => { - const { runId } = await ctx.client.createRun(decisionModeRequest() as any); - - // Poll until events appear (async pipeline: create -> start -> bind -> persist) - let events: any[] = []; - for (let attempt = 0; attempt < 10; attempt++) { - await sleep(500); - const result = await ctx.client.listEvents(runId); - events = Array.isArray(result) ? result : (result as any).data ?? []; - if (events.length > 0) break; - } - expect(events.length).toBeGreaterThan(0); - - // Verify monotonically increasing sequence numbers - for (let i = 1; i < events.length; i++) { - expect(events[i].seq).toBeGreaterThan(events[i - 1].seq); - } - - // Should include run.created event - const runCreated = events.find((e: any) => e.type === 'run.created'); - expect(runCreated).toBeDefined(); - }); - - it('handles vote rejection', async () => { - const { runId } = await ctx.client.createRun(decisionModeRequest() as any); - await sleep(1000); - - await ctx.client.sendMessage( - runId, - msg('voter', 'Vote', 'macp.modes.decision.v1.VotePayload', { - proposalId: 'prop-1', - vote: 'reject', - reason: 'Insufficient evidence' - }, ['proposer']) - ); - - await sleep(1500); - - const run = (await ctx.client.getRun(runId)) as any; - expect(['running', 'completed', 'failed']).toContain(run.status); - }); - - it('handles multi-message flow (objection -> evaluation -> vote)', async () => { - const { runId } = await ctx.client.createRun(decisionModeRequest() as any); - await sleep(1000); - - await ctx.client.sendMessage( - runId, - 
msg('evaluator', 'Objection', 'macp.modes.decision.v1.ObjectionPayload', { - proposalId: 'prop-1', - reason: 'Needs revision', - severity: 'high' - }, ['proposer']) - ); - await sleep(300); - - await ctx.client.sendMessage( - runId, - msg('evaluator', 'Evaluation', 'macp.modes.decision.v1.EvaluationPayload', { - proposalId: 'prop-1', - recommendation: 'APPROVE', - confidence: 0.9, - reason: 'Revised version OK' - }, ['proposer']) - ); - await sleep(300); - - await ctx.client.sendMessage( - runId, - msg('voter', 'Vote', 'macp.modes.decision.v1.VotePayload', { - proposalId: 'prop-1', - vote: 'approve', - reason: 'Agreed' - }, ['proposer']) - ); - - // Poll until message events appear - let messageEvents: any[] = []; - for (let attempt = 0; attempt < 10; attempt++) { - await sleep(500); - const result = await ctx.client.listEvents(runId); - const events = Array.isArray(result) ? result : (result as any).data ?? []; - messageEvents = events.filter((e: any) => e.type === 'message.sent'); - if (messageEvents.length >= 3) break; - } - expect(messageEvents.length).toBeGreaterThanOrEqual(3); - }); - // ── Signal Tests ──────────────────────────────────────────── - // - // In MACP, signals flow through TWO planes: - // - // COORDINATION PLANE (binding): - // POST /runs/:id/signal → runtime.send() → ack - // Control plane records this as "message.sent" (outbound signal) - // - // AMBIENT PLANE (non-binding): - // Runtime echoes signals back via stream → EventNormalizer - // creates "signal.emitted" canonical events → projection.signals - // - // The mock runtime's onSend callback echoes Signal messages back - // as stream-envelope events, simulating the real runtime behavior. 
- - it('sends signals during a running session — outbound is recorded', async () => { - // Create a fresh run — the mock's send() records all calls in sentMessages - const { runId } = await ctx.client.createRun(decisionModeRequest() as any); - await sleep(1000); - - // Evaluator sends "progress" signal — starting analysis - const signal1 = await ctx.client.sendSignal(runId, { - from: 'evaluator', - to: ['proposer'], - messageType: 'Signal', - signalType: 'progress', - payload: { - signalType: 'progress', - data: 'Starting fraud risk analysis', - confidence: 0.0 - } - }); - expect(signal1).toBeDefined(); - await sleep(300); - - // Evaluator sends "completed" signal - const signal2 = await ctx.client.sendSignal(runId, { - from: 'evaluator', - to: ['proposer'], - messageType: 'Signal', - signalType: 'completed', - payload: { - signalType: 'completed', - data: 'Fraud evaluation submitted', - confidence: 1.0 - } - }); - expect(signal2).toBeDefined(); - await sleep(300); - - // Voter sends "attention" signal - const signal3 = await ctx.client.sendSignal(runId, { - from: 'voter', - to: ['proposer'], - messageType: 'Signal', - signalType: 'attention', - payload: { - signalType: 'attention', - data: 'Urgent review required', - confidence: 0.85 - } - }); - expect(signal3).toBeDefined(); - - // In mock mode, verify the mock runtime received all 3 signal sends - if (ctx.mockRuntime) { - const signalSends = ctx.mockRuntime.sentMessages.filter( - (m) => m.req.messageType === 'Signal' - ); - expect(signalSends.length).toBeGreaterThanOrEqual(3); - - // Verify signals were sent with empty sessionId (ambient plane) - for (const s of signalSends) { - expect(s.req.runtimeSessionId).toBe(''); - expect(s.req.modeName).toBe(''); - } - } - - // Poll for outbound events — signals are recorded as message.sent - let signalSentEvents: any[] = []; - for (let attempt = 0; attempt < 10; attempt++) { - await sleep(500); - const result = await ctx.client.listEvents(runId); - const events = 
Array.isArray(result) ? result : (result as any).data ?? []; - signalSentEvents = events.filter( - (e: any) => - e.type === 'message.sent' && - (e.subjectKind === 'signal' || e.subject_kind === 'signal') - ); - if (signalSentEvents.length >= 3) break; - } - expect(signalSentEvents.length).toBeGreaterThanOrEqual(3); - }); - - it('signal payloads are correctly forwarded to runtime', async () => { - const { runId } = await ctx.client.createRun(decisionModeRequest() as any); - await sleep(1000); - - await ctx.client.sendSignal(runId, { - from: 'evaluator', - to: ['proposer'], - messageType: 'Signal', - signalType: 'progress', - payload: { - signalType: 'progress', - data: 'Analyzing fraud patterns', - confidence: 0.5 - } - }); - - // In mock mode, inspect the raw request sent to runtime - if (ctx.mockRuntime) { - const signalSends = ctx.mockRuntime.sentMessages.filter( - (m) => m.req.messageType === 'Signal' - ); - expect(signalSends.length).toBeGreaterThanOrEqual(1); - - const signalReq = signalSends[signalSends.length - 1].req; - expect(signalReq.runtimeSessionId).toBe(''); - expect(signalReq.modeName).toBe(''); - expect(signalReq.from).toBe('evaluator'); - - const payload = JSON.parse(signalReq.payload.toString('utf8')); - expect(payload.signalType).toBe('progress'); - expect(payload.confidence).toBe(0.5); - } - - // In all modes: the signal endpoint completed without throwing - }); - - it('coordination messages and signals coexist on the same run', async () => { - const { runId } = await ctx.client.createRun(decisionModeRequest() as any); - await sleep(1000); - - // Coordination plane: send Evaluation (session-bound) - await ctx.client.sendMessage( - runId, - msg('evaluator', 'Evaluation', 'macp.modes.decision.v1.EvaluationPayload', { - proposalId: 'prop-1', - recommendation: 'APPROVE', - confidence: 0.9, - reason: 'Looks good' - }, ['proposer']) - ); - await sleep(200); - - // Ambient plane: send Signal (not session-bound) - await ctx.client.sendSignal(runId, { - 
from: 'evaluator', - to: ['proposer'], - messageType: 'Signal', - signalType: 'done', - payload: { signalType: 'done', data: 'Evaluation complete' } - }); - await sleep(500); - - if (ctx.mockRuntime) { - // In mock mode: verify both message types reached the runtime - const coordMsgs = ctx.mockRuntime.sentMessages.filter( - (m) => m.req.messageType === 'Evaluation' - ); - const signalMsgs = ctx.mockRuntime.sentMessages.filter( - (m) => m.req.messageType === 'Signal' - ); - - expect(coordMsgs.length).toBeGreaterThanOrEqual(1); - expect(signalMsgs.length).toBeGreaterThanOrEqual(1); - - // Coordination message has real sessionId - expect(coordMsgs[0].req.runtimeSessionId).not.toBe(''); - - // Signal has empty sessionId - expect(signalMsgs[0].req.runtimeSessionId).toBe(''); - } - - // In all modes: both API calls should have succeeded (no HTTP errors) - const run = (await ctx.client.getRun(runId)) as any; - expect(run.status).toBeDefined(); - }); -}); - -function sleep(ms: number): Promise { - return new Promise((r) => setTimeout(r, ms)); -} diff --git a/test/integration/events-cross-run.integration.spec.ts b/test/integration/events-cross-run.integration.spec.ts index e1892e0..009a19d 100644 --- a/test/integration/events-cross-run.integration.spec.ts +++ b/test/integration/events-cross-run.integration.spec.ts @@ -1,37 +1,12 @@ import { createTestApp, TestAppContext } from '../helpers/test-app'; -import { - decisionModeRequest as decisionModeRequestBase, - decisionHappyScript, -} from '../fixtures/decision-mode'; - -const isRealRuntime = - process.env.INTEGRATION_RUNTIME === 'docker' || - process.env.INTEGRATION_RUNTIME === 'remote'; - -function decisionModeRequest(overrides?: Record) { - const base = decisionModeRequestBase(overrides as any); - if (isRealRuntime) { - base.runtime = { kind: 'rust' }; - if (base.kickoff) { - for (const k of base.kickoff) { - if (k.payload && !k.payloadEnvelope) { - k.payloadEnvelope = { - encoding: 'proto' as const, - proto: { typeName: 
'macp.modes.decision.v1.ProposalPayload', value: k.payload }, - }; - delete k.payload; - } - } - } - } - return base; -} +import { decisionModeRequest, decisionHappyScript } from '../fixtures/decision-mode'; +import { waitFor } from '../helpers/wait-for'; describe('Cross-run events endpoint (§4.1 integration)', () => { let ctx: TestAppContext; beforeAll(async () => { - ctx = await createTestApp(isRealRuntime ? undefined : decisionHappyScript()); + ctx = await createTestApp(decisionHappyScript()); }); afterAll(async () => { @@ -48,25 +23,33 @@ describe('Cross-run events endpoint (§4.1 integration)', () => { it('GET /events returns { data, total, limit, nextCursor } shape', async () => { const { runId } = await ctx.client.createRun(decisionModeRequest()); - await sleep(1000); - const body: any = await httpGet(`/events?runId=${runId}&limit=100`); + const body = await waitFor( + async () => { + const b: any = await httpGet(`/events?runId=${runId}&limit=100`); + return b.total >= 1 ? b : null; + }, + { timeoutMs: 5000, label: 'events appeared' }, + ); expect(body).toHaveProperty('data'); expect(body).toHaveProperty('total'); expect(body).toHaveProperty('limit'); expect(Array.isArray(body.data)).toBe(true); expect(body.limit).toBe(100); - expect(body.total).toBeGreaterThanOrEqual(0); }); it('GET /events filters by type (§4.1)', async () => { const { runId } = await ctx.client.createRun(decisionModeRequest()); - await sleep(1200); - const body: any = await httpGet(`/events?runId=${runId}&type=run.created,run.started`); + const body = await waitFor( + async () => { + const b: any = await httpGet(`/events?runId=${runId}&type=run.created,run.started`); + return b.data.length >= 1 ? 
b : null; + }, + { timeoutMs: 5000, label: 'run.created/started events' }, + ); - expect(Array.isArray(body.data)).toBe(true); for (const event of body.data) { expect(['run.created', 'run.started']).toContain(event.type); } @@ -74,12 +57,18 @@ describe('Cross-run events endpoint (§4.1 integration)', () => { it('GET /runs/:id/events with time range returns filtered shape (§4.2)', async () => { const { runId } = await ctx.client.createRun(decisionModeRequest()); - await sleep(1000); - const future = new Date(Date.now() + 60_000).toISOString(); - const body: any = await httpGet(`/runs/${runId}/events?beforeTs=${encodeURIComponent(future)}&limit=50`); - // When filtered, response shape is { data, total, limit, nextCursor } + const body = await waitFor( + async () => { + const b: any = await httpGet( + `/runs/${runId}/events?beforeTs=${encodeURIComponent(future)}&limit=50`, + ); + return b?.data && b.total >= 1 ? b : null; + }, + { timeoutMs: 5000, label: 'filtered events' }, + ); + expect(body).toHaveProperty('data'); expect(body).toHaveProperty('total'); expect(body).toHaveProperty('limit'); @@ -87,14 +76,15 @@ describe('Cross-run events endpoint (§4.1 integration)', () => { it('GET /runs/:id/events with no filter keeps legacy array shape (backward compat)', async () => { const { runId } = await ctx.client.createRun(decisionModeRequest()); - await sleep(800); - const body: any = await httpGet(`/runs/${runId}/events?limit=50`); + const body = await waitFor( + async () => { + const b: any = await httpGet(`/runs/${runId}/events?limit=50`); + return Array.isArray(b) && b.length >= 1 ? 
b : null; + }, + { timeoutMs: 5000, label: 'legacy array events' }, + ); expect(Array.isArray(body)).toBe(true); }); }); - -function sleep(ms: number): Promise { - return new Promise((r) => setTimeout(r, ms)); -} diff --git a/test/integration/handoff-mode.integration.spec.ts b/test/integration/handoff-mode.integration.spec.ts deleted file mode 100644 index 6fb6400..0000000 --- a/test/integration/handoff-mode.integration.spec.ts +++ /dev/null @@ -1,158 +0,0 @@ -import { createTestApp, TestAppContext } from '../helpers/test-app'; -import { - handoffModeRequest as handoffModeRequestBase, - handoffAcceptScript, - handoffDeclineScript -} from '../fixtures/handoff-mode'; - -const isRealRuntime = - process.env.INTEGRATION_RUNTIME === 'docker' || - process.env.INTEGRATION_RUNTIME === 'remote'; - -/** Returns the execution request, adjusting for the active runtime mode */ -function handoffModeRequest(overrides?: Record) { - const base = handoffModeRequestBase(overrides as any); - if (isRealRuntime) { - base.runtime = { kind: 'rust' }; - // Real runtime requires proto-encoded kickoff payloads - if (base.kickoff) { - for (const k of base.kickoff) { - if (k.payload && !k.payloadEnvelope) { - k.payloadEnvelope = { - encoding: 'proto' as const, - proto: { - typeName: 'macp.modes.handoff.v1.HandoffOfferPayload', - value: k.payload - } - }; - delete k.payload; - } - } - } - } - return base; -} - -/** - * Build a message body with proto encoding when running against the real runtime. - * The real Rust runtime requires proto-encoded payloads, while the mock accepts JSON. 
- */ -function msg( - from: string, - messageType: string, - protoTypeName: string, - payload: Record, - to?: string[] -): Record { - const base: Record = { from, messageType }; - if (to) base.to = to; - - if (isRealRuntime) { - // Real runtime: proto-encoded via payloadEnvelope - base.payloadEnvelope = { - encoding: 'proto', - proto: { typeName: protoTypeName, value: payload } - }; - } else { - // Mock runtime: plain JSON - base.payload = payload; - } - return base; -} - -describe('Handoff Mode (integration)', () => { - let ctx: TestAppContext; - - afterEach(async () => { - if (ctx) await ctx.cleanup(); - }); - - afterAll(async () => { - if (ctx) await ctx.app.close(); - }); - - describe('Successful Handoff', () => { - beforeAll(async () => { - ctx = await createTestApp(isRealRuntime ? undefined : handoffAcceptScript()); - }); - - it('source offers, provides context, target accepts', async () => { - const { runId } = await ctx.client.createRun(handoffModeRequest() as any); - await sleep(500); - - // Source provides context - await ctx.client.sendMessage( - runId, - msg('source', 'HandoffContext', 'macp.modes.handoff.v1.HandoffContextPayload', { - conversationHistory: ['msg1', 'msg2'], - metadata: { topic: 'billing' } - }, ['target']) - ); - await sleep(200); - - // Target accepts - await ctx.client.sendMessage( - runId, - msg('target', 'HandoffAccept', 'macp.modes.handoff.v1.HandoffAcceptPayload', { - acceptedAt: new Date().toISOString() - }, ['source']) - ); - - // Wait for the run to process messages - await sleep(2000); - - const run = await ctx.client.getRun(runId) as any; - expect(run).toHaveProperty('status'); - expect(['running', 'completed']).toContain(run.status); - - // Poll for message events (real runtime may be slower) - let sentEvents: any[] = []; - for (let attempt = 0; attempt < 10; attempt++) { - await sleep(500); - const events = await ctx.client.listEvents(runId) as any[]; - sentEvents = events.filter((e: any) => e.type === 'message.sent'); - if 
(sentEvents.length >= (isRealRuntime ? 1 : 2)) break; - } - // Mock emits send-ack events for each message; real runtime may only emit for kickoff - expect(sentEvents.length).toBeGreaterThanOrEqual(1); - }); - - it('projected state shows both participants', async () => { - const { runId } = await ctx.client.createRun(handoffModeRequest() as any); - await sleep(1000); - - const state = await ctx.client.getState(runId) as any; - expect(state.participants).toBeDefined(); - expect(state.participants.length).toBeGreaterThanOrEqual(2); - }); - }); - - describe('Declined Handoff', () => { - beforeAll(async () => { - if (ctx) await ctx.app.close(); - ctx = await createTestApp(isRealRuntime ? undefined : handoffDeclineScript()); - }); - - it('target declines handoff', async () => { - const { runId } = await ctx.client.createRun(handoffModeRequest() as any); - await sleep(500); - - await ctx.client.sendMessage( - runId, - msg('target', 'HandoffDecline', 'macp.modes.handoff.v1.HandoffDeclinePayload', { - reason: 'Not available' - }, ['source']) - ); - - await sleep(1000); - - const events = await ctx.client.listEvents(runId) as any[]; - const sentEvents = events.filter((e: any) => e.type === 'message.sent'); - expect(sentEvents.length).toBeGreaterThanOrEqual(1); - }); - }); -}); - -function sleep(ms: number): Promise { - return new Promise((r) => setTimeout(r, ms)); -} diff --git a/test/integration/metrics-token.integration.spec.ts b/test/integration/metrics-token.integration.spec.ts index b62f496..5b80ea9 100644 --- a/test/integration/metrics-token.integration.spec.ts +++ b/test/integration/metrics-token.integration.spec.ts @@ -1,6 +1,7 @@ import { createTestApp, TestAppContext } from '../helpers/test-app'; import { decisionHappyScript } from '../fixtures/decision-mode'; import { testRuntimeKind } from '../helpers/runtime-kind'; +import { waitFor } from '../helpers/wait-for'; describe('Metrics Token Usage (integration)', () => { let ctx: TestAppContext; @@ -13,6 +14,16 @@ 
describe('Metrics Token Usage (integration)', () => { if (ctx) await ctx.app.close(); }); + async function waitForMetrics(runId: string) { + return waitFor( + async () => { + const metrics = (await ctx.client.request('GET', `/runs/${runId}/metrics`)) as any; + return metrics.runId === runId ? metrics : null; + }, + { timeoutMs: 5000, label: 'metrics ready' }, + ); + } + it('GET /runs/:id/metrics returns token usage fields', async () => { const run = await ctx.client.createRun({ mode: 'sandbox', @@ -22,16 +33,11 @@ describe('Metrics Token Usage (integration)', () => { modeVersion: '1.0.0', configurationVersion: '1.0.0', ttlMs: 60000, - participants: [ - { id: 'agent-a', role: 'proposer' }, - { id: 'agent-b', role: 'evaluator' } - ] - } + participants: [{ id: 'agent-a' }, { id: 'agent-b' }], + }, }); - await sleep(1500); - - const metrics = await ctx.client.request('GET', `/runs/${run.runId}/metrics`) as any; + const metrics = await waitForMetrics(run.runId); expect(metrics).toHaveProperty('runId', run.runId); expect(metrics).toHaveProperty('eventCount'); @@ -55,13 +61,11 @@ describe('Metrics Token Usage (integration)', () => { modeVersion: '1.0.0', configurationVersion: '1.0.0', ttlMs: 60000, - participants: [{ id: 'agent-a', role: 'proposer' }] - } + participants: [{ id: 'agent-a' }], + }, }); - await sleep(1500); - - const metrics = await ctx.client.request('GET', `/runs/${run.runId}/metrics`) as any; + const metrics = await waitForMetrics(run.runId); expect(metrics.promptTokens).toBe(0); expect(metrics.completionTokens).toBe(0); @@ -78,23 +82,18 @@ describe('Metrics Token Usage (integration)', () => { modeVersion: '1.0.0', configurationVersion: '1.0.0', ttlMs: 60000, - participants: [ - { id: 'agent-a', role: 'proposer' }, - { id: 'agent-b', role: 'evaluator' } - ] - } + participants: [{ id: 'agent-a' }, { id: 'agent-b' }], + }, }); - await sleep(1500); + const state = await waitFor( + async () => { + const s = (await ctx.client.getState(run.runId)) as any; + 
return s.timeline?.totalEvents > 0 ? s : null; + }, + { timeoutMs: 5000, label: 'state populated' }, + ); - const state = await ctx.client.getState(run.runId) as any; - - expect(state).toHaveProperty('timeline'); - expect(state.timeline).toHaveProperty('totalEvents'); expect(typeof state.timeline.totalEvents).toBe('number'); }); }); - -function sleep(ms: number): Promise { - return new Promise((r) => setTimeout(r, ms)); -} diff --git a/test/integration/observer-mode.integration.spec.ts b/test/integration/observer-mode.integration.spec.ts new file mode 100644 index 0000000..f72afd3 --- /dev/null +++ b/test/integration/observer-mode.integration.spec.ts @@ -0,0 +1,97 @@ +import { createTestApp, TestAppContext } from '../helpers/test-app'; +import { decisionHappyScript, decisionModeRequest } from '../fixtures/decision-mode'; +import { proposalAcceptScript, proposalModeRequest } from '../fixtures/proposal-mode'; +import { taskHappyScript, taskModeRequest } from '../fixtures/task-mode'; +import { quorumReachedScript, quorumModeRequest } from '../fixtures/quorum-mode'; +import { handoffAcceptScript, handoffModeRequest } from '../fixtures/handoff-mode'; +import { waitFor } from '../helpers/wait-for'; + +/** + * Observer-mode end-to-end (direct-agent-auth CP-13). + * + * These tests verify the control-plane correctly observes and projects the full + * scripted event stream for each of the 5 canonical modes — without any + * envelope-emission HTTP endpoints. Agents (simulated here by the mock) drive + * the session entirely via their own gRPC connection. 
+ */ +describe('Observer mode — end-to-end projection (integration)', () => { + const cases = [ + { + name: 'decision', + request: () => decisionModeRequest(), + script: () => decisionHappyScript(), + expectTypes: ['proposal.created', 'proposal.updated', 'decision.finalized'], + }, + { + name: 'proposal', + request: () => proposalModeRequest(), + script: () => proposalAcceptScript(), + expectTypes: ['proposal.created', 'proposal.updated', 'decision.finalized'], + }, + { + name: 'task', + request: () => taskModeRequest(), + script: () => taskHappyScript(), + expectTypes: ['proposal.created', 'proposal.updated'], + }, + { + name: 'quorum', + request: () => quorumModeRequest(), + script: () => quorumReachedScript(), + expectTypes: ['proposal.created', 'proposal.updated', 'decision.finalized'], + }, + { + name: 'handoff', + request: () => handoffModeRequest(), + script: () => handoffAcceptScript(), + expectTypes: ['proposal.created', 'proposal.updated'], + }, + ] as const; + + for (const c of cases) { + describe(`${c.name} mode`, () => { + let ctx: TestAppContext; + + beforeAll(async () => { + ctx = await createTestApp(c.script()); + }); + + afterAll(async () => { + await ctx.app.close(); + }); + + beforeEach(async () => { + await ctx.cleanup(); + }); + + it('projects the full scripted event stream without control-plane Send', async () => { + const { runId, sessionId } = await ctx.client.createRun(c.request()); + expect(sessionId).toMatch(/^[0-9a-f]{8}-/); + + // Poll until every expected event type has arrived via the observer stream. + const events = await waitFor( + async () => { + const raw = await ctx.client.listEvents(runId); + if (!Array.isArray(raw)) return null; + const allPresent = c.expectTypes.every((t) => raw.some((ev: any) => ev.type === t)); + return allPresent ? 
raw : null; + }, + { timeoutMs: 8000, label: `${c.name} mode events` }, + ); + + for (const type of c.expectTypes) { + expect(events.find((e: any) => e.type === type)).toBeDefined(); + } + + // Invariant: the control-plane never emitted a send-ack (it doesn't call Send). + // Normalized events from Send acks come out as `message.sent` with subject + // kind 'message' AND source.kind === 'control-plane'. Observer mode only ever + // emits 'message.sent' from received runtime envelopes (source.kind === 'runtime'). + const controlPlaneSends = events.filter( + (e: any) => e.type === 'message.sent' && e.source?.kind === 'control-plane', + ); + expect(controlPlaneSends).toHaveLength(0); + }); + }); + } +}); diff --git a/test/integration/policy.integration.spec.ts b/test/integration/policy.integration.spec.ts index 1249a92..14f1b7f 100644 --- a/test/integration/policy.integration.spec.ts +++ b/test/integration/policy.integration.spec.ts @@ -2,6 +2,7 @@ import { createTestApp, TestAppContext } from '../helpers/test-app'; import { decisionHappyScript } from '../fixtures/decision-mode'; import { RuntimeProviderRegistry } from '../../src/runtime/runtime-provider.registry'; import { testRuntimeKind } from '../helpers/runtime-kind'; +import { waitFor } from '../helpers/wait-for'; const isRealRuntime = process.env.INTEGRATION_RUNTIME === 'docker' || @@ -27,13 +28,17 @@ describe('Policy Projection in Run State (integration)', () => { modeVersion: '1.0.0', configurationVersion: '1.0.0', ttlMs: 60000, - participants: [{ id: 'agent-a', role: 'proposer' }] + participants: [{ id: 'agent-a' }], } }); - await sleep(1000); - - const state = await ctx.client.getState(run.runId) as any; + const state = await waitFor( + async () => { + const s = (await ctx.client.getState(run.runId)) as any; + return s.policy ? 
s : null; + }, + { timeoutMs: 3000, label: 'policy projection populated' }, + ); expect(state).toHaveProperty('policy'); expect(state.policy).toHaveProperty('policyVersion'); @@ -105,6 +110,3 @@ describeProviderMethods('Policy Provider Methods (integration)', () => { }); }); -function sleep(ms: number): Promise { - return new Promise((r) => setTimeout(r, ms)); -} diff --git a/test/integration/projection.integration.spec.ts b/test/integration/projection.integration.spec.ts index bdcdda1..95e4d03 100644 --- a/test/integration/projection.integration.spec.ts +++ b/test/integration/projection.integration.spec.ts @@ -1,66 +1,12 @@ import { createTestApp, TestAppContext } from '../helpers/test-app'; -import { - decisionModeRequest as decisionModeRequestBase, - decisionHappyScript -} from '../fixtures/decision-mode'; - -const isRealRuntime = - process.env.INTEGRATION_RUNTIME === 'docker' || - process.env.INTEGRATION_RUNTIME === 'remote'; - -/** Returns the execution request, adjusting for the active runtime mode */ -function decisionModeRequest(overrides?: Record) { - const base = decisionModeRequestBase(overrides as any); - if (isRealRuntime) { - base.runtime = { kind: 'rust' }; - // Real runtime requires proto-encoded kickoff payloads - if (base.kickoff) { - for (const k of base.kickoff) { - if (k.payload && !k.payloadEnvelope) { - k.payloadEnvelope = { - encoding: 'proto' as const, - proto: { - typeName: 'macp.modes.decision.v1.ProposalPayload', - value: k.payload - } - }; - delete k.payload; - } - } - } - } - return base; -} - -/** - * Build a message body with proto encoding when running against the real runtime. 
- */ -function msg( - from: string, - messageType: string, - protoTypeName: string, - payload: Record, - to?: string[] -): Record { - const base: Record = { from, messageType }; - if (to) base.to = to; - - if (isRealRuntime) { - base.payloadEnvelope = { - encoding: 'proto', - proto: { typeName: protoTypeName, value: payload } - }; - } else { - base.payload = payload; - } - return base; -} - -describe('Projection (integration)', () => { +import { decisionModeRequest, decisionHappyScript } from '../fixtures/decision-mode'; +import { waitFor } from '../helpers/wait-for'; + +describe('Projection (integration, observer mode)', () => { let ctx: TestAppContext; beforeAll(async () => { - ctx = await createTestApp(isRealRuntime ? undefined : decisionHappyScript()); + ctx = await createTestApp(decisionHappyScript()); }); afterAll(async () => { @@ -73,9 +19,14 @@ describe('Projection (integration)', () => { it('projection includes all required sections', async () => { const { runId } = await ctx.client.createRun(decisionModeRequest()); - await sleep(500); - const state = await ctx.client.getState(runId) as any; + const state = await waitFor( + async () => { + const s = (await ctx.client.getState(runId)) as any; + return s.timeline?.totalEvents >= 1 ? 
s : null; + }, + { timeoutMs: 5000, label: 'projection populated' }, + ); expect(state).toHaveProperty('run'); expect(state).toHaveProperty('participants'); @@ -88,118 +39,68 @@ describe('Projection (integration)', () => { expect(state).toHaveProperty('outboundMessages'); }); - it('projection tracks participant status', async () => { + it('projection tracks participant status from scripted events', async () => { const { runId } = await ctx.client.createRun(decisionModeRequest()); - await sleep(1000); - - const state = await ctx.client.getState(runId) as any; - expect(state.participants).toBeDefined(); - expect(state.participants).toBeInstanceOf(Array); - // Should have participants from the execution request - if (state.participants.length > 0) { - const participant = state.participants[0]; - expect(participant).toHaveProperty('participantId'); - expect(participant).toHaveProperty('status'); - } + + const state = await waitFor( + async () => { + const s = (await ctx.client.getState(runId)) as any; + return s.participants?.length > 0 ? s : null; + }, + { timeoutMs: 5000, label: 'participants populated' }, + ); + + expect(state.participants[0]).toHaveProperty('participantId'); + expect(state.participants[0]).toHaveProperty('status'); }); it('projection tracks timeline with sequence numbers', async () => { const { runId } = await ctx.client.createRun(decisionModeRequest()); - await sleep(1500); - const state = await ctx.client.getState(runId) as any; - expect(state.timeline).toBeDefined(); - expect(state.timeline).toHaveProperty('latestSeq'); - expect(state.timeline).toHaveProperty('totalEvents'); - expect(state.timeline.totalEvents).toBeGreaterThanOrEqual(0); + const state = await waitFor( + async () => { + const s = (await ctx.client.getState(runId)) as any; + return s.timeline?.totalEvents > 0 ? 
s : null; + }, + { timeoutMs: 5000, label: 'timeline populated' }, + ); + + expect(state.timeline.latestSeq).toBeGreaterThan(0); + expect(state.timeline.totalEvents).toBeGreaterThan(0); }); - it('projection graph tracks message flow', async () => { + it('projection graph tracks message flow emitted by agents (via observer stream)', async () => { const { runId } = await ctx.client.createRun(decisionModeRequest()); - await sleep(1000); - - // Send a message to create a graph edge - await ctx.client.sendMessage( - runId, - msg('evaluator', 'Evaluation', 'macp.modes.decision.v1.EvaluationPayload', { - proposalId: 'prop-1', - recommendation: 'APPROVE', - confidence: 0.9, - reason: 'Approved' - }, ['proposer']) + + const state = await waitFor( + async () => { + const s = (await ctx.client.getState(runId)) as any; + return s.graph?.nodes?.length > 0 ? s : null; + }, + { timeoutMs: 5000, label: 'graph populated' }, ); - await sleep(1000); - const state = await ctx.client.getState(runId) as any; - expect(state.graph).toBeDefined(); expect(state.graph).toHaveProperty('nodes'); expect(state.graph).toHaveProperty('edges'); }); - it('terminal sweep: participants reach terminal status after run is cancelled (§1.1)', async () => { + it('projection rebuilds from canonical events', async () => { const { runId } = await ctx.client.createRun(decisionModeRequest()); - // Wait for participants to be registered and the kickoff to activate the proposer - let activated = false; - for (let i = 0; i < 10; i++) { - await sleep(300); - const state = await ctx.client.getState(runId) as any; - if (state.participants?.length >= 3) { activated = true; break; } - } - expect(activated).toBe(true); - - // Cancel the run — triggers markCancelled, which emits run.cancelled, which triggers the sweep - await ctx.client.cancelRun(runId, 'integration test sweep'); - - let settled = false; - for (let i = 0; i < 10; i++) { - await sleep(300); - const run = await ctx.client.getRun(runId) as any; - if 
(['completed', 'failed', 'cancelled'].includes(run.status)) { settled = true; break; } - } - expect(settled).toBe(true); - - const state = await ctx.client.getState(runId) as any; - const terminal = new Set(['completed', 'failed', 'skipped']); - - // Every participant (and graph node) should land in a terminal status after cancel - expect(state.participants.length).toBeGreaterThan(0); - for (const p of state.participants) { - expect(terminal.has(p.status)).toBe(true); - } - for (const n of state.graph.nodes) { - expect(terminal.has(n.status)).toBe(true); - } - }); - - it('projection rebuilds from events', async () => { - const { runId } = await ctx.client.createRun(decisionModeRequest()); - - // Wait for run to fully settle (all events processed) - let settled = false; - for (let i = 0; i < 10; i++) { - await sleep(500); - const run = await ctx.client.getRun(runId) as any; - if (['completed', 'failed'].includes(run.status)) { settled = true; break; } - } - - // Get state before rebuild - const before = await ctx.client.getState(runId) as any; - expect(before.run).toBeDefined(); + await waitFor( + async () => { + const run = (await ctx.client.getRun(runId)) as any; + return ['completed', 'failed', 'cancelled'].includes(run.status) ? 
run : null; + }, + { timeoutMs: 5000, label: 'run terminal' }, + ); - // Trigger projection rebuild + const before = (await ctx.client.getState(runId)) as any; await ctx.client.rebuildProjection(runId); - await sleep(500); + const after = (await ctx.client.getState(runId)) as any; - // Get state after rebuild — core identity should be the same - const after = await ctx.client.getState(runId) as any; expect(after.run.runId).toBe(before.run.runId); expect(after.run.status).toBe(before.run.status); - // Event count should be >= before (events may still trickle in) expect(after.timeline.totalEvents).toBeGreaterThanOrEqual(before.timeline.totalEvents); }); }); - -function sleep(ms: number): Promise { - return new Promise((r) => setTimeout(r, ms)); -} diff --git a/test/integration/proposal-mode.integration.spec.ts b/test/integration/proposal-mode.integration.spec.ts deleted file mode 100644 index 314b522..0000000 --- a/test/integration/proposal-mode.integration.spec.ts +++ /dev/null @@ -1,169 +0,0 @@ -import { createTestApp, TestAppContext } from '../helpers/test-app'; -import { - proposalModeRequest as proposalModeRequestBase, - proposalAcceptScript, - proposalCounterScript, - proposalRejectScript -} from '../fixtures/proposal-mode'; - -const isRealRuntime = - process.env.INTEGRATION_RUNTIME === 'docker' || - process.env.INTEGRATION_RUNTIME === 'remote'; - -/** Returns the execution request, adjusting for the active runtime mode */ -function proposalModeRequest(overrides?: Record) { - const base = proposalModeRequestBase(overrides as any); - if (isRealRuntime) { - base.runtime = { kind: 'rust' }; - // Real runtime requires proto-encoded kickoff payloads - if (base.kickoff) { - for (const k of base.kickoff) { - if (k.payload && !k.payloadEnvelope) { - k.payloadEnvelope = { - encoding: 'proto' as const, - proto: { - typeName: 'macp.modes.proposal.v1.ProposalPayload', - value: k.payload - } - }; - delete k.payload; - } - } - } - } - return base; -} - -/** - * Build a 
message body with proto encoding when running against the real runtime. - * The real Rust runtime requires proto-encoded payloads, while the mock accepts JSON. - */ -function msg( - from: string, - messageType: string, - protoTypeName: string, - payload: Record, - to?: string[] -): Record { - const base: Record = { from, messageType }; - if (to) base.to = to; - - if (isRealRuntime) { - // Real runtime: proto-encoded via payloadEnvelope - base.payloadEnvelope = { - encoding: 'proto', - proto: { typeName: protoTypeName, value: payload } - }; - } else { - // Mock runtime: plain JSON - base.payload = payload; - } - return base; -} - -describe('Proposal Mode (integration)', () => { - let ctx: TestAppContext; - - afterEach(async () => { - if (ctx) await ctx.cleanup(); - }); - - afterAll(async () => { - if (ctx) await ctx.app.close(); - }); - - describe('Accept Flow', () => { - beforeAll(async () => { - ctx = await createTestApp(isRealRuntime ? undefined : proposalAcceptScript()); - }); - - it('reviewer accepts proposal', async () => { - const { runId } = await ctx.client.createRun(proposalModeRequest()); - await sleep(500); - - await ctx.client.sendMessage( - runId, - msg('reviewer', 'Accept', 'macp.modes.proposal.v1.AcceptPayload', { - proposalId: 'prop-1', - comment: 'LGTM' - }, ['author']) - ); - - await sleep(1000); - - const run = await ctx.client.getRun(runId) as any; - expect(['running', 'completed']).toContain(run.status); - - const events = await ctx.client.listEvents(runId) as any[]; - const sentEvents = events.filter((e: any) => e.type === 'message.sent'); - expect(sentEvents.length).toBeGreaterThanOrEqual(1); - }); - }); - - describe('Counter-Proposal Flow', () => { - beforeAll(async () => { - if (ctx) await ctx.app.close(); - ctx = await createTestApp(isRealRuntime ? 
undefined : proposalCounterScript()); - }); - - it('reviewer counter-proposes, author accepts', async () => { - const { runId } = await ctx.client.createRun(proposalModeRequest()); - await sleep(500); - - // Reviewer sends counter-proposal - await ctx.client.sendMessage( - runId, - msg('reviewer', 'CounterProposal', 'macp.modes.proposal.v1.CounterProposalPayload', { - proposalId: 'prop-2', - supersedesProposalId: 'prop-1', - title: 'Better approach' - }, ['author']) - ); - await sleep(200); - - // Author accepts the counter-proposal - await ctx.client.sendMessage( - runId, - msg('author', 'Accept', 'macp.modes.proposal.v1.AcceptPayload', { - proposalId: 'prop-2' - }, ['reviewer']) - ); - - await sleep(1000); - - const events = await ctx.client.listEvents(runId) as any[]; - const sentEvents = events.filter((e: any) => e.type === 'message.sent'); - expect(sentEvents.length).toBeGreaterThanOrEqual(2); - }); - }); - - describe('Rejection Flow', () => { - beforeAll(async () => { - if (ctx) await ctx.app.close(); - ctx = await createTestApp(isRealRuntime ? 
undefined : proposalRejectScript()); - }); - - it('reviewer rejects proposal', async () => { - const { runId } = await ctx.client.createRun(proposalModeRequest()); - await sleep(500); - - await ctx.client.sendMessage( - runId, - msg('reviewer', 'Reject', 'macp.modes.proposal.v1.RejectPayload', { - proposalId: 'prop-1', - reason: 'Out of scope', - terminal: true - }, ['author']) - ); - - await sleep(1000); - - const run = await ctx.client.getRun(runId) as any; - expect(['running', 'completed', 'failed']).toContain(run.status); - }); - }); -}); - -function sleep(ms: number): Promise { - return new Promise((r) => setTimeout(r, ms)); -} diff --git a/test/integration/quorum-mode.integration.spec.ts b/test/integration/quorum-mode.integration.spec.ts deleted file mode 100644 index f482d55..0000000 --- a/test/integration/quorum-mode.integration.spec.ts +++ /dev/null @@ -1,182 +0,0 @@ -import { createTestApp, TestAppContext } from '../helpers/test-app'; -import { - quorumModeRequest as quorumModeRequestBase, - quorumReachedScript, - quorumRejectedScript, - quorumAbstentionScript -} from '../fixtures/quorum-mode'; - -const isRealRuntime = - process.env.INTEGRATION_RUNTIME === 'docker' || - process.env.INTEGRATION_RUNTIME === 'remote'; - -/** Returns the execution request, adjusting for the active runtime mode */ -function quorumModeRequest(overrides?: Record) { - const base = quorumModeRequestBase(overrides as any); - if (isRealRuntime) { - base.runtime = { kind: 'rust' }; - // Real runtime requires proto-encoded kickoff payloads - if (base.kickoff) { - for (const k of base.kickoff) { - if (k.payload && !k.payloadEnvelope) { - k.payloadEnvelope = { - encoding: 'proto' as const, - proto: { - typeName: 'macp.modes.quorum.v1.ApprovalRequestPayload', - value: k.payload - } - }; - delete k.payload; - } - } - } - } - return base; -} - -/** - * Build a message body with proto encoding when running against the real runtime. 
- * The real Rust runtime requires proto-encoded payloads, while the mock accepts JSON. - */ -function msg( - from: string, - messageType: string, - protoTypeName: string, - payload: Record, - to?: string[] -): Record { - const base: Record = { from, messageType }; - if (to) base.to = to; - - if (isRealRuntime) { - // Real runtime: proto-encoded via payloadEnvelope - base.payloadEnvelope = { - encoding: 'proto', - proto: { typeName: protoTypeName, value: payload } - }; - } else { - // Mock runtime: plain JSON - base.payload = payload; - } - return base; -} - -describe('Quorum Mode (integration)', () => { - let ctx: TestAppContext; - - afterEach(async () => { - if (ctx) await ctx.cleanup(); - }); - - afterAll(async () => { - if (ctx) await ctx.app.close(); - }); - - describe('Quorum Reached', () => { - beforeAll(async () => { - ctx = await createTestApp(isRealRuntime ? undefined : quorumReachedScript()); - }); - - it('two approvals reach quorum', async () => { - const { runId } = await ctx.client.createRun(quorumModeRequest()); - await sleep(500); - - await ctx.client.sendMessage(runId, - msg('voter_a', 'Approve', 'macp.modes.quorum.v1.ApprovePayload', { - requestId: 'approval-1', comment: 'Ship it' - }, ['initiator']) - ); - await sleep(200); - - await ctx.client.sendMessage(runId, - msg('voter_b', 'Approve', 'macp.modes.quorum.v1.ApprovePayload', { - requestId: 'approval-1' - }, ['initiator']) - ); - - await sleep(1000); - - const run = await ctx.client.getRun(runId) as any; - expect(['running', 'completed']).toContain(run.status); - }); - }); - - describe('Quorum Not Reached', () => { - beforeAll(async () => { - if (ctx) await ctx.app.close(); - ctx = await createTestApp(isRealRuntime ? 
undefined : quorumRejectedScript()); - }); - - it('majority rejection blocks quorum', async () => { - const { runId } = await ctx.client.createRun(quorumModeRequest()); - await sleep(500); - - await ctx.client.sendMessage(runId, - msg('voter_a', 'Approve', 'macp.modes.quorum.v1.ApprovePayload', { - requestId: 'approval-1' - }, ['initiator']) - ); - await sleep(200); - - await ctx.client.sendMessage(runId, - msg('voter_b', 'Reject', 'macp.modes.quorum.v1.RejectPayload', { - requestId: 'approval-1', reason: 'Not ready' - }, ['initiator']) - ); - await sleep(200); - - await ctx.client.sendMessage(runId, - msg('voter_c', 'Reject', 'macp.modes.quorum.v1.RejectPayload', { - requestId: 'approval-1', reason: 'Missing tests' - }, ['initiator']) - ); - - await sleep(1000); - - const events = await ctx.client.listEvents(runId) as any[]; - const sentEvents = events.filter((e: any) => e.type === 'message.sent'); - expect(sentEvents.length).toBeGreaterThanOrEqual(3); - }); - }); - - describe('Abstention Handling', () => { - beforeAll(async () => { - if (ctx) await ctx.app.close(); - ctx = await createTestApp(isRealRuntime ? 
undefined : quorumAbstentionScript()); - }); - - it('abstention does not block quorum when approvals sufficient', async () => { - const { runId } = await ctx.client.createRun(quorumModeRequest()); - await sleep(500); - - await ctx.client.sendMessage(runId, - msg('voter_a', 'Approve', 'macp.modes.quorum.v1.ApprovePayload', { - requestId: 'approval-1' - }, ['initiator']) - ); - await sleep(200); - - await ctx.client.sendMessage(runId, - msg('voter_b', 'Abstain', 'macp.modes.quorum.v1.AbstainPayload', { - requestId: 'approval-1', reason: 'No opinion' - }, ['initiator']) - ); - await sleep(200); - - await ctx.client.sendMessage(runId, - msg('voter_c', 'Approve', 'macp.modes.quorum.v1.ApprovePayload', { - requestId: 'approval-1' - }, ['initiator']) - ); - - await sleep(1000); - - const run = await ctx.client.getRun(runId) as any; - expect(['running', 'completed']).toContain(run.status); - }); - }); -}); - -function sleep(ms: number): Promise { - return new Promise((r) => setTimeout(r, ms)); -} diff --git a/test/integration/runs-cancel.integration.spec.ts b/test/integration/runs-cancel.integration.spec.ts index d4e5bbb..deb33d6 100644 --- a/test/integration/runs-cancel.integration.spec.ts +++ b/test/integration/runs-cancel.integration.spec.ts @@ -1,7 +1,14 @@ import { createTestApp, TestAppContext } from '../helpers/test-app'; import { decisionModeRequest, decisionHappyScript } from '../fixtures/decision-mode'; +import { waitFor } from '../helpers/wait-for'; -describe('Run Cancellation (integration)', () => { +/** + * Observer-mode cancel flow (direct-agent-auth CP-8). + * + * Default (Option A): control-plane POSTs to initiator agent's cancelCallback. + * Opt-in (Option B): metadata.cancellationDelegated=true lets control-plane call CancelSession directly. 
+ */ +describe('Run Cancellation (integration, observer mode)', () => { let ctx: TestAppContext; beforeAll(async () => { @@ -16,39 +23,55 @@ describe('Run Cancellation (integration)', () => { await ctx.cleanup(); }); - it('cancels a running session', async () => { + it('rejects cancel when neither cancelCallback nor delegation is configured (Option A/B unmet)', async () => { const { runId } = await ctx.client.createRun(decisionModeRequest()); - await sleep(500); + // Wait until the run record exists, then confirm cancel is rejected. + await waitFor( + async () => { + const r = (await ctx.client.getRun(runId)) as any; + return r.id === runId ? r : null; + }, + { timeoutMs: 3000, label: 'run visible' }, + ); - const result = await ctx.client.cancelRun(runId, 'Integration test cancellation'); - expect(result).toBeDefined(); - - await sleep(500); - - const run = await ctx.client.getRun(runId) as any; - expect(['cancelled', 'completed', 'failed']).toContain(run.status); + const result = (await ctx.client.cancelRun(runId)) as any; + expect(result.statusCode ?? 0).toBeGreaterThanOrEqual(400); }); - it('cancel emits run.cancelled event', async () => { - const { runId } = await ctx.client.createRun(decisionModeRequest()); - await sleep(500); + it('Option B: cancels when metadata.cancellationDelegated=true', async () => { + const request = decisionModeRequest({ + session: { + ...decisionModeRequest().session, + metadata: { cancellationDelegated: true }, + }, + }); + const { runId } = await ctx.client.createRun(request); - await ctx.client.cancelRun(runId); - await sleep(500); + await waitFor( + async () => { + const r = (await ctx.client.getRun(runId)) as any; + return ['binding_session', 'running', 'completed'].includes(r.status) ? 
r : null; + }, + { timeoutMs: 5000, label: 'run bound' }, + ); - const events = await ctx.client.listEvents(runId) as any[]; - // Should have lifecycle events - expect(events.length).toBeGreaterThan(0); + const result = (await ctx.client.cancelRun(runId, 'integration test cancel')) as any; + expect(result.statusCode).toBeUndefined(); + + const terminal = await waitFor( + async () => { + const r = (await ctx.client.getRun(runId)) as any; + return ['cancelled', 'completed'].includes(r.status) ? r : null; + }, + { timeoutMs: 3000, label: 'run terminal after cancel' }, + ); + expect(['cancelled', 'completed']).toContain(terminal.status); }); it('cancel of non-existent run returns error', async () => { - const result = await ctx.client.cancelRun( - '00000000-0000-0000-0000-000000000000' - ) as any; + const result = (await ctx.client.cancelRun( + '00000000-0000-0000-0000-000000000000', + )) as any; expect(result.statusCode || result.errorCode).toBeDefined(); }); }); - -function sleep(ms: number): Promise { - return new Promise((r) => setTimeout(r, ms)); -} diff --git a/test/integration/runs-lifecycle.integration.spec.ts b/test/integration/runs-lifecycle.integration.spec.ts index 2cbe808..8f056ed 100644 --- a/test/integration/runs-lifecycle.integration.spec.ts +++ b/test/integration/runs-lifecycle.integration.spec.ts @@ -1,39 +1,13 @@ import { createTestApp, TestAppContext } from '../helpers/test-app'; -import { - decisionModeRequest as decisionModeRequestBase, - decisionHappyScript -} from '../fixtures/decision-mode'; +import { decisionModeRequest, decisionHappyScript } from '../fixtures/decision-mode'; import { testRuntimeKind } from '../helpers/runtime-kind'; +import { waitFor } from '../helpers/wait-for'; const isRealRuntime = process.env.INTEGRATION_RUNTIME === 'docker' || process.env.INTEGRATION_RUNTIME === 'remote'; -/** Returns the execution request, adjusting for the active runtime mode */ -function decisionModeRequest(overrides?: Record) { - const base = 
decisionModeRequestBase(overrides as any); - if (isRealRuntime) { - base.runtime = { kind: 'rust' }; - // Real runtime requires proto-encoded kickoff payloads - if (base.kickoff) { - for (const k of base.kickoff) { - if (k.payload && !k.payloadEnvelope) { - k.payloadEnvelope = { - encoding: 'proto' as const, - proto: { - typeName: 'macp.modes.decision.v1.ProposalPayload', - value: k.payload - } - }; - delete k.payload; - } - } - } - } - return base; -} - -describe('Run Lifecycle (integration)', () => { +describe('Run Lifecycle (integration, observer mode)', () => { let ctx: TestAppContext; beforeAll(async () => { @@ -48,57 +22,62 @@ describe('Run Lifecycle (integration)', () => { await ctx.cleanup(); }); - it('POST /runs creates a run and returns runId with queued status', async () => { + it('POST /runs creates a run and returns runId + sessionId', async () => { const result = await ctx.client.createRun(decisionModeRequest()); - expect(result).toHaveProperty('runId'); - expect(result.runId).toMatch( - /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/ - ); + expect(result.runId).toMatch(/^[0-9a-f]{8}-/); + expect(result.sessionId).toMatch(/^[0-9a-f]{8}-/); expect(result.status).toBe('queued'); }); - it('GET /runs/:id fetches the run record', async () => { - const { runId } = await ctx.client.createRun(decisionModeRequest()); - - // Small delay for async processing - await sleep(200); - - const run = await ctx.client.getRun(runId); - expect(run).toHaveProperty('id', runId); - expect(run).toHaveProperty('status'); - expect(run).toHaveProperty('runtimeKind', testRuntimeKind()); - expect(run).toHaveProperty('createdAt'); + it('POST /runs returns the caller-provided sessionId when valid', async () => { + const sessionId = '123e4567-e89b-42d3-a456-426614174000'; + const result = await ctx.client.createRun({ + ...decisionModeRequest(), + session: { ...decisionModeRequest().session, sessionId }, + }); + expect(result.sessionId).toBe(sessionId); }); - it('GET 
/runs lists runs with pagination', async () => { - // Create 3 runs - await ctx.client.createRun(decisionModeRequest()); - await ctx.client.createRun(decisionModeRequest()); - await ctx.client.createRun(decisionModeRequest()); + it('POST /runs rejects an invalid sessionId', async () => { + const result = (await ctx.client.createRun({ + ...decisionModeRequest(), + session: { ...decisionModeRequest().session, sessionId: 'too-short' }, + })) as any; + expect(result.statusCode ?? 0).toBeGreaterThanOrEqual(400); + }); - await sleep(200); + it('GET /runs/:id fetches the run record with runtimeSessionId populated', async () => { + const { runId, sessionId } = await ctx.client.createRun(decisionModeRequest()); - const result = await ctx.client.listRuns({ limit: 2 }) as any; - expect(result.data).toBeDefined(); - expect(result.data.length).toBeLessThanOrEqual(2); - expect(result).toHaveProperty('total'); + const run = await waitFor( + async () => { + const r = (await ctx.client.getRun(runId)) as any; + return r.runtimeSessionId === sessionId ? 
r : null; + }, + { timeoutMs: 3000, label: 'run.runtimeSessionId populated' }, + ); + expect(run.id).toBe(runId); + expect(run.runtimeKind).toBe(testRuntimeKind()); }); it('GET /runs supports filtering by tags', async () => { await ctx.client.createRun( - decisionModeRequest({ - execution: { tags: ['special-tag'] } - }) + decisionModeRequest({ execution: { tags: ['special-tag'] } }), ); await ctx.client.createRun(decisionModeRequest()); - await sleep(200); - - // Test fixture uses sandbox mode; includeSandbox required to see sandbox runs in listing - const result = await ctx.client.listRuns({ tags: 'special-tag', includeSandbox: true }) as any; - expect(result.data).toBeDefined(); - const tagged = result.data.filter( - (r: any) => r.tags && r.tags.includes('special-tag') + const tagged = await waitFor( + async () => { + const result = (await ctx.client.listRuns({ + tags: 'special-tag', + includeSandbox: true, + })) as any; + const filtered = result.data.filter( + (r: any) => r.tags && r.tags.includes('special-tag'), + ); + return filtered.length >= 1 ? filtered : null; + }, + { timeoutMs: 3000, label: 'tagged run visible' }, ); expect(tagged.length).toBeGreaterThanOrEqual(1); }); @@ -106,39 +85,29 @@ describe('Run Lifecycle (integration)', () => { it('GET /runs/:id/state returns the projected state', async () => { const { runId } = await ctx.client.createRun(decisionModeRequest()); - // Wait for run to start processing - await sleep(500); - - const state = await ctx.client.getState(runId); + const state = await waitFor( + async () => { + const s = (await ctx.client.getState(runId)) as any; + return s.timeline?.totalEvents > 0 ? 
s : null; + }, + { timeoutMs: 5000, label: 'state populated' }, + ); expect(state).toHaveProperty('run'); expect(state).toHaveProperty('participants'); - expect(state).toHaveProperty('graph'); expect(state).toHaveProperty('decision'); - expect(state).toHaveProperty('signals'); - expect(state).toHaveProperty('progress'); expect(state).toHaveProperty('timeline'); }); - it('run transitions through lifecycle states', async () => { + it('transitions queued → starting → binding_session → running → completed without control-plane Send', async () => { const { runId } = await ctx.client.createRun(decisionModeRequest()); - // Initial state should be queued - const initial = await ctx.client.getRun(runId); - expect(['queued', 'starting', 'binding_session', 'running']).toContain( - (initial as any).status + const run = await waitFor( + async () => { + const r = (await ctx.client.getRun(runId)) as any; + return ['completed', 'failed', 'cancelled'].includes(r.status) ? r : null; + }, + { timeoutMs: 5000, label: 'run reached terminal state' }, ); - - // Wait for async execution to progress - await sleep(1000); - - const later = await ctx.client.getRun(runId); - // Should have progressed past queued - expect( - ['starting', 'binding_session', 'running', 'completed', 'failed'] - ).toContain((later as any).status); + expect(['completed', 'failed', 'cancelled']).toContain(run.status); }); }); - -function sleep(ms: number): Promise { - return new Promise((r) => setTimeout(r, ms)); -} diff --git a/test/integration/runs-messaging.integration.spec.ts b/test/integration/runs-messaging.integration.spec.ts deleted file mode 100644 index 168d62a..0000000 --- a/test/integration/runs-messaging.integration.spec.ts +++ /dev/null @@ -1,161 +0,0 @@ -import { createTestApp, TestAppContext } from '../helpers/test-app'; -import { - decisionModeRequest as decisionModeRequestBase, - decisionHappyScript -} from '../fixtures/decision-mode'; - -const isRealRuntime = - process.env.INTEGRATION_RUNTIME === 
'docker' || - process.env.INTEGRATION_RUNTIME === 'remote'; - -/** Returns the execution request, adjusting for the active runtime mode */ -function decisionModeRequest(overrides?: Record) { - const base = decisionModeRequestBase(overrides as any); - if (isRealRuntime) { - base.runtime = { kind: 'rust' }; - // Real runtime requires proto-encoded kickoff payloads - if (base.kickoff) { - for (const k of base.kickoff) { - if (k.payload && !k.payloadEnvelope) { - k.payloadEnvelope = { - encoding: 'proto' as const, - proto: { - typeName: 'macp.modes.decision.v1.ProposalPayload', - value: k.payload - } - }; - delete k.payload; - } - } - } - } - return base; -} - -/** - * Build a message body with proto encoding when running against the real runtime. - * The real Rust runtime requires proto-encoded payloads, while the mock accepts JSON. - */ -function msg( - from: string, - messageType: string, - protoTypeName: string, - payload: Record, - to?: string[] -): Record { - const base: Record = { from, messageType }; - if (to) base.to = to; - - if (isRealRuntime) { - // Real runtime: proto-encoded via payloadEnvelope - base.payloadEnvelope = { - encoding: 'proto', - proto: { typeName: protoTypeName, value: payload } - }; - } else { - // Mock runtime: plain JSON - base.payload = payload; - } - return base; -} - -describe('Run Messaging (integration)', () => { - let ctx: TestAppContext; - - beforeAll(async () => { - ctx = await createTestApp(isRealRuntime ? 
undefined : decisionHappyScript()); - }); - - afterAll(async () => { - await ctx.app.close(); - }); - - beforeEach(async () => { - await ctx.cleanup(); - }); - - it('sends a message with JSON payload and receives ack', async () => { - const { runId } = await ctx.client.createRun(decisionModeRequest()); - await sleep(500); - - const result = await ctx.client.sendMessage( - runId, - msg('evaluator', 'Evaluation', 'macp.modes.decision.v1.EvaluationPayload', { - proposalId: 'prop-1', - recommendation: 'APPROVE', - confidence: 0.9, - reason: 'Looks good' - }, ['proposer']) - ); - - expect(result).toHaveProperty('messageId'); - expect(result).toHaveProperty('ack'); - expect(result.ack).toHaveProperty('ok', true); - }); - - it('persists message.sent canonical event', async () => { - const { runId } = await ctx.client.createRun(decisionModeRequest()); - await sleep(500); - - await ctx.client.sendMessage( - runId, - msg('evaluator', 'Evaluation', 'macp.modes.decision.v1.EvaluationPayload', { - proposalId: 'prop-1', - recommendation: 'APPROVE', - confidence: 0.9, - reason: 'Approved' - }, ['proposer']) - ); - - await sleep(500); - - const events = await ctx.client.listEvents(runId) as any[]; - const sentEvents = events.filter((e: any) => e.type === 'message.sent'); - expect(sentEvents.length).toBeGreaterThanOrEqual(1); - }); - - it('sends a signal', async () => { - const { runId } = await ctx.client.createRun(decisionModeRequest()); - await sleep(500); - - const result = await ctx.client.sendSignal(runId, { - from: 'proposer', - messageType: 'Signal', - signalType: 'attention', - payload: { signalType: 'attention', data: 'Urgent review needed' } - }); - - expect(result).toBeDefined(); - }); - - it('updates context during session', async () => { - const { runId } = await ctx.client.createRun(decisionModeRequest()); - await sleep(500); - - const result = await ctx.client.updateContext(runId, { - from: 'proposer', - context: { additionalData: 'new context information' } - }); - 
- expect(result).toBeDefined(); - }); - - it('rejects message to non-existent run', async () => { - const result = await ctx.client.sendMessage( - '00000000-0000-0000-0000-000000000000', - msg('evaluator', 'Evaluation', 'macp.modes.decision.v1.EvaluationPayload', { - proposalId: 'prop-1', - recommendation: 'APPROVE', - confidence: 0.9, - reason: 'Test' - }) - ) as any; - - // Should return an error - expect(result.statusCode || result.errorCode).toBeDefined(); - }); -}); - -function sleep(ms: number): Promise { - return new Promise((r) => setTimeout(r, ms)); -} diff --git a/test/integration/runs-stream.integration.spec.ts b/test/integration/runs-stream.integration.spec.ts index c155437..bc6011a 100644 --- a/test/integration/runs-stream.integration.spec.ts +++ b/test/integration/runs-stream.integration.spec.ts @@ -1,6 +1,7 @@ import { createTestApp, TestAppContext } from '../helpers/test-app'; import { TestSSEClient } from '../helpers/sse-client'; import { decisionModeRequest, decisionHappyScript } from '../fixtures/decision-mode'; +import { waitFor } from '../helpers/wait-for'; describe('Run SSE Streaming (integration)', () => { let ctx: TestAppContext; @@ -24,19 +25,12 @@ describe('Run SSE Streaming (integration)', () => { sse.connect(runId, { includeSnapshot: true }); try { - // Wait for at least one event - await sleep(2000); + await waitFor(() => sse.events.length > 0, { timeoutMs: 5000, label: 'first SSE event' }); - // Should have received some events - expect(sse.events.length).toBeGreaterThan(0); - - // Check event structure for (const event of sse.events) { expect(event).toHaveProperty('type'); expect(event).toHaveProperty('data'); - expect(['snapshot', 'canonical_event', 'heartbeat']).toContain( - event.type - ); + expect(['snapshot', 'canonical_event', 'heartbeat']).toContain(event.type); } } finally { sse.close(); @@ -45,22 +39,22 @@ describe('Run SSE Streaming (integration)', () => { it('SSE stream includes snapshot when requested', async () => { const 
{ runId } = await ctx.client.createRun(decisionModeRequest()); - await sleep(500); const sse = new TestSSEClient(ctx.url, 'test-key-integration'); sse.connect(runId, { includeSnapshot: true }); try { - await sleep(2000); - - const snapshots = sse.getEventsByType('snapshot'); - expect(snapshots.length).toBeGreaterThanOrEqual(1); - - if (snapshots.length > 0) { - const snapshot = snapshots[0].data as Record; - expect(snapshot).toHaveProperty('run'); - expect(snapshot).toHaveProperty('participants'); - } + const snapshots = await waitFor( + () => { + const s = sse.getEventsByType('snapshot'); + return s.length > 0 ? s : null; + }, + { timeoutMs: 5000, label: 'snapshot event' }, + ); + + const snapshot = snapshots[0].data as Record; + expect(snapshot).toHaveProperty('run'); + expect(snapshot).toHaveProperty('participants'); } finally { sse.close(); } @@ -68,22 +62,20 @@ describe('Run SSE Streaming (integration)', () => { it('SSE events have sequential IDs for resume support', async () => { const { runId } = await ctx.client.createRun(decisionModeRequest()); - await sleep(500); const sse = new TestSSEClient(ctx.url, 'test-key-integration'); sse.connect(runId); try { - await sleep(2000); - - const canonicalEvents = sse.getEventsByType('canonical_event'); - if (canonicalEvents.length >= 2) { - // Event IDs should be parseable as sequence numbers - const ids = canonicalEvents - .map((e) => e.id) - .filter((id) => id !== undefined); - expect(ids.length).toBeGreaterThan(0); - } + const ids = await waitFor( + () => { + const canonical = sse.getEventsByType('canonical_event'); + if (canonical.length < 2) return null; + return canonical.map((e) => e.id).filter((id) => id !== undefined); + }, + { timeoutMs: 5000, label: '>=2 canonical events' }, + ); + expect(ids.length).toBeGreaterThan(0); } finally { sse.close(); } @@ -91,21 +83,23 @@ describe('Run SSE Streaming (integration)', () => { it('SSE stream can resume from afterSeq', async () => { const { runId } = await 
ctx.client.createRun(decisionModeRequest()); - await sleep(1000); - // Get current events to find a seq to resume from - const events = (await ctx.client.listEvents(runId)) as any[]; - if (events.length === 0) return; // Skip if no events yet + const events = await waitFor( + async () => { + const e = await ctx.client.listEvents(runId); + return Array.isArray(e) && e.length > 0 ? e : null; + }, + { timeoutMs: 5000, label: 'at least one canonical event' }, + ); - const midSeq = events[0].seq; + const midSeq = (events[0] as { seq: number }).seq; const sse = new TestSSEClient(ctx.url, 'test-key-integration'); sse.connect(runId, { afterSeq: midSeq }); try { - await sleep(2000); + await waitFor(() => sse.events.length > 0, { timeoutMs: 5000, label: 'SSE resume delivered events' }); - // All canonical events should have seq > midSeq const canonicalEvents = sse.getEventsByType('canonical_event'); for (const event of canonicalEvents) { const data = event.data as Record; @@ -118,7 +112,3 @@ describe('Run SSE Streaming (integration)', () => { } }); }); - -function sleep(ms: number): Promise { - return new Promise((r) => setTimeout(r, ms)); -} diff --git a/test/integration/task-mode.integration.spec.ts b/test/integration/task-mode.integration.spec.ts deleted file mode 100644 index 026c396..0000000 --- a/test/integration/task-mode.integration.spec.ts +++ /dev/null @@ -1,221 +0,0 @@ -import { createTestApp, TestAppContext } from '../helpers/test-app'; -import { - taskModeRequest as taskModeRequestBase, - taskHappyScript, - taskRejectionScript, - taskFailureScript -} from '../fixtures/task-mode'; - -const isRealRuntime = - process.env.INTEGRATION_RUNTIME === 'docker' || - process.env.INTEGRATION_RUNTIME === 'remote'; - -/** Returns the execution request, adjusting for the active runtime mode */ -function taskModeRequest(overrides?: Record) { - const base = taskModeRequestBase(overrides as any); - if (isRealRuntime) { - base.runtime = { kind: 'rust' }; - // Real runtime requires 
proto-encoded kickoff payloads - if (base.kickoff) { - for (const k of base.kickoff) { - if (k.payload && !k.payloadEnvelope) { - k.payloadEnvelope = { - encoding: 'proto' as const, - proto: { - typeName: 'macp.modes.task.v1.TaskRequestPayload', - value: k.payload - } - }; - delete k.payload; - } - } - } - } - return base; -} - -/** - * Build a message body with proto encoding when running against the real runtime. - * The real Rust runtime requires proto-encoded payloads, while the mock accepts JSON. - */ -function msg( - from: string, - messageType: string, - protoTypeName: string, - payload: Record, - to?: string[] -): Record { - const base: Record = { from, messageType }; - if (to) base.to = to; - - if (isRealRuntime) { - // Real runtime: proto-encoded via payloadEnvelope - base.payloadEnvelope = { - encoding: 'proto', - proto: { typeName: protoTypeName, value: payload } - }; - } else { - // Mock runtime: plain JSON - base.payload = payload; - } - return base; -} - -describe('Task Mode (integration)', () => { - let ctx: TestAppContext; - - afterEach(async () => { - if (ctx) await ctx.cleanup(); - }); - - afterAll(async () => { - if (ctx) await ctx.app.close(); - }); - - describe('Happy Path — Request, Accept, Update, Complete', () => { - beforeAll(async () => { - ctx = await createTestApp(isRealRuntime ? 
undefined : taskHappyScript()); - }); - - it('creates a task mode run', async () => { - const { runId } = await ctx.client.createRun(taskModeRequest()); - expect(runId).toBeDefined(); - - await sleep(500); - - const run = await ctx.client.getRun(runId) as any; - expect(['binding_session', 'running', 'completed']).toContain(run.status); - }); - - it('worker accepts and completes task', async () => { - const { runId } = await ctx.client.createRun(taskModeRequest()); - await sleep(500); - - // Worker accepts - await ctx.client.sendMessage( - runId, - msg('worker', 'TaskAccept', 'macp.modes.task.v1.TaskAcceptPayload', { - taskId: 'task-1' - }, ['requester']) - ); - await sleep(200); - - // Worker sends progress update - await ctx.client.sendMessage( - runId, - msg('worker', 'TaskUpdate', 'macp.modes.task.v1.TaskUpdatePayload', { - taskId: 'task-1', - progress: 0.5, - message: 'Half done' - }, ['requester']) - ); - await sleep(200); - - // Worker completes - await ctx.client.sendMessage( - runId, - msg('worker', 'TaskComplete', 'macp.modes.task.v1.TaskCompletePayload', { - taskId: 'task-1', - output: { result: 'success', itemsProcessed: 42 } - }, ['requester']) - ); - - await sleep(1000); - - const run = await ctx.client.getRun(runId) as any; - expect(['running', 'completed']).toContain(run.status); - }); - - it('tracks task progress in projection', async () => { - const { runId } = await ctx.client.createRun(taskModeRequest()); - await sleep(500); - - await ctx.client.sendMessage( - runId, - msg('worker', 'TaskAccept', 'macp.modes.task.v1.TaskAcceptPayload', { - taskId: 'task-1' - }, ['requester']) - ); - await sleep(200); - - await ctx.client.sendMessage( - runId, - msg('worker', 'TaskUpdate', 'macp.modes.task.v1.TaskUpdatePayload', { - taskId: 'task-1', - progress: 0.5, - message: 'Processing...' 
- }, ['requester']) - ); - await sleep(500); - - const state = await ctx.client.getState(runId) as any; - expect(state.participants).toBeDefined(); - expect(state.participants.length).toBeGreaterThanOrEqual(2); - }); - }); - - describe('Task Rejection', () => { - beforeAll(async () => { - if (ctx) await ctx.app.close(); - ctx = await createTestApp(isRealRuntime ? undefined : taskRejectionScript()); - }); - - it('worker rejects task', async () => { - const { runId } = await ctx.client.createRun(taskModeRequest()); - await sleep(500); - - await ctx.client.sendMessage( - runId, - msg('worker', 'TaskReject', 'macp.modes.task.v1.TaskRejectPayload', { - taskId: 'task-1', - reason: 'capacity' - }, ['requester']) - ); - - await sleep(1000); - - const events = await ctx.client.listEvents(runId) as any[]; - const sentEvents = events.filter((e: any) => e.type === 'message.sent'); - expect(sentEvents.length).toBeGreaterThanOrEqual(1); - }); - }); - - describe('Task Failure', () => { - beforeAll(async () => { - if (ctx) await ctx.app.close(); - ctx = await createTestApp(isRealRuntime ? undefined : taskFailureScript()); - }); - - it('worker accepts then fails task', async () => { - const { runId } = await ctx.client.createRun(taskModeRequest()); - await sleep(500); - - await ctx.client.sendMessage( - runId, - msg('worker', 'TaskAccept', 'macp.modes.task.v1.TaskAcceptPayload', { - taskId: 'task-1' - }, ['requester']) - ); - await sleep(200); - - await ctx.client.sendMessage( - runId, - msg('worker', 'TaskFail', 'macp.modes.task.v1.TaskFailPayload', { - taskId: 'task-1', - error: 'Processing failed', - retryable: true - }, ['requester']) - ); - - await sleep(1000); - - const events = await ctx.client.listEvents(runId) as any[]; - const sentEvents = events.filter((e: any) => e.type === 'message.sent'); - expect(sentEvents.length).toBeGreaterThanOrEqual(2); - }); - }); -}); - -function sleep(ms: number): Promise { - return new Promise((r) => setTimeout(r, ms)); -}