multiagentcoordinationprotocol · ajit-zer07 · Apr 4, 2026 · Apr 4, 2026
diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml
@@ -0,0 +1,118 @@
+# Manually dispatched integration test workflow for the MACP runtime.
+#
+# Jobs:
+#   integration - Tier 1+2 tests; needs no secrets.
+#   e2e         - Tier 3 tests (run via `cargo test -- --ignored`); opt-in through
+#                 the `run_e2e` input and needs the OPENAI_API_KEY secret.
+name: Integration Tests
+
+on:
+  workflow_dispatch:
+    inputs:
+      run_e2e:
+        description: 'Also run Tier 3 E2E tests (requires OPENAI_API_KEY secret)'
+        type: boolean
+        default: false
+
+# Least privilege: both jobs only need to read the repository contents.
+permissions:
+  contents: read
+
+env:
+  CARGO_TERM_COLOR: always
+
+jobs:
+  integration:
+    name: Tier 1+2 Integration Tests
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Install Rust toolchain
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Cache cargo registry and build
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cargo/registry
+            ~/.cargo/git
+            target
+            integration_tests/target
+          # Cargo.lock is listed in .gitignore, so a fresh checkout may have no
+          # lockfile and hashFiles() would then produce a constant key. Hashing
+          # the manifests as well rotates the cache when dependencies change.
+          key: ${{ runner.os }}-integration-${{ hashFiles('**/Cargo.lock', '**/Cargo.toml') }}
+          restore-keys: ${{ runner.os }}-integration-
+
+      - name: Install protoc
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y protobuf-compiler
+
+      - name: Build runtime binary
+        run: cargo build
+
+      - name: Run Tier 1+2 integration tests
+        working-directory: integration_tests
+        env:
+          # Relative path: one level up from integration_tests/ to the root target dir.
+          MACP_TEST_BINARY: ../target/debug/macp-runtime
+        run: cargo test -- --test-threads=1
+        timeout-minutes: 10
+
+  e2e:
+    name: Tier 3 E2E Tests
+    runs-on: ubuntu-latest
+    # The `inputs` context keeps run_e2e as a boolean, so no comparison
+    # against the string 'true' is needed.
+    if: ${{ inputs.run_e2e }}
+
+    steps:
+      - name: Verify OPENAI_API_KEY is configured
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        run: |
+          # An unset secret expands to an empty string; fail before the build
+          # rather than after it.
+          if [ -z "$OPENAI_API_KEY" ]; then
+            echo "::error::OPENAI_API_KEY secret is not set; Tier 3 E2E tests require it."
+            exit 1
+          fi
+
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Install Rust toolchain
+        uses: dtolnay/rust-toolchain@stable
+
+      - name: Cache cargo registry and build
+        uses: actions/cache@v4
+        with:
+          path: |
+            ~/.cargo/registry
+            ~/.cargo/git
+            target
+            integration_tests/target
+          # Same key as the integration job so both jobs share one cache.
+          key: ${{ runner.os }}-integration-${{ hashFiles('**/Cargo.lock', '**/Cargo.toml') }}
+          restore-keys: ${{ runner.os }}-integration-
+
+      - name: Install protoc
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y protobuf-compiler
+
+      - name: Build runtime binary
+        run: cargo build
+
+      - name: Run Tier 3 E2E tests
+        working-directory: integration_tests
+        env:
+          MACP_TEST_BINARY: ../target/debug/macp-runtime
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        # --ignored restricts the run to tests marked #[ignore].
+        run: cargo test -- --ignored --test-threads=1
+        timeout-minutes: 10
diff --git a/.gitignore b/.gitignore
@@ -21,6 +21,9 @@ Cargo.lock
 /temp/
 CLAUDE.md
 
+# Integration tests build artifacts
+integration_tests/target/
+
 # OS
 .DS_Store
 Thumbs.db

diff --git a/Makefile b/Makefile
@@ -1,4 +1,4 @@
-.PHONY: setup build test test-integration test-conformance test-all fmt clippy check audit coverage sync-protos sync-protos-local check-protos
+.PHONY: setup build test test-integration test-conformance test-all fmt clippy check audit coverage sync-protos sync-protos-local check-protos test-integration-grpc test-integration-agents test-integration-e2e test-integration-hosted
 
 SPEC_PROTO_DIR := ../multiagentcoordinationprotocol/schemas/proto
 PROTO_FILES := macp/v1/envelope.proto macp/v1/core.proto macp/modes/decision/v1/decision.proto macp/modes/proposal/v1/proposal.proto macp/modes/task/v1/task.proto macp/modes/handoff/v1/handoff.proto macp/modes/quorum/v1/quorum.proto
@@ -55,6 +55,25 @@ sync-protos-local:
 	done
 	@echo "Done. Run 'git diff proto/' to review changes."
 
+## Integration tests (separate crate in integration_tests/; gRPC, Rig agents)
+# Tier 1: scripted gRPC protocol tests.
+test-integration-grpc:
+	cd integration_tests && cargo test --test tier1 -- --test-threads=1
+
+# Tier 2: Rig tool tests.
+test-integration-agents:
+	cd integration_tests && cargo test --test tier2 -- --test-threads=1
+
+# Tier 3: runs only the #[ignore]d tests; the caller must provide OPENAI_API_KEY.
+test-integration-e2e:
+	cd integration_tests && cargo test -- --ignored --test-threads=1
+
+# Non-ignored tests against the runtime at MACP_TEST_ENDPOINT. Fails fast when
+# the variable is empty, because the suite would otherwise start a local server.
+test-integration-hosted:
+	@test -n "$$MACP_TEST_ENDPOINT" || { echo "MACP_TEST_ENDPOINT must be set to run against a hosted runtime" >&2; exit 1; }
+	cd integration_tests && cargo test -- --test-threads=1
+
 ## Check if local protos match BSR
 check-protos:
 	@TMPDIR=$$(mktemp -d); \

diff --git a/README.md b/README.md
@@ -279,12 +279,36 @@ The runtime requires write access to `MACP_DATA_DIR`. Check directory permission
 **Proto drift / `make check-protos` failure**
 Run `make sync-protos` to update local proto files from BSR.
 
+## Testing
+
+```bash
+cargo test --all-targets          # Unit tests + Rust integration tests
+make test-conformance             # JSON fixture-driven conformance suite
+```
+
+A separate integration test crate (`integration_tests/`) tests the runtime through the real gRPC boundary:
+
+```bash
+cargo build
+cd integration_tests
+MACP_TEST_BINARY=../target/debug/macp-runtime cargo test -- --test-threads=1
+```
+
+The integration suite has three tiers:
+
+- **Tier 1 (Protocol)** — 47 scripted gRPC tests covering all modes, error paths, signals, version binding, dedup, and RFC cross-cutting features
+- **Tier 2 (Rig Tools)** — 5 tests using [Rig](https://rig.rs) agent framework `Tool` implementations for all MACP operations
+- **Tier 3 (E2E)** — 3 tests with real OpenAI GPT-4o-mini agents coordinating through the runtime (requires `OPENAI_API_KEY`)
+
+See `docs/testing.md` for full details on running locally, in CI, or against a hosted runtime.
+
 ## Development notes
 
 - The RFC/spec repository remains the normative source for protocol semantics.
 - Five standards-track modes use the canonical `macp.mode.*` identifiers.
 - `multi_round` is a built-in extension (`ext.multi_round.v1`) — not standards-track, but ships with the runtime and enforces strict `SessionStart`.
 - Extension modes can be dynamically registered, unregistered, and promoted via `RegisterExtMode`, `UnregisterExtMode`, and `PromoteMode` RPCs.
 - `StreamSession` is enabled and binds one gRPC stream to one session, emitting accepted envelopes in order.
+- `WatchSignals` broadcasts ambient Signal envelopes to all subscribers in real time.
 
 See `docs/README.md` and `docs/examples.md` for the updated local development and usage guidance.
diff --git a/docs/protocol.md b/docs/protocol.md
@@ -22,6 +22,7 @@ Clients should call `Initialize` before using the runtime.
 - `ListRoots`
 - `WatchModeRegistry`
 - `WatchRoots`
+- `WatchSignals`
 - `ListExtModes`
 - `RegisterExtMode`
 - `UnregisterExtMode`
@@ -31,6 +32,7 @@ Clients should call `Initialize` before using the runtime.
 
 - `WatchModeRegistry` — sends the current registry state, then fires `RegistryChanged` on register/unregister/promote
 - `WatchRoots` — sends the current roots state, then holds the stream open
+- `WatchSignals` — broadcasts ambient Signal envelopes to all subscribers in real time; Signals correlate with sessions via `SignalPayload.correlation_session_id` but do not enter session history
 
 ## Extension mode lifecycle RPCs
 

diff --git a/docs/testing.md b/docs/testing.md
@@ -0,0 +1,131 @@
+# Testing
+
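+The runtime's own test suite has three levels — unit tests, in-crate Rust integration tests, and a JSON fixture-driven conformance suite. A separate integration test crate additionally exercises the real gRPC boundary with scripted clients, Rig tools, and (optionally) real LLM agents.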
+
+## Unit tests and conformance
+
+```bash
+cargo test --all-targets          # unit tests + Rust integration tests
+make test-conformance             # JSON fixture-driven conformance suite
+make test-all                     # fmt → clippy → test → integration → conformance
+```
+
+Unit tests live inside `src/` modules (`#[cfg(test)]`). Conformance fixtures are in `tests/conformance/` and exercise each mode's happy path and reject paths from JSON definitions.
+
+## Integration test suite
+
+A separate Rust crate at `integration_tests/` tests the runtime through the real gRPC transport boundary. It is **not** part of the main Cargo build — `cargo build --release` ignores it entirely.
+
+### Architecture
+
+```
+integration_tests/
+  Cargo.toml              # Depends on macp-runtime (lib) + rig-core + tonic
+  src/
+    config.rs             # Test target configuration (local / CI / hosted)
+    server_manager.rs     # Start/stop runtime as a subprocess on a free port
+    helpers.rs            # Envelope builders, payload helpers, gRPC wrappers
+    macp_tools/           # Rig Tool implementations for all MACP operations
+  tests/
+    tier1.rs → tier1_protocol/    # Scripted gRPC protocol tests
+    tier2.rs → tier2_agents/      # Rig agent tool tests (no LLM)
+    tier3.rs → tier3_e2e/         # Real OpenAI LLM agent tests
+```
+
+### Three tiers
+
+| Tier | What | LLM | Tests | Speed |
+|------|------|-----|-------|-------|
+| **Tier 1: Protocol** | Scripted gRPC calls testing all modes, error paths, RFC cross-cutting features (signals, dedup, version binding, cancel auth) | None | 47 | <1s |
+| **Tier 2: Rig Tools** | MACP operations as Rig `Tool` trait implementations, invoked via `ToolSet::call()` | None | 5 | <1s |
+| **Tier 3: E2E** | Real GPT-4o-mini agents coordinating through the runtime. Orchestrator as plain code, specialists as LLM. Parallel execution. Signals on ambient plane. | OpenAI | 3 | ~15s |
+
+### Running integration tests
+
+```bash
+# Build the runtime first
+cargo build
+
+# Run Tier 1 + 2 (no API keys needed)
+cd integration_tests
+MACP_TEST_BINARY=../target/debug/macp-runtime cargo test -- --test-threads=1
+
+# Run individual tiers
+MACP_TEST_BINARY=../target/debug/macp-runtime cargo test --test tier1 -- --test-threads=1
+MACP_TEST_BINARY=../target/debug/macp-runtime cargo test --test tier2 -- --test-threads=1
+
+# Run Tier 3 E2E (requires OPENAI_API_KEY)
+OPENAI_API_KEY=sk-... MACP_TEST_BINARY=../target/debug/macp-runtime cargo test --test tier3 -- --ignored --test-threads=1
+
+# Run against a hosted runtime (no local server started)
+MACP_TEST_ENDPOINT=host:50051 cargo test -- --test-threads=1
+```
+
+Or use Makefile targets from the project root:
+
+```bash
+make test-integration-grpc      # Tier 1
+make test-integration-agents    # Tier 2
+make test-integration-e2e       # Tier 3 (needs OPENAI_API_KEY)
+make test-integration-hosted    # Tier 1 + 2 against MACP_TEST_ENDPOINT (Tier 3 needs --ignored)
+```
+
+### Configuration
+
+| Variable | Purpose | Default |
+|----------|---------|---------|
+| `MACP_TEST_BINARY` | Path to runtime binary (skip cargo build) | Builds from parent crate |
+| `MACP_TEST_ENDPOINT` | Connect to hosted runtime (skip server start) | Start local server |
+| `MACP_TEST_TLS` | Use TLS for hosted connection | `0` |
+| `MACP_TEST_AUTH_TOKEN` | Bearer token for hosted runtime | Dev headers |
+| `OPENAI_API_KEY` | Required for Tier 3 E2E tests | Tier 3 tests skip if unset |
+
+### Tier 1 coverage
+
+Protocol tests exercise every mode through gRPC:
+
+- **Initialize**: protocol negotiation, version rejection, runtime info
+- **Decision mode**: happy path, duplicate dedup, non-initiator commit rejection
+- **Proposal mode**: happy path, premature commitment rejection
+- **Task mode**: happy path, non-initiator request rejection, duplicate task rejection
+- **Handoff mode**: happy path, accept-without-offer rejection
+- **Quorum mode**: happy path, approve-before-request, premature commitment
+- **Multi-round mode**: happy path, pre-convergence commit rejection
+- **Signals**: valid signal accepted, session_id/mode violations rejected, WatchSignals broadcast
+- **Version binding**: commitment with wrong mode_version/config_version rejected
+- **Deduplication**: rejected messages don't consume dedup slots, duplicate SessionStart rejected
+- **CancelSession**: non-initiator rejection
+- **Session lifecycle**: TTL expiry, concurrent sessions, parallel session independence
+- **Mode registry**: list/register/unregister extension modes
+- **Discovery**: GetManifest returns all modes, Initialize rejects unsupported version
+
+### Tier 2: Rig agent tools
+
+Each MACP operation (start session, propose, vote, commit, etc.) is exposed as a type implementing Rig's `Tool` trait. Tier 2 tests validate that these tools work correctly by calling them through `ToolSet::call()` — the same interface an LLM agent would use. Tests cover all 5 standard modes.
+
+### Tier 3: E2E with real LLM
+
+Three tests use real OpenAI GPT-4o-mini agents:
+
+1. **Decision with signals**: Orchestrator (code) proposes → 3 specialist LLMs evaluate in parallel → each sends progress/completed Signals on the ambient plane → orchestrator commits. Demonstrates both coordination plane and ambient plane simultaneously.
+
+2. **Decision**: Same as above without signals — simpler version.
+
+3. **Task delegation**: Planner (code) creates task → Worker (LLM) accepts and completes → planner commits.
+
+Architecture follows the RFC:
+- Orchestrator/planner operations are **plain code** (deterministic, no LLM needed)
+- Specialist/worker reasoning uses **real LLM** (where domain expertise matters)
+- Agents run **in parallel** (runtime serializes by acceptance order)
+- LLM reasoning happens **outside the session** (ambient plane)
+- Only the resulting Envelope enters the session
+
+### CI/CD
+
+Integration tests run via manual GitHub Actions dispatch (not on every PR):
+
+```
+Actions → "Integration Tests" → Run workflow → optionally check "Run Tier 3 E2E"
+```
+
+Tier 3 E2E requires the `OPENAI_API_KEY` repository secret.
diff --git a/integration_tests/Cargo.toml b/integration_tests/Cargo.toml
@@ -0,0 +1,31 @@
+# Stand-alone crate for the gRPC integration tests; never published.
+[package]
+name = "macp-integration-tests"
+version = "0.0.0"
+edition = "2021"
+publish = false
+
+[dependencies]
+# Runtime under test, pulled in from the parent directory.
+macp-runtime = { path = ".." }
+
+# gRPC client stack.
+tonic = { version = "0.14", features = ["transport"] }
+prost = "0.14"
+
+# Async runtime. `full` already enables `process`, so it is not listed separately.
+tokio = { version = "1", features = ["full"] }
+
+# Rig agent framework.
+rig-core = "0.34"
+
+# General-purpose helpers.
+uuid = { version = "1", features = ["v4"] }
+chrono = "0.4"
+serde = { version = "1", features = ["derive"] }
+serde_json = "1"
+anyhow = "1"
+thiserror = "1"
+tracing = "0.1"
+tracing-subscriber = { version = "0.3", features = ["env-filter"] }
+async-trait = "0.1"
diff --git a/integration_tests/src/config.rs b/integration_tests/src/config.rs
@@ -0,0 +1,59 @@
+use std::env;
+
+/// Configuration for integration test target.
+///
+/// Supports three modes:
+/// - **Local dev**: no env vars — builds parent crate, starts server on free port
+/// - **CI**: `MACP_TEST_BINARY` set — uses pre-built binary, starts server
+/// - **Hosted**: `MACP_TEST_ENDPOINT` set — connects directly, no server management
+///
+/// A variable that is set but blank (empty or whitespace only) counts as unset.
+pub struct TestConfig {
+    /// gRPC endpoint to connect to (e.g. "http://127.0.0.1:50051")
+    pub endpoint: Option<String>,
+    /// Use TLS for the connection
+    pub use_tls: bool,
+    /// Bearer token for hosted runtime authentication
+    pub auth_token: Option<String>,
+    /// Path to a pre-built runtime binary
+    pub binary_path: Option<String>,
+}
+
+impl TestConfig {
+    /// Builds the configuration from the `MACP_TEST_*` environment variables.
+    ///
+    /// Values are trimmed, and blank values are treated as unset so that an
+    /// empty variable (for example an unconfigured CI secret) does not select
+    /// hosted mode or bearer-token auth by accident. `MACP_TEST_TLS` enables
+    /// TLS when it is `1` or `true` (ASCII case-insensitive).
+    pub fn from_env() -> Self {
+        Self {
+            endpoint: non_blank_var("MACP_TEST_ENDPOINT"),
+            use_tls: non_blank_var("MACP_TEST_TLS")
+                .is_some_and(|flag| flag == "1" || flag.eq_ignore_ascii_case("true")),
+            auth_token: non_blank_var("MACP_TEST_AUTH_TOKEN"),
+            binary_path: non_blank_var("MACP_TEST_BINARY"),
+        }
+    }
+
+    /// Whether we need to start a local server (no external endpoint provided).
+    pub fn needs_local_server(&self) -> bool {
+        self.endpoint.is_none()
+    }
+
+    /// Whether to use dev-mode headers (x-macp-agent-id) instead of bearer tokens.
+    pub fn use_dev_headers(&self) -> bool {
+        self.auth_token.is_none()
+    }
+}
+
+/// Reads `name` from the environment and trims it.
+///
+/// Returns `None` when the variable is unset, is not valid Unicode, or is blank
+/// after trimming.
+fn non_blank_var(name: &str) -> Option<String> {
+    env::var(name)
+        .ok()
+        .map(|raw| raw.trim().to_owned())
+        .filter(|value| !value.is_empty())
+}